
/*
 * UTIL/DEXPIREOVER.C
 *
 * (c)Copyright 1998, Matthew Dillon, All Rights Reserved.  Refer to
 *    the COPYRIGHT file in the base directory of this distribution
 *    for specific rights granted.
 *
 * dexpireover [-a] [-v[N]] [-w grpwildcard] [-f dactive.kp] [-NB] [-n] [-O[n]]
 *             [-s] [-R] [-e] [-o] [-pN]
 *
 * 	In this incarnation, dexpireover cleans up overview information as
 *	specified in the 'x' fields in dexpire.ctl (see the sample dexpire.ctl)
 *
 *	This is still very rough.  I still need to add in a free-space-target
 *	option and have dexpireover adjust the expiration dynamically based on
 *	free space.  IT DOESN'T DO THIS YET!! You have to make sure your
 *	expiration ('x' option) in dexpire.ctl is not set too high.
 *
 *	I also need to have a remote-server scanning option to allow 
 *	dexpireover to adjust expirations based on remote server retentions.
 *	It does not do this yet either.
 *
 *	Modifications by Nickolai Zeldovich to allow spool-based expiration
 *	(ExpireBySpool and ExpireFromFile)
 *
 *	Specifying the -pN option will fork N dexpireover processes and
 *	perform the expiration process in parallel. This is useful to speed
 *	up ExpireBySpool where dhistory lookups take a long time.
 */

#include <dreaderd/defs.h>

#include <errno.h>

/*
 * Group - in-memory record for one newsgroup.
 *
 *	Records are created by EnterGroup(), which stores the group name
 *	directly behind the structure, and are chained into GHash[].
 */
typedef struct Group {
    struct Group *gr_Next;	/* next group in the same GHash[] chain */
    int		gr_State;	/* GRF_* flag bits */
    int		gr_StartNo;	/* beginning article number (active "NB" field) */
    int		gr_EndNo;	/* ending article number (active "NE" field) */
    int		gr_CTS;		/* non-zero only when a CTS field still has to be written */
    int		gr_LMTS;	/* value of the active "LMTS" field, 0 if absent */
    char	*gr_GroupName;	/* NUL-terminated group name */
    char	*gr_Flags;	/* malloc'd copy of the group flags, maintained by SetField() */
    hash_t	gr_Hv;		/* hhash() of the group name */
    int		gr_UpdateFlag;	/* NOTE(review): not referenced in the visible part of this file */
} Group;

/*
 * Bits for Group.gr_State.
 *
 * NOTE(review): GRF_DESCRIPTION, GRF_FROMLOCAL and GRF_FROMREMOTE are not
 * referenced in the visible part of this file.
 */
#define GRF_DESCRIPTION 0x00000001
#define GRF_STARTNO     0x00000002	/* EnterGroup() was given a start number >= 0 */
#define GRF_ENDNO       0x00000004	/* EnterGroup() was given an end number >= 0 */
#define GRF_FLAGS       0x00000008	/* EnterGroup() was given a flags string */
#define GRF_FROMLOCAL   0x00000800
#define GRF_NEW         0x00001000	/* record was just created by EnterGroup() */
#define GRF_FROMREMOTE  0x00002000
#define GRF_MODIFIED    0x00008000	/* some field changed since the record was loaded */
#define GRF_EDITEDBEG   0x00010000	/* gr_StartNo was changed and "NB" needs a writeback */

/*
 * Size of the GHash[] group hash table and the mask used to index it
 * (GHSIZE must stay a power of 2 for the mask to work).
 */
#define GHSIZE		1024
#define GHMASK		(GHSIZE-1)

/*
 * ReplaceFile - one temporary ".data." replacement file being built by
 *		 RewriteData()/rewriteDataFile().
 */
typedef struct ReplaceFile {
    struct ReplaceFile	*rf_Next;	/* next replacement file in the list */
    int			rf_Fd;		/* descriptor of the temporary file, < 0 if open() failed */
    int			rf_ArtBase;	/* base article number the data file covers */
} ReplaceFile;

/*
 * NOTE(review): neither of the two sizes below is referenced in the visible
 * part of this file; presumably they are used by ReadDExpOverList() and
 * expOverListCheckExpired() -- confirm.
 */
#define DEXPOVER_READ_BUFFER_SIZE	4096
#define DEXPOVER_HASH_SIZE		32768

/* Since we only look at the first char of the first-level directory,
 * we do not support more than 16 forks.  main() clamps the -p value
 * to this limit.
 */
#define	MAX_PAR_COUNT			16

/*
 * These aren't really buckets, they're parts of a bucket: each bucket_t
 * is one element of a singly linked chain.
 */

typedef struct bucket_t {
    struct bucket_t *next;	/* next element of the chain */
    hash_t hash_item;		/* stored hash; presumably a message-id hash -- confirm */
    short valid;		/* NOTE(review): meaning not visible in this part of the file */
} bucket_t;

KPDB  *KDBActive;		/* active file database, opened in main() */
Group *GHash[GHSIZE];		/* group hash table, indexed by (hash h1 & GHMASK) */

/*
 * Prototypes for the functions in this file
 */
void DeleteJunkFile(const char *dirPath, const char *name);
void ProcessOverviewFile(const char *dirPath, const char *name);
char *allocTmpCopy(const char *buf, int bufLen);
Group *EnterGroup(const char *groupName, int begNo, int endNo, int lmts, int cts, const char *flags);
Group *FindGroupByHash(hash_t *hv);
int SetField(char **pptr, const char *str);
void ExpireByDays(Group *group, int fd, OverHead *oh);
void ExpireBySpool(Group *group, int fd, OverHead *oh);
void ExpireFromFile(Group *group, int fd, OverHead *oh);
void RewriteData(Group *group, int fd, OverHead *oh, const char *dirPath, int h1, int h2);
void rewriteDataFile(Group *group, ReplaceFile **prf, const char *cacheBase, int cacheSize, const OverArt *oa, OverArt *ob, const char *dirPath, int h1, int h2);
void ResizeGroup(Group *group, int fd, OverHead *oh, int maxArts);
int nearestPower(int n);
void ReadDExpOverList(void);
int expOverListCheckExpired(hash_t *hv);
int hexCharToInt(char c);

/*
 * Command line options and run statistics
 */
int UpdateBegArtNoOpt = 0;	/* -a / -NB: write changed "NB" fields back to the active file */
int UpdateCTSOpt = 0;		/* -a / -U: add missing "CTS" fields */
int RewriteDataOpt = 0;		/* -R: rewrite over. and data. files */
int BadGroups = 0;		/* files seen whose group is not in the active file */
int ResizedGroups = 0;		/* reported by main(); presumably counted in ResizeGroup() -- confirm */
int NoResizedGroups = 0;	/* resize attempts that ResizeGroup() had to give up on */
int GoodGroups = 0;		/* files seen whose group is in the active file */
int ActiveUpdated = 0;		/* NOTE(review): not referenced in the visible part of this file */
int VerboseOpt = -1;		/* -v[N]; -1 means "not given", -n raises it to 1 */
int ResizeOpt = -1;		/* -a / -s: resize over. index files when worthwhile */
int ForReal = 1;		/* cleared by -n (dry run) */
int OldGroups = 0;		/* -O[n]: delete groups whose LMTS is older than n days */
int UseExpireBySpool = 0;	/* -e: expire via history lookups (ExpireBySpool) */
int UseExpireFromFile = 0;	/* -o: expire via the expired-hash list (ExpireFromFile) */
int ParallelCount = 0;		/* -pN: number of parallel worker processes */
char *Wild;			/* -w: group wildcard, NULL if none was given */

bucket_t *dexpover_msgid_hash;	/* NOTE(review): not referenced in the visible part; presumably the
				 * hash table filled by ReadDExpOverList() -- confirm */

/*
 * dexpOptionArg() - return the argument of the option being parsed.
 *
 *	ptr points just past the option letter.  If text is attached there
 *	("-fname") that text is the argument, otherwise the next element
 *	of av is consumed and *pi is advanced.  Exits with an error message
 *	if the command line ends before the argument.
 */

static char *
dexpOptionArg(char *ptr, int ac, char **av, int *pi)
{
    if (*ptr)
	return(ptr);
    if (*pi + 1 >= ac) {
	fprintf(stderr, "Option %s requires an argument\n", av[*pi]);
	exit(1);
    }
    *pi += 1;
    return(av[*pi]);
}

/*
 * main() - parse options, optionally fork parallel workers, load the
 *	    active file into GHash[], process every over./data. file
 *	    below the group home directory and finally write changed
 *	    fields back to the active file.
 *
 *	With -pN the parent only forks and waits; each child handles the
 *	first-level directories whose first character maps to its index.
 *	Returns 0 on success; fatal errors exit(1).
 */

int
main(int ac, char **av)
{
    int i;
    int freeSpaceTarget = 0;
    char *dbfile = NULL;
    int ParallelIdx = 0;
    int ParallelPid[MAX_PAR_COUNT];

    LoadDiabloConfig(ac, av);

    for (i = 1; i < ac; ++i) {
	char *ptr = av[i];
	if (*ptr != '-') {
	    fprintf(stderr, "Unexpected argument: %s\n", ptr);
	    exit(1);
	}
	ptr += 2;
	switch(ptr[-1]) {
	case 'C':           /* parsed by LoadDiabloConfig */
	    if (*ptr == 0)
		++i;
	    break;
	case 'a':
	    UpdateBegArtNoOpt = 1;
	    UpdateCTSOpt = 1;
	    ResizeOpt = 1;
	    if (freeSpaceTarget == 0)
		freeSpaceTarget = 512;
	    break;
	case 's':
	    ResizeOpt = 1;
	    break;
	case 'v':
	    VerboseOpt = (*ptr) ? strtol(ptr, NULL, 0) : 1;
	    break;
	case 'r':
	    ptr = dexpOptionArg(ptr, ac, av, &i);
	    freeSpaceTarget = strtol(ptr, NULL, 0);
	    break;
	case 'w':
	    Wild = dexpOptionArg(ptr, ac, av, &i);
	    break;
	case 'n':
	    ForReal = 0;
	    if (VerboseOpt < 0)
		VerboseOpt = 1;
	    break;
	case 'f':
	    dbfile = dexpOptionArg(ptr, ac, av, &i);
	    break;
	case 'e':
	    UseExpireBySpool = 1;
	    break;
	case 'o':
	    UseExpireFromFile = 1;
	    break;
	case 'p':
	    if (*ptr)
		ParallelCount = strtol(ptr, NULL, 0);
	    else
		ParallelCount = 1;
	    if (ParallelCount > MAX_PAR_COUNT)
		ParallelCount = MAX_PAR_COUNT;
	    if (ParallelCount < 0)	/* a negative count makes no sense */
		ParallelCount = 0;
	    /* Note that a parcount of 1 doesn't do anything useful. */
	    break;
	case 'R':
	    RewriteDataOpt = 1;
	    break;
	case 'O':
	    if (*ptr)
		OldGroups = strtol(ptr, NULL, 0);
	    else
		OldGroups = 30 * 3;	/* 3 months by default */
	    break;
	case 'U':
	    UpdateCTSOpt = 1;
	    break;
	case 'N':
	    while (*ptr) {
		switch(*ptr) {
		case 'B':
		    UpdateBegArtNoOpt = 1;
		    break;
		default:
		    break;
		}
		++ptr;
	    }
	    break;
	default:
	    fprintf(stderr, "Unknown option: %s\n", ptr - 2);
	    exit(1);
	}
    }

    /*
     * Read in the list of expired msgid hashes, if we are using it
     */

    if (UseExpireFromFile)
	ReadDExpOverList();

    /*
     * fork off parallel copies of dexpireover at this point
     */

    if (ParallelCount) {
	int isChild = 0;
	int started = 0;
	int forkFailed = 0;

	for (ParallelIdx = 0; ParallelIdx < ParallelCount; ParallelIdx++) {
	    pid_t pid = fork();

	    if (pid == 0) {
		isChild = 1;
		break;
	    }
	    if (pid < 0) {
		/*
		 * Do not record the failed slot, otherwise the parent
		 * would wait forever for a child that never existed.
		 */
		perror("fork");
		forkFailed = 1;
		ParallelPid[ParallelIdx] = 0;
		continue;
	    }
	    ParallelPid[ParallelIdx] = pid;
	    ++started;
	}

	/*
	 * Line-buffer stdout so output of the workers interleaves on
	 * line boundaries.  A NULL buffer lets stdio allocate it.
	 */
	setvbuf(stdout, NULL, _IOLBF, BUFSIZ);

	if (isChild == 0) {
	    int remaining = started;

	    while (remaining > 0) {
		pid_t pid = wait3(NULL, 0, NULL);

		if (pid > 0) {
		    for (i = 0; i < ParallelCount; i++) {
			if (ParallelPid[i] == pid) {
			    ParallelPid[i] = 0;
			    --remaining;
			}
		    }
		} else if (pid < 0 && errno == EINTR) {
		    continue;		/* interrupted, retry */
		} else {
		    break;		/* no children left to wait for */
		}
	    }

	    printf("Parallelizing dexpireover (%d forks) finished.\n",
	       ParallelCount);
	    if (forkFailed) {
		fprintf(stderr, "Only %d of %d forks could be started, some directories were not processed\n",
		    started,
		    ParallelCount
		);
		exit(1);
	    }
	    exit(0);
	}
    }

    /*
     * Open active file database
     */

    if (dbfile) {
	KDBActive = KPDBOpen(dbfile, O_RDWR);
    } else {
	KDBActive = KPDBOpen(PatDbExpand(ReaderDActivePat), O_RDWR);
    }
    if (KDBActive == NULL) {
	fprintf(stderr, "Unable to open dactive.kp\n");
	exit(1);
    }
    if (OldGroups && Wild == NULL) {
	fprintf(stderr, "group wildcard must be specified if -O option used\n");
	exit(1);
    }

    LoadExpireCtl(1);

    /*
     * Open the history file if we are going to expire based on local spool
     */

    if (UseExpireBySpool)
	HistoryOpen(NULL, 0);

    /*
     * scan dactive.kp
     */

    {
	int recLen;
	int recOff;
	int cts0 = (int)time(NULL);

	for (recOff = KPDBScanFirst(KDBActive, 0, &recLen);
	     recOff;
	     recOff = KPDBScanNext(KDBActive, recOff, 0, &recLen)
	) {
	    int groupLen;
	    int flagsLen;
	    const char *rec = KPDBReadRecordAt(KDBActive, recOff, 0, NULL);
	    const char *group = KPDBGetField(rec, recLen, NULL, &groupLen, NULL);
	    const char *flags = KPDBGetField(rec, recLen, "S", &flagsLen, "y");
	    int begNo = strtol(KPDBGetField(rec, recLen, "NB", NULL, "-1"), NULL, 10);
	    int endNo = strtol(KPDBGetField(rec, recLen, "NE", NULL, "-1"), NULL, 10);
	    int lmts = (int)strtoul(KPDBGetField(rec, recLen, "LMTS", NULL, "0"), NULL, 16);
	    int cts = (int)strtoul(KPDBGetField(rec, recLen, "CTS", NULL, "0"), NULL, 16);
	    Group *grp;

	    if (cts == 0)	/* enter non-zero cts only if group has no CTS field */
		cts = cts0;
	    else
		cts = 0;

	    if (group)
		group = allocTmpCopy(group, groupLen);
	    if (flags)
		flags = allocTmpCopy(flags, flagsLen);

	    /*
	     * ignore bad group or group that does not match the wildcard
	     */

	    if (group == NULL)
		continue;
	    if (Wild && WildCmp(Wild, group) != 0)
		continue;

	    grp = EnterGroup(
		group,
		begNo,
		endNo,
		lmts,
		cts,
		flags
	    );

	    /*
	     * The record reflects the database as-is, so it is neither
	     * new nor modified yet.
	     */
	    grp->gr_State &= ~(GRF_NEW|GRF_MODIFIED);
	}
    }

    /*
     * scan /news/spool/group/
     */

    {
	DIR *dir;

	if ((dir = opendir(PatExpand(GroupHomePat))) != NULL) {
	    den_t *den;
	    size_t tmpSize = strlen(PatExpand(GroupHomePat)) + 32;
	    char *tmp = malloc(tmpSize);

	    if (tmp == NULL) {
		fprintf(stderr, "Unable to allocate memory for directory path\n");
		exit(1);
	    }

	    while ((den = readdir(dir)) != NULL) {
		if (strlen(den->d_name) == 2 &&
		    isalnum((unsigned char)den->d_name[0]) &&
		    isalnum((unsigned char)den->d_name[1]) &&
		    /* We explicitly use the first char, because overview
		     * sizes appear to be not evenly distributed wrt second
		     * char.
		     */
		    (ParallelCount ? ((hexCharToInt(den->d_name[0]) % ParallelCount) ==
				      ParallelIdx) : 1)
		) {
		    DIR *dir2;

		    snprintf(tmp, tmpSize, "%s/%s", PatExpand(GroupHomePat), den->d_name);
		    if ((dir2 = opendir(tmp)) != NULL) {
			den_t *den2;

			/*
			 * First pass: over. files.  They must all be
			 * processed before any data. file.
			 */
			while ((den2 = readdir(dir2)) != NULL) {
			    if (strncmp(den2->d_name, "over.", 5) == 0)
				ProcessOverviewFile(tmp, den2->d_name);
			}
			rewinddir(dir2);
			while ((den2 = readdir(dir2)) != NULL) {
			    /*
			     * delete junk files from previously interrupted
			     * dexpireover -R
			     */
			    if (strncmp(den2->d_name, ".data.", 6) == 0)
				DeleteJunkFile(tmp, den2->d_name);

			    /*
			     * process data. files
			     */
			    if (strncmp(den2->d_name, "data.", 5) == 0)
				ProcessOverviewFile(tmp, den2->d_name);
			}
			closedir(dir2);
		    }
		}
	    }
	    closedir(dir);
	    free(tmp);
	}
    }

    printf("Scanned %d groups, %d were bad, %d/%d resized\n",
	GoodGroups + BadGroups,
	BadGroups,
	ResizedGroups,
	NoResizedGroups + ResizedGroups
    );

    /*
     * Writeback active file
     */

    if (UpdateBegArtNoOpt || OldGroups || UpdateCTSOpt) {
	int t0 = (int)time(NULL);	/* int-sized for LMTS compare */
	int t;
	int i;
	int count = 0;

	/* groups whose LMTS is older than t are considered stale */
	t = t0 - OldGroups * (60 * 60 * 24);

	for (i = 0; i < GHSIZE; ++i) {
	    Group *group;

	    for (group = GHash[i]; group; group = group->gr_Next) {
		/*
		 * 'add' becomes 1 if anything was (or would be) changed
		 * for this group; it feeds the final update count.
		 */
		int add = 0;

		if (OldGroups) {
		    if (group->gr_LMTS) {
			/*
			 * Existing LMTS
			 */
			if ((int)(t - group->gr_LMTS) > 0) {
			    if (ForReal)
				KPDBDelete(KDBActive, group->gr_GroupName);
			    if (VerboseOpt)
				printf("%s: stale group deleted\n", group->gr_GroupName);
			    add = 1;
			    group->gr_State &= ~GRF_MODIFIED;	/* prevent NB update */
			    /*
			     * NOTE(review): a missing CTS may still be
			     * written below for a group that was just
			     * deleted -- confirm this is intended.
			     */
			}
		    } else {
			/*
			 * no LMTS in record, add one
			 */
			if (ForReal) {
			    char tsBuf[16];
			    snprintf(tsBuf, sizeof(tsBuf), "%08x", (int)t0);
			    KPDBWriteEncode(KDBActive, group->gr_GroupName, "LMTS", tsBuf, 0);
			    add = 1;
			}
			if (VerboseOpt)
			    printf("%s: added missing LMTS\n", group->gr_GroupName);
		    }
		}

		if (UpdateCTSOpt) {
		    if (group->gr_CTS) {
			if (ForReal) {
			    char tsBuf[16];
			    snprintf(tsBuf, sizeof(tsBuf), "%08x", group->gr_CTS);
			    KPDBWriteEncode(KDBActive, group->gr_GroupName, "CTS", tsBuf, 0);
			}
			if (VerboseOpt)
			    printf("%s: added missing CTS\n", group->gr_GroupName);
			add = 1;
		    }
		}

		if (ForReal && (group->gr_State & GRF_MODIFIED)) {
		    if (group->gr_State & GRF_EDITEDBEG) {
			char startBuf[16];
			snprintf(startBuf, sizeof(startBuf), "%010d", group->gr_StartNo);
			KPDBWriteEncode(KDBActive, group->gr_GroupName, "NB", startBuf, 0);
			add = 1;
		    }
		}
		count += add;
	    }
	}
	printf("Updated article range in %d groups\n", count);
    }
    if (KDBActive)
	KPDBClose(KDBActive);

    /*
     * Close history if we had it open
     */
    if (UseExpireBySpool)
	HistoryClose();

    return(0);
}

/*
 * DeleteJunkFile() - remove the file 'name' in directory 'dirPath'.
 *
 *	Nothing is removed in a dry run (ForReal == 0).  The result of
 *	remove() is not checked.
 */

void
DeleteJunkFile(const char *dirPath, const char *name)
{
    char junkPath[1024];

    if (!ForReal)
	return;

    snprintf(junkPath, sizeof(junkPath), "%s/%s", dirPath, name);
    remove(junkPath);
}

/*
 * ProcessOverviewFile() - process over. and data. files.  All over. files
 *			   are processed first. 
 *
 *	When processing over. files, we may resize the index array (-s)
 *	and/or cleanup the file (-R).
 *
 *	When processing data. files, we typically remove whole files. 
 *	If the -R option was used, however, we rewrite the files.  We can
 *	safely copy/rename-over data. files as long as we are able to
 *	lock the associated over. file.
 */

void
ProcessOverviewFile(const char *dirPath, const char *name)
{
    long h1;
    long h2;
    long artBase;
    int type = 0;
    Group *group;
    hash_t hv;
    char path[1024];

    snprintf(path, sizeof(path), "%s/%s", dirPath, name);

    if (strncmp(name, "over.", 5) == 0)
	type = 1;
    if (strncmp(name, "data.", 5) == 0)
	type = 2;

    if (type == 0)
	return;

    if (sscanf(name + 5, "%ld.%lx.%lx", &artBase, &h1, &h2) != 3)
	return;
    hv.h1 = h1;
    hv.h2 = h2;

    if ((group = FindGroupByHash(&hv)) == NULL) {
	/*
	 * If we gave a wildcard, we can't remove stale groups because
	 * we do not have a full group list.
	 */

	if (Wild == NULL) {
	    ++BadGroups;
	    printf("Group not found, removing %s\n", path);
	    remove(path);
	}
	return;
    }
    ++GoodGroups;

    if (type == 1) {
	/*
	 * over. file	(fixed length file)
	 */
	int fd;
	OverHead oh;

	if ((fd = open(path, O_RDWR)) >= 0) {
	    if (read(fd, &oh, sizeof(oh)) == sizeof(oh) &&
		oh.oh_Version == OH_VERSION &&
		oh.oh_ByteOrder == OH_BYTEORDER
	    ) {
		if (group->gr_EndNo - group->gr_StartNo >= oh.oh_MaxArts) {
		    group->gr_StartNo = group->gr_EndNo - oh.oh_MaxArts + 1;
		    group->gr_State |= GRF_EDITEDBEG | GRF_MODIFIED;
		}

		/*
		 * Force regeneration of over. file if RewriteDataOpt, else
		 * only regenerate if ResizeOpt and a size differential.  Valid
		 * overview index sizes run in steps if the nearest higher
		 * power of 2 divided by 3.
		 *
		 */
		if (ResizeOpt > 0 || RewriteDataOpt > 0) {
		    int numArts = group->gr_EndNo - group->gr_StartNo + 1;
		    int maxArts = oh.oh_MaxArts;

		    if (numArts < maxArts / 2)
			numArts = maxArts - nearestPower(maxArts) / 3;
		    else if (numArts > maxArts * 2 / 3)
			numArts = maxArts + nearestPower(maxArts) / 3;
		    else
			numArts = maxArts;

		    /*
		     * The minimum is somewhat contrived, but if we can
		     * fit the index file into a fragment for unused groups
		     * we save a considerable amount of space.
		     */

		    if (numArts < 24)
			numArts = 24;

		    if (numArts != maxArts || RewriteDataOpt > 0)
			ResizeGroup(group, fd, &oh, numArts);
		}

		if (UseExpireBySpool)
		    ExpireBySpool(group, fd, &oh);
		else if (UseExpireFromFile)
		    ExpireFromFile(group, fd, &oh);
		else
		    ExpireByDays(group, fd, &oh);

		/*
		 * Rewrite data files associated with over. file if -R.
		 */
		if (RewriteDataOpt > 0)
		    RewriteData(group, fd, &oh, dirPath, h1, h2);

	    } else {
		printf("group %s, file \"%s\" bad file header\n",
		    group->gr_GroupName,
		    path
		);
	    }
	    close(fd);
	}
    } else {
	/*
	 * data. file, modulo OD_HARTS.  OD_HARTS constant in second
	 * part of conditional is a fudge to make 100% sure we do not
	 * delete a brand new data file.
	 */
	if (artBase + OD_HARTS <= group->gr_StartNo ||
	    artBase >= group->gr_EndNo + OD_HARTS
	) {
	    printf("Deleting stale overview data %s: %s\n",
		group->gr_GroupName,
		path
	    );
	    remove(path);
	} 
    }
}

/*
 * allocTmpCopy() - return a NUL-terminated heap copy of the first bufLen
 *		    bytes of buf.
 *
 *	The copies live in a ring of 8 slots owned by this function: a
 *	returned pointer stays valid until the 8th following call, which
 *	frees it.  The caller must not free the result.
 *
 *	Returns NULL if buf is NULL, bufLen is negative or memory cannot
 *	be allocated.
 */

char *
allocTmpCopy(const char *buf, int bufLen)
{
    static char *SaveAry[8];
    static int SaveCnt;
    char **pptr;

    if (buf == NULL || bufLen < 0)
	return(NULL);

    SaveCnt = (SaveCnt + 1) % (int)(sizeof(SaveAry) / sizeof(SaveAry[0]));
    pptr = &SaveAry[SaveCnt];

    /* recycle the oldest slot */
    free(*pptr);
    *pptr = malloc((size_t)bufLen + 1);
    if (*pptr == NULL)
	return(NULL);

    memcpy(*pptr, buf, (size_t)bufLen);
    (*pptr)[bufLen] = 0;
    return(*pptr);
}

Group *
EnterGroup(const char *groupName, int begNo, int endNo, int lmts, int cts, const char *flags)
{
    hash_t hv = hhash(groupName);
    Group **pgroup = &GHash[hv.h1 & GHMASK];
    Group *group;

    while ((group = *pgroup) != NULL) {
	if (strcmp(groupName, group->gr_GroupName) == 0)
	    break;
	pgroup = &group->gr_Next;
    }
    if (group == NULL) {
	*pgroup = group = calloc(sizeof(Group) + strlen(groupName) + 1, 1);
	group->gr_State = GRF_NEW;
	group->gr_GroupName = (char *)(group + 1);
	group->gr_Hv = hv;
	strcpy(group->gr_GroupName, groupName);
    }

    /*
     * update fields
     */
    if (begNo >= 0) {
	group->gr_State |= GRF_STARTNO;
	if (group->gr_StartNo != begNo) {
	    group->gr_State |= GRF_MODIFIED;
	    group->gr_StartNo = begNo;
	}
    }
    if (endNo >= 0) {
	group->gr_State |= GRF_ENDNO;
	if (group->gr_EndNo != endNo) {
	    group->gr_EndNo = endNo;
	    group->gr_State |= GRF_MODIFIED;
	}
    }
    group->gr_LMTS = lmts;

    if (cts) {
	group->gr_CTS = cts;
    }

    if (flags) {
	group->gr_State |= GRF_FLAGS;
	if (SetField(&group->gr_Flags, flags))
	    group->gr_State |= GRF_MODIFIED;
    }
    return(group);
}

/*
 * FindGroupByHash() - look up a group in GHash[] by its hash value.
 *
 *	Returns the first record in the chain whose h1 and h2 both match
 *	*hv, or NULL if there is none.
 */

Group *
FindGroupByHash(hash_t *hv)
{
    Group *scan = GHash[hv->h1 & GHMASK];

    while (scan != NULL) {
	if (scan->gr_Hv.h1 == hv->h1 && scan->gr_Hv.h2 == hv->h2)
	    return(scan);
	scan = scan->gr_Next;
    }
    return(NULL);
}

/*
 * SetField() - make *pptr a private heap copy of str.
 *
 *	Returns 0 if *pptr already holds an equal string (nothing is
 *	touched) and 1 if the field was replaced; the previous string is
 *	freed in that case.
 *
 *	If memory cannot be allocated the old value is kept and 0 is
 *	returned, so the field never ends up dangling or NULL because of
 *	an allocation failure.
 */

int
SetField(char **pptr, const char *str)
{
    char *copy;
    size_t size;

    if (*pptr && strcmp(*pptr, str) == 0)
	return(0);

    /* allocate first so the old value survives a failure */
    size = strlen(str) + 1;
    if ((copy = malloc(size)) == NULL)
	return(0);
    memcpy(copy, str, size);

    free(*pptr);
    *pptr = copy;
    return(1);
}

/*
 * ExpireByDays() - expire overview records of one group by age.
 *
 *	group is the group record, fd the open over. file and oh its
 *	header.  Every record with a positive article number that is
 *	older than the group's overview expiration (GetOverExpire(), in
 *	seconds), or that claims to be more than a day in the future, is
 *	marked expired (article number -2).  A GetOverExpire() result of
 *	0 disables the age check.  Records are only written if ForReal.
 *
 *	Afterwards the group's start number is advanced past leading
 *	slots that no longer hold the expected article.
 *
 *	An index without any records is left alone.
 */

void
ExpireByDays(Group *group, int fd, OverHead *oh)
{
    const OverArt *oaBase;
    struct stat st;
    int count = 0;
    int jumped = 0;
    int expired = 0;
    int canceled = 0;
    int stale = 0;
    int n;
    int expireSecs = -1;
    time_t t = time(NULL);

    if (fstat(fd, &st) != 0)
	return;

    /*
     * Figure out expireSecs
     */
    if ((expireSecs = GetOverExpire(group->gr_GroupName)) == 0)
	expireSecs = -1;

    /*
     * Calculate number of overview records.  Without at least one
     * record there is nothing to do, and n is used as a divisor below.
     */

    if (st.st_size < (off_t)oh->oh_HeadSize + (off_t)sizeof(OverArt))
	return;
    n = (st.st_size - oh->oh_HeadSize) / sizeof(OverArt);
    if (n <= 0)
	return;

    oaBase = xmap(NULL, n * sizeof(OverArt), PROT_READ, MAP_SHARED, fd, oh->oh_HeadSize);
    if (oaBase == NULL) {
	fprintf(stderr, "Unable to xmap over.* file for group %s\n", group->gr_GroupName);
	return;
    }

    /*
     * Delete expired overview
     */

    {
	int i;

	for (i = 0; i < n; ++i) {
	    const OverArt *oa = &oaBase[i];

	    if (oa->oa_ArtNo > 0) {
		int dt = (int)(t - oa->oa_TimeRcvd);

		if (VerboseOpt > 2)
		    printf("DT %d/%d %08lx %08lx\n", dt, expireSecs, (long)t, (long)oa->oa_TimeRcvd);

		if (expireSecs > 0 && 
		    (dt > expireSecs || dt < -(60 * 60 * 24))
		) {
		    OverArt copy = *oa;

		    copy.oa_ArtNo = -2;		/* EXPIRED */
		    if (ForReal) {
			off_t pos = (off_t)oh->oh_HeadSize + (off_t)i * (off_t)sizeof(OverArt);

			/* only count records that really were rewritten */
			if (lseek(fd, pos, SEEK_SET) != pos ||
			    write(fd, &copy, sizeof(OverArt)) != (ssize_t)sizeof(OverArt)
			) {
			    fprintf(stderr, "Unable to expire article %d in group %s\n", (int)oa->oa_ArtNo, group->gr_GroupName);
			    continue;
			}
		    }
		    ++count;
		}
	    }
	}
    }

    {
	/*
	 * Advance the group's start number until it reaches a slot that
	 * holds the matching article (or the end number).
	 */
	while (group->gr_StartNo < group->gr_EndNo) {
	    int i = (group->gr_StartNo & 0x7FFFFFFF) % n;
	    const OverArt *oa = &oaBase[i];

	    if (VerboseOpt > 2)
		printf("test %d vs %d (i = %d)\n", oa->oa_ArtNo, group->gr_StartNo, i);
	    if (oa->oa_ArtNo == group->gr_StartNo)
		break;
	    ++group->gr_StartNo;
	    switch(oa->oa_ArtNo) {
	    case -2:
		++expired;
		break;
	    case -1:
		++canceled;
		break;
	    default:
		++stale;
		break;
	    }
	    ++jumped;
	}
	if (jumped)
	    group->gr_State |= GRF_EDITEDBEG | GRF_MODIFIED;
    }
    if (VerboseOpt && (jumped || count)) {
	printf("expired %-4d NB += %-4d (%3d can, %3d stale, %3d exprd) left %-4d expires in %6.2f days, grp=%s\n", 
	    count,
	    jumped,
	    canceled, stale, expired,
	    group->gr_EndNo - group->gr_StartNo,
	    ((expireSecs>0) ? (double)expireSecs / (60.0 * 60.0 * 24.0) :-1.0),
	    group->gr_GroupName
	);
    }
    xunmap((void *)oaBase, n * sizeof(OverArt));
}

/*
 * ExpireBySpool() - expire overview records of one group according to
 *		     the history file.
 *
 *	group is the group record, fd the open over. file and oh its
 *	header.  A record with a positive article number is marked
 *	expired (article number -2) if its message hash is not found in
 *	the history file or the history entry carries the expired bit.
 *	Records are only written if ForReal.
 *
 *	Afterwards the group's start number is advanced past leading
 *	slots that no longer hold the expected article.
 *
 *	An index without any records is left alone.
 */

void
ExpireBySpool(Group *group, int fd, OverHead *oh)
{
    const OverArt *oaBase;
    struct stat st;
    int count = 0;
    int jumped = 0;
    int expired = 0;
    int canceled = 0;
    int stale = 0;
    int n;

    if (fstat(fd, &st) != 0)
	return;

    /*
     * Calculate number of overview records.  Without at least one
     * record there is nothing to do, and n is used as a divisor below.
     */

    if (st.st_size < (off_t)oh->oh_HeadSize + (off_t)sizeof(OverArt))
	return;
    n = (st.st_size - oh->oh_HeadSize) / sizeof(OverArt);
    if (n <= 0)
	return;

    oaBase = xmap(NULL, n * sizeof(OverArt), PROT_READ, MAP_SHARED, fd, oh->oh_HeadSize);
    if (oaBase == NULL) {
	fprintf(stderr, "Unable to xmap over.* file for group %s\n", group->gr_GroupName);
	return;
    }

    /*
     * Delete expired overview
     */

    {
	int i;

	for (i = 0; i < n; ++i) {
	    const OverArt *oa = &oaBase[i];

	    if (oa->oa_ArtNo > 0) {
		hash_t dh = oa->oa_MsgHash;
		History dh_lookup;

		/*
		 * Make sure that the history entry exists. It's possible
		 * that dexpire already removed the article, and dhistory
		 * was cleaned, so the msgID doesn't exist.
		 *
		 * If the article does not exist or is expired, then expire
		 * its overview entry as well.
		 */
		if ((HistoryLookupByHash(dh, &dh_lookup) == -1) ||
		    (H_EXP(dh_lookup.exp) == H_EXP((unsigned short)-1))
		    ) {
		    OverArt copy = *oa;

		    copy.oa_ArtNo = -2;		/* EXPIRED */
		    if (ForReal) {
			off_t pos = (off_t)oh->oh_HeadSize + (off_t)i * (off_t)sizeof(OverArt);

			/* only count records that really were rewritten */
			if (lseek(fd, pos, SEEK_SET) != pos ||
			    write(fd, &copy, sizeof(OverArt)) != (ssize_t)sizeof(OverArt)
			) {
			    fprintf(stderr, "Unable to expire article %d in group %s\n", (int)oa->oa_ArtNo, group->gr_GroupName);
			    continue;
			}
		    }
		    ++count;
		}
	    }
	}
    }

    {
	/*
	 * Advance the group's start number until it reaches a slot that
	 * holds the matching article (or the end number).
	 */
	while (group->gr_StartNo < group->gr_EndNo) {
	    int i = (group->gr_StartNo & 0x7FFFFFFF) % n;
	    const OverArt *oa = &oaBase[i];

	    if (VerboseOpt > 2)
		printf("test %d vs %d (i = %d)\n", oa->oa_ArtNo, group->gr_StartNo, i);
	    if (oa->oa_ArtNo == group->gr_StartNo)
		break;
	    ++group->gr_StartNo;
	    switch(oa->oa_ArtNo) {
	    case -2:
		++expired;
		break;
	    case -1:
		++canceled;
		break;
	    default:
		++stale;
		break;
	    }
	    ++jumped;
	}
	if (jumped)
	    group->gr_State |= GRF_EDITEDBEG | GRF_MODIFIED;
    }
    if (VerboseOpt && (jumped || count)) {
	printf("expired %-4d NB += %-4d (%3d can, %3d stale, %3d exprd) left %-4d expires by spool, grp=%s\n", 
	    count,
	    jumped,
	    canceled, stale, expired,
	    group->gr_EndNo - group->gr_StartNo,
	    group->gr_GroupName
	);
    }
    xunmap((void *)oaBase, n * sizeof(OverArt));
}

/*
 * ExpireFromFile() - expire overview records of one group using the
 *		      list of expired message-id hashes.
 *
 * Similar to ExpireBySpool but uses a file generated by dexpire as a list
 * of msgid hashes of messages which are expired.  A record is expired
 * when expOverListCheckExpired() returns 0 for its message hash.
 * 
 * The 'x' parameter in dexpire.ctl is also checked, to punt articles which
 * have been laying in overview for a long time and somehow escaped being
 * written to the dexpover.dat file.
 *
 * Records are only written if ForReal.  Afterwards the group's start
 * number is advanced past leading slots that no longer hold the expected
 * article.  An index without any records is left alone.
 */

void
ExpireFromFile(Group *group, int fd, OverHead *oh)
{
    const OverArt *oaBase;
    struct stat st;
    int count = 0;
    int jumped = 0;
    int expired = 0;
    int canceled = 0;
    int stale = 0;
    int n;
    int expireSecs = -1;
    time_t t = time(NULL);

    if (fstat(fd, &st) != 0)
	return;

    /*
     * Figure out expireSecs
     */
    if ((expireSecs = GetOverExpire(group->gr_GroupName)) == 0)
	expireSecs = -1;

    /*
     * Calculate number of overview records.  Without at least one
     * record there is nothing to do, and n is used as a divisor below.
     */

    if (st.st_size < (off_t)oh->oh_HeadSize + (off_t)sizeof(OverArt))
	return;
    n = (st.st_size - oh->oh_HeadSize) / sizeof(OverArt);
    if (n <= 0)
	return;

    oaBase = xmap(NULL, n * sizeof(OverArt), PROT_READ, MAP_SHARED, fd, oh->oh_HeadSize);
    if (oaBase == NULL) {
	fprintf(stderr, "Unable to xmap over.* file for group %s\n", group->gr_GroupName);
	return;
    }

    /*
     * Delete expired overview
     */

    {
	int i;

	for (i = 0; i < n; ++i) {
	    const OverArt *oa = &oaBase[i];

	    if (oa->oa_ArtNo > 0) {
		int dt = (int)(t - oa->oa_TimeRcvd);
		hash_t *hv = (hash_t *)(&(oa->oa_MsgHash));

		if (VerboseOpt > 2)
		    printf("DT %d/%d %08lx %08lx\n", dt, expireSecs, (long)t, (long)oa->oa_TimeRcvd);

		if ((expOverListCheckExpired(hv) == 0) ||
		    ((expireSecs > 0) && 
		     (dt > expireSecs || dt < -(60 * 60 * 24)))
		) {
		    OverArt copy = *oa;

		    copy.oa_ArtNo = -2;		/* EXPIRED */
		    if (ForReal) {
			off_t pos = (off_t)oh->oh_HeadSize + (off_t)i * (off_t)sizeof(OverArt);

			/* only count records that really were rewritten */
			if (lseek(fd, pos, SEEK_SET) != pos ||
			    write(fd, &copy, sizeof(OverArt)) != (ssize_t)sizeof(OverArt)
			) {
			    fprintf(stderr, "Unable to expire article %d in group %s\n", (int)oa->oa_ArtNo, group->gr_GroupName);
			    continue;
			}
		    }
		    ++count;
		}
	    }
	}
    }

    {
	/*
	 * Advance the group's start number until it reaches a slot that
	 * holds the matching article (or the end number).
	 */
	while (group->gr_StartNo < group->gr_EndNo) {
	    int i = (group->gr_StartNo & 0x7FFFFFFF) % n;
	    const OverArt *oa = &oaBase[i];

	    if (VerboseOpt > 2)
		printf("test %d vs %d (i = %d)\n", oa->oa_ArtNo, group->gr_StartNo, i);
	    if (oa->oa_ArtNo == group->gr_StartNo)
		break;
	    ++group->gr_StartNo;
	    switch(oa->oa_ArtNo) {
	    case -2:
		++expired;
		break;
	    case -1:
		++canceled;
		break;
	    default:
		++stale;
		break;
	    }
	    ++jumped;
	}
	if (jumped)
	    group->gr_State |= GRF_EDITEDBEG | GRF_MODIFIED;
    }
    if (VerboseOpt && (jumped || count)) {
	printf("expired %-4d NB += %-4d (%3d can, %3d stale, %3d exprd) left %-4d expires in %6.2f days, grp=%s\n", 
	    count,
	    jumped,
	    canceled, stale, expired,
	    group->gr_EndNo - group->gr_StartNo,
	    ((expireSecs>0) ? (double)expireSecs / (60.0 * 60.0 * 24.0) :-1.0),
	    group->gr_GroupName
	);
    }
    xunmap((void *)oaBase, n * sizeof(OverArt));
}

/*
 * RewriteData() - rewrite the data.* files associated with an overview
 *		   file, and the overview index along with them.
 *
 *	group is the group record, fd the open over. file, oh its header,
 *	dirPath the directory holding the files and h1/h2 the hash parts
 *	of the file names.
 *
 *	Every valid index record is copied into a temporary ".data."
 *	file (see rewriteDataFile()), the original data. files are then
 *	removed, a new over. file is written through a temporary
 *	".over.0." file and finally the temporary files are renamed into
 *	place.  Nothing is done in a dry run (ForReal == 0), if the
 *	exclusive lock cannot be obtained, or if the index holds no
 *	records.
 */


void
RewriteData(Group *group, int fd, OverHead *oh, const char *dirPath, int h1, int h2)
{
    const OverArt *oaBase;
    OverArt *obBase;
    ReplaceFile   *rfBase = NULL;
    struct stat st;
    int i;
    int n;
    int ok = 1;

    /*
     * If not for real, do not do anything
     */

    if (ForReal == 0)
	return;

    /*
     * We need an exclusive lock for this to work, and if the
     * number of links has fallen to zero we are in a race with
     * another expireover.
     */

    if (hflock(fd, 4, XLOCK_EX|XLOCK_NB) < 0)
	return;

    if (fstat(fd, &st) != 0 || st.st_nlink == 0) {
	hflock(fd, 4, XLOCK_UN);
	return;
    }

    /*
     * Calculate number of overview records.  An index without records
     * has nothing to rewrite.
     */

    if (st.st_size < (off_t)oh->oh_HeadSize + (off_t)sizeof(OverArt)) {
	hflock(fd, 4, XLOCK_UN);
	return;
    }
    n = (st.st_size - oh->oh_HeadSize) / sizeof(OverArt);
    if (n <= 0) {
	hflock(fd, 4, XLOCK_UN);
	return;
    }

    oaBase = xmap(NULL, n * sizeof(OverArt), PROT_READ, MAP_SHARED, fd, oh->oh_HeadSize);
    if (oaBase == NULL) {
	fprintf(stderr, "Unable to xmap over.* file for group %s\n", group->gr_GroupName);
	hflock(fd, 4, XLOCK_UN);
	return;
    }

    /*
     * obBase becomes the new index.  Slots that are not copied stay
     * zeroed.  Bail out before touching any file if it cannot be
     * allocated.
     */
    obBase = calloc(n, sizeof(OverArt));
    if (obBase == NULL) {
	fprintf(stderr, "Unable to allocate overview index for group %s\n", group->gr_GroupName);
	xunmap((void *)oaBase, n * sizeof(OverArt));
	hflock(fd, 4, XLOCK_UN);
	return;
    }

    /*
     * Scan entire overview index, building replacement data.* files on the
     * fly.  We have already cleaned it up, so there should not be any
     * garbage in the index.
     */

    {
	const char *cacheBase = NULL;	/* mapping of the current source data. file */
	int cacheSize = 0;
	int cacheArtBase = -1;		/* article base of that file, -1 if none yet */

	for (i = 0; i < n; ++i) {
	    const OverArt *oa = &oaBase[i];

	    if (oa->oa_ArtNo <= 0)
		continue;

	    /*
	     * Switch the source mapping when the article lives in a
	     * different data. file than the one currently mapped.
	     */
	    if (
		cacheArtBase == -1 ||
		oa->oa_ArtNo < cacheArtBase ||
		oa->oa_ArtNo >= cacheArtBase + OD_HARTS
	    ) {
		char path[1024];
		struct stat cst;
		int cfd;

		if (cacheBase != NULL) {
		    xunmap((void *)cacheBase, cacheSize);
		    cacheSize = 0;
		    cacheBase = NULL;
		}
		cacheArtBase = oa->oa_ArtNo & ~OD_HMASK;
		snprintf(path, sizeof(path), "%s/data.%d.%08x.%08x", dirPath, cacheArtBase, h1, h2);
		if ((cfd = open(path, O_RDONLY)) >= 0 &&
		    fstat(cfd, &cst) == 0
		) {
		    cacheBase = xmap(NULL, cst.st_size, PROT_READ, MAP_SHARED, cfd, 0);
		    cacheSize = cst.st_size;
		}
		if (cfd >= 0)
		    close(cfd);
	    }
	    rewriteDataFile(group, &rfBase, cacheBase, cacheSize, oa, &obBase[i], dirPath, h1, h2);
	}
	if (cacheBase != NULL) {
	    xunmap((void *)cacheBase, cacheSize);
	    cacheSize = 0;
	    cacheBase = NULL;
	}
    }

    /*
     * Remove original data.* files so we can replace the over. file
     * safely.  If we are broken after this point, portions or all of the
     * overview information relating to this group will be lost.
     */
    {
	ReplaceFile *rf = rfBase;

	while (rf) {
	    char path1[1024];
	    snprintf(path1, sizeof(path1), "%s/data.%d.%08x.%08x", dirPath, rf->rf_ArtBase, h1, h2);
	    remove(path1);
	    rf = rf->rf_Next;
	}
    }

    /*
     * Replace the over. file.  If an error occurs we pretty much have to
     * blow the file away because we already deleted the data. files.
     */

    {
	char path1[1024];
	char path2[1024];
	int ovFd;

	snprintf(path1, sizeof(path1), "%s/.over.0.%08x.%08x", dirPath, h1, h2);
	snprintf(path2, sizeof(path2), "%s/over.0.%08x.%08x", dirPath, h1, h2);

	ovFd = open(path1, O_RDWR|O_CREAT|O_TRUNC, 0644);
	if (ovFd >= 0) {
	    if (write(ovFd, oh, oh->oh_HeadSize) != oh->oh_HeadSize)
		ok = 0;
	    if (write(ovFd, obBase, n*sizeof(OverArt)) != n*sizeof(OverArt))
		ok = 0;
	    close(ovFd);
	    if (ok) {
		if (rename(path1, path2) < 0)
		    ok = 0;
	    }
	} else {
	    ok = 0;
	}
	if (ok == 0) {
	    printf("Rewrite of %s over. file failed, removing\n", group->gr_GroupName);
	    remove(path2);
	    /* do not leave the temporary file behind either */
	    remove(path1);
	}
    }

    /*
     * Rename the temporary .data files to the real ones.
     */

    while (rfBase) {
	char path1[1024];
	char path2[1024];
	ReplaceFile *rf = rfBase;

	snprintf(path1, sizeof(path1), "%s/.data.%d.%08x.%08x", dirPath, rf->rf_ArtBase, h1, h2);
	snprintf(path2, sizeof(path2), "%s/data.%d.%08x.%08x", dirPath, rf->rf_ArtBase, h1, h2);

	if (rename(path1, path2) < 0)
	    remove(path2);

	if (rf->rf_Fd >= 0)
	    close(rf->rf_Fd);

	rfBase = rf->rf_Next;
	free(rf);
    }

    /*
     * cleanup
     */
    hflock(fd, 4, XLOCK_UN);
    xunmap((void *)oaBase, n * sizeof(OverArt));
    free(obBase);
}

/*
 * rewriteDataFile() - copy the overview data of one article from the
 *		       mapped source data. file into its replacement
 *		       file, maintaining the ReplaceFile list.
 *
 *	group		group record (used for messages only)
 *	prf		head of the ReplaceFile list; a new entry is
 *			appended when the article's base has none yet
 *	cacheBase	mapping of the source data. file, NULL if there
 *			is no valid source
 *	cacheSize	size of that mapping in bytes
 *	oa		index record of the article to copy
 *	ob		index record to fill in for the new index; it is
 *			left untouched if nothing is copied
 *	dirPath, h1, h2	directory and hash parts used to build the name
 *			of the temporary ".data." file
 */

void
rewriteDataFile(Group *group, ReplaceFile **prf, const char *cacheBase, int cacheSize, const OverArt *oa, OverArt *ob, const char *dirPath, int h1, int h2)
{
    int artBase = oa->oa_ArtNo & ~OD_HMASK;
    ReplaceFile *rf;

    /*
     * Locate the artBase file in our rewrite 'cache'.  Note: it's possible to
     * negatively cache an ArtBase, where rf_Fd will be < 0.
     */

    while ((rf = *prf) != NULL) {
	if (rf->rf_ArtBase == artBase)
	    break;
	prf = &rf->rf_Next;
    }
    if (rf == NULL) {
	char path[1024];

	rf = calloc(1, sizeof(ReplaceFile));
	if (rf == NULL) {
	    fprintf(stderr, "copy failed %s:%d, out of memory\n", group->gr_GroupName, (int)oa->oa_ArtNo);
	    return;
	}

	snprintf(path, sizeof(path), "%s/.data.%d.%08x.%08x", dirPath, artBase, h1, h2);
	rf->rf_Fd = open(path, O_RDWR|O_CREAT|O_TRUNC, 0644);
	rf->rf_ArtBase = artBase;
	*prf = rf;
    }

    /*
     * copy data from cacheBase/oa-params to rf->rf_Fd and fill in ob.  It is
     * possible for cacheBase to be NULL if there is no valid data source
     * for the file.
     *
     * The bounds test is written as a subtraction so that it cannot
     * overflow: oa_SeekPos is known to be >= 0 at that point.
     */
    if (
	cacheBase != NULL && 
	rf->rf_Fd >= 0 && 
	oa->oa_SeekPos >= 0 &&
	oa->oa_Bytes > 0 &&
	oa->oa_Bytes < cacheSize - oa->oa_SeekPos &&
	cacheBase[oa->oa_SeekPos + oa->oa_Bytes] == 0 && 	  /* guard */
	(oa->oa_SeekPos == 0 || cacheBase[oa->oa_SeekPos-1] == 0) /* guard */

    ) {
	*ob = *oa;
	ob->oa_SeekPos = lseek(rf->rf_Fd, 0L, SEEK_CUR);

	/* the terminating NUL behind the data is copied as well */
	if (write(rf->rf_Fd, cacheBase + oa->oa_SeekPos, oa->oa_Bytes + 1) != oa->oa_Bytes + 1) {
	    /* undo a partial write and mark the record expired */
	    lseek(rf->rf_Fd, ob->oa_SeekPos, SEEK_SET);
	    ftruncate(rf->rf_Fd, ob->oa_SeekPos);
	    ob->oa_ArtNo = -2;
	    ob->oa_SeekPos = 0;
	    ob->oa_Bytes = 0;
	    printf("copy failed %s:%d, write error\n", group->gr_GroupName, oa->oa_ArtNo);
	}
    } else if (oa->oa_SeekPos == -1) {
	; /* do nothing */
    } else {
	printf("copy failed %s:%d, %s\n",
	    group->gr_GroupName,
	    oa->oa_ArtNo,
	    ((cacheBase == NULL) ? "source-missing" :
	    (rf->rf_Fd < 0) ? "dest-failure" :
	    (oa->oa_Bytes <= 0) ? "source-bounds1" :
	    (oa->oa_Bytes >= cacheSize - oa->oa_SeekPos) ? "source-bounds2" :
	    "source-corrupt")
	);
    }
}

/*
 * Resize a newsgroup's over.* index file, if possible.  If called via the -s
 * option, only groups that need resizing are rebuilt.  If called via the -R
 * option, the group is always rebuilt AND the associated data files are 
 * rebuilt.
 */

/*
 * ResizeGroup() - rewrite an overview index file in place with newSize slots
 *
 *	group	- group being resized.  gr_StartNo is advanced (and
 *		  GRF_EDITEDBEG|GRF_MODIFIED set in gr_State) when the
 *		  article range no longer fits in newSize slots.
 *	fd	- descriptor of the index file; locked exclusively at
 *		  offset 4 for the duration of the rewrite.
 *	oh	- header of the index file.  oh_MaxArts is updated to
 *		  newSize only when ForReal is set.
 *	newSize	- number of OverArt slots the index should have (> 0).
 *
 *	ResizedGroups is bumped when the rebuild went through and
 *	NoResizedGroups when it could not be performed.
 */
void
ResizeGroup(Group *group, int fd, OverHead *oh, int newSize)
{
    int oldSize = oh->oh_MaxArts;
    struct stat st;
    OverArt *oa;
    OverArt *ob;
    const char *failure = NULL;
    int i;

    /*
     * newSize is used both as an allocation count and as a modulo
     * divisor, a non-positive value can never yield a usable index.
     */
    if (newSize <= 0) {
	++NoResizedGroups;
	if (VerboseOpt) {
	    printf("resize maxArts from %d to %d failed, invalid size grp=%s\n", oldSize, newSize, group->gr_GroupName);
	}
	return;
    }

    if (hflock(fd, 4, XLOCK_EX|XLOCK_NB) < 0) {
	++NoResizedGroups;
	if (VerboseOpt) {
	    printf("resize maxArts from %d to %d failed, file in use grp=%s\n", oldSize, newSize, group->gr_GroupName);
	}
	return;
    }

    if (fstat(fd, &st) < 0 || st.st_nlink == 0) {
	++NoResizedGroups;
	hflock(fd, 4, XLOCK_UN);
	if (VerboseOpt) {
	    printf("resize maxArts from %d to %d failed, file in use grp=%s\n", oldSize, newSize, group->gr_GroupName);
	}
	return;
    }

    /*
     * Resize a group.  We 'own' the overview index file (because other 
     * processes must get a shared lock on offset 4 and we got the exclusive
     * lock).  We can do anything we want with it, but we must rewrite the
     * file in-place to maintain lock consistency.
     *
     * Resize the group by copying the existing data into an array, validating
     * it based on known information, then putting it back.
     */

    /*
     * calloc(0, ...) is allowed to return NULL, so always request at
     * least one slot for the old table to keep the NULL check meaningful.
     */
    oa = calloc((oldSize > 0) ? oldSize : 1, sizeof(OverArt));
    ob = calloc(newSize, sizeof(OverArt));

    if (oa == NULL || ob == NULL) {
	failure = "out of memory";
    } else if (oldSize > 0) {
	/*
	 * A short read is tolerated, slots that were not read simply
	 * stay zero-filled.  A failed read is not: rewriting the index
	 * from an empty table would throw the existing entries away.
	 */
	if (lseek(fd, oh->oh_HeadSize, 0) < 0 ||
	    read(fd, oa, oldSize * sizeof(OverArt)) < 0
	) {
	    failure = "read error";
	}
    }

    if (failure == NULL) {
	if (group->gr_EndNo - group->gr_StartNo >= newSize) {
	    group->gr_StartNo = group->gr_EndNo - newSize + 1;
	    group->gr_State |= GRF_EDITEDBEG | GRF_MODIFIED;
	}

	/*
	 * Run through and copy validated entries.  With no old slots
	 * (oldSize <= 0) there is nothing to copy, and indexing modulo
	 * oldSize would divide by zero.
	 */

	if (oldSize > 0) {
	    for (i = group->gr_StartNo; i <= group->gr_EndNo; ++i) {
		OverArt *op = &oa[(i & 0x7FFFFFFF) % oldSize];

		if (op->oa_ArtNo == i)
		    ob[(i & 0x7FFFFFFF) % newSize] = *op;
	    }
	}

	/*
	 * Rewrite.  Only if ForReal.  Note that oh_MaxArts isn't updated 
	 * (for use in other parts of dexpireover) if not for real.
	 *
	 * The slot table is written at oh_HeadSize, the same offset it
	 * was read from and the one the truncation length is based on.
	 */

	if (ForReal) {
	    size_t tableBytes = newSize * sizeof(OverArt);

	    oh->oh_MaxArts = newSize;
	    if (ftruncate(fd, oh->oh_HeadSize + tableBytes) < 0 ||
		lseek(fd, 0L, 0) < 0 ||
		write(fd, oh, sizeof(OverHead)) != (ssize_t)sizeof(OverHead) ||
		lseek(fd, oh->oh_HeadSize, 0) < 0 ||
		write(fd, ob, tableBytes) != (ssize_t)tableBytes
	    ) {
		failure = "write error";
	    }
	}
    }

    free(ob);
    free(oa);
    hflock(fd, 4, XLOCK_UN);

    /*
     * Memory and I/O failures are reported unconditionally, after a
     * failed write the index file may need to be rebuilt.
     */
    if (failure != NULL) {
	++NoResizedGroups;
	printf("resize maxArts from %d to %d failed, %s grp=%s\n", oldSize, newSize, failure, group->gr_GroupName);
	return;
    }
    ++ResizedGroups;

    if (VerboseOpt && oldSize != newSize) {
	printf("resized maxArts from %d to %d grp=%s\n", oldSize, newSize, group->gr_GroupName);
    }
}

/*
 * nearestPower() - round n up to a power of two
 *
 *	Returns the smallest power of two that is >= n, or 1 when n <= 1.
 *	When no such power fits in an int the largest representable power
 *	of two is returned instead; shifting further would overflow a
 *	signed int and the loop would never terminate.
 */
int
nearestPower(int n)
{
    /* largest power of two an int can hold, i.e. INT_MAX / 2 + 1 */
    const int maxPower = (int)(~0U >> 2) + 1;
    int i;

    for (i = 1; i < n && i < maxPower; i <<= 1)
	;
    return(i);
}

/*
 * ReadDExpOverList() - load the expire-over list into dexpover_msgid_hash
 *
 *	Allocates and clears the DEXPOVER_HASH_SIZE bucket table, renames
 *	the list file to <name>.bak and reads that file as a flat array of
 *	hash_t records.  Each record is hashed on its h1 field; the first
 *	record landing in a bucket is stored in the table itself, later
 *	ones in malloc'd nodes chained off the bucket.
 *
 *	If the .bak file cannot be opened the table is left empty.  Running
 *	out of memory is fatal because lookups index the table
 *	unconditionally.
 */
void
ReadDExpOverList()
{
    FILE *DExpOverList;
    hash_t read_buffer[DEXPOVER_READ_BUFFER_SIZE];
    int i, n;
    char path[1024];

    dexpover_msgid_hash = malloc(DEXPOVER_HASH_SIZE * sizeof(struct bucket_t));
    if (dexpover_msgid_hash == NULL) {
	fprintf(stderr, "dexpireover: unable to allocate expire-over hash table\n");
	exit(1);
    }
    for(i=0; i<DEXPOVER_HASH_SIZE; i++) {
	dexpover_msgid_hash[i].valid=0;
	dexpover_msgid_hash[i].next=NULL;
    }

    /*
     * Refuse to work with a truncated name, renaming the list to a
     * clipped path would misplace it.
     */
    n = snprintf(path, sizeof(path), "%s.bak", PatDbExpand(DExpireOverListPat));
    if (n < 0 || n >= (int)sizeof(path)) {
	fprintf(stderr, "dexpireover: expire-over list path too long\n");
	return;
    }

    /*
     * The rename result is not checked, whether there is anything to
     * read is decided by the fopen() of the .bak file below.
     */
    rename(PatDbExpand(DExpireOverListPat), path);
    DExpOverList = fopen(path, "r");

    if(DExpOverList == NULL) return;

    while((n = fread(read_buffer, sizeof(hash_t),
		     DEXPOVER_READ_BUFFER_SIZE, DExpOverList))) {
	for(i=0; i<n; i++) {
	    int hashval;
	    struct bucket_t *head;
	    struct bucket_t *slot;

	    hashval = (read_buffer[i].h1)&(DEXPOVER_HASH_SIZE-1);
	    head = &dexpover_msgid_hash[hashval];
	    slot = head;

	    /*
	     * Bucket already occupied: link a new node directly behind
	     * the head rather than walking to the end of the chain.
	     * Lookups compare every node, so chain order is irrelevant.
	     */
	    if(head->valid == 1) {
		slot = malloc(sizeof(struct bucket_t));
		if (slot == NULL) {
		    fprintf(stderr, "dexpireover: out of memory reading %s\n", path);
		    exit(1);
		}
		slot->next = head->next;
		head->next = slot;
	    }

	    slot->valid = 1;
	    slot->hash_item = read_buffer[i];
	}
    }

    fclose(DExpOverList);
}

/*
 * hexCharToInt() - value of a single hexadecimal digit
 *
 *	Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0-15.  Any other
 *	character yields -1.
 */
int
hexCharToInt(char c)
{
    switch(c) {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
	/* decimal digits are guaranteed to be consecutive */
	return(c - '0');
    case 'a': case 'A':
	return(10);
    case 'b': case 'B':
	return(11);
    case 'c': case 'C':
	return(12);
    case 'd': case 'D':
	return(13);
    case 'e': case 'E':
	return(14);
    case 'f': case 'F':
	return(15);
    default:
	break;
    }
    return(-1);
}

/*
 * expOverListCheckExpired() - look a hash up in dexpover_msgid_hash
 *
 *	The bucket is selected from hv->h1 and its chain is followed while
 *	the nodes are marked valid.  Returns 0 when a node matching both
 *	h1 and h2 is found, -1 otherwise.
 */
int
expOverListCheckExpired(hash_t *hv)
{
    int slot = (hv->h1)&(DEXPOVER_HASH_SIZE-1);
    bucket_t *node;

    for (node = &dexpover_msgid_hash[slot];
	 node != NULL && node->valid;
	 node = node->next
    ) {
	if (node->hash_item.h1 != hv->h1)
	    continue;
	if (node->hash_item.h2 == hv->h2)
	    return 0;
    }

    return -1;
}

