
/*
 * DEXPIRE.C	- diablo expire.
 *
 *	remove directories in time order until sufficient space is 
 *	available.
 *
 *	When sufficient space is available, scan remaining files and
 *	then scan history and set the expired flags as appropriate.
 *
 *	This algorithm is relatively self-repairing, so dclean is no
 *	longer required.
 *
 * (c)Copyright 1997, Matthew Dillon, All Rights Reserved.  Refer to
 *    the COPYRIGHT file in the base directory of this distribution 
 *    for specific rights granted.
 *
 * Modification by Nickolai Zeldovich to store msgid hashes when
 * expiring articles to allow for better overview expiration.
 */

#include "defs.h"
#include <sys/param.h>
#ifndef _AIX
#include <sys/mount.h>
#endif
#ifdef _AIX
#include <sys/statfs.h>
#endif

#if USE_SYSV_STATFS
#include <sys/statfs.h>
#define f_bavail f_bfree
#endif

#if USE_SUN_STATVFS
#include <sys/statvfs.h>	/* god knows if this hack will work */
#define f_bsize	f_frsize	/* god knows if this hack will work */
#define statfs statvfs
#endif

#if USE_SYS_VFS			/* this is mainly for linux	*/
#include <sys/vfs.h>
#endif

/*
 * Command line option state.  Unless noted otherwise these are set by
 * the option parser in main().
 */
int ROpt;			/* set to 1 when -r was given */
int FreeSpaceTarget;		/* -r: free space target, in MB (0 disables the check) */
int LotsOfFreeSpace = 1;	/* cleared by NotEnoughFreeSpace() when below target */
int ParallelOpt = 1;		/* not referenced elsewhere in this file */
int VerboseOpt = 0;		/* -v[level]: verbosity */
int NotForReal = 0;		/* -n: list directories but remove nothing */
int SoftUpdates = 0;		/* -s[secs]: sync()/sleep() delay before statfs() */
int UpdateHistoryOpt = 1;	/* -h[0|1]: run UpdateHistory() after the removal scan */

int SkipOpt = 1;		/* not referenced elsewhere in this file */
int IndexOpt = 0;		/* not referenced elsewhere in this file */
int HistoryUpdateOpt = 1;	/* -k clears: count expirable records but do not rewrite them */

int WriteHashesToFileOpt = 0;	/* -o: append expired msgid hashes to DExpireOverListPat */

/*
 * Forward declarations for functions defined after main().
 */
int NotEnoughFreeSpace(void);
int FindNode(const char *path, int createMe);
int UpdateHistory(void);

/*
 * strSort() - qsort() comparison callback for an array of C strings.
 *
 *	Each argument points at one (char *) element of the array being
 *	sorted.  The referenced strings are ordered with strcmp(), whose
 *	result is returned unchanged.
 */
int
strSort(const void *s1, const void *s2)
{
    const char *const *lhs = s1;
    const char *const *rhs = s2;

    return(strcmp(*lhs, *rhs));
}

/*
 * dexpireOptArg() - fetch the value of an option that requires one.
 *
 *	ptr points just past the option letter.  If the value is attached
 *	("-r100") it is returned directly, otherwise the next argv element
 *	is consumed ("-r 100") and *pn is advanced.  A missing value is a
 *	fatal usage error rather than a NULL handed to strtol().
 */
static char *
dexpireOptArg(char *ptr, int ac, char **av, int *pn)
{
    if (*ptr)
	return(ptr);
    if (*pn + 1 >= ac) {
	fprintf(stderr, "Option %s requires an argument\n", ptr - 2);
	exit(1);
    }
    *pn += 1;
    return(av[*pn]);
}

/*
 * main() - dexpire entry point.
 *
 *	1. Parse options (see the globals they set).
 *	2. Removal scan: while NotEnoughFreeSpace() reports a shortage,
 *	   list the D.* / A.* queue directories under the spool home,
 *	   sort them by name and remove them oldest first until enough
 *	   space is free.  At most 10 such passes are attempted.
 *	3. History scan: unless disabled (-h0) or -n was given, call
 *	   UpdateHistory() to mark history records whose spool directory
 *	   no longer exists as expired.
 *
 *	Returns 0 on completion; exits with status 1 on usage errors or
 *	when the spool home cannot be entered.
 */
int
main(int ac, char **av)
{
    int n;
    int countAttempt = 10;	/* maximum number of removal passes */

    LoadDiabloConfig(ac, av);

    for (n = 1; n < ac; ++n) {
	char *ptr = av[n];

	if (*ptr == '-') {
	    /* ptr[-1] is the option letter, ptr any attached value */
	    ptr += 2;
	    switch(ptr[-1]) {
	    case 'C':		/* parsed by LoadDiabloConfig */
		if (*ptr == 0)
		    ++n;
		break;
	    case 's':
		if (*ptr)
		    SoftUpdates = strtol(ptr, NULL, 0);
		else
		    SoftUpdates = 1;
		break;
	    case 'a':
		fprintf(stderr, "Note: option %s currently does not do anything\n", ptr - 2);
		break;
	    case 'r':
		/* in MB */
		FreeSpaceTarget = strtol(dexpireOptArg(ptr, ac, av, &n), NULL, 0);
		ROpt = 1;
		break;
	    case 'v':
		VerboseOpt = (*ptr) ? strtol(ptr, NULL, 0) : 1;
		break;
	    case 'n':
		NotForReal = 1;
		break;
	    case 'k':
		HistoryUpdateOpt = 0;
		break;
	    case 'q':
	    case 't':
	    case 'f':
		/* no longer used */
		fprintf(stderr, "Note: option %s no longer does anything\n",
		    ptr - 2
		);
		break;
	    case 'p':
		/* value is still consumed so the parse stays aligned */
		(void)strtol(dexpireOptArg(ptr, ac, av, &n), NULL, 0);
		fprintf(stderr, "Note: option %s no longer does anything\n",
		    ptr - 2
		);
		break;
	    case 'o':
		WriteHashesToFileOpt = 1;
		break;
	    case 'h':
		if (*ptr)
		    UpdateHistoryOpt = strtol(ptr, NULL, 0);
		else
		    UpdateHistoryOpt = 1;	/* note: already 1 by default */
		break;
	    default:
		fprintf(stderr, "Illegal option: %s\n", ptr - 2);
		exit(1);
	    }
	} else {
	    fprintf(stderr, "Illegal argument: %s\n", ptr);
	    exit(1);
	}
    }

    /*
     * this isn't an error, but a request to list 
     * valid arguments, then exit.
     */

    if (ac == 1) {
	printf("dexpire [-f] [-v] [-t] [-r0] [-r freespacetarget]\n");
	exit(0);
    }

    /*
     * Removal Scan
     */

    while (countAttempt > 0 && NotEnoughFreeSpace()) {
	/*
	 * Get into spool directory
	 */
	DIR *dir;
	int i;
	int daryMax = 16;
	int daryIdx = 0;
	char **dary = malloc(daryMax * sizeof(char *));

	if (!dary) {
	  fprintf(stderr, "unable to malloc in removal scan\n");
	  exit(1);
	}
	if (chdir(PatExpand(SpoolHomePat)) != 0) {
	    fprintf(stderr, "Unable to chdir(%s)\n", PatExpand(SpoolHomePat));
	    exit(1);
	}

	/*
	 * Scan directory for queue dirs (on 10 minute boundaries)
	 */

	if ((dir = opendir(".")) != NULL) {
	    den_t *den;

	    while ((den = readdir(dir)) != NULL) {
		/* only "D.*" and "A.*" entries are queue directories */
		if ((den->d_name[0] != 'D' && den->d_name[0] != 'A') ||
		    den->d_name[1] != '.'
		) {
		    continue;
		}
		if (daryIdx == daryMax) {
		    daryMax = daryMax * 2;
		    dary = realloc(dary, daryMax * sizeof(char *));
		    if (!dary) {
		      fprintf(stderr, "unable to realloc in scan for queue dirs\n");
		      exit(1);
		    }
		}
		dary[daryIdx] = malloc(strlen(den->d_name) + 1);
		if (!dary[daryIdx]) {
		  fprintf(stderr, "unable to malloc in scan for queue dirs\n");
		  exit(1);
		}
		strcpy(dary[daryIdx], den->d_name);
		++daryIdx;
	    }
	    closedir(dir);
	}

	/*
	 * Sort directory	'0' <= 'a' so we can safely sort numerically
	 *			with strcmp.
	 */

	qsort(dary, daryIdx, sizeof(char *), strSort);

	if (daryIdx)
	    printf("%d directories (%s - %s)\n", daryIdx, dary[0], dary[daryIdx-1]);

	if (VerboseOpt) {
	    for (i = 0; i < daryIdx; ++i) {
		/* the hex suffix after "X." is a time in minutes */
		time_t t = strtoul(dary[i] + 2, NULL, 16) * 60;
		printf("Scan %s %s", dary[i], ctime(&t));
	    }
	    printf("\n");
	}

	/*
	 * Remove files a directory at a time.  The spool directories are
	 * named A.* or D.*.  We remove a directory by renaming it from D.
	 * to A., removing the files, then removing the directory.  The
	 * rename is required to prevent Diablo from recreating files in
	 * the directory (and thus potentially corrupting articles by 
	 * reusing history keys).
	 */

	if (NotForReal)
	    exit(0);

	{
	    int count = 0;	/* article files removed */
	    int ccount = 0;	/* core files removed */

	    printf("file removal pass\n");

	    for (i = 0; i < daryIdx && NotEnoughFreeSpace(); ++i) {
		char tmp[256];

		if (chdir(PatExpand(SpoolHomePat)) != 0) {
		    printf("unable to chdir to spool home! %s\n", PatExpand(SpoolHomePat));
		    break;
		}
		if (dary[i][0] == 'D') {
		    strcpy(tmp, dary[i]);
		    dary[i][0] = 'A';
		    if (rename(tmp, dary[i]) < 0) {
			printf("unable to rename directory: %s\n", dary[i]);
			continue;
		    }
		}

		if (chdir(dary[i]) != 0) {
		    printf("unable to scan %s\n", dary[i]);
		    continue;
		}

		if (VerboseOpt) {
		    printf("Removing %s", dary[i]);
		}

		if ((dir = opendir(".")) != NULL) {
		    den_t *den;

		    while ((den = readdir(dir)) != NULL) {
			/*
			 * Spool files are "B.*" or have a '.' as their
			 * ninth character; stray core dumps are removed
			 * as well so the rmdir below can succeed.
			 */
			if ((den->d_name[0] == 'B' && den->d_name[1]=='.') ||
			    (strlen(den->d_name) > 8 && den->d_name[8] =='.')
			) {
			    remove(den->d_name);
			    ++count;
			} else
			if (WildCmp("*.core", den->d_name) == 0 ||
			    strcmp(den->d_name, "core") == 0
			) {
			    remove(den->d_name);
			    ++ccount;
			}
		    }
		    closedir(dir);
		}

		/*
		 * rmdir() below uses a name relative to the spool home,
		 * so failing to get back there ends this pass.
		 */
		if (chdir(PatExpand(SpoolHomePat)) != 0) {
		    printf("unable to chdir to spool home! %s\n", PatExpand(SpoolHomePat));
		    break;
		}
		printf("    -%s\n", dary[i]);
		errno = 0;

		if (rmdir(dary[i]) < 0) {
		    fprintf(stderr, "Unable to rmdir(\"%s\"): %s\n", 
			dary[i],
			strerror(errno)
		    );
		    /* do not exit */
		}
	    }
	    printf("%d files removed", count);
	    if (ccount)
		printf(", and %d core files removed!", ccount);
	    printf("\n");
	}

	/*
	 * clean up dary, try again
	 */

	for (i = 0; i < daryIdx; ++i)
	   free(dary[i]);
	free(dary);

	sleep(1);
	--countAttempt;
    }

    if (countAttempt == 0) {
	fprintf(stderr, "DExpire giving up on removal pass, there are serious problems with the spool\n");
    }

    /*
     * History file update scan
     */

    if (chdir(PatExpand(SpoolHomePat)) != 0) {
	fprintf(stderr, "Unable to chdir(%s)\n", PatExpand(SpoolHomePat));
	exit(1);
    }

    if (UpdateHistoryOpt && NotForReal == 0) {
	int nexp;

	fprintf(stderr, "DExpire updating history file\n");
	nexp = UpdateHistory();
	fprintf(stderr, "DExpire history file update complete, %d articles marked expired\n", nexp);
    } else {
	fprintf(stderr, "DExpire history file will not be updated\n");
    }

    return(0);
}

/*
 * UpdateHistory() - mark history records for removed spool directories
 *		     as expired.
 *
 *	The current directory must be the spool home.  Every "D.*"
 *	directory found there is entered into the FindNode() table, then
 *	the history file is scanned and each live record whose path (as
 *	produced by ArticleFileName()) is not in the table is treated as
 *	expired.
 *
 *	When HistoryUpdateOpt is set the record's exp field is rewritten
 *	in place, and if WriteHashesToFileOpt is set the record's hash is
 *	appended to the DExpireOverListPat file.  When HistoryUpdateOpt is
 *	clear the records are only counted.
 *
 *	Returns the number of records found to be expired.  Exits with
 *	status 1 if the history file cannot be opened or has the wrong
 *	magic/version.  Failure to open the hash list file is reported
 *	but is not fatal.
 */
int
UpdateHistory(void)
{
    uint32 startGmt = time(NULL) / 60;	/* time_t in minutes */
    uint32 countExp = 0;
    FILE *DExpOverList = NULL;

    /*
     * Write expired article msgid hashes to a file if requested.
     */

    if (WriteHashesToFileOpt == 1) {
	DExpOverList = fopen(PatDbExpand(DExpireOverListPat), "a");
	if (DExpOverList == NULL) {
	    int saveErrno = errno;	/* PatDbExpand() may disturb errno */

	    fprintf(stderr, "Unable to open %s for append: %s\n",
		PatDbExpand(DExpireOverListPat),
		strerror(saveErrno)
	    );
	}
    }

    /*
     * scan all directories in the spool.   Expire history records by
     * directory.  We can't expire history records by file anymore 
     * because 'reader mode' expire may create new files with 'old' gmt
     * times.
     */

    {
	DIR *d1;

	if ((d1 = opendir(".")) != NULL) {
	    den_t *den1;

	    while ((den1 = readdir(d1)) != NULL) {
		if (strncmp(den1->d_name, "D.", 2) != 0)
		    continue;
		(void)FindNode(den1->d_name, 1);
	    }
	    closedir(d1);
	}
    }

    /*
     * scan history file and update the expiration
     */

    {
	int fd = open(PatDbExpand(DHistoryPat), O_RDWR, 0644);
	int n;
	HistHead hh;
	History hist[4096];

	if (fd < 0 || read(fd, &hh, sizeof(hh)) != sizeof(hh)) {
	    fprintf(stderr, "Unable to open file %s\n", PatDbExpand(DHistoryPat));
	    exit(1);
	}
	if (hh.hmagic != HMAGIC) {
	    fprintf(stderr, "corrupted history file or old version of history file\n");
	    exit(1);
	}
	if (hh.version != HVERSION) {
	    fprintf(stderr, "wrong dhistory file version (%d), expected %d\n",
		hh.version,
		HVERSION
	    );
	    exit(1);
	}

	/* skip the header and the hash table to reach the records */
	lseek(fd, hh.headSize + sizeof(int32) * hh.hashSize, SEEK_SET);

	while ((n = read(fd, hist, sizeof(hist))) > 0) {
	    int i;
	    int changed = 0;
	    uint32 nowGmt = time(NULL) / 60;

	    /*
	     * npos: file offset just past this buffer, bpos: offset of
	     * its first byte.  These are off_t so that history files
	     * larger than 2GB are addressed correctly.
	     */
	    off_t npos = lseek(fd, 0L, SEEK_CUR);
	    off_t bpos = npos - n;

	    n /= sizeof(History);

	    for (i = 0; i < n; ++i) {
		History *h = &hist[i];
		char path[256];

		/*
		 * skip if the article has already expired or if it
		 * is a new article that we may not have scanned, or
		 * if it is an expansion slot.
		 */

		if (H_EXP(h->exp) == H_EXP((unsigned short)-1))
		    continue;
		if (h->hv.h1 == 0 && h->hv.h2 == 0)
		    continue;
		if (h->gmt >= startGmt - 1 && h->gmt < nowGmt + 24 * 60)
		    continue;

		ArticleFileName(path, sizeof(path), h, -1);

		if (FindNode(path, 0) < 0) {
		    if (VerboseOpt > 1) {
			printf("Unable to find path %s, %s history record\n",
			    path,
			    (HistoryUpdateOpt ? "expiring" : "would expire")
			);
		    }
		    if (HistoryUpdateOpt) {
			/* set the expired value, keep the flag bits */
			h->exp = H_EXP((unsigned short)-1) | (h->exp & EXPF_FLAGS);
			lseek(
			    fd,
			    bpos + (off_t)(sizeof(History) * i + offsetof(History, exp)),
			    SEEK_SET
			);
			if (write(fd, &h->exp, sizeof(h->exp)) != (int)sizeof(h->exp)) {
			    /* on-disk record unchanged, so do not count it */
			    fprintf(stderr, "Unable to update history record for %s\n", path);
			    changed = 1;
			    continue;
			}

			if (DExpOverList != NULL)
			    fwrite(&h->hv, sizeof(hash_t), 1, DExpOverList);

			changed = 1;
		    }
		    ++countExp;
		}
	    }

	    /* writes moved the file position; resume after this buffer */
	    if (changed)
		lseek(fd, npos, SEEK_SET);
	}
	close(fd);
    }

    if (DExpOverList != NULL)
	fclose(DExpOverList);

    return(countExp);
}

/*
 * ENode - one entry of the in-memory path set used by FindNode().
 *
 *	Entries are chained per hash bucket through no_Next.  no_Path
 *	points at the string stored in the same allocation, immediately
 *	after the structure.
 */
typedef struct ENode {
    struct ENode *no_Next;
    char	*no_Path;
} ENode;

#define EHSIZE	16384		/* number of buckets, must be a power of 2 */
#define EHMASK	(EHSIZE - 1)

ENode	*NodeAry[EHSIZE];

/*
 * FindNode() - look up, and optionally insert, a path in the path set.
 *
 *	path		NUL terminated string to look up
 *	createMe	non-zero to add the path if it is not present
 *
 *	Returns  0 if the path was already present,
 *		 1 if it was not present and has been added,
 *		-1 if it was not present and createMe is zero.
 *
 *	Exits with status 1 if memory for a new entry cannot be allocated.
 */
int
FindNode(const char *path, int createMe)
{
    /*
     * The hash is computed with unsigned arithmetic: left shifting a
     * negative signed value is undefined and right shifting one is
     * implementation defined.  The hash never leaves this process, so
     * only its self-consistency matters.
     */
    unsigned int hv = 0xA4FC3244U;
    int i;
    ENode **pnode;
    ENode *node;

    for (i = 0; path[i]; ++i)
	hv = (hv << 5) ^ (unsigned char)path[i] ^ (hv >> 23);

    /* walk the bucket chain; pnode ends up at the tail link */
    for (pnode = &NodeAry[(hv ^ (hv >> 16)) & EHMASK]; 
	(node = *pnode) != NULL; 
	pnode = &node->no_Next
    ) {
	if (strcmp(path, node->no_Path) == 0)
	    return(0);
    }
    if (createMe) {
	/* one allocation holds the node and its copy of the path */
	node = malloc(sizeof(ENode) + strlen(path) + 1);
	if (!node) {
	  fprintf(stderr, "unable to malloc in FindNode\n");
	  exit(1);
	}
	node->no_Next = NULL;
	node->no_Path = (char *)(node + 1);
	*pnode = node;
	strcpy(node->no_Path, path);
	return(1);
    }
    return(-1);
}

/*
 * FreeNodes() - release every entry held in NodeAry.
 *
 *	Each bucket is emptied and its chain of ENode allocations is
 *	freed.  The path string shares the node's allocation, so one
 *	free() per node is sufficient.
 */
void
FreeNodes(void)
{
    ENode **bucket;

    for (bucket = NodeAry; bucket < NodeAry + EHSIZE; ++bucket) {
	ENode *cur = *bucket;

	*bucket = NULL;
	while (cur != NULL) {
	    ENode *next = cur->no_Next;

	    free(cur);
	    cur = next;
	}
    }
}

/*
 * NotEnoughFreeSpace() - test the spool filesystem against the free
 *			  space target.
 *
 *	Returns 1 if the space available on the filesystem holding the
 *	spool home is below FreeSpaceTarget megabytes, in which case
 *	LotsOfFreeSpace is also cleared.  Returns 0 if there is enough
 *	space, if FreeSpaceTarget is 0 (check disabled), or if statfs()
 *	fails.
 *
 *	The amount of free space found is reported on stderr.
 */

int
NotEnoughFreeSpace(void)
{
    struct statfs stmp;
    double availMB;
    char path[1024];
    int rc;

    snprintf(path, sizeof(path), "%s/.", PatExpand(SpoolHomePat));

    if (FreeSpaceTarget == 0)
	return(0);

    /*
     * This code does not significantly slow dexpire down, but it does give
     * the system sync a chance to update the bitmaps so statfs returns a
     * more accurate value.  Certain filesystems such as FreeBSD and BSDI
     * w/ softupdates are so decoupled and so fast that dexpire might remove 
     * 80% of the spool before statfs() realizes that sufficient free space 
     * remains.
     */
    if (SoftUpdates) {
	sync();
	sleep(SoftUpdates);
	sync();
	sleep(SoftUpdates);
	sync();
	sleep(SoftUpdates);
    }

#if USE_SYSV_STATFS
    rc = statfs(path, &stmp, sizeof(stmp), 0);
#else
    rc = statfs(path, &stmp);
#endif
    if (rc != 0) {
	fprintf(stderr, "dexpire: unable to statfs %s\n", path);
	return(0);
    }

    /*
     * Computed in floating point: blocks * blocksize does not fit in an
     * int once 2TB or more is free, and block sizes below 512 bytes
     * would be truncated to zero by an integer (f_bsize / 512).
     */
    availMB = (double)stmp.f_bavail * (double)stmp.f_bsize / (1024.0 * 1024.0);
    fprintf(stderr, "%4.2f MB free space available\n", availMB);
    if (availMB < (double)FreeSpaceTarget) {
	LotsOfFreeSpace = 0;
	return(1);
    }
    return(0);
}

