#include #include #include #include /************************************************************************ htstat.c Summarizes statistics of an HTTP access log file htstat file -u[n] -f[n,l] -d[n,l] -t where: file is the input file (if none specified, uses STDIN) -u gives a report on access by user -f gives a report on access by filename -d gives a report on access by domain -t gives a report by date (t stands for "time") n is the "minimum # of hits to report" (default 1) l is the "number of levels to store" (default 1) Example - given a user name of: nessus.vip.best.com storing the domain to level 1 gives "com"; this is the default storing the domain to level 2 gives "best.com" storing the domain to level 3 gives "vip.best.com" given a file name of: http://www.best.com/~nessus/jde.gif level 0 gives the entire file name (inefficient!) level 1 gives the filename "jde.gif" (the default) level 2 gives the filename "~nessus/jde.gif" (c) 1995 J. David Eisenberg Note: although this program is copyright, you may feel free to distribute/modify the code as long as you keep the original credit in the program. Please e-mail any improvements to nessus@best.com Thanks! *************************************************************************/ #define TRUE 1 #define FALSE 0 #define true 1 #define false 0 #ifndef max #define max(a,b) ((a) > (b) ? (a) : (b)) #endif typedef short BOOLEAN; typedef struct tagAccess ACCESS; struct tagAccess { char *name; ACCESS *lTree, *rTree; long nAccesses; }; /* domain names consisting of only digits are considered to be IP numbers */ char ANON_IP[] = "(IP number only)"; FILE *inFile = NULL; ACCESS *userHead = NULL; ACCESS *fileHead = NULL; ACCESS *domainHead = NULL; ACCESS *dateHead = NULL; long gCutoff; /* to avoid passing parameter to recursive routine */ long userCutoff; /* minimum # of hits to report in user listing */ long fileCutoff; /* minimum # of hits to report in file listing */ long domainCutoff; /* minimum # of hits to report in domain listing */ int gFieldLen; /* to avoid passing this to recursive print proc */ int maxUserLen = 0; /* maximum length of user name */ int maxFileLen = 0; /* maximum length of file name */ int maxDomainLen = 0; /* maximum length of domain name */ int maxDateLen = 0; /* maximum length of time/date field */ long totBytes; BOOLEAN listUsers = FALSE; BOOLEAN listFiles = FALSE; BOOLEAN listDomains = FALSE; BOOLEAN listDates = FALSE; BOOLEAN noOptions = TRUE; long fileLevel = 1; long domainLevel = 1; char *listName[] = { "user", "users", "domain", "domains", "file", "files", "date", "dates" }; BOOLEAN dbug = FALSE; /****************************************************************** newNode purpose: allocate a new ACCESS structure. input: name - the entry into the structure's name field returns: node created NULL if out of memory *******************************************************************/ ACCESS *newNode(char *name) { ACCESS *temp; temp = calloc(1, sizeof(ACCESS)); if (!temp) { fprintf(stderr,"Out of memory.\n"); return NULL; } temp->name = calloc(strlen(name)+1, sizeof(char)); if (!temp->name) { free(temp); fprintf(stderr, "Out of memory allocating name.\n"); return NULL; } strcpy(temp->name, name); temp->lTree = temp->rTree = NULL; temp->nAccesses = 0; return temp; } /****************************************************************** search purpose: searches for the given name in a binary tree. If not found, allocates a node and enters the name at the proper location. input: name - the item to search for head - the root of the tree to search through returns: node where item was found NULL if out of memory *******************************************************************/ ACCESS *search(char *name, ACCESS *head) { int cmp; while (head != NULL) { cmp = strcmp(name, head->name); if (cmp == 0) break; if (cmp < 0) { if (!head->lTree) { head->lTree = newNode(name); if (!head->lTree) return NULL; } head = head->lTree; } else if (cmp > 0) { if (!head->rTree) { head->rTree = newNode(name); if (!head->rTree) return NULL; } head = head->rTree; } } return head; } /****************************************************************** printAlphaTree purpose: prints a binary tree in alphabetical order. input: head - the root of the tree to print returns: number of items printed *******************************************************************/ long printAlphaTree(ACCESS *head) { int i; long n = 0; if (head) { n += printAlphaTree(head->lTree); if (head->nAccesses >= gCutoff) { i = gFieldLen-strlen(head->name)+1; printf("%s %*c %ld\n", head->name, i, ' ', head->nAccesses); n++; } n += printAlphaTree(head->rTree); } return n; } /****************************************************************** countTree purpose: calculates number of items in a tree. input: head - the root of the tree to print returns: the number of items in the tree *******************************************************************/ size_t countTree(ACCESS *head) { size_t n = 0; if (head) { n += countTree(head->lTree); n++; n += countTree(head->rTree); } return n; } /****************************************************************** treeToArray purpose: take pointers from binary tree and put them into a sortable array. input: head - the root of the tree to print array - the linear array destination n - starting point in the array returns: ending position in array *******************************************************************/ size_t treeToArray(ACCESS *head, ACCESS **array, size_t n) { if (head) { n = treeToArray(head->lTree, array, n); array[n++] = head; n = treeToArray(head->rTree, array, n); } return n; } /****************************************************************** accessSort purpose: sort an array of ACCESS * into descending order by number of accesses. In case of a tie, sort alpha ascending. Used in call to qsort. input: a - pointer to an ACCESS * b - pointer to an ACCESS * returns: -1 for a with more accesses than b 0 for a with same # of accesses as b 1 for a with fewer access than b *******************************************************************/ int accessSort( const void *a, const void *b) { if ((*((ACCESS **) a))->nAccesses < (*((ACCESS **) b))->nAccesses) return 1; if ((*((ACCESS **) a))->nAccesses > (*((ACCESS **) b))->nAccesses) return -1; return( strcmp((*((ACCESS **) a))->name, (*((ACCESS **) b))->name) ); } /****************************************************************** accessPrint purpose: sorts a set of ACCESS records by number of accesses and prints the results input: head - the root of the tree to print returns: number of lines printed, or -1 if not enough memory was available for sort *******************************************************************/ long accessPrint(ACCESS *head) { ACCESS **accessArray; size_t i, nItems, nPrint; int nBlanks; if (head) { nPrint = 0; nItems = countTree(head); accessArray = (ACCESS **) malloc(nItems * sizeof(ACCESS *)); if (!accessArray) { fprintf(stderr, "Unable to allocate array for sort.\n"); return -1; } treeToArray(head, accessArray, 0); qsort((void *)accessArray, nItems, sizeof(ACCESS *), accessSort); for (i=0; inAccesses < gCutoff) break; nBlanks = gFieldLen-strlen(accessArray[i]->name)+1; printf("%s %*c %ld\n", accessArray[i]->name, nBlanks, ' ', accessArray[i]->nAccesses); nPrint++; } free(accessArray); return nPrint; } else return -1; } /****************************************************************** deAllocate purpose: releases memory allocated to a tree input: head - the root of the tree to deallocate *******************************************************************/ void deAllocate(ACCESS *head) { if (head) { deAllocate(head->lTree); deAllocate(head->rTree); free(head->name); free(head); } } /****************************************************************** setValue purpose: set an integer value from the argument list, and increment pointer to argument string. input: p - current pointer to argument thingToSet - the value to set returns: pointer to location past the integer *******************************************************************/ char *setValue(char *p, long *thingToSet) { int i; i = strspn(p, "012345789"); if (i) *thingToSet = atol(p); return p + i; } /****************************************************************** analyzeOption purpose: analyze a single "-" option in the command line input: the argument, extracted from the argv array NOTE: sets global variables for hit-limit cutoffs and levels-to-display. If any valid option is used, then "noOptions" is set FALSE, so we will not get any "user list" output if we want only "file lists". *******************************************************************/ void analyzeOption(char *arg) { char *p; p = arg + 1; /* skip the leading hyphen */ while (*p) { if (*p == 'u') /* can be followed by limit */ { noOptions = FALSE; listUsers = TRUE; p++; p = setValue(p, &userCutoff); } if (*p == 'f') /* can be followed by limit and level */ { noOptions = FALSE; listFiles = TRUE; p++; p = setValue(p, &fileCutoff); if (*p == ',') { p++; p = setValue(p, &fileLevel); } } else if (*p == 'd') /* can be followed by limit and # of levels */ { noOptions = FALSE; listDomains = TRUE; p++; p = setValue(p, &domainCutoff); if (*p == ',') { p++; p = setValue(p, &domainLevel); } } else if (*p == 't') { listDates = TRUE; p++; } } } /****************************************************************** analyzeArguments purpose: check validity of argument list. input: argc - number of arguments on command line argv - array of arguments returns: TRUE if argument list seems all right FALSE if more than one argument begins with a non-dash or does not name a valid file *******************************************************************/ BOOLEAN analyzeArguments(int argc, char *argv[]) { int i; for (i=1; i pUser) { if (*pDomain == '.') { i++; if (i == domainLevel) { pDomain++; break; } } pDomain--; } /* back off and find the tail end of the file name to desired number of levels */ p = pFile + strlen(pFile) - 1; i = 0; while (p > pFile) { if (*p == '/') { i++; if (i == fileLevel) { p++; break; } } p--; } pFile = p; /* construct the binary trees */ if (*pUser) { if (!userHead) theUser = userHead = newNode(pUser); else theUser = search(pUser, userHead); if (!theUser) break; theUser->nAccesses++; maxUserLen = max(strlen(pUser), maxUserLen); } if (*pDomain) { if (strspn(pDomain, "0123456789.") == strlen(pDomain)) pDomain = ANON_IP; if (!domainHead) theDomain = domainHead = newNode(pDomain); else theDomain = search(pDomain, domainHead); if (!theDomain) break; theDomain->nAccesses++; maxDomainLen = max(strlen(pDomain), maxDomainLen); } if (*pFile) { if (!fileHead) theFile = fileHead = newNode(pFile); else theFile = search(pFile, fileHead); if (!theFile) break; theFile->nAccesses++; maxFileLen = max(strlen(pFile), maxFileLen); } if (*pDate) { if (!dateHead) theDate = dateHead = newNode(pDate); else theDate = search(pDate, dateHead); if (!theDate) break; theDate->nAccesses++; maxDateLen = max(strlen(pDate), maxDateLen); } } while (TRUE); /* if we had a memory error, read through the entire file */ if (totLines != nLines) { totLines = nLines; while (p = fgets(oneLine, 511, inFile)) totLines++; } fclose(inFile); printf("Lines processed: %ld of %ld\n", nLines, totLines); printf("\n"); if (listUsers) { printf("Access by User:\n"); gFieldLen = maxUserLen; gCutoff = userCutoff; nPrint = accessPrint(userHead); if (nPrint < 0) nPrint = printAlphaTree(userHead); printFooter(nPrint, 0); printf("\n"); } printf("Total bytes: %ld\n\n", totBytes); if (noOptions) { printf("Total number of users: %u\n", countTree(userHead)); } deAllocate(userHead); if (listDomains) { printf("\nAccess by Domain:\n"); gFieldLen = maxDomainLen; gCutoff = domainCutoff; if ((nPrint = accessPrint(domainHead)) < 0) nPrint = printAlphaTree(domainHead); printFooter(nPrint, 1); printf("\n"); } if (noOptions) printf("Total number of domains: %u\n", countTree(domainHead)); deAllocate(domainHead); if (listFiles) { printf("\nAccess by File:\n"); gFieldLen = maxFileLen; gCutoff = fileCutoff; if ((nPrint = accessPrint(fileHead)) < 0) nPrint = printAlphaTree(fileHead); printFooter(nPrint, 2); printf("\n"); } if (noOptions) printf("Total number of files: %u\n", countTree(fileHead)); deAllocate(fileHead); if (listDates) { printf("\nAccess by Date:\n"); gFieldLen = maxDateLen; gCutoff = 1; if ((nPrint = accessPrint(dateHead)) < 0) nPrint = printAlphaTree(dateHead); printFooter(nPrint, 3); } if (noOptions) printf("Total number of dates: %u\n", countTree(dateHead)); deAllocate(dateHead); }