/*
Analyse a MUSH database. We chew it in on stdin, extract the
attributes, and stuff them into a radix tree. We accept a single
parameter telling us the maximum length of string to store, and
then we dump the whole tree out. This gives a complete list of all
substrings of length N or less found in a MUSH database, complete
with counts, for post-processing.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "radix.h"
/* forward */
/* copyattr is defined after main, which calls it, so it is declared here. */
void copyattr();
/* Scratch buffer for the text of one attribute at a time. main hands it to
   copyattr, which limits what it writes to fewer than the 4002 bytes
   reserved here, and then reads the result back as a C string. */
char attr[4002];
/* forward: eatline is defined after main, which calls it */
int eatline(void);

/*
 * Entry point. Takes exactly one argument, the maximum substring length
 * (a decimal integer from 1 to 127), reads the database from stdin, and
 * dumps the resulting radix tree with r_dump.
 *
 * Input is dispatched on the first character of each line:
 *   '>'  the rest of that line is skipped, then the text that follows is
 *        read with copyattr; for every starting offset in that text the
 *        substring of up to max_len characters is passed to r_insert.
 *   '!'  counted in objcnt; a progress dot goes to stderr every 512 such
 *        lines, and the rest of the line is skipped.
 *   else the rest of the line is skipped.
 *
 * Exits with status 1 on a bad argument or allocation failure, returns 0
 * otherwise.
 */
int
main(int ac, char *av[])
{
	struct r_node *root;
	char substr[128];
	char *end;
	long requested;
	int ch, i, len, sslen, max_len;
	int objcnt = 0;

	if (ac != 2) {
		fprintf(stderr, "usage: %s <max string length>\n", av[0]);
		exit(1);
	}

	/* strtol rather than atoi so that trailing junk ("5x") is rejected
	   instead of being silently read as 5. An out-of-range value comes
	   back as LONG_MIN/LONG_MAX and is caught by the range tests below. */
	requested = strtol(av[1], &end, 10);
	if (end == av[1] || *end != '\0' || requested <= 0) {
		fprintf(stderr, "usage: %s <positive max string length>\n",
		    av[0]);
		exit(1);
	}
	/* substr must hold max_len characters plus the terminating NUL. */
	if (requested >= (long)sizeof substr) {
		fprintf(stderr, "A max string length of 128 or over is insane.\n");
		exit(1);
	}
	max_len = (int)requested;

	/* Warn about the cost: nothing up to 5, a caution for 6-8, and a
	   stronger one beyond that. */
	if (max_len > 8) {
		fprintf(stderr,
		    "All strings of length %s? I hope you have some real iron\n",
		    av[1]);
	} else if (max_len > 5) {
		fprintf(stderr,
		    "counting all strings up to %s long is going to be expensive\n",
		    av[1]);
	}

	/* Zero-filled so that no field of the root starts out indeterminate.
	   NOTE(review): radix.h is not visible from here; this assumes an
	   all-zero node is what r_insert expects of an empty root - confirm. */
	root = calloc(1, sizeof *root);
	if (root == NULL) {
		fprintf(stderr, "%s: out of memory\n", av[0]);
		exit(1);
	}
	root->count = 0;

	while ((ch = getchar()) != EOF) {
		switch (ch) {
		case '>':
			eatline();
			copyattr(attr);
			len = (int)strlen(attr);
			for (i = 0; i < len; i++) {
				/* Clip the window at the end of the text. */
				sslen = len - i;
				if (sslen > max_len)
					sslen = max_len;
				memcpy(substr, attr + i, (size_t)sslen);
				substr[sslen] = '\0';
				r_insert(&root, substr);
			}
			break;
		case '!':
			objcnt++;
			if ((objcnt & 0x1ff) == 0) {
				fprintf(stderr, ".");
				fflush(stderr);
			}
			/* FALLTHROUGH */
		default:
			/* If the character just read was itself the newline,
			   the line is already finished; calling eatline here
			   would throw away the whole next line. */
			if (ch != '\n')
				eatline();
			break;
		}
	}

	r_dump(root);
	return 0;
}
/* Consume the rest of the current line on stdin, up to and including the
   newline, or up to end of input if that comes first.
   Returns the character that stopped the scan: '\n', or EOF when the input
   ran out before a newline was seen. Callers may ignore the result. */
int
eatline(void)
{
	int ch;	/* int, not char, so EOF stays distinct from every byte value */

	do {
		ch = getchar();
	} while (ch != '\n' && ch != EOF);
	return ch;
}
/* Read one attribute value from stdin and stuff it into the passed down
   array, observing the strange rules for attribute escaping: a newline that
   directly follows a '\r' is internal to the value, and that "\r\n" pair is
   stored as a single space. The value ends at the first newline that is not
   escaped this way, or at end of input. The ending newline is not stored and
   the result is always NUL-terminated.

   buff must have room for at least 4000 bytes. At most 3999 characters are
   stored; anything beyond that is read and thrown away, so stdin is always
   left positioned just past the end of the attribute rather than in the
   middle of it.
*/
void
copyattr(char *buff)
{
	enum { MAX_STORED = 3999 };	/* characters kept, NUL not included */
	int ch;			/* int so EOF differs from every byte value */
	int last = '\0';	/* anything other than a \r */
	int stored = 0;		/* was the previous character written to buff? */
	int i = 0;

	while ((ch = getchar()) != EOF) {
		if (ch == '\n') {
			if (last != '\r')
				break;	/* unescaped newline: end of value */
			/* Internal newlines are escaped as \r\n; turn the \r
			   already in the buffer into the replacement space. */
			if (stored)
				buff[i - 1] = ' ';
			stored = 0;
		} else if (i < MAX_STORED) {
			buff[i++] = (char)ch;
			stored = 1;
		} else {
			/* Buffer full: keep reading so the tail is consumed. */
			stored = 0;
		}
		last = ch;
	}
	buff[i] = '\0';
}