/*
* Analyse a MUSH database. We read it from stdin, extract the
* attributes, and stuff them into a radix tree. We accept a single
* parameter giving the maximum length of string to store, and
* then we dump the whole tree out. This gives a complete list of all
* substrings of length N or less found in a MUSH database, complete
* with counts, for post-processing.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "radix.h"
/*
* Forward declaration: copyattr() is defined after main(), which calls it.
*/
void copyattr();
/*
* Scratch buffer that main() hands to copyattr() to receive the text of
* the attribute currently being processed. copyattr() never writes more
* than 4000 bytes into it, so 4002 bytes is sufficient.
*/
char attr[4002];
/*
 * eatline() is defined below main(); declare it so the calls in main()
 * do not rely on an implicit declaration.
 */
int eatline(void);

/*
 * Entry point.
 *
 * Usage: <prog> <max string length>
 *
 * Reads the database from stdin one line at a time, dispatching on the
 * first character of each line:
 *   '>'  the rest of that line is skipped and the following attribute
 *        text is read with copyattr(); for every starting offset in that
 *        text, the substring of up to max_len characters beginning there
 *        is inserted into the radix tree.
 *   '!'  counted as an object (a progress dot is printed to stderr every
 *        512 objects) and the rest of the line is skipped.
 *   anything else: the rest of the line is skipped.
 * At end of input the whole tree is dumped with r_dump().
 *
 * Returns 0 on success; exits with status 1 on a bad argument or when the
 * root node cannot be allocated.
 */
int main(int ac, char *av[])
{
    enum { SUBSTR_BUF = 128 };  /* substring buffer size, NUL included */
    struct r_node *root;
    char substr[SUBSTR_BUF];
    char *end;
    long requested;
    int ch, i, len, sslen, max_len;
    int objcnt = 0;

    if (ac != 2) {
        fprintf(stderr, "usage: %s <max string length>\n", av[0]);
        exit(1);
    }

    /*
     * strtol() rather than atoi() so that trailing junk ("7abc") is
     * rejected instead of being silently ignored. An out-of-range value
     * saturates to LONG_MIN/LONG_MAX and is caught by the range checks.
     */
    requested = strtol(av[1], &end, 10);
    if (end == av[1] || *end != '\0' || requested <= 0) {
        fprintf(stderr, "usage: %s <positive max string length>\n",
                av[0]);
        exit(1);
    }
    /* substr[] must hold max_len characters plus the terminating NUL. */
    if (requested >= SUBSTR_BUF) {
        fprintf(stderr, "A max string length of %d or more is insane.\n",
                SUBSTR_BUF);
        exit(1);
    }
    max_len = (int)requested;

    /* Warn about how costly the requested length is likely to be. */
    if (max_len >= 9) {
        fprintf(stderr,
                "All strings of length %s? I hope you have some real iron\n",
                av[1]);
    } else if (max_len >= 6) {
        fprintf(stderr,
                "counting all strings up to %s long is going to be expensive\n",
                av[1]);
    }

    /*
     * Zero the whole root node rather than just its count, so no field
     * is handed to r_insert() uninitialised.
     */
    root = calloc(1, sizeof *root);
    if (root == NULL) {
        fprintf(stderr, "%s: out of memory\n", av[0]);
        exit(1);
    }
    root->count = 0;

    while ((ch = getchar()) != EOF) {
        switch (ch) {
        case '>':
            eatline();
            copyattr(attr);
            len = (int)strlen(attr);
            for (i = 0; i < len; i++) {
                /* Clamp to what is left of the attribute text. */
                sslen = (len - i) < max_len ? (len - i) : max_len;
                memcpy(substr, attr + i, (size_t)sslen);
                substr[sslen] = '\0';
                r_insert(&root, substr);
            }
            break;
        case '!':
            objcnt++;
            if ((objcnt & 0x1ff) == 0) {
                /* Progress indicator: one dot per 512 objects. */
                fputc('.', stderr);
                fflush(stderr);
            }
            /* FALLTHROUGH: the rest of the '!' line is skipped too */
        default:
            eatline();
            break;
        }
    }

    r_dump(root);
    return 0;
}
/*
 * Consume the rest of the current line on stdin, up to and including the
 * newline. Stops early if end of input is reached before a newline.
 *
 * Always returns 0. The return type stays int because callers that use
 * this function without a prior declaration assume an int return; the
 * value itself carries no information.
 */
int eatline(void)
{
    int ch;

    do {
        ch = getchar();
    } while (ch != '\n' && ch != EOF);

    return 0;
}
/*
 * Read one attribute value from stdin into buff as a NUL-terminated
 * string, observing the attribute escaping rules.
 *
 * Reading stops at the first newline that is not preceded by '\r', or at
 * end of input; the terminator is consumed but not stored. An internal
 * newline appears in the input as "\r\n" and is stored as a single space.
 *
 * buff must have room for at least 4000 bytes. At most 3999 characters
 * are stored; the remainder of an over-long attribute is still read and
 * discarded, so the caller resumes at the start of the next input line
 * instead of in the middle of the attribute text.
 */
void copyattr(char *buff)
{
    enum { ATTR_CHARS = 3999 };  /* most characters kept, NUL excluded */
    int ch;             /* int, not char, so EOF differs from every byte */
    int last = '\0';    /* previous input character; starts as non-'\r' */
    int cr_stored = 0;  /* nonzero if buff[i - 1] is the '\r' just read */
    int i = 0;

    while ((ch = getchar()) != EOF) {
        if (ch == '\n') {
            if (last != '\r')
                break;              /* unescaped newline ends the value */
            /*
             * Internal newlines are escaped as \r\n: turn the stored
             * '\r' into a space and drop the '\n'.
             */
            if (cr_stored)
                buff[i - 1] = ' ';
            cr_stored = 0;
        } else if (i < ATTR_CHARS) {
            buff[i++] = (char)ch;
            cr_stored = (ch == '\r');
        } else {
            /* Buffer full: keep reading so the input stays in sync. */
            cr_stored = 0;
        }
        last = ch;
    }
    buff[i] = '\0';
}