/* Radix tree library for storing text strings */ #include <stdio.h> #include "radix.h" static int do_insert(); static void do_dump(); #ifdef COMPRESSOR static int do_compress(); static int next_code = 1; #endif /* Returns -1 on failure, and 0 on success (as an analyser) or the code */ /* assigned (as a compressor) */ int r_insert(root, key) struct r_node **root; unsigned char *key; { if(!key || !*key) { return -1; /* Failure */ } return do_insert(root, key); } #ifndef COMPRESSOR void r_dump(root) struct r_node *root; { unsigned char buffer[4096]; do_dump(root, buffer, 0); } #endif #ifdef COMPRESSOR int r_compress(root, string) struct r_node *root; unsigned char **string; { return do_compress(root, string); } #endif static int do_insert(curr, key) struct r_node **curr; unsigned char *key; { int hi, lo, mid; if(!*key) { return 0; /* end of string, we're done */ } /* Find the character 'key' points at in the current node */ /* adding it in if necessary */ /* Empty nodes are always allocated with room for one letter */ if((*curr)->count == 0) { (**curr).letter[0].c = *key; (**curr).letter[0].child = NULL; #ifdef COMPRESSOR (**curr).letter[0].code = 0; #else (**curr).letter[0].count = 0; #endif mid = 0; (*curr)->count = 1; } else { lo = 0; hi = (*curr)->count - 1; while(hi >= lo) { mid = ((hi - lo) >> 1) + lo; if((**curr).letter[mid].c == *key) { break; } else if((**curr).letter[mid].c < *key) { hi = mid - 1; } else { lo = mid + 1; } } if(hi < lo) { /* We failed to find it, insert */ struct r_node *tmp; tmp = (struct r_node *) realloc(*curr, sizeof(struct r_node) + (sizeof(struct r_letter) * (*curr)->count)); if(!tmp) { return -1; /* Failure */ } *curr = tmp; (*curr)->count += 1; if(lo + 1 < (*curr)->count) { bcopy(&((*curr)->letter[lo]), &((*curr)->letter[lo+1]), ((*curr)->count - (lo + 1)) * sizeof(struct r_letter)); } (*curr)->letter[lo].c = *key; (*curr)->letter[lo].child = NULL; #ifdef COMPRESSOR (*curr)->letter[lo].code = 0; #else (*curr)->letter[lo].count = 0; #endif mid = lo; } else { } } /* If we get here, mid is the index of the right character, so */ /* update it and recurse as needed */ #ifndef COMPRESSOR (*curr)->letter[mid].count += 1; #endif if(key[1] == '\0') { #ifdef COMPRESSOR (*curr)->letter[mid].code = next_code; return next_code++; /* success */ #else return 0; #endif } /* We have more string to go, so carry on */ if((*curr)->letter[mid].child == NULL) { struct r_node *tmp; tmp = (struct r_node *) malloc(sizeof(struct r_node)); if(!tmp) { return -1; } tmp->count = 0; (*curr)->letter[mid].child = tmp; } return do_insert(&((*curr)->letter[mid].child), key + 1); } #ifdef COMPRESSOR static int do_compress(curr, str) struct r_node *curr; unsigned char **str; { int hi, lo, mid, code; if(!*str) { return 0; /* end of string, we're done */ } /* Find the character 'key' points at in the current node */ /* adding it in if necessary */ if(curr == NULL || curr->count == 0) { /* Can't code the next letter, return 0 */ return 0; } lo = 0; hi = curr->count - 1; while(hi >= lo) { mid = ((hi - lo) >> 1) + lo; if(curr->letter[mid].c == **str) { /* See if we can go further */ (*str)++; code = do_compress(curr->letter[mid].child, str); if(code == 0) { if(curr->letter[mid].code){ return curr->letter[mid].code; } else { (*str)--; return 0; } } else { return code; } } else if(curr->letter[mid].c < **str) { hi = mid - 1; } else { lo = mid + 1; } } /* Failed to code this letter, return 0 */ return 0; } #endif #ifndef COMPRESSOR static void do_dump(curr, buff, depth) struct r_node *curr; unsigned char *buff; int depth; { int i; if(!curr) return; for(i = 0; i < curr->count; i++) { /* Emit the current string */ buff[depth] = curr->letter[i].c; buff[depth + 1] = '\0'; #ifdef COMPRESSOR printf("%d '%s'\n", curr->letter[i].code, buff); #else printf("%d '%s'\n", curr->letter[i].count, buff); #endif /* Recurse to handle all extensions of this string too */ do_dump(curr->letter[i].child, buff, depth + 1); } } #endif