/*
Radix tree library for storing text strings
*/
#include <stdio.h>
#include "radix.h"
static int do_insert();
static void do_dump();
#ifdef COMPRESSOR
static int do_compress();
static int next_code = 1;
#endif
/* Returns -1 on failure, and 0 on success (as an analyser) or the code */
/* assigned (as a compressor) */
int
r_insert(root, key)
struct r_node **root;
unsigned char *key;
{
if(!key || !*key) {
return -1; /* Failure */
}
return do_insert(root, key);
}
#ifndef COMPRESSOR
void
r_dump(root)
struct r_node *root;
{
unsigned char buffer[4096];
do_dump(root, buffer, 0);
}
#endif
#ifdef COMPRESSOR
int
r_compress(root, string)
struct r_node *root;
unsigned char **string;
{
return do_compress(root, string);
}
#endif
static int
do_insert(curr, key)
struct r_node **curr;
unsigned char *key;
{
int hi, lo, mid;
if(!*key) {
return 0; /* end of string, we're done */
}
/* Find the character 'key' points at in the current node */
/* adding it in if necessary */
/* Empty nodes are always allocated with room for one letter */
if((*curr)->count == 0) {
(**curr).letter[0].c = *key;
(**curr).letter[0].child = NULL;
#ifdef COMPRESSOR
(**curr).letter[0].code = 0;
#else
(**curr).letter[0].count = 0;
#endif
mid = 0;
(*curr)->count = 1;
} else {
lo = 0;
hi = (*curr)->count - 1;
while(hi >= lo) {
mid = ((hi - lo) >> 1) + lo;
if((**curr).letter[mid].c == *key) {
break;
} else if((**curr).letter[mid].c < *key) {
hi = mid - 1;
} else {
lo = mid + 1;
}
}
if(hi < lo) { /* We failed to find it, insert */
struct r_node *tmp;
tmp = (struct r_node *) realloc(*curr,
sizeof(struct r_node) +
(sizeof(struct r_letter) * (*curr)->count));
if(!tmp) {
return -1; /* Failure */
}
*curr = tmp;
(*curr)->count += 1;
if(lo + 1 < (*curr)->count) {
bcopy(&((*curr)->letter[lo]),
&((*curr)->letter[lo+1]),
((*curr)->count - (lo + 1)) *
sizeof(struct r_letter));
}
(*curr)->letter[lo].c = *key;
(*curr)->letter[lo].child = NULL;
#ifdef COMPRESSOR
(*curr)->letter[lo].code = 0;
#else
(*curr)->letter[lo].count = 0;
#endif
mid = lo;
} else {
}
}
/* If we get here, mid is the index of the right character, so */
/* update it and recurse as needed */
#ifndef COMPRESSOR
(*curr)->letter[mid].count += 1;
#endif
if(key[1] == '\0') {
#ifdef COMPRESSOR
(*curr)->letter[mid].code = next_code;
return next_code++; /* success */
#else
return 0;
#endif
}
/* We have more string to go, so carry on */
if((*curr)->letter[mid].child == NULL) {
struct r_node *tmp;
tmp = (struct r_node *) malloc(sizeof(struct r_node));
if(!tmp) {
return -1;
}
tmp->count = 0;
(*curr)->letter[mid].child = tmp;
}
return do_insert(&((*curr)->letter[mid].child), key + 1);
}
#ifdef COMPRESSOR
static int
do_compress(curr, str)
struct r_node *curr;
unsigned char **str;
{
int hi, lo, mid, code;
if(!*str) {
return 0; /* end of string, we're done */
}
/* Find the character 'key' points at in the current node */
/* adding it in if necessary */
if(curr == NULL || curr->count == 0) {
/* Can't code the next letter, return 0 */
return 0;
}
lo = 0;
hi = curr->count - 1;
while(hi >= lo) {
mid = ((hi - lo) >> 1) + lo;
if(curr->letter[mid].c == **str) {
/* See if we can go further */
(*str)++;
code = do_compress(curr->letter[mid].child, str);
if(code == 0) {
if(curr->letter[mid].code){
return curr->letter[mid].code;
} else {
(*str)--;
return 0;
}
} else {
return code;
}
} else if(curr->letter[mid].c < **str) {
hi = mid - 1;
} else {
lo = mid + 1;
}
}
/* Failed to code this letter, return 0 */
return 0;
}
#endif
#ifndef COMPRESSOR
static void
do_dump(curr, buff, depth)
struct r_node *curr;
unsigned char *buff;
int depth;
{
int i;
if(!curr)
return;
for(i = 0; i < curr->count; i++) {
/* Emit the current string */
buff[depth] = curr->letter[i].c;
buff[depth + 1] = '\0';
#ifdef COMPRESSOR
printf("%d '%s'\n", curr->letter[i].code, buff);
#else
printf("%d '%s'\n", curr->letter[i].count, buff);
#endif
/* Recurse to handle all extensions of this string too */
do_dump(curr->letter[i].child, buff, depth + 1);
}
}
#endif