/*
* Radix tree library for storing text strings
*/
#include <stdio.h>
#include "radix.h"
static int do_insert();
static void do_dump();
#ifdef COMPRESSOR
static int do_compress();
static int next_code = 1;
#endif
/*
* Returns -1 on failure, and 0 on success (as an analyser) or the code
*/
/*
* assigned (as a compressor)
*/
int r_insert(root, key)
struct r_node **root;
unsigned char *key;
{
if (!key || !*key) {
return -1; /*
* Failure
*/
}
return do_insert(root, key);
}
#ifndef COMPRESSOR
void r_dump(root)
struct r_node *root;
{
unsigned char buffer[4096];
do_dump(root, buffer, 0);
}
#endif
#ifdef COMPRESSOR
int r_compress(root, string)
struct r_node *root;
unsigned char **string;
{
return do_compress(root, string);
}
#endif
static int do_insert(curr, key)
struct r_node **curr;
unsigned char *key;
{
int hi, lo, mid;
if (!*key) {
return 0; /*
* end of string, we're done
*/
}
/*
* Find the character 'key' points at in the current node
*/
/*
* adding it in if necessary
*/
/*
* Empty nodes are always allocated with room for one letter
*/
if ((*curr)->count == 0) {
(**curr).letter[0].c = *key;
(**curr).letter[0].child = NULL;
#ifdef COMPRESSOR
(**curr).letter[0].code = 0;
#else
(**curr).letter[0].count = 0;
#endif
mid = 0;
(*curr)->count = 1;
} else {
lo = 0;
hi = (*curr)->count - 1;
while (hi >= lo) {
mid = ((hi - lo) >> 1) + lo;
if ((**curr).letter[mid].c == *key) {
break;
} else if ((**curr).letter[mid].c < *key) {
hi = mid - 1;
} else {
lo = mid + 1;
}
}
if (hi < lo) { /*
* We failed to find it, insert
*/
struct r_node *tmp;
tmp = (struct r_node *)realloc(*curr,
sizeof(struct r_node) +
(sizeof(struct r_letter) * (*curr)->count));
if (!tmp) {
return -1; /*
* Failure
*/
}
*curr = tmp;
(*curr)->count += 1;
if (lo + 1 < (*curr)->count) {
bcopy(&((*curr)->letter[lo]),
&((*curr)->letter[lo + 1]),
((*curr)->count - (lo + 1)) *
sizeof(struct r_letter));
}
(*curr)->letter[lo].c = *key;
(*curr)->letter[lo].child = NULL;
#ifdef COMPRESSOR
(*curr)->letter[lo].code = 0;
#else
(*curr)->letter[lo].count = 0;
#endif
mid = lo;
} else {
}
}
/*
* If we get here, mid is the index of the right character, so
*/
/*
* update it and recurse as needed
*/
#ifndef COMPRESSOR
(*curr)->letter[mid].count += 1;
#endif
if (key[1] == '\0') {
#ifdef COMPRESSOR
(*curr)->letter[mid].code = next_code;
return next_code++; /*
* success
*/
#else
return 0;
#endif
}
/*
* We have more string to go, so carry on
*/
if ((*curr)->letter[mid].child == NULL) {
struct r_node *tmp;
tmp = (struct r_node *)malloc(sizeof(struct r_node));
if (!tmp) {
return -1;
}
tmp->count = 0;
(*curr)->letter[mid].child = tmp;
}
return do_insert(&((*curr)->letter[mid].child), key + 1);
}
#ifdef COMPRESSOR
static int do_compress(curr, str)
struct r_node *curr;
unsigned char **str;
{
int hi, lo, mid, code;
if (!*str) {
return 0; /*
* end of string, we're done
*/
}
/*
* Find the character 'key' points at in the current node
*/
/*
* adding it in if necessary
*/
if (curr == NULL || curr->count == 0) {
/*
* Can't code the next letter, return 0
*/
return 0;
}
lo = 0;
hi = curr->count - 1;
while (hi >= lo) {
mid = ((hi - lo) >> 1) + lo;
if (curr->letter[mid].c == **str) {
/*
* See if we can go further
*/
(*str)++;
code = do_compress(curr->letter[mid].child, str);
if (code == 0) {
if (curr->letter[mid].code) {
return curr->letter[mid].code;
} else {
(*str)--;
return 0;
}
} else {
return code;
}
} else if (curr->letter[mid].c < **str) {
hi = mid - 1;
} else {
lo = mid + 1;
}
}
/*
* Failed to code this letter, return 0
*/
return 0;
}
#endif
#ifndef COMPRESSOR
static void do_dump(curr, buff, depth)
struct r_node *curr;
unsigned char *buff;
int depth;
{
int i;
if (!curr)
return;
for (i = 0; i < curr->count; i++) {
/*
* Emit the current string
*/
buff[depth] = curr->letter[i].c;
buff[depth + 1] = '\0';
#ifdef COMPRESSOR
printf("%d '%s'\n", curr->letter[i].code, buff);
#else
printf("%d '%s'\n", curr->letter[i].count, buff);
#endif
/*
* Recurse to handle all extensions of this string too
*/
do_dump(curr->letter[i].child, buff, depth + 1);
}
}
#endif