/*
* Compression library. Uses a radix tree to compress substrings
* down to 12 bit codes.
*/
#include <stdio.h>
#include "radix.h"
#include "compresstab.h"
static struct r_node *root;
static unsigned char *decompresstab[4096];
static unsigned char singletons[260];
unsigned int strings_compressed = 0;
unsigned int strings_decompressed = 0;
unsigned int chars_in = 0;
unsigned int symbols_out = 0;
void init_string_compress()
{
int code, i;
unsigned char *p;
root = (struct r_node *)malloc(sizeof(struct r_node));
root->count = 0;
/*
* Get all the printables
*/
p = singletons;
*p = '\0';
decompresstab[0] = p;
p++;
for (i = 1; i < 128; i++) {
p[0] = (unsigned char)i;
p[1] = '\0';
code = r_insert(&root, p);
if (code <= 0 || code > 4095) {
printf("Invalid code 1\n");
abort();
}
decompresstab[code] = p;
p += 2;
}
/*
* Now insert all the strings in our compression table
*/
for (i = 0; cmptab[i] != NULL; i++) {
code = r_insert(&root, (unsigned char *)(cmptab[i]));
if (code <= 0 || code > 4095) {
printf("Invalid code 2\n");
abort();
}
p = (unsigned char *)(cmptab[i]);
decompresstab[code] = p;
}
#ifdef DEBUG
printf("Done inserting.\n");
#endif
}
/*
* Compress the input string to the output array as 12 bit codes.
* Return the number of bytes to worry about in the output.
*/
int string_compress(src, dst)
unsigned char *src;
unsigned char *dst;
{
int code;
int bitnum = 0;
int bytenum;
#ifdef DEBUG
int i;
#endif
strings_compressed++;
chars_in += strlen(src) + 1;
do {
code = r_compress(root, &src);
#ifdef DEBUG
printf("emit code %x\n", code);
#endif
bytenum = (bitnum >> 3);
if (bitnum & 7) {
dst[bytenum] = dst[bytenum] | ((code >> 8) & 0x0f);
dst[bytenum + 1] = (code & 0xff);
} else {
dst[bytenum] = (code >> 4) & 0xff;
dst[bytenum + 1] = ((code << 4) & 0xf0);
}
bitnum += 12;
symbols_out++;
} while (code != 0);
#ifdef DEBUG
bytenum = ((bitnum & 7) ? (bitnum >> 3) + 1 : (bitnum >> 3));
for (i = 0; i < bytenum; i++) {
printf("%x ", dst[i]);
}
printf("\n");
#endif
return ((bitnum & 7) ? (bitnum >> 3) + 1 : (bitnum >> 3));
}
/*
* Decompress an array of 12 bit codes as produced by string_compress().
* Return the number of bytes in the string, including zero terminator.
*/
int string_decompress(src, dst)
unsigned char *src;
unsigned char *dst;
{
unsigned int code, bitnum, bytenum;
register unsigned char *p, tmp;
int count = 0;
strings_decompressed++;
bitnum = 0;
while (1) {
bytenum = bitnum >> 3;
if (bitnum & 7) {
code = (tmp & 0x0f) << 8;
code |= src[bytenum + 1];
} else {
code = src[bytenum] << 4;
tmp = src[bytenum + 1];
code |= tmp >> 4;
}
#ifdef DEBUG
printf("Extract code %x = '%s'\n",
code, decompresstab[code]);
#endif
if (code == 0)
break;
p = decompresstab[code];
while (*p) {
count++;
*dst++ = *p++;
}
bitnum += 12;
}
*dst = '\0';
return count + 1; /*
* Include zero terminator
*/
}