#include <stdio.h>
#define STRALLOC
#include "stralloc.h"
#ifndef DEBUG
#define BUG_FREE
#endif
/*
* stralloc.c - string management.
*
* All strings are stored in an extensible hash table, with reference counts.
* free_string decreases the reference count; if it gets to zero, the string
* will be deallocated. add_string increases the ref count if it finds a
* matching string, or allocates it if it cant. There is no way to allocate
* a string of a particular size to fill later (hash wont work!), so you'll
* have to copy things from a static (or malloced and later freed) buffer -
* that is, if you want to avoid space leaks...
*
* Current overhead:
* 8 bytes per string (next pointer, and 2 shorts for length and refs)
* Strings are nearly all fairly short, so this is a significant overhead-
* there is also the 4 byte malloc overhead and the fact that malloc
* generally allocates blocks which are a power of 2 (should write my
* own best-fit malloc specialised to strings); then again, GNU malloc
* is bug free...
*/
/*
* there is also generic hash table management code, but strings can be shared
* (that was the point of this code), will be unique in the table,
* require a reference count, and are malloced, copied and freed at
* will by the string manager. Besides, I wrote this code first :-).
* Look at htable.c for the other code. It uses the Hash() function
* defined here, and requires hashed objects to have a pointer to the
* next element in the chain (which you specify when you call the functions).
*/
#define WORD_ALIGN_BIT 0x3 /* these are 0 for aligned ptrs */
static int num_distinct_strings = 0;
int bytes_distinct_strings = 0;
int stralloc_allocd_strings = 0;
int stralloc_allocd_bytes = 0;
static int search_len = 0;
static int num_str_searches = 0;
/*
* strings are stored:
* (char * next) (short numrefs) string
* ^
* pointer points here
*/
#define NEXT(p) SHSTR_NEXT(p)
#define REFS(p) SHSTR_REFS(p)
/*
* hash table - list of pointers to heads of string chains.
* Each string in chain has a pointer to the next string and a
* reference count (char *, int) stored just before the start of the string.
* HTABLE_SIZE is in config.h, and should be a prime, probably between
* 1000 and 5000.
*/
static char ** base_table = 0;
char *out_of_memory_string;
extern struct ident *ident_table[ITABLE_SIZE];
void init_shared_strings()
{
int x;
base_table = (char **) xalloc(sizeof(char *) * HTABLE_SIZE);
if (!base_table)
fatal("Out of memory\n");
for (x=0; x<HTABLE_SIZE; x++)
base_table[x] = 0;
out_of_memory_string = make_shared_string(
"This string is used as a substitute if allocating another one failed."
);
}
void clear_shared_string_refs()
{
int x;
char *p;
for (x=0; x<HTABLE_SIZE; x++)
for (p = base_table[x]; p; p = NEXT(p) )
REFS(p) = 0;
}
#ifdef MALLOC_smalloc
void note_shared_string_table_ref() {
note_malloced_block_ref((char *)base_table);
count_ref_from_string(out_of_memory_string);
}
void walk_shared_strings(func)
void (*func) PROT((char *, char *));
{
int x;
char *p, *n;
for (x=0; x<HTABLE_SIZE; x++)
for (n = base_table[x]; p = n; ) {
n = NEXT(p); /* p may be freed by (*func)() . */
(*func)(p-sizeof(short)-sizeof(char *), p);
}
}
#endif /* MALLOC_smalloc */
static int overhead_bytes() {
return (sizeof(char *) * HTABLE_SIZE) +
num_distinct_strings * (sizeof(char *) + sizeof(short));
}
/*
* generic hash function. This is probably overkill; I haven't checked the
* stats for different prime numbers, etc.
*/
/* some compilers can't grasp #if BITNUM(HTABLE_SIZE) == 1 */
/* some compilers can't even grasp #if BITNUM_IS_1(HTABLE_SIZE) *sigh* */
#if !( (HTABLE_SIZE) & (HTABLE_SIZE)-1 )
#define StrHash(s) (whashstr((s), 20) & ((HTABLE_SIZE)-1))
#else
#define StrHash(s) (hashstr((s), 20, HTABLE_SIZE))
#endif
#if 0
static int INLINE StrHash(s)
char * s;
{
if (!base_table)
init_strings();
return hashstr(s, 20, HTABLE_SIZE);
}
#endif
/*
* Looks for a string in the table. If it finds it, returns a pointer to
* the start of the string part, and moves the entry for the string to
* the head of the pointer chain. One thing (blech!) - puts the previous
* pointer on the hash chain into fs_prev.
*/
static int hash_index; /* to be used by alloc_new_string
without further notice */
/* is also used opaque inside free_string */
char * findstring(s)
char * s;
{
char * curr, *prev;
int h = StrHash(s);
hash_index = h;
curr = base_table[h];
prev = 0;
num_str_searches++;
while (curr) {
search_len++;
if (*curr == *s && !strcmp(curr, s)) { /* found it */
if (prev) { /* not at head of list */
NEXT(prev) = NEXT(curr);
NEXT(curr) = base_table[h];
base_table[h] = curr;
}
return(curr); /* pointer to string */
}
prev = curr;
curr = NEXT(curr);
}
return(0); /* not found */
}
/*
* Make a space for a string. This is rather nasty, as we want to use
* alloc/free, but malloc overhead is generally severe. Later, we
* will have to compact strings...
*/
static INLINE char * alloc_new_string(string)
char * string;
{
mp_int length;
char *s;
int h;
length = strlen(string);
s = xalloc(1 + length + sizeof(char *) + sizeof(short));
if (!s)
return s;
h = hash_index;
s += sizeof(char *) + sizeof(short);
strcpy(s, string);
#if 0
REFS(s) = 0;
#endif
NEXT(s) = base_table[h];
base_table[h] = s;
num_distinct_strings++;
bytes_distinct_strings +=
#ifdef MALLOC_smalloc
SMALLOC_OVERHEAD * sizeof(char *) +
#else
sizeof(char *) +
#endif
(sizeof(char *) + sizeof(short) +
length + 1 + (sizeof(char *)-1)) & ~(sizeof(char *)-1);
REFS(s) = 1;
stralloc_allocd_strings++;
stralloc_allocd_bytes += shstr_malloced_size(s);
return(s);
}
char * make_shared_string(str)
char * str;
{
char * s;
s = findstring(str);
if (!s) {
return alloc_new_string(str);
} else {
if (REFS(s))
REFS(s)++;
}
stralloc_allocd_strings++;
stralloc_allocd_bytes += shstr_malloced_size(s);
return(s);
}
INLINE
void decrement_string_ref(str)
char *str;
{
stralloc_allocd_strings--;
stralloc_allocd_bytes -= shstr_malloced_size(str);
if (REFS(str))
REFS(str)--;
}
/*
* free_string - reduce the ref count on a string. Various sanity
* checks applied, the best of them being that a add_string allocated
* string will point to a word boundary + sizeof(int)+sizeof(short),
* since malloc always allocates on a word boundary.
* On systems where a short is 1/2 a word, this gives an easy check
* to see whather we did in fact allocate it.
*
* Don't worry about the overhead for all those checks!
*/
/*
* function called on free_string detected errors; things return checked(s).
*/
#ifndef BUG_FREE
static void checked(s, str) char * s, *str;
{
fprintf(stderr, "%s (\"%s\")\n", s, str);
fatal(s); /* brutal - debugging */
}
#endif
void free_string(str)
char * str;
{
#ifndef BUG_FREE
extern int d_flag;
#endif
char * s;
stralloc_allocd_strings--;
stralloc_allocd_bytes -= shstr_malloced_size(str);
#ifndef BUG_FREE
#ifdef dcheck /* GNU malloc range check flag */
{ int align;
align = (((int)str) - sizeof(int) - sizeof(short)) & WORD_B_MASK;
if (align)
checked("Free string: improperly aligned string!", str);
}
#endif /* dcheck */
#endif
#ifndef BUG_FREE
s = findstring(str); /* moves it to head of table if found */
if (!s) {
checked("Free string: not found in string table!", str);
return;
}
if (s != str) {
checked("Free string: string didnt hash to the same spot!", str);
return;
}
if (REFS(s) <= 0 && d_flag) {
fprintf(
stderr,
"Free String: String refs zero or -ve! (\"%s\")\n",
str
);
}
#endif /* BUG_FREE */
if (!REFS(str) || --REFS(str) > 0) return;
s = findstring(str); /* moves it to head of table if found */
#ifndef BUG_FREE
/* It will be at the head of the hash chain */
base_table[StrHash(str)] = NEXT(str);
#else
base_table[hash_index] = NEXT(str);
#endif
num_distinct_strings--;
/* We know how much overhead malloc has */
bytes_distinct_strings -= shstr_malloced_size(str) << 2;
xfree(str-sizeof(short)-sizeof(char *));
return;
}
/*
* you think this looks bad! and we didn't even tell them about the
* GNU malloc overhead! tee hee!
*/
int add_string_status(verbose)
int verbose;
{
int net_bytes_distinct_strings, net_allocd_bytes;
if (verbose) {
add_message("\nShared string hash table:\n");
add_message("-------------------------\t Strings Bytes\n");
}
net_bytes_distinct_strings = (bytes_distinct_strings & (malloc_size_mask()<<2)) -
num_distinct_strings * (sizeof(char*) + sizeof(short));
add_message("Strings malloced\t\t%8d %8d + %d overhead\n",
num_distinct_strings, net_bytes_distinct_strings, overhead_bytes());
if (verbose) {
char print_buff[20];
stralloc_allocd_bytes &= malloc_size_mask();
net_allocd_bytes = (stralloc_allocd_bytes << 2) -
stralloc_allocd_strings * (sizeof(char*) + sizeof(short));
add_message("Total asked for\t\t\t%8d %8d\n",
stralloc_allocd_strings, net_allocd_bytes );
add_message("Space actually required/total string bytes %d%%\n",
(net_bytes_distinct_strings + overhead_bytes())*100 /
net_allocd_bytes );
sprintf(print_buff, " %7.3f\n",
(float)search_len / (float)num_str_searches);
add_message("Searches: %d Average search length:%s\n",
num_str_searches, print_buff);
}
return net_bytes_distinct_strings + overhead_bytes();
}