#include <stdio.h>
#include <string.h>
#include "config.h"
#include "lint.h"
#include "stralloc.h"
/*
this code is not the same as the original code. I cleaned it up to
use structs to: 1) make it easier to check the driver for memory leaks
(using MallocDebug on a NeXT) and 2) because it looks cleaner this way.
--Truilkan@TMI 92/04/19
modified to make calls to strlen() unnecessary and to remove superfluous
calls to findblock(). -- Truilkan@TMI, 1992/08/05
*/
/*
* stralloc.c - string management.
*
* All strings are stored in an extensible hash table, with reference counts.
* free_string decreases the reference count; if it gets to zero, the string
* will be deallocated. add_string increases the ref count if it finds a
* matching string, or allocates it if it cant. There is no way to allocate
* a string of a particular size to fill later (hash wont work!), so you'll
* have to copy things from a static (or malloced and later freed) buffer -
* that is, if you want to avoid space leaks...
*
* Current overhead:
* sizeof(block_t) per string (string pointer, next pointer, and a short
* for refs). Strings are nearly all fairly short, so this is a significant
* overhead - there is also the 4 byte malloc overhead and the fact that
* malloc generally allocates blocks which are a power of 2 (should write my
* own best-fit malloc specialised to strings); then again, GNU malloc
* is bug free...
*/
/*
* there is also generic hash table management code, but strings can be shared
* (that was the point of this code), will be unique in the table,
* require a reference count, and are malloced, copied and freed at
* will by the string manager. Besides, I wrote this code first :-).
* Look at htable.c for the other code. It uses the Hash() function
* defined here, and requires hashed objects to have a pointer to the
* next element in the chain (which you specify when you call the functions).
*/
#define MAXSHORT (unsigned short)((1 << (sizeof(short)*8)) - 1)
extern int max_string_length;
char * xalloc();
#if !defined(_AIX) && !defined(LATTICE)
char * strcpy();
#endif
static int num_distinct_strings = 0;
int bytes_distinct_strings = 0;
static int allocd_strings = 0;
static int allocd_bytes = 0;
int overhead_bytes = 0;
static int search_len = 0;
static int num_str_searches = 0;
#define StrHash(s) (whashstr((s), 20) & (htable_size_minus_one))
#define hfindblock(s, h) sfindblock(s, h = StrHash(s))
#define findblock(s) sfindblock(s, StrHash(s))
/*
* hash table - list of pointers to heads of string chains.
* Each string in chain has a pointer to the next string and a
* reference count (char *, int) stored just before the start of the string.
* HTABLE_SIZE is in config.h, and should be a prime, probably between
* 1000 and 5000.
*/
static block_t **base_table = (block_t **)0;
static int htable_size;
static int htable_size_minus_one;
void
init_strings()
{
int x;
/* ensure that htable size is a power of 2 */
for (htable_size = 1; htable_size <= HTABLE_SIZE; htable_size *= 2)
;
htable_size_minus_one = htable_size - 1;
base_table = (block_t **)
DXALLOC(sizeof(block_t *) * htable_size, 116, "init_strings");
overhead_bytes += (sizeof(block_t *) * htable_size);
for (x = 0; x < htable_size; x++) {
base_table[x] = 0;
}
}
/*
* Looks for a string in the table. If it finds it, returns a pointer to
* the start of the string part, and moves the entry for the string to
* the head of the pointer chain. One thing (blech!) - puts the previous
* pointer on the hash chain into fs_prev.
*/
INLINE block_t *
sfindblock(s, h)
char *s;
int h;
{
block_t *curr, *prev;
curr = base_table[h];
prev = NULL;
num_str_searches++;
while (curr) {
search_len++;
if (*(STRING(curr)) == *s && !strcmp(STRING(curr), s)) { /* found it */
if (prev) { /* not at head of list */
NEXT(prev) = NEXT(curr);
NEXT(curr) = base_table[h];
base_table[h] = curr;
}
return(curr); /* pointer to string */
}
prev = curr;
curr = NEXT(curr);
}
return((block_t *)0); /* not found */
}
char *
findstring(s)
char *s;
{
block_t *b;
if ((b = findblock(s))) {
return STRING(b);
} else {
return(NULL);
}
}
/* alloc_new_string: Make a space for a string. */
INLINE static block_t *
alloc_new_string(string, h)
char * string;
int h;
{
block_t *b;
int len = strlen(string);
int size;
if (len > max_string_length) {
len = max_string_length;
}
size = sizeof(block_t) + len + 1;
b = (block_t *)DXALLOC(size, 117, "alloc_new_string");
strncpy(STRING(b), string, len);
STRING(b)[len] = '\0'; /* strncpy doesn't put on \0 if 'from' too long */
SIZE(b) = size;
REFS(b) = 0;
NEXT(b) = base_table[h];
base_table[h] = b;
num_distinct_strings++;
bytes_distinct_strings += size;
overhead_bytes += sizeof(block_t);
return(b);
}
char *
make_shared_string(str)
char *str;
{
block_t *b;
int h;
b = hfindblock(str, h); /* hfindblock macro sets h = StrHash(s) */
if (!b) {
b = alloc_new_string(str, h);
}
/* stop keeping track of ref counts at the point where overflow would
occur.
*/
if (REFS(b) < MAXSHORT) {
REFS(b)++;
}
allocd_strings++;
allocd_bytes += SIZE(b);
return(STRING(b));
}
/*
ref_string: Fatal to call this function on a string that isn't shared.
*/
char *
ref_string(str)
char *str;
{
block_t *b;
b = BLOCK(str);
#ifdef DEBUG
if (b != findblock(str)) {
fatal("stralloc.c: called ref_string on non-shared string: %s.\n",str);
}
#endif /* defined(DEBUG) */
REFS(b)++;
allocd_strings++;
allocd_bytes += SIZE(b);
return STRING(b);
}
/*
* function called on free_string detected errors; things return checked(s).
*/
static void
checked(s, str)
char *s, *str;
{
fprintf(stderr, "%s (\"%s\")\n", s, str);
fatal(s); /* brutal - debugging */
}
/* free_string: fatal to call free_string on a non-shared string */
/*
* free_string - reduce the ref count on a string. Various sanity
* checks applied.
*/
void
free_string(str)
char * str;
{
block_t *b;
b = BLOCK(str);
#ifdef DEBUG
if (b != findblock(str)) {
fatal("stralloc.c: free_string called on non-shared string: %s.\n",str);
}
#endif /* defined(DEBUG) */
allocd_strings--;
allocd_bytes -= SIZE(b);
/* if a string has been ref'd MAXSHORT times then we assume that its
used often enough to justify never freeing it.
*/
if (REFS(b) >= MAXSHORT)
return;
REFS(b)--;
if (REFS(b) > 0) return;
b = findblock(str); /* findblock moves str to head of hash chain */
#ifndef BUG_FREE
if (!b) {
checked("free_string: not found in string table!", str);
return;
}
if (STRING(b) != str) {
checked("free_string: string didn't hash to the same spot!", str);
return;
}
#endif /* BUG_FREE */
/* It will be at the head of the hash chain */
base_table[StrHash(str)] = NEXT(b);
num_distinct_strings--;
bytes_distinct_strings -= SIZE(b);
overhead_bytes -= sizeof(block_t);
FREE(b);
}
/*
* you think this looks bad! and we didn't even tell them about the
* GNU malloc overhead! tee hee!
*/
int
add_string_status(verbose)
int verbose;
{
if (verbose == 1) {
add_message("\nShared string hash table:\n");
add_message("-------------------------\t Strings Bytes\n");
}
if (verbose != -1)
add_message("Strings malloced\t\t%8d %8d + %d overhead\n",
num_distinct_strings, bytes_distinct_strings, overhead_bytes);
if (verbose == 1) {
add_message("Total asked for\t\t\t%8d %8d\n",
allocd_strings, allocd_bytes);
add_message("Space actually required/total string bytes %d%%\n",
(bytes_distinct_strings + overhead_bytes)*100 / allocd_bytes);
add_message("Searches: %d Average search length: %6.3f\n",
num_str_searches, (double)search_len / num_str_searches);
}
return(bytes_distinct_strings + overhead_bytes);
}