#include <stdio.h>
#include "config.h"
#include "lint.h"
/*
* stralloc.c - string management.
*
* All strings are stored in an extensible hash table, with reference counts.
* free_string decreases the reference count; if it gets to zero, the string
* will be deallocated. add_string increases the ref count if it finds a
* matching string, or allocates it if it cant. There is no way to allocate
* a string of a particular size to fill later (hash wont work!), so you'll
* have to copy things freom a static (or malloced and later freed) buffer -
* that is, if you want to avoid space leaks...
*
* Current overhead:
* 8 bytes per string (next pointer, and 2 shorts for length and refs)
* Strings are nearly all fairly short, so this is a significant overhead-
* there is also the 4 byte malloc overhead and the fact that malloc
* generally allocates blocks which are a power of 2 (should write my
* own best-fit malloc specialised to strings); then again, GNU malloc
* is bug free...
*/
/*
* there is also generic hash table management code, but strings can be shared
* (that was the point of this code), will be unique in the table,
* require a reference count, and are malloced, copied and freed at
* will by the string manager. Besides, I wrote this code first :-).
* Look at htable.c for the other code. It uses the Hash() function
* defined here, and requires hashed objects to have a pointer to the
* next element in the chain (which you specify when you call the functions).
*/
#define MAXSHORT (1 << (sizeof(short)*8 - 2))
#define WORD_ALIGN_BIT 0x3 /* these are 0 for aligned ptrs */
char *xalloc ();
char *strcpy ();
static int num_distinct_strings = 0;
int bytes_distinct_strings = 0;
static int allocd_strings = 0;
static int allocd_bytes = 0;
int overhead_bytes = 0;
static int search_len = 0;
static int num_searches = 0;
/*
* strings are stored:
* (char * next) (short numrefs) string
* ^
* pointer points here
*/
#define NEXT(str) (*(char **)((char *) (str) - sizeof(short) \
- sizeof(int)))
#define REFS(str) (*(short *)((char *) (str) - sizeof(short)))
/*
* hash table - list of pointers to heads of string chains.
* Each string in chain has a pointer to the next string and a
* reference count (char *, int) stored just before the start of the string.
* HTABLE_SIZE is in config.h, and should be a prime, probably between
* 1000 and 5000.
*/
static char **base_table = 0;
static
init_strings ()
{
int x;
base_table = (char **) xalloc (sizeof (char *) * HTABLE_SIZE);
overhead_bytes += (sizeof (char *) * HTABLE_SIZE);
for (x = 0; x < HTABLE_SIZE; x++)
base_table[x] = 0;
}
/*
* generic hash function. This is probably overkill; I haven't checked the
* stats for different prime numbers, etc.
*/
static int
StrHash (s)
char *s;
{
int h = 0;
if (!base_table)
init_strings ();
while (*s)
h = (h * P1 + *(s++) * P2 + P3) % HTABLE_SIZE;
return (h);
}
/*
* Looks for a string in the table. If it finds it, returns a pointer to
* the start of the string part, and moves the entry for the string to
* the head of the pointer chain. One thing (blech!) - puts the previous
* pointer on the hash chain into fs_prev.
*/
static char *
findstring (s)
char *s;
{
char *curr, *prev;
char *str;
int h = StrHash (s);
curr = base_table[h];
prev = 0;
num_searches++;
while (curr)
{
search_len++;
if (*curr == *s && !strcmp (curr, s))
{ /* found it */
if (prev)
{ /* not at head of list */
NEXT (prev) = NEXT (curr);
NEXT (curr) = base_table[h];
base_table[h] = curr;
}
return (curr); /* pointer to string */
}
prev = curr;
curr = NEXT (curr);
}
return (0); /* not found */
}
/*
* Make a space for a string. This is rather nasty, as we want to use
* alloc/free, but malloc overhead is generally severe. Later, we
* will have to compact strings...
*/
static char *
alloc_new_string (string)
char *string;
{
char *s = xalloc (1 + strlen (string) + sizeof (char *) + sizeof (short));
int h = StrHash (string);
s += sizeof (char *) + sizeof (short);
strcpy (s, string);
REFS (s) = 0;
NEXT (s) = base_table[h];
base_table[h] = s;
num_distinct_strings++;
bytes_distinct_strings += 4 + (strlen (s) + 3) & ~3;
overhead_bytes += sizeof (char *) + sizeof (short);
return (s);
}
char *
make_shared_string (str)
char *str;
{
char *s;
s = findstring (str);
if (!s)
s = alloc_new_string (str);
REFS (s)++;
allocd_strings++;
allocd_bytes += 4 + (strlen (str) + 3) & ~3;
return (s);
}
/*
* free_string - reduce the ref count on a string. Various sanity
* checks applied, the best of them being that a add_string allocated
* string will point to a word boundary + sizeof(int)+sizeof(short),
* since malloc always allocates on a word boundary.
* On systems where a short is 1/2 a word, this gives an easy check
* to see whather we did in fact allocate it.
*
* Don't worry about the overhead for all those checks!
*/
/*
* function called on free_string detected errors; things return checked(s).
*/
static
checked (s, str)
char *s, *str;
{
fprintf (stderr, "%s (\"%s\")\n", s, str);
fatal (s); /* brutal - debugging */
}
void
free_string (str)
char *str;
{
char *s;
allocd_strings--;
allocd_bytes -= 4 + (strlen (str) + 3) & ~3;
#ifdef BUG_FREE
if (REFS (str) > MAXSHORT)
return;
REFS (str)--;
if (REFS (str) > 0)
return;
#endif /* BUG_FREE */
#ifdef dcheck /* GNU malloc range check flag */
{
int align;
align = (((int) str) - sizeof (int) - sizeof (short)) & WORD_B_MASK;
if (align)
checked ("Free string: improperly aligned string!", str);
}
#endif /* dcheck */
s = findstring (str); /* moves it to head of table if found */
if (!s)
{
checked ("Free string: not found in string table!", str);
return;
}
if (s != str)
{
checked ("Free string: string didnt hash to the same spot!", str);
return;
}
if (REFS (s) > MAXSHORT)
return;
if (REFS (s) <= 0)
{
checked ("Free String: String refs zero or -ve!", str);
return;
}
REFS (s)--;
if (REFS (s) > 0)
return;
/* It will be at the head of the hash chain */
base_table[StrHash (str)] = NEXT (s);
num_distinct_strings--;
/* We know how much overhead malloc has */
bytes_distinct_strings -= 4 + (strlen (s) + 3) & ~3;
overhead_bytes -= sizeof (short) + sizeof (char *);
free (s - sizeof (short) - sizeof (char *));
return;
}
/*
* you think this looks bad! and we didn't even tell them about the
* GNU malloc overhead! tee hee!
*/
static char stbuf[100];
int
add_string_status ()
{
add_message (" Strings\t Bytes\n");
add_message ("Strings malloced %5d %10d + %d overhead\n",
num_distinct_strings, bytes_distinct_strings, overhead_bytes);
add_message ("Total asked for: %5d %10d\n",
allocd_strings, allocd_bytes);
add_message ("Space actually required/total string bytes: %d%%\n",
(bytes_distinct_strings + overhead_bytes) * 100 / allocd_bytes);
add_message ("Searches: %d Average search length: %6.3f\n",
num_searches, (double) search_len / num_searches);
return (bytes_distinct_strings + overhead_bytes);
}