#include <stdio.h>
#include <string.h>
#define STRALLOC
#include "config.h"
#include "lint.h"
#ifdef MALLOC_smalloc
extern int malloc_size_mask(void);
#define malloced_size(str) ( *( (unsigned long *)\
(str-sizeof(unsigned long)-sizeof(char*)-sizeof(unsigned short))\
) )
#else
#define malloc_size_mask() (~0)
#define malloced_size(str) ( (4 + 4 + 2 + strlen(str) + 1 + 3) >> 2)
#endif
/*
* stralloc.c - string management.
* This code was written by Mike McGaughy. It was placed in the public domain
* in April 1993.
*
* All strings are stored in an extensible hash table, with reference counts.
* free_string decreases the reference count; if it gets to zero, the string
* will be deallocated. add_string increases the ref count if it finds a
* matching string, or allocates it if it cant. There is no way to allocate
* a string of a particular size to fill later (hash wont work!), so you'll
* have to copy things from a static (or malloced and later freed) buffer -
* that is, if you want to avoid space leaks...
*
* Current overhead:
* 8 bytes per string (next pointer, and 2 shorts for length and refs)
* Strings are nearly all fairly short, so this is a significant overhead-
* there is also the 4 byte malloc overhead and the fact that malloc
* generally allocates blocks which are a power of 2 (should write my
* own best-fit malloc specialised to strings); then again, GNU malloc
* is bug free...
*/
/*
* there is also generic hash table management code, but strings can be shared
* (that was the point of this code), will be unique in the table,
* require a reference count, and are malloced, copied and freed at
* will by the string manager. Besides, I wrote this code first :-).
* Look at htable.c for the other code. It uses the Hash() function
* defined here, and requires hashed objects to have a pointer to the
* next element in the chain (which you specify when you call the functions).
*/
#define MAXSHORT (1 << (sizeof(short)*8 - 2))
#define WORD_ALIGN_BIT (sizeof(void*)-1) /* these are 0 for aligned ptrs */
int num_distinct_strings = 0;
int bytes_distinct_strings = 0;
static int allocd_strings = 0;
static int allocd_bytes = 0;
static int search_len = 0;
static int num_str_searches = 0;
/*
* strings are stored:
* (char * next) (short numrefs) string
* ^
* pointer points here
*/
#define NEXT(str) (*(char **)((char *) (str) - sizeof(unsigned short)\
- sizeof(char *)))
#define REFS(str) (*(unsigned short *)((char *) (str)\
- sizeof(unsigned short)))
/*
* hash table - list of pointers to heads of string chains.
* Each string in chain has a pointer to the next string and a
* reference count (char *, int) stored just before the start of the string.
* HTABLE_SIZE is in config.h, and should be a prime, probably between
* 1000 and 5000.
*/
static char *(base_table[HTABLE_SIZE]);
void
init_shared_strings()
{
}
int
overhead_bytes()
{
return (sizeof(char *) * HTABLE_SIZE) +
num_distinct_strings * (sizeof(char *) + sizeof(short));
}
/*
* generic hash function. This is probably overkill; I haven't checked the
* stats for different prime numbers, etc.
*/
#if defined(ultrix) && !defined(__GNUC__)
#define StrHash(s) (hashstr((s), 20, HTABLE_SIZE))
#else
#if BITNUM(HTABLE_SIZE) == 1
/* This one only works for even power-of-2 table size, but is faster */
#define StrHash(s) (hashstr16((s), 20) & ((HTABLE_SIZE)-1))
#else
#define StrHash(s) (hashstr((s), 20, HTABLE_SIZE))
#endif
#endif
/*
* Looks for a string in the table. If it finds it, returns a pointer to
* the start of the string part, and moves the entry for the string to
* the head of the pointer chain. One thing (blech!) - puts the previous
* pointer on the hash chain into fs_prev.
*/
static int hash_index; /* to be used by alloc_new_string
without further notice */
/* is also used opaque inside free_string */
char *
findstring(char *s)
{
char * curr, *prev;
int h = StrHash(s);
hash_index = h;
curr = base_table[h];
prev = 0;
num_str_searches++;
while (curr)
{
search_len++;
if (*curr == *s && !strcmp(curr, s))
{ /* found it */
if (prev)
{ /* not at head of list */
NEXT(prev) = NEXT(curr);
NEXT(curr) = base_table[h];
base_table[h] = curr;
}
return(curr); /* pointer to string */
}
prev = curr;
curr = NEXT(curr);
}
return(0); /* not found */
}
/*
* Make a space for a string. This is rather nasty, as we want to use
* alloc/free, but malloc overhead is generally severe. Later, we
* will have to compact strings...
*/
static INLINE char *
alloc_new_string(char *string)
{
long length = strlen(string);
char * s = xalloc(1 + length + sizeof(char *) + sizeof(short));
int h = hash_index;
s += sizeof(char *) + sizeof(short);
strcpy(s, string);
NEXT(s) = base_table[h];
base_table[h] = s;
num_distinct_strings++;
bytes_distinct_strings += 4 + (4 + 2 + length + 1 + 3) & ~3;
return(s);
}
char *
make_shared_string(char *str)
{
char * s;
s = findstring(str);
if (!s)
{
s = alloc_new_string(str);
REFS(s) = 1;
}
else
{
if (REFS(s))
REFS(s)++;
}
allocd_strings++;
allocd_bytes += malloced_size(s);
return(s);
}
INLINE void
increment_string_ref(char *str)
{
allocd_strings++;
allocd_bytes += malloced_size(str);
if (REFS(str))
REFS(str)++;
}
INLINE void
decrement_string_ref(char *str)
{
allocd_strings--;
allocd_bytes -= malloced_size(str);
if (REFS(str))
REFS(str)--;
}
/*
* free_string - reduce the ref count on a string. Various sanity
* checks applied, the best of them being that a add_string allocated
* string will point to a word boundary + sizeof(int)+sizeof(short),
* since malloc always allocates on a word boundary.
* On systems where a short is 1/2 a word, this gives an easy check
* to see whather we did in fact allocate it.
*
* Don't worry about the overhead for all those checks!
*/
void
free_string(char *str)
{
char *s;
allocd_strings--;
allocd_bytes -= malloced_size(str);
if (!REFS(str) || --REFS(str) > 0)
return;
s = findstring(str); /* moves it to head of table if found */
base_table[hash_index] = NEXT(str);
num_distinct_strings--;
/* We know how much overhead malloc has */
bytes_distinct_strings -= malloced_size(str) << 2;
free(str - sizeof(short) - sizeof(char *));
return;
}
/*
* you think this looks bad! and we didn't even tell them about the
* GNU malloc overhead! tee hee!
*/
void
add_string_status(char *debinf)
{
int net_bytes_distinct_strings, net_allocd_bytes;
strcat(debinf, "\nShared string hash table:\n");
strcat(debinf, "-------------------------\t Strings Bytes\n");
net_bytes_distinct_strings =
(bytes_distinct_strings & (malloc_size_mask()<<2)) -
num_distinct_strings * (sizeof(char*) + sizeof(short));
allocd_bytes &= malloc_size_mask();
net_allocd_bytes = (allocd_bytes << 2) -
allocd_strings * (sizeof(char*) + sizeof(short));
sprintf(debinf + strlen(debinf), "Total asked for\t\t\t%8d %8d\n",
allocd_strings, net_allocd_bytes );
sprintf(debinf + strlen(debinf),
"Space actually required/total string bytes %d%%\n",
(net_bytes_distinct_strings + overhead_bytes())*100 /
net_allocd_bytes );
sprintf(debinf + strlen(debinf),
"Searches: %d Average search length:%7.3f\n",
num_str_searches, (double)search_len / (double)num_str_searches);
}