#include <stdio.h> #include <string.h> #define STRALLOC #include "config.h" #include "lint.h" #ifdef MALLOC_smalloc extern int malloc_size_mask(void); #define malloced_size(str) ( *( (unsigned long *)\ (str-sizeof(unsigned long)-sizeof(char*)-sizeof(unsigned short))\ ) ) #else #define malloc_size_mask() (~0) #define malloced_size(str) ( (4 + 4 + 2 + strlen(str) + 1 + 3) >> 2) #endif /* * stralloc.c - string management. * This code was written by Mike McGaughy. It was placed in the public domain * in April 1993. * * All strings are stored in an extensible hash table, with reference counts. * free_string decreases the reference count; if it gets to zero, the string * will be deallocated. add_string increases the ref count if it finds a * matching string, or allocates it if it cant. There is no way to allocate * a string of a particular size to fill later (hash wont work!), so you'll * have to copy things from a static (or malloced and later freed) buffer - * that is, if you want to avoid space leaks... * * Current overhead: * 8 bytes per string (next pointer, and 2 shorts for length and refs) * Strings are nearly all fairly short, so this is a significant overhead- * there is also the 4 byte malloc overhead and the fact that malloc * generally allocates blocks which are a power of 2 (should write my * own best-fit malloc specialised to strings); then again, GNU malloc * is bug free... */ /* * there is also generic hash table management code, but strings can be shared * (that was the point of this code), will be unique in the table, * require a reference count, and are malloced, copied and freed at * will by the string manager. Besides, I wrote this code first :-). * Look at htable.c for the other code. It uses the Hash() function * defined here, and requires hashed objects to have a pointer to the * next element in the chain (which you specify when you call the functions). */ #define MAXSHORT (1 << (sizeof(short)*8 - 2)) #define WORD_ALIGN_BIT (sizeof(void*)-1) /* these are 0 for aligned ptrs */ int num_distinct_strings = 0; int bytes_distinct_strings = 0; static int allocd_strings = 0; static int allocd_bytes = 0; static int search_len = 0; static int num_str_searches = 0; /* * strings are stored: * (char * next) (short numrefs) string * ^ * pointer points here */ #define NEXT(str) (*(char **)((char *) (str) - sizeof(unsigned short)\ - sizeof(char *))) #define REFS(str) (*(unsigned short *)((char *) (str)\ - sizeof(unsigned short))) /* * hash table - list of pointers to heads of string chains. * Each string in chain has a pointer to the next string and a * reference count (char *, int) stored just before the start of the string. * HTABLE_SIZE is in config.h, and should be a prime, probably between * 1000 and 5000. */ static char *(base_table[HTABLE_SIZE]); void init_shared_strings() { } int overhead_bytes() { return (sizeof(char *) * HTABLE_SIZE) + num_distinct_strings * (sizeof(char *) + sizeof(short)); } /* * generic hash function. This is probably overkill; I haven't checked the * stats for different prime numbers, etc. */ #if defined(ultrix) && !defined(__GNUC__) #define StrHash(s) (hashstr((s), 20, HTABLE_SIZE)) #else #if BITNUM(HTABLE_SIZE) == 1 /* This one only works for even power-of-2 table size, but is faster */ #define StrHash(s) (hashstr16((s), 20) & ((HTABLE_SIZE)-1)) #else #define StrHash(s) (hashstr((s), 20, HTABLE_SIZE)) #endif #endif /* * Looks for a string in the table. If it finds it, returns a pointer to * the start of the string part, and moves the entry for the string to * the head of the pointer chain. One thing (blech!) - puts the previous * pointer on the hash chain into fs_prev. */ static int hash_index; /* to be used by alloc_new_string without further notice */ /* is also used opaque inside free_string */ char * findstring(char *s) { char * curr, *prev; int h = StrHash(s); hash_index = h; curr = base_table[h]; prev = 0; num_str_searches++; while (curr) { search_len++; if (*curr == *s && !strcmp(curr, s)) { /* found it */ if (prev) { /* not at head of list */ NEXT(prev) = NEXT(curr); NEXT(curr) = base_table[h]; base_table[h] = curr; } return(curr); /* pointer to string */ } prev = curr; curr = NEXT(curr); } return(0); /* not found */ } /* * Make a space for a string. This is rather nasty, as we want to use * alloc/free, but malloc overhead is generally severe. Later, we * will have to compact strings... */ static INLINE char * alloc_new_string(char *string) { long length = strlen(string); char * s = xalloc(1 + length + sizeof(char *) + sizeof(short)); int h = hash_index; s += sizeof(char *) + sizeof(short); strcpy(s, string); NEXT(s) = base_table[h]; base_table[h] = s; num_distinct_strings++; bytes_distinct_strings += 4 + (4 + 2 + length + 1 + 3) & ~3; return(s); } char * make_shared_string(char *str) { char * s; s = findstring(str); if (!s) { s = alloc_new_string(str); REFS(s) = 1; } else { if (REFS(s)) REFS(s)++; } allocd_strings++; allocd_bytes += malloced_size(s); return(s); } INLINE void increment_string_ref(char *str) { allocd_strings++; allocd_bytes += malloced_size(str); if (REFS(str)) REFS(str)++; } INLINE void decrement_string_ref(char *str) { allocd_strings--; allocd_bytes -= malloced_size(str); if (REFS(str)) REFS(str)--; } /* * free_string - reduce the ref count on a string. Various sanity * checks applied, the best of them being that a add_string allocated * string will point to a word boundary + sizeof(int)+sizeof(short), * since malloc always allocates on a word boundary. * On systems where a short is 1/2 a word, this gives an easy check * to see whather we did in fact allocate it. * * Don't worry about the overhead for all those checks! */ void free_string(char *str) { char *s; allocd_strings--; allocd_bytes -= malloced_size(str); if (!REFS(str) || --REFS(str) > 0) return; s = findstring(str); /* moves it to head of table if found */ base_table[hash_index] = NEXT(str); num_distinct_strings--; /* We know how much overhead malloc has */ bytes_distinct_strings -= malloced_size(str) << 2; free(str - sizeof(short) - sizeof(char *)); return; } /* * you think this looks bad! and we didn't even tell them about the * GNU malloc overhead! tee hee! */ void add_string_status(char *debinf) { int net_bytes_distinct_strings, net_allocd_bytes; strcat(debinf, "\nShared string hash table:\n"); strcat(debinf, "-------------------------\t Strings Bytes\n"); net_bytes_distinct_strings = (bytes_distinct_strings & (malloc_size_mask()<<2)) - num_distinct_strings * (sizeof(char*) + sizeof(short)); allocd_bytes &= malloc_size_mask(); net_allocd_bytes = (allocd_bytes << 2) - allocd_strings * (sizeof(char*) + sizeof(short)); sprintf(debinf + strlen(debinf), "Total asked for\t\t\t%8d %8d\n", allocd_strings, net_allocd_bytes ); sprintf(debinf + strlen(debinf), "Space actually required/total string bytes %d%%\n", (net_bytes_distinct_strings + overhead_bytes())*100 / net_allocd_bytes ); sprintf(debinf + strlen(debinf), "Searches: %d Average search length:%7.3f\n", num_str_searches, (double)search_len / (double)num_str_searches); }