#include <stdio.h> #include "config.h" #include "lint.h" /* * stralloc.c - string management. * * All strings are stored in an extensible hash table, with reference counts. * free_string decreases the reference count; if it gets to zero, the string * will be deallocated. add_string increases the ref count if it finds a * matching string, or allocates it if it cant. There is no way to allocate * a string of a particular size to fill later (hash wont work!), so you'll * have to copy things freom a static (or malloced and later freed) buffer - * that is, if you want to avoid space leaks... * * Current overhead: * 8 bytes per string (next pointer, and 2 shorts for length and refs) * Strings are nearly all fairly short, so this is a significant overhead- * there is also the 4 byte malloc overhead and the fact that malloc * generally allocates blocks which are a power of 2 (should write my * own best-fit malloc specialised to strings); then again, GNU malloc * is bug free... */ /* * there is also generic hash table management code, but strings can be shared * (that was the point of this code), will be unique in the table, * require a reference count, and are malloced, copied and freed at * will by the string manager. Besides, I wrote this code first :-). * Look at htable.c for the other code. It uses the Hash() function * defined here, and requires hashed objects to have a pointer to the * next element in the chain (which you specify when you call the functions). */ #define MAXSHORT (1 << (sizeof(short)*8 - 2)) #define WORD_ALIGN_BIT 0x3 /* these are 0 for aligned ptrs */ char *xalloc (); char *strcpy (); static int num_distinct_strings = 0; int bytes_distinct_strings = 0; static int allocd_strings = 0; static int allocd_bytes = 0; int overhead_bytes = 0; static int search_len = 0; static int num_searches = 0; /* * strings are stored: * (char * next) (short numrefs) string * ^ * pointer points here */ #define NEXT(str) (*(char **)((char *) (str) - sizeof(short) \ - sizeof(int))) #define REFS(str) (*(short *)((char *) (str) - sizeof(short))) /* * hash table - list of pointers to heads of string chains. * Each string in chain has a pointer to the next string and a * reference count (char *, int) stored just before the start of the string. * HTABLE_SIZE is in config.h, and should be a prime, probably between * 1000 and 5000. */ static char **base_table = 0; static init_strings () { int x; base_table = (char **) xalloc (sizeof (char *) * HTABLE_SIZE); overhead_bytes += (sizeof (char *) * HTABLE_SIZE); for (x = 0; x < HTABLE_SIZE; x++) base_table[x] = 0; } /* * generic hash function. This is probably overkill; I haven't checked the * stats for different prime numbers, etc. */ static int StrHash (s) char *s; { int h = 0; if (!base_table) init_strings (); while (*s) h = (h * P1 + *(s++) * P2 + P3) % HTABLE_SIZE; return (h); } /* * Looks for a string in the table. If it finds it, returns a pointer to * the start of the string part, and moves the entry for the string to * the head of the pointer chain. One thing (blech!) - puts the previous * pointer on the hash chain into fs_prev. */ static char * findstring (s) char *s; { char *curr, *prev; char *str; int h = StrHash (s); curr = base_table[h]; prev = 0; num_searches++; while (curr) { search_len++; if (*curr == *s && !strcmp (curr, s)) { /* found it */ if (prev) { /* not at head of list */ NEXT (prev) = NEXT (curr); NEXT (curr) = base_table[h]; base_table[h] = curr; } return (curr); /* pointer to string */ } prev = curr; curr = NEXT (curr); } return (0); /* not found */ } /* * Make a space for a string. This is rather nasty, as we want to use * alloc/free, but malloc overhead is generally severe. Later, we * will have to compact strings... */ static char * alloc_new_string (string) char *string; { char *s = xalloc (1 + strlen (string) + sizeof (char *) + sizeof (short)); int h = StrHash (string); s += sizeof (char *) + sizeof (short); strcpy (s, string); REFS (s) = 0; NEXT (s) = base_table[h]; base_table[h] = s; num_distinct_strings++; bytes_distinct_strings += 4 + (strlen (s) + 3) & ~3; overhead_bytes += sizeof (char *) + sizeof (short); return (s); } char * make_shared_string (str) char *str; { char *s; s = findstring (str); if (!s) s = alloc_new_string (str); REFS (s)++; allocd_strings++; allocd_bytes += 4 + (strlen (str) + 3) & ~3; return (s); } /* * free_string - reduce the ref count on a string. Various sanity * checks applied, the best of them being that a add_string allocated * string will point to a word boundary + sizeof(int)+sizeof(short), * since malloc always allocates on a word boundary. * On systems where a short is 1/2 a word, this gives an easy check * to see whather we did in fact allocate it. * * Don't worry about the overhead for all those checks! */ /* * function called on free_string detected errors; things return checked(s). */ static checked (s, str) char *s, *str; { fprintf (stderr, "%s (\"%s\")\n", s, str); fatal (s); /* brutal - debugging */ } void free_string (str) char *str; { char *s; allocd_strings--; allocd_bytes -= 4 + (strlen (str) + 3) & ~3; #ifdef BUG_FREE if (REFS (str) > MAXSHORT) return; REFS (str)--; if (REFS (str) > 0) return; #endif /* BUG_FREE */ #ifdef dcheck /* GNU malloc range check flag */ { int align; align = (((int) str) - sizeof (int) - sizeof (short)) & WORD_B_MASK; if (align) checked ("Free string: improperly aligned string!", str); } #endif /* dcheck */ s = findstring (str); /* moves it to head of table if found */ if (!s) { checked ("Free string: not found in string table!", str); return; } if (s != str) { checked ("Free string: string didnt hash to the same spot!", str); return; } if (REFS (s) > MAXSHORT) return; if (REFS (s) <= 0) { checked ("Free String: String refs zero or -ve!", str); return; } REFS (s)--; if (REFS (s) > 0) return; /* It will be at the head of the hash chain */ base_table[StrHash (str)] = NEXT (s); num_distinct_strings--; /* We know how much overhead malloc has */ bytes_distinct_strings -= 4 + (strlen (s) + 3) & ~3; overhead_bytes -= sizeof (short) + sizeof (char *); free (s - sizeof (short) - sizeof (char *)); return; } /* * you think this looks bad! and we didn't even tell them about the * GNU malloc overhead! tee hee! */ static char stbuf[100]; int add_string_status () { add_message (" Strings\t Bytes\n"); add_message ("Strings malloced %5d %10d + %d overhead\n", num_distinct_strings, bytes_distinct_strings, overhead_bytes); add_message ("Total asked for: %5d %10d\n", allocd_strings, allocd_bytes); add_message ("Space actually required/total string bytes: %d%%\n", (bytes_distinct_strings + overhead_bytes) * 100 / allocd_bytes); add_message ("Searches: %d Average search length: %6.3f\n", num_searches, (double) search_len / num_searches); return (bytes_distinct_strings + overhead_bytes); }