#include <stdio.h>
#include <string.h>

#define STRALLOC
#include "config.h"
#include "lint.h"

#ifdef MALLOC_smalloc
extern int malloc_size_mask(void);
#define malloced_size(str) ( *( (unsigned long *)\
		(str-sizeof(unsigned long)-sizeof(char*)-sizeof(unsigned short))\
	) )
#else
#define malloc_size_mask() (~0)
#define malloced_size(str) ( (4 + 4 + 2 + strlen(str) + 1 + 3) >> 2)
#endif

/*
 * stralloc.c - string management.
 * This code was written by Mike McGaughy. It was placed in the public domain
 * in April 1993.
 *
 * All strings are stored in an extensible hash table, with reference counts.
 * free_string decreases the reference count; if it gets to zero, the string
 * will be deallocated.  add_string increases the ref count if it finds a
 * matching string, or allocates it if it cant.  There is no way to allocate
 * a string of a particular size to fill later (hash wont work!), so you'll
 * have to copy things from a static (or malloced and later freed) buffer -
 * that is, if you want to avoid space leaks...
 *
 * Current overhead:
 *	8 bytes per string (next pointer, and 2 shorts for length and refs)
 *	Strings are nearly all fairly short, so this is a significant overhead-
 *	there is also the 4 byte malloc overhead and the fact that malloc
 *	generally allocates blocks which are a power of 2 (should write my
 *	own best-fit malloc specialised to strings); then again, GNU malloc
 *	is bug free...
 */

/*
 * there is also generic hash table management code, but strings can be shared
 * (that was the point of this code), will be unique in the table,
 * require a reference count, and are malloced, copied and freed at
 * will by the string manager.  Besides, I wrote this code first :-).
 * Look at htable.c for the other code.  It uses the Hash() function
 * defined here, and requires hashed objects to have a pointer to the
 * next element in the chain (which you specify when you call the functions).
 */

#define	MAXSHORT	(1 << (sizeof(short)*8 - 2))
#define WORD_ALIGN_BIT  (sizeof(void*)-1)    /* these are 0 for aligned ptrs */

int num_distinct_strings = 0;
int bytes_distinct_strings = 0;
static int allocd_strings = 0;
static int allocd_bytes = 0;
static int search_len = 0;
static int num_str_searches = 0;

/*
 * strings are stored:
 *	(char * next) (short numrefs) string
 *				      ^
 *				pointer points here
 */

#define	NEXT(str)	(*(char **)((char *) (str) - sizeof(unsigned short)\
						   - sizeof(char *)))
#define	REFS(str)	(*(unsigned short *)((char *) (str)\
						   - sizeof(unsigned short)))

/*
 * hash table - list of pointers to heads of string chains.
 * Each string in chain has a pointer to the next string and a
 * reference count (char *, int) stored just before the start of the string.
 * HTABLE_SIZE is in config.h, and should be a prime, probably between
 * 1000 and 5000.
 */

static char *(base_table[HTABLE_SIZE]);

void 
init_shared_strings()
{
}

int 
overhead_bytes() 
{
    return (sizeof(char *) * HTABLE_SIZE) +
	num_distinct_strings * (sizeof(char *) + sizeof(short));
}

/*
 * generic hash function.  This is probably overkill; I haven't checked the
 * stats for different prime numbers, etc.
 */

#if defined(ultrix) && !defined(__GNUC__)
#define StrHash(s) (hashstr((s), 20, HTABLE_SIZE))
#else
#if BITNUM(HTABLE_SIZE) == 1
/* This one only works for even power-of-2 table size, but is faster */
#define StrHash(s) (hashstr16((s), 20) & ((HTABLE_SIZE)-1))
#else
#define StrHash(s) (hashstr((s), 20, HTABLE_SIZE))
#endif
#endif

/*
 * Looks for a string in the table.  If it finds it, returns a pointer to
 * the start of the string part, and moves the entry for the string to
 * the head of the pointer chain.  One thing (blech!) - puts the previous
 * pointer on the hash chain into fs_prev.
 */

static int hash_index;	/* to be used by alloc_new_string
			   without further notice 		  */
			/* is also used opaque inside free_string */

char *
findstring(char *s)
{
    char * curr, *prev;
    int h = StrHash(s);
    hash_index = h;
    
    curr = base_table[h];
    prev = 0;
    num_str_searches++;
    
    while (curr)
    {
	search_len++;
	if (*curr == *s && !strcmp(curr, s))
	{ /* found it */
	    if (prev)
	    { /* not at head of list */
		NEXT(prev) = NEXT(curr);
		NEXT(curr) = base_table[h];
		base_table[h] = curr;
	    }
	    return(curr);	/* pointer to string */
	}
	prev = curr;
	curr = NEXT(curr);
    }
    
    return(0); /* not found */
}

/*
 * Make a space for a string.  This is rather nasty, as we want to use
 * alloc/free, but malloc overhead is generally severe.  Later, we
 * will have to compact strings...
 */

static INLINE char *
alloc_new_string(char *string)
{
    long length = strlen(string);
    char * s = xalloc(1 + length + sizeof(char *) + sizeof(short));
    int h = hash_index;
    
    s += sizeof(char *) + sizeof(short);
    strcpy(s, string);
    NEXT(s) = base_table[h];
    base_table[h] = s;
    num_distinct_strings++;
    bytes_distinct_strings += 4 + (4 + 2 + length + 1 + 3) & ~3;
    return(s);
}

char *
make_shared_string(char *str)
{
    char * s;
    
    s = findstring(str);
    if (!s)
    {
	s = alloc_new_string(str);
	REFS(s) = 1;
    }
    else
    {
	if (REFS(s))
	    REFS(s)++;
    }
    allocd_strings++;
    allocd_bytes += malloced_size(s);
    return(s);
}

INLINE void 
increment_string_ref(char *str)
{
    allocd_strings++;
    allocd_bytes += malloced_size(str);
    if (REFS(str))
	REFS(str)++;
}

INLINE void 
decrement_string_ref(char *str)
{
    allocd_strings--;
    allocd_bytes -= malloced_size(str);
    if (REFS(str))
	REFS(str)--;
}


/*
 * free_string - reduce the ref count on a string.  Various sanity
 * checks applied, the best of them being that a add_string allocated
 * string will point to a word boundary + sizeof(int)+sizeof(short),
 * since malloc always allocates on a word boundary.
 * On systems where a short is 1/2 a word, this gives an easy check
 * to see whather we did in fact allocate it.
 *
 * Don't worry about the overhead for all those checks!
 */

void 
free_string(char *str)
{
    char *s;
    
    allocd_strings--;
    allocd_bytes -= malloced_size(str);

    if (!REFS(str) || --REFS(str) > 0)
	return;

    s = findstring(str); /* moves it to head of table if found */
    base_table[hash_index] = NEXT(str);
    num_distinct_strings--;
    /* We know how much overhead malloc has */
    bytes_distinct_strings -= malloced_size(str) << 2;
    free(str - sizeof(short) - sizeof(char *));
    
    return;
}

/*
 * you think this looks bad!  and we didn't even tell them about the
 * GNU malloc overhead!  tee hee!
 */

void 
add_string_status(char *debinf)
{
    int net_bytes_distinct_strings, net_allocd_bytes;
    
    strcat(debinf, "\nShared string hash table:\n");
    strcat(debinf, "-------------------------\t Strings    Bytes\n");
    
    net_bytes_distinct_strings =
	(bytes_distinct_strings & (malloc_size_mask()<<2)) -
	    num_distinct_strings * (sizeof(char*) + sizeof(short));
    
    allocd_bytes &= malloc_size_mask();
    net_allocd_bytes = (allocd_bytes << 2) -
	allocd_strings * (sizeof(char*) + sizeof(short));

    sprintf(debinf + strlen(debinf), "Total asked for\t\t\t%8d %8d\n",
	    allocd_strings, net_allocd_bytes );

    sprintf(debinf + strlen(debinf),
	    "Space actually required/total string bytes %d%%\n",
	    (net_bytes_distinct_strings + overhead_bytes())*100 /
	    net_allocd_bytes );

    sprintf(debinf + strlen(debinf),
	    "Searches: %d    Average search length:%7.3f\n",
	    num_str_searches, (double)search_len / (double)num_str_searches);

}