/**
* \file htab.c
*
* \brief Hashtable routines.
* This code is largely ripped out of TinyMUSH 2.2.5, with tweaks
* to make it Penn-compatible by Trivian.
*
*
*/
#include "config.h"
#include "copyrite.h"
#include <string.h>
#include "conf.h"
#include "externs.h"
#include "htab.h"
#include "mymalloc.h"
#include "confmagic.h"
static HASHENT *hash_new(HASHTAB *htab, const char *key);
static int hash_val(register const char *k, int mask);
/* ---------------------------------------------------------------------------
* hash_val: Compute hash value of a string for a hash table.
*/
/*#define NEW_HASH_FUN /**/
#ifdef NEW_HASH_FUN
/* This hash function adapted from http://burtleburtle.net/bob/hash/evahash.html */
typedef unsigned long int u4; /**< unsigned 4-byte type */
typedef unsigned char u1; /**< unsigned 1-byte type */
/* The mixing step */
#define mix(a,b,c) \
{ \
a=a-b; a=a-c; a=a^(c>>13); \
b=b-c; b=b-a; b=b^(a<<8); \
c=c-a; c=c-b; c=c^(b>>13); \
a=a-b; a=a-c; a=a^(c>>12); \
b=b-c; b=b-a; b=b^(a<<16); \
c=c-a; c=c-b; c=c^(b>>5); \
a=a-b; a=a-c; a=a^(c>>3); \
b=b-c; b=b-a; b=b^(a<<10); \
c=c-a; c=c-b; c=c^(b>>15); \
}
/* The whole new hash function */
static int
hash_val(register const char *k, int mask)
{
register u4 a, b, c; /* the internal state */
u4 len, length; /* how many key bytes still need mixing */
static u4 initval = 5432; /* the previous hash, or an arbitrary value */
/* Set up the internal state */
length = len = strlen(k);
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
c = initval; /* variable initialization of internal state */
/*---------------------------------------- handle most of the key */
while (len >= 12) {
a = a + (k[0] + ((u4) k[1] << 8) + ((u4) k[2] << 16) + ((u4) k[3] << 24));
b = b + (k[4] + ((u4) k[5] << 8) + ((u4) k[6] << 16) + ((u4) k[7] << 24));
c = c + (k[8] + ((u4) k[9] << 8) + ((u4) k[10] << 16) + ((u4) k[11] << 24));
mix(a, b, c);
k = k + 12;
len = len - 12;
}
/*------------------------------------- handle the last 11 bytes */
c = c + length;
switch (len) { /* all the case statements fall through */
case 11:
c = c + ((u4) k[10] << 24);
case 10:
c = c + ((u4) k[9] << 16);
case 9:
c = c + ((u4) k[8] << 8);
/* the first byte of c is reserved for the length */
case 8:
b = b + ((u4) k[7] << 24);
case 7:
b = b + ((u4) k[6] << 16);
case 6:
b = b + ((u4) k[5] << 8);
case 5:
b = b + k[4];
case 4:
a = a + ((u4) k[3] << 24);
case 3:
a = a + ((u4) k[2] << 16);
case 2:
a = a + ((u4) k[1] << 8);
case 1:
a = a + k[0];
/* case 0: nothing left to add */
}
mix(a, b, c);
/*-------------------------------------------- report the result */
return c & mask;
}
#else /* NEW_HASH_FUN */
/** Compute a hash value for mask-style hashing.
* Given a null key, return 0. Otherwise, add up the numeric value
* of all the characters and return the sum modulo the size of the
* hash table.
* \param key key to hash.
* \param hashmask hash table size to use as modulus.
* \return hash value.
*/
int
hash_val(const char *key, int hashmask)
{
int hash = 0;
const char *sp;
if (!key || !*key)
return 0;
for (sp = key; *sp; sp++)
hash = (hash << 5) + hash + *sp;
return (hash & hashmask);
}
#endif /* NEW_HASH_FUN */
/* ----------------------------------------------------------------------
* hash_getmask: Get hash mask for mask-style hashing.
*/
/** Get the hash mask for mask-style hashing.
* Given the data size, return closest power-of-two less than that size.
* \param size data size.
* \return hash mask.
*/
int
hash_getmask(int *size)
{
int tsize;
if (!size || !*size)
return 0;
for (tsize = 1; tsize < *size; tsize = tsize << 1) ;
*size = tsize;
return tsize - 1;
}
/** Initialize a hashtable.
* \param htab pointer to hash table to initialize.
* \param size size of hashtable.
* \param data_size size of an individual datum to store in the table.
*/
void
hash_init(HASHTAB *htab, int size, int data_size)
{
int i;
htab->mask = get_hashmask(&size);
htab->hashsize = size;
htab->entries = 0;
htab->buckets = mush_malloc(size * sizeof(HASHENT *), "hash_buckets");
for (i = 0; i < size; i++)
htab->buckets[i] = NULL;
htab->entry_size = data_size;
}
/** Return a hashtable entry given a key.
* \param htab pointer to hash table to search.
* \param key key to look up in the table.
* \return pointer to hash table entry for given key.
*/
HASHENT *
hash_find(HASHTAB *htab, const char *key)
{
int hval, cmp;
HASHENT *hptr;
if (!htab->buckets)
return NULL;
hval = hash_val(key, htab->mask);
for (hptr = htab->buckets[hval]; hptr != NULL; hptr = hptr->next) {
cmp = strcmp(key, hptr->key);
if (cmp == 0) {
return hptr;
} else if (cmp < 0)
break;
}
return NULL;
}
/** Return the value stored in a hash entry.
* \param entry pointer to a hash table entry.
* \return generic pointer to the stored value.
*/
void *
hash_value(HASHENT *entry)
{
if (entry)
return entry->data;
else
return NULL;
}
/** Return the key stored in a hash entry.
* \param entry pointer to a hash table entry.
* \return pointer to the stored key.
*/
char *
hash_key(HASHENT *entry)
{
if (entry)
return entry->key;
else
return NULL;
}
/** Resize a hash table.
* \param htab pointer to hashtable.
* \param size new size.
*/
void
hash_resize(HASHTAB *htab, int size)
{
int i;
HASHENT **oldarr;
HASHENT **newarr;
HASHENT *hent, *nent, *curr, *old;
int hval;
int osize;
int mask;
/* We don't want hashes outside these limits */
if ((size < (1 << 4)) || (size > (1 << 20)))
return;
/* Save the old data we need */
osize = htab->hashsize;
oldarr = htab->buckets;
mask = htab->mask = get_hashmask(&size);
if (size == htab->hashsize)
return;
htab->hashsize = size;
newarr =
(HASHENT **) mush_malloc(size * sizeof(struct hashentry *), "hash_buckets");
htab->buckets = newarr;
for (i = 0; i < size; i++)
newarr[i] = NULL;
for (i = 0; i < osize; i++) {
hent = oldarr[i];
while (hent) {
nent = hent->next;
hval = hash_val(hent->key, mask);
for (curr = newarr[hval], old = NULL; curr; old = curr, curr = curr->next) {
if (strcmp(curr->key, hent->key) > 0)
break;
}
if (old) {
old->next = hent;
hent->next = curr;
} else {
hent->next = newarr[hval];
newarr[hval] = hent;
}
hent = nent;
}
}
mush_free(oldarr, "hash_buckets");
return;
}
static HASHENT *
hash_new(HASHTAB *htab, const char *key)
{
int hval;
size_t keylen;
HASHENT *hptr, *curr, *old;
hptr = hash_find(htab, key);
if (hptr)
return hptr;
if (htab->entries > (htab->hashsize * HTAB_UPSCALE))
hash_resize(htab, htab->hashsize << 1);
hval = hash_val(key, htab->mask);
htab->entries++;
keylen = strlen(key) + 1;
hptr = (HASHENT *) mush_malloc(HASHENT_SIZE + keylen, "hash_entry");
memcpy(hptr->key, key, keylen);
hptr->data = NULL;
if (!htab->buckets[hval] || strcmp(key, htab->buckets[hval]->key) < 0) {
hptr->next = htab->buckets[hval];
htab->buckets[hval] = hptr;
return hptr;
}
/* Insert in sorted order. There's always at least one item in
the chain already at this point. */
old = htab->buckets[hval];
for (curr = old->next; curr; old = curr, curr = curr->next) {
/* Comparison will never be 0 because hash_add checks to see
if the entry is already present. */
if (strcmp(key, curr->key) < 0) { /* Insert before curr */
old->next = hptr;
hptr->next = curr;
return hptr;
}
}
/* If we get here, we reached the end of the chain */
old->next = hptr;
hptr->next = NULL;
return hptr;
}
/** Add an entry to a hash table.
* \param htab pointer to hash table.
* \param key key string to store data under.
* \param hashdata void pointer to data to be stored.
* \param extra_size unused.
* \retval -1 failure.
* \retval 0 success.
*/
int
hash_add(HASHTAB *htab, const char *key, void *hashdata,
int extra_size __attribute__ ((__unused__)))
{
HASHENT *hptr;
if (hash_find(htab, key) != NULL) {
return -1;
}
hptr = hash_new(htab, key);
if (!hptr)
return -1;
hptr->data = hashdata;
/* hptr->extra_size = extra_size; */
return 0;
}
/** Delete an entry in a hash table.
* \param htab pointer to hash table.
* \param entry pointer to hash entry to delete (and free).
*/
void
hash_delete(HASHTAB *htab, HASHENT *entry)
{
int hval;
HASHENT *hptr, *last;
if (!entry)
return;
hval = hash_val(entry->key, htab->mask);
last = NULL;
for (hptr = htab->buckets[hval]; hptr; last = hptr, hptr = hptr->next) {
if (entry == hptr) {
if (last == NULL)
htab->buckets[hval] = hptr->next;
else
last->next = hptr->next;
mush_free(hptr, "hash_entry");
htab->entries--;
return;
}
}
if (htab->entries < (htab->hashsize * HTAB_DOWNSCALE))
hash_resize(htab, htab->hashsize >> 1);
}
/** Flush a hash table, freeing all entries.
* \param htab pointer to a hash table.
* \param size size of hash table.
*/
void
hash_flush(HASHTAB *htab, int size)
{
HASHENT *hent, *thent;
int i;
if (htab->buckets) {
for (i = 0; i < htab->hashsize; i++) {
hent = htab->buckets[i];
while (hent != NULL) {
thent = hent;
hent = hent->next;
mush_free(thent, "hash_entry");
}
htab->buckets[i] = NULL;
}
}
if (size == 0) {
mush_free(htab->buckets, "hash_buckets");
htab->buckets = NULL;
} else if (size != htab->hashsize) {
if (htab->buckets)
mush_free(htab->buckets, "hash_buckets");
hashinit(htab, size, htab->entry_size);
} else {
htab->entries = 0;
}
}
/** Return the first entry of a hash table.
* This function is used with hash_nextentry() to iterate through a
* hash table.
* \param htab pointer to hash table.
* \return first hash table entry.
*/
void *
hash_firstentry(HASHTAB *htab)
{
int hval;
for (hval = 0; hval < htab->hashsize; hval++)
if (htab->buckets[hval]) {
htab->last_hval = hval;
htab->last_entry = htab->buckets[hval];
return htab->buckets[hval]->data;
}
return NULL;
}
/** Return the first key of a hash table.
* This function is used with hash_nextentry_key() to iterate through a
* hash table.
* \param htab pointer to hash table.
* \return first hash table key.
*/
char *
hash_firstentry_key(HASHTAB *htab)
{
int hval;
for (hval = 0; hval < htab->hashsize; hval++)
if (htab->buckets[hval]) {
htab->last_hval = hval;
htab->last_entry = htab->buckets[hval];
return htab->buckets[hval]->key;
}
return NULL;
}
/** Return the next entry of a hash table.
* This function is used with hash_firstentry() to iterate through a
* hash table. hash_firstentry() must be called before calling
* this function.
* \param htab pointer to hash table.
* \return next hash table entry.
*/
void *
hash_nextentry(HASHTAB *htab)
{
int hval;
HASHENT *hptr;
hval = htab->last_hval;
hptr = htab->last_entry;
if (hptr->next) {
htab->last_entry = hptr->next;
return hptr->next->data;
}
hval++;
while (hval < htab->hashsize) {
if (htab->buckets[hval]) {
htab->last_hval = hval;
htab->last_entry = htab->buckets[hval];
return htab->buckets[hval]->data;
}
hval++;
}
return NULL;
}
/** Return the next key of a hash table.
* This function is used with hash_firstentry{,_key}() to iterate through a
* hash table. hash_firstentry{,_key}() must be called before calling
* this function.
* \param htab pointer to hash table.
* \return next hash table key.
*/
char *
hash_nextentry_key(HASHTAB *htab)
{
int hval;
HASHENT *hptr;
hval = htab->last_hval;
hptr = htab->last_entry;
if (hptr->next) {
htab->last_entry = hptr->next;
return hptr->next->key;
}
hval++;
while (hval < htab->hashsize) {
if (htab->buckets[hval]) {
htab->last_hval = hval;
htab->last_entry = htab->buckets[hval];
return htab->buckets[hval]->key;
}
hval++;
}
return NULL;
}
/** Display a header for a stats listing.
* \param player player to notify with header.
*/
void
hash_stats_header(dbref player)
{
notify_format(player,
"Table Buckets Entries LChain ECh 1Ch 2Ch 3Ch 4+Ch AvgCh ~Memory");
}
/** Display stats on a hashtable.
* \param player player to notify with stats.
* \param htab pointer to the hash table.
* \param hname name of the hash table.
*/
void
hash_stats(dbref player, HASHTAB *htab, const char *hname)
{
int longest = 0, n;
int lengths[5];
double chainlens = 0.0;
double totchains = 0.0;
unsigned int bytes = 0;
if (!htab || !hname)
return;
for (n = 0; n < 5; n++)
lengths[n] = 0;
bytes += sizeof(HASHTAB);
bytes += htab->entry_size * htab->entries;
if (htab->buckets) {
bytes += HASHENT_SIZE * htab->hashsize;
for (n = 0; n < htab->hashsize; n++) {
int chain = 0;
HASHENT *b;
if (htab->buckets[n]) {
for (b = htab->buckets[n]; b; b = b->next) {
chain++;
bytes += strlen(b->key) + 1 /* + b->extra_size */ ;
}
if (chain > longest)
longest = chain;
}
lengths[(chain > 4) ? 4 : chain]++;
chainlens += chain;
}
}
for (n = 1; n < 5; n++)
totchains += lengths[n];
notify_format(player,
"%-10s %7d %7d %6d %4d %4d %4d %4d %4d %6.3f %7u", hname,
htab->hashsize, htab->entries, longest, lengths[0], lengths[1],
lengths[2], lengths[3], lengths[4],
totchains == 0.0 ? 0.0 : chainlens / totchains, bytes);
}