/* Copyright 1989, 1990 by James Aspnes, David Applegate, and Bennet Yee */
/* See the file COPYING for distribution information */
#include "db.h"
#include "config.h"

extern const char *alloc_string(const char *);

/* Incremental garbage collector defs */

#define GC_DORMANT 0		/* not currently running */
#define GC_CLEAR   1		/* pass to clear string marks */
#define GC_CLEAR_CODE 2 	/* pass to clear compiled code */
#define GC_SCAN    3		/* object scanning pass */
#define GC_RECOVER 4		/* pass to free unmarked strings */

#define GC_SCAN_COST 16		/* cost of scanning one object */
				/* relative to clearing a string */

static void igc_reset(void);

static int igc_state = GC_DORMANT;
static datum igc_position = 0;	/* position in string or object table */

#ifdef NOISY_GC
static int gc_strcount = 0;	/* strings freed */
static int gc_codecount = 0;	/* code blobs freed */
static int gc_expcount = 0;	/* alias expansions freed */
#endif /* NOISY_GC */

/*** STRING TABLE ***/

static datum hash_function_table[256];
static int hft_initialized = 0;

static void init_hft(void)
{
    int i;

    for(i = 0; i < 256; i++) hash_function_table[i] = RAND();
    hft_initialized = 1;
}

static datum hash_function(const char *string)
{
    datum hash;

    if(!hft_initialized) init_hft();
    hash = 0;
    while(*string) {
	hash ^= ((hash >> 1) ^ hash_function_table[*string++]);
    }
    return(hash);
}

/* tags for st_elt's */
#define ST_EMPTY 0		/* empty element, reserved */
#define ST_FREE 1		/* free element */
#define ST_USED 2		/* element in use */
#define ST_MARKED 3		/* marked used element (for gc) */

struct st_elt {
    const char *s;		/* string held here */
    datum *code;		/* compiled version of string */
    alist expansion;		/* expanded aliases */
    datum d;			/* next item in free list OR hash value */
    char tag;
} *st = 0;

static datum st_top = 0;
static datum st_free_head = NOTHING;
static alist st_alist;

#define st_empty(loc) (loc < 0 || loc >= st_top || !(st[loc].tag & ST_USED))

static void grow_st(datum new_elt)
{
    datum old_top;
    datum i;

    if(new_elt < st_top) return;
    if(new_elt <= FIXED_STRINGS) new_elt = FIXED_STRINGS+1;

    /* else get a new size */
    old_top = st_top;
#ifdef DB_DOUBLING
    while(new_elt < st_top) st_top *= 2;
#else
    st_top = new_elt+1;
#endif /* DB_DOUBLING */    

    /* grow it */
    if((st
	= (struct st_elt *) realloc((void *) st,
				    (new_elt + 1) * sizeof(struct st_elt)))
       == 0) abort();

    /* free all the new elements */
    for(i = st_top - 1; i >= old_top; i--) {
	if(i < FIXED_STRINGS) {
	    st[i].tag = ST_EMPTY;
	} else {
	    st[i].tag = ST_FREE;
	    st[i].d = st_free_head;
	    st_free_head = i;
	}
    }
}

static int add_string(const char *s, datum hash)
{
    datum loc;			/* where to put it */

    if(strlen(s) >= MAX_STRLEN) return NOTHING;	

    while(st_free_head == NOTHING) {
	grow_st(st_top);
    }

    loc = st_free_head;
    st_free_head = st[loc].d;

    st[loc].tag = ST_MARKED;	/* for incremental gc */
    st[loc].d = hash;
    st[loc].s = alloc_string(s);
    st[loc].code = 0;
    st[loc].expansion = empty_alist();

    /* add it to the alist */
    st_alist = add_assoc(st_alist, hash, loc);

    return loc;
}

datum intern_soft(const char *s)
{
    datum hash;
    datum loc;

    if(s == 0) return NOTHING;

    hash = hash_function(s);

    FOREACH_MATCH(st_alist, hash, loc) {
	if(!strcmp(s, st[loc].s)) {
	    st[loc].tag = ST_MARKED;
	    return loc;
	}
    } END_FOREACH;

    return NOTHING;
}

datum intern(const char *s)
{
    datum hash;
    datum loc;

    if(s == 0) return NOTHING;

    hash = hash_function(s);

    FOREACH_MATCH(st_alist, hash, loc) {
	if(!strcmp(s, st[loc].s)) {
	    st[loc].tag = ST_MARKED;
	    return loc;
	}
    } END_FOREACH;

    /* else stuff it in */
    return add_string(s, hash);
}

const char *string(datum loc)
{
    if(st_empty(loc)) {
	return 0;		/* you lose */
    } else {
	return st[loc].s;
    }
}

void set_compiled_string(datum loc, datum *c)
{
    if(st_empty(loc)) abort();
    if(st[loc].code) free((void *) st[loc].code);
    st[loc].code = c;
}

datum *get_compiled_string(datum loc)
{
    if(st_empty(loc)) {
	return 0;
    } else {
	return st[loc].code;
    }
}

void set_expansion(datum loc, alist a)
{
    if(st_empty(loc)) abort();
    st[loc].expansion = a;
}

alist get_expansion(datum loc)
{
    if(st_empty(loc)) {
	return 0;
    } else {
	return st[loc].expansion;
    }
}

static void clear_st(void)
{
    datum i;

    /* clear out the old string table */
    if(st) {
	free_alist(st_alist);
	for(i = 0; i < st_top; i++) {
	    if(!st_empty(i)) {
		free((void *) st[i].s);
		if(st[i].code) free((void *) st[i].code);
		free_alist(st[i].expansion);
	    }
	}
	st_top = 0;
	free((void *) st);
    }

    /* make a new string table */
    st = (struct st_elt *) malloc(1);
    st_top = 0;
    st_alist = empty_alist();
}
 
/* call this if the free list has been trashed */
static void rebuild_st_free(void)
{
    datum i;

    /* nuke the old one */
    st_free_head = 0;

    /* go from the top so small numbers end up near front */
    for(i = st_top - 1; i >= FIXED_STRINGS; i--) {
	if(st[i].tag == ST_FREE) {
	    st[i].d = st_free_head;
	    st_free_head = i;
	}
    }
}
 
static int put_datum(datum x, FILE *f)
{
    fprintf(f, "%d\n", x);

    return 0;
}

static int put_string(const char *s, FILE *f)
{
    while(*s) {
	switch(*s) {
	  case '\n':
	    /* mark it */
	    putc('\\', f);
	    putc('n', f);
	    break;
	  case '\\':
	    putc('\\', f);
	    putc('\\', f);
	    break;
	  default:
	    putc(*s, f);
	}
	s++;
    }
    putc('\n', f);

    return 0;
}

static int write_st(FILE *f)
{
    datum i;

    for(i = 0; i < st_top; i++) {
	if(!st_empty(i)) {
	    put_datum(i, f);
	    put_string(st[i].s, f);
	}
    }
    put_datum(NOTHING, f);		/* end of table */

    return 0;
}

static datum get_datum(FILE *f)
{
    static char buf[32];

    fgets(buf, sizeof(buf), f);
    return atol(buf);
}

static const char *get_string(FILE *f)
{
    static char buf[MAX_STRLEN+1];
    int i;
    int c;

    for(i = 0; i < MAX_STRLEN;) {
	switch(c = getc(f)) {
	  case '\\':
	    /* escape sequence */
	    switch(c = getc(f)) {
	      case 'n':
		buf[i++] = '\n';
		break;
	      case '\\':
		buf[i++] = '\\';
		break;
	      default:
		return 0;	/* bad input */
	    }
	    break;
	  case '\n':
	    /* end of input */
	    buf[i] = '\0';
	    return buf;
	  case EOF:
	    /* disaster */
	    return 0;
	  default:
	    buf[i++] = c;
	    break;
	}
    }
    
    /* ran off the end */
    return 0;
}

static int read_st(FILE *f)
{
    datum loc;

    clear_st();

    while((loc = get_datum(f)) != NOTHING) {
	grow_st(loc);

	/* we'll clean up the free list later */
	st[loc].tag = ST_USED;
	st[loc].s = alloc_string(get_string(f));
	if(st[loc].s == 0) {
	    return -1; /* error */
	}
	st[loc].d = hash_function(st[loc].s);
	st[loc].code = 0;
	st[loc].expansion = empty_alist();

	st_alist = add_assoc(st_alist, st[loc].d, loc);
    }

    /* fix the free list, which we have so happily trashed above */
    rebuild_st_free();

    return 0;
}

/*** OBJECT TABLE ***/
/* Uses bucket hashing 'cuz the overhead isn't that bad here */

struct hashed_object {
    datum id;
    struct object object;
    struct hashed_object *next;
};

static struct hashed_object **object_table = 0;
static datum object_table_size = 0;
static datum object_table_count = 0;
static datum top_object = 1;

struct object *_safe_get_TMP;

struct object *object(datum id)
{
    struct hashed_object *h;

    if(object_table_size != 0) {
	h = object_table[hash_range(object_table_size, id)];
	while(h) {
	    if(h->id == id) return &h->object;
	    h = h->next;
	}
    }
    return 0;
}

#define OBJECT_TABLE_INITIAL_SIZE (1)

static void init_object_table(void)
{
    datum i;

    object_table_size = OBJECT_TABLE_INITIAL_SIZE;
    object_table_count = 0;
    object_table = (struct hashed_object **)
	malloc(object_table_size * sizeof(struct hashed_object *));

    for(i = 0; i < object_table_size; i++) object_table[i] = 0;
}
 
static void grow_object_table(void)
{
    datum i;
    struct hashed_object **old_table;
    datum old_size;
    struct hashed_object **new_table;
    datum new_size;
    struct hashed_object *h;
    struct hashed_object *next;
    datum loc;

    /* get a new table */
    /* we don't assign directly to object_table so it will still be valid */
    /* even if the malloc fails */
    new_size = object_table_size << 1;
    if((new_table = (struct hashed_object **)
	malloc(new_size * sizeof(struct hashed_object *))) == 0) {
	/* disaster */
	abort();
    }

    /* got it ok, map everything over */
    old_table = object_table;
    old_size = object_table_size;

    object_table = new_table;
    object_table_size = new_size;
    /* count doesn't change */

    /* clear the new table */
    for(i = 0; i < object_table_size; i++) object_table[i] = 0;

    /* bring the old objects */
    for(i = 0; i < old_size; i++) {
	h = old_table[i];
	while(h) {
	    next = h->next;
	    loc = hash_range(object_table_size, h->id);
	    h->next = object_table[loc];
	    object_table[loc] = h;
	    h = next;
	}
    }

    /* free the old table */
    free((void *) old_table);

    /* reset the incremental garbage collector */
    /* values are now bogus */
    igc_reset();
}

/* creates a new entry if there wasn't one before */
static struct object *get_object(datum id)
{
    datum loc;
    struct hashed_object *h;

    /* make sure the table is there */
    if(object_table_size == 0) {
	init_object_table();
    }

    loc = hash_range(object_table_size, id);

    h = object_table[loc];
    while(h) {
	if(h->id == id) return &h->object;
	h = h->next;
    }

    /* not there */
    /* create it */
    h = (struct hashed_object *) malloc(sizeof(struct hashed_object));
    h->id = id;
    h->next = object_table[loc];
    object_table[loc] = h;

    /* make sure it won't get allocated by new_object */
    if(id >= top_object) top_object = id+1;

    /* load factor threshold for rehash is 1 */
    if(++object_table_count >= object_table_size) {
	grow_object_table();
    }

    return &h->object;
}

datum new_object(datum owner)
{
    datum id;
    struct object *o;
    
    id = top_object++;		/* get a new number */
    o = get_object(id);		/* allocate the new object */

    o->vars =  empty_alist();
    o->sets = empty_alist();
    o->actions = empty_alist();
    o->owner = owner;
    o->parent = NOTHING;
    o->location = NOTHING;
    o->flags = 0;

    return id;
}
    
void free_object(datum id)
{
    datum loc;
    struct hashed_object *prev;
    struct hashed_object *h;
    struct object *l;
    set c;			/* contents list */

    loc = hash_range(object_table_size, id);
    if(object_table[loc]->id == id) {
	/* delete the very first entry */
	h = object_table[loc];
	object_table[loc] = h->next;
    } else {
	for(prev = object_table[loc]; prev->next; prev = prev->next) {
	    if(prev->next->id == id) {
		/* nuke prev->next */
		h = prev->next;
		prev->next = h->next;
		goto got_it;
	    }
	}
	/* not there! */
	return;
    }

  got_it:
    /* remove it from its location if it has one */
    if((l = object(h->object.location))
       && assoc(l->sets, CONTENTS_NAME, (datum *) &c)) {
	set_assoc(l->sets, CONTENTS_NAME, (datum) del_member(c, h->id));
    }

    /* free up everything */
    free_alist(h->object.vars);
    free_alist(h->object.actions);

    free((void *) h);
}

static int put_alist(alist a, FILE *f)
{
    datum key;
    datum value;

    FOREACH(a, key, value) {
	put_datum(key, f);
	put_datum(value, f);
    } END_FOREACH;
    put_datum(NOTHING, f);

    return 0;
}

static int put_set(set s, FILE *f)
{
    datum x;

    SET_FOREACH(s, x) {
	put_datum(x, f);
    } END_SET_FOREACH;

    put_datum(NOTHING, f);

    return 0;
}

static int put_sets(alist a, FILE *f)
{
    datum key;
    datum value;

    FOREACH(a, key, value) {
	put_datum(key, f);
	put_set((set) value, f);
    } END_FOREACH;
    put_datum(NOTHING, f);

    return 0;
}

static int put_object(struct hashed_object *h, FILE *f)
{
    putc('#', f);
    put_datum(h->id, f);
    put_alist(h->object.vars, f);
    put_sets(h->object.sets, f);
    put_alist(h->object.actions, f);
    put_datum(h->object.owner, f);
    put_datum(h->object.parent, f);
    put_datum(h->object.location, f);
    put_datum(h->object.flags, f);

    return 0;
}

int db_write(FILE *f)
{
    datum i;
    struct hashed_object *h;

    fputs(DB_HEADER, f);

    if(write_st(f) < 0) return -1;

    for(i = 0; i < object_table_size; i++) {
	for(h = object_table[i]; h; h = h->next) {
	    if(put_object(h, f) < 0) return -1;
	}
    }

    /* end it with a NOTHING id */
    putc('#', f);
    put_datum(NOTHING, f);

    return 0;
}
    
static alist get_alist(FILE *f)
{
    datum key;
    datum value;
    alist a;

    a = empty_alist();

    while((key = get_datum(f)) != NOTHING) {
	value = get_datum(f);
	a = add_assoc(a, key, value);
    }
    return a;
}

static set get_set(FILE *f)
{
    datum x;
    set s;

    s = empty_set();

    while((x = get_datum(f)) != NOTHING) {
	s = add_member(s, x);
    }
    return s;
}

static alist get_sets(FILE *f)
{
    alist a;
    set s;

    datum key;

    a = empty_alist();

    while((key = get_datum(f)) != NOTHING) {
	s = get_set(f);
	a = add_assoc(a, key, (datum) s);
    }
    return a;
}

/* Don't do this more than once if you care about storage leaks */
int db_read(FILE *f)
{
    datum id;
    struct object *o;
    char buf[sizeof(DB_HEADER)+1];

    /* check the header */
    if(fgets(buf, sizeof(DB_HEADER), f) == 0
       || strcmp(buf, DB_HEADER)) return -1;

    if(read_st(f) < 0) return -1;

    for(;;) {
	if(getc(f) != '#') return -1; /* bad object head */
	if((id = get_datum(f)) == NOTHING) break; /* done */

	/* else */
	o = get_object(id);
	o->vars = get_alist(f);
	o->sets = get_sets(f);
	o->actions = get_alist(f);
	o->owner = get_datum(f);
	o->parent = get_datum(f);
	o->location = get_datum(f);
	o->flags = get_datum(f);
    }

    return 0;
}

/*** GARBAGE COLLECTOR ***/

void gc_mark_string(datum s)
{
    if(s != NOTHING) {
#ifdef DEBUG
	if(st_empty(s)) abort();
#endif /* DEBUG */	
	st[s].tag = ST_MARKED;
    }
}

static void gc_mark_keys(alist a)
{
    datum key;
    datum value;

    FOREACH(a, key, value) {
	gc_mark_string(key);
    } END_FOREACH;
}

static void gc_free(datum s)
{
    /* take it out of the alist */
    st_alist = del_assoc(st_alist, st[s].d, s);

    /* nuke the string */
    free((void *) st[s].s);

    /* nuke the code and expansion */
    if(st[s].code) free((void *) st[s].code);
    free_alist(st[s].expansion);

    /* put it on the free list */
    st[s].tag = ST_FREE;
    st[s].d = st_free_head;
    st_free_head = s;

#ifdef NOISY_GC
    /* count it */
    gc_strcount++;
#endif /* NOISY_GC */
}

static int gc_mark_objects(datum position)
{
    struct hashed_object *ho;
    struct object *o;
    datum key;
    datum value;
    int count;

    count = 0;
    for(ho = object_table[position]; ho; ho = ho->next) {
	count++;
	o = &ho->object;
	
	/* mark all keys */
	gc_mark_keys(o->vars);
	gc_mark_keys(o->sets);
	gc_mark_keys(o->actions);

	/* mark string variables */
	FOREACH(o->vars, key, value) {
	    if(*string(key) == STRING_MARKER) {
		gc_mark_string(value);
	    }
	} END_FOREACH;

	/* mark action values */
	FOREACH(o->actions, key, value) {
	    gc_mark_string(value);
	} END_FOREACH;

	/* blast name lists */
	FOREACH(o->sets, key, value) {
	    set_clear_name_list((set) value);
	} END_FOREACH;
    }

    return count;
}


/* GC everything */
void full_gc(void)
{
    int i;

    /* clear string table marks */
    for(i = 0; i < st_top; i++) {
	if(st[i].tag == ST_MARKED) st[i].tag = ST_USED;

	/* we can combine CLEAR and CLEAR_CODE steps */
	/* because nobody can compile new code while we're running */
	if(!st_empty(i) && st[i].code) {
	    free((void *) st[i].code);
	    st[i].code = 0;
#ifdef NOISY_GC
	    gc_codecount++;
#endif /* NOISY_GC */	    
	}
	if(!isempty(st[i].expansion)) {
	    free_alist(st[i].expansion);
	    st[i].expansion = empty_alist();
#ifdef NOISY_GC
	    gc_expcount++;
#endif /* NOISY_GC */	    
	}
    }

    /* walk the object table */
    for(i = 0; i < object_table_size; i++) {
	gc_mark_objects(i);
    }

    /* clear unfixed strings that aren't marked */
    for(i = FIXED_STRINGS; i < st_top; i++) {
	if(st[i].tag == ST_USED) {
	    /* zap it */
	    gc_free(i);
	}
    }
#ifdef NOISY_GC 
	fprintf(stderr,
		"GC finished, reclaimed %d strings %d codes %d aliases\n",
		gc_strcount,
		gc_codecount,
		gc_expcount);
#endif /* NOISY_GC */

    igc_reset();
}

/*** INCREMENTAL GC ***/
static void igc_reset(void)
{
    igc_state = GC_DORMANT;
#ifdef NOISY_GC
    gc_codecount = 0;
    gc_strcount = 0;
    gc_expcount = 0;
#endif /* NOISY_GC */
}

void incremental_gc(void)
{
    int count;

    /* compute the count for this step */
    count = (st_top		/* CLEAR */
	     + st_top		/* CLEAR_CODE */
	     + GC_SCAN_COST * object_table_size	/* SCAN */
	     + st_top		/* RECOVER */
	     + GC_RATE - 1	/* round up */
	     ) / GC_RATE;

    switch(igc_state) {
      case GC_DORMANT:
	igc_state = GC_CLEAR;
	igc_position = 0;
	/* fall through */
      case GC_CLEAR:
	while(igc_position < st_top) {
	    if(--count < 0) return;
	    if(st[igc_position].tag == ST_MARKED) {
		st[igc_position].tag = ST_USED;
	    }
	    igc_position++;
	}
	
	/* if got here we're done with the CLEAR stage */
	igc_state = GC_CLEAR_CODE;
	igc_position = 0;
	
	/* fall through */
      case GC_CLEAR_CODE:
	while(igc_position < st_top) {
	    if(--count < 0) return;
	    if(!st_empty(igc_position)) {
		if(st[igc_position].code) {
		    free((void *) st[igc_position].code);
		    st[igc_position].code = 0;
#ifdef NOISY_GC
		    gc_codecount++;
#endif				/* NOISY_GC */
		}
		if(!isempty(st[igc_position].expansion)) {
		    free_alist(st[igc_position].expansion);
		    st[igc_position].expansion = empty_alist();
#ifdef NOISY_GC
		    gc_expcount++;
#endif				/* NOISY_GC */
		}
	    }
	    igc_position++;
	}
	/* if got here we're done with the CLEAR_CODE stage */
	igc_state = GC_SCAN;
	igc_position = 0;
	
	/* fall through */
      case GC_SCAN:
	while(igc_position < object_table_size) {
	    if(count < 0) return;
	    count -= GC_SCAN_COST * gc_mark_objects(igc_position);
	    igc_position++;
	}

	/* if we got here we're done with the SCAN stage */
	igc_state = GC_RECOVER;
	igc_position = FIXED_STRINGS;

	/* fall through */
      case GC_RECOVER:
	while(igc_position < st_top) {
	    if(--count < 0) return;
	    if(st[igc_position].tag == ST_USED) {
		gc_free(igc_position);
	    }
	    igc_position++;
	}
	
	/* if got here we're done with the RECOVER stage */
	/* give up and go home */
#ifdef NOISY_GC 
	fprintf(stderr,
		"GC finished, reclaimed %d strings %d codes %d aliases\n",
		gc_strcount,
		gc_codecount,
		gc_expcount);
#endif /* NOISY_GC */
	igc_reset();
	return;
    }
}

/*** for mapping over every object in the database ***/
int foreach_object(void (*func)(datum))
{
    datum pos;
    struct hashed_object *h;
    datum *object_list;
    datum i;
    
    /* construct a list of all object id's currently in the database */
    /* this way it doesn't matter if the list changes */
    object_list = (datum *) malloc(sizeof(datum) * object_table_count);

    i = 0;
    for(pos = 0; pos < object_table_size; pos++) {
	for(h = object_table[pos]; h; h = h->next) {
	    object_list[i++] = h->id;
	}
    }

    if(i != object_table_count) abort(); /* object_table_count lied! */

    /* now call func on all of them */
    while(--i >= 0) (*func)(object_list[i]);
    
    /* free up the list */
    free((void *) object_list);

    return object_table_count;
}