/*------------------------------------------------------------------
* Regular Expression Cache
* Written 1998 by Lars Duening.
* Share and Enjoy!
*------------------------------------------------------------------
* Implementation of a cache for compiled regular expressions and
* regexp matches. Usage of the cache can reduce the setup time
* for regexps by factor 4; the actual regexp matching showed up
* in experiments as being fast enough to make a cache for the
* results worthless.
*
* Compiled expressions are stored together with their generator
* strings in a hash table, hashed over the generator string content.
*
* The table sizes are specified in config.h as follows:
* RXCACHE_TABLE: size of the expression hash table
*
* If RXCACHE_TABLE is not defined, the whole caching is disabled.
*
* The include rxcache.h file offers some macros for transparent use:
* REGCOMP() wrap up the calls to hs_regcomp().
* RX_DUP() and REGFREE() handle the refcounting necessary.
* The macros map to the standard uncached, non-refcounted calls
* if the rxcache is disabled.
*
* Beware! hs_regexec() stores result data in the regexp structure (the
* startp[] and end[] arrays), so the same pattern must not be used
* in two concurrent regcomp_cache/hs_regexec pairs.
*
* TODO: Using shared strings as cache-indices can speed things up,
* TODO:: especially when knowing where to find the hashvalue from
* TODO:: the strtable.
* TODO: Use in-table chaining to handle collisions.
*------------------------------------------------------------------
*/
/*--------------------------------------------------------------------*/
#include "driver.h"
#include <stdio.h>
#include <string.h>
#define NO_REF_STRING
#include "rxcache.h"
#include "gcollect.h"
#include "hash.h"
#include "regexp.h"
#include "smalloc.h"
#include "stralloc.h"
#include "strfuns.h"
#include "svalue.h"
#include "xalloc.h"
#include "../mudlib/sys/debug_info.h"
#ifdef RXCACHE_TABLE
/*--------------------------------------------------------------------*/
/* Hash functions, inspired by interpret.c */
#if !( (RXCACHE_TABLE) & (RXCACHE_TABLE)-1 )
#define RxStrHash(s) ((s) & ((RXCACHE_TABLE)-1))
#else
#define RxStrHash(s) ((s) % RXCACHE_TABLE)
#endif
/* One expression hashtable entry */
typedef struct RxHashEntry {
unsigned char * pString; /* Generator string, a shared string
* NULL if unused */
p_uint hString; /* Hash of pString */
Bool from_ed; /* The from_ed value */
Bool excompat; /* The excompat value */
regexp * pRegexp; /* The generated regular expression from hs_regcomp() */
} RxHashEntry;
/* Variables */
static RxHashEntry xtable[RXCACHE_TABLE]; /* The Expression Hashtable */
/* Expression cache statistics */
static uint32 iNumXRequests = 0; /* Number of calls to regcomp() */
static uint32 iNumXFound = 0; /* Number of calls satisfied from table */
static uint32 iNumXCollisions = 0; /* Number of hashcollisions */
static uint32 iNumXEntries = 0; /* Number of used cache entries */
static uint32 iXSizeAlloc = 0; /* Dynamic memory held in regexp structs */
/*--------------------------------------------------------------------*/
void rxcache_init(void)
/* Initialise the module. */
{
memset(xtable, 0, sizeof(xtable));
}
/*--------------------------------------------------------------------*/
regexp *
regcomp_cache(unsigned char * expr, Bool excompat, Bool from_ed)
/* Compile a regexp structure from the expression <expr>, more or
* less ex compatible.
*
* If possible, take a ready-compiled structure from the hashtable,
* else enter the newly compiled structure into the table.
*
* The caller gets his own reference to the structure, which he has
* to rx_free() after use.
*/
{
p_uint hExpr;
regexp * pRegexp;
RxHashEntry *pHash;
iNumXRequests++;
hExpr = whashstr((char *)expr, 50);
pHash = xtable+RxStrHash(hExpr);
/* Look for a ready-compiled regexp */
if (pHash->pString != NULL
&& pHash->hString == hExpr
&& pHash->from_ed == from_ed
&& pHash->excompat == excompat
&& !strcmp((char *)pHash->pString, (char *)expr)
)
{
iNumXFound++;
return rx_dup(pHash->pRegexp);
}
/* Regexp not found: compile a new one and enter it
* into the table.
*/
pRegexp = hs_regcomp(expr, excompat, from_ed);
if (NULL == pRegexp)
return NULL;
expr = (unsigned char *)make_shared_string((char *)expr);
if (NULL != pHash->pString)
{
iNumXCollisions++;
iNumXEntries--;
iXSizeAlloc -= pHash->pRegexp->regalloc;
free_string((char *)pHash->pString);
rx_free(pHash->pRegexp);
}
pHash->pString = expr; /* refs are transferred */
pHash->hString = hExpr;
pHash->pRegexp = pRegexp;
pHash->from_ed = from_ed;
pHash->excompat = excompat;
iNumXEntries++;
iXSizeAlloc += pRegexp->regalloc;
return rx_dup(pRegexp);
} /* regcomp_cache() */
/*--------------------------------------------------------------------*/
size_t
rxcache_status (strbuf_t *sbuf, Bool verbose)
/* Gather (and optionally print) the statistics from the rxcache.
* Return the amount of memory used.
*/
{
uint32 iNumXReq; /* Number of regcomp() requests, made non-zero */
#if defined(__MWERKS__) && !defined(WARN_ALL)
# pragma warn_largeargs off
#endif
/* In verbose mode, print the statistics */
if (verbose)
{
strbuf_add(sbuf, "\nRegexp cache status:\n");
strbuf_add(sbuf, "--------------------\n");
strbuf_addf(sbuf, "Expressions in cache: %lu (%.1f%%)\n"
, iNumXEntries, 100.0 * (float)iNumXEntries / RXCACHE_TABLE);
strbuf_addf(sbuf, "Memory allocated: %lu\n", iXSizeAlloc);
iNumXReq = iNumXRequests ? iNumXRequests : 1;
strbuf_addf(sbuf
, "Requests: %lu - Found: %lu (%.1f%%) - Coll: %lu (%.1f%% req/%.1f%% entries)\n"
, iNumXRequests, iNumXFound, 100.0 * (float)iNumXFound/(float)iNumXReq
, iNumXCollisions, 100.0 * (float)iNumXCollisions/(float)iNumXReq
, 100.0 * (float)iNumXCollisions/(iNumXEntries ? iNumXEntries : 1)
);
}
else
{
strbuf_addf(sbuf, "Regexp cache:\t\t\t%8ld %9lu\n", iNumXEntries, iXSizeAlloc);
}
return iXSizeAlloc;
#if defined(__MWERKS__)
# pragma warn_largeargs reset
#endif
} /* rxcache_status() */
/*-------------------------------------------------------------------------*/
void
rxcache_dinfo_status (svalue_t *svp, int value)
/* Return the rxcache information for debug_info(DINFO_DATA, DID_STATUS).
* <svp> points to the svalue block for the result, this function fills in
* the spots for the object table.
* If <value> is -1, <svp> points indeed to a value block; other it is
* the index of the desired value and <svp> points to a single svalue.
*/
{
#define ST_NUMBER(which,code) \
if (value == -1) svp[which].u.number = code; \
else if (value == which) svp->u.number = code
ST_NUMBER(DID_ST_RX_CACHED, iNumXEntries);
ST_NUMBER(DID_ST_RX_TABLE, RXCACHE_TABLE);
ST_NUMBER(DID_ST_RX_TABLE_SIZE, iXSizeAlloc);
ST_NUMBER(DID_ST_RX_REQUESTS, iNumXRequests);
ST_NUMBER(DID_ST_RX_REQ_FOUND, iNumXFound);
ST_NUMBER(DID_ST_RX_REQ_COLL, iNumXCollisions);
#undef ST_NUMBER
} /* rxcache_dinfo_status() */
/*--------------------------------------------------------------------*/
regexp *
rx_dup (regexp * expr)
/* Increase the reference count of <expr> and return it.
*/
{
expr->refs++;
return expr;
}
/*--------------------------------------------------------------------*/
void
rx_free (regexp * expr)
/* Decrease the reference count of <expr>. If it reaches 0, the
* structure and all associated data is deallocated.
*/
{
expr->refs--;
if (!expr->refs)
xfree(expr);
}
/*--------------------------------------------------------------------*/
#if defined(GC_SUPPORT)
/*--------------------------------------------------------------------*/
void
clear_rxcache_refs (void)
/* Clear all the refcounts in the hashtables.
* The refs of the shared strings and of the memory blocks are
* not of our concern.
*/
{
int i;
for (i = 0; i < RXCACHE_TABLE; i++)
if (NULL != xtable[i].pString)
xtable[i].pRegexp->refs = 0;
} /* clear_rxcache_refs() */
/*--------------------------------------------------------------------*/
void
count_rxcache_refs (void)
/* Mark all memory referenced from the hashtables. */
{
int i;
for (i = 0; i < RXCACHE_TABLE; i++)
{
if (NULL != xtable[i].pString)
{
count_ref_from_string((char *)xtable[i].pString);
count_rxcache_ref(xtable[i].pRegexp);
}
} /* for (i) */
} /* count_rxcache_refs() */
/*--------------------------------------------------------------------*/
void
count_rxcache_ref (regexp * pRegexp)
/* Mark all memory associated with one regexp structure and count
* the refs.
* This function is called both from rxcache as well as from ed.
*/
{
note_malloced_block_ref((char *)pRegexp);
pRegexp->refs++;
} /* count_rxcache_ref() */
#endif /* if GC_SUPPORT */
#endif /* if RXCACHE_TABLE */
/*====================================================================*/