/**
 * \file chunk.c
 *
 * \brief Chunk memory management system
 *
 * <h3>Synopsis:</h3>
 * The chunk memory management system has three goals: to reduce overall
 * memory consumption, to improve locality of reference, and to allow
 * less-used sections of memory to be paged out to disk.  These three
 * goals are accomplished by implementing an allocation management layer
 * on top of malloc(), with significantly reduced overhead and the ability
 * to rearrange allocations to actively control fragmentation and increase
 * locality.
 * 
 * 
 * <h3>Basic operation:</h3>
 * The managed memory pool is divided into regions of approximately 64KB.
 * These regions contain variable-size chunks representing allocated and
 * available (free) memory.  No individual allocation may be larger than
 * will fit in a single region, and no allocation may be smaller than one
 * byte.  Each chunk has between two and four bytes of overhead (indicating
 * the used/free status, the size of the chunk, and the number of
 * dereferences for the chunk), and each region has additional overhead
 * of about 42 bytes.
 * 
 * Allocations are made with the chunk_create() call, which is given
 * the size of the data, the data value to be stored, and an initial
 * dereference count to be assigned to the chunk.  Once created, the
 * value of a chunk cannot be changed; the storage is immutable.
 * chunk_create() returns an integral reference value that can be
 * used to retrieve or free the allocation.
 * 
 * Allocations are accessed with the chunk_fetch(), chunk_len(), and
 * chunk_derefs() calls.  Each of these is given a reference (as
 * returned by chunk_create()), and chunk_fetch() is additionally
 * given a buffer and length to fill with the allocated value.  Both
 * chunk_fetch() and chunk_len() increment a chunk's dereference
 * count (up to the maximum of 255), which is used in migration to
 * improve locality.
 * 
 * Allocations are freed with the chunk_delete() call, which also
 * requires a reference as input.
 * 
 * Finally, allocations are allowed to rearrange themselves with the
 * chunk_migration() call.  chunk_migration() takes an array of
 * pointers to chunk references as input, and examines each of the
 * indicated chunks to see which need to be moved to improve the
 * distribution of allocations.  If any allocations are moved, then
 * the references to the moved allocations are updated in place
 * (hence the array of pointers to references, instead of just an
 * array of references).  Migration may be done incrementally by
 * submitting only a portion of the allocations with each call to
 * chunk_migration(); however, _all_ allocations made with chunk_create()
 * must eventually be submitted for migration in order to maintain the
 * memory pool in a non-fragmented state.
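 * 
 * As a minimal sketch of typical use (hedged, not lifted from a real
 * caller; see chunk.h for the exact prototypes, and read BUFFER_LEN as
 * any caller-chosen buffer size):
 * \verbatim
 * unsigned char buf[BUFFER_LEN];
 * chunk_reference_t ref, *refp;
 * u_int_16 len;
 *
 * ref = chunk_create((unsigned char const *) "some text", 9, 1);
 * len = chunk_fetch(ref, buf, sizeof buf);   (copies the value out)
 * refp = &ref;
 * chunk_migration(1, &refp);                 (may rewrite ref in place)
 * chunk_delete(ref);
 * \endverbatim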
 * 
 * 
 * <h3>Migration:</h3>
 * Under normal conditions, extended use of this chunk allocation system
 * would lead to a significantly fragmented datastore, unless there was
 * some means to defragment the storage arena.  Left unchecked, that
 * fragmentation would waste memory and ruin locality of reference.
 * Calling chunk_migration() gives
 * the allocator permission to move allocations around both to defragment
 * the arena and to improve locality of reference (by making sure that
 * all the infrequently used chunks are segregated from the chunks in
 * active use).  Of course, moving all the allocated chunks at once would
 * be a slow and painful process.  Instead, migration may be done
 * incrementally, giving permission to move a small number of chunks
 * at any one time, and spreading out the cost of defragmenting the
 * data store.
 * 
 * Just because you give permission to move a chunk doesn't mean that it
 * will be moved.  The chunk may be perfectly happy where it is, with
 * no need to move it elsewhere.  Chunks are only moved when their
 * personal happiness would be improved by a move.  In general, maximizing
 * the happiness of individual chunks will improve the happiness of the
 * whole.
 * 
 * There are two things that factor into a chunk's happiness.
 * The things that make a chunk unhappy are:
 * <ul>
 * <li> Having a dereference count different from the region average.
 *      The greater the difference, the more unhappy the chunk is.
 * <li> Being in a sparsely populated region.  The fewer chunks in a
 *      region, the more unhappy the chunks in it.
 * </ul>
 * Neither of these factors is absolute; both of them have different
 * weights that add into a general unhappiness for the chunk.  The lower
 * the unhappiness, the better.
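 * 
 * As a rough sketch (mirroring the scoring done in find_best_region(),
 * below), the unhappiness of placing a chunk with a given deref count
 * into a region works out to:
 * \verbatim
 * score = abs(chunk_derefs - region_average_derefs);
 * if (region is paged out)
 *   score += IN_MEMORY_BIAS;
 * if (used_count <= LONLINESS_LIMIT)
 *   score += 1 << (LONLINESS_LIMIT - used_count);
 * \endverbatim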
 * 
 * Over time and usage, the dereference counts for chunks will increase
 * and eventually reach a maximum value of 255.  (The count is limited
 * by the fact that it's stored in a single byte for each chunk.)  If
 * this is left unchecked, eventually all chunks would have a dereference
 * count of 255, and the counts would be useless for improving locality.
 * To counteract this, when the average dereference count for a certain
 * number of regions exceeds 128, the 'migration period' is incremented
 * and all chunk dereference counts are halved.  The critical number of
 * regions is determined based on the cache size and the total number of
 * regions.  If you're not using forking dumps, then period change should
 * be controlled primarily by the frequency of database dumps (which end
 * up incrementing the dereference count on all chunks, and thus all
 * regions).  Given a dump frequency of once per hour (the default), there
 * should be a period change about every 2.6 days: each dump adds one
 * dereference to every chunk, so climbing from the post-halving average
 * of 64 back past 128 takes roughly 64 hourly dumps.
 * 
 * 
 * <h3>Statistics:</h3>
 * The chunk memory management system keeps several statistics about
 * the allocation pool, both to maintain good operation through active
 * encouragement of locality, and to satisfy the curiosity of people
 * using the system (and its designer ;-)).  These statistics are
 * reported (in PennMUSH) through the use of the @stats command,
 * with the /chunks switch.
 * 
 * @stats/chunks generates output similar to this:
 * \verbatim
 * Chunks:         99407 allocated (   8372875 bytes,     223808 ( 2%) overhead)
 *                   74413 short     (   1530973 bytes,     148826 ( 9%) overhead)
 *                   24994 medium    (   6841902 bytes,      74982 ( 1%) overhead)
 *                       0 long      (         0 bytes,          0 ( 0%) overhead)
 *                   128 free      (   1319349 bytes,      23058 ( 1%) fragmented)
 * Regions:          147 total,       16 cached
 * Paging:        158686 out,     158554 in
 * Storage:      9628500 total (86% saturation)
 *  
 * Period:             1 (   5791834 accesses so far,       1085 chunks at max)
 * Migration:     245543 moves this period
 *                  145536 slide
 *                      45 away
 *                   30719 fill exact
 *                   69243 fill inexact
 * \endverbatim
 * First, the number of allocated chunks is given, along with their
 * total size and overhead.  Then, the allocated chunks are broken
 * up by size-range; short chunks (2 to 63 bytes) with two bytes of
 * overhead each, medium chunks (64 to 8191 bytes) with three bytes
 * of overhead each, and long chunks (8192 to ~64K bytes) with four
 * bytes of overhead each.  Rounding out the individual chunk statistics
 * is the number of free chunks, their total size, and the amount of
 * fragmented free space (free space not in the largest free chunk for
 * its region is considered fragmented).
 * 
 * Next come statistics on regions: the number of regions in use
 * and the number held in the memory cache.  All regions not in the
 * cache are paged out to disk.  Paging statistics follow, listing
 * the number of times a region has been moved out of or into 
 * memory cache.  After that, the total amount of storage (in memory
 * or on disk) used is given, along with the saturation rate (where
 * saturation is indicated by what fraction of the used space is
 * actually allocated in chunks).
 * 
 * Finally come statistics on migration and the migration period.
 * The period number is listed, along with the total number of
 * dereferences in the period and how many chunks have the maximum
 * dereference count of 255.  Then the amount of migration movement
 * is listed, both in total and broken up by category.  Slides occur
 * when an allocation is shifted to the other side of a neighboring
 * free space.  Away moves are made when an allocation is extremely
 * unhappy where it is, and is pushed out to somewhere else.  Fills
 * are when an allocation is moved in order to fill in a free space;
 * the space can be either exactly filled by the move, or inexactly
 * filled (leaving some remaining free space).
 * 
 * 
 * <h3>Histograms:</h3>
 * The chunk memory management system can also display a few
 * histograms about itself.  These histograms are reported (in PennMUSH)
 * through the use of the @stats command, with the /regions, /freespace,
 * or /paging switches.
 * 
 * All of @stats/regions, @stats/freespace, and @stats/paging produce
 * histograms vs. region average dereference count.  The histograms
 * use buckets four counts wide, so all regions from 0-3 will be in
 * the first bucket, 4-7 in the second, etc., up to 252-255 in the
 * last bucket.  If the heights of the buckets are significantly
 * different, then the highest spikes will be allowed to extend off
 * the top of the histogram (with their real values labeled in
 * parentheses next to them).
 * 
 * @stats/regions is a histogram of how many regions at each count
 * currently exist.  In a healthy game, there should be a large spike
 * at some dereference count between 64 and 128 (representing the
 * largely unused portion of the database), a lesser spike at 255
 * (representing the portion of the database that's used very frequently),
 * and a smattering of regions at other counts, with either new areas
 * of the database (below the large spike) or semi-frequently used
 * areas (above the large spike).  New migration periods occur when
 * the large spike would pass 128, at which point everything is halved
 * and the spike is pushed back down to 64.
 * 
 * @stats/freespace is a histogram of how much free space exists in
 * regions at each dereference count.  This histogram is included
 * to aid in diagnosis of the cause for dropping saturation rates.
 * 
 * @stats/paging is a histogram of the number of regions being paged
 * in or out at each dereference count.  As of this writing, a very
 * unhealthy behaviour is observed, wherein the histogram shows a
 * trapezoid between 64 and 128, drowning out most of the rest of the
 * chart.  This indicates that as time goes on, the attributes
 * associated with a single low-use object are getting scattered
 * randomly throughout all the low-use regions, and thus when dumps
 * occur (with their linear enumeration of all attributes on objects)
 * the low-use regions thrash in and out of cache.  This can be very
 * detrimental to dump performance.  Something will have to be done
 * to fix this tendency of migration.  Healthy behaviour will make
 * some other pattern in the paging histogram which has not yet been
 * determined.
 */

#include "copyrite.h"
#include "config.h"
#include "conf.h"

#include <limits.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>

#include <fcntl.h>
#include <assert.h>
#include <sys/types.h>
#ifdef WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
#include <errno.h>

#include "externs.h"
#include "chunk.h"
#include "command.h"
#include "dbdefs.h"
#include "intrface.h"
#include "log.h"
#include "mymalloc.h"
#include "confmagic.h"

#ifdef WIN32
#pragma warning( disable : 4761)	/* disable warning re conversion */
#endif

/* A whole bunch of debugging #defines. */
/** Basic debugging stuff - are assertions checked? */
#undef CHUNK_DEBUG
/** Paranoid people check for region validity after every operation
 * that modifies a region. */
#undef CHUNK_PARANOID
/** Log all moves and slides during migration. */
#undef DEBUG_CHUNK_MIGRATE
/** Log creation of regions. */
#undef DEBUG_CHUNK_REGION_CREATE
/** Log paging of regions. */
#undef DEBUG_CHUNK_PAGING
/** Log all mallocs. */
#undef DEBUG_CHUNK_MALLOC

/** For debugging, we keep a rolling log of debug messages.
 * These get dumped to disk if we're about to panic.
 */
#define ROLLING_LOG_SIZE 200
#define ROLLING_LOG_ENTRY_LEN 1024

/* debug... */
#ifdef CHUNK_DEBUG
#define ASSERT(x) assert(x)
#else				/* CHUNK_DEBUG */
static int ignore;	/**< Used to shut up compiler warnings when not asserting */
#define ASSERT(x) ignore++
#endif				/* CHUNK_DEBUG */

/*
 * Sizes, limits, etc.
 */
/** Region size, including header.
 * This is a little less than 64K to allow for malloc overhead without
 * spilling into next page */
#define REGION_SIZE 65500

/** Region capacity.
 * This is the size minus the fixed region overhead.
 */
#define REGION_CAPACITY (REGION_SIZE - FIRST_CHUNK_OFFSET_IN_REGION)

/** Maximum chunk length.
 * This is fairly arbitrary, but must be less than
 * REGION_CAPACITY (it must fit in a region).
 */
#define MAX_CHUNK_LEN (16384-1)

/** Number of oddballs tracked in regions.
 * This is used to figure out when we should pull regions in because
 * we have an opportunity to migrate chunks that don't match.
 * Relatively arbitrary; too low means you don't move things out
 * enough, but boosting it too high wastes memory.
 */
#define NUM_ODDBALLS 10

/** Minimum disagreement to be an oddball.
 * This is used to figure out when we should pull regions in because
 * we have an opportunity to migrate chunks that don't match.
 * Relatively arbitrary; too low means you don't move things out
 * enough, but boosting it too high wastes migration time.
 */
#define ODDBALL_THRESHOLD 8

/*
 * FIXME: pulling config variables out of my left ear. Fix later.
 */
/** How much space is initially allocated for the in-memory region array? */
#define FIXME_INIT_REGION_LEN 20
/** How much does the region array grow by each time it has to grow? */
#define FIXME_REGION_ARRAY_INCREMENT 10

/** Limit for when being a nearly-empty region counts against being
 * a good region.  This is exponential: an empty region gets a penalty
 * of 1 << LONLINESS_LIMIT.  A near-empty region gets a penalty of
 * 1 << (LONLINESS_LIMIT - used_count).
 *
 * Rationale: we don't want to reuse empty regions (or make new regions)
 * for trivialities.
 */
#define LONLINESS_LIMIT 5

/** Free space limit for when we consider making new regions.
 * The total free space must be less than this percent of capacity.
 *
 * Rationale: we don't want to waste memory with lots of extra regions.
 */
#define FREE_PERCENT_LIMIT 2

/** Bias for allocating chunks in a region that's already in memory.
 * Actually, this is a bias against allocating in swapped-out regions,
 * but that's a nit...
 *
 * Rationale: reduce the amount of paging during migration.
 */
#define IN_MEMORY_BIAS 4

/*
 *  Structures and Accessor Macros
 */
/*
 * What a chunk_reference_t looks like from the inside
 */
/** Get the region from a chunk_reference_t. */
#define ChunkReferenceToRegion(ref) ((ref) >> 16)
/** Get the offset from a chunk_reference_t. */
#define ChunkReferenceToOffset(ref) ((ref) & 0xFFFF)
/** Make a chunk_reference_t from a region and offset. */
#define ChunkReference(region, offset) \
  ((chunk_reference_t)(((region) << 16) | (offset)))
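/* For example, ChunkReference(0x0003, 0x1A2C) == 0x00031A2C; the two
 * halves are recovered with ChunkReferenceToRegion() and
 * ChunkReferenceToOffset(). */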

/** Sentinel value used to mark unused cache regions. */
#define INVALID_REGION_ID 0xffff

/**
 * \verbatim
 * The chunk headers look like this:
 *
 * Short:
 * byte 0     byte 1
 *  76543210   76543210
 * +--------+ +--------+
 * |ft len  | | deref  |
 * +--------+ +--------+
 *  ||\__6_/   \___8__/
 *  ||   |         `----- deref count (decays)
 *  ||   `--------------- length of data
 *  |`------------------- tag bit (0 for short)
 *  `-------------------- free flag (0 for allocated, 1 for free)
 *
 * Medium:
 * byte 0     byte 1     byte 2
 *  76543210   76543210   76543210
 * +--------+ +--------+ +--------+
 * |ftg lenM| | deref  | |  lenL  |
 * +--------+ +--------+ +--------+
 *  |||\_5_/   \___8__/   \___8__/
 *  |||  |         |          `----- length, least significant
 *  |||  |         `---------------- deref count (decays)
 *  |||  `-------------------------- length, most significant
 *  ||`----------------------------- tag bit (0 for medium)
 *  |`------------------------------ tag bit (1 for medium/long)
 *  `------------------------------- free flag
 *
 * Long:
 * byte 0     byte 1     byte 2     byte 3
 *  76543210   76543210   76543210   76543210
 * +--------+ +--------+ +--------+ +--------+
 * |ftg     | | deref  | |  lenM  | |  lenL  |
 * +--------+ +--------+ +--------+ +--------+
 *  |||        \___8__/   \_______16________/
 *  |||            |               `----------- length of data
 *  |||            `--------------------------- deref count (decays)
 *  ||`---------------------------------------- tag bit (1 for long)
 *  |`----------------------------------------- tag bit (1 for medium/long)
 *  `------------------------------------------ free flag
 *
 *
 * Note in particular that the dereference count is always in the second
 * byte of a chunk, to simplify the access logic.
 * \endverbatim
 */
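
/* Worked examples of the layout above: a used short chunk holding 5 data
 * bytes with a deref count of 3 has header bytes 0x05 0x03.  A free
 * medium chunk with data length 0x123 has byte 0 = 0x80|0x40|0x01 = 0xC1
 * and a lenL byte of 0x23; for free chunks, the free-list "next" offset
 * is stored with its high byte in the first data byte and its low byte
 * in the deref byte (see write_free_chunk() and ChunkNextFree() below). */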

/*
 * Fields in chunk headers
 */
#define CHUNK_FREE_MASK   0x80
#define CHUNK_FREE        0x80
#define CHUNK_USED        0x00

#define CHUNK_TAG1_MASK   0x40
#define CHUNK_TAG1_SHORT  0x00
#define CHUNK_TAG1_MEDIUM 0x40
#define CHUNK_TAG1_LONG   0x40
#define CHUNK_TAG1_OFFSET 0

#define CHUNK_TAG2_MASK   0x20
#define CHUNK_TAG2_MEDIUM 0x00
#define CHUNK_TAG2_LONG   0x20
#define CHUNK_TAG2_OFFSET 0

#define CHUNK_SHORT_LEN_MASK 0x3F
#define CHUNK_SHORT_LEN_OFFSET 0

#define CHUNK_MEDIUM_LEN_MSB_MASK 0x1F
#define CHUNK_MEDIUM_LEN_LSB_MASK 0xFF
#define CHUNK_MEDIUM_LEN_MSB_OFFSET 0
#define CHUNK_MEDIUM_LEN_LSB_OFFSET 2

#define CHUNK_LONG_LEN_MSB_MASK 0xFF
#define CHUNK_LONG_LEN_LSB_MASK 0xFF
#define CHUNK_LONG_LEN_MSB_OFFSET 2
#define CHUNK_LONG_LEN_LSB_OFFSET 3

#define CHUNK_DEREF_OFFSET 1

#define CHUNK_DEREF_MAX 0xFF

#define CHUNK_AGED_MASK   0x10
#define CHUNK_AGED_OFFSET 0

#define CHUNK_SHORT_DATA_OFFSET 2
#define CHUNK_MEDIUM_DATA_OFFSET 3
#define CHUNK_LONG_DATA_OFFSET 4

#define MIN_CHUNK_LEN 1
#define MIN_REMNANT_LEN (CHUNK_SHORT_DATA_OFFSET + MIN_CHUNK_LEN)

#define MAX_SHORT_CHUNK_LEN CHUNK_SHORT_LEN_MASK
#define MAX_MEDIUM_CHUNK_LEN \
        ((CHUNK_MEDIUM_LEN_MSB_MASK << 8) | CHUNK_MEDIUM_LEN_LSB_MASK)
#define MAX_LONG_CHUNK_LEN \
        (REGION_CAPACITY - CHUNK_LONG_DATA_OFFSET)

#define LenToFullLen(len) \
  ((len) + \
   (((len) > MAX_SHORT_CHUNK_LEN) \
    ? ((len) > MAX_MEDIUM_CHUNK_LEN) \
      ? CHUNK_LONG_DATA_OFFSET \
      : CHUNK_MEDIUM_DATA_OFFSET \
    : CHUNK_SHORT_DATA_OFFSET))
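/* For example, LenToFullLen(63) == 65 (short header), but
 * LenToFullLen(64) == 67: a 64-byte value no longer fits the 6-bit
 * short length field, so it pays the 3-byte medium header instead. */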

#define ChunkPointer(region, offset) \
  (((unsigned char *)(regions[(region)].in_memory)) + (offset))
#define ChunkReferenceToPointer(ref) \
  ChunkPointer(ChunkReferenceToRegion((ref)), ChunkReferenceToOffset((ref)))

/*
 * Macros for probing and manipulating chunk headers
 */
#define CPLenShort(cptr) \
  ((cptr)[CHUNK_SHORT_LEN_OFFSET] & CHUNK_SHORT_LEN_MASK)
#define CPLenMedium(cptr) \
  ((((cptr)[CHUNK_MEDIUM_LEN_MSB_OFFSET] & CHUNK_MEDIUM_LEN_MSB_MASK) << 8) + \
   ((cptr)[CHUNK_MEDIUM_LEN_LSB_OFFSET] & CHUNK_MEDIUM_LEN_LSB_MASK))
#define CPLenLong(cptr) \
  ((((cptr)[CHUNK_LONG_LEN_MSB_OFFSET] & CHUNK_LONG_LEN_MSB_MASK) << 8) + \
   ((cptr)[CHUNK_LONG_LEN_LSB_OFFSET] & CHUNK_LONG_LEN_LSB_MASK))
#define CPLen(cptr) \
  ((*(cptr) & CHUNK_TAG1_MASK) \
   ? (*(cptr) & CHUNK_TAG2_MASK) \
     ? CPLenLong((cptr)) \
     : CPLenMedium((cptr)) \
   : CPLenShort((cptr)))
#define ChunkLen(region, offset) \
  CPLen(ChunkPointer((region), (offset)))
#define CPFullLen(cptr) \
  ((*(cptr) & CHUNK_TAG1_MASK) \
   ? (*(cptr) & CHUNK_TAG2_MASK) \
     ? (CPLenLong((cptr)) + CHUNK_LONG_DATA_OFFSET) \
     : (CPLenMedium((cptr)) + CHUNK_MEDIUM_DATA_OFFSET) \
   : (CPLenShort((cptr)) + CHUNK_SHORT_DATA_OFFSET))
#define ChunkFullLen(region, offset) \
  CPFullLen(ChunkPointer((region), (offset)))

#define ChunkIsFree(region, offset) \
  ((*ChunkPointer((region), (offset)) & CHUNK_FREE_MASK) == CHUNK_FREE)
#define ChunkIsShort(region, offset) \
  ((*ChunkPointer((region), (offset)) & CHUNK_TAG1_MASK) == CHUNK_TAG1_SHORT)
#define ChunkIsMedium(region, offset) \
  ((*ChunkPointer((region), (offset)) & (CHUNK_TAG1_MASK | CHUNK_TAG2_MASK)) \
   == (CHUNK_TAG1_MEDIUM | CHUNK_TAG2_MEDIUM))
#define ChunkIsLong(region, offset) \
  ((*ChunkPointer((region), (offset)) & (CHUNK_TAG1_MASK | CHUNK_TAG2_MASK)) \
   == (CHUNK_TAG1_LONG | CHUNK_TAG2_LONG))

#define ChunkDerefs(region, offset) \
  (ChunkPointer((region), (offset))[CHUNK_DEREF_OFFSET])

#define CPDataPtr(cptr) \
  ((*(cptr) & CHUNK_TAG1_MASK) \
   ? (*(cptr) & CHUNK_TAG2_MASK) \
     ? (cptr) + CHUNK_LONG_DATA_OFFSET \
     : (cptr) + CHUNK_MEDIUM_DATA_OFFSET \
   : (cptr) + CHUNK_SHORT_DATA_OFFSET)
#define ChunkDataPtr(region, offset) \
  (CPDataPtr(ChunkPointer(region, offset)))

#define ChunkNextFree(region, offset) \
  ((ChunkDataPtr(region, offset)[0] << 8) + ChunkDerefs(region, offset))

/* 0 for no, 1 for yes with room, 2 for exact */
#define FitsInSpace(size, capacity) \
  (((size) == (capacity)) ? 2 : ((size) <= (capacity) - MIN_REMNANT_LEN))
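/* With MIN_REMNANT_LEN == 3: FitsInSpace(10, 10) == 2 (exact fit),
 * FitsInSpace(10, 13) == 1 (leaves a usable 3-byte remnant), but
 * FitsInSpace(10, 12) == 0, because the 2-byte leftover could not be
 * written as a valid free chunk. */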

/** Region info that gets paged out with its region.
 * This is at the start of the region;
 * the rest of the 64K bytes of the region contain chunks.
 */
typedef struct region_header {
  u_int_16 region_id;		/**< will be INVALID_REGION_ID if not in use */
  u_int_16 first_free;		/**< offset of 1st free chunk */
  struct region_header *prev;	/**< linked list prev for LRU cache */
  struct region_header *next;	/**< linked list next for LRU cache */
} RegionHeader;

#define FIRST_CHUNK_OFFSET_IN_REGION sizeof(RegionHeader)

/** In-memory (never paged) region info.  */
typedef struct region {
  u_int_16 used_count;		/**< number of used chunks */
  u_int_16 free_count;		/**< number of free chunks */
  u_int_16 free_bytes;		/**< number of free bytes (with headers) */
  u_int_16 largest_free_chunk;	/**< largest single free chunk */
  unsigned int total_derefs;	/**< total of all used chunk derefs */
  unsigned int period_last_touched;	/**< "this" period, for deref counts;
                                           we don't page in regions to update
                                           counts on period change! */
  RegionHeader *in_memory;	/**< cache entry; NULL if paged out */
  u_int_16 oddballs[NUM_ODDBALLS];	/**< chunk offsets with odd derefs */
} Region;

#define RegionDerefs(region) \
  (regions[(region)].used_count \
   ? (regions[(region)].total_derefs >> \
      (curr_period - regions[(region)].period_last_touched)) / \
     regions[(region)].used_count \
   : 0)
#define RegionDerefsWithChunk(region, derefs) \
   (((regions[(region)].total_derefs >> \
     (curr_period - regions[(region)].period_last_touched)) + derefs) / \
    (regions[(region)].used_count + 1))
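/* For instance, a region last touched two periods ago with
 * total_derefs == 100 and 10 used chunks has an effective average of
 * (100 >> 2) / 10 == 2; the shift applies the halvings the region
 * missed while paged out. */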

/*
 *  Globals
 */

/** Swap File */
#ifdef WIN32
typedef HANDLE fd_type;
static HANDLE swap_fd;
static HANDLE swap_fd_child = INVALID_HANDLE_VALUE;
#else
typedef int fd_type;
static int swap_fd;
static int swap_fd_child = -1;
#endif
static char child_filename[300];

/** Deref scale control.
 * When the deref counts get too big, the current period is incremented
 * and all derefs are divided by 2. */
static u_int_32 curr_period;

/*
 * Info about all regions
 */
static u_int_32 region_count;	/**< regions in use */
static u_int_32 region_array_len;	/**< length of regions array */
static Region *regions;		/**< regions array, realloced as (rarely) needed */

/*
 * regions presently in memory
 */
static u_int_32 cached_region_count;	/**< number of regions in cache */
static RegionHeader *cache_head;	/**< most recently used region */
static RegionHeader *cache_tail;	/**< least recently used region */

/*
 * statistics
 */
static int stat_used_short_count;	/**< How many short chunks? */
static int stat_used_short_bytes;	/**< How much space in short chunks? */
static int stat_used_medium_count;	/**< How many medium chunks? */
static int stat_used_medium_bytes;	/**< How much space in medium chunks? */
static int stat_used_long_count;	/**< How many long chunks? */
static int stat_used_long_bytes;	/**< How much space in long chunks? */
static int stat_deref_count;		/**< Dereferences this period */
static int stat_deref_maxxed;		/**< Number of chunks with max derefs */
/** histogram for average derefs of regions being paged in/out */
static int stat_paging_histogram[CHUNK_DEREF_MAX + 1];
static int stat_page_out;		/**< Number of page-outs */
static int stat_page_in;		/**< Number of page-ins */
static int stat_migrate_slide;		/**< Number of slide migrations */
static int stat_migrate_move;		/**< Number of move migrations */
static int stat_migrate_away;		/**< Number of chunk evictions */
static int stat_create;			/**< Number of chunk creations */
static int stat_delete;			/**< Number of chunk deletions */



/*
 * migration globals that are used for holding relevant data...
 */
static int m_count;		/**< The used length for the arrays. */
static chunk_reference_t **m_references; /**< The passed-in references array. */

#ifdef CHUNK_PARANOID
/** Log of recent actions for debug purposes */
static char rolling_log[ROLLING_LOG_SIZE][ROLLING_LOG_ENTRY_LEN];
static int rolling_pos;
static int noisy_log = 0;
#endif

/*
 * Forward decls
 */
static void find_oddballs(u_int_16 region);

/*
 * Debug routines
 */
/** Add a message to the rolling log. */
static void
debug_log(char const *format, ...)
{
#ifdef CHUNK_PARANOID
  va_list args;

  va_start(args, format);
  vsnprintf(rolling_log[rolling_pos], ROLLING_LOG_ENTRY_LEN, format, args);
  va_end(args);

  rolling_log[rolling_pos][ROLLING_LOG_ENTRY_LEN - 1] = '\0';
  if (noisy_log) {
    fprintf(tracelog_fp, "%s\n", rolling_log[rolling_pos]);
    fflush(tracelog_fp);
  }
  rolling_pos = (rolling_pos + 1) % ROLLING_LOG_SIZE;
#else
  if (format)
    return;			/* shut up the compiler warning */
#endif
}

#ifdef CHUNK_PARANOID
/** Dump the rolling log. */
static void
dump_debug_log(FILE * fp)
{
  int j;
  fputs("Recent chunk activity:\n", fp);
  j = rolling_pos;
  do {
    if (rolling_log[j][0]) {
      fputs(rolling_log[j], fp);
      fputc('\n', fp);
      rolling_log[j][0] = '\0';
    }
    j = (j + 1) % ROLLING_LOG_SIZE;
  } while (j != rolling_pos);
  fputs("End of recent chunk activity.\n", fp);
  fflush(fp);
}

/** Test if a chunk is migratable. */
static int
migratable(u_int_16 region, u_int_16 offset)
{
  chunk_reference_t ref = ChunkReference(region, offset);
  int j;

  for (j = 0; j < m_count; j++)
    if (m_references[j][0] == ref)
      return 1;
  return 0;
}

/** Give a detailed map of a region.
 * Lists pertinent region information, and all the chunks in the region.
 * Does not print the contents of the chunks (which would probably be
 * unreadable, anyway).
 * \param region the region to display.
 * \param fp the FILE* to output to.
 */
static void
debug_dump_region(u_int_16 region, FILE * fp)
{
  Region *rp = regions + region;
  RegionHeader *rhp;
  u_int_16 offset, count;

  ASSERT(region < region_count);
  rhp = rp->in_memory;

  fprintf(fp, "region: id:%04x period:%-8x deref:%-8x (%-2x per chunk)\n",
	  region, rp->period_last_touched, rp->total_derefs,
	  RegionDerefs(region));
  fprintf(fp, "        #used:%-4x #free:%-4x fbytes:%-4x hole:%-4x ",
	  rp->used_count, rp->free_count, rp->free_bytes,
	  rp->largest_free_chunk);
  if (rhp)
    fprintf(fp, "first:%-4x h_id:%-4x\n", rhp->first_free, rhp->region_id);
  else
    fprintf(fp, "PAGED\n");
  fflush(fp);

  if (rhp) {
    for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
	 offset < REGION_SIZE; offset += ChunkFullLen(region, offset)) {
      fprintf(fp, "chunk:%c%4s %-6s off:%04x full:%04x ",
	      migratable(region, offset) ? '*' : ' ',
	      ChunkIsFree(region, offset) ? "FREE" : "",
	      ChunkIsShort(region, offset) ? "SHORT" :
	      (ChunkIsMedium(region, offset) ? "MEDIUM" : "LONG"),
	      offset, ChunkFullLen(region, offset));
      if (ChunkIsFree(region, offset)) {
	fprintf(fp, "next:%04x\n", ChunkNextFree(region, offset));
      } else {
	fprintf(fp, "doff:%04x len:%04x ",
		ChunkDataPtr(region, offset) - (unsigned char *) rhp,
		ChunkLen(region, offset));
	count = ChunkDerefs(region, offset);
	if (count == 0xFF) {
	  fprintf(fp, "deref:many\n");
	} else {
	  fprintf(fp, "deref:%04x\n", count);
	}
      }
    }
  }
}

/** Make sure a chunk is real.
 * Detect bogus chunk references handed to the system.
 * \param region the region to verify.
 * \param offset the offset to verify.
 */
static void
verify_used_chunk(u_int_16 region, u_int_16 offset)
{
  u_int_16 pos;

  ASSERT(region < region_count);

  for (pos = FIRST_CHUNK_OFFSET_IN_REGION;
       pos < REGION_SIZE; pos += ChunkFullLen(region, pos)) {
    if (pos == offset) {
      if (ChunkIsFree(region, pos))
	mush_panic("Invalid reference to free chunk as used");
      return;
    }
  }
  mush_panic("Invalid reference to non-chunk as used");
}

/** Verify that a region is sane.
 * Do a thorough consistency check on a region, verifying all the region
 * totals, making sure the counts are consistent, and that all the space
 * in the region is accounted for.
 * \param region the region to verify.
 * \return true if the region is valid.
 */
static int
region_is_valid(u_int_16 region)
{
  int result;
  Region *rp;
  RegionHeader *rhp;
  int used_count;
  int free_count;
  int free_bytes;
  int largest_free;
  unsigned int total_derefs;
  int len;
  int was_free;
  int dump;
  u_int_16 next_free;
  u_int_16 offset;

  if (region >= region_count) {
    do_rawlog(LT_ERR, "region 0x%04x is not valid: region_count is 0x%04x",
	      region, region_count);
    return 0;
  }
  result = 1;

  rp = regions + region;
  if (rp->used_count > REGION_SIZE / MIN_REMNANT_LEN) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: chunk count is ludicrous: 0x%04x",
	      region, rp->used_count);
    result = 0;
  }
  if (rp->free_count > REGION_SIZE / MIN_REMNANT_LEN) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: free count is ludicrous: 0x%04x",
	      region, rp->free_count);
    result = 0;
  }
  if (rp->largest_free_chunk > rp->free_bytes) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: largest free chunk > free bytes:"
	      " 0x%04x > 0x%04x",
	      region, rp->largest_free_chunk, rp->free_bytes);
    result = 0;
  }
  if (!rp->in_memory)
    return result;

  rhp = rp->in_memory;

  if (rhp->region_id != region) {
    do_rawlog(LT_ERR, "region 0x%04x is not valid: region in cache is 0x%04x",
	      region, rhp->region_id);
    result = 0;
  }
  dump = 0;
  used_count = 0;
  total_derefs = 0;
  free_count = 0;
  free_bytes = 0;
  largest_free = 0;
  was_free = 0;
  next_free = rhp->first_free;
  for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
       offset < REGION_SIZE; offset += len) {
    if (was_free && ChunkIsFree(region, offset)) {
      do_rawlog(LT_ERR,
		"region 0x%04x is not valid: uncoalesced free chunk:"
		" 0x%04x (see map)", region, offset);
      result = 0;
      dump = 1;
    }
    len = ChunkFullLen(region, offset);
    was_free = ChunkIsFree(region, offset);
    if (was_free) {
      free_count++;
      free_bytes += len;
      if (largest_free < len)
	largest_free = len;
      if (next_free != offset) {
	do_rawlog(LT_ERR,
		  "region 0x%04x is not valid: free chain broken:"
		  " 0x%04x, expecting 0x%04x (see map)",
		  region, offset, next_free);
	result = 0;
	dump = 1;
      }
      next_free = ChunkNextFree(region, offset);
    } else {
      used_count++;
      total_derefs += ChunkDerefs(region, offset);
      if (ChunkIsMedium(region, offset) &&
	  ChunkLen(region, offset) <= MAX_SHORT_CHUNK_LEN) {
	do_rawlog(LT_ERR,
		  "region 0x%04x is not valid: medium chunk too small:"
		  " 0x%04x (see map)", region, offset);
	result = 0;
	dump = 1;
      }
      if (ChunkIsLong(region, offset) &&
	  ChunkLen(region, offset) <= MAX_MEDIUM_CHUNK_LEN) {
	do_rawlog(LT_ERR,
		  "region 0x%04x is not valid: long chunk too small:"
		  " 0x%04x (see map)", region, offset);
	result = 0;
	dump = 1;
      }
    }
  }
  if (offset != REGION_SIZE) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: last chunk past bounds (see map)",
	      region);
    result = 0;
  }
  if (next_free != 0) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: free chain unterminated:"
	      " expecting 0x%04x (see map)", region, next_free);
    result = 0;
    dump = 1;
  }
  if (rp->used_count != used_count) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: used count is wrong:"
	      " 0x%04x should be 0x%04x", region, rp->used_count, used_count);
    result = 0;
  }
  if (rp->total_derefs != total_derefs) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: total derefs is wrong:"
	      " 0x%04x should be 0x%04x",
	      region, rp->total_derefs, total_derefs);
    result = 0;
  }
  if (rp->free_count != free_count) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: free count is wrong:"
	      " 0x%04x should be 0x%04x", region, rp->free_count, free_count);
    result = 0;
  }
  if (rp->free_bytes != free_bytes) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: free bytes is wrong:"
	      " 0x%04x should be 0x%04x", region, rp->free_bytes, free_bytes);
    result = 0;
  }
  if (rp->largest_free_chunk != largest_free) {
    do_rawlog(LT_ERR,
	      "region 0x%04x is not valid: largest free is wrong:"
	      " 0x%04x should be 0x%04x",
	      region, rp->largest_free_chunk, largest_free);
    result = 0;
  }
  if (dump) {
    debug_dump_region(region, tracelog_fp);
  }
  return result;
}
#endif


/*
 * Utility Routines - Chunks
 */
/** Write a used chunk.
 * \param region the region to put the chunk in.
 * \param offset the offset to put the chunk at.
 * \param full_len the length for the chunk, including headers.
 * \param data the externally supplied data for the chunk.
 * \param data_len the length of the externally supplied data.
 * \param derefs the deref count to set on the chunk.
 */
static void
write_used_chunk(u_int_16 region, u_int_16 offset, u_int_16 full_len,
		 unsigned char const *data, u_int_16 data_len,
		 unsigned char derefs)
{
  unsigned char *cptr = ChunkPointer(region, offset);
  if (full_len <= MAX_SHORT_CHUNK_LEN + CHUNK_SHORT_DATA_OFFSET) {
    /* chunk is short */
    cptr[0] = full_len - CHUNK_SHORT_DATA_OFFSET +
      CHUNK_USED + CHUNK_TAG1_SHORT;
    cptr[CHUNK_DEREF_OFFSET] = derefs;
    memcpy(cptr + CHUNK_SHORT_DATA_OFFSET, data, data_len);
  } else if (full_len <= MAX_MEDIUM_CHUNK_LEN + CHUNK_MEDIUM_DATA_OFFSET) {
    /* chunk is medium */
    u_int_16 len = full_len - CHUNK_MEDIUM_DATA_OFFSET;
    cptr[0] = (len >> 8) + CHUNK_USED + CHUNK_TAG1_MEDIUM + CHUNK_TAG2_MEDIUM;
    cptr[CHUNK_DEREF_OFFSET] = derefs;
    cptr[CHUNK_MEDIUM_LEN_LSB_OFFSET] = len & 0xff;
    memcpy(cptr + CHUNK_MEDIUM_DATA_OFFSET, data, data_len);
  } else {
    /* chunk is long */
    u_int_16 len = full_len - CHUNK_LONG_DATA_OFFSET;
    cptr[0] = CHUNK_USED + CHUNK_TAG1_LONG + CHUNK_TAG2_LONG;
    cptr[CHUNK_DEREF_OFFSET] = derefs;
    cptr[CHUNK_LONG_LEN_MSB_OFFSET] = len >> 8;
    cptr[CHUNK_LONG_LEN_LSB_OFFSET] = len & 0xff;
    memcpy(cptr + CHUNK_LONG_DATA_OFFSET, data, data_len);
  }
}

/** Write a free chunk.
 * \param region the region to put the chunk in.
 * \param offset the offset to put the chunk at.
 * \param full_len the length for the chunk, including headers.
 * \param next the offset for the next free chunk.
 */
static void
write_free_chunk(u_int_16 region, u_int_16 offset, u_int_16 full_len,
		 u_int_16 next)
{
  unsigned char *cptr = ChunkPointer(region, offset);
  if (full_len <= MAX_SHORT_CHUNK_LEN + CHUNK_SHORT_DATA_OFFSET) {
    /* chunk is short */
    cptr[0] = full_len - CHUNK_SHORT_DATA_OFFSET +
      CHUNK_FREE + CHUNK_TAG1_SHORT;
    cptr[CHUNK_SHORT_DATA_OFFSET] = next >> 8;
    cptr[CHUNK_DEREF_OFFSET] = next & 0xff;
  } else if (full_len <= MAX_MEDIUM_CHUNK_LEN + CHUNK_MEDIUM_DATA_OFFSET) {
    /* chunk is medium */
    u_int_16 len = full_len - CHUNK_MEDIUM_DATA_OFFSET;
    cptr[0] = (len >> 8) + CHUNK_FREE + CHUNK_TAG1_MEDIUM + CHUNK_TAG2_MEDIUM;
    cptr[CHUNK_MEDIUM_LEN_LSB_OFFSET] = len & 0xff;
    cptr[CHUNK_MEDIUM_DATA_OFFSET] = next >> 8;
    cptr[CHUNK_DEREF_OFFSET] = next & 0xff;
  } else {
    /* chunk is long */
    u_int_16 len = full_len - CHUNK_LONG_DATA_OFFSET;
    cptr[0] = CHUNK_FREE + CHUNK_TAG1_LONG + CHUNK_TAG2_LONG;
    cptr[CHUNK_LONG_LEN_MSB_OFFSET] = len >> 8;
    cptr[CHUNK_LONG_LEN_LSB_OFFSET] = len & 0xff;
    cptr[CHUNK_LONG_DATA_OFFSET] = next >> 8;
    cptr[CHUNK_DEREF_OFFSET] = next & 0xff;
  }
}

/** Write the next pointer for a free chunk.
 * \param region the region of the chunk to write in.
 * \param offset the offset of the chunk to write in.
 * \param next the offset for the next free chunk.
 */
static void
write_next_free(u_int_16 region, u_int_16 offset, u_int_16 next)
{
  unsigned char *cptr = ChunkPointer(region, offset);
  if (ChunkIsShort(region, offset)) {
    /* chunk is short */
    cptr[CHUNK_SHORT_DATA_OFFSET] = next >> 8;
    cptr[CHUNK_DEREF_OFFSET] = next & 0xff;
  } else if (ChunkIsMedium(region, offset)) {
    /* chunk is medium */
    cptr[CHUNK_MEDIUM_DATA_OFFSET] = next >> 8;
    cptr[CHUNK_DEREF_OFFSET] = next & 0xff;
  } else {
    /* chunk is long */
    cptr[CHUNK_LONG_DATA_OFFSET] = next >> 8;
    cptr[CHUNK_DEREF_OFFSET] = next & 0xff;
  }
}

/** Combine neighboring free chunks, if possible.
 * The left-hand candidate chunk is passed in.
 * \param region the region of the chunks to coalesce.
 * \param offset the offset of the left-hand chunk to coalesce.
 */
static void
coalesce_frees(u_int_16 region, u_int_16 offset)
{
  Region *rp = regions + region;
  u_int_16 full_len, next;
  full_len = ChunkFullLen(region, offset);
  next = ChunkNextFree(region, offset);
  if (offset + full_len == next) {
    full_len += ChunkFullLen(region, next);
    next = ChunkNextFree(region, next);
    write_free_chunk(region, offset, full_len, next);
    rp->free_count--;
    if (rp->largest_free_chunk < full_len)
      rp->largest_free_chunk = full_len;
  }
}

/** Free a used chunk.
 * \param region the region of the chunk to free.
 * \param offset the offset of the chunk to free.
 */
static void
free_chunk(u_int_16 region, u_int_16 offset)
{
  Region *rp = regions + region;
  u_int_16 full_len, left;

  full_len = ChunkFullLen(region, offset);
  rp->total_derefs -= ChunkDerefs(region, offset);
  rp->used_count--;
  rp->free_count++;
  rp->free_bytes += full_len;
  if (rp->largest_free_chunk < full_len)
    rp->largest_free_chunk = full_len;

  if (ChunkIsShort(region, offset)) {
    /* chunk is short */
    stat_used_short_count--;
    stat_used_short_bytes -= full_len;
  } else if (ChunkIsMedium(region, offset)) {
    /* chunk is medium */
    stat_used_medium_count--;
    stat_used_medium_bytes -= full_len;
  } else {
    /* chunk is long */
    stat_used_long_count--;
    stat_used_long_bytes -= full_len;
  }

  left = rp->in_memory->first_free;
  if (!left) {
    write_free_chunk(region, offset, full_len, 0);
    rp->largest_free_chunk = full_len;
    rp->in_memory->first_free = offset;
  } else if (left > offset) {
    write_free_chunk(region, offset, full_len, left);
    rp->in_memory->first_free = offset;
    left = 0;
  } else {
    u_int_16 next;
    next = ChunkNextFree(region, left);
    while (next && next < offset) {
      left = next;
      next = ChunkNextFree(region, left);
    }
    write_free_chunk(region, offset, full_len, next);
    write_next_free(region, left, offset);
  }
  coalesce_frees(region, offset);
  if (left)
    coalesce_frees(region, left);
}

/** Find the largest free chunk in a region.
 * \param region the region to search for a large hole in.
 * \return the size of the largest free chunk.
 */
static u_int_16
largest_hole(u_int_16 region)
{
  u_int_16 size;
  u_int_16 offset;
  size = 0;
  for (offset = regions[region].in_memory->first_free;
       offset; offset = ChunkNextFree(region, offset))
    if (size < ChunkFullLen(region, offset))
      size = ChunkFullLen(region, offset);
  return size;
}

/** Allocate a used chunk out of a free hole.
 * This possibly splits the hole into two chunks, and it maintains the
 * free list.  It does NOT write the used chunk; that must be done by
 * caller.
 * \param region the region to allocate in.
 * \param offset the offset of the hole to use.
 * \param full_len the length (including headers) of the space to allocate.
 * \param align the alignment to use: 0 = easiest, 1 = left, 2 = right.
 * \return the offset of the allocated space.
 */
static u_int_16
split_hole(u_int_16 region, u_int_16 offset, u_int_16 full_len, int align)
{
  Region *rp = regions + region;
  u_int_16 hole_len = ChunkFullLen(region, offset);

  rp->used_count++;
  if (full_len <= MAX_SHORT_CHUNK_LEN + CHUNK_SHORT_DATA_OFFSET) {
    /* chunk is short */
    stat_used_short_count++;
    stat_used_short_bytes += full_len;
  } else if (full_len <= MAX_MEDIUM_CHUNK_LEN + CHUNK_MEDIUM_DATA_OFFSET) {
    /* chunk is medium */
    stat_used_medium_count++;
    stat_used_medium_bytes += full_len;
  } else {
    /* chunk is long */
    stat_used_long_count++;
    stat_used_long_bytes += full_len;
  }

  if (hole_len == full_len) {
    rp->free_count--;
    rp->free_bytes -= full_len;
    if (rp->in_memory->first_free == offset)
      rp->in_memory->first_free = ChunkNextFree(region, offset);
    else {
      u_int_16 hole;
      for (hole = rp->in_memory->first_free;
	   hole; hole = ChunkNextFree(region, hole))
	if (ChunkNextFree(region, hole) == offset)
	  break;
      ASSERT(hole);
      write_next_free(region, hole, ChunkNextFree(region, offset));
    }
    if (rp->largest_free_chunk == hole_len)
      rp->largest_free_chunk = largest_hole(region);
    return offset;
  }

  ASSERT(hole_len >= full_len + MIN_REMNANT_LEN);
  if (!align) {
    if (rp->in_memory->first_free == offset)
      align = 1;
    else
      align = 2;
  }
  if (align == 1) {
    rp->free_bytes -= full_len;
    write_free_chunk(region, offset + full_len,
		     hole_len - full_len, ChunkNextFree(region, offset));
    if (rp->in_memory->first_free == offset)
      rp->in_memory->first_free += full_len;
    else {
      u_int_16 hole;
      for (hole = rp->in_memory->first_free;
	   hole; hole = ChunkNextFree(region, hole))
	if (ChunkNextFree(region, hole) == offset)
	  break;
      ASSERT(hole);
      write_next_free(region, hole, offset + full_len);
    }
    if (rp->largest_free_chunk == hole_len)
      rp->largest_free_chunk = largest_hole(region);
    return offset;
  } else {
    rp->free_bytes -= full_len;
    write_free_chunk(region, offset,
		     hole_len - full_len, ChunkNextFree(region, offset));
    if (rp->largest_free_chunk == hole_len)
      rp->largest_free_chunk = largest_hole(region);
    return offset + hole_len - full_len;
  }
}

/*
 *  Utility Routines - Cache
 */

/** Read a region from a file.
 * \param fd file to read from
 * \param rhp region buffer to use
 * \param region region to read
 */
static void
read_cache_region(fd_type fd, RegionHeader * rhp, u_int_16 region)
{
  off_t file_offset = (off_t) region * REGION_SIZE;
  int j;
  char *pos;
  size_t remaining;
  int done;

  debug_log("read_cache_region %04x", region);

  /* Try to seek up to 3 times... */
  for (j = 0; j < 3; j++)
#ifdef WIN32
    if (SetFilePointer(fd, file_offset, NULL, FILE_BEGIN) == file_offset)
      break;
#else
    if (lseek(fd, file_offset, SEEK_SET) == file_offset)
      break;
#endif
  if (j >= 3)
#ifdef WIN32
    mush_panicf("chunk swap file seek, GetLastError %d", GetLastError());
#else
    mush_panicf("chunk swap file seek, errno %d: %s", errno, strerror(errno));
#endif
  pos = (char *) rhp;
  remaining = REGION_SIZE;
  for (j = 0; j < 10; j++) {
#ifdef WIN32
#ifndef __MINGW32__
    if (!ReadFile(fd, pos, remaining, &done, NULL)) {
      /* nothing */
    }
#else
    done = read(fd, pos, remaining);
#endif
#else
    done = read(fd, pos, remaining);
#endif
    if (done >= 0) {
      remaining -= done;
      pos += done;
      if (!remaining)
	return;
    }
#ifndef WIN32
    if (done == -1 && errno == EAGAIN)
      sleep(0);
#endif
  }
#ifdef WIN32
  mush_panicf("chunk swap file read, %lu remaining, GetLastError %d",
	      (unsigned long) remaining, GetLastError());
#else
  mush_panicf("chunk swap file read, %lu remaining, errno %d: %s",
	      (unsigned long) remaining, errno, strerror(errno));
#endif
}

/** Write a region to a file.
 * \param fd file to write to
 * \param rhp region buffer to use
 * \param region region to write
 */
static void
write_cache_region(fd_type fd, RegionHeader * rhp, u_int_16 region)
{
  off_t file_offset = (off_t) region * REGION_SIZE;
  int j;
  char *pos;
  size_t remaining;
  int done;

  debug_log("write_cache_region %04x", region);

  /* Try to seek up to 3 times... */
  for (j = 0; j < 3; j++)
#ifdef WIN32
    if (SetFilePointer(fd, file_offset, NULL, FILE_BEGIN) == file_offset)
      break;
#else
    if (lseek(fd, file_offset, SEEK_SET) == file_offset)
      break;
#endif
  if (j >= 3)
#ifdef WIN32
    mush_panicf("chunk swap file seek, GetLastError %d", GetLastError());
#else
    mush_panicf("chunk swap file seek, errno %d: %s", errno, strerror(errno));
#endif
  pos = (char *) rhp;
  remaining = REGION_SIZE;
  for (j = 0; j < 10; j++) {
#ifdef WIN32
#ifndef __MINGW32__
    if (!WriteFile(fd, pos, remaining, &done, NULL)) {
      /* nothing */
    }
#else
    done = write(fd, pos, remaining);
#endif
#else
    done = write(fd, pos, remaining);
#endif
    if (done >= 0) {
      remaining -= done;
      pos += done;
      if (!remaining)
	return;
    }
#ifndef WIN32
    if (done == -1 && errno == EAGAIN)
      sleep(0);
#endif
  }
#ifdef WIN32
  mush_panicf("chunk swap file write, %lu remaining, GetLastError %d",
	      (unsigned long) remaining, GetLastError());
#else
  mush_panicf("chunk swap file write, %lu remaining, errno %d: %s",
	      (unsigned long) remaining, errno, strerror(errno));
#endif
}

/** Update cache position to stave off recycling.
 * \param rhp the cached region to keep around.
 */
static void
touch_cache_region(RegionHeader * rhp)
{
  debug_log("touch_cache_region %04x", rhp->region_id);

  if (cache_head == rhp)
    return;
  if (cache_tail == rhp)
    cache_tail = rhp->prev;
  if (rhp->prev)
    rhp->prev->next = rhp->next;
  if (rhp->next)
    rhp->next->prev = rhp->prev;

  if (cache_head)
    cache_head->prev = rhp;
  rhp->next = cache_head;
  rhp->prev = NULL;
  cache_head = rhp;
  if (!cache_tail)
    cache_tail = rhp;
}

/** Find space in the cache.
 * This is likely to require paging out something.
 * \return a pointer to an available cache region.
 */
static RegionHeader *
find_available_cache_region(void)
{
  RegionHeader *rhp;

  debug_log("find_available_cache_region");

  if (!cache_tail ||
      cached_region_count * REGION_SIZE < (unsigned) CHUNK_CACHE_MEMORY) {
    /* first use ... normal case if empty ... so allocate space */
#ifdef DEBUG_CHUNK_MALLOC
    do_rawlog(LT_TRACE, "CHUNK: malloc()ing a cache region");
#endif
    rhp = mush_malloc(REGION_SIZE, "chunk region cache buffer");
    if (!rhp) {
      mush_panic("chunk region cache buffer allocation failure");
    }
    cached_region_count++;
    rhp->region_id = INVALID_REGION_ID;
    rhp->prev = NULL;
    rhp->next = NULL;
    return rhp;
  }
  if (cache_tail->region_id == INVALID_REGION_ID)
    return cache_tail;

  rhp = cache_tail;
  /* page the current occupant out */
  find_oddballs(rhp->region_id);
#ifdef DEBUG_CHUNK_PAGING
  do_rawlog(LT_TRACE, "CHUNK: Paging out region %04x (offset %08x)",
	    rhp->region_id, (unsigned) file_offset);
#endif
  write_cache_region(swap_fd, rhp, rhp->region_id);
  /* keep statistics */
  stat_paging_histogram[RegionDerefs(rhp->region_id)]++;
  stat_page_out++;

  /* mark the paged out region as not in memory */
  regions[rhp->region_id].in_memory = NULL;
  /* mark it not in use for sanity check reasons */
  rhp->region_id = INVALID_REGION_ID;

  return rhp;
}

/** Bring a paged out region back into memory.
 * If necessary, make room by paging another region out.
 * \param region the region to bring in.
 */
static void
bring_in_region(u_int_16 region)
{
  Region *rp = regions + region;
  RegionHeader *rhp, *prev, *next;
  u_int_32 offset;
  unsigned int shift;

  debug_log("bring_in_region %04x", region);

  ASSERT(region < region_count);
  if (rp->in_memory)
    return;
  rhp = find_available_cache_region();
  ASSERT(rhp->region_id == INVALID_REGION_ID);

  /* This is cheesy, but I _really_ don't want to do dual data structures */
  prev = rhp->prev;
  next = rhp->next;

  /* page it in */
#ifdef DEBUG_CHUNK_PAGING
  do_rawlog(LT_TRACE, "CHUNK: Paging in region %04x (offset %08x)",
	    region, (unsigned) file_offset);
#endif
  read_cache_region(swap_fd, rhp, region);
  /* link the region to its cache entry */
  rp->in_memory = rhp;

  /* touch the cache entry */
  rhp->prev = prev;
  rhp->next = next;
  touch_cache_region(rhp);

  /* make derefs current */
  if (rp->period_last_touched != curr_period) {
    shift = curr_period - rp->period_last_touched;
    if (shift > 8) {
      rp->total_derefs = 0;
      for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
	   offset < REGION_SIZE; offset += ChunkFullLen(region, offset)) {
	ChunkDerefs(region, offset) = 0;
      }
    } else {
      rp->total_derefs = 0;
      for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
	   offset < REGION_SIZE; offset += ChunkFullLen(region, offset)) {
	if (ChunkIsFree(region, offset))
	  continue;
	ChunkDerefs(region, offset) >>= shift;
	rp->total_derefs += ChunkDerefs(region, offset);
      }
    }
    rp->period_last_touched = curr_period;
  }

  /* keep statistics */
  stat_page_in++;
  stat_paging_histogram[RegionDerefs(region)]++;
}

/*
 * Utility Routines - Regions
 */
/** Create a new region.
 * Recycle an empty region if possible.
 * \return the region id for the new region.
 */
static u_int_16
create_region(void)
{
  u_int_16 region;

  for (region = 0; region < region_count; region++)
    if (regions[region].used_count == 0)
      break;
  if (region >= region_count) {
    if (region_count >= region_array_len) {
      /* need to grow the regions array */
      region_array_len += FIXME_REGION_ARRAY_INCREMENT;
#ifdef DEBUG_CHUNK_MALLOC
      do_rawlog(LT_TRACE, "CHUNK: realloc()ing region array");
#endif
      regions = (Region *) realloc(regions, region_array_len * sizeof(Region));
      if (!regions)
	mush_panic("chunk: region array realloc failure");
    }
    region = region_count;
    region_count++;
    regions[region].in_memory = NULL;
  }

  regions[region].used_count = 0;
  regions[region].free_count = 1;
  regions[region].free_bytes = REGION_CAPACITY;
  regions[region].largest_free_chunk = regions[region].free_bytes;
  regions[region].total_derefs = 0;
  regions[region].period_last_touched = curr_period;
  if (!regions[region].in_memory)
    regions[region].in_memory = find_available_cache_region();
  regions[region].in_memory->region_id = region;
  regions[region].in_memory->first_free = FIRST_CHUNK_OFFSET_IN_REGION;
  write_free_chunk(region, FIRST_CHUNK_OFFSET_IN_REGION,
		   regions[region].free_bytes, 0);

  touch_cache_region(regions[region].in_memory);
  return region;
}

/** Find the oddball chunks in a region.
 * \param region the region to search in.
 */
static void
find_oddballs(u_int_16 region)
{
  Region *rp = regions + region;
  int j, d1, d2;
  u_int_16 offset, len;
  int mean;

  for (j = 0; j < NUM_ODDBALLS; j++)
    rp->oddballs[j] = 0;

  mean = RegionDerefs(region);

  for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
       offset < REGION_SIZE; offset += len) {
    len = ChunkFullLen(region, offset);
    if (ChunkIsFree(region, offset))
      continue;
    d1 = abs(mean - ChunkDerefs(region, offset));
    if (d1 < ODDBALL_THRESHOLD)
      continue;
    j = NUM_ODDBALLS;
    while (j--) {
      if (!rp->oddballs[j])
	continue;
      d2 = abs(mean - ChunkDerefs(region, rp->oddballs[j]));
      if (d1 < d2)
	break;
      if (j < NUM_ODDBALLS - 1)
	rp->oddballs[j + 1] = rp->oddballs[j];
    }
    j++;
    if (j >= NUM_ODDBALLS)
      continue;
    rp->oddballs[j] = offset;
  }
}

/** Find the best region to hold a chunk.
 * This is done by going through all the known regions and getting
 * prospective unhappiness ratings if the chunk was placed there.
 * The region with the least unhappiness wins.  Note that this may
 * well be a new region, if all existing regions are either full or
 * sufficiently unhappy.
 * \param full_len the size of the chunk, including headers.
 * \param derefs the number of dereferences on the chunk.
 * \param old_region the region the chunk was in before (if any).
 * \return the region id for the least unhappy region.
 */
static u_int_16
find_best_region(u_int_16 full_len, int derefs, u_int_16 old_region)
{
  u_int_16 best_region, region;
  int best_score, score;
  int free_bytes;
  Region *rp;

  best_region = INVALID_REGION_ID;
  best_score = INT_MAX;
  free_bytes = 0;
  for (region = 0; region < region_count; region++) {
    rp = regions + region;
    free_bytes += rp->free_bytes;
    if (!FitsInSpace(full_len, rp->largest_free_chunk) &&
	!(rp->free_count == 2 &&
	  rp->free_bytes - rp->largest_free_chunk == full_len))
      continue;

    if (region == old_region)
      score = derefs - RegionDerefs(region);
    else
      score = derefs - RegionDerefsWithChunk(region, derefs);
    if (score < 0)
      score = -score;
    if (!rp->in_memory)
      score += IN_MEMORY_BIAS;
    if (rp->used_count <= LONLINESS_LIMIT)
      score += 1 << (LONLINESS_LIMIT - rp->used_count);

    if (best_score > score) {
      best_score = score;
      best_region = region;
    }
  }

  if (best_region == INVALID_REGION_ID) {
    best_region = create_region();
#ifdef DEBUG_CHUNK_REGION_CREATE
    do_rawlog(LT_TRACE, "find_best_region had to create region %04x",
	      best_region);
#endif
  } else if (best_score > (1 << LONLINESS_LIMIT) + IN_MEMORY_BIAS &&
	     (free_bytes * 100 / (REGION_CAPACITY * region_count)) <
	     FREE_PERCENT_LIMIT) {
    best_region = create_region();
#ifdef DEBUG_CHUNK_REGION_CREATE
    do_rawlog(LT_TRACE, "find_best_region chose to create region %04x",
	      best_region);
#endif
  }
  return best_region;
}

/** Find the best offset in a region to hold a chunk.
 * \param full_len the length of the chunk, including headers.
 * \param region the region to allocate in.
 * \param old_region the region the chunk was in before (if any).
 * \param old_offset the offset the chunk was at before (if any).
 * \return the offset of a suitable free chunk, or 0 if none fits.
 */
static u_int_16
find_best_offset(u_int_16 full_len, u_int_16 region,
		 u_int_16 old_region, u_int_16 old_offset)
{
  u_int_16 fits, offset;

  bring_in_region(region);

  fits = 0;
  for (offset = regions[region].in_memory->first_free; offset;
       offset = ChunkNextFree(region, offset)) {
    if (region == old_region) {
      if (offset > old_offset)
	break;
      if (offset + ChunkFullLen(region, offset) == old_offset)
	return fits ? fits : offset;
    }
    if (ChunkFullLen(region, offset) == full_len)
      return offset;
    if (!fits && ChunkFullLen(region, offset) >= full_len + MIN_REMNANT_LEN)
      fits = offset;
  }

  return fits;
}

/*
 * Utility Routines - Statistics and debugging
 */
/** Compile a histogram for the region dereferences.
 * \return histogram data for the regions.
 */
static int *
chunk_regionhist(void)
{
  static int histogram[CHUNK_DEREF_MAX + 1];
  unsigned int j;

  for (j = 0; j <= CHUNK_DEREF_MAX; j++)
    histogram[j] = 0;
  for (j = 0; j < region_count; j++) {
    histogram[RegionDerefs(j)]++;
  }
  return histogram;
}

/** Compile a histogram for the region free space.
 * \return histogram data for the free space.
 */
static int const *
chunk_freehist(void)
{
  static int histogram[CHUNK_DEREF_MAX + 1];
  unsigned int j;

  for (j = 0; j <= CHUNK_DEREF_MAX; j++)
    histogram[j] = 0;
  for (j = 0; j < region_count; j++) {
    histogram[RegionDerefs(j)] += regions[j].free_bytes;
  }
  return histogram;
}

/** Display statistics to a player, or dump them to a log
 */
#define STAT_OUT(x) \
  do { \
    s = (x); \
    if (GoodObject(player)) \
      notify(player, s); \
    else \
      do_rawlog(LT_TRACE, "%s", s); \
  } while (0)

/** Display the stats summary page.
 * \param player the player to display it to, or NOTHING to log it.
 */
static void
chunk_statistics(dbref player)
{
  const char *s;
  int overhead;
  int free_count = 0;
  int free_bytes = 0;
  int free_large = 0;
  int used_count = 0;
  int used_bytes = 0;
  u_int_16 rid;

  for (rid = 0; rid < region_count; rid++) {
    free_count += regions[rid].free_count;
    free_bytes += regions[rid].free_bytes;
    free_large += regions[rid].largest_free_chunk;
    used_count += regions[rid].used_count;
  }
  used_bytes = (REGION_CAPACITY * region_count) - free_bytes;

  if (!GoodObject(player)) {
    do_rawlog(LT_TRACE, "---- Chunk statistics");
  }
  overhead = stat_used_short_count * CHUNK_SHORT_DATA_OFFSET +
    stat_used_medium_count * CHUNK_MEDIUM_DATA_OFFSET +
    stat_used_long_count * CHUNK_LONG_DATA_OFFSET;
  STAT_OUT(tprintf
	   ("Chunks:    %10d allocated (%10d bytes, %10d (%2d%%) overhead)",
	    used_count, used_bytes, overhead,
	    used_bytes ? overhead * 100 / used_bytes : 0));
  overhead = stat_used_short_count * CHUNK_SHORT_DATA_OFFSET;
  STAT_OUT(tprintf
	   ("             %10d short     (%10d bytes, %10d (%2d%%) overhead)",
	    stat_used_short_count, stat_used_short_bytes, overhead,
	    stat_used_short_bytes ? overhead * 100 /
	    stat_used_short_bytes : 0));
  overhead = stat_used_medium_count * CHUNK_MEDIUM_DATA_OFFSET;
  STAT_OUT(tprintf
	   ("             %10d medium    (%10d bytes, %10d (%2d%%) overhead)",
	    stat_used_medium_count, stat_used_medium_bytes, overhead,
	    stat_used_medium_bytes ? overhead * 100 /
	    stat_used_medium_bytes : 0));
  overhead = stat_used_long_count * CHUNK_LONG_DATA_OFFSET;
  STAT_OUT(tprintf
	   ("             %10d long      (%10d bytes, %10d (%2d%%) overhead)",
	    stat_used_long_count, stat_used_long_bytes, overhead,
	    stat_used_long_bytes ? overhead * 100 / stat_used_long_bytes : 0));
  STAT_OUT(tprintf
	   ("           %10d free      (%10d bytes, %10d (%2d%%) fragmented)",
	    free_count, free_bytes, free_bytes - free_large,
	    free_bytes ? (free_bytes - free_large) * 100 / free_bytes : 0));
  overhead = region_count * REGION_SIZE + region_array_len * sizeof(Region);
  STAT_OUT(tprintf("Storage:   %10d total (%2d%% saturation)",
		   overhead, used_bytes * 100 / overhead));
  STAT_OUT(tprintf("Regions:   %10d total, %8d cached",
		   region_count, cached_region_count));
  STAT_OUT(tprintf("Paging:    %10d out, %10d in",
		   stat_page_out, stat_page_in));
  STAT_OUT(" ");
  STAT_OUT(tprintf("Period:    %10d (%10d accesses so far, %10d chunks at max)",
		   curr_period, stat_deref_count, stat_deref_maxxed));
  STAT_OUT(tprintf("Activity:  %10d creates, %10d deletes this period",
		   stat_create, stat_delete));
  STAT_OUT(tprintf("Migration: %10d moves this period",
		   stat_migrate_slide + stat_migrate_move));
  STAT_OUT(tprintf("             %10d slide    %10d move",
		   stat_migrate_slide, stat_migrate_move));
  STAT_OUT(tprintf("             %10d in region%10d out of region",
		   stat_migrate_slide + stat_migrate_move - stat_migrate_away,
		   stat_migrate_away));
}

/** Show just the page counts.
 * \param player the player to display it to, or NOTHING to log it.
 */
static void
chunk_page_stats(dbref player)
{
  const char *s;
  STAT_OUT(tprintf("Paging:    %10d out, %10d in",
		   stat_page_out, stat_page_in));
}

/** Display the per-region stats.
 * \param player the player to display it to, or NOTHING to log it.
 */
static void
chunk_region_statistics(dbref player)
{
  u_int_16 rid;
  const char *s;

  if (!GoodObject(player)) {
    do_rawlog(LT_TRACE, "---- Region statistics");
  }
  for (rid = 0; rid < region_count; rid++) {
    STAT_OUT(tprintf
	     ("region:%4d  #used:%5d  #free:%5d  "
	      "fbytes:%04x  largest:%04x  deref:%3d",
	      rid, regions[rid].used_count, regions[rid].free_count,
	      regions[rid].free_bytes, regions[rid].largest_free_chunk,
	      RegionDerefs(rid)));
  }
}

/** Display a histogram.
 * \param player the player to display it to, or NOTHING to log it.
 * \param histogram the histogram data to display.
 * \param legend the legend for the histogram.
 */
static void
chunk_histogram(dbref player, int const *histogram, char const *legend)
{
  const char *s;
  int j, k, max, pen, ante;
  char buffer[20][65];
  char num[16];

  /* Find the three largest column heights. */
  max = pen = ante = 0;
  for (j = 0; j < 64; j++) {
    k = histogram[j * 4 + 0] + histogram[j * 4 + 1] +
      histogram[j * 4 + 2] + histogram[j * 4 + 3];
    if (max < k) {
      ante = pen;
      pen = max;
      max = k;
    } else if (pen < k) {
      ante = pen;
      pen = k;
    } else if (ante < k) {
      ante = k;
    }
  }
  /* If the top one or two columns are extreme outliers, scale the
   * chart to the next height down so the remaining columns stay
   * visible; clipped columns get numeric labels drawn in below. */
  if (ante < max / 2) {
    if (pen < max / 2 && ante >= pen / 2)
      max = pen;
    else
      max = ante;
  }
  if (max == 0)
    max = 1;			/* avoid dividing by zero on an empty chart */
  for (j = 0; j < 20; j++) {
    for (k = 0; k < 64; k++)
      buffer[j][k] = ' ';
    buffer[j][64] = '\0';
  }
  /* Draw the columns; each column aggregates four adjacent buckets. */
  for (j = 0; j < 64; j++) {
    k = histogram[j * 4 + 0] + histogram[j * 4 + 1] +
      histogram[j * 4 + 2] + histogram[j * 4 + 3];
    k = k * 20 / max;
    if (k >= 20)
      k = 20;
    while (k-- > 0)
      buffer[k][j] = '*';
  }
  /* Label any clipped columns with their actual counts, placing each
   * label on whichever side avoids colliding with earlier labels. */
  pen = 0;
  for (j = 0; j < 64; j++) {
    k = histogram[j * 4 + 0] + histogram[j * 4 + 1] +
      histogram[j * 4 + 2] + histogram[j * 4 + 3];
    if (k > max) {
      sprintf(num, "(%d)", k);
      if (j < 32) {
	if (j < pen)
	  ante = 18;
	else
	  ante = 19;
	memcpy(buffer[ante] + j + 1, num, strlen(num));
	pen = j + strlen(num) + 1;
      } else {
	if (j - (int) strlen(num) < pen)
	  ante = 18;
	else
	  ante = 19;
	memcpy(buffer[ante] + j - strlen(num), num, strlen(num));
	pen = j;
      }
    }
  }
  STAT_OUT("");
  STAT_OUT(legend);
  STAT_OUT(tprintf("%6d |%s", max, buffer[19]));
  j = 19;
  while (j-- > 1)
    STAT_OUT(tprintf("       |%s", buffer[j]));
  STAT_OUT(tprintf("     0 |%s", buffer[0]));
  for (j = 0, k = 2; j < 64; j++, k += 4)
    buffer[0][j] = '-';
  STAT_OUT(tprintf("       +%s", buffer[0]));
  STAT_OUT(tprintf("        0%31s%32d", "|", 255));
}

#undef STAT_OUT


/*
 * Utility Routines - Migration
 */

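/** Sort the migration array in place, ordering references numerically
 * (which groups them by region and offset).  A simple insertion sort:
 * the array is typically close to sorted already between calls, which
 * keeps this cheap in practice.
 */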
static void
migrate_sort(void)
{
  int j, k;
  chunk_reference_t *t;

  for (j = 1; j < m_count; j++) {
    t = m_references[j];
    for (k = j; k--;) {
      if (m_references[k][0] < t[0])
	break;
      m_references[k + 1] = m_references[k];
    }
    m_references[k + 1] = t;
  }
}

/** Slide an allocated chunk over into a neighboring free space.
 * \param region the region of the free space.
 * \param offset the offset of the free space.
 * \param which the index (in the migration arrays) of the chunk to move.
 */
static void
migrate_slide(u_int_16 region, u_int_16 offset, int which)
{
  Region *rp = regions + region;
  u_int_16 o_len, len, next, other, prev, o_off, o_oth;

  debug_log("migrate_slide %d (%08x) to %04x%04x",
	    which, m_references[which][0], region, offset);

  bring_in_region(region);

  len = ChunkFullLen(region, offset);
  next = ChunkNextFree(region, offset);
  other = ChunkReferenceToOffset(m_references[which][0]);
  o_len = ChunkFullLen(region, other);

  o_off = offset;		/* saved only for the paranoid log below */
  o_oth = other;
  if (other > offset) {
    /* The chunk sits just after the hole: slide it left, leaving the
     * freed space at its tail. */
    memmove(ChunkPointer(region, offset), ChunkPointer(region, other), o_len);
#ifdef DEBUG_CHUNK_MIGRATE
    do_rawlog(LT_TRACE, "CHUNK: Sliding chunk %08x to %04x%04x",
	      m_references[which][0], region, offset);
#endif
    m_references[which][0] = ChunkReference(region, offset);
    other = offset + o_len;
  } else {
    /* The chunk sits just before the hole: slide it right, so the
     * freed space ends up where the chunk used to start. */
    prev = offset + len - o_len;
    memmove(ChunkPointer(region, prev), ChunkPointer(region, other), o_len);
#ifdef DEBUG_CHUNK_MIGRATE
    do_rawlog(LT_TRACE, "CHUNK: Sliding chunk %08x to %04x%04x",
	      m_references[which][0], region, prev);
#endif
    m_references[which][0] = ChunkReference(region, prev);
  }
  write_free_chunk(region, other, len, next);
  coalesce_frees(region, other);
  /* Relink the free list to point at the hole's new location. */
  if (rp->in_memory->first_free == offset) {
    rp->in_memory->first_free = other;
  } else {
    prev = rp->in_memory->first_free;
    while (prev && ChunkNextFree(region, prev) != offset)
      prev = ChunkNextFree(region, prev);
    write_next_free(region, prev, other);
    coalesce_frees(region, prev);
  }

  stat_migrate_slide++;

#ifdef CHUNK_PARANOID
  if (!region_is_valid(region)) {
    do_rawlog(LT_TRACE, "Invalid region after migrate_slide!");
    do_rawlog(LT_TRACE, "Was moving %04x%04x to %04x%04x (became %08x)...",
	      region, o_oth, region, o_off, m_references[which][0]);
    do_rawlog(LT_TRACE, "Chunk length %04x into hole length %04x", o_len, len);
    debug_dump_region(region, tracelog_fp);
    dump_debug_log(tracelog_fp);
    mush_panic("Invalid region after migrate_slide!");
  }
#endif
}

/** Move an allocated chunk into a free hole.
 * \param region the region of the free space.
 * \param offset the offset of the free space.
 * \param which the index (in the migration arrays) of the chunk to move.
 * \param align the alignment to use: 0 = easiest, 1 = left, 2 = right.
 */
static void
migrate_move(u_int_16 region, u_int_16 offset, int which, int align)
{
  Region *rp = regions + region;
  u_int_16 s_reg, s_off, s_len, o_off, length;
  Region *srp;

  debug_log("migrate_move %d (%08x) to %04x%04x, alignment %d",
	    which, m_references[which][0], region, offset, align);

  s_reg = ChunkReferenceToRegion(m_references[which][0]);
  s_off = ChunkReferenceToOffset(m_references[which][0]);
  srp = regions + s_reg;

  bring_in_region(region);
  if (!srp->in_memory) {
    /* Mark the target region most recently used so that paging in the
     * source region can't evict it, then touch it again afterwards so
     * both regions stay hot in the cache. */
    touch_cache_region(rp->in_memory);
    bring_in_region(s_reg);
    touch_cache_region(rp->in_memory);
  }

  s_len = ChunkFullLen(s_reg, s_off);
  length = ChunkFullLen(region, offset);

  if (s_reg == region && (s_off + s_len == offset || offset + length == s_off)) {
    migrate_slide(region, offset, which);
    return;
  }
#ifdef CHUNK_PARANOID
  if (!FitsInSpace(s_len, ChunkFullLen(region, offset))) {
    dump_debug_log(tracelog_fp);
    mush_panicf("Trying to migrate into too small a hole: %04x into %04x!",
		s_len, length);
  }
#endif

  o_off = offset;
  offset = split_hole(region, offset, s_len, align);
  memcpy(ChunkPointer(region, offset), ChunkPointer(s_reg, s_off), s_len);
#ifdef DEBUG_CHUNK_MIGRATE
  do_rawlog(LT_TRACE, "CHUNK: moving chunk %08x to %04x%04x",
	    m_references[which][0], region, offset);
#endif
  m_references[which][0] = ChunkReference(region, offset);
  rp->total_derefs += ChunkDerefs(region, offset);
  free_chunk(s_reg, s_off);

  stat_migrate_move++;

#ifdef CHUNK_PARANOID
  if (!region_is_valid(region)) {
    do_rawlog(LT_TRACE, "Invalid region after migrate_move!");
    do_rawlog(LT_TRACE, "Was moving %04x%04x to %04x%04x (became %04x%04x)...",
	      s_reg, s_off, region, o_off, region, offset);
    do_rawlog(LT_TRACE, "Chunk length %04x into hole length %04x, alignment %d",
	      s_len, length, align);
    debug_dump_region(region, tracelog_fp);
    mush_panic("Invalid region after migrate_move!");
  }
#endif
}

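/** Migrate the chunks in the migration arrays that currently live in
 * a given region, moving each one to its best region and offset.
 * \param region the region whose chunks should be reconsidered.
 */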
static void
migrate_region(u_int_16 region)
{
  chunk_reference_t high, low;
  int j, derefs;
  u_int_16 offset, length, best_region, best_offset;

  bring_in_region(region);

  /* References to chunks in this region fall strictly between these. */
  high = ChunkReference(region, REGION_SIZE);
  low = ChunkReference(region, 0);

  for (j = 0; j < m_count; j++) {
    if (low < m_references[j][0] && m_references[j][0] < high) {
      offset = ChunkReferenceToOffset(m_references[j][0]);
      derefs = ChunkDerefs(region, offset);
      length = ChunkFullLen(region, offset);
      best_region = find_best_region(length, derefs, region);
      best_offset = find_best_offset(length, best_region, region, offset);
      if (best_offset) {
	migrate_move(best_region, best_offset, j, 1);
	/* Only count a cross-region move if one actually happened. */
	if (best_region != region)
	  stat_migrate_away++;
      }
    }
  }
  migrate_sort();
}



/*
 * Interface routines
 */
/** Allocate a chunk of storage.
 * \param data the data to be stored.
 * \param len the length of the data to be stored.
 * \param derefs the deref count to set on the chunk.
 * \return the chunk reference for retrieving (or deleting) the data.
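 *
 * For example (a minimal sketch; error handling elided):
 * \verbatim
 *   unsigned char const text[] = "hello, world";
 *   chunk_reference_t ref = chunk_create(text, sizeof text - 1, 0);
 * \endverbatim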
 */
chunk_reference_t
chunk_create(unsigned char const *data, u_int_16 len, unsigned char derefs)
{
  u_int_16 full_len, region, offset;

  if (len < MIN_CHUNK_LEN || len > MAX_CHUNK_LEN)
    mush_panicf("Illegal chunk length requested: %d bytes", len);

  full_len = LenToFullLen(len);
  region = find_best_region(full_len, derefs, INVALID_REGION_ID);
  offset = find_best_offset(full_len, region, INVALID_REGION_ID, 0);
  if (!offset) {
    region = create_region();
#ifdef DEBUG_CHUNK_REGION_CREATE
    do_rawlog(LT_TRACE, "chunk_create created region %04x", region);
#endif
    offset = FIRST_CHUNK_OFFSET_IN_REGION;
  }
  offset = split_hole(region, offset, full_len, 0);
  write_used_chunk(region, offset, full_len, data, len, derefs);
  regions[region].total_derefs += derefs;
  touch_cache_region(regions[region].in_memory);
#ifdef CHUNK_PARANOID
  if (!region_is_valid(region))
    mush_panic("Invalid region after chunk_create!");
#endif
  stat_create++;
  return ChunkReference(region, offset);
}

/** Deallocate a chunk of storage.
 * \param reference the reference to the chunk to be freed.
 */
void
chunk_delete(chunk_reference_t reference)
{
  u_int_16 region, offset;
  region = ChunkReferenceToRegion(reference);
  offset = ChunkReferenceToOffset(reference);
  ASSERT(region < region_count);
  bring_in_region(region);
#ifdef CHUNK_PARANOID
  verify_used_chunk(region, offset);
#endif
  free_chunk(region, offset);
  touch_cache_region(regions[region].in_memory);
#ifdef CHUNK_PARANOID
  if (!region_is_valid(region))
    mush_panic("Invalid region after chunk_delete!");
#endif
  stat_delete++;
}

/** Fetch a chunk of data.
 * If the chunk is too large to fit in the supplied buffer, then
 * the buffer will be left untouched.  The length of the data is
 * returned regardless; this can be used to resize the buffer
 * (or just as information for further processing of the data).
 * \param reference the reference to the chunk to be fetched.
 * \param buffer the buffer to put the data into.
 * \param buffer_len the length of the buffer.
 * \return the length of the data.
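 *
 * A typical fetch-and-retry pattern (an illustrative sketch; ref is
 * some previously created reference, and the allocation tag here is
 * arbitrary):
 * \verbatim
 *   unsigned char buffer[BUFFER_LEN];
 *   u_int_16 len = chunk_fetch(ref, buffer, sizeof buffer);
 *   if (len > sizeof buffer) {
 *     unsigned char *big = mush_malloc(len, "chunk_fetch.buffer");
 *     chunk_fetch(ref, big, len);
 *     // ... use big ...
 *     mush_free(big, "chunk_fetch.buffer");
 *   }
 * \endverbatim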
 */
u_int_16
chunk_fetch(chunk_reference_t reference,
	    unsigned char *buffer, u_int_16 buffer_len)
{
  u_int_16 region, offset, len;
  region = ChunkReferenceToRegion(reference);
  offset = ChunkReferenceToOffset(reference);
  ASSERT(region < region_count);
  bring_in_region(region);
#ifdef CHUNK_PARANOID
  verify_used_chunk(region, offset);
#endif
  len = ChunkLen(region, offset);
  if (len <= buffer_len)
    memcpy(buffer, ChunkDataPtr(region, offset), len);
  touch_cache_region(regions[region].in_memory);
  stat_deref_count++;
  if (ChunkDerefs(region, offset) < CHUNK_DEREF_MAX) {
    ChunkDerefs(region, offset)++;
    regions[region].total_derefs++;
    if (ChunkDerefs(region, offset) == CHUNK_DEREF_MAX)
      stat_deref_maxxed++;
  }
  return len;
}

/** Get the length of a chunk.
 * This is equivalent to calling chunk_fetch(reference, NULL, 0).
 * It can be used to glean the proper size for a buffer to actually
 * retrieve the data, if you're being stingy.
 * \param reference the reference to the chunk to be queried.
 * \return the length of the data.
 */
u_int_16
chunk_len(chunk_reference_t reference)
{
  return chunk_fetch(reference, NULL, 0);
}

/** Get the deref count of a chunk.
 * This can be used to preserve the deref count across database saves
 * or similar save and restore operations.
 * \param reference the reference to the chunk to be queried.
 * \return the deref count for the data.
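 *
 * For example, a save/restore cycle might round-trip the count like
 * this (a sketch; the surrounding I/O is elided):
 * \verbatim
 *   unsigned char d = chunk_derefs(ref);   // at save time
 *   // ... later, when reloading ...
 *   ref = chunk_create(data, len, d);      // at restore time
 * \endverbatim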
 */
unsigned char
chunk_derefs(chunk_reference_t reference)
{
  u_int_16 region, offset;
  region = ChunkReferenceToRegion(reference);
  offset = ChunkReferenceToOffset(reference);
  ASSERT(region < region_count);
  bring_in_region(region);
#ifdef CHUNK_PARANOID
  verify_used_chunk(region, offset);
#endif
  return ChunkDerefs(region, offset);
}

/** Migrate allocated chunks around.
 * 
 * \param count the number of chunks to move.
 * \param references an array of pointers to chunk references,
 * which will be updated in place if necessary.
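 *
 * Callers typically submit their references in batches.  An
 * illustrative sketch (attr_table, table_size, and start are
 * hypothetical caller-side names):
 * \verbatim
 *   chunk_reference_t *refs[100];
 *   int n;
 *   for (n = 0; n < 100 && start + n < table_size; n++)
 *     refs[n] = &attr_table[start + n].data;
 *   chunk_migration(n, refs);
 * \endverbatim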
 */
void
chunk_migration(int count, chunk_reference_t ** references)
{
  int k, l;
  unsigned total;
  u_int_16 region, offset;

  debug_log("*** chunk_migration starts, count = %d", count);

  /* Before everything, see if we need a new period. */
  total = 0;
  for (region = 0; region < region_count; region++) {
    if (RegionDerefs(region) > (CHUNK_DEREF_MAX / 2))
      total++;
  }
  if (total > cached_region_count || total > region_count / 2)
    chunk_new_period();

  m_count = count;
  m_references = references;
  migrate_sort();

  /* Go through each of the regions. */
  for (region = 0; region < region_count; region++) {
    /* Make sure we have something to migrate, in the region. */
    for (k = 0; k < m_count; k++)
      if (ChunkReferenceToRegion(m_references[k][0]) == region)
	break;
    if (k >= m_count)
      continue;

    if (!regions[region].in_memory) {
      /* If not in memory, see if we've got an oddball. */
      while (k < m_count) {
	if (ChunkReferenceToRegion(m_references[k][0]) != region)
	  break;
	offset = ChunkReferenceToOffset(m_references[k][0]);
	for (l = 0; l < NUM_ODDBALLS; l++) {
	  if (regions[region].oddballs[l] == offset) {
	    /* Yup, have an oddball... that's worth bringing it in. */
	    bring_in_region(region);
	    goto do_migrate;
	  }
	}
	k++;
      }
    } else {
    do_migrate:
      /* It's in memory, so migrate it. */
      migrate_region(region);
    }
  }

  m_references = NULL;
  m_count = 0;

  debug_log("*** chunk_migration ends", count);
}

/** Get the number of paged regions.
 * Since the memory allocator cannot be reliably accessed from
 * multiple processes if any of the chunks have been swapped out
 * to disk, it's useful to check on the number of paged out regions
 * before doing any forking maneuvers.
 * \return the number of regions paged out.
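 *
 * For example, a dump routine might check (a sketch):
 * \verbatim
 *   if (chunk_num_swapped() == 0 || chunk_fork_file()) {
 *     // safe to fork and dump
 *   }
 * \endverbatim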
 */
int
chunk_num_swapped(void)
{
  int count;
  u_int_16 region;
  count = 0;
  for (region = 0; region < region_count; region++)
    if (!regions[region].in_memory)
      count++;
  return count;
}

/** Initialize chunk subsystem.
 * Nothing to see here... just call it before using the subsystem.
 */
void
chunk_init(void)
{
  /* In any case, this assert should be in main code, not here */
  ASSERT(BUFFER_LEN <= MAX_LONG_CHUNK_LEN);

#ifdef WIN32
  swap_fd = CreateFile(CHUNK_SWAP_FILE, GENERIC_READ | GENERIC_WRITE,
		       0, NULL, CREATE_ALWAYS, FILE_FLAG_DELETE_ON_CLOSE, NULL);
  if (swap_fd == INVALID_HANDLE_VALUE)
    mush_panicf("Cannot open swap file: %d", GetLastError());
#else
  swap_fd = open(CHUNK_SWAP_FILE, O_RDWR | O_TRUNC | O_CREAT, 0600);
  if (swap_fd < 0)
    mush_panicf("Cannot open swap file: %s", strerror(errno));
#endif
  curr_period = 0;

  cached_region_count = 0;
  cache_head = NULL;
  cache_tail = NULL;

  region_count = 0;
  region_array_len = FIXME_INIT_REGION_LEN;
#ifdef DEBUG_CHUNK_MALLOC
  do_rawlog(LT_TRACE, "CHUNK: malloc()ing initial region array");
#endif
  regions = mush_malloc(region_array_len * sizeof(Region), "chunk region list");
  if (!regions)
    mush_panic("cannot malloc space for chunk region list");

/*
  command_add("@DEBUGCHUNK", CMD_T_ANY | CMD_T_GOD, 0, 0, 0,
              switchmask("ALL BRIEF FULL"), cmd_debugchunk);
*/
  do_rawlog(LT_TRACE, "CHUNK: chunk subsystem initialized");
}

/** Report statistics.
 * Display either the statistics summary or one of the histograms.
 * \param player the player to display it to, or NOTHING to log it.
 * \param which what type of information to display.
 */
void
chunk_stats(dbref player, enum chunk_stats_type which)
{
  switch (which) {
  case CSTATS_SUMMARY:
    chunk_statistics(player);
    break;
  case CSTATS_REGIONG:
    chunk_histogram(player, chunk_regionhist(),
		    "Chart number of regions (y) vs. references (x)");
    break;
  case CSTATS_PAGINGG:
    chunk_histogram(player, stat_paging_histogram,
		    "Chart pages in/out (y) vs. references (x)");
    break;
  case CSTATS_FREESPACEG:
    chunk_histogram(player, chunk_freehist(),
		    "Chart region free space (y) vs. references (x)");
    break;
  case CSTATS_REGION:
    chunk_region_statistics(player);
    break;
  case CSTATS_PAGING:
    chunk_page_stats(player);
    break;
  }
}

/** Start a new migration period.
 * This halves every chunk's dereference count, once for each period
 * that has elapsed since the chunk's region was last touched.  Since
 * this is called from migration as needed, you probably shouldn't
 * bother calling it yourself.
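 * For example, a chunk with a deref count of 200 whose region was last
 * touched three periods ago drops to 200 >> 3 == 25; a region untouched
 * for more than eight periods has its 8-bit counts simply cleared.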
 */
void
chunk_new_period(void)
{
  RegionHeader *rhp;
  Region *rp;
  u_int_16 region, offset;
  int shift;

#ifdef LOG_CHUNK_STATS
  /* Log stats */
  chunk_statistics(NOTHING);
#endif

  /* Reset period info */
  curr_period++;
  stat_deref_count = 0;
  stat_deref_maxxed = 0;
  stat_migrate_slide = 0;
  stat_migrate_move = 0;
  stat_migrate_away = 0;
  stat_create = 0;
  stat_delete = 0;

  /* make derefs current */
  for (rhp = cache_head; rhp; rhp = rhp->next) {
    region = rhp->region_id;
    if (region == INVALID_REGION_ID)
      continue;
    rp = regions + region;

    shift = curr_period - rp->period_last_touched;
    if (shift > 8) {
      /* An 8-bit deref count shifted by more than 8 bits is zero
       * anyway, so just clear everything. */
      rp->total_derefs = 0;
      for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
	   offset < REGION_SIZE; offset += ChunkFullLen(region, offset)) {
	ChunkDerefs(region, offset) = 0;
      }
    } else {
      rp->total_derefs = 0;
      for (offset = FIRST_CHUNK_OFFSET_IN_REGION;
	   offset < REGION_SIZE; offset += ChunkFullLen(region, offset)) {
	if (ChunkIsFree(region, offset))
	  continue;
	ChunkDerefs(region, offset) >>= shift;
	rp->total_derefs += ChunkDerefs(region, offset);
      }
    }
    rp->period_last_touched = curr_period;
  }
}

#ifndef WIN32
/** Clone the chunkswap file for forking dumps.
 * \retval 0 if unable to clone the swap file
 * \retval 1 if swap file clone succeeded
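 *
 * One plausible calling sequence around a forking dump (a sketch; the
 * actual dump code lives elsewhere):
 * \verbatim
 *   if (chunk_fork_file()) {
 *     if (fork() == 0) {
 *       chunk_fork_child();
 *       // ... child writes the dump using the cloned swap file ...
 *       chunk_fork_done();
 *       _exit(0);
 *     }
 *     chunk_fork_parent();
 *   }
 * \endverbatim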
 */
int
chunk_fork_file(void)
{
  unsigned int j;
  RegionHeader *rhp, *prev, *next;

  /* abort if already cloned */
  if (swap_fd_child >= 0)
    return 0;

  j = 0;
  for (;;) {
    sprintf(child_filename, "%s.%d", CHUNK_SWAP_FILE, j);
    swap_fd_child = open(child_filename, O_RDWR | O_EXCL | O_CREAT, 0600);
    if (swap_fd_child >= 0)
      break;
    if (j >= 10)
      return 0;
    j++;
  }

  /* Borrow a cache buffer and use it to copy every paged-out region
   * from the live swap file into the clone. */
  rhp = find_available_cache_region();
  prev = rhp->prev;
  next = rhp->next;
  for (j = 0; j < region_count; j++) {
    if (regions[j].in_memory)
      continue;

    read_cache_region(swap_fd, rhp, j);
    write_cache_region(swap_fd_child, rhp, j);
  }
  /* Mark the borrowed buffer unused again and restore its links. */
  rhp->region_id = INVALID_REGION_ID;
  rhp->prev = prev;
  rhp->next = next;

  return 1;
}

/** Assert that we're the parent after fork.
 */
void
chunk_fork_parent(void)
{
  if (swap_fd_child < 0)
    return;

  close(swap_fd_child);
  swap_fd_child = -1;
}

/** Assert that we're the child after fork.
 */
void
chunk_fork_child(void)
{
  if (swap_fd_child < 0)
    return;

  close(swap_fd);
  swap_fd = swap_fd_child;
  swap_fd_child = -1;
}

/** Assert that we're done with the cloned chunkswap file.
 */
void
chunk_fork_done(void)
{
  if (swap_fd_child < 0)
    close(swap_fd);
  else
    close(swap_fd_child);

  unlink(child_filename);
  swap_fd_child = -1;
}
#endif				/* !WIN32 */