muddy/
muddy/CVS/
muddy/area/
muddy/area/CVS/
muddy/clans/CVS/
muddy/classes/CVS/
muddy/doc/
muddy/doc/CVS/
muddy/etc/CVS/
muddy/etc/i3/
muddy/etc/i3/CVS/
muddy/imc/CVS/
muddy/lang/CVS/
muddy/licenses/CVS/
muddy/msgdb/CVS/
muddy/new/CVS/
muddy/notes/
muddy/player/
muddy/races/CVS/
muddy/religions/CVS/
muddy/src/CVS/
muddy/src/comm/CVS/
muddy/src/db/CVS/
muddy/src/intermud/
muddy/src/intermud/CVS/
muddy/src/irc/CVS/
muddy/src/olc/CVS/
/* $Id: mtwist.c,v 1.666 2004/09/20 10:50:19 shrike Exp $                              */    

/************************************************************************************
 *    Copyright 2004 Astrum Metaphora consortium                                    *
 *                                                                                  *
 *    Licensed under the Apache License, Version 2.0 (the "License");               *
 *    you may not use this file except in compliance with the License.              *
 *    You may obtain a copy of the License at                                       *
 *                                                                                  *
 *    http://www.apache.org/licenses/LICENSE-2.0                                    *
 *                                                                                  *
 *    Unless required by applicable law or agreed to in writing, software           *
 *    distributed under the License is distributed on an "AS IS" BASIS,             *
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.      *
 *    See the License for the specific language governing permissions and           *
 *    limitations under the License.                                                *
 *                                                                                  *
 ************************************************************************************/

/************************************************************************************
 * C library functions for generating pseudorandom numbers using the
 * Mersenne Twist algorithm.  See M. Matsumoto and T. Nishimura,
 * "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform
 * Pseudo-Random Number Generator", ACM Transactions on Modeling and
 * Computer Simulation, Vol. 8, No. 1, January 1998, pp 3--30.
 *
 * The Web page on the Mersenne Twist algorithm is at:
 *
 * http://www.math.keio.ac.jp/~matumoto/emt.html
 *
 * These functions were written by Geoffrey H. Kuenning, Claremont, CA.
 *
 * IMPORTANT NOTE: the Makefile must define two machine-specific
 * variables to get optimum features and performance:
 *
 *	MT_NO_INLINE	should be defined if the compiler doesn't support
 *			the "inline" keyword.
 *	MT_NO_LONGLONG	should be defined if the compiler doesn't support a
 *			"long long" type for 64-bit integers
 *	MT_MACHINE_BITS	must be either 32 or 64, reflecting the natural
 *			size of the processor registers.  If undefined, it
 *			will default to 32.
 *
 * The first two variables above are defined in an inverted sense
 * because I expect that most compilers will support inline and
 * long-long.  By inverting the sense, this common case will require
 * no special compiler flags.
 *
 * IMPORTANT NOTE: this software assumes that the inherent width of a
 * "long" is 32 bits.  If you are running on a machine that uses
 * 64-bit longs, some of the declarations and code will have to be
 * modified.
 *
 * This software is based on LGPL-ed code by Takuji Nishimura.  It has
 * also been heavily influenced by code written by Shawn Cokus, and
 * somewhat influenced by code written by Richard J. Wagner.  It is
 * therefore also distributed under the LGPL:
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public License
 * as published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.  You should have
 * received a copy of the GNU Library General Public License along
 * with this library; if not, write to the Free Foundation, Inc., 59
 * Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ************************************************************************************/
 
#ifdef _WIN32
#undef WIN32
#define WIN32
#endif /* _WIN32 */

#include <stdio.h>
#include <stdlib.h>
#ifdef WIN32
#include <sys/timeb.h>
#else /* WIN32 */
#include <sys/time.h>
#endif /* WIN32 */

/*
 * Before we include the Mersenne Twist header file, we must do a bit
 * of magic setup.  The code for actual random-number generation
 * resides in that file rather than here.  We need to arrange for the
 * code to be compiled into this .o file, either because inlines
 * aren't supported or because somebody might want to take a pointer
 * to a function.  We do so with a couple of careful #defines.
 */
#undef MT_NO_INLINE			/* Ask for code to be compiled */
#define MT_INLINE			/* Disable the inline keyword */
#define MT_EXTERN			/* Generate real code for functions */

#include "mtwist.h"

/*
 * Table of contents:
 */
void			mts_mark_initialized(mt_state* state);
					/* Mark a PRNG state as initialized */
void			mts_seed32(mt_state* state, unsigned long seed);
					/* Set random seed for any generator */
void			mts_seed32new(mt_state* state, unsigned long seed);
					/* Set random seed for any generator */
void			mts_seedfull(mt_state* state,
			  unsigned long seeds[MT_STATE_SIZE]);
					/* Set complicated seed for any gen. */
void			mts_seed(mt_state* state);
					/* Choose seed from random input */
void			mts_goodseed(mt_state* state);
					/* Choose seed from more random */
					/* ..input than mts_seed */
static void		mts_devseed(mt_state* state, char* seed_dev);
					/* Choose seed from a device */
void			mts_bestseed(mt_state* state);
					/* Choose seed from extremely random */
					/* ..input (can be *very* slow) */
void			mts_refresh(mt_state* state);
					/* Generate 624 more random values */
int			mts_savestate(FILE* statefile, mt_state* state);
					/* Save state to a file (ASCII) */
int			mts_loadstate(FILE* statefile, mt_state* state);
					/* Load state from a file (ASCII) */

void			mt_seed32(unsigned long seed);
					/* Set random seed for default gen. */
void			mt_seed32new(unsigned long seed);
					/* Set random seed for default gen. */
void			mt_seedfull(unsigned long seeds[MT_STATE_SIZE]);
					/* Set complicated seed for default */
void			mt_seed(void);	/* Choose seed from random input */
void			mt_goodseed(void);
					/* Choose seed from more random */
					/* ..input than mts_seed */
void			mt_bestseed(void);
					/* Choose seed from extremely random */
					/* ..input (can be *very* slow) */
extern mt_state*	mt_getstate(void);
					/* Get current state of default */
					/* ..generator */
int			mt_savestate(FILE* statefile);
					/* Save state to a file (ASCII) */
int			mt_loadstate(FILE* statefile);
					/* Load state from a file (ASCII) */


/*
 * The following values are fundamental parameters of the algorithm.
 * With the exception of the two masks, all of them were found
 * experimentally using methods described in Matsumoto and Nishimura's
 * paper.  They are exceedingly magic; don't change them.
 */

/* MT_STATE_SIZE is defined in the header file. */
#define RECURRENCE_OFFSET 397		/* Offset into state space for the */
					/* ..recurrence relation.  The */
					/* ..recurrence mashes together two */
					/* ..values that are separated by */
					/* ..this offset in the state */
					/* ..space. */
#define MATRIX_A	0x9908b0df	/* Constant vector A for the */
					/* ..recurrence relation.  The */
					/* ..mashed-together value is */
					/* ..multiplied by this vector to */
					/* ..get a new value that will be */
					/* ..stored into the state space. */

/*
 * Width of a long.  Don't change this even if your longs are 64 bits.
 */
#define BIT_WIDTH	32		/* Work with 32-bit words */

/*
 * Masks for extracting the bits to be mashed together.  The widths of these
 * masks are also fundamental parameters of the algorithm, determined
 * experimentally -- but of course the masks themselves are simply bit
 * selectors.
 */
#define UPPER_MASK	0x80000000	/* Most significant w-r bits */
#define LOWER_MASK	0x7fffffff	/* Least significant r bits */

/*
 * Macro to simplify code in the generation loop.  This function
 * combines the top bit of x with the bottom 31 bits of y.
 */
#define COMBINE_BITS(x, y) \
			(((x) & UPPER_MASK) | ((y) & LOWER_MASK))

/*
 * Another generation-simplification macro.  This one does the magic
 * scrambling function.
 */
#define MATRIX_MULTIPLY(original, new) \
			((original) ^ ((new) >> 1) \
			  ^ matrix_decider[(new) & 0x1])

/*
 * Parameters of Knuth's PRNG (Line 25, Table 1, p. 102 of "The Art of
 * Computer Programming, Vol. 2, 2nd ed, 1981).
 */
#define KNUTH_MULTIPLIER_OLD \
			69069

/*
 * Parameters of Knuth's PRNG (p. 106 of "The Art of Computer
 * Programming, Vol. 2, 3rd ed).
 */
#define KNUTH_MULTIPLIER_NEW \
			1812433253ul
#define KNUTH_SHIFT	30		// Even on a 64-bit machine!

/*
 * Default 32-bit random seed if mts_seed32 wasn't called
 */
#define DEFAULT_SEED32_OLD \
			4357
#define DEFAULT_SEED32_NEW \
			5489ul

/*
 * Where to get random numbers
 */
#define DEVRANDOM	"/dev/random"
#define DEVURANDOM	"/dev/urandom"

/*
 * Many applications need only a single PRNG, so it's a nuisance to have to
 * specify a state.  For those applications, we will provide a default
 * state, and functions to use it.
 */
mt_state		mt_default_state;

/*
 * To generate double-precision random numbers, we need to divide the result
 * of mts_lrand or mts_llrand by 2^32 or 2^64, respectively.  The quickest
 * way to do that on most machines is to multiply by the inverses of those
 * numbers.  However, I don't trust the compiler to correctly convert the
 * corresponding decimal constant.  So we will compute the correct number at
 * run time as part of initialization, which will produce a nice exact
 * result.
 */
double			mt_32_to_double;
					/* Multiplier to convert long to dbl */
double			mt_64_to_double;
					/* Mult'r to cvt long long to dbl */

/*
 * In the recurrence relation, the new value is XORed with MATRIX_A only if
 * the lower bit is nonzero.  Since most modern machines don't like to
 * branch, it's vastly faster to handle this decision by indexing into an
 * array.  The chosen bit is used as an index into the following vector,
 * which produces either zero or MATRIX_A and thus the desired effect.
 */
static unsigned long	matrix_decider[2] =
			  {0x0, MATRIX_A};

/*
 * Mark a PRNG's state as having been initialized.  This is the only
 * way to set that field nonzero; that way we can be sure that the
 * constants are set properly before the PRNG is used.
 *
 * As a side effect, set up some constants that the PRNG assumes are
 * valid.  These are calculated at initialization time rather than
 * being written as decimal constants because I frankly don't trust
 * the compiler's ASCII conversion routines.
 */
void mts_mark_initialized(
    mt_state*		state)		/* State vector to mark initialized */
    {
    int			i;		/* Power of 2 being calculated */

    /*
     * Figure out the proper multiplier for long-to-double conversion.  We
     * don't worry too much about efficiency, since the assumption is that
     * initialization is vastly rarer than generation of random numbers.
     */
    mt_32_to_double = 1.0;
    for (i = 0;  i < BIT_WIDTH;  i++)
	mt_32_to_double /= 2.0;
    mt_64_to_double = mt_32_to_double;
    for (i = 0;  i < BIT_WIDTH;  i++)
	mt_64_to_double /= 2.0;

    state->initialized = 1;
    }

/*
 * Initialize a Mersenne Twist PRNG from a 32-bit seed.
 *
 * According to Matsumoto and Nishimura's paper, the seed array needs to be
 * filled with nonzero values.  (My own interpretation is that there needs
 * to be at least one nonzero value).  They suggest using Knuth's PRNG from
 * Line 25, Table 1, p.102, "The Art of Computer Programming," Vol. 2 (2nd
 * ed.), 1981.  I find that rather odd, since that particular PRNG is
 * sensitive to having an initial seed of zero (there are many other PRNGs
 * out there that have an additive component, so that a seed of zero does
 * not generate a repeating-zero sequence).  However, one thing I learned
 * from reading Knuth is that you shouldn't second-guess mathematicians
 * about PRNGs.  Also, by following M & N's approach, we will be compatible
 * with other implementations.  So I'm going to stick with their version,
 * with the single addition that a zero seed will be changed to their
 * default seed.
 */
void mts_seed32(
    mt_state*		state,		/* State vector to initialize */
    unsigned long	seed)		/* 32-bit seed to start from */
    {
    int			i;		/* Loop index */

    if (seed == 0)
	seed = DEFAULT_SEED32_OLD;

    /*
     * Fill the state vector using Knuth's PRNG.  Be sure to mask down
     * to 32 bits in case we're running on a machine with 64-bit
     * longs.
     */
    state->statevec[MT_STATE_SIZE - 1] = seed & 0xffffffff;
    for (i = MT_STATE_SIZE - 2;  i >= 0;  i--)
        state->statevec[i] =
          (KNUTH_MULTIPLIER_OLD * state->statevec[i + 1]) & 0xffffffff;

    state->stateptr = MT_STATE_SIZE;
    mts_mark_initialized(state);

    /*
     * Matsumoto and Nishimura's implementation refreshes the PRNG
     * immediately after running the Knuth algorithm.  This is
     * probably a good thing, since Knuth's PRNG doesn't generate very
     * good numbers.
     */
    mts_refresh(state);
    }

/*
 * Initialize a Mersenne Twist PRNG from a 32-bit seed, using
 * Matsumoto and Nishimura's newer reference implementation (Jan. 9,
 * 2002).
 */
void mts_seed32new(
    mt_state*		state,		/* State vector to initialize */
    unsigned long	seed)		/* 32-bit seed to start from */
    {
    int			i;		/* Loop index */
    unsigned long	nextval;	/* Next value being calculated */

    /*
     * Fill the state vector using Knuth's PRNG.  Be sure to mask down
     * to 32 bits in case we're running on a machine with 64-bit
     * longs.
     */
    state->statevec[MT_STATE_SIZE - 1] = seed & 0xffffffffUL;
    for (i = MT_STATE_SIZE - 2;  i >= 0;  i--)
	{
	nextval = state->statevec[i + 1] >> KNUTH_SHIFT;
	nextval ^= state->statevec[i + 1];
	nextval *= KNUTH_MULTIPLIER_NEW;
	nextval += (MT_STATE_SIZE - 1) - i;
	state->statevec[i] = nextval & 0xffffffffUL;
	}

    state->stateptr = MT_STATE_SIZE;
    mts_mark_initialized(state);

    /*
     * Matsumoto and Nishimura's implementation refreshes the PRNG
     * immediately after running the Knuth algorithm.  This is
     * probably a good thing, since Knuth's PRNG doesn't generate very
     * good numbers.
     */
    mts_refresh(state);
    }

/*
 * Initialize a Mersenne Twist RNG from a 624-long seed.
 *
 * The 32-bit seeding routine given by Matsumoto and Nishimura has the
 * drawback that there are only 2^32 different PRNG sequences that can be
 * generated by calling that function.  This function solves that problem by
 * allowing a full 624*32-bit state to be given.  (Note that 31 bits of the
 * given state are ignored; see the paper for details.)
 *
 * Since an all-zero state would cause the PRNG to cycle, we detect
 * that case and abort the program (silently, since there is no
 * portable way to produce a message in both C and C++ environments).
 * An alternative would be to artificially force the state to some
 * known nonzero value.  However, I feel that if the user is providing
 * a full state, it's a bug to provide all zeros and we we shouldn't
 * conceal the bug by generating apparently correct output.
 */
void mts_seedfull(
    mt_state*		state,		/* State vector to initialize */
    unsigned long	seeds[MT_STATE_SIZE])
					/* Seed array to start from */
    {
    int			had_nz = 0;	/* NZ if at least one NZ seen */
    int			i;		/* Loop index */

    for (i = 0;  i < MT_STATE_SIZE;  i++)
        {
        if (seeds[i] != 0)
	    had_nz = 1;
        state->statevec[MT_STATE_SIZE - i - 1] = seeds[i];
	}

    if (!had_nz)
	{
	/*
	 * It would be nice to abort with a message.  Unfortunately, fprintf
	 * isn't compatible with all implementations of C++.  In the
	 * interest of C++ compatibility, therefore, we will simply abort
	 * silently.  It will unfortunately be up to a programmer to run
	 * under a debugger (or examine the core dump) to discover the cause
	 * of the abort.
	 */
	abort();
	}

    state->stateptr = MT_STATE_SIZE;
    mts_mark_initialized(state);
    }

/*
 * Choose a seed based on some moderately random input.  Prefers
 * /dev/urandom as a source of random numbers, but uses the lower bits
 * of the current time if /dev/urandom is not available.  In any case,
 * only provides 32 bits of entropy.
 */
void mts_seed(
    mt_state*		state)		/* State vector to seed */
    {
    mts_devseed(state, DEVURANDOM);
    }

/*
 * Choose a seed based on some fairly random input.  Prefers
 * /dev/random as a source of random numbers, but uses the lower bits
 * of the current time if /dev/random is not available.  In any case,
 * only provides 32 bits of entropy.
 */
void mts_goodseed(
    mt_state*		state)		/* State vector to seed */
    {
    mts_devseed(state, DEVRANDOM);
    }

/*
 * Choose a seed based on a random-number device given by the caller.
 * If that device can't be opened, use the lower 32 bits from the
 * current time.
 */
static void mts_devseed(
    mt_state*		state,		/* State vector to seed */
    char*		seed_dev)	/* Device to seed from */
    {
    int			bytesread;	/* Byte count read from device */
    int			nextbyte;	/* Index of next byte to read */
    FILE*		ranfile;	/* Access to device */
    union
	{
	char		ranbuffer[sizeof (unsigned long)];
					/* Space for reading random int */
	unsigned long	randomvalue;	/* Random value for initialization */
	}
			randomunion;	/* Union for reading random int */
#ifdef WIN32
    struct _timeb	tb;		/* Time of day (Windows mode) */
#else /* WIN32 */
    struct timeval	tv;		/* Time of day */
    struct timezone	tz;		/* Dummy for gettimeofday */
#endif /* WIN32 */

    ranfile = fopen(seed_dev, "rb");
    if (ranfile != NULL)
	{
	for (nextbyte = 0;
	  nextbyte < (int)sizeof randomunion.ranbuffer;
	  nextbyte += bytesread)
	    {
	    bytesread = fread(&randomunion.ranbuffer[nextbyte], 1,
	      sizeof randomunion.ranbuffer - nextbyte, ranfile);
	    if (bytesread == 0)
		break;
	    }
	fclose(ranfile);
	if (nextbyte == sizeof randomunion.ranbuffer)
	    {
	    mts_seed32new(state, randomunion.randomvalue);
	    return;
	    }
	}

    /*
     * The device isn't available.  Use the time.  We will
     * assume that the time of day is accurate to microsecond
     * resolution, which is true on most modern machines.
     */
#ifdef WIN32
    (void) _ftime (&tb);
#else /* WIN32 */
    (void) gettimeofday (&tv, &tz);
#endif /* WIN32 */

    /*
     * We just let the excess part of the seconds field overflow
     */
#ifdef WIN32
    randomunion.randomvalue = tb.time * 1000 + tb.millitm;
#else /* WIN32 */
    randomunion.randomvalue = tv.tv_sec * 1000000 + tv.tv_usec;
#endif /* WIN32 */
    mts_seed32new(state, randomunion.randomvalue);
    }

/*
 * Choose a seed based on the best random input available.  Prefers
 * /dev/random as a source of random numbers, and reads the entire
 * 624-long state from that device.  Because of this approach, the
 * function can take a long time (in real time) to complete, since
 * /dev/random may have to wait quite a while before it can provide
 * that much randomness.  If /dev/random is unavailable, falls back to
 * calling mts_goodseed.
 */
void mts_bestseed(
    mt_state*		state)		/* State vector to seed */
    {
    int			bytesread;	/* Byte count read from device */
    int			nextbyte;	/* Index of next byte to read */
    FILE*		ranfile;	/* Access to device */

    ranfile = fopen("/dev/random", "rb");
    if (ranfile == NULL)
	{
	mts_goodseed(state);
	return;
	}

    for (nextbyte = 0;
      nextbyte < (int)sizeof state->statevec;
      nextbyte += bytesread)
	{
	bytesread = fread((char *)&state->statevec + nextbyte, 1,
	  sizeof state->statevec - nextbyte, ranfile);
	if (bytesread == 0)
	    {
	    /*
	     * Something went wrong.  Fall back to time-based seeding.
	     */
	    fclose(ranfile);
	    mts_goodseed(state);
	    return;
	    }
	}
    }

/*
 * Generate 624 more random values.  This function is called when the
 * state vector has been exhausted.  It generates another batch of
 * pseudo-random values.  The performance of this function is critical
 * to the performance of the Mersenne Twist PRNG, so it has been
 * highly optimized.
 */
void mts_refresh(
    register mt_state*	state)		/* State for the PRNG */
    {
    register int	i;		/* Index into the state */
    register unsigned long*
			state_ptr;	/* Next place to get from state */
    register unsigned long
			value1;		/* Scratch val picked up from state */
    register unsigned long
			value2;		/* Scratch val picked up from state */

    /*
     * Start by making sure a random seed has been set.  If not, set
     * one.
     */
    if (!state->initialized)
	{
	mts_seed32(state, DEFAULT_SEED32_OLD);
	return;				/* Seed32 calls us recursively */
	}

    /*
     * Now generate the new pseudorandom values by applying the
     * recurrence relation.  We use two loops and a final
     * 2-statement sequence so that we can handle the wraparound
     * explicitly, rather than having to use the relatively slow
     * modulus operator.
     *
     * In essence, the recurrence relation concatenates bits
     * chosen from the current random value (last time around)
     * with the immediately preceding one.  Then it
     * matrix-multiplies the concatenated bits with a value
     * RECURRENCE_OFFSET away and a constant matrix.  The matrix
     * multiplication reduces to a shift and two XORs.
     *
     * Some comments on the optimizations are in order:
     *
     * Strictly speaking, none of the optimizations should be
     * necessary.  All could conceivably be done by a really good
     * compiler.  However, the compilers available to me aren't quite
     * smart enough, so hand optimization needs to be done.
     *
     * Shawn Cokus was the first to achieve a major speedup.  In the
     * original code, the first value given to COMBINE_BITS (in my
     * characterization) was re-fetched from the state array, rather
     * than being carried in a scratch variable.  Cokus noticed that
     * the first argument to COMBINE_BITS could be saved in a register
     * in the previous loop iteration, getting rid of the need for an
     * expensive memory reference.
     *
     * Cokus also switched to using pointers to access the state
     * array and broke the original loop into two so that he could
     * avoid using the expensive modulus operator.  Cokus used three
     * pointers; Richard J. Wagner noticed that the offsets between
     * the three were constant, so that they could be collapsed into a
     * single pointer and constant-offset accesses.  This is clearly
     * faster on x86 architectures, and is the same cost on RISC
     * machines.  A secondary benefit is that Cokus' version was
     * register-starved on the x86, while Wagner's version was not.
     *
     * I made several smaller improvements to these observations.
     * First, I reversed the contents of the state vector.  In the
     * current version of the code, this change doesn't directly
     * affect the performance of the refresh loop, but it has the nice
     * side benefit that an all-zero state structure represents an
     * uninitialized generator.  It also slightly speeds up the
     * random-number routines, since they can compare the state
     * pointer against zero instead of against a constant (this makes
     * the biggest difference on RISC machines).
     *
     * Second, I returned to Matsumoto and Nishimura's original
     * technique of using a lookup table to decide whether to xor the
     * constant vector A (MATRIX_A in this code) with the newly
     * computed value.  Cokus and Wagner had used the ?: operator,
     * which requires a test and branch.  Modern machines don't like
     * branches, so the table lookup is faster.
     *
     * Third, in the Cokus and Wagner versions the loop ends with a
     * statement similar to "value1 = value2", which is necessary to
     * carry the fetched value into the next loop iteration.  I
     * recognized that if the loop were unrolled so that it generates
     * two values per iteration, a bit of variable renaming would get
     * rid of that assignment.  A nice side effect is that the
     * overhead of loop control becomes only half as large.
     *
     * It is possible to improve the code's performance somewhat
     * further.  In particular, since the second loop's loop count
     * factors into 2*2*3*3*11, it could be unrolled yet further.
     * That's easy to do, too: just change the "/ 2" into a division
     * by whatever factor you choose, and then use cut-and-paste to
     * duplicate the code in the body.  To remove a few more cycles,
     * fix the code to decrement state_ptr by the unrolling factor, and
     * adjust the various offsets appropriately.  However, the payoff
     * will be small.  At the moment, the x86 version of the loop is
     * 25 instructions, of which 3 are involved in loop control
     * (including the decrementing of state_ptr).  Further unrolling by
     * a factor of 2 would thus produce only about a 6% speedup.
     *
     * The logical extension of the unrolling
     * approach would be to remove the loops and create 624
     * appropriate copies of the body.  However, I think that doing
     * the latter is a bit excessive!
     *
     * I suspect that a superior optimization would be to simplify the
     * mathematical operations involved in the recurrence relation.
     * However, I have no idea whether such a simplification is
     * feasible.
     */
    state_ptr = &state->statevec[MT_STATE_SIZE - 1];
    value1 = *state_ptr;
    for (i = (MT_STATE_SIZE - RECURRENCE_OFFSET) / 2;  --i >= 0;  )
	{
	state_ptr -= 2;
	value2 = state_ptr[1];
	value1 = COMBINE_BITS(value1, value2);
	state_ptr[2] =
	  MATRIX_MULTIPLY(state_ptr[-RECURRENCE_OFFSET + 2], value1);
	value1 = state_ptr[0];
	value2 = COMBINE_BITS(value2, value1);
	state_ptr[1] =
	  MATRIX_MULTIPLY(state_ptr[-RECURRENCE_OFFSET + 1], value2);
	}
    value2 = *--state_ptr;
    value1 = COMBINE_BITS(value1, value2);
    state_ptr[1] =
      MATRIX_MULTIPLY(state_ptr[-RECURRENCE_OFFSET + 1], value1);

    for (i = (RECURRENCE_OFFSET - 1) / 2;  --i >= 0;  )
	{
	state_ptr -= 2;
	value1 = state_ptr[1];
	value2 = COMBINE_BITS(value2, value1);
	state_ptr[2] =
	  MATRIX_MULTIPLY(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET + 2],
	    value2);
	value2 = state_ptr[0];
	value1 = COMBINE_BITS(value1, value2);
	state_ptr[1] =
	  MATRIX_MULTIPLY(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET + 1],
	    value1);
	}

    /*
     * The final entry in the table requires the "previous" value
     * to be gotten from the other end of the state vector, so it
     * must be handled specially.
     */
    value1 = COMBINE_BITS(value2, state->statevec[MT_STATE_SIZE - 1]);
    *state_ptr =
      MATRIX_MULTIPLY(state_ptr[MT_STATE_SIZE - RECURRENCE_OFFSET], value1);

    /*
     * Now that refresh is complete, reset the state pointer to allow more
     * pseudorandom values to be fetched from the state array.
     */
    state->stateptr = MT_STATE_SIZE;
    }

/*
 * Save state to a file.  The save format is compatible with Richard
 * J. Wagner's format, although the details are different.  Returns NZ
 * if the save succeeded.  Produces one very long line containing 625
 * numbers.
 */
int mts_savestate(
    FILE*		statefile,	/* File to save to */
    mt_state*		state)		/* State to be saved */
    {
    int			i;		/* Next word to save */

    if (!state->initialized)
	mts_seed32(state, DEFAULT_SEED32_OLD);

    for (i = MT_STATE_SIZE;  --i >= 0;  )
	{
	if (fprintf(statefile, "%lu ", state->statevec[i]) < 0)
	    return 0;
	}

    if (fprintf(statefile, "%d\n", state->stateptr) < 0)
	return 0;

    return 1;
    }

/*
 * Load state from a file.  Returns NZ if the load succeeded.
 */
int mts_loadstate(
    FILE*		statefile,	/* File to load from */
    mt_state*		state)		/* State to be loaded */
    {
    int			i;		/* Next word to load */

    /*
     * Set the state to "uninitialized" in case the load fails.
     */
    state->initialized = state->stateptr = 0;

    for (i = MT_STATE_SIZE;  --i >= 0;  )
	{
	if (fscanf(statefile, "%lu", &state->statevec[i]) != 1)
	    return 0;
	}

    if (fscanf(statefile, "%d", &state->stateptr) != 1)
	return 0;

    /*
     * The only validity checking we can do is to insist that the
     * state pointer be valid.
     */
    if (state->stateptr < 0  ||  state->stateptr > MT_STATE_SIZE)
	{
	state->stateptr = 0;
	return 0;
	}

    mts_mark_initialized(state);

    return 1;
    }

/*
 * Initialize the default Mersenne Twist PRNG from a 32-bit seed.
 *
 * See mts_seed32 for full commentary.
 */
void mt_seed32(
    unsigned long	seed)		/* 32-bit seed to start from */
    {
    mts_seed32(&mt_default_state, seed);
    }

/*
 * Initialize the default Mersenne Twist PRNG from a 32-bit seed.
 *
 * See mts_seed32new for full commentary.
 */
void mt_seed32new(
    unsigned long	seed)		/* 32-bit seed to start from */
    {
    mts_seed32new(&mt_default_state, seed);
    }

/*
 * Initialize a Mersenne Twist RNG from a 624-long seed.
 *
 * See mts_seedfull for full commentary.
 */
void mt_seedfull(
    unsigned long	seeds[MT_STATE_SIZE])
    {
    mts_seedfull(&mt_default_state, seeds);
    }

/*
 * Initialize the PRNG from random input.  See mts_seed.
 */
void mt_seed()
    {
    mts_seed(&mt_default_state);
    }

/*
 * Initialize the PRNG from random input.  See mts_goodseed.
 */
void mt_goodseed()
    {
    mts_goodseed(&mt_default_state);
    }

/*
 * Initialize the PRNG from random input.  See mts_bestseed.
 */
void mt_bestseed()
    {
    mts_bestseed(&mt_default_state);
    }

/*
 * Return a pointer to the current state of the PRNG.  The purpose of
 * this function is to allow the state to be saved for later
 * restoration.  The state should not be modified; instead, it should
 * be reused later as a parameter to one of the mts_xxx functions.
 */
extern mt_state* mt_getstate()
    {
    return &mt_default_state;
    }

/*
 * Save state to a file.  The save format is compatible with Richard
 * J. Wagner's format, although the details are different.
 */
int mt_savestate(
    FILE*		statefile)	/* File to save to */
    {
    return mts_savestate(statefile, &mt_default_state);
    }

/*
 * Load state from a file.
 */
int mt_loadstate(
    FILE*		statefile)	/* File to load from */
    {
    return mts_loadstate(statefile, &mt_default_state);
    }