pennmush/
pennmush/game/
pennmush/game/data/
README
pennmush/game/log/
README
pennmush/game/save/
README
pennmush/game/txt/
pennmush/game/txt/evt/
pennmush/game/txt/hlp/
pennmush/game/txt/nws/
pennmush/hdrs/
pennmush/hints/
pennmush/os2/
pennmush/po/
Makefile
pennmush/src/
pennmush/test/
pennmush/utils/
pennmush/win32/
pennmush/win32/msvc.net/
pennmush/win32/msvc6/
/**
 * \file wild.c
 *
 * \brief Wildcard matching routines for PennMUSH
 *
 * Written by T. Alexander Popiel, 24 June 1993
 * Last modified by Javelin, 2002-2003
 *
 * Thanks go to Andrew Molitor for debugging
 * Thanks also go to Rich $alz for code to benchmark against
 *
 * Copyright (c) 1993,2000 by T. Alexander Popiel
 * This code is available under the terms of the GPL,
 * see http://www.gnu.org/copyleft/gpl.html for details.
 *
 * This code is included in PennMUSH under the PennMUSH
 * license by special dispensation from the author,
 * T. Alexander Popiel.  If you wish to include this
 * code in other packages, but find the GPL too onerous,
 * then please feel free to contact the author at
 * popiel@wolfskeep.com to work out agreeable terms.
 */

#include "config.h"
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#include "copyrite.h"
#include "conf.h"
#include "case.h"
#include "externs.h"
#include "mymalloc.h"
#include "parse.h"
#include "pcre.h"
#include "confmagic.h"

/** Force a char to be lowercase */
#define FIXCASE(a) (DOWNCASE(a))
/** Check for equality of characters, maybe case-sensitive.
 * When cs is non-zero, a and b are compared as given; otherwise their
 * FIXCASE()d forms are compared.  Arguments are parenthesized so that
 * any expression can be passed safely.
 */
#define EQUAL(cs,a,b) ((cs) ? ((a) == (b)) : (FIXCASE(a) == FIXCASE(b)))
/** Check for inequality of characters, maybe case-sensitive.
 * Exact opposite of EQUAL().
 */
#define NOTEQUAL(cs,a,b) ((cs) ? ((a) != (b)) : (FIXCASE(a) != FIXCASE(b)))
/** Maximum number of wildcarded arguments */
#define NUMARGS (10)

/** Pointer to character tables; handed to pcre_compile() by the regexp
 * matching functions in this file.
 */
const unsigned char *tables = NULL;

/* Argument return buffer: wild() passes it to wild1() as the place to
 * store captured wildcard text.  Big to match tprintf.
 */
static char wspace[3 * BUFFER_LEN + NUMARGS];

static int wild1
  (const char *RESTRICT tstr, const char *RESTRICT dstr, int arg,
   char *RESTRICT wbuf, int cs);
static int wild(const char *RESTRICT s, const char *RESTRICT d, int p, int cs);
static int check_literals(const char *RESTRICT tstr, const char *RESTRICT dstr,
                          int cs);
static char *strip_backslashes(const char *str);

/** Case-insensitive wildcard match that does not record wildcard data.
 *
 * Neither argument may be NULL; passing NULL will crash.
 *
 * \param tstr pattern to match against.
 * \param dstr string to check.
 * \retval 1 dstr matches the tstr pattern.
 * \retval 0 dstr does not match the tstr pattern.
 */
int
quick_wild(const char *RESTRICT tstr, const char *RESTRICT dstr)
{
  /* Run the literal sanity check first; only when it passes is the
   * real matcher, quick_wild_new(), consulted.
   */
  if (check_literals(tstr, dstr, 0))
    return quick_wild_new(tstr, dstr, 0);
  return 0;
}

/** Wildcard match, optionally case-sensitive, that records nothing.
 *
 * '?' matches exactly one character, '*' matches any run of characters,
 * and a backslash makes the next pattern character literal.
 * Neither argument may be NULL.
 *
 * \param tstr pattern to match against.
 * \param dstr string to check.
 * \param cs if 1, case-sensitive; if 0, case-insensitive.
 * \retval 1 dstr matches the tstr pattern.
 * \retval 0 dstr does not match the tstr pattern.
 */
int
quick_wild_new(const char *RESTRICT tstr, const char *RESTRICT dstr, int cs)
{
  /* Walk pattern and data in lockstep up to the first '*'. */
  for (; *tstr != '*'; tstr++, dstr++) {
    if (*tstr == '?') {
      /* '?' needs one data character to consume. */
      if (!*dstr)
        return 0;
      continue;
    }
    /* A backslash escapes the pattern character that follows it. */
    if (*tstr == '\\')
      tstr++;
    /* Literal comparison. */
    if (NOTEQUAL(cs, *dstr, *tstr))
      return 0;
    /* Equal and at end of data: the pattern ended too, so it matches. */
    if (!*dstr)
      return 1;
  }

  /* Step past the '*'.  A '*' that ends the pattern matches anything. */
  tstr++;
  if (!*tstr)
    return 1;

  /* Absorb the wildcards that follow: every '?' eats one data
   * character, further '*'s add nothing.
   */
  for (; (*tstr == '?') || (*tstr == '*'); tstr++) {
    if (*tstr == '*')
      continue;
    if (!*dstr)
      return 0;
    dstr++;
  }

  /* Drop an escaping backslash so *tstr is the literal to search for. */
  if (*tstr == '\\')
    tstr++;

  /* Only wildcards were left in the pattern. */
  if (!*tstr)
    return 1;

  /* Try each data position holding that literal; the remainder of the
   * pattern is matched recursively.
   */
  for (; *dstr; dstr++) {
    if (NOTEQUAL(cs, *dstr, *tstr))
      continue;
    if (quick_wild_new(tstr + 1, dstr + 1, cs))
      return 1;
  }
  return 0;
}

/** Do an attribute name wildcard match.
 *
 * This probably crashes if fed NULLs instead of strings, too.
 * The special thing about this one is that ` doesn't match normal
 * wildcards; you have to use ** to match embedded `.  Also, patterns
 * ending in ` are treated as patterns ending in `*, and empty patterns
 * are treated as *.  Matching is always case-insensitive.
 *
 * \param tstr pattern to match against.
 * \param dstr string to check.
 * \retval 1 dstr matches the tstr pattern.
 * \retval 0 dstr does not match the tstr pattern.
 */
int
atr_wild(const char *RESTRICT tstr, const char *RESTRICT dstr)
{
  int starcount;

  /* An empty pattern behaves like '*': anything without a ` matches. */
  if (!*tstr)
    return !strchr(dstr, '`');

  while (*tstr != '*') {
    switch (*tstr) {
    case '?':
      /* Single character match.  Return false if at
       * end of data or at a ` delimiter.
       */
      if (!*dstr || *dstr == '`')
        return 0;
      break;
    case '`':
      /* Delimiter match.  At end of pattern the rest of the data
       * must not contain another `.
       */
      if (*dstr != '`')
        return 0;
      if (!tstr[1])
        return !strchr(dstr + 1, '`');
      break;
    case '\\':
      /* Escape character.  Move up, and force literal
       * match of next character.
       */
      tstr++;
      /* FALL THROUGH */
    default:
      /* Literal character.  Check for a match.
       * If matching end of data, return true.
       */
      if (NOTEQUAL(0, *dstr, *tstr))
        return 0;
      if (!*dstr)
        return 1;
    }
    tstr++;
    dstr++;
  }

  /* Skip over '*'. */
  tstr++;
  starcount = 1;

  /* Skip over wildcards.  A second '*' turns this into '**'
   * (starcount 2) and ends the loop; a '?' consumes one non-`
   * character and resets the star count.
   */
  while (starcount < 2 && ((*tstr == '?') || (*tstr == '*'))) {
    if (*tstr == '?') {
      if (!*dstr || *dstr == '`')
        return 0;
      dstr++;
      starcount = 0;
    } else
      starcount++;
    tstr++;
  }

  /* Skip over long strings of '*'. */
  while (*tstr == '*')
    tstr++;

  /* Trailing wildcard: '**' matches anything, otherwise the rest of
   * the data must not contain a `.
   */
  if (!*tstr)
    return starcount == 2 || !strchr(dstr, '`');

  if (*tstr == '?') {
    /* Scan for possible matches. */
    while (*dstr) {
      if (*dstr != '`' && atr_wild(tstr + 1, dstr + 1))
        return 1;
      dstr++;
    }
  } else {
    /* Skip over a backslash in the pattern string if it is there. */
    if (*tstr == '\\')
      tstr++;

    /* Scan for possible matches. */
    while (*dstr) {
      if (EQUAL(0, *dstr, *tstr)) {
        /* When this literal is the last pattern character but data
         * remains, this position cannot be the match: the recursive
         * call would see an empty pattern, which is treated as '*'.
         * Do not give up, though -- a later occurrence of the literal
         * may end the data (e.g. pattern "*a" against "aba"), so just
         * skip this position and keep scanning.
         */
        if (tstr[1] || !dstr[1]) {
          if (atr_wild(tstr + 1, dstr + 1))
            return 1;
        }
      }
      /* A single '*' may not be stretched across a ` delimiter. */
      if (starcount < 2 && *dstr == '`')
        return 0;
      dstr++;
    }
  }
  return 0;
}

/* ---------------------------------------------------------------------------
 * wild1: INTERNAL: do a wildcard match, remembering the wild data.
 *
 * DO NOT CALL THIS FUNCTION DIRECTLY - DOING SO MAY RESULT IN
 * SERVER CRASHES AND IMPROPER ARGUMENT RETURN.
 *
 * tstr is the pattern, dstr the data, arg the index of the next
 * capture slot to fill, wbuf the place in the capture buffer where
 * this call may store text, and cs selects case-sensitive matching.
 * Once arg reaches NUMARGS no more captures are stored and matching
 * is handed to quick_wild_new().
 *
 * Returns 1 on a match, 0 otherwise.
 *
 * Side Effect: this routine modifies global_eval_context.wnxt,
 * and what it points to.
 */
static int
wild1(const char *RESTRICT tstr, const char *RESTRICT dstr, int arg,
      char *RESTRICT wbuf, int cs)
{
  const char *datapos;
  char *wnext;
  int argpos, numextra;

  while (*tstr != '*') {
    switch (*tstr) {
    case '?':
      /* Single character match.  Return false if at
       * end of data.
       */
      if (!*dstr)
        return 0;

      /* Store the matched character as a one-character capture. */
      global_eval_context.wnxt[arg++] = wbuf;
      *wbuf++ = *dstr;
      *wbuf++ = '\0';

      /* Jump to the fast routine if we can. */

      if (arg >= NUMARGS)
        return quick_wild_new(tstr + 1, dstr + 1, cs);
      break;
    case '\\':
      /* Escape character.  Move up, and force literal
       * match of next character.
       */
      tstr++;
      /* FALL THROUGH */
    default:
      /* Literal character.  Check for a match.
       * If matching end of data, return true.
       */
      if (NOTEQUAL(cs, *dstr, *tstr))
        return 0;
      if (!*dstr)
        return 1;
    }
    tstr++;
    dstr++;
  }

  /* If at end of pattern, slurp the rest, and leave. */
  if (!tstr[1]) {
    global_eval_context.wnxt[arg] = wbuf;
    strcpy(wbuf, dstr);
    return 1;
  }
  /* Remember current position for filling in the '*' return. */
  datapos = dstr;
  argpos = arg;

  /* Scan forward until we find a non-wildcard. */
  do {
    if (argpos < arg) {
      /* Fill in arguments if someone put another '*'
       * before a fixed string.  The earlier '*' captures nothing.
       */
      global_eval_context.wnxt[argpos++] = wbuf;
      *wbuf++ = '\0';

      /* Jump to the fast routine if we can. */
      if (argpos >= NUMARGS)
        return quick_wild_new(tstr, dstr, cs);

      /* Fill in any intervening '?'s */
      while (argpos < arg) {
        global_eval_context.wnxt[argpos++] = wbuf;
        *wbuf++ = *datapos++;
        *wbuf++ = '\0';

        /* Jump to the fast routine if we can. */
        if (argpos >= NUMARGS)
          return quick_wild_new(tstr, dstr, cs);
      }
    }
    /* Skip over the '*' for now... */
    tstr++;
    arg++;

    /* Skip over '?'s for now... */
    numextra = 0;
    while (*tstr == '?') {
      if (!*dstr)
        return 0;
      tstr++;
      dstr++;
      arg++;
      numextra++;
    }
  } while (*tstr == '*');

  /* Skip over a backslash in the pattern string if it is there. */
  if (*tstr == '\\')
    tstr++;

  /* Check for possible matches.  This loop terminates either at
   * end of data (resulting in failure), or at a successful match.
   */
  if (!*tstr)
    while (*dstr)
      dstr++;
  else {
    /* The recursive call stores its captures at wnext, so wnext must
     * lie beyond everything this level writes after a match: the '*'
     * text plus its NUL, and two bytes (character and NUL) for each
     * of the numextra trailing '?'s.  Here dstr is numextra characters
     * past datapos, and both dstr and wnext advance together below,
     * so starting 1 + 2 * numextra bytes in keeps the regions apart.
     * Without that allowance the '?' captures written below would
     * land on top of the recursive call's captures.
     */
    wnext = wbuf + 1 + 2 * numextra;
    while (1) {
      if (EQUAL(cs, *dstr, *tstr) &&
          ((arg < NUMARGS) ? wild1(tstr, dstr, arg, wnext, cs)
           : quick_wild_new(tstr, dstr, cs)))
        break;
      if (!*dstr)
        return 0;
      dstr++;
      wnext++;
    }
  }

  /* Found a match!  Fill in all remaining arguments.
   * First do the '*'; the last numextra characters before dstr
   * belong to the trailing '?'s, not to the '*'.
   */
  global_eval_context.wnxt[argpos++] = wbuf;
  strncpy(wbuf, datapos, (dstr - datapos) - numextra);
  wbuf += (dstr - datapos) - numextra;
  *wbuf++ = '\0';
  datapos = dstr - numextra;

  /* Fill in any trailing '?'s that are left. */
  while (numextra) {
    if (argpos >= NUMARGS)
      return 1;
    global_eval_context.wnxt[argpos++] = wbuf;
    *wbuf++ = *datapos++;
    *wbuf++ = '\0';
    numextra--;
  }

  /* It's done! */
  return 1;
}

/* ---------------------------------------------------------------------------
 * wild: do a wildcard match, remembering the wild data.
 *
 * s is the pattern, d the data, p the first capture slot to fill and
 * cs selects case-sensitive matching.  Returns 1 on a match, else 0.
 *
 * This routine will cause crashes if fed NULLs instead of strings.
 *
 * Side Effect: through wild1() this routine modifies
 * global_eval_context.wnxt and the wspace capture buffer.
 */
static int
wild(const char *RESTRICT s, const char *RESTRICT d, int p, int cs)
{
  /* Fast path: compare the literal prefix that precedes the first
   * wildcard, honouring backslash escapes.
   */
  for (; (*s != '*') && (*s != '?'); s++, d++) {
    if (*s == '\\')
      s++;
    if (NOTEQUAL(cs, *d, *s))
      return 0;
    /* Equal and at end of data: the pattern has ended as well. */
    if (!*d)
      return 1;
  }

  /* Sanity check first, then the capturing match on what remains. */
  return check_literals(s, d, cs) ? wild1(s, d, p, wspace, cs) : 0;
}

/** Wildcard match, optionally case-sensitive, recording the wild data.
 *
 * All entries of global_eval_context.wnxt and global_eval_context.rnxt
 * are reset to NULL before the match is attempted.
 * This routine will cause crashes if fed NULLs instead of strings.
 *
 * \param s pattern to match against.
 * \param d string to check.
 * \param cs if 1, case-sensitive; if 0, case-insensitive.
 * \retval 1 d matches s.
 * \retval 0 d doesn't match s.
 */
int
wild_match_case(const char *RESTRICT s, const char *RESTRICT d, int cs)
{
  int slot;

  /* Clear %0-%9 ... */
  slot = 0;
  while (slot < NUMARGS) {
    global_eval_context.wnxt[slot] = (char *) NULL;
    slot++;
  }
  /* ... and r(0) - r(9) */
  slot = 0;
  while (slot < NUMQ) {
    global_eval_context.rnxt[slot] = (char *) NULL;
    slot++;
  }

  /* Captures are stored starting at slot 0. */
  return wild(s, d, 0, cs);
}

/** Regexp match, optionally case-sensitive, recording subexpressions.
 *
 * On a match, global_eval_context.wnxt[0] is the whole matched text and
 * the following entries are the parenthesized subexpressions; they
 * point into a static buffer owned by this function.  A pattern that
 * fails to compile is reported as a non-match.
 * This routine will cause crashes if fed NULLs instead of strings.
 *
 * \param s regexp to match against.
 * \param d string to check.
 * \param cs if 1, case-sensitive; if 0, case-insensitive.
 * \retval 1 d matches s.
 * \retval 0 d doesn't match s.
 */
int
regexp_match_case(const char *RESTRICT s, const char *RESTRICT d, int cs)
{
  static char wtmp[NUMARGS][BUFFER_LEN];
  pcre *re;
  const char *errptr;
  int erroffset;
  int offsets[99];
  int subpatterns;
  int slot;

  /* A compile failure leaves a message in errptr; it is ignored
   * because we're doing command-matching.
   */
  re = pcre_compile(s, (cs ? 0 : PCRE_CASELESS), &errptr, &erroffset, tables);
  if (re == NULL)
    return 0;
  add_check("pcre");

  /* Run the match; pcre_exec() fills in the offsets vector. */
  subpatterns = pcre_exec(re, NULL, d, strlen(d), 0, 0, offsets, 99);
  if (subpatterns < 0) {
    mush_free(re, "pcre");
    return 0;
  }

  /* If we had too many subpatterns for the offsets vector, set the
   * number to 1/3 of the size of the offsets vector.
   */
  if (subpatterns == 0)
    subpatterns = 33;

  /* Clear %0-%9 (and their backing storage) and r(0) - r(9). */
  for (slot = 0; slot < NUMARGS; slot++) {
    wtmp[slot][0] = '\0';
    global_eval_context.wnxt[slot] = (char *) NULL;
  }
  for (slot = 0; slot < NUMQ; slot++)
    global_eval_context.rnxt[slot] = (char *) NULL;

  /* Fill in the args vector.  In regexp matching, 0 is the entire
   * string matched and the parenthesized strings go from 1 to 9; that
   * numbering is kept for consistency with other languages.
   */
  for (slot = 0; (slot < 10) && (slot < NUMARGS); slot++) {
    pcre_copy_substring(d, offsets, subpatterns, slot, wtmp[slot],
                        BUFFER_LEN);
    global_eval_context.wnxt[slot] = wtmp[slot];
  }

  mush_free(re, "pcre");
  return 1;
}


/** Regexp match, optionally case-sensitive, recording nothing.
 *
 * A pattern that fails to compile is reported as a non-match.
 * This routine will cause crashes if fed NULLs instead of strings.
 *
 * \param s regexp to match against.
 * \param d string to check.
 * \param cs if 1, case-sensitive; if 0, case-insensitive.
 * \retval 1 d matches s.
 * \retval 0 d doesn't match s.
 */
int
quick_regexp_match(const char *RESTRICT s, const char *RESTRICT d, int cs)
{
  pcre *re;
  const char *errptr;
  int erroffset;
  int offsets[99];
  int rc;

  /* There's a PCRE_NO_AUTO_CAPTURE flag to turn all raw ()'s into
   * (?:)'s, which would be nice to use, except that people might use
   * backreferences in their patterns.  So the only option ever set is
   * PCRE_CASELESS, for case-insensitive matches.
   */
  re = pcre_compile(s, (cs ? 0 : PCRE_CASELESS), &errptr, &erroffset, tables);
  if (re == NULL) {
    /* The error message in errptr is ignored, since we're doing
     * command-matching.
     */
    return 0;
  }
  add_check("pcre");

  /* Only the return code matters here; the offsets are discarded. */
  rc = pcre_exec(re, NULL, d, strlen(d), 0, 0, offsets, 99);
  mush_free(re, "pcre");

  return rc >= 0;
}


/** Either an order comparison or a wildcard match with no memory.
 *
 * A pattern starting with '>' or '<' is an order comparison between
 * the rest of the pattern and d: numeric when both are numbers
 * according to is_number(), otherwise by strcoll().  Any other pattern
 * is handed to quick_wild_new().
 * This routine will cause crashes if fed NULLs instead of strings.
 *
 * \param s pattern to match against.
 * \param d string to check.
 * \param cs if 1, case-sensitive; if 0, case-insensitive.
 * \retval 1 d matches s.
 * \retval 0 d doesn't match s.
 */
int
local_wild_match_case(const char *RESTRICT s, const char *RESTRICT d, int cs)
{
  if (*s == '>') {
    /* ">X" matches when X sorts before d. */
    s++;
    if (is_number(s) && is_number(d))
      return (parse_number(s) < parse_number(d));
    return (strcoll(s, d) < 0);
  }

  if (*s == '<') {
    /* "<X" matches when X sorts after d. */
    s++;
    if (is_number(s) && is_number(d))
      return (parse_number(s) > parse_number(d));
    return (strcoll(s, d) > 0);
  }

  return quick_wild_new(s, d, cs);
}

/** Report whether a string contains a wildcard character (* or ?).
 * The wild matching routines do not call this; it is meant for use by
 * other code.
 * \param s string to check.
 * \retval 1 s contains a * or ?
 * \retval 0 s does not contain a * or ?
 */
int
wildcard(const char *s)
{
  /* strpbrk() finds the first occurrence of either character. */
  return strpbrk(s, "*?") != NULL;
}

/* check_literals: cheap pre-check run before a full wildcard match.
 *
 * Every literal string in tstr must appear, in order, in dstr,
 * or no match can happen. That is, tstr is the pattern and dstr
 * is the string-to-match.  The literal strings are the pieces of the
 * backslash-stripped pattern that lie between '?' and '*' characters.
 * cs selects a case-sensitive (non-zero) or case-insensitive (0) check.
 *
 * Returns 1 if a match is still possible, 0 if it is ruled out.
 */
static int
check_literals(const char *RESTRICT tstr, const char *RESTRICT dstr, int cs)
{
  char tbuf1[BUFFER_LEN];
  char dbuf1[BUFFER_LEN];
  const char delims[] = "?*";
  char *sp, *dp;

  /* Work on private copies: the pattern copy is cut into pieces below,
   * and both copies are upcased for a case-insensitive check.
   */
  strncpy(dbuf1, dstr, BUFFER_LEN - 1);
  dbuf1[BUFFER_LEN - 1] = '\0';
  strcpy(tbuf1, strip_backslashes(tstr));
  if (!cs) {
    upcasestr(tbuf1);
    upcasestr(dbuf1);
  }

  /* Split the pattern on wildcards by hand rather than with strtok(),
   * whose hidden static state would be clobbered for any caller that
   * is itself in the middle of a strtok() sequence.
   */
  dp = dbuf1;
  sp = tbuf1 + strspn(tbuf1, delims);
  while (*sp) {
    size_t len = strcspn(sp, delims);
    int last = (sp[len] == '\0');

    /* Terminate this literal so strstr() can search for it. */
    sp[len] = '\0';
    dp = strstr(dp, sp);
    if (!dp)
      return 0;
    /* The next literal must be found after this one. */
    dp += len;
    if (last)
      break;
    sp += len + 1;
    sp += strspn(sp, delims);
  }
  return 1;
}


/* strip_backslashes: copy str into a static buffer, dropping each
 * backslash that is followed by another character (the escaped
 * character itself is kept).  At most BUFFER_LEN - 1 characters are
 * stored.  The returned pointer refers to that static buffer, so the
 * result is overwritten by the next call.
 */
static char *
strip_backslashes(const char *str)
{
  static char buf[BUFFER_LEN];
  char *out = buf;
  char *const limit = buf + (BUFFER_LEN - 1);

  for (; *str && (out < limit); str++) {
    /* Step over an escaping backslash; a trailing one is copied. */
    if (str[0] == '\\' && str[1] != '\0')
      str++;
    *out++ = *str;
  }
  *out = '\0';
  return buf;
}