phantasmal_dgd_v1/
phantasmal_dgd_v1/bin/
phantasmal_dgd_v1/doc/
phantasmal_dgd_v1/mud/doc/
phantasmal_dgd_v1/mud/doc/api/
phantasmal_dgd_v1/mud/doc/kernel/
phantasmal_dgd_v1/mud/doc/kernel/hook/
phantasmal_dgd_v1/mud/doc/kernel/lfun/
phantasmal_dgd_v1/mud/include/
phantasmal_dgd_v1/mud/include/kernel/
phantasmal_dgd_v1/mud/kernel/lib/
phantasmal_dgd_v1/mud/kernel/lib/api/
phantasmal_dgd_v1/mud/kernel/obj/
phantasmal_dgd_v1/mud/kernel/sys/
phantasmal_dgd_v1/mud/tmp/
phantasmal_dgd_v1/mud/usr/System/
phantasmal_dgd_v1/mud/usr/System/keys/
phantasmal_dgd_v1/mud/usr/System/obj/
phantasmal_dgd_v1/mud/usr/System/open/lib/
phantasmal_dgd_v1/mud/usr/common/data/
phantasmal_dgd_v1/mud/usr/common/lib/parsed/
phantasmal_dgd_v1/mud/usr/common/obj/telopt/
phantasmal_dgd_v1/mud/usr/common/obj/ustate/
phantasmal_dgd_v1/mud/usr/game/
phantasmal_dgd_v1/mud/usr/game/include/
phantasmal_dgd_v1/mud/usr/game/obj/
phantasmal_dgd_v1/mud/usr/game/object/
phantasmal_dgd_v1/mud/usr/game/object/stuff/
phantasmal_dgd_v1/mud/usr/game/sys/
phantasmal_dgd_v1/mud/usr/game/text/
phantasmal_dgd_v1/mud/usr/game/users/
phantasmal_dgd_v1/src/host/
phantasmal_dgd_v1/src/host/beos/
phantasmal_dgd_v1/src/host/mac/
phantasmal_dgd_v1/src/host/unix/
phantasmal_dgd_v1/src/host/win32/res/
phantasmal_dgd_v1/src/kfun/
phantasmal_dgd_v1/src/lpc/
phantasmal_dgd_v1/src/parser/
/*
 * First-line command parser for parsing user input
 */

#include <kernel/kernel.h>

#include <phantasmal/parser.h>
#include <phantasmal/lpc_names.h>

#include <limits.h>
#include <type.h>

/* #define LOGGING */

/* Parser concatenation rules are stuck into another file so that this
   one is more readable. */
inherit concat "/usr/common/lib/parsed/ps_rules";
inherit reg    "/usr/common/lib/parsed/register";

/* code for enabling super-verbose logging */
#ifdef LOGGING
#define LOG(x) write_file("~/parser.log", x);
#else
#define LOG(x)
#endif

/* Where to find parser files */
#define NL_TOKEN_FILE    "/usr/common/sys/nl_tokens.dpd"
#define NL_GRAMMAR_FILE  "/usr/common/sys/nl_parser.dpd"

/* number of ambiguities to keep */
#define NUM_AMBIGUOUS 5

/* Raw contents of NL_GRAMMAR_FILE, as read by upgraded() */
string grammar_file;
/* Raw contents of NL_TOKEN_FILE, as read by upgraded() */
string token_file;
/* Complete grammar text assembled by set_grammar() and passed to
   parse_string() in parse_cmd() */
string grammar;

void upgraded(varargs int clone);
static string uncomment_file(string file);


/* Object initialization.  Runs create() in both inherited programs
   (concatenation rules first, then the word register) and then calls
   upgraded() to load the grammar files.  Note that 'clone' is not
   forwarded: upgraded() is called with its argument omitted. */
static void create(varargs int clone) {
  concat::create();
  reg::create();
  upgraded();
}

void upgraded(varargs int clone) {
  /* Silently ignore callers that pass neither the SYSTEM() nor the
     COMMON() check; per the original notes, only this object itself
     and ObjectD are expected to call this. */
  if(!(SYSTEM() || COMMON())) {
    return;
  }

  /* Give both inherited programs their upgrade notification first */
  concat::upgraded(clone);
  reg::upgraded(clone);

  /* Read both source files before validating either of them */
  token_file = read_file(NL_TOKEN_FILE);
  grammar_file = read_file(NL_GRAMMAR_FILE);

  /* The grammar file is checked first, so its error wins when both
     reads came back nil. */
  if(grammar_file == nil)
    error("Error reading grammar from file " + NL_GRAMMAR_FILE);

  if(token_file == nil)
    error("Error reading token grammar from file " + NL_TOKEN_FILE);

  LOG("Loaded token file:\n");
  LOG(token_file);
  LOG("\n");
  LOG("******************************************************\n");
  LOG("Loaded grammar file:\n");
  LOG(grammar_file);
  LOG("\n");
}


/* Split 'words' into consecutive groups ("lines").  Each word counts
   for strlen(word) + divchar; a group keeps accepting words while its
   running total is still below linelen, so a group always holds at
   least one word and may run past linelen by its last word.  Returns
   the groups in order; an empty word list yields an empty array. */
private string **divide_into_lines(string *words, int linelen, int divchar) {
  string **groups;
  string  *current;
  int      next, total, width;

  groups = ({ });
  total = sizeof(words);
  next = 0;

  while(next < total) {
    /* Seed the group with one word unconditionally */
    current = ({ words[next] });
    width = strlen(words[next]) + divchar;
    next++;

    /* Keep filling while there is room and input left */
    while(width < linelen && next < total) {
      current += ({ words[next] });
      width += strlen(words[next]) + divchar;
      next++;
    }

    groups += ({ current });
  }

  return groups;
}


/* Build the autogenerated, part-of-speech portion of the grammar from
   the words currently in the wordmap.

   Returns a two-element array of strings:
     [0] token rules: for every non-zero combination of part-of-speech
         bits, either one or more "<name> = /(w1)|(w2).../" lines or a
         "# Skipping <name>" comment line when no word has exactly
         that combination;
     [1] production rules: for every part of speech, one
         "<part of speech>: <name>" line per non-empty combination
         that has that part of speech's bit set.

   NOTE(review): words are pasted into the regular expression as-is;
   this presumably relies on registered words containing no regexp
   metacharacters -- confirm against the registration code. */
private string* pos_grammars(void) {
  string  *names;
  string **chunks;
  string   token_rules, pos_rules;
  int      bits, idx;
  int     *members;
  mapping *by_bits, *by_pos;
  mapping  wordmap;

  /* Make sure we have a valid wordmap */
  init_wordmap();

  /* One (initially empty) word set per possible bit combination */
  by_bits = allocate(1 << sizeof(parts_of_speech));
  for(bits = 0; bits < sizeof(by_bits); bits++) {
    by_bits[bits] = ([ ]);
  }

  /* File every word under the bit combination the wordmap gives it */
  wordmap = pvt_get_wordmap();
  names = map_indices(wordmap);
  for(idx = 0; idx < sizeof(names); idx++) {
    by_bits[wordmap[names[idx]]][names[idx]] = 1;
  }

  /* Token rules; combination 0 (no part of speech) is never emitted */
  token_rules = "";
  for(bits = 1; bits < sizeof(by_bits); bits++) {
    names = map_indices(by_bits[bits]);

    if(!names || !sizeof(names)) {
      token_rules += "# Skipping " + make_string_from_pos_bits(bits) + "\n";
      continue;
    }

    /* Divide into 50+-char lines, with 3-char separators */
    chunks = divide_into_lines(names, 50, 3);
    for(idx = 0; idx < sizeof(chunks); idx++) {
      token_rules += make_string_from_pos_bits(bits) + " = /("
        + implode(chunks[idx], ")|(") + ")/\n";
    }
  }

  /* One set of combination numbers per individual part of speech */
  by_pos = allocate(sizeof(parts_of_speech));
  for(idx = 0; idx < sizeof(parts_of_speech); idx++) {
    by_pos[idx] = ([ ]);
  }

  /* A non-empty combination belongs to every part of speech whose bit
     it has set; record it there keyed by the combination number. */
  for(bits = 1; bits < sizeof(by_bits); bits++) {
    if(!map_sizeof(by_bits[bits])) {
      continue;
    }

    for(idx = 0; idx < sizeof(parts_of_speech); idx++) {
      if(bits & (1 << idx)) {
        by_pos[idx][bits] = 1;
      }
    }
  }

  /* Production rules, grouped under a comment line per part of speech */
  pos_rules = "";
  for(idx = 0; idx < sizeof(parts_of_speech); idx++) {
    pos_rules += "# All different kinds of " + parts_of_speech[idx] + "\n";

    members = map_indices(by_pos[idx]);
    for(bits = 0; bits < sizeof(members); bits++) {
      pos_rules += parts_of_speech[idx] + ": "
        + make_string_from_pos_bits(members[bits]) + "\n";
    }

    pos_rules += "\n";
  }

  return ({ token_rules, pos_rules });
}


/* Function for removing comments from grammar text.
 *
 * The text is split on newlines.  A line is dropped when it is empty
 * or when its very first character is the pound ('#') sign; a '#'
 * that appears later in a line does NOT start a comment, and a line
 * with whitespace before the '#' is kept.  Every line that survives
 * is appended to the result followed by a single "\n".
 *
 * Returns the filtered text ("" if nothing survives).
 */

static string uncomment_file(string file) {
  string *lines;
  string result;
  int i;

  result = "";

  lines = explode(file, "\n");
  for(i = 0; i < sizeof(lines); ++i) {
    /* strlen() is tested first so that indexing [0] is always valid */
    if (strlen(lines[i]) > 0 && lines[i][0] != '#') {
      result += lines[i] + "\n";
    }
  }

  return result;
}

private void set_grammar(void) {
  string *generated;
  string  ws_rule, bad_rule;
  string  pos_tokens, pos_rules;

  /* Rebuilds 'grammar' only when regenerate_grammar is set.  Per the
     original notes the flag is raised after a recompile or after OLC
     adds a word to one of the part-of-speech tables; since such a
     word may be typed at any moment, the grammar is simply rebuilt
     whenever the flag is found set. */
  if(!regenerate_grammar) {
    return;
  }

  /* The pieces, concatenated in exactly this order:
       1. the whitespace token (contains control characters, so it has
          to be written from LPC),
       2. the token grammar read from file,
       3. the autogenerated part-of-speech tokens,
       4. the bad-token token (control characters again),
       5. the non-token grammar read from file,
       6. the autogenerated part-of-speech productions.

     If this concatenation ever overruns the DGD string limit,
     uncomment_file() can be applied to the pieces individually
     before they are joined. */
  ws_rule = "whitespace = /[ \r\n\t\b,]+/\n";
  bad_rule = "bad_token = /[^ \r\n\t\b\\\\!,.?:;]+/\n";

  generated = pos_grammars();
  pos_tokens = generated[0];
  pos_rules = generated[1];

  grammar = ws_rule + token_file + pos_tokens + "\n"
    + bad_rule + grammar_file + pos_rules;

  /* Comment lines are stripped from the assembled text in one pass */
  grammar = uncomment_file(grammar);
  regenerate_grammar = 0;

  LOG("*********************************************\n");
  LOG("*********************************************\n");
  LOG("*********************************************\n");
  LOG("Setting grammar:\n" + grammar + "\n");
}


/* Function for parsing commands.
 *
 * Brings the grammar up to date with set_grammar(), then parses 'cmd'
 * against it.
 *
 * Returns ({ }) when cmd is nil or STRINGD reports it as whitespace.
 * Otherwise returns the result of parse_string() on the current
 * grammar, with NUM_AMBIGUOUS passed as its third argument.
 *
 * Any error raised while parsing is replaced by the error
 * "Parsing failed.  Command is '<cmd>'."
 */
mixed *parse_cmd(string cmd){
  set_grammar();

  /* Reject nil/blank input before cmd is used in any expression: with
     LOGGING defined, the log line below concatenates cmd and would be
     handed nil otherwise. */
  if(!cmd || STRINGD->is_whitespace(cmd))
    return ({ });

  LOG("*********************************************\n");
  LOG("Parsing string: " + cmd + "$\n");

  catch {
    return parse_string(grammar, cmd, NUM_AMBIGUOUS);
  } : {
    /* error() does not return, so nothing can follow it here */
    error("Parsing failed.  Command is '" + cmd + "'.");
  }
}

/* Commands should be passed an array of possible parses, just like
   parse_cmd returns.

   NOTE: this is an unfinished stub.  It walks the parses without
   doing anything to them and always returns nil. */
mixed *bind_commands(mixed *commands) {
  int ctr;

  /* First, iterate through all possible parses */
  for(ctr = 0; ctr < sizeof(commands); ctr++) {

    /* Then, for each one, iterate through the independent clauses
       (TODO: not implemented yet) */

  }

  /* Nothing is bound yet; state the previously implicit result
     explicitly instead of falling off the end of the function. */
  return nil;
}