/*
* First-line command parser for parsing user input
*/
#include <kernel/kernel.h>
#include <phantasmal/parser.h>
#include <phantasmal/lpc_names.h>
#include <limits.h>
#include <type.h>
/* #define LOGGING */
/* Parser concatenation rules are stuck into another file so that this
one is more readable. */
inherit concat "/usr/common/lib/parsed/ps_rules";
inherit reg "/usr/common/lib/parsed/register";
/* code for enabling super-verbose logging */
#ifdef LOGGING
#define LOG(x) write_file("~/parser.log", x);
#else
#define LOG(x)
#endif
/* Where to find parser files */
#define NL_TOKEN_FILE "/usr/common/sys/nl_tokens.dpd"
#define NL_GRAMMAR_FILE "/usr/common/sys/nl_parser.dpd"
/* number of ambiguities to keep */
#define NUM_AMBIGUOUS 5
string grammar_file;
string token_file;
string grammar;
void upgraded(varargs int clone);
static string uncomment_file(string file);
static void create(varargs int clone) {
concat::create();
reg::create();
upgraded();
}
void upgraded(varargs int clone) {
/* Expect call only from self and ObjectD */
if(!SYSTEM() && !COMMON())
return;
concat::upgraded(clone);
reg::upgraded(clone);
token_file = read_file(NL_TOKEN_FILE);
grammar_file = read_file(NL_GRAMMAR_FILE);
if (grammar_file == nil) {
error("Error reading grammar from file " + NL_GRAMMAR_FILE);
}
if (token_file == nil) {
error("Error reading token grammar from file " + NL_TOKEN_FILE);
}
LOG("Loaded token file:\n");
LOG(token_file);
LOG("\n");
LOG("******************************************************\n");
LOG("Loaded grammar file:\n");
LOG(grammar_file);
LOG("\n");
}
private string** divide_into_lines(string *words, int linelen, int divchar) {
string **ret;
string *line;
int curlen, ctr;
ret = ({ });
ctr = 0;
while(sizeof(words) > ctr) {
curlen = 0;
line = ({ });
do {
/* Move a word into 'line' */
line += ({ words[ctr] });
curlen += strlen(words[ctr]) + divchar;
ctr++;
} while((curlen < linelen) && (sizeof(words) > ctr));
ret += ({ line });
}
return ret;
}
/* This function writes the part-of-speech token grammar to a string
and returns it. This depends on things like the list of nouns and
adjectives that are currently registered. */
private string* pos_grammars(void) {
string* words;
string** linelist;
string output, nontoken;
int ctr, line;
int* cat_list;
mapping* categories, *pos_categories;
mapping word_type_map;
/* Make sure we have a valid wordmap */
init_wordmap();
/* Allocate and initialize all the category mappings */
categories = allocate(1 << sizeof(parts_of_speech));
for(ctr = 0; ctr < (1 << sizeof(parts_of_speech)); ctr++)
categories[ctr] = ([ ]);
/* For each word, place it in its appropriate category */
word_type_map = pvt_get_wordmap();
words = map_indices(word_type_map);
for(ctr = 0; ctr < sizeof(words); ctr++) {
categories[word_type_map[words[ctr]]][words[ctr]] = 1;
}
/* Make the token grammar from the categories */
output = "";
for(ctr = 1; ctr < sizeof(categories); ctr++) {
words = map_indices(categories[ctr]);
if(words && sizeof(words)) {
/* Divide into 50+-char lines, with 3-char separators */
linelist = divide_into_lines(words, 50, 3);
for(line = 0; line < sizeof(linelist); line++) {
output += make_string_from_pos_bits(ctr) + " = /(";
output += implode(linelist[line], ")|(");
output += ")/\n";
}
} else {
output += "# Skipping " + make_string_from_pos_bits(ctr) + "\n";
}
}
/* Make new categories */
pos_categories = allocate(sizeof(parts_of_speech));
for(ctr = 0; ctr < sizeof(parts_of_speech); ctr++)
pos_categories[ctr] = ([ ]);
/* Sift category information into pos_categories */
for(ctr = 1; ctr < sizeof(categories); ctr++) {
/* For each part of speech (counted off by 'line'), check the bit
and put the category into the pos_categories entry. The
category is assigned to the pos_categories entry by inserting into the
pos_category entry's hash table with a key of the category number and
a value of 1. */
for(line = 0; line < sizeof(parts_of_speech); line++) {
if((ctr & (1 << line)) && map_sizeof(categories[ctr])) {
pos_categories[line][ctr] = 1;
}
}
}
/* Make the non-token grammar from the pos_categories */
nontoken = "";
for(ctr = 0; ctr < sizeof(parts_of_speech); ctr++) {
nontoken += "# All different kinds of " + parts_of_speech[ctr] + "\n";
cat_list = map_indices(pos_categories[ctr]);
for(line = 0; line < sizeof(cat_list); line++) {
nontoken += parts_of_speech[ctr] + ": "
+ make_string_from_pos_bits(cat_list[line]) + "\n";
}
nontoken += "\n";
}
return ({ output, nontoken });
}
/* Function for removing comments from the grammar file. A comment is
* anything which starts with the pound ('#') sign.
*/
static string uncomment_file(string file) {
string *lines;
string result;
int i, j;
result = "";
lines = explode(file, "\n");
for(i = 0; i < sizeof(lines); ++i) {
if (strlen(lines[i]) > 0 && lines[i][0] != '#') {
result += lines[i] + "\n";
}
}
return result;
}
private void set_grammar(void) {
/* This function is told when it's time to regenerate the grammar
from words and files. This is generally after a recompile, or
after OLC has added a new word to one of the part-of-speech
tables. You never know when the user will want to type one of
those new words, so we just regenerate the grammar to be sure. */
/* The grammar is made of a whitespace token (contains ctrl chars,
must be inserted from LPC), an autogenerated chunk of
part-of-speech tokens, a token-parsing grammar from one file, a
bad-token token (also contains ctrl chars), and a non-token
grammar from a second file. These are assembled in the order
listed, using simple string concatenation. */
/* If we ever start overrunning the DGD string limit on this concat,
we can uncomment_file() earlier in the process on the subfiles
individually. */
if(regenerate_grammar) {
string whitespace_string, bad_token_string;
string token_autogen, grammar_autogen;
string *gram;
whitespace_string = "whitespace = /[ \r\n\t\b,]+/\n";
bad_token_string = "bad_token = /[^ \r\n\t\b\\\\!,.?:;]+/\n";
gram = pos_grammars();
token_autogen = gram[0];
grammar_autogen = gram[1];
grammar = whitespace_string + token_file + token_autogen + "\n"
+ bad_token_string + grammar_file + grammar_autogen;
grammar = uncomment_file(grammar);
regenerate_grammar = 0;
LOG("*********************************************\n");
LOG("*********************************************\n");
LOG("*********************************************\n");
LOG("Setting grammar:\n" + grammar + "\n");
}
}
/* function for parsing commands */
mixed *parse_cmd(string cmd){
mixed *ret;
set_grammar();
LOG("*********************************************\n");
LOG("Parsing string: " + cmd + "$\n");
if(!cmd || STRINGD->is_whitespace(cmd))
return ({ });
catch {
ret = parse_string(grammar, cmd, NUM_AMBIGUOUS);
return ret;
} : {
error("Parsing failed. Command is '" + cmd + "'.");
return nil;
}
}
/* Commands should be passed an array of possible parses, just like
parse_cmd returns. */
mixed *bind_commands(mixed *commands) {
int ctr, iclause;
/* First, iterate through all possible parses */
for(ctr = 0; ctr < sizeof(commands); ctr++) {
/* Then, for each one, iterate through the independent clauses */
}
}