/*
* NAME: parser.c
* DESCRIPTION: MOO code parser; returns AST
*/
# define DEBUG 0
inherit "/std/lex";
# if DEBUG
inherit "/std/vartext";
# else
# define var2str(arg) ""
# endif
# include <objects.h>
# include <moo/data.h>
# include <moo/tokens.h>
/*
 * Parser state.  main() resets token, line and errors at the start of
 * every parse; btable is loaded once and kept.
 */
mixed token; /* current token: compared directly against TOK_* values, or an array indexed as ({ type, value }) */
int line; /* current line number; used as the "Line N: " prefix of logged errors */
object btable; /* table of builtin functions */
string *errors; /* list of resulting errors */
/* Log a message, prefixed with the current line number; does not abort. */
# define ERROR(msg) (errors += ({ "Line " + line + ": " + (msg) }) )
/* Log "parse error" and then raise a runtime error to unwind the parse. */
# define PARSE_ERROR() (ERROR("parse error"), error("Parse error"))
/* Accessors for an array-valued token. */
# define TOKEN_TYPE token[0]
# define TOKEN_VALUE token[1]
/* Token classification; each is false when token is not an array. */
# define IDENTIFIERP() (arrayp(token) && TOKEN_TYPE == TOK_IDENTIFIER)
/* NOTE(review): any array token that is not an identifier counts as a literal here. */
# define LITERALP() (arrayp(token) && TOKEN_TYPE != TOK_IDENTIFIER)
# define COMMENTP() (arrayp(token) && TOKEN_TYPE == TOK_COMMENT)
/* PARSE(x) invokes production x; PRODUCTION(x) declares/defines it as p_x(). */
# define PARSE(x) p_##x()
# define PRODUCTION(x) static mixed *p_##x(void)
/*
 * NAME:	advance()
 * DESCRIPTION:	fetch the next token from the inherited lexer into `token`,
 *		skipping comment tokens.  A lexer error whose message starts
 *		with '*' is logged (without the '*') and scanning continues;
 *		any other error is re-raised.
 */
static void advance(void)
{
    string failure;

    for (;;) {
        failure = catch(token = ::advance());
        if (failure) {
            if (failure[0] != '*')
                error(failure);		/* more serious */
            ERROR(failure[1 ..]);
            continue;			/* try for another token */
        }
        if (! COMMENTP())
            break;
    }
}
/*
 * NAME:	match()
 * DESCRIPTION:	require the current token to be `what` and step past it;
 *		otherwise log and raise a parse error
 */
private void match(int what)
{
    if (token != what)
        PARSE_ERROR();		/* raises an error; does not return */
    else
        advance();
}
/*
 * Forward declarations of the grammar productions.  Each expands to
 * `static mixed *p_<name>(void)`; they are declared up front because the
 * productions call one another recursively.
 */
PRODUCTION(program);
PRODUCTION(statement);
PRODUCTION(if_condition);
PRODUCTION(for_loop);
PRODUCTION(while_loop);
PRODUCTION(fork_statements);
PRODUCTION(return);
PRODUCTION(expression);
PRODUCTION(value);
PRODUCTION(conditional);
PRODUCTION(logical);
PRODUCTION(relational);
PRODUCTION(term);
PRODUCTION(factor);
PRODUCTION(unary);
PRODUCTION(primary);
PRODUCTION(modifier);
PRODUCTION(func_args);
PRODUCTION(list_elems);
PRODUCTION(buf_elems);
/*
 * NAME:	main()
 * DESCRIPTION:	parse a MOO program given as source text.
 *		Returns ({ 1, statements }) on success, or ({ 0, errors })
 *		when any error was logged.  An error raised while nothing
 *		has been logged is passed on to the caller.
 */
mixed *main(string src)
{
    mixed *code, *elt;
    string errmsg;

    /* reset the per-parse state */
    set_input(src);
    token = 0;
    errors = ({ });
    line = 1;

    if (! btable)
        btable = load_object(BTABLE);

    advance();			/* prime the first token */

    code = ({ });
    while (token != TOK_EOF) {
        errmsg = catch(elt = PARSE(statement));
        if (errmsg) {
            if (! sizeof(errors))
                error(errmsg);	/* more serious */
            return ({ 0, errors });
        }

        /* empty statements yield 0 and are left out */
        if (elt)
            code += ({ elt });
    }

    if (sizeof(errors))
        return ({ 0, errors });
    return ({ 1, code });
}
/*
 * NAME:	p_statement()
 * DESCRIPTION:	parse one statement, dispatching on the current token.
 *		Returns the statement's AST, or 0 for an empty statement (a
 *		lone semicolon).
 */
PRODUCTION(statement)
{
    mixed *expr;

    if (token == TOK_IF)
        return PARSE(if_condition);
    if (token == TOK_FOR)
        return PARSE(for_loop);
    if (token == TOK_WHILE)
        return PARSE(while_loop);
    if (token == TOK_FORK)
        return PARSE(fork_statements);
    if (token == TOK_RETURN)
        return PARSE(return);

    if (token == TOK_SEMICOLON) {
        advance();
        return 0;
    }
    if (token == TOK_EOF) {
        error("Unexpected end of input");
        return 0;
    }

    /* anything else is an expression statement */
    expr = PARSE(expression);
    match(TOK_SEMICOLON);
    line++;
    return expr;
}
/*
 * NAME:	p_if_condition()
 * DESCRIPTION:	parse if (...) ... [elseif (...) ...]* [else ...] endif.
 *		The result is a flat array:
 *		  ({ TOK_IF, cond, body,
 *		     [TOK_ELSEIF, cond, body,]*
 *		     [TOK_ELSE, body] })
 */
PRODUCTION(if_condition)
{
    mixed *ast, *body, *stmt;
    mixed tag;

    ast = ({ });
    tag = TOK_IF;

    /* the leading "if" clause and every "elseif" clause share one shape */
    do {
        advance();		/* TOK_IF or TOK_ELSEIF */
        match(TOK_LPAREN);
        ast += ({ tag, PARSE(expression) });
        match(TOK_RPAREN);
        line++;

        body = ({ });
        while (!(token == TOK_ELSEIF || token == TOK_ELSE ||
                 token == TOK_ENDIF || token == TOK_EOF)) {
            stmt = PARSE(statement);
            if (stmt)
                body += ({ stmt });
        }
        ast += ({ body });

        tag = TOK_ELSEIF;
    } while (token == TOK_ELSEIF);

    if (token == TOK_ELSE) {
        advance();		/* TOK_ELSE */
        line++;

        body = ({ });
        while (!(token == TOK_ENDIF || token == TOK_EOF)) {
            stmt = PARSE(statement);
            if (stmt)
                body += ({ stmt });
        }
        ast += ({ TOK_ELSE, body });
    }

    match(TOK_ENDIF);
    line++;
    return ast;
}
/*
 * NAME:	p_for_loop()
 * DESCRIPTION:	parse a for loop.  Three headers are accepted:
 *		  for a TOK_ASSOC b in (expr)
 *		  for a in (expr)
 *		  for a in [lo TOK_RANGE hi]
 *		Result: ({ TOK_FOR, var, source, body }) where var is the
 *		identifier value (or a pair of them for the first form) and
 *		source is ({ TOK_LPAREN, expr }) or ({ TOK_RANGE, lo, hi }).
 */
PRODUCTION(for_loop)
{
    mixed *ast, *body, *stmt;
    mixed first;

    advance();			/* TOK_FOR */
    if (! IDENTIFIERP())
        PARSE_ERROR();
    first = TOKEN_VALUE;
    advance();

    if (token != TOK_ASSOC) {
        /* single loop variable */
        ast = ({ TOK_FOR, first, 0, 0 });
        match(TOK_IN);
        if (token != TOK_LPAREN) {
            mixed *low, *high;

            match(TOK_LBRACKET);
            low = PARSE(expression);
            match(TOK_RANGE);
            high = PARSE(expression);
            match(TOK_RBRACKET);
            ast[2] = ({ TOK_RANGE, low, high });
        } else {
            advance();		/* TOK_LPAREN */
            ast[2] = ({ TOK_LPAREN, PARSE(expression) });
            match(TOK_RPAREN);
        }
    } else {
        /* pair of loop variables */
        advance();		/* TOK_ASSOC */
        if (! IDENTIFIERP())
            PARSE_ERROR();
        ast = ({ TOK_FOR, ({ first, TOKEN_VALUE }), 0, 0 });
        advance();
        match(TOK_IN);
        match(TOK_LPAREN);
        ast[2] = ({ TOK_LPAREN, PARSE(expression) });
        match(TOK_RPAREN);
    }
    line++;

    body = ({ });
    while (!(token == TOK_ENDFOR || token == TOK_EOF)) {
        stmt = PARSE(statement);
        if (stmt)
            body += ({ stmt });
    }
    ast[3] = body;

    match(TOK_ENDFOR);
    line++;
    return ast;
}
/*
 * NAME:	p_while_loop()
 * DESCRIPTION:	parse while (cond) ... endwhile.
 *		Result: ({ TOK_WHILE, cond, body })
 */
PRODUCTION(while_loop)
{
    mixed *cond, *body, *stmt;

    advance();			/* TOK_WHILE */
    match(TOK_LPAREN);
    cond = PARSE(expression);
    match(TOK_RPAREN);
    line++;

    body = ({ });
    while (!(token == TOK_ENDWHILE || token == TOK_EOF)) {
        stmt = PARSE(statement);
        if (stmt)
            body += ({ stmt });
    }

    match(TOK_ENDWHILE);
    line++;
    return ({ TOK_WHILE, cond, body });
}
/*
 * NAME:	p_fork_statements()
 * DESCRIPTION:	parse fork [name] (expr) ... endfork.
 *		Result: ({ TOK_FORK, name-or-0, expr, body })
 */
PRODUCTION(fork_statements)
{
    mixed *expr, *body, *stmt;
    mixed label;

    advance();			/* TOK_FORK */
    if (token == TOK_LPAREN)
        label = 0;		/* anonymous fork */
    else {
        if (! IDENTIFIERP())
            PARSE_ERROR();
        label = TOKEN_VALUE;
        advance();
    }

    match(TOK_LPAREN);
    expr = PARSE(expression);
    match(TOK_RPAREN);
    line++;

    body = ({ });
    while (!(token == TOK_ENDFORK || token == TOK_EOF)) {
        stmt = PARSE(statement);
        if (stmt)
            body += ({ stmt });
    }

    match(TOK_ENDFORK);
    line++;
    return ({ TOK_FORK, label, expr, body });
}
/*
 * NAME:	p_return()
 * DESCRIPTION:	parse return [expr];
 *		Result: ({ TOK_RETURN }) or ({ TOK_RETURN, expr })
 */
PRODUCTION(return)
{
    mixed *ast;

    advance();			/* TOK_RETURN */
    ast = ({ TOK_RETURN });
    if (token != TOK_SEMICOLON)
        ast += ({ PARSE(expression) });

    match(TOK_SEMICOLON);
    line++;
    return ast;
}
/*
 * NAME:	p_expression()
 * DESCRIPTION:	parse an expression, including right-associative assignment.
 *		An assignment yields ({ TOK_ASSIGN, lhs, rhs }), tagged with
 *		TF_SIMPLE when both sides are SIMPLE.  An invalid assignment
 *		target is logged but parsing goes on.
 */
PRODUCTION(expression)
{
    mixed *ast, *lhs, *rhs, *target;

    ast = PARSE(value);
    if (token == TOK_ASSIGN) {
        advance();		/* TOK_ASSIGN */

        /*
         * Find the innermost object being assigned to: peel off at most
         * one outer range subscript, then any number of plain subscripts.
         */
        lhs = ast;
        target = lhs;
        if (TAG(target) == TOK_LBRACKET && TAG(target[2]) == TOK_RANGE)
            target = target[1];
        while (TAG(target) == TOK_LBRACKET && TAG(target[2]) != TOK_RANGE)
            target = target[1];

        rhs = PARSE(expression);
        if (SIMPLE(lhs) && SIMPLE(rhs))
            ast = ({ TOK_ASSIGN | TF_SIMPLE, lhs, rhs });
        else
            ast = ({ TOK_ASSIGN, lhs, rhs });

        /* checked after the right side has been parsed */
        if (TAG(target) != TOK_IDENTIFIER && TAG(target) != TOK_DOT)
            ERROR("Illegal expression on left side of assignment.");
    }
    return ast;
}
/*
 * NAME:	p_value()
 * DESCRIPTION:	parse a chain of: cond TOK_QUESTION expr TOK_PIPE cond.
 *		Each link yields ({ TOK_QUESTION, cond, then, else }), tagged
 *		with TF_SIMPLE when all three parts are SIMPLE; a following
 *		TOK_QUESTION uses the node just built as its condition.
 */
PRODUCTION(value)
{
    mixed *ast;

    ast = PARSE(conditional);
    while (token == TOK_QUESTION) {
        mixed *then_part, *else_part;
        int tag;

        advance();		/* TOK_QUESTION */
        then_part = PARSE(expression);
        match(TOK_PIPE);
        else_part = PARSE(conditional);

        tag = TOK_QUESTION;
        if (SIMPLE(ast) && SIMPLE(then_part) && SIMPLE(else_part))
            tag |= TF_SIMPLE;
        ast = ({ tag, ast, then_part, else_part });
    }
    return ast;
}
/*
 * NAME:	p_conditional()
 * DESCRIPTION:	parse a left-associative chain of TOK_AND / TOK_OR operators.
 *		Each node is ({ op, left, right }), tagged with TF_SIMPLE
 *		when both operands are SIMPLE.
 */
PRODUCTION(conditional)
{
    mixed *ast;

    ast = PARSE(logical);
    for (;;) {
        mixed *rhs;
        int op;

        if (token != TOK_AND && token != TOK_OR)
            break;
        op = token;
        advance();
        rhs = PARSE(logical);
        if (SIMPLE(ast) && SIMPLE(rhs))
            op |= TF_SIMPLE;
        ast = ({ op, ast, rhs });
    }
    return ast;
}
/*
 * NAME:	p_logical()
 * DESCRIPTION:	parse a left-associative chain of comparison operators and
 *		TOK_IN.  Each node is ({ op, left, right }), tagged with
 *		TF_SIMPLE when both operands are SIMPLE.  TOK_ASSOC is not
 *		accepted as an operator here.
 */
PRODUCTION(logical)
{
    mixed *ast;

    ast = PARSE(relational);
    for (;;) {
        mixed *rhs;
        int op;

        if (token != TOK_EQUAL && token != TOK_NEQUAL &&
            token != TOK_LESS && token != TOK_LSEQUAL &&
            token != TOK_GREATER && token != TOK_GREQUAL &&
            token != TOK_IN)
            break;
        op = token;
        advance();
        rhs = PARSE(relational);
        if (SIMPLE(ast) && SIMPLE(rhs))
            op |= TF_SIMPLE;
        ast = ({ op, ast, rhs });
    }
    return ast;
}
/*
 * NAME:	p_relational()
 * DESCRIPTION:	parse a left-associative chain of TOK_PLUS / TOK_MINUS.
 *		Each node is ({ op, left, right }), tagged with TF_SIMPLE
 *		when both operands are SIMPLE.
 */
PRODUCTION(relational)
{
    mixed *ast;

    ast = PARSE(term);
    for (;;) {
        mixed *rhs;
        int op;

        if (token != TOK_PLUS && token != TOK_MINUS)
            break;
        op = token;
        advance();
        rhs = PARSE(term);
        if (SIMPLE(ast) && SIMPLE(rhs))
            op |= TF_SIMPLE;
        ast = ({ op, ast, rhs });
    }
    return ast;
}
/*
 * NAME:	p_term()
 * DESCRIPTION:	parse a left-associative chain of TOK_TIMES / TOK_DIVIDE /
 *		TOK_PERCENT.  Each node is ({ op, left, right }), tagged
 *		with TF_SIMPLE when both operands are SIMPLE.
 */
PRODUCTION(term)
{
    mixed *ast;

    ast = PARSE(factor);
    for (;;) {
        mixed *rhs;
        int op;

        if (token != TOK_TIMES && token != TOK_DIVIDE &&
            token != TOK_PERCENT)
            break;
        op = token;
        advance();
        rhs = PARSE(factor);
        if (SIMPLE(ast) && SIMPLE(rhs))
            op |= TF_SIMPLE;
        ast = ({ op, ast, rhs });
    }
    return ast;
}
/*
 * NAME:	p_factor()
 * DESCRIPTION:	parse the prefix operators TOK_BANG and TOK_MINUS.
 *		Result: ({ TOK_BANG, operand }) or ({ TOK_U_MINUS, operand }),
 *		tagged with TF_SIMPLE when the operand is SIMPLE.  A minus
 *		directly followed by a TOK_LIT_NUM token is folded into the
 *		literal instead of producing a node.
 */
PRODUCTION(factor)
{
    mixed *operand, *ast;

    if (token != TOK_BANG && token != TOK_MINUS)
        return PARSE(unary);

    if (token == TOK_BANG) {
        advance();		/* TOK_BANG */
        operand = PARSE(factor);
        ast = ({ TOK_BANG, operand });
        if (SIMPLE(operand))
            ast[0] = TOK_BANG | TF_SIMPLE;
        return ast;
    }

    advance();			/* TOK_MINUS */
    if (arrayp(token) && TOKEN_TYPE == TOK_LIT_NUM) {
        /* negate the pending literal in place, then parse it as usual */
        TOKEN_VALUE = -TOKEN_VALUE;
        return PARSE(unary);
    }

    operand = PARSE(factor);
    ast = ({ TOK_U_MINUS, operand });
    if (SIMPLE(operand))
        ast[0] = TOK_U_MINUS | TF_SIMPLE;
    return ast;
}
/*
 * NAME:	p_unary()
 * DESCRIPTION:	parse a primary followed by any number of postfix modifiers
 *		introduced by TOK_DOT, TOK_COLON or TOK_LBRACKET.
 *		Each node is ({ op, base, modifier }).  TOK_DOT and
 *		TOK_LBRACKET nodes are tagged with TF_SIMPLE when the base
 *		and the modifier (both bounds, for a range) are SIMPLE;
 *		TOK_COLON nodes are never tagged.
 */
PRODUCTION(unary)
{
    mixed *ast;

    ast = PARSE(primary);
    while (token == TOK_DOT || token == TOK_COLON || token == TOK_LBRACKET) {
        mixed *mod;
        int op, tag;

        op = token;		/* remember the operator; modifier consumes it */
        mod = PARSE(modifier);

        tag = op;
        if (SIMPLE(ast)) {
            if (op == TOK_DOT) {
                if (SIMPLE(mod))
                    tag = TOK_DOT | TF_SIMPLE;
            } else if (op == TOK_LBRACKET) {
                if (TAG(mod) == TOK_RANGE) {
                    if (SIMPLE(mod[1]) && SIMPLE(mod[2]))
                        tag = TOK_LBRACKET | TF_SIMPLE;
                } else if (SIMPLE(mod))
                    tag = TOK_LBRACKET | TF_SIMPLE;
            }
        }
        ast = ({ tag, ast, mod });
    }
    return ast;
}
/*
 * NAME:	p_primary()
 * DESCRIPTION:	parse a primary expression:
 *		  ( expr )
 *		  TOK_DOLLAR name  -> property `name` on ({ TOK_LIT_OBJ, 0 })
 *		  name ( args )    -> builtin call ({ TOK_LPAREN, name, args })
 *		  name             -> ({ TOK_IDENTIFIER, name })
 *		  { ... }          -> list or table
 *		  [ ... ]          -> buffer
 *		  any other literal token
 *		Builtin names are lowercased and looked up in btable; an
 *		unknown name is logged but still yields a call node.
 */
PRODUCTION(primary)
{
    mixed *ast;

    /* parenthesized expression */
    if (token == TOK_LPAREN) {
        advance();		/* TOK_LPAREN */
        ast = PARSE(expression);
        match(TOK_RPAREN);
        return ast;
    }

    /* TOK_DOLLAR followed by an identifier */
    if (token == TOK_DOLLAR) {
        advance();		/* TOK_DOLLAR */
        if (! IDENTIFIERP())
            PARSE_ERROR();
        ast = ({ TOK_DOT | TF_SIMPLE,
                 ({ TOK_LIT_OBJ | TF_SIMPLE, 0 }),
                 ({ TOK_LIT_STR | TF_SIMPLE, TOKEN_VALUE }) });
        advance();
        return ast;
    }

    /* variable, builtin, or beginning of verb/property */
    if (IDENTIFIERP()) {
        string name, lower;
        int known;

        name = TOKEN_VALUE;
        advance();
        if (token != TOK_LPAREN)
            return ({ TOK_IDENTIFIER | TF_SIMPLE, name });

        /* builtin call */
        lower = tolower(name);
        known = btable->exists(lower);
        advance();		/* TOK_LPAREN */

        if (token != TOK_RPAREN) {
            ast = ({ TOK_LPAREN, lower, PARSE(func_args) });
            if (known && SIMPLE(ast[2]) && btable->simple(lower))
                ast[0] = TOK_LPAREN | TF_SIMPLE;
            match(TOK_RPAREN);
        } else {
            /* empty argument list */
            ast = ({ TOK_LPAREN, lower, ({ TOK_LIST | TF_SIMPLE }) });
            if (known && btable->simple(lower))
                ast[0] = TOK_LPAREN | TF_SIMPLE;
            advance();		/* TOK_RPAREN */
        }

        if (! known)
            ERROR("Unknown built-in function: " + name);
        return ast;
    }

    /* list or table */
    if (token == TOK_LBRACE) {
        advance();		/* TOK_LBRACE */
        if (token == TOK_RBRACE) {
            advance();		/* TOK_RBRACE */
            return ({ TOK_LIST | TF_SIMPLE });
        }
        if (token == TOK_ASSOC) {
            /* a lone TOK_ASSOC between the braces: empty table */
            advance();		/* TOK_ASSOC */
            ast = ({ TOK_TABLE | TF_SIMPLE });
            match(TOK_RBRACE);
            return ast;
        }
        ast = PARSE(list_elems);
        match(TOK_RBRACE);
        return ast;
    }

    /* buffer value */
    if (token == TOK_LBRACKET) {
        advance();		/* TOK_LBRACKET */
        if (token == TOK_RBRACKET) {
            advance();		/* TOK_RBRACKET */
            return ({ TOK_BUFFER | TF_SIMPLE });
        }
        ast = PARSE(buf_elems);
        match(TOK_RBRACKET);
        return ast;
    }

    /* default: literal */
    if (! LITERALP())
        PARSE_ERROR();
    ast = ({ TOKEN_TYPE | TF_SIMPLE, TOKEN_VALUE });
    advance();
    return ast;
}
/*
 * NAME:	p_modifier()
 * DESCRIPTION:	parse one postfix modifier, starting at its operator token:
 *		  TOK_DOT name | TOK_DOT (expr)
 *		      -> the name as a string literal node, or the expression
 *		  TOK_COLON name(args) | TOK_COLON (expr)(args)
 *		      -> ({ verb, args })
 *		  TOK_LBRACKET expr ] | TOK_LBRACKET expr TOK_RANGE expr ]
 *		      -> the index, or ({ TOK_RANGE, lo, hi })
 *		Raises an error when called on any other token.
 */
PRODUCTION(modifier)
{
    mixed *ast;

    if (token == TOK_DOT) {
        advance();
        if (! IDENTIFIERP()) {
            /* computed property name */
            match(TOK_LPAREN);
            ast = PARSE(expression);
            match(TOK_RPAREN);
            return ast;
        }
        ast = ({ TOK_LIT_STR | TF_SIMPLE, TOKEN_VALUE });
        advance();
        return ast;
    }

    if (token == TOK_COLON) {
        mixed *verb, *args;

        advance();
        if (! IDENTIFIERP()) {
            /* computed verb name */
            match(TOK_LPAREN);
            verb = PARSE(expression);
            match(TOK_RPAREN);
        } else {
            verb = ({ TOK_LIT_STR | TF_SIMPLE, TOKEN_VALUE });
            advance();
        }

        match(TOK_LPAREN);
        if (token != TOK_RPAREN)
            args = PARSE(func_args);
        else
            args = ({ TOK_LIST | TF_SIMPLE });
        match(TOK_RPAREN);
        return ({ verb, args });
    }

    if (token == TOK_LBRACKET) {
        advance();
        ast = PARSE(expression);
        if (token == TOK_RANGE) {
            mixed *upper;

            advance();		/* TOK_RANGE */
            upper = PARSE(expression);
            if (SIMPLE(ast) && SIMPLE(upper))
                ast = ({ TOK_RANGE | TF_SIMPLE, ast, upper });
            else
                ast = ({ TOK_RANGE, ast, upper });
        }
        match(TOK_RBRACKET);
        return ast;
    }

    error("PARSE(modifier) failed");
}
/*
 * NAME:	p_func_args()
 * DESCRIPTION:	parse a non-empty, comma-separated argument list.  Every
 *		argument is an expression, or ({ TOK_SPLICE, expr }) when it
 *		is introduced by TOK_SPLICE.
 *		Result: ({ TOK_LIST, arg, ... }), tagged with TF_SIMPLE when
 *		every argument is SIMPLE.
 */
PRODUCTION(func_args)
{
    mixed *args;
    int all_simple, more;

    args = ({ });
    all_simple = 1;
    do {
        mixed *arg;

        if (token != TOK_SPLICE)
            arg = PARSE(expression);
        else {
            advance();		/* TOK_SPLICE */
            arg = PARSE(expression);
            if (SIMPLE(arg))
                arg = ({ TOK_SPLICE | TF_SIMPLE, arg });
            else
                arg = ({ TOK_SPLICE, arg });
        }

        if (! SIMPLE(arg))
            all_simple = 0;
        args += ({ arg });

        more = (token == TOK_COMMA);
        if (more)
            advance();		/* TOK_COMMA */
    } while (more);

    if (all_simple)
        return ({ TOK_LIST | TF_SIMPLE }) + args;
    return ({ TOK_LIST }) + args;
}
/*
 * NAME:	p_list_elems()
 * DESCRIPTION:	parse the non-empty contents of a braced aggregate.
 *		An element is a splice ({ TOK_SPLICE, expr }), a plain
 *		expression, or a pair ({ TOK_ASSOC, key, value }).  The first
 *		non-splice element fixes the kind (TOK_LIST or TOK_TABLE);
 *		a later element of the other form is logged as an error.
 *		When only splices are present the kind stays TOK_AMBAGGR.
 *		Result: ({ kind, elt, ... }), tagged with TF_SIMPLE when
 *		every element is SIMPLE.
 */
PRODUCTION(list_elems)
{
    mixed *items;
    int kind, all_simple, more;

    kind = TOK_AMBAGGR;
    items = ({ });
    all_simple = 1;
    do {
        mixed *item;

        if (token != TOK_SPLICE) {
            item = PARSE(expression);
            if (token != TOK_ASSOC) {
                /* plain list element */
                if (kind == TOK_TABLE)
                    ERROR("Bad table structure");
                kind = TOK_LIST;
            } else {
                mixed *val;

                /* key/value pair */
                if (kind == TOK_LIST)
                    ERROR("Bad table structure");
                kind = TOK_TABLE;
                advance();	/* TOK_ASSOC */
                val = PARSE(expression);
                if (SIMPLE(item) && SIMPLE(val))
                    item = ({ TOK_ASSOC | TF_SIMPLE, item, val });
                else
                    item = ({ TOK_ASSOC, item, val });
            }
        } else {
            advance();		/* TOK_SPLICE */
            item = PARSE(expression);
            if (SIMPLE(item))
                item = ({ TOK_SPLICE | TF_SIMPLE, item });
            else
                item = ({ TOK_SPLICE, item });
        }

        if (! SIMPLE(item))
            all_simple = 0;
        items += ({ item });

        more = (token == TOK_COMMA);
        if (more)
            advance();		/* TOK_COMMA */
    } while (more);

    if (all_simple)
        kind |= TF_SIMPLE;
    return ({ kind }) + items;
}
/*
 * NAME:	p_buf_elems()
 * DESCRIPTION:	parse the non-empty, comma-separated contents of a buffer
 *		value.  Every element is an expression, or
 *		({ TOK_SPLICE, expr }) when introduced by TOK_SPLICE.
 *		Result: ({ TOK_BUFFER, elt, ... }), tagged with TF_SIMPLE
 *		when every element is SIMPLE.
 */
PRODUCTION(buf_elems)
{
    mixed *items;
    int all_simple, more;

    items = ({ });
    all_simple = 1;
    do {
        mixed *item;

        if (token != TOK_SPLICE)
            item = PARSE(expression);
        else {
            advance();		/* TOK_SPLICE */
            item = PARSE(expression);
            if (SIMPLE(item))
                item = ({ TOK_SPLICE | TF_SIMPLE, item });
            else
                item = ({ TOK_SPLICE, item });
        }

        if (! SIMPLE(item))
            all_simple = 0;
        items += ({ item });

        more = (token == TOK_COMMA);
        if (more)
            advance();		/* TOK_COMMA */
    } while (more);

    if (all_simple)
        return ({ TOK_BUFFER | TF_SIMPLE }) + items;
    return ({ TOK_BUFFER }) + items;
}