/*
 * NAME:        parser.c
 * DESCRIPTION: MOO code parser; returns AST
 *
 * A hand-written recursive-descent parser.  The inherited lexer supplies
 * tokens one at a time; every production below consumes tokens through
 * advance()/match() and returns an array-based syntax tree node.
 */

# define DEBUG  0

inherit "/std/lex";
# if DEBUG
inherit "/std/vartext";
# else
# define var2str(arg)  ""
# endif

# include <objects.h>
# include <moo/data.h>
# include <moo/tokens.h>

/*
 * The current token is compared directly against TOK_* constants, or is
 * an array ({ type, value }) for identifiers, literals and comments (see
 * the TOKEN_TYPE/TOKEN_VALUE macros below).
 */
mixed token;            /* current token */
int line;               /* current line number */
object btable;          /* table of builtin functions */
string *errors;         /* list of resulting errors */

/* record a message, prefixed with the current line number */
# define ERROR(msg)     (errors += ({ "Line " + line + ": " + (msg) }) )
/* record a parse error and abort the current parse via error() */
# define PARSE_ERROR()  (ERROR("parse error"), error("Parse error"))

# define TOKEN_TYPE     token[0]
# define TOKEN_VALUE    token[1]

# define IDENTIFIERP()  (arrayp(token) && TOKEN_TYPE == TOK_IDENTIFIER)
# define LITERALP()     (arrayp(token) && TOKEN_TYPE != TOK_IDENTIFIER)
# define COMMENTP()     (arrayp(token) && TOKEN_TYPE == TOK_COMMENT)

# define PARSE(x)       p_##x()
# define PRODUCTION(x)  static mixed *p_##x(void)

/*
 * NAME:        advance()
 * DESCRIPTION: fetch the next token from the inherited lexer into `token',
 *              skipping comment tokens.  A lexer error whose message starts
 *              with '*' is recorded (without the '*') and the next token is
 *              tried; any other error is passed on to the caller.
 */
static void advance(void)
{
  string problem;

  for (;;) {
    problem = catch(token = ::advance());
    if (problem) {
      if (problem[0] != '*')
        error(problem);         /* more serious */
      ERROR(problem[1 ..]);
      continue;
    }
    if (! COMMENTP())
      break;
  }
}

/*
 * NAME:        match()
 * DESCRIPTION: require the current token to be `what' and move past it;
 *              anything else is a parse error
 */
private void match(int what)
{
  if (token != what)
    PARSE_ERROR();              /* does not return */
  advance();
}

PRODUCTION(program);
PRODUCTION(statement);
PRODUCTION(if_condition);
PRODUCTION(for_loop);
PRODUCTION(while_loop);
PRODUCTION(fork_statements);
PRODUCTION(return);
PRODUCTION(expression);
PRODUCTION(value);
PRODUCTION(conditional);
PRODUCTION(logical);
PRODUCTION(relational);
PRODUCTION(term);
PRODUCTION(factor);
PRODUCTION(unary);
PRODUCTION(primary);
PRODUCTION(modifier);
PRODUCTION(func_args);
PRODUCTION(list_elems);
PRODUCTION(buf_elems);

/*
 * NAME:        statement_list()
 * DESCRIPTION: parse statements until the current token is one of `stops'
 *              or TOK_EOF.  The stopping token is left unconsumed.  Empty
 *              statements (for which the statement production returns 0)
 *              are not included in the result.
 */
static mixed *statement_list(mixed *stops)
{
  mixed *body, *stmt;
  int i;

  body = ({ });
  while (token != TOK_EOF) {
    for (i = sizeof(stops); i > 0; ) {
      if (token == stops[--i])
        return body;
    }

    stmt = PARSE(statement);
    if (stmt)
      body += ({ stmt });
  }
  return body;
}

/*
 * NAME:        binary_node()
 * DESCRIPTION: build the node ({ op, lhs, rhs }); TF_SIMPLE is or'ed into
 *              the tag when both operands are SIMPLE()
 */
static mixed *binary_node(int op, mixed *lhs, mixed *rhs)
{
  if (SIMPLE(lhs) && SIMPLE(rhs))
    op |= TF_SIMPLE;
  return ({ op, lhs, rhs });
}

/*
 * NAME:        splice_elem()
 * DESCRIPTION: parse a spliced element; the current token must be
 *              TOK_SPLICE.  Returns ({ TOK_SPLICE, expr }), flagged with
 *              TF_SIMPLE when the expression is SIMPLE().
 */
static mixed *splice_elem(void)
{
  mixed *inner;

  advance();                    /* TOK_SPLICE */
  inner = PARSE(expression);
  if (SIMPLE(inner))
    return ({ TOK_SPLICE | TF_SIMPLE, inner });
  return ({ TOK_SPLICE, inner });
}

/*
 * NAME:        element_list()
 * DESCRIPTION: parse one or more comma-separated elements, each either a
 *              splice or an expression.  Returns ({ tag, elt, ... }) with
 *              TF_SIMPLE or'ed into the tag when every element is SIMPLE().
 */
static mixed *element_list(int tag)
{
  mixed *seq, *elt;
  int simple, more;

  seq = ({ tag });
  simple = 1;
  do {
    if (token == TOK_SPLICE)
      elt = splice_elem();
    else
      elt = PARSE(expression);

    if (! SIMPLE(elt))
      simple = 0;
    seq += ({ elt });

    more = (token == TOK_COMMA);
    if (more)
      advance();                /* TOK_COMMA */
  } while (more);

  if (simple)
    seq[0] = tag | TF_SIMPLE;
  return seq;
}

/*
 * NAME:        main()
 * DESCRIPTION: parse a MOO program.  Returns ({ 1, statements }) when no
 *              errors were recorded, or ({ 0, errors }) otherwise.  An
 *              error raised while nothing has been recorded in `errors'
 *              is passed on to the caller.
 */
mixed *main(string src)
{
  mixed *program, *stmt;
  string problem;

  set_input(src);
  token  = 0;
  line   = 1;
  errors = ({ });

  if (! btable)
    btable = load_object(BTABLE);

  advance();                    /* prime the first token */

  program = ({ });
  while (token != TOK_EOF) {
    problem = catch(stmt = PARSE(statement));
    if (problem) {
      if (sizeof(errors) == 0)
        error(problem);         /* more serious */
      return ({ 0, errors });
    }

    if (stmt)
      program += ({ stmt });
  }

  if (sizeof(errors))
    return ({ 0, errors });
  return ({ 1, program });
}

/*
 * NAME:        p_statement()
 * DESCRIPTION: parse a single statement, dispatching on the current token.
 *              A lone semicolon yields 0.  Anything that is not a
 *              statement keyword is parsed as an expression followed by a
 *              semicolon.
 */
PRODUCTION(statement)
{
  mixed *stmt;

  if (token == TOK_IF)
    return PARSE(if_condition);
  if (token == TOK_FOR)
    return PARSE(for_loop);
  if (token == TOK_WHILE)
    return PARSE(while_loop);
  if (token == TOK_FORK)
    return PARSE(fork_statements);
  if (token == TOK_RETURN)
    return PARSE(return);

  if (token == TOK_SEMICOLON) {
    advance();                  /* empty statement */
    return 0;
  }
  if (token == TOK_EOF) {
    error("Unexpected end of input");
    return 0;
  }

  stmt = PARSE(expression);
  match(TOK_SEMICOLON);
  ++line;
  return stmt;
}

/*
 * NAME:        p_if_condition()
 * DESCRIPTION: parse if (...) ... { elseif (...) ... } [ else ... ] endif.
 *              Result: ({ TOK_IF, cond, body,
 *                         TOK_ELSEIF, cond, body, ...,
 *                         TOK_ELSE, body })
 *              where the elseif and else parts appear only when present.
 */
PRODUCTION(if_condition)
{
  mixed *node, *arm_stops;
  int tag;

  arm_stops = ({ TOK_ELSEIF, TOK_ELSE, TOK_ENDIF });

  /* the leading "if" arm and every "elseif" arm have the same shape */
  node = ({ });
  tag = TOK_IF;
  do {
    advance();                  /* TOK_IF or TOK_ELSEIF */
    match(TOK_LPAREN);
    node += ({ tag, PARSE(expression) });
    match(TOK_RPAREN);
    ++line;
    node += ({ statement_list(arm_stops) });
    tag = TOK_ELSEIF;
  } while (token == TOK_ELSEIF);

  if (token == TOK_ELSE) {
    advance();                  /* TOK_ELSE */
    ++line;
    node += ({ TOK_ELSE, statement_list(({ TOK_ENDIF })) });
  }

  match(TOK_ENDIF);
  ++line;
  return node;
}

/*
 * NAME:        p_for_loop()
 * DESCRIPTION: parse a for loop.  Accepted headers:
 *                for ident in (expr)
 *                for ident in [expr RANGE expr]
 *                for ident ASSOC ident in (expr)
 *              Result: ({ TOK_FOR, var, source, body }) where var is the
 *              identifier value (or a pair of them for the ASSOC form) and
 *              source is ({ TOK_LPAREN, expr }) or ({ TOK_RANGE, lo, hi }).
 */
PRODUCTION(for_loop)
{
  mixed loopvar, *source, *body;
  int paired;

  advance();                    /* TOK_FOR */
  if (! IDENTIFIERP())
    PARSE_ERROR();
  loopvar = TOKEN_VALUE;
  advance();

  paired = (token == TOK_ASSOC);
  if (paired) {
    advance();                  /* TOK_ASSOC */
    if (! IDENTIFIERP())
      PARSE_ERROR();
    loopvar = ({ loopvar, TOKEN_VALUE });
    advance();
  }
  match(TOK_IN);

  /* the two-variable form only accepts a parenthesized expression */
  if (paired || token == TOK_LPAREN) {
    match(TOK_LPAREN);
    source = ({ TOK_LPAREN, PARSE(expression) });
    match(TOK_RPAREN);
  } else {
    match(TOK_LBRACKET);
    source = ({ TOK_RANGE, PARSE(expression), 0 });
    match(TOK_RANGE);
    source[2] = PARSE(expression);
    match(TOK_RBRACKET);
  }
  ++line;

  body = statement_list(({ TOK_ENDFOR }));
  match(TOK_ENDFOR);
  ++line;

  return ({ TOK_FOR, loopvar, source, body });
}

/*
 * NAME:        p_while_loop()
 * DESCRIPTION: parse while (expr) ... endwhile.
 *              Result: ({ TOK_WHILE, cond, body })
 */
PRODUCTION(while_loop)
{
  mixed *cond, *body;

  advance();                    /* TOK_WHILE */
  match(TOK_LPAREN);
  cond = PARSE(expression);
  match(TOK_RPAREN);
  ++line;

  body = statement_list(({ TOK_ENDWHILE }));
  match(TOK_ENDWHILE);
  ++line;

  return ({ TOK_WHILE, cond, body });
}

/*
 * NAME:        p_fork_statements()
 * DESCRIPTION: parse fork [ident] (expr) ... endfork.
 *              Result: ({ TOK_FORK, name, expr, body }) where name is 0
 *              when no identifier precedes the parenthesis.
 */
PRODUCTION(fork_statements)
{
  mixed name, *arg, *body;

  advance();                    /* TOK_FORK */

  name = 0;
  if (token != TOK_LPAREN) {
    if (! IDENTIFIERP())
      PARSE_ERROR();
    name = TOKEN_VALUE;
    advance();
  }

  match(TOK_LPAREN);
  arg = PARSE(expression);
  match(TOK_RPAREN);
  ++line;

  body = statement_list(({ TOK_ENDFORK }));
  match(TOK_ENDFORK);
  ++line;

  return ({ TOK_FORK, name, arg, body });
}

/*
 * NAME:        p_return()
 * DESCRIPTION: parse return [expr] ;
 *              Result: ({ TOK_RETURN }) or ({ TOK_RETURN, expr })
 */
PRODUCTION(return)
{
  mixed *node;

  advance();                    /* TOK_RETURN */

  node = ({ TOK_RETURN });
  if (token != TOK_SEMICOLON)
    node += ({ PARSE(expression) });

  match(TOK_SEMICOLON);
  ++line;
  return node;
}

/*
 * NAME:        p_expression()
 * DESCRIPTION: parse an expression, including assignment.  An assignment
 *              yields ({ TOK_ASSIGN, lhs, rhs }); its right side is itself
 *              parsed as an expression, so assignments nest to the right.
 *              The left side is checked after the right side has been
 *              parsed: stripped of at most one range subscript and then
 *              any number of plain subscripts, it must be an identifier
 *              or a TOK_DOT node, else an error is recorded.
 */
PRODUCTION(expression)
{
  mixed *lhs, *target, *node;

  lhs = PARSE(value);
  if (token != TOK_ASSIGN)
    return lhs;

  advance();                    /* TOK_ASSIGN */

  /* walk down to the thing actually being assigned to */
  target = lhs;
  if (TAG(target) == TOK_LBRACKET && TAG(target[2]) == TOK_RANGE)
    target = target[1];
  while (TAG(target) == TOK_LBRACKET && TAG(target[2]) != TOK_RANGE)
    target = target[1];

  node = binary_node(TOK_ASSIGN, lhs, PARSE(expression));

  if (! (TAG(target) == TOK_IDENTIFIER || TAG(target) == TOK_DOT))
    ERROR("Illegal expression on left side of assignment.");

  return node;
}

/*
 * NAME:        p_value()
 * DESCRIPTION: parse the conditional operator: cond ? expr | alt.
 *              Result: ({ TOK_QUESTION, cond, expr, alt }); repeated
 *              operators at this level nest to the left.
 */
PRODUCTION(value)
{
  mixed *node, *yes, *no;
  int tag;

  node = PARSE(conditional);
  while (token == TOK_QUESTION) {
    advance();                  /* TOK_QUESTION */
    yes = PARSE(expression);
    match(TOK_PIPE);
    no = PARSE(conditional);

    tag = TOK_QUESTION;
    if (SIMPLE(node) && SIMPLE(yes) && SIMPLE(no))
      tag |= TF_SIMPLE;
    node = ({ tag, node, yes, no });
  }
  return node;
}

/*
 * NAME:        p_conditional()
 * DESCRIPTION: parse a left-associative chain of TOK_AND / TOK_OR
 */
PRODUCTION(conditional)
{
  mixed *left;
  int op;

  left = PARSE(logical);
  while (token == TOK_AND || token == TOK_OR) {
    op = token;
    advance();
    left = binary_node(op, left, PARSE(logical));
  }
  return left;
}

/*
 * NAME:        p_logical()
 * DESCRIPTION: parse a left-associative chain of comparison operators and
 *              TOK_IN.  TOK_ASSOC is not accepted as an operator here.
 */
PRODUCTION(logical)
{
  mixed *left;
  int op;

  left = PARSE(relational);
  while (token == TOK_EQUAL   || token == TOK_NEQUAL  ||
         token == TOK_LESS    || token == TOK_LSEQUAL ||
         token == TOK_GREATER || token == TOK_GREQUAL ||
         token == TOK_IN) {
    op = token;
    advance();
    left = binary_node(op, left, PARSE(relational));
  }
  return left;
}

/*
 * NAME:        p_relational()
 * DESCRIPTION: parse a left-associative chain of TOK_PLUS / TOK_MINUS
 */
PRODUCTION(relational)
{
  mixed *left;
  int op;

  left = PARSE(term);
  while (token == TOK_PLUS || token == TOK_MINUS) {
    op = token;
    advance();
    left = binary_node(op, left, PARSE(term));
  }
  return left;
}

/*
 * NAME:        p_term()
 * DESCRIPTION: parse a left-associative chain of TOK_TIMES / TOK_DIVIDE /
 *              TOK_PERCENT
 */
PRODUCTION(term)
{
  mixed *left;
  int op;

  left = PARSE(factor);
  while (token == TOK_TIMES || token == TOK_DIVIDE || token == TOK_PERCENT) {
    op = token;
    advance();
    left = binary_node(op, left, PARSE(factor));
  }
  return left;
}

/*
 * NAME:        p_factor()
 * DESCRIPTION: parse the prefix operators TOK_BANG and TOK_MINUS.  A minus
 *              directly followed by a TOK_LIT_NUM token negates that
 *              token's value in place instead of producing a TOK_U_MINUS
 *              node.
 */
PRODUCTION(factor)
{
  mixed *operand;
  int tag, negate;

  if (token == TOK_BANG) {
    tag = TOK_BANG;
    negate = 0;
  } else if (token == TOK_MINUS) {
    tag = TOK_U_MINUS;
    negate = 1;
  } else
    return PARSE(unary);

  advance();                    /* TOK_BANG or TOK_MINUS */

  if (negate && arrayp(token) && TOKEN_TYPE == TOK_LIT_NUM) {
    TOKEN_VALUE = -TOKEN_VALUE; /* fold the sign into the literal */
    return PARSE(unary);
  }

  operand = PARSE(factor);
  if (SIMPLE(operand))
    tag |= TF_SIMPLE;
  return ({ tag, operand });
}

/*
 * NAME:        p_unary()
 * DESCRIPTION: parse a primary followed by any number of TOK_DOT,
 *              TOK_COLON and TOK_LBRACKET modifiers.  Each modifier wraps
 *              what came before it: ({ op, base, modifier }).  TOK_DOT and
 *              TOK_LBRACKET nodes are flagged TF_SIMPLE when the base and
 *              the modifier are simple; TOK_COLON nodes never are.
 */
PRODUCTION(unary)
{
  mixed *base, *mod;
  int op, simple;

  base = PARSE(primary);
  while (token == TOK_DOT || token == TOK_COLON || token == TOK_LBRACKET) {
    op = token;                 /* p_modifier consumes the operator itself */
    mod = PARSE(modifier);

    simple = 0;
    if (SIMPLE(base)) {
      if (op == TOK_DOT) {
        if (SIMPLE(mod))
          simple = 1;
      } else if (op == TOK_LBRACKET) {
        if (TAG(mod) == TOK_RANGE) {
          /* for a range both of its bounds are inspected */
          if (SIMPLE(mod[1]) && SIMPLE(mod[2]))
            simple = 1;
        } else if (SIMPLE(mod))
          simple = 1;
      }
    }
    if (simple)
      op |= TF_SIMPLE;

    base = ({ op, base, mod });
  }
  return base;
}

/*
 * NAME:        p_primary()
 * DESCRIPTION: parse a primary expression: a parenthesized expression,
 *              TOK_DOLLAR followed by an identifier, an identifier or
 *              builtin function call, a TOK_LBRACE ... TOK_RBRACE list or
 *              table, a TOK_LBRACKET ... TOK_RBRACKET buffer, or a
 *              literal token.
 */
PRODUCTION(primary)
{
  mixed *node;
  string name, lower;
  int known;

  /* ( expression ) */
  if (token == TOK_LPAREN) {
    advance();                  /* TOK_LPAREN */
    node = PARSE(expression);
    match(TOK_RPAREN);
    return node;
  }

  /* $ident: a TOK_DOT node on an object literal with value 0 */
  if (token == TOK_DOLLAR) {
    advance();                  /* TOK_DOLLAR */
    if (! IDENTIFIERP())
      PARSE_ERROR();
    node = ({ TOK_DOT | TF_SIMPLE,
              ({ TOK_LIT_OBJ | TF_SIMPLE, 0 }),
              ({ TOK_LIT_STR | TF_SIMPLE, TOKEN_VALUE }) });
    advance();
    return node;
  }

  /* variable, builtin, or beginning of verb/property */
  if (IDENTIFIERP()) {
    name = TOKEN_VALUE;
    advance();

    if (token != TOK_LPAREN)
      return ({ TOK_IDENTIFIER | TF_SIMPLE, name });

    /* builtin call: looked up in btable under its lowercased name */
    lower = tolower(name);
    known = btable->exists(lower);
    advance();                  /* TOK_LPAREN */

    if (token == TOK_RPAREN) {
      node = ({ TOK_LPAREN, lower, ({ TOK_LIST | TF_SIMPLE }) });
      if (known && btable->simple(lower))
        node[0] = TOK_LPAREN | TF_SIMPLE;
      advance();                /* TOK_RPAREN */
    } else {
      node = ({ TOK_LPAREN, lower, PARSE(func_args) });
      if (known && SIMPLE(node[2]) && btable->simple(lower))
        node[0] = TOK_LPAREN | TF_SIMPLE;
      match(TOK_RPAREN);
    }

    /* reported only after the whole call has been parsed */
    if (! known)
      ERROR("Unknown built-in function: " + name);
    return node;
  }

  /* list or table */
  if (token == TOK_LBRACE) {
    advance();                  /* TOK_LBRACE */

    if (token == TOK_RBRACE) {
      advance();                /* TOK_RBRACE */
      return ({ TOK_LIST | TF_SIMPLE });
    }
    if (token == TOK_ASSOC) {
      /* a lone TOK_ASSOC between the braces gives an empty table node */
      advance();                /* TOK_ASSOC */
      match(TOK_RBRACE);
      return ({ TOK_TABLE | TF_SIMPLE });
    }

    node = PARSE(list_elems);
    match(TOK_RBRACE);
    return node;
  }

  /* buffer value */
  if (token == TOK_LBRACKET) {
    advance();                  /* TOK_LBRACKET */

    if (token == TOK_RBRACKET) {
      advance();                /* TOK_RBRACKET */
      return ({ TOK_BUFFER | TF_SIMPLE });
    }

    node = PARSE(buf_elems);
    match(TOK_RBRACKET);
    return node;
  }

  /* default: literal */
  if (! LITERALP())
    PARSE_ERROR();
  node = ({ TOKEN_TYPE | TF_SIMPLE, TOKEN_VALUE });
  advance();
  return node;
}

/*
 * NAME:        p_modifier()
 * DESCRIPTION: parse what follows a primary; the current token must be
 *              TOK_DOT, TOK_COLON or TOK_LBRACKET.
 *                TOK_DOT       ident or (expr); returns the name node
 *                TOK_COLON     ident or (expr), then an argument list in
 *                              parentheses; returns ({ name, args })
 *                TOK_LBRACKET  expr or expr RANGE expr, then TOK_RBRACKET;
 *                              returns the expression or
 *                              ({ TOK_RANGE, lo, hi })
 *              An identifier used as a name becomes a TOK_LIT_STR node.
 */
PRODUCTION(modifier)
{
  mixed *node, *name;

  if (token == TOK_DOT) {
    advance();                  /* TOK_DOT */
    if (IDENTIFIERP()) {
      node = ({ TOK_LIT_STR | TF_SIMPLE, TOKEN_VALUE });
      advance();
      return node;
    }
    match(TOK_LPAREN);
    node = PARSE(expression);
    match(TOK_RPAREN);
    return node;
  }

  if (token == TOK_COLON) {
    advance();                  /* TOK_COLON */
    if (IDENTIFIERP()) {
      name = ({ TOK_LIT_STR | TF_SIMPLE, TOKEN_VALUE });
      advance();
    } else {
      match(TOK_LPAREN);
      name = PARSE(expression);
      match(TOK_RPAREN);
    }

    match(TOK_LPAREN);
    if (token == TOK_RPAREN)
      node = ({ name, ({ TOK_LIST | TF_SIMPLE }) });
    else
      node = ({ name, PARSE(func_args) });
    match(TOK_RPAREN);
    return node;
  }

  if (token == TOK_LBRACKET) {
    advance();                  /* TOK_LBRACKET */
    node = PARSE(expression);
    if (token == TOK_RANGE) {
      advance();                /* TOK_RANGE */
      node = binary_node(TOK_RANGE, node, PARSE(expression));
    }
    match(TOK_RBRACKET);
    return node;
  }

  error("PARSE(modifier) failed");
}

/*
 * NAME:        p_func_args()
 * DESCRIPTION: parse a non-empty, comma-separated argument list.
 *              Result: ({ TOK_LIST, arg, ... })
 */
PRODUCTION(func_args)
{
  return element_list(TOK_LIST);
}

/*
 * NAME:        p_list_elems()
 * DESCRIPTION: parse the non-empty contents of a TOK_LBRACE aggregate.
 *              Elements of the form expr ASSOC expr make it a TOK_TABLE,
 *              plain expressions make it a TOK_LIST; mixing the two
 *              records "Bad table structure".  Splices do not decide the
 *              type, so an aggregate holding only splices keeps the tag
 *              TOK_AMBAGGR.
 */
PRODUCTION(list_elems)
{
  mixed *list, *elt;
  int kind, simple, more;

  kind = TOK_AMBAGGR;
  list = ({ 0 });               /* slot 0 receives the tag at the end */
  simple = 1;

  do {
    if (token == TOK_SPLICE)
      elt = splice_elem();
    else {
      elt = PARSE(expression);
      if (token == TOK_ASSOC) {
        if (kind == TOK_LIST)
          ERROR("Bad table structure");
        kind = TOK_TABLE;

        advance();              /* TOK_ASSOC */
        elt = binary_node(TOK_ASSOC, elt, PARSE(expression));
      } else {
        if (kind == TOK_TABLE)
          ERROR("Bad table structure");
        kind = TOK_LIST;
      }
    }

    if (! SIMPLE(elt))
      simple = 0;
    list += ({ elt });

    more = (token == TOK_COMMA);
    if (more)
      advance();                /* TOK_COMMA */
  } while (more);

  if (simple)
    kind |= TF_SIMPLE;
  list[0] = kind;
  return list;
}

/*
 * NAME:        p_buf_elems()
 * DESCRIPTION: parse the non-empty, comma-separated contents of a buffer
 *              value.  Result: ({ TOK_BUFFER, elt, ... })
 */
PRODUCTION(buf_elems)
{
  return element_list(TOK_BUFFER);
}