/*
 * NAME:	lex.c
 * DESCRIPTION:	MOO code lexical analyzer
 *
 * Usage: call set_input() with the program text, then call advance()
 * repeatedly; each call returns the next token and moves the scan position
 * forward, until TOK_EOF is returned.
 */

inherit "/std/string";

# include <moo/data.h>
# include <moo/errors.h>
# include <moo/tokens.h>

private string input;		/* text being scanned */
private int length, marker;	/* strlen(input); index of next unread char */
private mapping moo_errors, moo_keywords;	/* lookup tables, see create() */

# define PARSE_ERROR	"parse error"

/* advance the scan position by `step' characters and return `tok' */
# define TOKEN(tok, step)	do { marker += (step); return (tok); } while (0)
/* advance the scan position by `step' characters, then raise "*" + msg */
# define ERROR(msg, step)	(marker += (step), error("*" + msg))

/*
 * NAME:	create()
 * DESCRIPTION:	initialize tables
 *
 * Loads the error-name and keyword tables from the MOO_ERRORS and
 * MOO_KEYWORDS definitions supplied by the included headers.
 */
static void create(void)
{
    moo_errors = MOO_ERRORS;
    moo_keywords = MOO_KEYWORDS;
}

/*
 * NAME:	set_input()
 * DESCRIPTION:	initialize input text
 *
 * Stores `text' as the string to scan, caches its length and resets the
 * scan position to the first character.
 */
void set_input(string text)
{
    length = strlen(input = text);
    marker = 0;
}

/*
 * NAME:	advance()
 * DESCRIPTION:	scan and return next token
 *
 * Skips spaces, newlines and tabs, then recognizes one token starting at the
 * current scan position and moves the position past it.
 *
 * Return value, depending on the token:
 *   - TOK_EOF when the input is exhausted;
 *   - the character itself for single-character punctuation;
 *   - a TOK_* constant for the other operators (TOK_RANGE, TOK_EQUAL, ...);
 *   - the value stored in moo_keywords for a keyword (matched on the
 *     lowercased identifier);
 *   - an array ({ TOK_*, value }) for comments, float/integer/string/object
 *     literals, error constants and identifiers.
 *
 * Errors are raised through error() with a "*" prefix (see ERROR); the scan
 * position has already been moved past the offending text when that happens.
 */
mixed advance(void)
{
    int char;

    /* skip whitespace; `char' ends up holding the first significant char */
    while (marker < length &&
	   ((char = input[marker]) == ' ' || char == '\n' || char == '\t'))
	++marker;

    if (marker >= length)
	return TOK_EOF;

    switch (char)
    {
    /* punctuation that is its own token */
    case '(':
    case ')':
    case '[':
    case ']':
    case '-':
    case '*':
    case '?':
    case '+':
    case '@':
    case '~':
    case '%':
    case '{':
    case '}':
    case ',':
    case ';':
    case ':':
    case '$':
	return input[marker++];

    case '/':
	/* either the start of a block comment or a plain slash */
	if (marker < length - 1 && input[marker + 1] == '*')
	{
	    int i;

	    /* search for the closing star-slash pair */
	    for (i = marker + 2; i < length - 1; ++i)
	    {
		if (input[i] == '*' && input[i + 1] == '/')
		{
		    mixed token;

		    /* the token text includes both comment delimiters */
		    token = ({ TOK_COMMENT, input[marker .. i + 1] });
		    TOKEN(token, i + 2 - marker);
		}
	    }

	    ERROR("end of program while in a comment", i + 2 - marker);
	}
	else
	    return input[marker++];

    case '.':
	if (marker < length - 1 && input[marker + 1] == '.')
	    TOKEN(TOK_RANGE, 2);
	else
	{
	    int i;
	    string str;

	    /* digits directly after the dot make this a float literal */
	    i = marker + 1;
	    while (i < length && input[i] >= '0' && input[i] <= '9')
		++i;

	    if (i == marker + 1)
		TOKEN(TOK_DOT, 1);

	    /* optional exponent: e or E, optional sign, digits */
	    if (i < length && (input[i] == 'e' || input[i] == 'E'))
	    {
		++i;
		if (i < length && (input[i] == '-' || input[i] == '+'))
		    ++i;

		while (i < length && input[i] >= '0' && input[i] <= '9')
		    ++i;
	    }

	    str = input[marker .. i - 1];
	    /* the literal is kept as text; the cast only validates it */
	    if (catch((float) str) != 0)
		ERROR("Overflow in floating point constant", i - marker);

	    TOKEN( ({ TOK_LIT_FLT, str }), i - marker);
	}

    case '=':
	if (marker < length - 1 && input[marker + 1] == '=')
	    TOKEN(TOK_EQUAL, 2);
	else
	    TOKEN(TOK_ASSIGN, 1);

    case '!':
	if (marker < length - 1 && input[marker + 1] == '=')
	    TOKEN(TOK_NEQUAL, 2);
	else
	    TOKEN(TOK_BANG, 1);

    case '<':
	if (marker < length - 1 && input[marker + 1] == '=')
	    TOKEN(TOK_LSEQUAL, 2);
	else
	    TOKEN(TOK_LESS, 1);

    case '>':
	if (marker < length - 1 && input[marker + 1] == '=')
	    TOKEN(TOK_GREQUAL, 2);
	else
	    TOKEN(TOK_GREATER, 1);

# if 0
    case '%':
	if (marker < length - 1 && input[marker + 1] == '{')
	    TOKEN(TOK_OPENBUF, 2);
	else
	    TOKEN(TOK_PERCENT, 1);
# endif

    case '&':
	/* a single ampersand is not a token */
	if (marker < length - 1 && input[marker + 1] == '&')
	    TOKEN(TOK_AND, 2);
	else
	    ERROR(PARSE_ERROR, 1);

    case '|':
	if (marker < length - 1 && input[marker + 1] == '|')
	    TOKEN(TOK_OR, 2);
	else
	    TOKEN(TOK_PIPE, 1);

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
	{
	    int i, num;
	    string str;

	    i = marker;
	    while (i < length && input[i] >= '0' && input[i] <= '9')
		++i;

	    /*
	     * A following dot (unless it begins a range token) or an exponent
	     * marker turns the number into a float literal.
	     */
	    if (i < length &&
		((input[i] == '.' &&
		  /* avoid ".." token */
		  (i + 1 == length || input[i + 1] != '.')) ||
		 input[i] == 'e' || input[i] == 'E'))
	    {
		if (input[i] != 'e' && input[i] != 'E')
		{
		    /* fractional part */
		    ++i;
		    while (i < length && input[i] >= '0' && input[i] <= '9')
			++i;
		}

		if (i < length && (input[i] == 'e' || input[i] == 'E'))
		{
		    /* exponent: optional sign, digits */
		    ++i;
		    if (i < length && (input[i] == '-' || input[i] == '+'))
			++i;

		    while (i < length && input[i] >= '0' && input[i] <= '9')
			++i;
		}

		str = input[marker .. i - 1];
# if 0
		if (input[i - 1] == '.')
		    str += "0";
# endif
		/* the literal is kept as text; the cast only validates it */
		if (catch((float) str))
		    ERROR("Overflow in floating point constant", i - marker);

		TOKEN( ({ TOK_LIT_FLT, str }), i - marker);
	    }
	    else
	    {
		sscanf(input[marker .. i - 1], "%d", num);
		TOKEN( ({ TOK_LIT_NUM, num }), i - marker);
	    }
	}

    case '\"':
	{
	    int i, j, sz;
	    mixed token;
	    string str;

	    /*
	     * Find the closing quote.  A backslash makes the scan skip the
	     * character after it, so an escaped quote does not end the
	     * string.
	     */
	    for (i = marker + 1;
		 i < length && input[i] != '\"' && input[i] != '\n';
		 ++i)
	    {
		if (input[i] == '\\')
		{
		    ++i;
		    continue;
		}
	    }

	    if (i == length || input[i] == '\n')
		ERROR("missing quote", i + 1 - marker);

	    /* (i - marker == 1) ? "" : ... */
	    str = input[marker + 1 .. i - 1];

	    /*
	     * Clean up the string body in place:
	     *   - a backslash followed by anything other than a backslash or
	     *     a quote is dropped (the following character stays);
	     *   - characters below ' ' (except tab) or above '~' are dropped;
	     *   - backslash-backslash and backslash-quote pairs are left
	     *     untouched and stepped over as a unit.
	     */
	    for (j = 0, sz = strlen(str); j < sz; ++j)
	    {
		if ((str[j] == '\\' &&
		     str[j + 1] != '\\' && str[j + 1] != '\"') ||
		    (str[j] < ' ' && str[j] != '\t') || str[j] > '~')
		{
		    str = str[.. j - 1] + str[j + 1 ..];
		    /* re-examine the character that moved into slot j */
		    --j;
		    --sz;
		}
		else if (str[j] == '\\')
		    ++j;
	    }

	    token = ({ TOK_LIT_STR, str });
	    TOKEN(token, i + 1 - marker);
	}

    case '#':
	{
	    int i, num;

	    /* the literal may contain dashes/spaces, then digits/spaces */
	    i = marker + 1;
	    while (i < length && (input[i] == '-' || input[i] == ' '))
		++i;
	    while (i < length &&
		   ((input[i] >= '0' && input[i] <= '9') || input[i] == ' '))
		++i;

	    if (i == marker + 1)
		ERROR(PARSE_ERROR, i - marker);

	    /*
	     * The scan above also accepts text without any digits (e.g. a
	     * lone dash or only spaces).  Reject it here rather than
	     * silently producing object number 0.
	     */
	    if (sscanf(input[marker + 1 .. i - 1], "%d", num) == 0)
		ERROR(PARSE_ERROR, i - marker);

	    TOKEN( ({ TOK_LIT_OBJ, num }), i - marker);
	}

    default:
	/* identifiers, keywords and error constants */
	if ((char >= 'a' && char <= 'z') ||
	    (char >= 'A' && char <= 'Z') || char == '_')
	{
	    int i, id;
	    string ident;
	    mixed token;

	    i = marker + 1;
	    while (i < length &&
		   ((input[i] >= 'a' && input[i] <= 'z') ||
		    (input[i] >= 'A' && input[i] <= 'Z') ||
		    (input[i] >= '0' && input[i] <= '9') ||
		    input[i] == '_'))
		++i;

	    ident = input[marker .. i - 1];

	    /*
	     * Error names are looked up in upper case, keywords in lower
	     * case.  The error table value is reduced by one before use
	     * (presumably the table stores code + 1 so that a false value
	     * can mean "not an error name" -- confirm against MOO_ERRORS).
	     */
	    if (id = moo_errors[toupper(ident)])
		token = ({ TOK_LIT_ERR, id - 1 });
	    else if (id = moo_keywords[tolower(ident)])
		token = id;
	    else
		token = ({ TOK_IDENTIFIER, ident });

	    TOKEN(token, i - marker);
	}

	/* any other character cannot start a token */
	ERROR(PARSE_ERROR, 1);
    }
}