/*
* NAME: lex.c
* DESCRIPTION: MOO code lexical analyzer
*/
inherit "/std/string";
# include <moo/data.h>
# include <moo/errors.h>
# include <moo/tokens.h>
/* text currently being scanned; assigned by set_input() */
private string input;
/* length: strlen(input); marker: index in input where the next scan starts */
private int length, marker;
/*
 * lookup tables filled in by create() from MOO_ERRORS and MOO_KEYWORDS;
 * advance() indexes moo_errors with the upper-cased identifier and
 * moo_keywords with the lower-cased identifier
 */
private mapping moo_errors, moo_keywords;
/* message used by advance() for input it cannot tokenize */
# define PARSE_ERROR "parse error"
/* move marker forward by `step' characters, then return `tok' from the enclosing function */
# define TOKEN(tok, step) do { marker += (step); return (tok); } while (0)
/* move marker forward by `step' characters, then call error() with `msg' prefixed by "*" */
# define ERROR(msg, step) (marker += (step), error("*" + msg))
/*
 * NAME:        create()
 * DESCRIPTION: load the keyword and error-name lookup tables from the
 *              MOO_KEYWORDS and MOO_ERRORS definitions
 */
static void create(void)
{
    moo_keywords = MOO_KEYWORDS;
    moo_errors = MOO_ERRORS;
}
/*
 * NAME:        set_input()
 * DESCRIPTION: install `text' as the string to be scanned, record its
 *              length and restart scanning at its first character
 */
void set_input(string text)
{
    input = text;
    length = strlen(input);
    marker = 0;
}
/*
 * NAME:        advance()
 * DESCRIPTION: scan and return the next token of the input installed with
 *              set_input(), moving marker past the text that was consumed.
 *
 *              Return value:
 *                - TOK_EOF when only blanks, tabs and newlines (or nothing)
 *                  remain;
 *                - the character code itself for single-character
 *                  punctuation (including a '/' that does not open a
 *                  comment);
 *                - a TOK_* constant for the operators "..", ".", "=", "==",
 *                  "!", "!=", "<", "<=", ">", ">=", "&&", "||" and "|";
 *                - the value stored in moo_keywords for a keyword;
 *                - an array ({ TOK_xxx, value }) for comments
 *                  (TOK_COMMENT, text including the delimiters), floats
 *                  (TOK_LIT_FLT, source text), integers (TOK_LIT_NUM, int),
 *                  strings (TOK_LIT_STR, string), object numbers
 *                  (TOK_LIT_OBJ, int), error names (TOK_LIT_ERR, int) and
 *                  identifiers (TOK_IDENTIFIER, name).
 *
 *              Errors are raised through ERROR(), i.e. error() is called
 *              with the message prefixed by "*" after marker has been moved
 *              past the offending text.
 */
mixed advance(void)
{
    int char;

    /* skip blanks, newlines and tabs; char ends up holding input[marker] */
    while (marker < length &&
           ((char = input[marker]) == ' ' || char == '\n' || char == '\t'))
        ++marker;

    if (marker >= length)
        return TOK_EOF;

    switch (char)
    {
    /* single-character tokens are returned as their own character code */
    case '(': case ')': case '[': case ']': case '-':
    case '*': case '?': case '+': case '@': case '~': case '%':
    case '{': case '}': case ',': case ';': case ':': case '$':
        return input[marker++];

    case '/':
        if (marker < length - 1 && input[marker + 1] == '*')
        {
            int i;

            /* look for the closing delimiter of the comment */
            for (i = marker + 2; i < length - 1; ++i)
            {
                if (input[i] == '*' && input[i + 1] == '/')
                {
                    mixed token;

                    /* the token text includes both delimiters */
                    token = ({ TOK_COMMENT, input[marker .. i + 1] });
                    TOKEN(token, i + 2 - marker);
                }
            }
            ERROR("end of program while in a comment", i + 2 - marker);
        }
        else
            return input[marker++];

    case '.':
        if (marker < length - 1 && input[marker + 1] == '.')
            TOKEN(TOK_RANGE, 2);
        else
        {
            int i;
            string str;

            /* digits directly after the dot make this a float literal */
            i = marker + 1;
            while (i < length && input[i] >= '0' && input[i] <= '9')
                ++i;
            if (i == marker + 1)
                TOKEN(TOK_DOT, 1);

            /* optional exponent: e or E, optional sign, digits */
            if (i < length && (input[i] == 'e' || input[i] == 'E'))
            {
                ++i;
                if (i < length && (input[i] == '-' || input[i] == '+'))
                    ++i;
                while (i < length && input[i] >= '0' && input[i] <= '9')
                    ++i;
            }

            str = input[marker .. i - 1];
            /* any failure of the conversion is reported as an overflow */
            if (catch((float) str) != 0)
                ERROR("Overflow in floating point constant", i - marker);
            TOKEN( ({ TOK_LIT_FLT, str }), i - marker);
        }

    case '=':
        if (marker < length - 1 && input[marker + 1] == '=')
            TOKEN(TOK_EQUAL, 2);
        else
            TOKEN(TOK_ASSIGN, 1);

    case '!':
        if (marker < length - 1 && input[marker + 1] == '=')
            TOKEN(TOK_NEQUAL, 2);
        else
            TOKEN(TOK_BANG, 1);

    case '<':
        if (marker < length - 1 && input[marker + 1] == '=')
            TOKEN(TOK_LSEQUAL, 2);
        else
            TOKEN(TOK_LESS, 1);

    case '>':
        if (marker < length - 1 && input[marker + 1] == '=')
            TOKEN(TOK_GREQUAL, 2);
        else
            TOKEN(TOK_GREATER, 1);

# if 0
    case '%':
        if (marker < length - 1 && input[marker + 1] == '{')
            TOKEN(TOK_OPENBUF, 2);
        else
            TOKEN(TOK_PERCENT, 1);
# endif

    case '&':
        /* a single '&' is not a token */
        if (marker < length - 1 && input[marker + 1] == '&')
            TOKEN(TOK_AND, 2);
        else
            ERROR(PARSE_ERROR, 1);

    case '|':
        if (marker < length - 1 && input[marker + 1] == '|')
            TOKEN(TOK_OR, 2);
        else
            TOKEN(TOK_PIPE, 1);

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    {
        int i, num;
        string str;

        i = marker;
        while (i < length && input[i] >= '0' && input[i] <= '9')
            ++i;

        /*
         * The literal is a float if the digits are followed by an
         * exponent marker, or by a '.' that does not start "..".
         */
        if (i < length && ((input[i] == '.' &&
                            /* avoid ".." token */
                            (i + 1 == length || input[i + 1] != '.')) ||
                           input[i] == 'e' || input[i] == 'E'))
        {
            if (input[i] != 'e' && input[i] != 'E')
            {
                /* skip the '.' and the fraction digits */
                ++i;
                while (i < length && input[i] >= '0' && input[i] <= '9')
                    ++i;
            }
            /* optional exponent: e or E, optional sign, digits */
            if (i < length && (input[i] == 'e' || input[i] == 'E'))
            {
                ++i;
                if (i < length && (input[i] == '-' || input[i] == '+'))
                    ++i;
                while (i < length && input[i] >= '0' && input[i] <= '9')
                    ++i;
            }

            str = input[marker .. i - 1];
# if 0
            if (input[i - 1] == '.')
                str += "0";
# endif
            /* any failure of the conversion is reported as an overflow */
            if (catch((float) str))
                ERROR("Overflow in floating point constant", i - marker);
            TOKEN( ({ TOK_LIT_FLT, str }), i - marker);
        }
        else
        {
            sscanf(input[marker .. i - 1], "%d", num);
            TOKEN( ({ TOK_LIT_NUM, num }), i - marker);
        }
    }

    case '\"':
    {
        int i, j, sz;
        mixed token;
        string str;

        /*
         * Find the closing quote; a backslash makes the scan skip the
         * character that follows it.
         */
        for (i = marker + 1;
             i < length && input[i] != '\"' && input[i] != '\n';
             ++i)
        {
            if (input[i] == '\\')
            {
                ++i;
                continue;
            }
        }

        /*
         * A backslash as the very last input character leaves i at
         * length + 1, so compare with >= rather than == to avoid
         * indexing input beyond its end.
         */
        if (i >= length || input[i] == '\n')
            ERROR("missing quote", i + 1 - marker);

        /* (i - marker == 1) ? "" : ... */
        str = input[marker + 1 .. i - 1];

        /*
         * Clean up the string body: remove a backslash that is not
         * followed by a backslash or a double quote, and remove every
         * character below ' ' (except tab) or above '~'.  The pairs
         * \\ and \" are left in place, backslash included.
         */
        for (j = 0, sz = strlen(str); j < sz; ++j)
        {
            if ((str[j] == '\\' &&
                 str[j + 1] != '\\' && str[j + 1] != '\"') ||
                (str[j] < ' ' && str[j] != '\t') || str[j] > '~')
            {
                /* delete str[j] and look at the same index again */
                str = str[.. j - 1] + str[j + 1 ..];
                --j; --sz;
            }
            else if (str[j] == '\\')
                ++j;    /* step over the escaped character */
        }

        token = ({ TOK_LIT_STR, str });
        TOKEN(token, i + 1 - marker);
    }

    case '#':
    {
        int i, num;

        /* a run of '-' and blanks, then a run of digits and blanks */
        i = marker + 1;
        while (i < length && (input[i] == '-' || input[i] == ' '))
            ++i;
        while (i < length &&
               ((input[i] >= '0' && input[i] <= '9') || input[i] == ' '))
            ++i;

        /*
         * Only a '#' with nothing accepted after it is rejected here.
         * NOTE(review): text such as "#-" passes this check although it
         * contains no digits -- confirm that this is intended.
         */
        if (i == marker + 1)
            ERROR(PARSE_ERROR, i - marker);

        sscanf(input[marker + 1 .. i - 1], "%d", num);
        TOKEN( ({ TOK_LIT_OBJ, num }), i - marker);
    }

    default:
        /* identifiers start with a letter or an underscore */
        if ((char >= 'a' && char <= 'z') ||
            (char >= 'A' && char <= 'Z') ||
            char == '_')
        {
            int i, id;
            string ident;
            mixed token;

            /* followed by any number of letters, digits and underscores */
            i = marker + 1;
            while (i < length && ((input[i] >= 'a' && input[i] <= 'z') ||
                                  (input[i] >= 'A' && input[i] <= 'Z') ||
                                  (input[i] >= '0' && input[i] <= '9') ||
                                  input[i] == '_'))
                ++i;
            ident = input[marker .. i - 1];

            /*
             * Error names are looked up in upper case and take
             * precedence over keywords, which are looked up in lower
             * case; the error table value minus one is what is emitted.
             */
            if (id = moo_errors[toupper(ident)])
                token = ({ TOK_LIT_ERR, id - 1 });
            else if (id = moo_keywords[tolower(ident)])
                token = id;
            else
                token = ({ TOK_IDENTIFIER, ident });
            TOKEN(token, i - marker);
        }

        /* any other character cannot start a token */
        ERROR(PARSE_ERROR, 1);
    }
}