/******************************************************************************
**
** Lexer - a package with procedures to get tokens from a zone file and place
** all pertinent information about each one in a token record. It
** exists to provide a layer of abstraction between the text of the
** file and the parser: only these functions have to worry about
** file IO and string formats, while the parser gets all file
** information through the lexer, which will make the parser less
** complex in the long run. All token records should be freed from
** memory after they are used, to save on memory usage by the
** generate function.
**
** Functions provided:
** get_token - function that gets a token from a file and returns a
** token record,
** defined in gen.h
** xfopen - used to open a file for lexing
** free_token - removes the token or strings of tokens (in the case of
** a description or examine) from memory
**
**
******************************************************************************/
#define LEXER_C
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "gen.h"
#include "events.h"
int theline = 1; /* line number the lexer has reached in the open file; xfopen resets it to 1 */
token_record *new_stack; /* list of the most recently returned tokens, linked through next_tok
and kept so that they can be freed later */
token_record *old_stack; /* the previous batch of tokens; get_token frees it when new_stack fills up */
int num_in_stack; /* number of tokens counted into new_stack; xfopen resets it to 0 */
/********************************************************************************
**
** xlowercase - makes a lowercase copy of a string in a newly allocated buffer,
**              leaving the string passed in untouched
**
** Parameters: str - the string to copy and convert
**             thesize - the number of chars to allocate for the copy
**
** ret - a newly allocated string holding the lowercase version of str. If str
**       does not fit in thesize chars (terminator included) the copy is cut
**       short rather than written past the end of the buffer. The buffer
**       comes from memAlloc(..., REC_CHAR); get_ttype releases it with
**       xfree(ptr, REC_CHAR).
**
******************************************************************************/
char *xlowercase (char *str, int thesize)
{
    char *copy;
    int i = 0;

    copy = (char *) memAlloc(char,thesize,REC_CHAR);

    /* stop one short of the buffer size so the terminator always fits */
    while ((str[i] != '\0') && (i < thesize - 1))
    {
        /* the ctype functions are only defined for unsigned char values (or
           EOF), so a possibly negative char has to be converted first */
        copy[i] = (char) tolower((unsigned char) str[i]);
        i++;
    }

    if (thesize > 0)
        copy[i] = '\0';
    return copy;
}
/********************************************************************************
**
** uppercase - converts a string to entirely uppercase, in place
**
** Parameters: str - the string to convert; it is modified directly.
**                   NULL is accepted and returned unchanged.
**
** ret - the same pointer that was passed in, its contents now in uppercase
**
*******************************************************************************/
char * uppercase (char *str)
{
    char *p;

    if (str == NULL)
        return NULL;

    for (p = str; *p != '\0'; p++)
    {
        /* toupper leaves anything that is not a lowercase letter alone; the
           cast is needed because a negative char value is undefined for the
           ctype functions */
        *p = (char) toupper((unsigned char) *p);
    }
    return str;
}
/********************************************************************************
**
** lowercase - converts a string to entirely lowercase, in place
**
** Parameters: str - the string to convert; it is modified directly.
**                   NULL is accepted and returned unchanged.
**
** ret - the same pointer that was passed in, its contents now in lowercase
**
*******************************************************************************/
char * lowercase (char *str)
{
    char *p;

    if (str == NULL)
        return NULL;

    for (p = str; *p != '\0'; p++)
    {
        /* tolower leaves anything that is not an uppercase letter alone; the
           cast is needed because a negative char value is undefined for the
           ctype functions */
        *p = (char) tolower((unsigned char) *p);
    }
    return str;
}
/*******************************************************************************
**
** free_token - releases a token record together with every record chained
**              behind it through its 'more' pointer
**
** Parameters: tokenptr - the first record of the chain to release; a NULL
**                        pointer is ignored
**
*******************************************************************************/
void free_token(token_record *tokenptr)
{
    if (tokenptr != NULL)
    {
        /* release the rest of the chain first; the call returns at once when
           there is nothing chained behind this record */
        free_token(tokenptr->more);

        /* NOTE(review): sizeof(tokenptr) is the size of a pointer, not of a
           token_record - confirm which of the two the mem_usage accounting
           is meant to subtract */
        mem_usage -= sizeof(tokenptr);
        xfree(tokenptr,REC_TOKEN);
    }
}
/*******************************************************************************
**
** free_stack - frees every token on a stack, following the next_tok links
**
** Parameters: tokenptr - the first token of the stack to free; may be NULL.
**                        The pointer is passed by value, so the caller's own
**                        copy is not changed by this call.
**
*******************************************************************************/
void free_stack(token_record *tokenptr)
{
    token_record *following;

    for ( ; tokenptr != NULL; tokenptr = following)
    {
        /* pick up the link before the record holding it is released */
        following = tokenptr->next_tok;
        free_token(tokenptr);
    }
}
/*******************************************************************************
**
** add_to_stack - appends a token to the end of the new stack and updates
**                num_in_stack
**
** Parameters: tokenptr - the token record to add; its next_tok link is
**                        overwritten with NULL
**
*******************************************************************************/
void add_to_stack(token_record *tokenptr)
{
    token_record **link = &new_stack;
    int was_empty = (new_stack == NULL);

    /* walk to the link that currently ends the list */
    while (*link != NULL)
        link = &(*link)->next_tok;

    tokenptr->next_tok = NULL;
    *link = tokenptr;

    /* an empty stack restarts the count at one, otherwise it just grows */
    if (was_empty)
        num_in_stack = 1;
    else
        num_in_stack++;
}
/*******************************************************************************
**
** get_ttype - returns the token type given a certain string
**
** Parameters: tokenstr - the pointer to the token string
**             get_event - when true, the string is first looked up with
**                         find_event_num and a non-negative result is
**                         returned as the type
**
** ret - the event number (see get_event), or the position of the string in
**       TokenNames plus one, or failing both:
**          T_OTHER      - the string does not start with a letter or digit
**          T_BOOLEAN    - the string is "true" or "false" (any case)
**          T_IDENTIFIER - anything else
**
**       The temporary lowercase copy of the string is released on every
**       return path.
**
******************************************************************************/
int get_ttype(char *tokenstr, Boolean get_event)
{
    char *lowered;
    int the_last;
    int i;

    lowered = xlowercase(tokenstr, TOKENSTRLEN);

    if (get_event)
    {
        int event_id = find_event_num(lowered);

        if (event_id >= 0)
        {
            xfree(lowered,REC_CHAR);
            return event_id;
        }
    }

    the_last = (T_LAST-1);
    for (i = 0; i < the_last; i++)
    {
        /* I made it casecompare, because i know i will be as silly to put some
         * tokens with cases in the table - Marty
         */
        if (!strcasecmp(TokenNames[i], lowered))
        {
            xfree(lowered,REC_CHAR);
            return (i+1);
        }
    }

    /* nothing below needs the lowercase copy; releasing it here keeps the
       T_OTHER and T_BOOLEAN returns from leaking it */
    xfree(lowered,REC_CHAR);

    /* the cast keeps a negative char value away from isalnum */
    if (!isalnum((unsigned char) *tokenstr))
        return T_OTHER;
    if (!strcasecmp(tokenstr, "true") || !strcasecmp(tokenstr, "false"))
        return T_BOOLEAN;
    return T_IDENTIFIER;
}
/********************************************************************************
**
** xfopen - opens a file for reading and resets the lexer's line counter and
**          token count. Use it to open the file that get_token will read
**          from. Only one file should be in use by get_token at a time,
**          because the line number is kept in a single global.
**
** Parameters: filename - the name of the file to open
**
** ret - the file pointer returned by fopen (NULL when the open fails)
**
******************************************************************************/
FILE *xfopen(char *filename)
{
    FILE *handle = fopen(filename, "r");

    /* start counting afresh for the new file */
    theline = 1;
    num_in_stack = 0;

    return handle;
}
/********************************************************************************
**
** goto_next_char - skips spaces, tabs, carriage returns and newlines so that
**                  the file is positioned right at the next other character.
**                  theline is incremented for every newline skipped.
**
** Parameters: thefile - the file to advance
**
** ret - 1 when a character was found (it is pushed back and will be the next
**       one read), -1 when the end of the file, or a read error, was reached
**       first
**
*******************************************************************************/
int goto_next_char(FILE *thefile)
{
    int c;   /* int rather than char, so EOF stays distinct from every byte */

    for ( ; ; )
    {
        c = fgetc(thefile);
        if (c == EOF)
            return -1;

        if (c == '\n')
            theline++;
        else if ((c != '\r') && (c != '\t') && (c != ' '))
            break;
    }

    ungetc(c, thefile);
    return 1;
}
/******************************************************************************
**
** check_next_char - returns the next char that isnt \t, \r, \n or space. The
**                   char found is put back in the stream, so it will be the
**                   next one read; the whitespace in front of it is consumed.
**
**                   NOTE(review): newlines consumed here are not added to
**                   the line counter - confirm that callers expect this.
**
** Parameters: thefile - the file to look ahead in
**
** ret - the character found, or space for end of file (or a read error)
**
******************************************************************************/
char check_next_char(FILE *thefile)
{
    int c;   /* int rather than char, so EOF stays distinct from every byte */

    do
    {
        c = fgetc(thefile);
        if (c == EOF)
            return ' ';
    } while ((c == '\r') || (c == '\t') || (c == ' ') || (c == '\n'));

    ungetc(c, thefile);
    return (char) c;
}
/*******************************************************************************
**
** xend_of_line - checks if the next char that isnt \t, \r or space is a
**                newline. The char found is put back in the stream; the
**                blanks in front of it are consumed.
**
** Parameters: thefile - the file to look ahead in
**
** ret - 1 for it is end of line, 0 for not, -1 if it finds eof (or a read
**       error) before any other character
**
******************************************************************************/
int xend_of_line(FILE *thefile)
{
    int c;   /* int rather than char, so EOF stays distinct from every byte */

    do
    {
        c = fgetc(thefile);
        if (c == EOF)
            return -1;
    } while ((c == '\r') || (c == '\t') || (c == ' '));

    ungetc(c, thefile);
    return (c == '\n') ? 1 : 0;
}
/*******************************************************************************
**
** next_line - positions the file pointer at the beginning of the next line,
** ignoring all it reads in
**
** Parameters: thefile - the file pointer for the next
**
** ret - 1 for success, -1 for end of file reached
**
**
*******************************************************************************/
string_record *get_str_until(FILE *thefile, char until1, char until2)
{
string_record *temp_str;
string_record *str_ptr;
int count = 0;
char nextchar;
temp_str = (string_record * ) memAlloc (string_record,1,REC_STRING);
temp_str->the_string = (char *) memAlloc(char,TOKENSTRLEN,REC_CHAR);
temp_str->the_string[0] = '\0';
str_ptr = temp_str;
nextchar = (char) fgetc(thefile);
if (feof(thefile))
return temp_str;
while ((nextchar != until1) && (nextchar != until2))
{
str_ptr->the_string[count] = nextchar;
nextchar = (char) fgetc(thefile);
if (feof(thefile))
return temp_str;
if (nextchar == '\n')
{
theline++;
str_ptr->the_string[count+1] = '\0';
str_ptr->the_string = (char *)realloc(str_ptr->the_string,strlen(str_ptr->the_string) + 1);
str_ptr->nextstr = (string_record * ) memAlloc (string_record,1,REC_STRING);
str_ptr = str_ptr->nextstr;
str_ptr->the_string = (char *) memAlloc(char,TOKENSTRLEN,REC_CHAR);
str_ptr->the_string[0] = '\0';
count = -1;
nextchar = (char) fgetc(thefile);
if (feof(thefile))
return temp_str;
}
count++;
}
ungetc((int) nextchar, thefile);
str_ptr->the_string = (char *)realloc(str_ptr->the_string,strlen(str_ptr->the_string) + 1);
return temp_str;
}
/*******************************************************************************
**
** next_line - positions the file pointer at the beginning of the next line,
**             ignoring all it reads in. theline is incremented when a
**             newline is found.
**
** Parameters: thefile - the file to advance
**
** ret - 1 for success, -1 if the end of the file (or a read error) was
**       reached before a newline
**
******************************************************************************/
int next_line(FILE *thefile)
{
    int c;

    /* testing the value fgetc returns, instead of feof, also ends the loop on
       a read error, which never sets the end-of-file flag */
    while ((c = fgetc(thefile)) != EOF)
    {
        if (c == '\n')
        {
            theline++;
            return 1;
        }
    }
    return -1;
}
/* Stamps the final type on a token, records it on the new stack and hands it
   back to the caller. */
static token_record *finish_token(token_record *tokenptr, int the_type)
{
    tokenptr->token_type = the_type;
    add_to_stack(tokenptr);
    return tokenptr;
}

/* Ends a token with an error code, first releasing any records that were
   already chained behind it so that they are not leaked. */
static token_record *fail_token(token_record *tokenptr, int error_code)
{
    free_token(tokenptr->more);
    tokenptr->more = NULL;
    return finish_token(tokenptr, error_code);
}

/******************************************************************************
**
** get_token - gets the next token or description from the file and returns a
**             token record containing the following:
**                - token_type - the type of the token, all types defined in
**                               gen.h
**                - the_string - the string of the token
**                - line - the line number the token is found on
**                - more - if until is defined and it gets more than one
**                         line, this will point to the next token
**                         record holding the next string
**
**             Every record returned is also put on the new stack. When that
**             stack already holds 10 tokens, the old stack is freed and the
**             new stack becomes the old one before the new token is read.
**
** Parameters: thefile - the file pointer, indicating where to get a token from
**             until - '\n' asks for one ordinary token (identifier, number
**                     or single character). Any other character asks for
**                     all text up to that character, like for descriptions:
**                     one record per line, chained through 'more'. The
**                     until character itself is consumed.
**             get_event - passed on to get_ttype when the type of an
**                         ordinary token is looked up
**
** ret - a pointer to a token record. If it was successful, token_type will
**       contain a number greater than 0 (T_STRING on every record of a
**       description). If not, it will contain one of the following numbers:
**         -1 = reached end of file
**         -2 = number of strings too long (greater than 100) for the room
**              description which probably signifies they forgot to close
**              off a string
**         -3 = invalid token - started with an integer, but had an alpha
**              character in it such as 12bob. A number must be only a number.
**         -4 = the string length exceeded TOKENSTRLEN without finding
**              a \n, or a single identifier or number was too long to fit
**
*******************************************************************************/
token_record *get_token(FILE *thefile, char until, Boolean get_event)
{
    token_record *tokenptr;  /* holds the struct to store the token info in */
    token_record *strptr;    /* the record currently being filled for a desc. list */
    int nextchar;            /* next char from the file; int so that EOF stays
                                distinct from every real character */
    int counter = 0;         /* index of the next free slot in the_string */
    int size_list = 0;       /* number of extra records chained on so far */
    int num_chars = 0;       /* number of chars gotten in the current line */

    /* rotate the stacks once the new one is full */
    if (num_in_stack == 10)
    {
        free_stack(old_stack);
        old_stack = new_stack;
        new_stack = NULL;
        num_in_stack = 0;
    }

    /* creates a new token structure */
    tokenptr = (token_record * ) memAlloc(token_record,1,REC_TOKEN);
    tokenptr->the_string[0] = '\0';
    tokenptr->token_type = 0;
    tokenptr->more = NULL;
    tokenptr->line = 0;

    /* if they passed in a file that was already at the end */
    if (feof(thefile))
        return finish_token(tokenptr, -1);

    /* get a character */
    nextchar = fgetc(thefile);

    if (until == '\n')
    {
        for ( ; ; nextchar = fgetc(thefile))
        {
            /* nothing but whitespace was left in the file */
            if (nextchar == EOF)
                return finish_token(tokenptr, -1);

            tokenptr->line = theline;

            /* a newline only moves the line count along */
            if (nextchar == '\n')
            {
                theline++;
                continue;
            }

            /* if it is a tab, carriage return, or space ignore it */
            if ((nextchar == '\r') || (nextchar == '\t') || (nextchar == ' '))
                continue;

            /* if it is an identifier */
            if (isalpha(nextchar))
            {
                while (isalnum(nextchar) || (nextchar == '_') || (nextchar == '@'))
                {
                    /* keep one slot free for the terminator */
                    if (counter >= TOKENSTRLEN - 1)
                    {
                        tokenptr->the_string[counter] = '\0';
                        return fail_token(tokenptr, -4);
                    }
                    tokenptr->the_string[counter] = (char) nextchar;
                    counter++;
                    nextchar = fgetc(thefile);
                }
                if (nextchar != EOF)
                    ungetc(nextchar, thefile);
                tokenptr->the_string[counter] = '\0';
                return finish_token(tokenptr,
                                    get_ttype(tokenptr->the_string, get_event));
            }

            if (isdigit(nextchar))
            {
                while (isdigit(nextchar))
                {
                    /* keep one slot free for the terminator */
                    if (counter >= TOKENSTRLEN - 1)
                    {
                        tokenptr->the_string[counter] = '\0';
                        return fail_token(tokenptr, -4);
                    }
                    tokenptr->the_string[counter] = (char) nextchar;
                    counter++;
                    nextchar = fgetc(thefile);
                }
                tokenptr->the_string[counter] = '\0';

                /* make sure we dont have a number first with alpha following
                   somewhere, since if the first character of a token is a
                   number, we dont want any following characters to be an
                   alpha, like 12bob would be invalid. bob12 would be fine */
                if (isalpha(nextchar))
                    return finish_token(tokenptr, -3);

                if (nextchar != EOF)
                    ungetc(nextchar, thefile);
                return finish_token(tokenptr, T_NUMERICAL);
            }

            /* it must be some sort of non-identifer */
            tokenptr->the_string[0] = (char) nextchar;
            tokenptr->the_string[1] = '\0';
            return finish_token(tokenptr,
                                get_ttype(tokenptr->the_string, get_event));
        }
    }
    /* they have specified an until character, get all strings until the until char */
    else
    {
        /* set strptr to the first token structure */
        strptr = tokenptr;

        /* while we dont get the until or end of file, keep getting characters */
        while ((nextchar != EOF) && ((char) nextchar != until))
        {
            strptr->line = theline;

            /* if we hit a newline create a new token struct and attach it to
               the more element, hence we have a linked list of strings */
            if (nextchar == '\n')
            {
                theline++;
                num_chars = 0;
                nextchar = fgetc(thefile);
                if ((nextchar != EOF) && ((char) nextchar == until))
                {
                    strptr->more = NULL;
                }
                else
                {
                    strptr->the_string[counter] = '\0';
                    counter = 0;

                    /* the description is waaay too large, probably a
                       forgotten 'until' character. size_list counts across
                       the whole description, so it is not reset per line. */
                    size_list++;
                    if (size_list > 100)
                        return fail_token(tokenptr, -2);

                    strptr->more = (token_record * ) memAlloc(token_record,1,REC_TOKEN);
                    strptr = strptr->more;
                    /* give the new record a defined state straight away, so
                       the chain can be walked or freed at any point */
                    strptr->the_string[0] = '\0';
                    strptr->token_type = T_STRING;
                    strptr->more = NULL;
                    strptr->next_tok = NULL;
                    strptr->line = theline;
                }
            }
            else
            {
                strptr->the_string[counter] = (char) nextchar;
                counter++;
                nextchar = fgetc(thefile);
                num_chars++;
                if (num_chars >= TOKENSTRLEN)
                {
                    /* the line filled the whole buffer; terminate it in place
                       of its last character before reporting the error */
                    strptr->the_string[TOKENSTRLEN - 1] = '\0';
                    return fail_token(tokenptr, -4);
                }
            }
        }

        /* if we hit the end of file, we allow it and just stop getting strings */
        strptr->the_string[counter] = '\0';
        strptr->token_type = T_STRING;
        strptr->more = NULL;
        return finish_token(tokenptr, T_STRING);
    }
}