/****************************************************************************** ** ** Lexer - a package with procedures to get tokens from a zone file and place ** all pertinant information about it in the token record. The ** reason this is used is to provide some abstraction between the ** text of the file and the parser. Only these functions have to ** worry about file IO and string formats, while the parser gets ** all file information through the lexer. This provides some ** abstraction between the file and the parser, which will make ** the parser less complex in the long run. All token records ** should be freed from memory after they are used to save on memory ** usage by the generate function ** ** Functions provided: ** get_token - function that gets a token from a file and returns a ** token record, ** defined in gen.h ** xfopen - used to open a file for lexing ** free_token - removes the token or strings of tokens (in the case of ** a description or examine) from memory ** ** ******************************************************************************/ #define LEXER_C #include <stdio.h> #include <ctype.h> #include <stdlib.h> #include <string.h> #include "gen.h" #include "events.h" int theline = 1; /* keeps track of the line the file is on */ token_record *new_stack; /* holds a stack of tokens, used to keep track of for freeing later */ token_record *old_stack; /* a stack of older tokens */ int num_in_stack; /* the number in the new stack */ /******************************************************************************** ** ** lowercase - converts a string to entirely lowercase ** ** Parameters: str - the string to convert ** ** ret - same string passed in, but in lowercase ** ******************************************************************************/ char *xlowercase (char *str, int thesize) { char *oldstring; char *newstring; char *returnstr; newstring = (char *) memAlloc(char,thesize,REC_CHAR); returnstr = newstring; for (oldstring = str; *oldstring != '\0'; oldstring++) { if (isupper (*oldstring)) *newstring = tolower (*oldstring); else *newstring = *oldstring; newstring++; } *newstring = '\0'; return returnstr; } /******************************************************************************** ** ** uppercase - converts a string to entirely uppercase ** ** Parameters: str - the string to convert ** ** ret - same string passed in, but in lowercase ** *******************************************************************************/ char * uppercase (char *str) { char *p; for (p = str; *p; p++) if (islower (*p)) *p = toupper (*p); return str; } /******************************************************************************** ** ** lowercase - converts a string to entirely lowercase ** ** Parameters: str - the string to convert ** ** ret - same string passed in, but in lowercase ** *******************************************************************************/ char * lowercase (char *str) { char *p; for (p = str; *p; p++) if (isupper (*p)) *p = tolower (*p); return str; } /******************************************************************************* ** ** free_token - frees up the memory a token or string of tokens were occupying ** ** Parameters: tokenptr - the token record to free ** ** *******************************************************************************/ void free_token(token_record *tokenptr) { if (tokenptr == NULL) return; if (tokenptr->more != NULL) free_token(tokenptr->more); mem_usage -= sizeof(tokenptr); xfree(tokenptr,REC_TOKEN); tokenptr = NULL; } /******************************************************************************* ** ** free_stack - frees a stack from memory ** ** Parameters: tokenptr - the token record to add ** ** *******************************************************************************/ void free_stack(token_record *tokenptr) { token_record *tmp_ptr; token_record *free_ptr; tmp_ptr = tokenptr; tokenptr = NULL; while (tmp_ptr != NULL) { free_ptr = tmp_ptr; tmp_ptr = tmp_ptr->next_tok; free_token(free_ptr); } } /******************************************************************************* ** ** add_to_stack - adds a token to the new stack ** ** Parameters: tokenptr - the token record to add ** ** *******************************************************************************/ void add_to_stack(token_record *tokenptr) { token_record *tmp_ptr; tmp_ptr = new_stack; if (tmp_ptr == NULL) { new_stack = tokenptr; new_stack->next_tok = NULL; num_in_stack=1; return; } while (tmp_ptr->next_tok != NULL) tmp_ptr = tmp_ptr->next_tok; tokenptr->next_tok = NULL; tmp_ptr->next_tok = tokenptr; num_in_stack++; } /******************************************************************************* ** ** get_ttype - returns the token type given a certain string ** ** Parameters: tokenstr - the pointer to the token string ** ** ret - the integer fitting with the token defines found in gen.h ** ** ******************************************************************************/ int get_ttype(char *tokenstr, Boolean get_event) { char *tempstr; int the_last; int i; /* used in for loop */ tempstr = xlowercase(tokenstr, TOKENSTRLEN); if (get_event) { int event_id; if ((event_id = find_event_num(tempstr)) >= 0) { xfree(tempstr,REC_CHAR); return event_id; } } the_last = (T_LAST-1); for (i=0; i < the_last; i++) { /* I made it casecompare, because i know i will be as silly to put some * tokens with cases in the table - Marty */ if (!strcasecmp(TokenNames[i], tempstr)) { /* printf("yes!\n"); */ xfree(tempstr,REC_CHAR); return (i+1); } } if (!isalnum(*tokenstr)) return T_OTHER; if (!strcasecmp(tokenstr, "true") || !strcasecmp(tokenstr, "false")) return T_BOOLEAN; xfree(tempstr,REC_CHAR); return T_IDENTIFIER; } /******************************************************************************** ** ** xfopen - opens a read file, as well as resets globals for reading. This ** should be used to open a file that is intended for the lexer to ** parse through. You should only have one file open at a time ** for use by the function get_token or else file line numbers ** will be all screwed up ** ** Parameters: filename - the string filename to open ** ** ret - the pointer to the file ** ** ******************************************************************************/ FILE *xfopen(char *filename) { num_in_stack = 0; theline = 1; return fopen(filename, "r"); } /******************************************************************************** ** ** goto_next_char - positions the file pointer right at the next character ** ** Parameters: thefile - the file pointer for the next ** ** ret - 1 for success, -1 for failure ** ** *******************************************************************************/ int goto_next_char(FILE *thefile) { char thechar = '\r'; while ((thechar == '\r') || (thechar == '\t') || (thechar == ' ') || (thechar == '\n')) { if (thechar == '\n') { theline++; } if (feof(thefile)) return -1; thechar = (char ) fgetc(thefile); } ungetc((int) thechar, thefile); return 1; } /****************************************************************************** ** ** check_next_char - returns the next char that isnt \t, \r, or space. This ** function also puts the char back in the stream to avoid ** disturbing it ** ** Parameters: thefile - the file pointer for the next ** ** ret - the character found, or space for end of file ** ** ******************************************************************************/ char check_next_char(FILE *thefile) { char thechar = '\r'; while ((thechar == '\r') || (thechar == '\t') || (thechar == ' ') || (thechar == '\n')) { if (feof(thefile)) return ' '; thechar = (char ) fgetc(thefile); } ungetc((int) thechar, thefile); return thechar; } /******************************************************************************* ** ** xend_of_line - checks if the next valid char is a newline ** ** Parameters: thefile - the file pointer for the next ** ** ret - 1 for it is end of line, 0 for not, -1 if it finds eof ** ** ******************************************************************************/ int xend_of_line(FILE *thefile) { char thechar = '\r'; while ((thechar == '\r') || (thechar == '\t') || (thechar == ' ')) { if (feof(thefile)) return -1; thechar = (char ) fgetc(thefile); } if (thechar == '\n') { ungetc((int) thechar, thefile); return 1; } ungetc((int) thechar, thefile); return 0; } /******************************************************************************* ** ** next_line - positions the file pointer at the beginning of the next line, ** ignoring all it reads in ** ** Parameters: thefile - the file pointer for the next ** ** ret - 1 for success, -1 for end of file reached ** ** *******************************************************************************/ string_record *get_str_until(FILE *thefile, char until1, char until2) { string_record *temp_str; string_record *str_ptr; int count = 0; char nextchar; temp_str = (string_record * ) memAlloc (string_record,1,REC_STRING); temp_str->the_string = (char *) memAlloc(char,TOKENSTRLEN,REC_CHAR); temp_str->the_string[0] = '\0'; str_ptr = temp_str; nextchar = (char) fgetc(thefile); if (feof(thefile)) return temp_str; while ((nextchar != until1) && (nextchar != until2)) { str_ptr->the_string[count] = nextchar; nextchar = (char) fgetc(thefile); if (feof(thefile)) return temp_str; if (nextchar == '\n') { theline++; str_ptr->the_string[count+1] = '\0'; str_ptr->the_string = (char *)realloc(str_ptr->the_string,strlen(str_ptr->the_string) + 1); str_ptr->nextstr = (string_record * ) memAlloc (string_record,1,REC_STRING); str_ptr = str_ptr->nextstr; str_ptr->the_string = (char *) memAlloc(char,TOKENSTRLEN,REC_CHAR); str_ptr->the_string[0] = '\0'; count = -1; nextchar = (char) fgetc(thefile); if (feof(thefile)) return temp_str; } count++; } ungetc((int) nextchar, thefile); str_ptr->the_string = (char *)realloc(str_ptr->the_string,strlen(str_ptr->the_string) + 1); return temp_str; } /******************************************************************************* ** ** next_line - positions the file pointer at the beginning of the next line, ** ignoring all it reads in ** ** Parameters: thefile - the file pointer for the next ** ** ret - 1 for success, -1 for end of file reached ** ** ******************************************************************************/ int next_line(FILE *thefile) { char thechar = '\0'; while (thechar != '\n') { if (feof(thefile)) return -1; thechar = (char ) fgetc(thefile); } theline++; return 1; } /****************************************************************************** ** ** get_token - gets the next token or description from the file and returns a ** token record containing the following: ** - the_type - the type of the token, all types defined in ** gen.h ** - the_string - the string of the token ** - line - the line number the token is found on ** - more - if until is defined and it gets more than one ** line, this will point to the next token ** record holding the next string ** ** Parameters: thefile - the file pointer, indicating where to get a token from ** until - gets a long line of strings until a specific ** character, like for descriptions ** ** ret - a pointer to a token record. If it was successful, the_type will ** contain a number greater than 0. If not, it will contain one of the ** following numbers: ** -1 = reached end of file ** -2 = number of strings too long (greater than 100) for the room ** description which probably signifies they forgot to close ** off a string ** -3 = invalid token - started with an integer, but had an alpha ** character in it such as 12bob. A number must be only a number. ** -4 = the string length exceeded TOKENSTRLEN (128?) without finding ** a \n ** *******************************************************************************/ token_record *get_token(FILE *thefile, char until, Boolean get_event) { token_record *tokenptr; /* holds the struct to store the token info in */ token_record *strptr; /* Used to str. together tokens for a desc. list */ char nextchar; /* holds the next char. to examine from the file */ int counter = 0; /* counts the num. of chars in the next token */ int size_list = 0; /* checks size of linked list for > 100, i.e. a problem */ int num_chars; /* number of chars gotten in the current string */ if (num_in_stack == 10) { free_stack(old_stack); old_stack = new_stack; new_stack = NULL; num_in_stack = 0; } /* creates a new token structure */ tokenptr = (token_record * ) memAlloc(token_record,1,REC_TOKEN); tokenptr->the_string[0] = '\0'; tokenptr->token_type = 0; tokenptr->more = NULL; tokenptr->line = 0; /* if they passed in a file that was already at the end */ if (feof(thefile)) { tokenptr->token_type = -1; tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } /* get a character */ nextchar = (char ) fgetc(thefile); if (until == '\n') { for ( ; ; ) { tokenptr->line = theline; /* if it is a newline */ if (nextchar == '\n') { theline++; } /* if it is a tab, carriage return, or space ignore it */ else if ((nextchar == '\r') || (nextchar == '\t') || (nextchar == ' ')) counter = 0; /* if it is an identifier */ else if (isalpha(nextchar)) { while (isalnum(nextchar) || (nextchar == '_') || (nextchar == '@')) { tokenptr->the_string[counter] = nextchar; counter++; nextchar = (char) fgetc(thefile); } ungetc((int) nextchar, thefile); tokenptr->the_string[counter] = '\0'; tokenptr->token_type = get_ttype(tokenptr->the_string, get_event); /* printf("type1: %d\n", tokenptr->token_type); */ tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } else if (isdigit(nextchar)) { while (isdigit(nextchar)) { tokenptr->the_string[counter] = nextchar; counter++; nextchar = (char) fgetc(thefile); } /* make sure we dont have a number first with alpha following somewhere, since if the first character of a token is a number, we dont want any following characters to be an alpha, like 12bob would be invalid. bob12 would be fine */ if (isalpha(nextchar)) { tokenptr->token_type = -3; tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } ungetc((int) nextchar, thefile); tokenptr->the_string[counter] = '\0'; tokenptr->token_type = T_NUMERICAL; tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } /* it must be some sort of non-identifer */ else { tokenptr->the_string[0] = nextchar; tokenptr->the_string[1] = '\0'; tokenptr->token_type = get_ttype(tokenptr->the_string, get_event); /* printf("type2: %d\n", tokenptr->token_type); */ tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } /* get another character */ nextchar = (char ) fgetc(thefile); if (feof(thefile)) { tokenptr->token_type = -1; tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } } } /* they have specified an until character, get all strings until the until char */ else { num_chars = 0; /* set strptr to the first token structure */ strptr = tokenptr; /* while we dont get the until or end of file, keep getting characters */ while (!feof(thefile) && (nextchar != until)) { size_list = 0; strptr->line = theline; /* if we hit a newline create a new token struct and attach it to the more element, hence we have a linked list of strings */ if (nextchar == '\n') { theline++; num_chars = 0; nextchar = (char ) fgetc(thefile); if (nextchar == until) { strptr->more = NULL; } else { size_list++; strptr->the_string[counter] = '\0'; counter = 0; strptr->more = (token_record * ) memAlloc(token_record,1,REC_TOKEN); strptr = strptr->more; /* the description is waaay too large, probably a forgotten 'until' character */ if (size_list > 100) { tokenptr->token_type = -2; tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } } } /* if we hit the end of file, we allow it and just stop getting strings */ else if (!feof(thefile)) { strptr->the_string[counter] = nextchar; counter++; nextchar = (char ) fgetc(thefile); num_chars++; if (num_chars >= TOKENSTRLEN) { tokenptr->token_type = -4; tokenptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } } } strptr->the_string[counter] = '\0'; strptr->token_type = T_STRING; strptr->more = NULL; add_to_stack(tokenptr); return tokenptr; } return NULL; }