/* token.c - tokenizes a file for the compiler */
#include "config.h"
#include "object.h"
#include "file.h"
#include "token.h"
#include "construct.h"
#include "instr.h"
#include <limits.h>
/* Scratch buffers sized by EBUFSIZ.  Neither is referenced by the code in
   this file; presumably they are used by the macro-expansion routines
   defined elsewhere (TODO confirm). */
char expand_buf[EBUFSIZ+1];
char tmpbuf[EBUFSIZ+1];
/* Text of the most recently scanned identifier.  NAME_TOK tokens point
   token_data.name at this buffer, so it is overwritten by the next name. */
char name_buf[MAX_TOK_LEN+1];
/* Text of the most recently scanned string literal.  STRING_TOK tokens
   point token_data.name at this buffer. */
char string_buf[MAX_STR_LEN+1];
/* isstart(c): true if c may begin an identifier (a letter or '_').
   iscname(c): true if c may appear inside an identifier (also digits).
   The argument is converted to unsigned char before it reaches the
   <ctype.h> functions, because passing a negative char value to them is
   undefined.  Callers holding an int from fgetc() must rule out EOF first.
   Both macros evaluate their argument more than once. */
#define isstart(c) (isalpha((unsigned char)(c)) || ((c)=='_'))
#define iscname(c) (isstart(c) || isdigit((unsigned char)(c)))
/* getch()/ungetch(): read one character from, or step back one character
   in, the expansion buffer.  Both require a variable named file_info to be
   in scope and move file_info->expanded; neither checks buffer bounds. */
#define getch() (*((file_info->expanded)++))
#define ungetch() (--(file_info->expanded))
/*
 * find_define - look up a #define by name.
 *
 * Walks the list that starts at file_info->defs and is linked through
 * ->next.  Returns the first entry whose ->name equals `name` according
 * to strcmp(), or NULL when no entry matches.
 */
struct define *find_define(filptr *file_info, char *name)
{
  struct define *entry;

  for (entry = file_info->defs; entry != NULL; entry = entry->next) {
    if (strcmp(entry->name, name) == 0)
      return entry;
  }
  return NULL;
}
/*
 * find_keyword - map a reserved word to its token code.
 *
 * Returns the token code for `name` when it is one of the reserved words
 * listed below, and 0 otherwise.  The four type names "int", "string",
 * "object" and "var" all map to VAR_DCL_TOK.  Matching is exact and
 * case-sensitive (strcmp).
 */
unsigned char find_keyword(char *name)
{
  static const struct {
    const char *word;
    unsigned char tok;
  } keywords[] = {
    { "if",     IF_TOK },
    { "int",    VAR_DCL_TOK },
    { "string", VAR_DCL_TOK },
    { "object", VAR_DCL_TOK },
    { "var",    VAR_DCL_TOK },
    { "static", STATIC_TOK },
    { "else",   ELSE_TOK },
    { "while",  WHILE_TOK },
    { "for",    FOR_TOK },
    { "do",     DO_TOK },
    { "return", RETURN_TOK }
  };
  unsigned int idx;

  for (idx = 0; idx < sizeof keywords / sizeof keywords[0]; idx++) {
    if (strcmp(name, keywords[idx].word) == 0)
      return keywords[idx].tok;
  }
  return 0;
}
/*
 * get_exp_token - read the next token from the macro-expansion buffer.
 *
 * Characters are taken from file_info->expanded through getch()/ungetch().
 * Leading whitespace is skipped, then one token is recognised:
 *
 *   identifier  - a keyword yields its keyword token; a name found by
 *                 find_define() is expanded with expand_exp() and scanning
 *                 restarts (bounded by MAX_DEPTH); anything else yields
 *                 NAME_TOK with token_data.name pointing at name_buf.
 *   integer     - decimal digits, INTEGER_TOK with token_data.integer set.
 *   string      - double-quoted, with \n \t \r \a \b \f \v escapes; any
 *                 other escaped character stands for itself.  Yields
 *                 STRING_TOK with token_data.name pointing at string_buf.
 *   punctuation - one-, two- or three-character operators.
 *
 * token->type is set to NO_TOK on any lexical error: an identifier longer
 * than MAX_TOK_LEN, an integer that does not fit in a signed long, a string
 * that is unterminated or longer than MAX_STR_LEN, a #define nested too
 * deeply, or an unrecognised character (including the terminating NUL).
 *
 * NOTE(review): when the terminating NUL is read without a matching
 * ungetch() (empty input, unterminated string), file_info->expanded is left
 * one past the terminator.  get_token() only calls this function when a
 * non-NUL character is pending, but the recursive call after expand_exp()
 * has no such guard -- confirm that expand_exp() never leaves an empty
 * buffer.
 */
void get_exp_token(filptr *file_info, token_t *token)
{
  char c;
  signed long val;
  int counter;
  int digit;
  char *str;
  struct define *tmp;

  val=0;
  counter=0;
  c=getch();
  /* c is a plain char; convert to unsigned char before every <ctype.h>
     call so bytes >= 0x80 are not passed as negative values */
  while (c && isspace((unsigned char) c))
    c=getch();

  /* ---- identifier, keyword or #define name ---- */
  if (isstart((unsigned char) c)) {
    while ((counter<MAX_TOK_LEN) && iscname((unsigned char) c)) {
      name_buf[counter++]=c;
      c=getch();
    }
    /* still inside the identifier: it is longer than MAX_TOK_LEN */
    if (iscname((unsigned char) c)) {
      token->type=NO_TOK;
      return;
    }
    name_buf[counter]='\0';
    ungetch();
    if ((token->type=find_keyword(name_buf))!=0)
      return;
    tmp=find_define(file_info,name_buf);
    if (tmp!=NULL) {
      if (file_info->depth<(MAX_DEPTH-1)) {
        expand_exp(tmp,file_info);
        (file_info->depth)++;
        get_exp_token(file_info,token);
        return;
      }
      /* too many nested expansions: treated as a self-referencing define */
      set_c_err_msg("recursive #define");
      token->type=NO_TOK;
      return;
    }
    token->type=NAME_TOK;
    token->token_data.name=name_buf;
    return;
  }

  /* ---- decimal integer ---- */
  if (isdigit((unsigned char) c)) {
    while (isdigit((unsigned char) c)) {
      digit=digit_value(c);
      /* reject the literal rather than overflow a signed long, which
         would be undefined behaviour */
      if (val>(LONG_MAX-digit)/10) {
        token->type=NO_TOK;
        return;
      }
      val=(val*10)+digit;
      c=getch();
    }
    ungetch();
    token->type=INTEGER_TOK;
    token->token_data.integer=val;
    return;
  }

  /* ---- string literal ---- */
  if (c=='\"') {
    str=string_buf;
    c=getch();
    while (c && (c!='\"') && ((counter++)<MAX_STR_LEN)) {
      if (c=='\\') {
        c=getch();
        switch (c) {
          case 'n': c='\n'; break;
          case 't': c='\t'; break;
          case 'r': c='\r'; break;
          case 'a': c='\a'; break;
          case 'b': c='\b'; break;
          case 'f': c='\f'; break;
          case 'v': c='\v'; break;
          default: break;          /* any other character stands for itself */
        }
      }
      *(str++)=c;
      /* do not read beyond the terminating NUL of the buffer */
      if (c)
        c=getch();
    }
    /* ran out of input or of room before the closing quote */
    if (c!='\"') {
      token->type=NO_TOK;
      return;
    }
    *(str)='\0';
    token->type=STRING_TOK;
    token->token_data.name=string_buf;
    return;
  }

  /* ---- punctuation and operators ---- */
  switch (c) {
    case '{':
      token->type=LBRACK_TOK;
      return;
    case '}':
      token->type=RBRACK_TOK;
      return;
    case ',':
      token->type=COMMA_TOK;
      return;
    case ';':
      token->type=SEMI_TOK;
      return;
    case '(':
      token->type=LPAR_TOK;
      return;
    case ')':
      token->type=RPAR_TOK;
      return;
    case '[':
      token->type=LARRAY_TOK;
      return;
    case '?':
      token->type=COND_OPER;
      return;
    case ']':
      token->type=RARRAY_TOK;
      return;
    case '~':
      token->type=BITNOT_OPER;
      return;
    case ':':                       /* ":" or "::" */
      token->type=COLON_TOK;
      c=getch();
      if (c==':')
        token->type=SECOND_TOK;
      else
        ungetch();
      return;
    case '=':                       /* "=" or "==" */
      token->type=EQ_OPER;
      c=getch();
      if (c=='=')
        token->type=CONDEQ_OPER;
      else
        ungetch();
      return;
    case '+':                       /* "+", "++" or "+=" */
      token->type=ADD_OPER;
      c=getch();
      if (c=='+')
        token->type=POSTADD_OPER;
      else if (c=='=')
        token->type=PLEQ_OPER;
      else
        ungetch();
      return;
    case '-':                       /* "-", "--" or "-=" */
      token->type=MIN_OPER;
      c=getch();
      if (c=='-')
        token->type=POSTMIN_OPER;
      else if (c=='=')
        token->type=MIEQ_OPER;
      else
        ungetch();
      return;
    case '*':                       /* "*" or "*=" */
      token->type=MUL_OPER;
      c=getch();
      if (c=='=')
        token->type=MUEQ_OPER;
      else
        ungetch();
      return;
    case '/':                       /* "/" or "/=" */
      token->type=DIV_OPER;
      c=getch();
      if (c=='=')
        token->type=DIEQ_OPER;
      else
        ungetch();
      return;
    case '%':                       /* "%" or "%=" */
      token->type=MOD_OPER;
      c=getch();
      if (c=='=')
        token->type=MOEQ_OPER;
      else
        ungetch();
      return;
    case '&':                       /* "&", "&=" or "&&" */
      token->type=BITAND_OPER;
      c=getch();
      if (c=='=')
        token->type=ANEQ_OPER;
      else if (c=='&')
        token->type=AND_OPER;
      else
        ungetch();
      return;
    case '^':                       /* "^" or "^=" */
      token->type=EXOR_OPER;
      c=getch();
      if (c=='=')
        token->type=EXEQ_OPER;
      else
        ungetch();
      return;
    case '|':                       /* "|", "|=" or "||" */
      token->type=BITOR_OPER;
      c=getch();
      if (c=='=')
        token->type=OREQ_OPER;
      else if (c=='|')
        token->type=OR_OPER;
      else
        ungetch();
      return;
    case '!':                       /* "!" or "!=" */
      token->type=NOT_OPER;
      c=getch();
      if (c=='=')
        token->type=NOTEQ_OPER;
      else
        ungetch();
      return;
    case '<':                       /* "<", "<=", "<<" or "<<=" */
      token->type=LESS_OPER;
      c=getch();
      if (c=='=')
        token->type=LESSEQ_OPER;
      else if (c=='<') {
        token->type=LS_OPER;
        c=getch();
        if (c=='=')
          token->type=LSEQ_OPER;
        else
          ungetch();
      } else
        ungetch();
      return;
    case '>':                       /* ">", ">=", ">>" or ">>=" */
      token->type=GREAT_OPER;
      c=getch();
      if (c=='=')
        token->type=GREATEQ_OPER;
      else if (c=='>') {
        token->type=RS_OPER;
        c=getch();
        if (c=='=')
          token->type=RSEQ_OPER;
        else
          ungetch();
      } else
        ungetch();
      return;
    default:
      /* unrecognised character, including the terminating NUL */
      token->type=NO_TOK;
      return;
  }
}
/*
 * unget_token - push one token back onto the input.
 *
 * Stores a copy of *token in file_info->put_back_token and raises
 * file_info->is_put_back, so that the next get_token() call returns this
 * token instead of scanning.  Only one token is held: a second call before
 * the next get_token() overwrites the first.
 */
void unget_token(filptr *file_info, token_t *token)
{
  file_info->is_put_back = 1;
  file_info->put_back_token = *token;
}
/*
 * tokenize_name - scan an identifier from the current source file.
 *
 * Reads identifier characters from file_info->curr_file into name_buf and
 * pushes the first non-identifier character back onto the stream.
 *
 *   - token->type becomes NO_TOK when the identifier is longer than
 *     MAX_TOK_LEN or when end of file is reached while scanning it.
 *   - A keyword yields the code returned by find_keyword().
 *   - A name known to find_define() is handed to expand(); in that case
 *     token->type is left at 0 (the find_keyword() result) and the caller
 *     is expected to look at file_info->expanded.
 *   - Anything else yields NAME_TOK with token_data.name pointing at
 *     name_buf.
 */
void tokenize_name(filptr *file_info, token_t *token)
{
  struct define *def;
  int ch;
  int len;

  len = 0;
  for (ch = fgetc(file_info->curr_file);
       (len < MAX_TOK_LEN) && (ch != EOF) && iscname(ch);
       ch = fgetc(file_info->curr_file)) {
    name_buf[len++] = ch;
  }

  /* stopped on EOF, or on a name character because the buffer is full */
  if ((ch == EOF) || iscname(ch)) {
    token->type = NO_TOK;
    return;
  }
  name_buf[len] = '\0';
  ungetc(ch, file_info->curr_file);

  token->type = find_keyword(name_buf);
  if (token->type)
    return;

  def = find_define(file_info, name_buf);
  if (def != NULL) {
    expand(def, file_info);
    return;
  }

  token->type = NAME_TOK;
  token->token_data.name = name_buf;
}
/*
 * tokenize_int - scan a decimal integer literal from the current file.
 *
 * Reads consecutive digits from file_info->curr_file, accumulating their
 * value, and pushes the first non-digit back onto the stream.  On success
 * token->type is INTEGER_TOK and token->token_data.integer holds the value.
 *
 * If the literal does not fit in a signed long, token->type is set to
 * NO_TOK and scanning stops at the offending digit; accumulating further
 * would overflow a signed type, which is undefined behaviour.
 * NOTE(review): if token_data.integer is narrower than long, values that
 * fit in a long can still be truncated on assignment -- confirm its type.
 */
void tokenize_int(filptr *file_info, token_t *token)
{
  int c;
  int digit;
  signed long val;

  val=0;
  c=fgetc(file_info->curr_file);
  while ((c!=EOF) && isdigit(c)) {
    digit=digit_value(c);
    /* val*10+digit would exceed LONG_MAX */
    if (val>(LONG_MAX-digit)/10) {
      token->type=NO_TOK;
      return;
    }
    val=(val*10)+digit;
    c=fgetc(file_info->curr_file);
  }
  /* ungetc(EOF, ...) is a no-op, so no EOF check is needed here */
  ungetc(c,file_info->curr_file);
  token->type=INTEGER_TOK;
  token->token_data.integer=val;
}
/*
 * tokenize_string - scan a string literal from the current source file.
 *
 * Called after the opening double quote has been consumed.  Copies
 * characters into string_buf up to the closing quote, translating the
 * escapes \n \t \r \a \b \f \v; any other escaped character (including
 * a quote or a backslash) stands for itself.
 *
 * On success token->type is STRING_TOK and token->token_data.name points
 * at string_buf.  token->type is NO_TOK when the string is ended by an
 * unescaped newline or end of file, or holds more than MAX_STR_LEN
 * characters.
 *
 * A backslash immediately followed by a newline stores the newline in the
 * string.  Because that consumes a physical line of input,
 * file_info->phys_line is advanced under the same rule used elsewhere in
 * this file (only when not reading an included file).
 */
void tokenize_string(filptr *file_info, token_t *token)
{
  char *str;
  int counter;
  int c;

  counter=0;
  str=string_buf;
  c=fgetc(file_info->curr_file);
  while ((c!=EOF) && (c!='\"') && (c!='\n') && ((counter++)<MAX_STR_LEN)) {
    if (c=='\\') {
      c=fgetc(file_info->curr_file);
      /* backslash at end of file: report as unterminated below instead
         of storing EOF as a character */
      if (c==EOF)
        break;
      switch (c) {
        case 'n': c='\n'; break;
        case 't': c='\t'; break;
        case 'r': c='\r'; break;
        case 'a': c='\a'; break;
        case 'b': c='\b'; break;
        case 'f': c='\f'; break;
        case 'v': c='\v'; break;
        case '\n':
          /* an escaped literal newline still ends a physical line */
          if (!(file_info->previous))
            ++(file_info->phys_line);
          break;
        default:
          break;
      }
    }
    *(str++)=c;
    c=fgetc(file_info->curr_file);
  }
  /* anything but a closing quote here means the string was cut short */
  if (c!='\"') {
    token->type=NO_TOK;
    return;
  }
  *(str)='\0';
  token->type=STRING_TOK;
  token->token_data.name=string_buf;
}
/*
 * get_token - fetch the next token of the compilation unit.
 *
 * Sources are tried in this order:
 *   1. a token pushed back with unget_token();
 *   2. pending macro-expansion text in file_info->expanded, scanned by
 *      get_exp_token(); once that text is exhausted the expansion state
 *      (expanded, depth) is cleared;
 *   3. the current source file, file_info->curr_file.
 *
 * While reading from the file this function
 *   - at end of file returns to the enclosing file (file_info->previous)
 *     if there is one, and otherwise yields EOF_TOK;
 *   - counts lines in file_info->phys_line, but only when not inside an
 *     included file;
 *   - calls preprocess() when a '#' is the first character after a newline;
 *   - skips whitespace and comments of the slash-star form;
 *   - delegates identifiers, integers and strings to tokenize_name(),
 *     tokenize_int() and tokenize_string();
 *   - recognises punctuation and operators itself.
 *
 * token->type is NO_TOK on a lexical error: preprocess() reported failure,
 * a comment is unterminated, a helper failed, or an unrecognised character
 * was read.
 */
void get_token(filptr *file_info, token_t *token)
{
  int c,done;
  struct file_stack *tmp;

  if (file_info->is_put_back) {
    *token=file_info->put_back_token;
    file_info->is_put_back=0;
    return;
  }
  while (1) {
    /* ---- pending macro-expansion text takes priority over the file ---- */
    if (file_info->expanded) {
      /* convert to unsigned char: isspace() on a negative char value is
         undefined */
      while (isspace((unsigned char) *(file_info->expanded)))
        (file_info->expanded)++;
      if (*(file_info->expanded)) {
        get_exp_token(file_info,token);
        return;
      } else {
        file_info->expanded=NULL;
        file_info->depth=0;
      }
    }

    c=fgetc(file_info->curr_file);

    /* ---- end of file: pop back to the enclosing file, if any ---- */
    if (c==EOF) {
      if (file_info->previous) {
        close_file(file_info->curr_file);
        file_info->curr_file=file_info->previous->file_ptr;
        tmp=file_info->previous;
        file_info->previous=tmp->previous;
        FREE(tmp);
        continue;
      }
      token->type=EOF_TOK;
      return;
    }

    /* ---- newline: count it, then look for a directive ---- */
    if (c=='\n') {
      if (!(file_info->previous))
        ++(file_info->phys_line);
      c=fgetc(file_info->curr_file);
      if (c=='#') {
        if (preprocess(file_info)) {
          token->type=NO_TOK;
          return;
        }
      } else
        ungetc(c,file_info->curr_file);
      continue;
    }
    if (isspace(c))
      continue;

    /* ---- identifier, keyword or #define name ---- */
    if (isstart(c)) {
      ungetc(c,file_info->curr_file);
      tokenize_name(file_info,token);
      /* the name was a #define: go round again to read its expansion */
      if (file_info->expanded)
        continue;
      return;
    }
    if (isdigit(c)) {
      ungetc(c,file_info->curr_file);
      tokenize_int(file_info,token);
      return;
    }
    if (c=='\"') {
      tokenize_string(file_info,token);
      return;
    }

    /* ---- comment, or a plain '/' handled with the operators below ---- */
    if (c=='/') {
      c=fgetc(file_info->curr_file);
      if (c=='*') {
        c=fgetc(file_info->curr_file);
        done=0;
        while ((c!=EOF) && (!done)) {
          if (c=='\n')
            if (!(file_info->previous))
              (file_info->phys_line)++;
          if (c=='*') {
            /* the character after a '*' is re-examined by the loop, so a
               run of stars before the closing slash is handled */
            c=fgetc(file_info->curr_file);
            if (c=='/')
              done=1;
          } else
            c=fgetc(file_info->curr_file);
        }
        /* end of file inside the comment */
        if (c!='/') {
          token->type=NO_TOK;
          return;
        }
        continue;
      }
      ungetc(c,file_info->curr_file);
      c='/';
    }

    /* ---- punctuation and operators ---- */
    switch (c) {
      case '{':
        token->type=LBRACK_TOK;
        return;
      case '}':
        token->type=RBRACK_TOK;
        return;
      case ',':
        token->type=COMMA_TOK;
        return;
      case ';':
        token->type=SEMI_TOK;
        return;
      case '(':
        token->type=LPAR_TOK;
        return;
      case ')':
        token->type=RPAR_TOK;
        return;
      case '[':
        token->type=LARRAY_TOK;
        return;
      case '?':
        token->type=COND_OPER;
        return;
      case ']':
        token->type=RARRAY_TOK;
        return;
      case '~':
        token->type=BITNOT_OPER;
        return;
      case ':':                     /* ":" or "::" */
        token->type=COLON_TOK;
        c=fgetc(file_info->curr_file);
        if (c==':')
          token->type=SECOND_TOK;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '=':                     /* "=" or "==" */
        token->type=EQ_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=CONDEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '+':                     /* "+", "++" or "+=" */
        token->type=ADD_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='+')
          token->type=POSTADD_OPER;
        else if (c=='=')
          token->type=PLEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '-':                     /* "-", "--" or "-=" */
        token->type=MIN_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='-')
          token->type=POSTMIN_OPER;
        else if (c=='=')
          token->type=MIEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '*':                     /* "*" or "*=" */
        token->type=MUL_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=MUEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '/':                     /* "/" or "/=" */
        token->type=DIV_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=DIEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '%':                     /* "%" or "%=" */
        token->type=MOD_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=MOEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '&':                     /* "&", "&=" or "&&" */
        token->type=BITAND_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=ANEQ_OPER;
        else if (c=='&')
          token->type=AND_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '^':                     /* "^" or "^=" */
        token->type=EXOR_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=EXEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '|':                     /* "|", "|=" or "||" */
        token->type=BITOR_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=OREQ_OPER;
        else if (c=='|')
          token->type=OR_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '!':                     /* "!" or "!=" */
        token->type=NOT_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=NOTEQ_OPER;
        else
          ungetc(c,file_info->curr_file);
        return;
      case '<':                     /* "<", "<=", "<<" or "<<=" */
        token->type=LESS_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=LESSEQ_OPER;
        else if (c=='<') {
          token->type=LS_OPER;
          c=fgetc(file_info->curr_file);
          if (c=='=')
            token->type=LSEQ_OPER;
          else
            ungetc(c,file_info->curr_file);
        } else
          ungetc(c,file_info->curr_file);
        return;
      case '>':                     /* ">", ">=", ">>" or ">>=" */
        token->type=GREAT_OPER;
        c=fgetc(file_info->curr_file);
        if (c=='=')
          token->type=GREATEQ_OPER;
        else if (c=='>') {
          token->type=RS_OPER;
          c=fgetc(file_info->curr_file);
          if (c=='=')
            token->type=RSEQ_OPER;
          else
            ungetc(c,file_info->curr_file);
        } else
          ungetc(c,file_info->curr_file);
        return;
      default:
        /* character that starts no known token */
        token->type=NO_TOK;
        return;
    }
  }
}