/* token.c - tokenizes a file for the compiler */

#include "config.h"
#include "object.h"
#include "file.h"
#include "token.h"
#include "construct.h"
#include "instr.h"

/* Shared, statically sized work buffers. */

/* expand_buf / tmpbuf are not touched by the tokenizer functions in this
   part of the file; presumably they back #define expansion -- TODO confirm. */
char expand_buf[EBUFSIZ+1];
char tmpbuf[EBUFSIZ+1];
/* Text of the identifier read most recently; NAME_TOK tokens point
   token_data.name at this buffer, so the next name overwrites it. */
char name_buf[MAX_TOK_LEN+1];
/* Text of the string literal read most recently (escapes already decoded);
   STRING_TOK tokens point token_data.name at this buffer. */
char string_buf[MAX_STR_LEN+1];

/* Character classification for identifiers.
   isstart(c): c may begin a name (letter or '_').
   iscname(c): c may continue a name (letter, '_' or digit).
   The argument is converted to unsigned char before it reaches <ctype.h>,
   because passing a negative plain char there is undefined.  Callers must
   therefore rule out EOF themselves before using these macros.  Note that
   c is evaluated more than once. */
#define isstart(c) (isalpha((unsigned char)(c)) || ((c)=='_'))
#define iscname(c) (isstart(c) || isdigit((unsigned char)(c)))
/* Cursor helpers for the expansion buffer; both require a variable named
   file_info to be in scope.
   getch():   yields the character under file_info->expanded, then advances.
   ungetch(): steps file_info->expanded back by one character. */
#define getch() (*((file_info->expanded)++))
#define ungetch() (--(file_info->expanded))

/* find_define - looks a name up in the list of known #defines.
   Walks the singly linked list that starts at file_info->defs and returns
   the first entry whose name is exactly equal to `name` (strcmp), or NULL
   when the list holds no such entry. */
struct define *find_define(filptr *file_info, char *name)
{
  struct define *entry;

  for (entry=file_info->defs; entry!=NULL; entry=entry->next) {
    if (strcmp(entry->name,name)==0)
      break;
  }
  /* entry is either the match or the NULL that ended the walk */
  return entry;
}

/* find_keyword - maps a reserved word to its token type.
   Returns the token constant for `name` when it is one of the language
   keywords listed below, and 0 when it is not a keyword.  The four type
   words (int, string, object, var) all share VAR_DCL_TOK. */
unsigned char find_keyword(char *name)
{
  static const struct {
    const char *word;
    unsigned char tok;
  } keywords[]={
    { "if",     IF_TOK },
    { "int",    VAR_DCL_TOK },
    { "string", VAR_DCL_TOK },
    { "object", VAR_DCL_TOK },
    { "var",    VAR_DCL_TOK },
    { "static", STATIC_TOK },
    { "else",   ELSE_TOK },
    { "while",  WHILE_TOK },
    { "for",    FOR_TOK },
    { "do",     DO_TOK },
    { "return", RETURN_TOK }
  };
  unsigned int idx;

  for (idx=0; idx<sizeof(keywords)/sizeof(keywords[0]); idx++) {
    if (strcmp(name,keywords[idx].word)==0)
      return keywords[idx].tok;
  }
  return 0;
}

/* get_exp_token - reads the next token from the #define expansion buffer.

   Characters are taken from the NUL-terminated text at file_info->expanded
   (via getch/ungetch) and the token found is described in *token:
     - a name: the keyword's token type, or the first token of a nested
       #define expansion, or NAME_TOK with token_data.name -> name_buf;
     - decimal digits: INTEGER_TOK with the value in token_data.integer;
     - "...": STRING_TOK with token_data.name -> string_buf, backslash
       escapes n t r a b f v decoded, any other escaped character kept as is;
     - punctuation and operators: the matching *_TOK / *_OPER constant.
   token->type is NO_TOK when no token can be produced: end of buffer,
   a name longer than MAX_TOK_LEN, a string that is unterminated or longer
   than MAX_STR_LEN, an unknown character, or #define nesting that reaches
   MAX_DEPTH-1 (reported through set_c_err_msg).

   The cursor is never left beyond the terminating NUL, so a caller may
   keep inspecting file_info->expanded after a NO_TOK result.
   Plain char values are cast to unsigned char before any <ctype.h> test,
   since negative arguments are undefined there. */
void get_exp_token(filptr *file_info, token_t *token)
{
  char c;
  signed long val;
  int counter;
  char *str;
  struct define *tmp;

  val=0;
  counter=0;
  c=getch();
  while (c && isspace((unsigned char) c))
    c=getch();
  if (!c) {
    /* getch() stepped over the terminator; go back so the buffer end
       stays detectable */
    ungetch();
    token->type=NO_TOK;
    return;
  }
  if (isstart((unsigned char) c)) {
    while ((counter<MAX_TOK_LEN) && iscname((unsigned char) c)) {
      name_buf[counter++]=c;
      c=getch();
    }
    if (iscname((unsigned char) c)) {
      /* name_buf is full but the name continues */
      token->type=NO_TOK;
      return;
    }
    name_buf[counter]='\0';
    ungetch();
    /* find_keyword yields 0 for a non-keyword, so type doubles as the test */
    if ((token->type=find_keyword(name_buf)))
      return;
    if ((tmp=find_define(file_info,name_buf))) {
      if (file_info->depth<(MAX_DEPTH-1)) {
        /* splice the nested definition in and tokenize its first token */
        expand_exp(tmp,file_info);
        (file_info->depth)++;
        get_exp_token(file_info,token);
        return;
      }
      set_c_err_msg("recursive #define");
      token->type=NO_TOK;
      return;
    }
    token->type=NAME_TOK;
    token->token_data.name=name_buf;
    return;
  }
  if (isdigit((unsigned char) c)) {
    /* NOTE(review): no overflow check; a literal too large for signed long
       overflows val */
    while (isdigit((unsigned char) c)) {
      val=(val*10)+digit_value(c);
      c=getch();
    }
    ungetch();
    token->type=INTEGER_TOK;
    token->token_data.integer=val;
    return;
  }
  if (c=='\"') {
    str=string_buf;
    c=getch();
    while (c && (c!='\"') && ((counter++)<MAX_STR_LEN)) {
      if (c=='\\') {
        c=getch();
        switch (c) {
        case 'n': c='\n'; break;
        case 't': c='\t'; break;
        case 'r': c='\r'; break;
        case 'a': c='\a'; break;
        case 'b': c='\b'; break;
        case 'f': c='\f'; break;
        case 'v': c='\v'; break;
        default: break;           /* any other character stands for itself */
        }
      }
      *(str++)=c;
      /* do not read on once the terminator has been consumed */
      if (c)
        c=getch();
    }
    if (c!='\"') {
      /* unterminated or over-long string; when the buffer ended, step back
         onto the terminator that was just consumed */
      if (!c)
        ungetch();
      token->type=NO_TOK;
      return;
    }
    *(str)='\0';
    token->type=STRING_TOK;
    token->token_data.name=string_buf;
    return;
  }
  /* Punctuation and operators.  Multi-character operators peek at the next
     character and put it back when it does not extend the operator. */
  switch (c) {
  case '{':
    token->type=LBRACK_TOK;
    return;
  case '}':
    token->type=RBRACK_TOK;
    return;
  case ',':
    token->type=COMMA_TOK;
    return;
  case ';':
    token->type=SEMI_TOK;
    return;
  case '(':
    token->type=LPAR_TOK;
    return;
  case ')':
    token->type=RPAR_TOK;
    return;
  case '[':
    token->type=LARRAY_TOK;
    return;
  case '?':
    token->type=COND_OPER;
    return;
  case ']':
    token->type=RARRAY_TOK;
    return;
  case ':':
    token->type=COLON_TOK;
    c=getch();
    if (c==':')
      token->type=SECOND_TOK;
    else
      ungetch();
    return;
  case '=':
    token->type=EQ_OPER;
    c=getch();
    if (c=='=')
      token->type=CONDEQ_OPER;
    else
      ungetch();
    return;
  case '+':
    token->type=ADD_OPER;
    c=getch();
    if (c=='+')
      token->type=POSTADD_OPER;
    else if (c=='=')
      token->type=PLEQ_OPER;
    else
      ungetch();
    return;
  case '-':
    token->type=MIN_OPER;
    c=getch();
    if (c=='-')
      token->type=POSTMIN_OPER;
    else if (c=='=')
      token->type=MIEQ_OPER;
    else
      ungetch();
    return;
  case '*':
    token->type=MUL_OPER;
    c=getch();
    if (c=='=')
      token->type=MUEQ_OPER;
    else
      ungetch();
    return;
  case '/':
    token->type=DIV_OPER;
    c=getch();
    if (c=='=')
      token->type=DIEQ_OPER;
    else
      ungetch();
    return;
  case '%':
    token->type=MOD_OPER;
    c=getch();
    if (c=='=')
      token->type=MOEQ_OPER;
    else
      ungetch();
    return;
  case '&':
    token->type=BITAND_OPER;
    c=getch();
    if (c=='=')
      token->type=ANEQ_OPER;
    else if (c=='&')
      token->type=AND_OPER;
    else
      ungetch();
    return;
  case '^':
    token->type=EXOR_OPER;
    c=getch();
    if (c=='=')
      token->type=EXEQ_OPER;
    else
      ungetch();
    return;
  case '|':
    token->type=BITOR_OPER;
    c=getch();
    if (c=='=')
      token->type=OREQ_OPER;
    else if (c=='|')
      token->type=OR_OPER;
    else
      ungetch();
    return;
  case '!':
    token->type=NOT_OPER;
    c=getch();
    if (c=='=')
      token->type=NOTEQ_OPER;
    else
      ungetch();
    return;
  case '~':
    token->type=BITNOT_OPER;
    return;
  case '<':
    token->type=LESS_OPER;
    c=getch();
    if (c=='=')
      token->type=LESSEQ_OPER;
    else if (c=='<') {
      token->type=LS_OPER;
      c=getch();
      if (c=='=')
        token->type=LSEQ_OPER;
      else
        ungetch();
    } else
      ungetch();
    return;
  case '>':
    token->type=GREAT_OPER;
    c=getch();
    if (c=='=')
      token->type=GREATEQ_OPER;
    else if (c=='>') {
      token->type=RS_OPER;
      c=getch();
      if (c=='=')
        token->type=RSEQ_OPER;
      else
        ungetch();
    } else
      ungetch();
    return;
  default:
    /* character that starts no known token */
    token->type=NO_TOK;
    return;
  }
}

/* unget_token - pushes one token back onto the input.
   Stores a copy of *token in file_info and raises the is_put_back flag, so
   the next get_token call hands this token out again before reading any
   further input.  Only a single token of push-back is kept. */
void unget_token(filptr *file_info, token_t *token)
{
  file_info->is_put_back=1;
  file_info->put_back_token=*token;
}

/* tokenize_name - reads an identifier from the current source file.

   Collects name characters from file_info->curr_file into name_buf and
   classifies the result:
     - a keyword: token->type is the keyword's token type;
     - a #define name: expand() is called for it and the function returns
       with token->type left at 0 (find_keyword's "no keyword" value); the
       caller is expected to continue with the expansion;
     - anything else: NAME_TOK with token_data.name -> name_buf.
   token->type is NO_TOK when the name is longer than MAX_TOK_LEN or when
   end-of-file is reached while reading it.  The character that ends the
   name is pushed back onto the stream. */
void tokenize_name(filptr *file_info, token_t *token)
{
  int c,counter;
  struct define *tmp;

  counter=0;
  for (c=fgetc(file_info->curr_file);
       (counter<MAX_TOK_LEN) && (c!=EOF) && iscname(c);
       c=fgetc(file_info->curr_file))
    name_buf[counter++]=c;
  /* either the stream ended or name_buf is full and the name continues */
  if ((c==EOF) || iscname(c)) {
    token->type=NO_TOK;
    return;
  }
  name_buf[counter]='\0';
  ungetc(c,file_info->curr_file);
  /* find_keyword returns 0 for a non-keyword */
  token->type=find_keyword(name_buf);
  if (token->type)
    return;
  tmp=find_define(file_info,name_buf);
  if (tmp) {
    expand(tmp,file_info);
    return;
  }
  token->type=NAME_TOK;
  token->token_data.name=name_buf;
}

/* tokenize_int - reads a decimal integer literal from the current file.
   Accumulates consecutive digits from file_info->curr_file, pushes the
   first non-digit back onto the stream and stores the result in *token as
   INTEGER_TOK with the value in token_data.integer.
   NOTE(review): the accumulation is not checked for overflow of
   signed long. */
void tokenize_int(filptr *file_info, token_t *token)
{
  signed long total=0;
  int ch;

  for (ch=fgetc(file_info->curr_file);
       (ch!=EOF) && isdigit(ch);
       ch=fgetc(file_info->curr_file))
    total=(total*10)+digit_value(ch);
  /* hand the terminating character back to the stream */
  ungetc(ch,file_info->curr_file);
  token->token_data.integer=total;
  token->type=INTEGER_TOK;
}

/* tokenize_string - reads the body of a string literal from the current file.
   The opening quote has already been consumed by the caller.  Characters
   are copied into string_buf until the closing quote; a backslash followed
   by n, t, r, a, b, f or v is replaced by the matching control character,
   and a backslash before any other character yields that character itself.
   On success *token is STRING_TOK with token_data.name -> string_buf.
   token->type is NO_TOK when end-of-file or an unescaped newline arrives
   before the closing quote, or when the literal holds more than
   MAX_STR_LEN characters. */
void tokenize_string(filptr *file_info, token_t *token)
{
  char *out=string_buf;
  char *limit=string_buf+MAX_STR_LEN;   /* leaves room for the final NUL */
  int ch;

  ch=fgetc(file_info->curr_file);
  while ((ch!=EOF) && (ch!='\"') && (ch!='\n') && (out<limit)) {
    if (ch=='\\') {
      ch=fgetc(file_info->curr_file);
      switch (ch) {
      case 'n': ch='\n'; break;
      case 't': ch='\t'; break;
      case 'r': ch='\r'; break;
      case 'a': ch='\a'; break;
      case 'b': ch='\b'; break;
      case 'f': ch='\f'; break;
      case 'v': ch='\v'; break;
      default: break;
      }
    }
    *(out++)=ch;
    ch=fgetc(file_info->curr_file);
  }
  /* anything other than the closing quote means the literal is invalid */
  if (ch!='\"') {
    token->type=NO_TOK;
    return;
  }
  *out='\0';
  token->token_data.name=string_buf;
  token->type=STRING_TOK;
}

/* get_token - delivers the next token of the program in *token.

   Sources are consulted in this order:
     1. a token pushed back with unget_token;
     2. the #define expansion buffer (file_info->expanded), while it still
        holds non-blank text; once it is used up the buffer pointer and the
        expansion depth are reset;
     3. the current source file.
   While reading the file this function
     - pops back to the including file when an included file ends, and
       produces EOF_TOK when the outermost file ends;
     - counts lines in phys_line only while no including file is stacked;
     - hands a '#' that directly follows a newline to preprocess(); a
       non-zero result from it gives NO_TOK;
     - skips white space and slash-star comments (an unterminated comment
       gives NO_TOK);
     - delegates names, integers and strings to tokenize_name,
       tokenize_int and tokenize_string;
     - recognizes punctuation and operators itself.
   Any character that starts no token gives NO_TOK. */
void get_token(filptr *file_info, token_t *token)
{
  int ch,peek,closed;
  struct file_stack *popped;

  if (file_info->is_put_back) {
    file_info->is_put_back=0;
    *token=file_info->put_back_token;
    return;
  }
  for (;;) {
    if (file_info->expanded) {
      while (isspace(*(file_info->expanded)))
        (file_info->expanded)++;
      if (*(file_info->expanded)) {
        get_exp_token(file_info,token);
        return;
      }
      /* expansion text is used up: fall back to the file */
      file_info->expanded=NULL;
      file_info->depth=0;
    }
    ch=fgetc(file_info->curr_file);
    if (ch==EOF) {
      if (!(file_info->previous)) {
        token->type=EOF_TOK;
        return;
      }
      /* end of an included file: resume the file that included it */
      close_file(file_info->curr_file);
      popped=file_info->previous;
      file_info->curr_file=popped->file_ptr;
      file_info->previous=popped->previous;
      FREE(popped);
      continue;
    }
    if (ch=='\n') {
      if (!(file_info->previous))
        ++(file_info->phys_line);
      /* a '#' right after a newline starts a preprocessor line */
      ch=fgetc(file_info->curr_file);
      if (ch!='#')
        ungetc(ch,file_info->curr_file);
      else if (preprocess(file_info)) {
        token->type=NO_TOK;
        return;
      }
      continue;
    }
    if (isspace(ch))
      continue;
    if (isstart(ch)) {
      ungetc(ch,file_info->curr_file);
      tokenize_name(file_info,token);
      /* a non-NULL buffer here means the name was a #define (presumably
         set up by expand -- confirm); take the token from the expansion */
      if (file_info->expanded)
        continue;
      return;
    }
    if (isdigit(ch)) {
      ungetc(ch,file_info->curr_file);
      tokenize_int(file_info,token);
      return;
    }
    if (ch=='\"') {
      tokenize_string(file_info,token);
      return;
    }
    if (ch=='/') {
      ch=fgetc(file_info->curr_file);
      if (ch=='*') {
        /* skip the comment, still counting its newlines */
        closed=0;
        ch=fgetc(file_info->curr_file);
        while (!closed && (ch!=EOF)) {
          if ((ch=='\n') && !(file_info->previous))
            (file_info->phys_line)++;
          if (ch!='*')
            ch=fgetc(file_info->curr_file);
          else {
            /* the character after '*' is examined again on the next pass
               unless it closes the comment */
            ch=fgetc(file_info->curr_file);
            if (ch=='/')
              closed=1;
          }
        }
        if (!closed) {
          /* file ended inside the comment */
          token->type=NO_TOK;
          return;
        }
        continue;
      }
      /* not a comment: restore the look-ahead and treat '/' as an operator */
      ungetc(ch,file_info->curr_file);
      ch='/';
    }
    /* Punctuation and operators.  For multi-character operators the next
       character is read into peek and pushed back when it does not belong
       to the operator. */
    switch (ch) {
    case '{':
      token->type=LBRACK_TOK;
      return;
    case '}':
      token->type=RBRACK_TOK;
      return;
    case ',':
      token->type=COMMA_TOK;
      return;
    case ';':
      token->type=SEMI_TOK;
      return;
    case '(':
      token->type=LPAR_TOK;
      return;
    case ')':
      token->type=RPAR_TOK;
      return;
    case '[':
      token->type=LARRAY_TOK;
      return;
    case '?':
      token->type=COND_OPER;
      return;
    case ']':
      token->type=RARRAY_TOK;
      return;
    case ':':
      peek=fgetc(file_info->curr_file);
      if (peek==':')
        token->type=SECOND_TOK;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=COLON_TOK;
      }
      return;
    case '=':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=CONDEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=EQ_OPER;
      }
      return;
    case '+':
      peek=fgetc(file_info->curr_file);
      if (peek=='+')
        token->type=POSTADD_OPER;
      else if (peek=='=')
        token->type=PLEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=ADD_OPER;
      }
      return;
    case '-':
      peek=fgetc(file_info->curr_file);
      if (peek=='-')
        token->type=POSTMIN_OPER;
      else if (peek=='=')
        token->type=MIEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=MIN_OPER;
      }
      return;
    case '*':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=MUEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=MUL_OPER;
      }
      return;
    case '/':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=DIEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=DIV_OPER;
      }
      return;
    case '%':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=MOEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=MOD_OPER;
      }
      return;
    case '&':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=ANEQ_OPER;
      else if (peek=='&')
        token->type=AND_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=BITAND_OPER;
      }
      return;
    case '^':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=EXEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=EXOR_OPER;
      }
      return;
    case '|':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=OREQ_OPER;
      else if (peek=='|')
        token->type=OR_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=BITOR_OPER;
      }
      return;
    case '!':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=NOTEQ_OPER;
      else {
        ungetc(peek,file_info->curr_file);
        token->type=NOT_OPER;
      }
      return;
    case '~':
      token->type=BITNOT_OPER;
      return;
    case '<':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=LESSEQ_OPER;
      else if (peek=='<') {
        peek=fgetc(file_info->curr_file);
        if (peek=='=')
          token->type=LSEQ_OPER;
        else {
          ungetc(peek,file_info->curr_file);
          token->type=LS_OPER;
        }
      } else {
        ungetc(peek,file_info->curr_file);
        token->type=LESS_OPER;
      }
      return;
    case '>':
      peek=fgetc(file_info->curr_file);
      if (peek=='=')
        token->type=GREATEQ_OPER;
      else if (peek=='>') {
        peek=fgetc(file_info->curr_file);
        if (peek=='=')
          token->type=RSEQ_OPER;
        else {
          ungetc(peek,file_info->curr_file);
          token->type=RS_OPER;
        }
      } else {
        ungetc(peek,file_info->curr_file);
        token->type=GREAT_OPER;
      }
      return;
    default:
      token->type=NO_TOK;
      return;
    }
  }
}