#include <stdio.h> extern long random(); extern void perror(); extern void exit(); #define HISTORY 4 #define HMASK 03 #define TEMPLSIZ 2000 #define KEYWSIZ 2000 #define BLANK 040 /* ELIZA is a program that simulates a psychiatrist. It begins by reading a file containing templates and keywords, which it uses for analyzing input and generating responses. The templates contain literal text to be matched and the % symbol, which matches any text. A template may have at most one % symbol, and this may not be the first character. Following each template are one or more responses, each response line indicated by . Example: i feel % why do you feel %? are you sure you feel %? The second part of the file contains keywords, i.e. single word templates. Failing to find a template that matches the input, ELIZA looks for keywords. Responses follow keywords just as with templates. After reading the script and building up tables, the program goes into a loop asking for input and replying to it line by line. */ struct template { long toffset; char tfirstc; /* first char of template */ char tsecondc; /* second char of template */ int talts; /* number of alternative replies (>0) */ int tnext; /* next reply (1 <= tnext <= talts ) */ } templ[TEMPLSIZ]; struct keyword { /* one entry per keyword */ long koffset; /* byte offset of line containing keyword */ int kfirst3; /* first 3 chars of keyword in rad50 */ int ksecond3; /* second 3 chars of keyword in rad50 */ int kalts; /* number of alternative replies ( >0 ) */ int knext; /* next reply (1 <= knext <= kalts ) */ char priority; /* how important is keywd: 0 worst, 9 best */ } keywd[KEYWSIZ]; int oldkeywd[HISTORY]; /* queue of indices of most recent keywds */ char line[400]; /* current input line from patient */ char resp[400]; /* output response buffer */ char words[400]; /* a template has matched. this is % */ FILE *filed; /* file descriptor for script */ int maxtempl; /* templ[maxtempl] is last entry */ int maxkeywd; /* keywd[maxkeywd] is last entry */ char scrbuf[512]; /* script buffer */ int scrbfx = 512; /* scrbuf[scrbfx] is next char to pass on */ int cumbyte = 0; /* bytes delivered by getscript so far */ int resptype = 0; /* -n means templ n used, +n means keywd n, 0=dontkn */ extern int servfd; extern char *getenv(); main(argc, argv) int argc; char **argv; { char c; int i, x; char *script; /* initialization section, zero array, open files */ for (i = 0; i < HISTORY; i++) oldkeywd[i] = 0; /* no previous keywords yet */ script = getenv("BOTSCRIPT"); if(script == 0) { (void)fprintf(stderr,"must set BOTSCRIPT in environment!\n"); exit(1); } filed = fopen(script, "r"); if (filed == NULL) { perror(script); exit(0); } #ifdef DEBUG (void)fprintf(stderr,"<<<using script file %s>>>\n",script); #endif buildtempl(); /* read the templates, make 1 entry per template */ buildkeywd(); /* read the keywords, make 1 entry per keyword */ (void)srandom(getpid() / getppid()); if(connectserver()) { (void)fprintf(stderr,"couldn't connect to server\n"); exit(1); } if(loginserver()) { (void)fprintf(stderr,"couldn't login to server\n"); exit(1); } (void)fprintf(stderr,"<<<robot is connected>>>\n"); /* main loop of eliza, ask for input, give reply */ while (1) { getline(); i = trytempl(); if (i >= 0) { usetempl(i); continue; } i = trykeywd(); if (i >= 0) { usekeywd(i, 1); continue; } dontknow(); } } /* read the script and fill in the template table. each entry in the template table refers to a single template and all of its replies */ buildtempl() { int i, byteno; char c; /* initialize */ getscript(&c, &byteno); /* read 1 character from script */ i = 0; while (c == '\n') getscript(&c, &byteno); /* loop, one pass per template (including all replies */ /* '+' signals end of template list */ while (c != '+') { /* where to find template within script */ templ[i].toffset = byteno; templ[i].tfirstc = c; templ[i].talts = 0; getscript(&c, &byteno); /* second character of template */ templ[i].tsecondc = c; /* skip to lf, then read and return next char */ nextslin(&c, &byteno); /* count number of lines beginning with '*' */ while (c == '*') { templ[i].talts++; nextslin(&c, &byteno); while (c == '\n') getscript(&c, &byteno); } /* mjr - semi randomize that sucker! */ templ[i].tnext = 1 + (random() % (templ[i].talts + 1)); if(templ[i].tnext > templ[i].talts) templ[i].tnext = 1; i++; /* next template */ if (i >= TEMPLSIZ) { (void)fprintf(stderr,"template array too small\n"); exit(1); } } /* all templates have been read and stored */ maxtempl = i - 1; getscript(&c, &byteno); /* skip the lf following '+' */ } /* read the script and fill in the keyword table */ buildkeywd() { int i, j, byteno; char c, key[6]; i = 0; getscript(&c, &byteno); /* advance to lf following '+' */ getscript(&c, &byteno); /* go past lf */ while (c != '+') { /* end of file */ for (j = 0; j < 6; j++) key[j] = 0; /* where to find keyword in script */ keywd[i].koffset = byteno; /* will count number of possible replies */ keywd[i].kalts = 0; /* next reply */ keywd[i].knext = 1; j = 0; while (c != BLANK) { if (j <= 5) key[j++] = c - 'a' + 1; getscript(&c, &byteno); } keywd[i].kfirst3 = 676 * key[0] + 26 * key[1] + key[2]; keywd[i].ksecond3 = 676 * key[3] + 26 * key[4] + key[5]; getscript(&c, &byteno); /* c = priority, 0-9 */ if (c >= '0' && c <= '9') keywd[i].priority = c - '0'; else keywd[j].priority = 5; /* default priority */ /* count the number of replies listed under this keyword */ getscript(&c, &byteno); /* read the linefeed */ getscript(&c, &byteno); /* read first char of next line */ while (c == '*') { /* each reply begins with '*' */ keywd[i].kalts++; nextslin(&c, &byteno); } while (c == '\n') getscript(&c, &byteno); i++; if (i >= KEYWSIZ) { (void)fprintf(stderr,"keyword array too small\n"); exit(1); } } maxkeywd = i - 1; } /* try to match the current line to a template. in turn, try all the templates to see if the first two letters match. if so, read in the template and call match to check the entire template. */ trytempl() { int i, k, n, x; char c, t[80]; k = 0; words[0] = '\0'; for (i = 0; i <= maxtempl; i++) { if ((templ[i].tfirstc == line[k]) && (templ[i].tsecondc == line[k + 1])) { fseek(filed,templ[i].toffset,0); fgets(t,sizeof(t),filed); /* compare input line (line[]) to template (t[]) */ if (match(line, t)) return (i); } } return (-1); } /* a template has been sucessfully matched. generate output */ usetempl(i) int i; { int k, n, x; char c; /* indicates template match (used on log) */ resptype = -i; moveto(templ[i].toffset); /* seek the template */ getscript(&c, &x); if (templ[i].tnext > templ[i].talts) templ[i].tnext = 1; n = templ[i].tnext; templ[i].tnext++; /* skip to the proper alternative */ for (k = 0; k < n; k++) nextslin(&c, &x); /* make reply using template */ response(); } /* the line does not match a template. are there any keywords in it */ trykeywd() { int i, j, k, key1, key2, index, score; char key[80]; i = 0; index = -1; score = -1; while (line[i] != '\n') { /* find the end of the word */ k = i; while (line[k] != '\n' && line[k] != BLANK) k++; k--; /* line[i] ... line[k] is the word */ for (j = 0; j < 6; j++) key[j] = 0; for (j = i; j <= k; j++) key[j - i] = line[j] - 'a' + 1; key1 = 676 * key[0] + 26 * key[1] + key[2]; key2 = 676 * key[3] + 26 * key[4] + key[5]; for (j = 0; j <= maxkeywd; j++) if (keywd[j].kfirst3 == key1 && keywd[j].ksecond3 == key2 && keywd[j].priority > score) { score = keywd[j].priority; index = j; } if (line[k + 1] == '\n') return (index); i = k + 2; } return (index); } /* determine response to a matched keyword */ usekeywd(n, realkey) int n, realkey; { int i = 0; int k; int x; char c; if (realkey == 1) { resptype = i; } else resptype = 0; /* recycle fixed responses. if all responses used up, start over */ if (keywd[n].knext > keywd[n].kalts) keywd[n].knext = 1; moveto(keywd[n].koffset); c = 0; k = keywd[n].knext; keywd[n].knext++; for (i = 0; i < k; i++) nextslin(&c, &x); response(); /* save keyword in oldkeywd for use later if need be */ if (realkey == 0 || n == 0) return; for (i = 0; i < HISTORY - 1; i++) oldkeywd[i] = oldkeywd[i + 1]; oldkeywd[HISTORY - 1] = n; } /* nothing matched. use an old keyword */ dontknow() { int k; resptype = 0; /* idicates failure to match */ k = (random() % HISTORY) & HMASK; k = oldkeywd[k]; usekeywd(k, 0); /* use keyword, but dont update history */ } /* check to see if the input line (input) matches template (tem) . both strings are delimited by linefeeds. there are 3 cases to be distinguished: 1. they are identical, character by character 2. template is of form stuff % 3. template is of form stuff1 % stuff2 */ match(input, tem) char input[], tem[]; { int i, j, savei, savej; i = j = -1; while ((input[++i] == tem[++j]) && (tem[j] != '\n')) ; /* end of identical substrings. check for various cases */ /* exact match, no % present */ if (tem[j] == '\n' && input[j] == '\n') return (1); /* case 2. words begin at input[i] */ if ((tem[j] == '%') && (tem[j + 1] == '\n')) { j = 0; while ((words[j] = input[i++]) != '\n') j++; words[j + 1] = 0; return (1); } if (tem[j] == '%') { /* possibly case 3. do reverse scan */ savei = i; savej = j; while (tem[j] != '\n') j++; while (input[i] != '\n') i++; while (tem[--j] == input[--i]); if (tem[j] != '%') return (0); /* it is case 3. words are in input[savei]...input[i] */ for (j = savei; j <= i; j++) words[j - savei] = input[j]; words[i - savei + 1] = 0; return (1); } return (0); } /* read and return one character from the script, including its position */ getscript(c, byteno) char *c; int *byteno; { int n; if (scrbfx == 512) { n = fread(scrbuf, sizeof(*scrbuf), 512, filed); if (n < 0) { (void)fprintf(stderr,"script read error\n"); exit(0); } else scrbfx = 0; } *c = scrbuf[scrbfx++]; *byteno = cumbyte++; } nextslin(c, byteno) char *c; int *byteno; { while (*c != '\n') getscript(c, byteno); getscript(c, byteno); } /* move to byte n of the script */ moveto(n) long n; { scrbfx = 512; if(fseek(filed, n, 0) < 0) { perror("moveto address"); exit(1); } } static char lbuf[BUFSIZ]; /* read a line from the server and try to pick out statements in the form of XXXX says "thing */ getline() { int lbyts = 0; register char *feep; register char *ofp; /* one problem here is that tinyMUD does not seem to reliably */ /* give us a newline where we want it. but ! when we are handling */ /* a quote, we need to drop the trailing quote anyhow, so we */ /* turn it into a newline! */ while(1) { lbyts = read(servfd,lbuf,BUFSIZ); if(lbyts < 0) { (void)fprintf(stderr,"<<<Eliza dies!>>>\n"); exit(1); } /* we can't do anything with lines of less than 8 char */ if(lbyts < 8) continue; lbuf[lbyts] = '\0'; /* check for a statement */ feep = lbuf; /* skip word. */ while(*feep && *feep != ' ') feep++; /* look for says " */ if(!strncmp(feep," says \"",7)) { *feep = '\0'; /* skip */ feep += 7; /* put the line in our buffer and get out */ ofp = line; while(*feep != '\0' && *feep != '\"' && *feep != '\n') { if(*feep >= 'A' && *feep <= 'Z') { *ofp++ = *feep++ - 'A' + 'a'; } else if(*feep >= 'a' && *feep <= 'z' ) { *ofp++ = *feep++; } else if(*feep >= '0' && *feep <= '9' ) { *ofp++ = *feep++; } else if(*feep == '=' || *feep == '@' || *feep == ':' ) { *ofp++ = *feep++; } else if(*feep == '\t' || *feep == ' ') { *ofp++ = ' '; while(*feep == '\t' || *feep == ' ') feep++; } else feep++; } *ofp++ = '\n'; *ofp = '\0'; #ifdef DEBUG (void)fprintf(stderr,"%s says: %s",lbuf,line); #endif return(0); } } } /* getscript is set up to return the correct response. words contains the extracted words. make reply */ response() { char c, c1; int i, k, byteno; k = 0; fix(); /* replace i by you etc. */ getscript(&c, &byteno); while (c != '\n') { if (c == '%') { for (i = 0; words[i] != 0; i++) { c1 = words[i] & 0177; if (c1 != '\n') resp[k++] = c1; } } else { if (c == '&') { for(i = 0; lbuf[i] != 0; i++) resp[k++] = lbuf[i]; } else { resp[k++] = c; } } getscript(&c, &byteno); } resp[k++] = '\n'; resp[k] = 0; /* fix gross errors in grammar like "you am", "i are" */ grammar(resp); for (k = 0; resp[k] != '\n'; k++) /* empty */ ; resp[++k] = 0; #ifdef DEBUG (void)fprintf(stderr,"I replied: %s\n",resp); #endif (void)sleep(1); if(write(servfd,resp,k) < 0) { (void)fprintf(stderr,"<<<Eliza dies!>>>\n"); exit(1); } } fix() { subst("we are", "we are"); subst("that you", "that i"); subst("my", "your"); subst("you", "me"); subst("your", "my"); subst("me", "you"); subst("mine", "yours"); subst("am", "are"); subst("yours", "mine"); subst("yourself", "myself"); subst("myself", "yourself"); subst("are", "am"); subst("i", "you"); } /* scan the array "words" for the string OLD. if it occurs, replace it by NEW. also set the parity bits on in the replacement text to mark them as already modified */ subst(old, new) char old[], new[]; { int i, nlen, olen, flag, base, delim; olen = 0; while (old[olen] != 0) olen++; nlen = 0; while (new[nlen] != 0) nlen++; for (base = 0; words[base] != 0; base++) { flag = 1; for (i = 0; i < olen; i++) if (old[i] != words[base + i]) { flag = 0; break; } delim = words[base + olen]; if (flag == 1 && (base == 0 || words[base - 1] == BLANK) && (delim == BLANK || delim == '\n' || delim == 0)) { shift(base, nlen - olen); for (i = 0; i < nlen; i++) words[base + i] = new[i] + 128; } } } shift(base, delta) int base, delta; { int i, k; if (delta == 0) return; if (delta > 0) { k = base; while (words[k] != 0) k++; for (i = k; i >= base; i--) words[i + delta] = words[i]; } else /* delta <0 */ for (i = 0; i == 0 || words[base + i - 1] != 0; i++) words[base + i] = words[base + i - delta]; } grammar(a) char a[]; { int i; for (i = 0; a[i] != 0; i++) words[i] = a[i] & 0177; subst("I ARE", "I AM"); subst("YOU AM", "YOU ARE"); subst("ME AM", "I AM"); for (i = 0; words[i] != 0; i++) a[i] = words[i] & 0177; }