/* ************************************************************************
* File: webster.c Part of tbaMUD *
* Usage: Use an online dictionary via tell m-w <word>. *
* *
* Based on the Circle 3.0 syntax checker and wld2html programs. *
************************************************************************ */
/* Minimal logger: prints msg followed by a newline on stderr.
 * NOTE(review): this macro is defined before the #include lines below, so it
 * is also applied to any identifier named `log` inside those headers --
 * confirm the ordering is intentional. */
#define log(msg) fprintf(stderr, "%s\n", msg)
#include "conf.h"
#include "sysdep.h"
/* Size, in bytes, of the fixed-size buffers used throughout this file. */
#define MEM_USE 10000
/* Shared scratch buffer: main() builds the lynx command line in it and
 * parse_webster_html() reads each line of webster.html into it. */
char buf[MEM_USE];
/* Reads the next non-blank line of fl into buf; returns 0 at end of file,
 * 1 otherwise. */
int get_line(FILE * fl, char *buf);
/* Advances *string past any leading whitespace. */
void skip_spaces(char **string);
/* Turns the downloaded webster.html into the websterinfo file; arg is the
 * word that was looked up. */
void parse_webster_html(char *arg);
/* Returns nonzero when word is non-empty and consists only of characters
 * that are safe to paste, unquoted, into the shell command built by main()
 * (letters, digits, '-' and '_'). */
static int is_safe_word(const char *word)
{
  const char *c;

  if (!*word)
    return 0;
  for (c = word; *c; c++) {
    if (!isalnum((unsigned char) *c) && *c != '-' && *c != '_')
      return 0;
  }
  return 1;
}

/* Usage: webster <word> <pid>
 *
 * Downloads the dictionary page for <word> into webster.html with lynx,
 * converts it to the websterinfo file via parse_webster_html(), and then
 * sends SIGUSR2 to <pid> (when it is a positive number).
 * Always returns 0. */
int main(int argc, char **argv)
{
  long pid_arg;
  int pid = 0;
  int len = -1;
  FILE *outfile;

  if (argc != 3) {
    return 0; /* no word/pid given */
  }

  /* Only a strictly positive pid is ever signalled: kill() gives 0 and
   * negative values a "process group / every process" meaning. */
  pid_arg = strtol(argv[2], NULL, 10);
  if (pid_arg > 0 && (long) (int) pid_arg == pid_arg)
    pid = (int) pid_arg;

  /* The word ends up on a shell command line, so anything that is not a
   * plain word is refused instead of being handed to system(). */
  if (is_safe_word(argv[1]))
    len = snprintf(buf, sizeof(buf),
                   "lynx -accept_all_cookies -source http://www.thefreedictionary.com/%s"
                   " >webster.html", argv[1]);

  if (len >= 0 && (size_t) len < sizeof(buf)) {
    system(buf);
    parse_webster_html(argv[1]);
  } else {
    /* Unsafe word, or the command did not fit in buf (which would have cut
     * off the output redirection): report it as an unknown word, using the
     * same layout parse_webster_html() writes. */
    outfile = fopen("websterinfo", "w");
    if (outfile) {
      fprintf(outfile, "That word could not be found.\n~");
      fclose(outfile);
    }
  }

  if (pid)
    kill(pid, SIGUSR2);
  return (0);
}
/* Nonzero when the tag starting at tag ends an output line on a definition
 * page.  Tags have already been cut at '>' by strtok(), so the two <div>
 * forms are compared exactly; "<br" and "<p" are prefix matches. */
static int definition_tag_ends_line(const char *tag)
{
  return !strncmp(tag, "<br", 3) || !strncmp(tag, "<p", 2) ||
         !strcmp(tag, "<div class=\"ds-list\"") ||
         !strcmp(tag, "<div class=\"sds-list\"");
}

/* Nonzero when the tag starting at tag ends an output line on a
 * "did you mean" suggestions page. */
static int suggestion_tag_ends_line(const char *tag)
{
  return !strncmp(tag, "<td ", 4);
}

/* Writes the text found between the HTML tags of text to outfile.
 *
 * text is split in place at every '>' ("<br><b>word</b>" becomes "<br",
 * "<b", "word</b"); the first piece is the remainder of the enclosing tag and
 * is discarded.  For every following piece the characters before '<' are
 * written, followed by a newline when ends_line() accepts the tag.  Output
 * stops at the first "</table" tag or when the text is exhausted. */
static void write_text_between_tags(FILE *outfile, char *text,
                                    int (*ends_line)(const char *))
{
  char *p = strtok(text, ">");

  if (p)
    p = strtok(NULL, ">"); /* skip the rest of the opening tag */

  while (p) {
    size_t len = strcspn(p, "<"); /* stops at '<' or at the end of the piece */

    if (len)
      fwrite(p, 1, len, outfile);
    p += len;

    if (ends_line(p))
      fputc('\n', outfile);
    if (!strncmp(p, "</table", 7))
      return;
    p = strtok(NULL, ">");
  }
}

/* Converts webster.html (the page downloaded for arg) into websterinfo.
 *
 * The result is expected on the single line that starts with the ad-script
 * marker.  What follows the marker selects the output:
 *   "<br>"   - the word is unknown;
 *   "<div "  - a definition, starting at the first "ds-list";
 *   "<div>"  - no definition, but a table of suggestions;
 *   other    - the page layout is not recognised.
 * The output always ends with "~".  webster.html is removed once opened.
 * Exits with status 1 when websterinfo cannot be created. */
void parse_webster_html(char *arg) {
  static const char marker[] = "<script>write_ads(AdsNum, 0, 1)</script>";
  const size_t marker_len = sizeof(marker) - 1;
  FILE *infile, *outfile;
  char scanbuf[MEM_USE], *p, *def;

  outfile = fopen("websterinfo", "w");
  if (!outfile)
    exit(1);

  infile = fopen("webster.html", "r");
  if (!infile) {
    fprintf(outfile, "A bug has occured in webster. (no webster.html) Please notify Welcor.");
    fclose(outfile);
    return;
  }

  unlink("webster.html"); /* We can still read */

  while (get_line(infile, buf) != 0) {
    if (strncmp(buf, marker, marker_len) != 0)
      continue; /* read until we hit the line with results in it. */

    p = buf + marker_len;
    /* Start of the definition, or NULL when this is not a definition line
     * (or the expected "ds-list" is missing from it). */
    def = (strncmp(p, "<div ", 5) == 0) ? strstr(p, "ds-list") : NULL;

    if (strncmp(p, "<br>", 4) == 0) {
      fprintf(outfile, "That word could not be found.\n");
    } else if (def) {
      /* definition is here, all in one line; tokenise a copy. */
      snprintf(scanbuf, sizeof(scanbuf), "%s", def);
      fprintf(outfile, "Info on: %s\n\n", arg);
      write_text_between_tags(outfile, scanbuf, definition_tag_ends_line);
    } else if (strncmp(p, "<div>", 5) == 0) {
      /* not found, but suggestions are ample; tokenise a copy. */
      snprintf(scanbuf, sizeof(scanbuf), "%s", p);
      write_text_between_tags(outfile, scanbuf, suggestion_tag_ends_line);
    } else {
      /* weird.. one of the above should be correct. */
      fprintf(outfile, "It would appear that the free online dictionary has changed their format.\n"
                       "Sorry, but you might need a webrowser instead.\n\n"
                       "See http://www.thefreedictionary.com/%s", arg);
    }
    break; /* only the first result line is processed */
  }

  fclose(infile);
  fprintf(outfile, "~");
  fclose(outfile);
}
/* get_line reads the next non-blank line off of the input stream.
 * The newline character is removed from the input.
 *
 * fl  - stream to read from.
 * buf - receives the line; must have room for MEM_USE bytes.
 *
 * Returns 1 when a line was stored in buf, 0 when no further line could be
 * read (end of file or read error), in which case buf is left untouched.
 * A final line that lacks a trailing newline is still returned.
 */
int get_line(FILE * fl, char *buf)
{
  char temp[MEM_USE];
  size_t len;

  do {
    /* A failed read leaves temp unspecified, so stop before looking at it. */
    if (!fgets(temp, sizeof(temp), fl))
      return (0);

    /* Strip the newline only when there is one: over-long lines and an
     * unterminated last line end in ordinary text that must be kept. */
    len = strlen(temp);
    if (len > 0 && temp[len - 1] == '\n')
      temp[len - 1] = '\0';
  } while (!*temp);

  strcpy(buf, temp);
  return (1);
}
/*
 * Function to skip over the leading spaces of a string.
 *
 * string - address of the pointer to advance; on return *string points at
 *          the first non-whitespace character, or at the terminating NUL
 *          when the string holds nothing but whitespace.
 */
void skip_spaces(char **string)
{
  /* isspace() is only defined for values representable as unsigned char
   * (or EOF), so a plain char must be converted before the call. */
  while (**string && isspace((unsigned char) **string))
    (*string)++;
}