/*
 * Chops a flat OIF from stdin up into shorter lines on stdout, or vice
 * versa.  Probably not at all wise to apply it to bigram-compressed OIF.
 *
 * Options:
 *   -l <linesize>  break into lines of some size other than the default
 *                  of 80.
 *   -d             dechop -- i.e. paste a chopped file back together.
 *   -i <infile>    read from <infile> instead of stdin.
 *   -o <outfile>   write to <outfile> instead of stdout.
 *
 * Chopped format: a line that had to be broken is continued on the next
 * line, which starts with a single space followed by the separator
 * character at which the break was made.
 *
 * NOTE: The dechop side assumes that every line that starts with a space
 * is to be pasted on to the end of the previous line.  Since we are
 * assuming OIF input, where no line starts with a space, this is fine as
 * long as the chopped file has not been edited by hand.
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* My libraries want this. Your mileage may vary. */
#define index(a,b) strchr((a),(b))

char *getsep (const char *typ);
int usage (void);

/* Maximum output line length when chopping; overridden by -l. */
static int len = 80;

/* Working buffer for the chop side; allocated once the size is known. */
static char *line;

/*
 * Separator table, keyed by the first word of an OIF line.  The entry
 * whose typ is null terminates the table and supplies the default set.
 */
struct
{
  char *typ;
  char *lst;
} seplst[] =
{
  { "lst", ";" },             /* Break lists ONLY at ;s */
  { (char *) 0, " \t;" }      /* Everything else at these */
};

static const char badfile[] = "Bad OIF file at line %d\n";
static const char badbreak[] = "Unable to do good line break at line %d\n";

/*
 * True if ch is one of the characters in seps.  A NUL input byte is never
 * a separator (strchr would otherwise match the string terminator).
 */
static int
is_sep (const char *seps, int ch)
{
  return ch != '\0' && strchr (seps, ch) != NULL;
}

/* Writes the first n bytes of buf to out, followed by a newline. */
static void
put_line (FILE *out, const char *buf, size_t n)
{
  if (n > 0)
    fwrite (buf, 1, n, out);
  fputc ('\n', out);
}

/*
 * Parses a -l argument into *out.  Accepts only a complete decimal number
 * in the range 1 .. INT_MAX - 8 (the headroom keeps the buffer size
 * computation from overflowing).  Returns 0 on success, -1 otherwise.
 */
static int
parse_linesize (const char *arg, int *out)
{
  char *end;
  long value;

  errno = 0;
  value = strtol (arg, &end, 10);
  if (end == arg || *end != '\0' || errno == ERANGE
      || value < 1 || value > INT_MAX - 8)
    return -1;
  *out = (int) value;
  return 0;
}

/*
 * Dechop: copies in to out, joining every line that starts with a space
 * on to the end of the line before it.  The newline and the leading space
 * are both dropped.  A newline is held back until the first character of
 * the following line has been seen, so that every newline -- including
 * one that follows another newline -- gets the continuation check.
 */
static void
dechop_stream (FILE *in, FILE *out)
{
  int pending_newline = 0;
  int ch;

  while ((ch = fgetc (in)) != EOF)
    {
      if (ch == '\n')
        {
          if (pending_newline)
            fputc ('\n', out);
          pending_newline = 1;
          continue;
        }
      if (pending_newline)
        {
          pending_newline = 0;
          /* If it starts with a space, it's continued. */
          /* This is, after all, OIF. */
          if (ch == ' ')
            continue;
          fputc ('\n', out);
        }
      fputc (ch, out);
    }
  if (pending_newline)
    fputc ('\n', out);
}

/*
 * Chop: copies in to out, breaking lines so that they hold at most width
 * characters wherever a separator allows it.
 *
 * For each input line the first word (everything up to the first
 * whitespace character) selects the separator set via getsep().  When the
 * buffer is full and another character arrives, the line is broken at the
 * most recent separator: everything before it is written out, and the
 * separator plus whatever followed it is carried over behind a leading
 * space.  If no separator is available a warning goes to stderr and the
 * line is broken where it stands.
 *
 * Blank lines, and lines consisting of a single whitespace character, are
 * dropped.  A first word that fills the whole line and is followed by more
 * text is reported as a bad file.
 *
 * Returns 0 on success, 1 on allocation failure or a bad file.
 */
static int
chop_stream (FILE *in, FILE *out, int width)
{
  const size_t w = (size_t) width;
  int status = 0;
  int currline = 0;
  int ch = 0;

  /*
   * The buffer never holds more than w + 2 characters (a carried-over
   * tail of up to w characters, the leading space and one new character),
   * plus a terminator for the first word.
   */
  line = malloc (w + 4);
  if (line == NULL)
    {
      fputs ("Could not allocate line buffer. Too long?\n", stderr);
      return 1;
    }

  while (ch != EOF)
    {
      size_t used = 0;        /* characters currently in the buffer */
      size_t sep_at = 0;      /* index of the latest separator */
      int have_sep = 0;       /* whether sep_at is meaningful */
      const char *seps;

      /* Read first word of OIF line */
      currline++;
      ch = fgetc (in);
      while (ch != EOF && !isspace (ch) && used < w)
        {
          line[used++] = (char) ch;
          ch = fgetc (in);
        }

      if (ch == '\n' || ch == EOF)
        {
          if (used > 0)
            put_line (out, line, used);
          continue;
        }

      /* The first word alone fills the line and more text follows. */
      if (used >= w)
        {
          fprintf (stderr, badfile, currline);
          status = 1;
          break;
        }

      /* Look up separators */
      line[used] = '\0';
      seps = getsep (line);
      if (is_sep (seps, ch))
        {
          have_sep = 1;
          sep_at = used;
        }
      line[used++] = (char) ch;

      /* Do the rest of the line */
      for (ch = fgetc (in); ch != '\n' && ch != EOF; ch = fgetc (in))
        {
          /* If we have space on the line... */
          if (used < w)
            {
              if (is_sep (seps, ch))
                {
                  have_sep = 1;
                  sep_at = used;
                }
              line[used++] = (char) ch;
              continue;
            }

          /* No more room. Attempt linebreak */
          if (have_sep)
            {
              size_t tail = used - sep_at;

              put_line (out, line, sep_at);
              /* Source and destination overlap, hence memmove. */
              memmove (line + 1, line + sep_at, tail);
              used = tail + 1;
            }
          else
            {
              /* No separators! Panic! */
              fprintf (stderr, badbreak, currline);
              put_line (out, line, used);
              fflush (out);
              used = 1;
            }
          line[0] = ' ';              /* continuation marker */
          line[used++] = (char) ch;
          have_sep = 0;
        }

      /* Put out remaining in the buffer */
      if (used > 1)
        put_line (out, line, used);
    }

  free (line);
  line = NULL;
  return status;
}

/*
 * Entry point.  Options are scanned from last to first; an option that
 * takes a value (-l, -i, -o) uses the argument immediately after it, and
 * if an option is repeated the first occurrence on the command line wins.
 * Exits with 0 on success and 1 on any usage, file or format error.
 */
int
main (int ac, char *av[])
{
  int dechop = 0;
  int onearg = 0;     /* non-option arguments seen since the last option */
  int status = 0;
  int i;
  FILE *infile = stdin;
  FILE *outfile = stdout;

  for (i = ac - 1; i > 0; i--)
    {
      if (av[i][0] != '-')
        {
          onearg++;
          continue;
        }
      switch (av[i][1])
        {
        case 'l':
          if (!onearg)
            return usage ();
          onearg = 0;
          if (parse_linesize (av[i + 1], &len) != 0)
            {
              fputs ("Bad line size.\n", stderr);
              return usage ();
            }
          break;
        case 'd':
          dechop = 1;
          break;
        case 'i':
          if (!onearg)
            return usage ();
          onearg = 0;
          if (infile != stdin)
            fclose (infile);
          infile = fopen (av[i + 1], "rb");
          if (infile == NULL)
            {
              fputs ("Could not open input file.\n", stderr);
              return usage ();
            }
          break;
        case 'o':
          if (!onearg)
            return usage ();
          onearg = 0;
          if (outfile != stdout)
            fclose (outfile);
          outfile = fopen (av[i + 1], "wb");
          if (outfile == NULL)
            {
              fputs ("Could not open output file.\n", stderr);
              return usage ();
            }
          break;
        default:
          return usage ();
        }
    }

  if (dechop)
    dechop_stream (infile, outfile);            /* Dechop infile to outfile */
  else
    status = chop_stream (infile, outfile, len); /* Chop infile to outfile */

  if (fflush (outfile) != 0 || ferror (outfile))
    {
      fputs ("Error writing output.\n", stderr);
      status = 1;
    }
  if (outfile != stdout && fclose (outfile) != 0)
    status = 1;
  if (infile != stdin)
    fclose (infile);

  return status;
}

/*
 * Returns the string of separators for the given attribute type.  typ is
 * compared against each named entry of seplst; if none matches, the
 * separators of the terminating (default) entry are returned.
 */
char *
getsep (const char *typ)
{
  int i;

  for (i = 0; seplst[i].typ != NULL; i++)
    if (strcmp (typ, seplst[i].typ) == 0)
      break;
  return seplst[i].lst;
}

/* Prints the usage message to stderr and returns the exit status 1. */
int
usage (void)
{
  fputs ("usage: bonk [-i infile] [-o outfile] [-l linesize] [-d]\n", stderr);
  return 1;
}