/*
Chops a flat OIF from stdin up into shorter lines on stdout, or vice
versa. Probably not at all wise to apply it to bigram-compressed OIF.
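Takes these args:
-l <linesize> tells it to break up into lines of some
size other than the default of 80.
-d tells it to dechop -- i.e. paste a chopped file
back together.
-i <infile> tells it to read from the named file instead of stdin.
-o <outfile> tells it to write to the named file instead of stdout.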
NOTE: The dechop side assumes that every line that starts with a space
is to be pasted on to the end of the previous line. Since we are assuming
OIF input, where no line starts with a space, this is fine as long as
you don't bugger up a chopped up file, OK?
*/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Supplies index() in terms of the standard strchr(): index(s, c) yields a
   pointer to the first occurrence of c in s, or a null pointer if there is
   none. The original author's libraries needed this; yours may not. */
#define index(a, b) (strchr ((a), (b)))
/* Returns the separator characters for an attribute type name. Declared
   with a full prototype so that calls are type-checked. */
char *getsep (char *typ);

/* Line size used when chopping; 80 unless -l supplies another value. */
static int len = 80;

/* Buffer in which one output line is assembled while chopping. */
static char *line;

/*
Separator table. Each entry pairs an attribute type name with the set of
characters at which a line of that type may be broken. The last entry has
a null type name and supplies the separators for every other type.
*/
struct {
    char *typ;                  /* attribute type name, null in the last entry */
    char *lst;                  /* characters that are acceptable break points */
} seplst[] = {
    { "lst", ";" },             /* Break lists ONLY at ;s */
    { NULL, " \t;" }            /* Everything else at these */
};

/* Diagnostics; each takes the input line number as its only argument. */
static char *badfile = "Bad OIF file at line %d\n";
static char *badbreak = "Unable to do good line break at line %d\n";

/* Prints the usage message; declared with a full prototype. */
int usage (void);
/*
Lowest buffer index at which a separator is accepted as a break point.
Breaking any earlier would produce a continuation line (marker space,
carried-over text and the character that did not fit) that is already
longer than the line limit.
*/
static const size_t first_break_index = 2;

/*
Parses the argument of -l. Only a complete decimal number in the range
1 .. INT_MAX - 3 is accepted (the line buffer needs linesize + 3 bytes).
Stores the value in *result and returns 1 on success; returns 0 and
leaves *result untouched otherwise.
*/
static int parse_linesize (const char *text, int *result)
{
    char *end;
    long value = strtol (text, &end, 10);

    if (end == text || *end != '\0' || value < 1 || value > INT_MAX - 3)
        return 0;
    *result = (int) value;
    return 1;
}

/* Returns nonzero if ch is one of the separator characters in seps. A NUL
   byte is never a separator (strchr would otherwise match the terminator). */
static int is_separator (const char *seps, int ch)
{
    return ch != '\0' && strchr (seps, ch) != NULL;
}

/* Writes count bytes of text followed by a newline. */
static void write_line (FILE *out, const char *text, size_t count)
{
    fwrite (text, 1, count, out);
    fputc ('\n', out);
}

/*
Pastes a chopped file back together: a newline that is followed by a space
is a continuation, so both the newline and that space are dropped. Every
other character is copied through unchanged.
*/
static void dechop_stream (FILE *in, FILE *out)
{
    int ch;

    while ((ch = fgetc (in)) != EOF) {
        if (ch != '\n') {
            fputc (ch, out);
            continue;
        }
        /* Check next line for continuation */
        ch = fgetc (in);
        if (ch == ' ')
            continue;
        fputc ('\n', out);
        if (ch == EOF)
            break;
        /* Push the character back instead of writing it directly, so that
           a newline directly after a newline gets its own lookahead. */
        ungetc (ch, in);
    }
}

/*
Chops each input line into pieces that fit the line size.

buf must hold at least limit + 3 bytes. The first word of a line (at most
limit - 1 characters) selects the separator set through getsep(). When a
line grows past limit + 1 characters it is broken before the last
separator seen; the continuation line starts with a space followed by that
separator. If no usable separator exists the line is broken where it
stands and a warning naming the input line is written to stderr.
Empty lines, and lines holding only a single whitespace character, are
dropped.

Returns 0 on success, 1 if a first word is too long to be valid input.
*/
static int chop_stream (FILE *in, FILE *out, char *buf, int limit)
{
    const size_t word_max = (size_t) limit;
    const size_t cap = (size_t) limit + 1;      /* most characters buffered per line */
    int lineno = 0;
    int ch = 0;

    while (ch != EOF) {
        size_t used = 0;        /* characters currently in buf */
        size_t brk = 0;         /* index of last usable separator, 0 = none */
        const char *seps;

        lineno++;

        /* Read first word of OIF line */
        ch = fgetc (in);
        while (ch != EOF && !isspace (ch) && used < word_max) {
            buf[used++] = (char) ch;
            ch = fgetc (in);
        }
        if (ch == '\n' || ch == EOF) {
            if (used > 0)
                write_line (out, buf, used);
            continue;
        }
        if (used >= word_max) {
            fprintf (stderr, badfile, lineno);
            return 1;
        }

        /* Look up separators */
        buf[used] = '\0';
        seps = getsep (buf);
        if (used >= first_break_index && is_separator (seps, ch))
            brk = used;
        buf[used++] = (char) ch;

        /* Do the rest of the line */
        while ((ch = fgetc (in)) != '\n' && ch != EOF) {
            if (used >= cap) {
                /* No more room. Attempt linebreak */
                if (brk != 0) {
                    size_t tail = used - brk;

                    write_line (out, buf, brk);
                    /* Source and destination overlap, hence memmove. */
                    memmove (buf + 1, buf + brk, tail);
                    buf[0] = ' ';
                    used = tail + 1;
                } else {
                    /* No separators! Panic! */
                    fprintf (stderr, badbreak, lineno);
                    write_line (out, buf, used);
                    fflush (out);
                    buf[0] = ' ';
                    used = 1;
                }
                brk = 0;
            }
            /* The character that forced a break is a break candidate too. */
            if (used >= first_break_index && is_separator (seps, ch))
                brk = used;
            buf[used++] = (char) ch;
        }

        /* Put out remaining in the buffer */
        if (used > 1)
            write_line (out, buf, used);
    }
    return 0;
}

/*
Entry point. Options (see usage()):
  -l <linesize>  line size for chopping (default 80)
  -d             dechop instead of chop
  -i <infile>    read from a file instead of stdin
  -o <outfile>   write to a file instead of stdout
Arguments that do not start with '-' and are not the value of an option
are ignored.

Returns 0 on success; 1 on a usage error, an unopenable file, a failed
buffer allocation, invalid input, or an error writing the output.
*/
int main (int ac, char *av[])
{
    int dechop = 0;
    int have_value = 0;         /* argument right after the current one is a value */
    int status = 0;
    int write_failed;
    FILE *infile = stdin;
    FILE *outfile = stdout;

    /* Arguments are scanned from last to first, so when an option is
       repeated the occurrence nearest the start of the command line is
       processed last. "> 0" also copes with an empty argument vector. */
    while (--ac > 0) {
        if (av[ac][0] != '-') {
            have_value = 1;
            continue;
        }
        switch (av[ac][1]) {
        case 'l':
            if (!have_value || !parse_linesize (av[ac + 1], &len))
                exit (usage ());
            break;
        case 'd':
            dechop = 1;
            break;
        case 'i':
            if (!have_value)
                exit (usage ());
            infile = fopen (av[ac + 1], "rb");
            if (infile == NULL) {
                fputs ("Could not open input file.\n", stderr);
                exit (usage ());
            }
            break;
        case 'o':
            if (!have_value)
                exit (usage ());
            outfile = fopen (av[ac + 1], "wb");
            if (outfile == NULL) {
                fputs ("Could not open output file.\n", stderr);
                exit (usage ());
            }
            break;
        default:
            exit (usage ());
        }
        /* Whatever follows an option is either its value or another
           option; neither may serve as the value of an earlier option. */
        have_value = 0;
    }

    if (dechop) {
        /* Dechop infile to outfile */
        dechop_stream (infile, outfile);
    } else {
        /* Chop infile to outfile */
        line = malloc ((size_t) len + 3);
        if (line == NULL) {
            fputs ("Could not allocate line buffer. Too long?\n", stderr);
            return 1;
        }
        status = chop_stream (infile, outfile, line, len);
        free (line);
        line = NULL;
    }

    if (infile != stdin)
        fclose (infile);
    /* fclose flushes, so a failure here means output was lost. */
    write_failed = ferror (outfile);
    if (fclose (outfile) != 0)
        write_failed = 1;
    if (write_failed) {
        fputs ("Error writing output.\n", stderr);
        status = 1;
    }
    return status;
}
/*
Returns the string of separators for the given attribute type.

typ is compared against each named entry of seplst in order. If none
matches, the search stops at the final entry, whose type name is null,
and that entry's separator string is returned as the default.
*/
char *getsep (char *typ)
{
    int i;

    for (i = 0; seplst[i].typ != NULL; i++) {
        if (strcmp (typ, seplst[i].typ) == 0)
            break;
    }
    return seplst[i].lst;
}
/*
Prints the command-line synopsis to stderr.
Always returns 1, so the result can be handed straight to exit().
*/
int usage (void)
{
    fputs ("usage: bonk [-i infile] [-o outfile] [-l linesize] [-d]\n", stderr);
    return 1;
}