/*
Chops a flat OIF from stdin up into shorter lines on stdout, or vice
versa. Probably not at all wise to apply it to bigram-compressed OIF.
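Takes these args:
-l <linesize> tells it to break up into lines of some
size other than the default of 80.
-d tells it to dechop -- i.e. paste a chopped file
back together.
-i <infile> tells it to read from the named file instead of stdin.
-o <outfile> tells it to write to the named file instead of stdout.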
NOTE: The dechop side assumes that every line that starts with a space
is to be pasted on to the end of the previous line. Since we are assuming
OIF input, where no line starts with a space, this is fine as long as
you don't bugger up a chopped up file, OK?
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
/*
Pull in the real prototypes for exit(), atoi() and malloc(). A hand-written
"char *malloc();" contradicts the standard void *malloc(size_t) declaration,
and calling undeclared functions stopped being valid C in C99.
*/
#include <stdlib.h>
/* My libraries want this. Your mileage may vary. */
#define index(a,b) strchr((a),(b))
/* Both are defined further down but called from main. */
char *getsep();
int usage();
/* Target line length when chopping; the -l option overrides it. */
static int len = 80;
/* Line buffer used while chopping; allocated once len is known. */
static char *line;
/*
Table mapping an attribute type (the first word of a line being chopped)
to the set of characters at which a line of that type may be broken.
getsep() scans it in order; the entry whose typ is a null pointer ends
the table and supplies the separators for every type not listed.
*/
struct {
char *typ;
char *lst;
} seplst[] = {
{"lst",";"}, /* Break lists ONLY at ;s */
{(char *)0, " \t;"} /* Everything else at these */
};
/*
printf-style diagnostics written to stderr while chopping. The single %d
in each is filled with the current input line number.
*/
static char *badfile = "Bad OIF file at line %d\n";
static char *badbreak = "Unable to do good line break at line %d\n";
main(ac,av)
int ac;
char *av[];
{
int dechop = 0;
int onearg = 0;
int currline = 0;
int i;
int ch,ch2,ch3;
char *p,*lastsep,*seps;
FILE *infile = stdin;
FILE *outfile = stdout;
while(--ac){
if(av[ac][0] != '-'){
onearg++;
continue;
}
switch(av[ac][1]){
case 'l':
if(!onearg)
exit(usage());
onearg = 0;
len = atoi(av[ac+1]);
break;
case 'd':
dechop = 1;
break;
case 'i':
if(!onearg)
exit(usage());
onearg = 0;
infile = fopen(av[ac+1],"r");
if(infile == (FILE *)0){
fputs("Could not open input file.\n",stderr);
exit(usage());
}
break;
case 'o':
if(!onearg)
exit(usage());
onearg = 0;
outfile = fopen(av[ac+1],"w");
if(outfile == (FILE *)0){
fputs("Could not open output file.\n",stderr);
exit(usage());
}
break;
default:
exit(usage());
break;
}
}
if(dechop){
/* Dechop infile to outfile */
while((ch = fgetc(infile)) != -1){
if(ch == '\n'){
/* Check next line for continuation */
if((ch = fgetc(infile)) == -1){
fputc('\n',outfile);
exit(0);
}
/* If it starts with a space, it's continued */
/* This is, after all, OIF. */
if(ch != ' '){
fputc('\n',outfile);
fputc(ch,outfile);
}
} else {
fputc(ch,outfile);
}
}
} else {
/* Chop infile to outfile */
if((line = malloc(len+3)) == (char *)0){
fputs("Could not allocate line buffer. Too long?\n",
stderr);
exit(1);
}
while(ch != -1){
p = line;
lastsep = (char *)0;
i = 0;
/* Read first word of OIF line */
currline++;
ch = fgetc(infile);
while(!isspace(ch) && ch != -1 && i < len){
*p++ = ch;
i++;
ch = fgetc(infile);
}
if(ch == '\n' || ch == -1){
if(i){
*p++ = '\n';
*p = '\0';
fputs(line,outfile);
}
i = 0;
continue;
}
if(i >= len){
fprintf(stderr,badfile,currline);
exit(1);
}
/* Look up separators */
*p = '\0';
seps = getsep(line);
if(index(seps,ch))
lastsep = p;
*p++ = ch;
/* Do the rest of the line */
ch = fgetc(infile);
while(ch != '\n' && ch != -1){
/* If we have space on the line... */
if(i < len){
if(index(seps,ch))
lastsep = p;
*p++ = ch;
i++;
} else {
/* No more room. Attempt linebreak */
if(lastsep){
*p = '\0';
ch2 = *lastsep;
ch3 = lastsep[1];
*lastsep = '\n';
lastsep[1] = '\0';
fputs(line,outfile);
p = line;
*p++ = ' ';
*p++ = ch2;
*p++ = ch3;
strcpy(p,lastsep+2);
i = strlen(line);
p = line+i;
*p++ = ch;
} else {
/* No seperators! Panic! */
fprintf(stderr,badbreak,currline);
*p++ = '\n';
*p = '\0';
fputs(line,outfile);
fflush(stdout);
p = line;
*p++ = ' ';
*p++ = ch;
i = 1;
}
lastsep = (char *)0;
}
ch = fgetc(infile);
}
/* Put out remaining in the buffer */
if(i){
*p++ = '\n';
*p++ = '\0';
fputs(line,outfile);
}
}
}
}
/*
Looks up the break characters for an attribute type. typ is compared
against each named entry of seplst in turn; if none matches, the
separators of the null-typed entry that closes the table are returned.
*/
char *
getsep(typ)
char *typ;
{
    int slot = 0;

    while(seplst[slot].typ != (char *)0){
        if(strcmp(typ,seplst[slot].typ) == 0)
            return(seplst[slot].lst);
        slot++;
    }
    /* Fell through to the catch-all entry. */
    return(seplst[slot].lst);
}
/*
Prints the command-line synopsis to stderr.
Always returns 1, so that callers can write exit(usage()).
The return type is spelled out: implicit int is not valid C since C99.
*/
int
usage()
{
    fputs("usage: bonk [-i infile] [-o outfile] [-l linesize] [-d]\n",
        stderr);
    return(1);
}