From amolitor@spamalot.ecn.uoknor.edu Sun Aug 18 12:01:21 1991
Received: by hussar.dco.dec.com (5.65/ULTRIX-fma-071791);
id AA03103; Sun, 18 Aug 91 12:01:17 -0400
Received: by spamalot.ecn.uoknor.edu (5.64+/1.34)
id AA08293; Fri, 16 Aug 91 16:27:56 -0500
Date: Fri, 16 Aug 91 16:27:56 -0500
From: amolitor@spamalot.ecn.uoknor.edu (Andrew Molitor)
Message-Id: <9108162127.AA08293@spamalot.ecn.uoknor.edu>
To: mjr@decuac.DEC.COM
Subject: Something for umud/MISC?
Status: R
/*
Chops a flat OIF from stdin up into shorter lines on stdout, or vice
versa. Probably not at all wise to apply it to bigram-compressed OIF.
Takes two args:
-l <linesize> tells it to break up into lines of some
size other than the default of 80.
-d tells it to dechop -- i.e. paste a chopped file
back together.
NOTE: The dechop side assumes that every line that starts with a space
is to be pasted on to the end of the previous line. Since we are assuming
OIF input, where no line starts with a space, this is fine as long as
you don't bugger up a chopped up file, OK?
*/
#include <ctype.h>
#include <stdio.h>
#include <string.h>
char *malloc();
char *getsep();
static int len = 80;
static char *line;
struct {
char *typ;
char *lst;
} seplst[] = {
{"lst",";"}, /* Break lists ONLY at ;s */
{(char *)0, " \t;"} /* Everything else at these */
};
static char *badfile = "Bad OIF file at line %d\n";
static char *badbreak = "Unable to do good line break at line %d\n";
main(ac,av)
int ac;
char *av[];
{
int dechop = 0;
int onearg = 0;
int currline = 0;
int i;
int ch,ch2;
char *p,*lastsep,*seps;
while(--ac){
if(av[ac][0] != '-'){
onearg++;
continue;
}
switch(av[ac][1]){
case 'l':
if(!onearg)
exit(usage());
len = atoi(av[ac+1]);
break;
case 'd':
dechop = 1;
break;
default:
exit(usage());
break;
}
}
if(dechop){
/* Dechop stdin to stdout */
while((ch = getchar()) != -1){
if(ch == '\n'){
/* Check next line for continuation */
if((ch = getchar()) == -1){
putchar('\n');
exit(0);
}
/* If it starts with a space, it's continued */
/* This is, after all, OIF. */
if(ch != ' '){
putchar('\n');
putchar(ch);
}
} else {
putchar(ch);
}
}
} else {
/* Chop stdin to sdout */
if((line = malloc(len+1)) == (char *)0){
fputs("Could not allocate line buffer. Too long?",
stderr);
exit(1);
}
while(ch != -1){
p = line;
lastsep = (char *)0;
i = 0;
/* Read first word of OIF line */
currline++;
ch = getchar();
while(!isspace(ch) && ch != -1 && i < len){
*p++ = ch;
i++;
ch = getchar();
}
if(ch == '\n' || ch == -1){
if(i){
*p = '\0';
puts(line);
}
i = 0;
continue;
}
if(i >= len){
fprintf(stderr,badfile,currline);
exit(1);
}
/* Look up separators */
*p = '\0';
seps = getsep(line);
if(index(seps,ch))
lastsep = p;
*p++ = ch;
/* Do the rest of the line */
ch = getchar();
while(ch != '\n' && ch != -1){
/* If we have space on the line... */
if(i < len){
if(index(seps,ch))
lastsep = p;
*p++ = ch;
i++;
} else {
/* No more room. Attempt linebreak */
if(lastsep){
*p++ = '\0';
ch2 = *lastsep;
*lastsep = '\0';
puts(line);
p = line;
*p++ = ' ';
*p++ = ch2;
strcpy(p,lastsep+1);
i = strlen(line);
p = line+i;
*p++ = ch;
} else {
/* No seperators! Panic! */
fprintf(stderr,badbreak,currline);
*p = '\0';
puts(line);
fflush(stdout);
p = line;
*p++ = ' ';
*p++ = ch;
i = 1;
}
lastsep = (char *)0;
}
ch = getchar();
}
/* Put out remaining in the buffer */
if(i){
*p++ = '\0';
puts(line);
}
}
}
}
/*
Returns the string of separators for the given attribute type
*/
char *
getsep(typ)
char *typ;
{
int i;
for(i = 0; seplst[i].typ != (char *)0;i++)
if(strcmp(typ,seplst[i].typ) == 0)
break;
return(seplst[i].lst);
}
usage()
{
fputs("usage: bonk [-l linesize] [-d]",stderr);
return(1);
}