From amolitor@spamalot.ecn.uoknor.edu Sun Aug 18 12:01:21 1991
Received: by hussar.dco.dec.com (5.65/ULTRIX-fma-071791);
	id AA03103; Sun, 18 Aug 91 12:01:17 -0400
Received: by spamalot.ecn.uoknor.edu (5.64+/1.34)
	id AA08293; Fri, 16 Aug 91 16:27:56 -0500
Date: Fri, 16 Aug 91 16:27:56 -0500
From: amolitor@spamalot.ecn.uoknor.edu (Andrew Molitor)
Message-Id: <9108162127.AA08293@spamalot.ecn.uoknor.edu>
To: mjr@decuac.DEC.COM
Subject: Something for umud/MISC?
Status: R

/*
	Chops a flat OIF from stdin up into shorter lines on stdout, or
	vice versa. Probably not at all wise to apply it to bigram-compressed
	OIF.

	Takes two args:
		-l <linesize>	tells it to break up into lines of some size
				other than the default of 80.
		-d		tells it to dechop -- i.e. paste a chopped file
				back together.

	NOTE: The dechop side assumes that every line that starts with a
	space is to be pasted on to the end of the previous line. Since we
	are assuming OIF input, where no line starts with a space, this is
	fine as long as you don't bugger up a chopped up file, OK?
*/ #include <stdio.h> #include <ctype.h> char *malloc(); char *getsep(); static int len = 80; static char *line; struct { char *typ; char *lst; } seplst[] = { {"lst",";"}, /* Break lists ONLY at ;s */ {(char *)0, " \t;"} /* Everything else at these */ }; static char *badfile = "Bad OIF file at line %d\n"; static char *badbreak = "Unable to do good line break at line %d\n"; main(ac,av) int ac; char *av[]; { int dechop = 0; int onearg = 0; int currline = 0; int i; int ch,ch2; char *p,*lastsep,*seps; while(--ac){ if(av[ac][0] != '-'){ onearg++; continue; } switch(av[ac][1]){ case 'l': if(!onearg) exit(usage()); len = atoi(av[ac+1]); break; case 'd': dechop = 1; break; default: exit(usage()); break; } } if(dechop){ /* Dechop stdin to stdout */ while((ch = getchar()) != -1){ if(ch == '\n'){ /* Check next line for continuation */ if((ch = getchar()) == -1){ putchar('\n'); exit(0); } /* If it starts with a space, it's continued */ /* This is, after all, OIF. */ if(ch != ' '){ putchar('\n'); putchar(ch); } } else { putchar(ch); } } } else { /* Chop stdin to sdout */ if((line = malloc(len+1)) == (char *)0){ fputs("Could not allocate line buffer. Too long?", stderr); exit(1); } while(ch != -1){ p = line; lastsep = (char *)0; i = 0; /* Read first word of OIF line */ currline++; ch = getchar(); while(!isspace(ch) && ch != -1 && i < len){ *p++ = ch; i++; ch = getchar(); } if(ch == '\n' || ch == -1){ if(i){ *p = '\0'; puts(line); } i = 0; continue; } if(i >= len){ fprintf(stderr,badfile,currline); exit(1); } /* Look up separators */ *p = '\0'; seps = getsep(line); if(index(seps,ch)) lastsep = p; *p++ = ch; /* Do the rest of the line */ ch = getchar(); while(ch != '\n' && ch != -1){ /* If we have space on the line... */ if(i < len){ if(index(seps,ch)) lastsep = p; *p++ = ch; i++; } else { /* No more room. 
Attempt linebreak */ if(lastsep){ *p++ = '\0'; ch2 = *lastsep; *lastsep = '\0'; puts(line); p = line; *p++ = ' '; *p++ = ch2; strcpy(p,lastsep+1); i = strlen(line); p = line+i; *p++ = ch; } else { /* No seperators! Panic! */ fprintf(stderr,badbreak,currline); *p = '\0'; puts(line); fflush(stdout); p = line; *p++ = ' '; *p++ = ch; i = 1; } lastsep = (char *)0; } ch = getchar(); } /* Put out remaining in the buffer */ if(i){ *p++ = '\0'; puts(line); } } } } /* Returns the string of separators for the given attribute type */ char * getsep(typ) char *typ; { int i; for(i = 0; seplst[i].typ != (char *)0;i++) if(strcmp(typ,seplst[i].typ) == 0) break; return(seplst[i].lst); } usage() { fputs("usage: bonk [-l linesize] [-d]",stderr); return(1); }