/
umud/DOC/
umud/DOC/examples/
umud/DOC/internals/
umud/DOC/wizard/
umud/MISC/
umud/MISC/dbchk/
umud/RWHO/rwhod/
From amolitor@spamalot.ecn.uoknor.edu Sun Aug 18 12:01:21 1991
Received: by hussar.dco.dec.com (5.65/ULTRIX-fma-071791);
	id AA03103; Sun, 18 Aug 91 12:01:17 -0400
Received: by spamalot.ecn.uoknor.edu (5.64+/1.34)
	id AA08293; Fri, 16 Aug 91 16:27:56 -0500
Date: Fri, 16 Aug 91 16:27:56 -0500
From: amolitor@spamalot.ecn.uoknor.edu (Andrew Molitor)
Message-Id: <9108162127.AA08293@spamalot.ecn.uoknor.edu>
To: mjr@decuac.DEC.COM
Subject: Something for umud/MISC?
Status: R

/*
	Chops a flat OIF from stdin up into shorter lines on stdout, or vice
versa. Probably not at all wise to apply it to bigram-compressed OIF.

	Takes two args:
		-l <linesize> tells it to break up into lines of some
			size other than the default of 80.
		-d tells it to dechop -- i.e. paste a chopped file
			back together.

	NOTE: The dechop side assumes that every line that starts with a space
is to be pasted on to the end of the previous line. Since we are assuming
OIF input, where no line starts with a space, this is fine as long as
you don't bugger up a chopped up file, OK?


*/

#include	<ctype.h>
#include	<limits.h>
#include	<stdio.h>
#include	<string.h>

/* Old-style (unprototyped) declarations of functions used below. */

char	*malloc();
char	*getsep();

/* Requested output line size; 80 unless changed with -l. */
static	int	len = 80;

/* Work buffer for the line currently being chopped. */
static	char	*line;

/*
	Break-character table, searched by getsep().  The entry whose typ is
	a null pointer ends the table and supplies the default break set.
*/
struct {
	char	*typ;		/* first word of the line (attribute type) */
	char	*lst;		/* characters at which such a line may break */
} seplst[] = {
	{ "lst", ";" },		/* lists break ONLY at ';' */
	{ NULL, " \t;" },	/* every other type breaks at any of these */
};

/* Diagnostics; each takes the input line number as its %d argument. */
static	char	*badbreak = "Unable to do good line break at line %d\n";
static	char	*badfile = "Bad OIF file at line %d\n";

/*
	Extra bytes allocated beyond the requested line size.  While chopping,
	the buffer can hold up to three characters more than `len' (the
	separator after the first word is not counted, and a continuation
	line gains a two-character prefix) plus the terminating NUL.
*/
#define	LINE_SLACK	4

/*
	Converts the operand of -l to a line size.  Returns the size, or -1
	if the string is not a positive decimal number small enough for the
	buffer size computation.
*/
static	int
parse_linesize(s)
char	*s;
{
	int	n = 0;
	int	d;

	if(*s == '\0')
		return(-1);
	for(; *s != '\0'; s++){
		if(!isdigit((unsigned char)*s))
			return(-1);
		d = *s - '0';
		/* Refuse before n * 10 + d could exceed INT_MAX - LINE_SLACK */
		if(n > (INT_MAX - LINE_SLACK - d) / 10)
			return(-1);
		n = n * 10 + d;
	}
	return(n > 0 ? n : -1);
}

/*
	Dechop stdin to stdout: a newline followed by a space is dropped
	together with that space, pasting the continuation line back on to
	the line before it.  Every other character is copied through.
*/
static	void
dechop_stream()
{
	int	ch;
	int	pending = 0;	/* a newline has been read but not yet written */

	while((ch = getchar()) != EOF){
		if(pending){
			pending = 0;
			if(ch == ' ')
				continue;	/* continuation: swallow "\n " */
			putchar('\n');
		}
		/*
			A newline is held back until the next character shows
			whether it starts a continuation.  This also covers a
			continuation that follows an empty line.
		*/
		if(ch == '\n')
			pending = 1;
		else
			putchar(ch);
	}
	if(pending)
		putchar('\n');
}

/*
	Chop stdin to stdout.  The first word of each input line selects the
	separator set (see getsep()); once more than `len' characters have
	been collected the line is broken at the last separator seen, and the
	remainder is carried over to a continuation line that starts with a
	space.  Emitted lines can run a few characters past `len' because the
	separator after the first word and the continuation prefix are not
	counted.  Empty input lines produce no output.

	Returns 0 on success, 1 if the buffer cannot be allocated or the first
	word of a line does not fit in `len' characters.
*/
static	int
chop_stream()
{
	int	currline = 0;
	int	i;		/* characters counted against len */
	int	taillen;
	int	ch,ch2;
	char	*p,*lastsep,*seps,*tail;

	if((line = malloc((size_t)len + LINE_SLACK)) == (char *)0){
		fputs("Could not allocate line buffer. Too long?\n",
			stderr);
		return(1);
	}

	/* do/while, so that ch has been read before it is first tested */
	do {
		p = line;
		lastsep = (char *)0;
		i = 0;

		/* Read first word of OIF line */

		currline++;
		ch = getchar();
		while(!isspace(ch) && ch != EOF && i < len){
			*p++ = ch;
			i++;
			ch = getchar();
		}
		if(ch == '\n' || ch == EOF){
			/* The whole line is a single word (or is empty) */
			if(i){
				*p = '\0';
				puts(line);
			}
			continue;
		}
		if(i >= len){
			fprintf(stderr,badfile,currline);
			return(1);
		}

		/* Look up separators for this line's first word */

		*p = '\0';
		seps = getsep(line);
		if(strchr(seps,ch))
			lastsep = p;
		*p++ = ch;	/* stored but not counted in i */

		/* Do the rest of the line */

		ch = getchar();
		while(ch != '\n' && ch != EOF){
			if(i < len){
				/* Still room: remember the latest break point */
				if(strchr(seps,ch))
					lastsep = p;
				*p++ = ch;
				i++;
			} else if(lastsep){
				/* No more room. Break at the last separator */

				ch2 = *lastsep;
				*lastsep = '\0';
				puts(line);

				/*
					Continuation line: a space, the
					separator itself, then the text that
					followed it.  The regions may overlap,
					and the tail must be moved before the
					prefix is written over it.
				*/
				tail = lastsep + 1;
				taillen = (int)(p - tail);
				memmove(line + 2,tail,(size_t)taillen);
				line[0] = ' ';
				line[1] = ch2;
				i = taillen + 2;
				p = line + i;
				*p++ = ch;
				lastsep = (char *)0;
			} else {
				/* No separators! Panic! */

				fprintf(stderr,badbreak,currline);
				*p = '\0';
				puts(line);
				fflush(stdout);
				p = line;
				*p++ = ' ';
				*p++ = ch;
				i = 1;
			}
			ch = getchar();
		}

		/* Put out what remains in the buffer */

		if(i){
			*p = '\0';
			puts(line);
		}
	} while(ch != EOF);

	return(0);
}

/*
	Entry point.  Options:
		-l <linesize>	chop into lines of about <linesize> characters
				instead of the default
		-d		dechop instead of chop
	Arguments that do not start with '-' and are not the operand of -l
	are ignored.  Returns 0 on success and 1 on a usage, input or output
	error.
*/
int
main(ac,av)
int	ac;
char	*av[];
{
	int	usage();
	int	dechop = 0;
	int	status = 0;
	int	n;
	int	i;

	for(i = 1; i < ac; i++){
		if(av[i][0] != '-')
			continue;
		switch(av[i][1]){
		case 'l':
			/* The size is the next argument; it must exist */
			if(i + 1 >= ac)
				return(usage());
			n = parse_linesize(av[++i]);
			if(n < 0)
				return(usage());
			len = n;
			break;
		case 'd':
			dechop = 1;
			break;
		default:
			return(usage());
		}
	}

	if(dechop)
		dechop_stream();
	else
		status = chop_stream();

	/* Report a failed write instead of silently losing output */
	if(fflush(stdout) == EOF || ferror(stdout)){
		fputs("Error writing output\n",stderr);
		status = 1;
	}
	return(status);
}

/*
	Looks typ up in seplst and returns the break characters of the
	matching entry.  When no entry matches, the break characters of the
	terminating entry (the one with a null typ) are returned.
*/

char	*
getsep(typ)
char	*typ;
{
	int	k = 0;

	/* Stop at the first match or at the terminating entry */
	while(seplst[k].typ != (char *)0 && strcmp(typ,seplst[k].typ) != 0)
		k++;

	return(seplst[k].lst);
}

/*
	Writes the usage summary to stderr.  Returns 1, which callers use
	as the exit status.
*/

int
usage()
{
	/* End with a newline so the shell prompt starts on a fresh line */
	fputs("usage: bonk [-l linesize] [-d]\n",stderr);
	return(1);
}