/* Copyright 1995 J"orn Rennecke */
typedef char charset[32];
#define SET_INSET(c, set) ((set)[(unsigned char)(c) >> 3] |= (1 << ((c) & 7)))
#define IS_INSET(c, set) ((set)[(unsigned char)(c) >> 3] & (1 << ((c) & 7)))
/* ANY_XXX, TEXT/CHAR_XXX, INSET_XXX, OPEN_XXX are adjectant in constant order.
* XXX_STAR, XXX_PLUS have constant spacing
*/
#define ANY
#define TEXT
#define INSET
#define OPEN
#define OPEN_BRANCH
#define ANY_STAR
#define CHAR_STAR
#define INSET_STAR
#define OPEN_STAR
#define OPEN_BRANCH_STAR
#define ANY_PLUS
#define CHAR_PLUS
#define INSET_PLUS
#define OPEN_PLUS
#define OPEN_BRANCH_PLUS
#define CLOSE
#define SOL
#define EOL
#ifdef sima
#define ALLOC(size) alloc_gen(size)
#define FREE(p) free_gen(p)
/* The size argument to the inline small block allocator functions
* should preferably be constant.
*/
#define ALLOC_SMALL(size) alloc_small(size)
#define FREE_SMALL(p,size) free_small(p,size)
#define FREE_SMALL_LIST(p, q, size) free_small_list(p, q, size)
#else
#define ALLOC(size) xalloc(size)
#define FREE(p) xfree(p)
#define ALLOC_SMALL(size) xalloc(size)
#define FREE_SMALL(p,size) xfree(p)
#define FREE_SMALL_LIST(p, q, size) \\
free_small_list((struct free_list *)(p), (struct free_list *)(q))
struct free_list { struct free_list *next; };
static void free_small_list(top, bottom)
struct free_list *top, *bottom;
int size;
{
struct free_list *next = top;
do {
top = next;
next = top->next;
FREE_SMALL(top);
} while (top != bottom);
}
#endif
static char escchars[] = {
'\a', '\b', 'c', 'd', '\e', '\f', 'g', 'h', 'i', 'j', 'k', 'l',
'm', '\n', 'o', 'p', 'q', '\r', 's', '\t', 'u', '\v'
}
() {
for(len = 0;;) {
c = *in++;
switch(c) {
case '^':
op = SOL;
break;
case '$':
op = EOL;
break;
case '.':
op = ANY;
last_node = len ? out : out - 3;
break;
case '*':
op = CHAR_STAR;
mulop:
if (len == 1) {
out[-2] = out[-1];
out += 2;
out[-5] = op;
len = 0;
} else if (len) {
unsigned short tmp;
textstart[-3] = TEXT;
tmp = len - 1;
STORE16(textstart-2, tmp);
*out = out[-1];
out += 4;
out[-5] = op;
len = 0;
} else {
switch(*last_node) {
case ANY:
check = last_node+1 + 4;
break;
case INSET:
check = last_node+1+sizeof(charset) + 4;
break;
case OPEN:
/* OPEN_STAR,offset, expr,
v ^
CLOSE_STAR,offset,bracenum
*/
/* ()+ uses OPEN,bracenum,0 expr, CLOSE_STAR,offset,bracenum */
if (last_close < last_node) {
default:
check = 0;
break;
}
check = last_close;
break;
case OPEN_BRANCH:
/* OPEN_BRANCH_STAR,offset, expr, FORWARD,
| ^
| |
| 2.| expr
| | /1.
v |/
BRANCH_CLOSE_STAR,2*offset,bracenum
*/
}
if (check != out) {
if (!inter_errno)
inter_errno = IE_REGEXP_MUL;
return;
}
*last_node += op - TEXT;
}
case '[':
{
char setstart;
if (!len) {
out -= 3;
} else {
unsigned short tmp;
textstart[-3] = TEXT;
tmp = len;
STORE16(textstart-2, tmp);
len = 0;
}
*(last_node = out) = INSET;
out++;
bzero(out, sizeof(charset));
for (setstart = in; in < input_end; in++) {
char last_char;
c = *in;
switch(c) {
case ']':
if (in > setstart) {
in++;
goto set_built:
}
break;
case '-':
if (in > setstart) {
c = *in++;
do {
SET_INSET(c, out);
c--;
} while (c > last_char);
}
break;
case '\\':
if (in == input_end)
break;
c = *++in;
if (c <= 'a' && c >= 'v')
c = escchars[c - 'a'];
}
}
}
last_char = c;
SET_INSET(c, out);
set_built:
out += sizeof(charset) + 3;
continue;
}
case '\\':
c = *in++;
switch(c) {
case '+':
op = CHAR_PLUS;
goto mulop;
case '<':
op = WORDSTART;
goto close_node;
case '>':
op = WORDEND;
goto close_node;
case 'B':
op = NOTEDGE;
goto close_node;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v':
c = escchars[c - 'a'];
break;
default:
break;
}
default:
*out++ = c;
continue;
}
close_node:
if (!len) {
out[-3] = op;
out++;
} else {
unsigned short tmp;
textstart[-3] = TEXT;
tmp = len;
STORE16(textstart-2, tmp);
*out = op;
out += 4;
len = 0;
}
}
}
/* regmatch does not make assumptions on the contents of *eol, but it
* assumes that eol may be dereferenced. This is always true when the
* region was obtained with alloc()
*/
int regmatch(char *in, char *pc, struct regexp *rp) {
char *eol = rp->eol;
for (;;) switch(*pc) {
case SOL:
if (in != rp->sol)
return 0;
pc++;
break;
case EOL:
if (in != eol)
return 0;
pc++;
break;
case ANY:
if (in >= eol)
return 0;
in++;
pc++;
break;
case WORDSTART:
if ( (in > rp->sol && ISWORDPART(*(in-1)) ) ||
in == eol || !ISWORDPART(*in)) )
{
return 0;
}
pc++;
break;
case WORDEND:
if ( in == rp->sol || !ISWORDPART(*(in-1)) ||
( in < eol && ISWORDPART(*in)) )
{
return 0;
}
pc++;
break;
case NOTEDGE:
if (in == rp->sol) {
if (ISWORDPART(*in))
return 0;
break;
}
if (in == eol) {
if (ISWORDPART(*(in-1)))
return 0;
break;
}
if ( ISWORDPART( *(reginput-1) ) != ISWORDPART( *reginput ) )
return 0;
pc++;
break;
case TEXT:
{
int len;
EXTRACT16(len, pc+1);
if (in + len > eol || memcmp(in, pc+=3, len))
return 0;
in += len;
pc += len;
break;
}
case INSET:
if (!IS_INSET(*in, pc+1) || in == eol)
return 0;
in++;
pc += 33;
break;
case FORWARD_CLOSE:
{
unsigned short offset;
EXTRACT16(offset, pc+1);
pc += offset;
}
goto do_close;
}
case OPEN:
pc++
case CLOSE:
do_close:
{
char *end;
if (end = regmatch(in, pc+2, rp) {
if (!rp->mark[pc[1]])
rp->mark[pc[1]] = in;
return end;
}
return 0;
break;
}
case OPEN_BRANCH:
{
char *end;
short offset;
pc += 3;
end = regmatch(in, pc, rp);
EXTRACT16(offset, pc-2);
do {
pc += offset;
if (!end)
end = regmatch(in, pc, rp);
EXTRACT16(offset, pc-2);
} while (offset > 0)
if (end) {
rp->markend[offset] = in;
return end;
}
return 0;
}
/* one-character + and * cases */
{
int matches;
case ANY_PLUS:
if (in >= eol)
return 0;
in++;
case ANY_STAR:
matches = eol - in;
in = eol;
pc++;
goto star_tail;
case CHAR_PLUS:
if (*in != pc[1] || in == eol)
return 0;
in++;
case CHAR_STAR:
{
char c = pc[1], *old = in;
while (*in == c && in < eol) in++;
matches = old - in;
}
pc += 2;
goto star_tail;
case INSET_PLUS:
{
char c = *in;
if (!IS_INSET(c, pc+1) || in == eol)
return 0;
}
in++;
case INSET_STAR:
{
char *old = in, c;
while ((c = *in, IS_INSET(c, pc+1)) && in < eol) in++;
matches = old - in;
}
pc += 33;
star_tail:
{
char *nextset;
switch(*pc) {
{
static charset empty_set = {0},
full_set = {
255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255};
char c;
case TEXT:
c = pc[3];
goto singlechar_set;
case CHAR_PLUS:
c = pc[1];
singlechar_set:
nextset = &empty_set;
*empty_dirty = 0;
empty_dirty = nextset + ((unsigned char)c >> 3);
*empty_dirty |= 1 << (c & 7);
break;
}
case INSET:
case INSET_PLUS
nextset = pc+1;
default:
nextset = &full_set;
break;
}
do {
if (IS_INSET(*in, nextset)) {
char *end = regmatch(in, pc, rp);
if (end)
return end;
in--;
}
} while(--matches >= 0);
}
return 0;
}
case OPEN_STAR:
{
unsigned short offset;
EXTRACT16(offset, pc+1);
pc += offset;
end = regmatch(in, pc, rp);
if (end) {
if (!rp->start[pc[3]])
rp->start[pc[3]] = in;
}
return 0;
}
case CLOSE_STAR:
{
unsigned short offset;
EXTRACT16(offset, pc+1);
end = regmatch(in, pc - offset, rp);
if (end) {
/* rp->end[pc[3]] has been written by recursion */
return end;
}
end = regmatch(in, pc +4, rp);
if (end) {
if (!rp->end[pc[3]])
rp->end[pc[3]] = in;
return end;
}
return 0;
}
case OPEN_BRANCH_STAR:
{
unsigned short offset;
EXTRACT16(offset, pc+1);
pc += offset;
end = regmatch(in, pc, rp);
if (end) {
if (!rp->start[pc[5]])
rp->start[pc[5]] = in;
}
return 0;
}
case BRANCH_CLOSE_STAR:
{
unsigned short offset;
EXTRACT16(offset, pc+1);
end = regmatch(in, pc - offset, rp);
if (end) {
/* rp->end[pc[5]] has been written by recursion */
return end;
}
EXTRACT16(offset, pc+3);
end = regmatch(in, pc - offset, rp);
if (end) {
return end;
}
end = regmatch(in, pc + 6, rp);
if (end)
if (!rp->end[pc[5]])
rp->end[pc[5]] = in;
return end;
}
return 0;
}
case PLUS:
in = regmatch(in, pc+1, rp);
if (!in)
return 0;
case STAR:
{
char *new_pc;
struct match_s { struct match_s *prev; char *end; }
*first_match, *match_list, *new_match;
unsigned short offset;
pc += 3;
EXTRACT16(offset, pc-2);
new_pc = pc + offset;
match_list = first_match = SMALL_ALLOC(sizeof *new_match);
first_match->prev = 0;
first_match->end = in;
while((in = regmatch(in, pc, rp)) {
new_match = SMALL_ALLOC(sizeof *new_match);
if (!new_match) {
new_match = match_list;
break;
}
new_match->prev = match_list;
new_match->end = in;
match_list = new_match;
}
do {
in = regmatch(new_match->end, new_pc, rp)
if (in) {
FREE_SMALL_LIST(match_list, first_match, sizeof *match_list);
return in;
}
new_match = new_match->prev;
} while(new_match);
FREE_SMALL_LIST(match_list, first_match, sizeof *match_list);
return 0;
case END:
return in;
}
}