/* Primitives Package */
#include "copyright.h"
#include "config.h"
#include <sys/types.h>
#include <stdio.h>
#include <time.h>
#include <ctype.h>
#ifdef WIN32
# define __STDC__ 1
# include "./pcre.h"
#else
# include "pcre/pcre.h"
#endif
#include "db.h"
#include "tune.h"
#include "inst.h"
#include "externs.h"
#include "match.h"
#include "interface.h"
#include "params.h"
#include "fbstrings.h"
#include "interp.h"
#define MUF_RE_CACHE_ITEMS 64
static struct inst *oper1, *oper2, *oper3, *oper4;
static char buf[BUFFER_LEN];
typedef struct
{
struct shared_string* pattern;
int flags;
pcre* re;
}
muf_re;
static muf_re muf_re_cache[MUF_RE_CACHE_ITEMS];
muf_re* muf_re_get(struct shared_string* pattern, int flags, const char** errmsg)
{
int idx = (hash(DoNullInd(pattern), MUF_RE_CACHE_ITEMS) + flags) % MUF_RE_CACHE_ITEMS;
muf_re* re = &muf_re_cache[idx];
int erroff;
if (re->pattern)
{
if ((flags != re->flags) || strcmp(DoNullInd(pattern), DoNullInd(re->pattern)))
{
pcre_free(re->re);
if (re->pattern && (--re->pattern->links == 0))
free((void *)re->pattern);
}
else
return re;
}
re->re = pcre_compile(DoNullInd(pattern), flags, errmsg, &erroff, NULL);
if (re->re == NULL)
{
re->pattern = NULL;
return NULL;
}
re->pattern = pattern;
re->pattern->links++;
re->flags = flags;
return re;
}
const char* muf_re_error(int err)
{
switch(err)
{
case PCRE_ERROR_NOMATCH: return "No matches";
case PCRE_ERROR_NULL: return "Internal error: NULL arg to pcre_exec()";
case PCRE_ERROR_BADOPTION: return "Invalid regexp option.";
case PCRE_ERROR_BADMAGIC: return "Internal error: bad magic number.";
case PCRE_ERROR_UNKNOWN_NODE: return "Internal error: bad regexp node.";
case PCRE_ERROR_NOMEMORY: return "Out of memory.";
case PCRE_ERROR_NOSUBSTRING: return "No substring.";
case PCRE_ERROR_MATCHLIMIT: return "Match recursion limit exceeded.";
case PCRE_ERROR_CALLOUT: return "Internal error: callout error.";
default: return "Unknown error";
}
}
#define MATCH_ARR_SIZE 30
void
prim_regexp(PRIM_PROTOTYPE)
{
stk_array* nu_val = 0;
stk_array* nu_idx = 0;
int matches[MATCH_ARR_SIZE];
muf_re* re;
char* text;
int flags = 0;
int len, i;
int matchcnt = 0;
const char* errstr;
CHECKOP(3);
oper3 = POP(); /* int:Flags */
oper2 = POP(); /* str:Pattern */
oper1 = POP(); /* str:Text */
if (oper1->type != PROG_STRING)
abort_interp("Non-string argument (1)");
if (oper2->type != PROG_STRING)
abort_interp("Non-string argument (2)");
if (oper3->type != PROG_INTEGER)
abort_interp("Non-integer argument (3)");
if (!oper2->data.string)
abort_interp("Empty string argument (2)");
if (oper3->data.number & MUF_RE_ICASE)
flags |= PCRE_CASELESS;
if (oper3->data.number & MUF_RE_EXTENDED)
flags |= PCRE_EXTENDED;
if ((re = muf_re_get(oper2->data.string, flags, &errstr)) == NULL)
abort_interp(errstr);
text = DoNullInd(oper1->data.string);
len = strlen(text);
if ((matchcnt = pcre_exec(re->re, NULL, text, len, 0, 0, matches, MATCH_ARR_SIZE)) < 0)
{
if (matchcnt != PCRE_ERROR_NOMATCH)
{
abort_interp(muf_re_error(matchcnt));
}
if (((nu_val = new_array_packed(0)) == NULL) ||
((nu_idx = new_array_packed(0)) == NULL))
{
if (nu_val != NULL)
array_free(nu_val);
if (nu_idx != NULL)
array_free(nu_idx);
abort_interp("Out of memory");
}
}
else
{
if (((nu_val = new_array_packed(matchcnt)) == NULL) ||
((nu_idx = new_array_packed(matchcnt)) == NULL))
{
if (nu_val != NULL)
array_free(nu_val);
if (nu_idx != NULL)
array_free(nu_idx);
abort_interp("Out of memory");
}
for(i = 0; i < matchcnt; i++)
{
int substart = matches[i*2];
int subend = matches[i*2+1];
struct inst idx, val;
stk_array* nu;
if ((substart >= 0) && (subend >= 0) && (substart < len))
snprintf(buf, BUFFER_LEN, "%.*s", (subend - substart), &text[substart]);
else
buf[0] = '\0';
idx.type = PROG_INTEGER;
idx.data.number = i;
val.type = PROG_STRING;
val.data.string = alloc_prog_string(buf);
array_setitem(&nu_val, &idx, &val);
CLEAR(&idx);
CLEAR(&val);
if ((nu = new_array_packed(2)) == NULL)
{
array_free(nu_val);
array_free(nu_idx);
abort_interp("Out of memory");
}
idx.type = PROG_INTEGER;
idx.data.number = 0;
val.type = PROG_INTEGER;
val.data.number = substart + 1;
array_setitem(&nu, &idx, &val);
CLEAR(&idx);
CLEAR(&val);
idx.type = PROG_INTEGER;
idx.data.number = 1;
val.type = PROG_INTEGER;
val.data.number = subend - substart;
array_setitem(&nu, &idx, &val);
CLEAR(&idx);
CLEAR(&val);
idx.type = PROG_INTEGER;
idx.data.number = i;
val.type = PROG_ARRAY;
val.data.array = nu;
array_setitem(&nu_idx, &idx, &val);
CLEAR(&idx);
CLEAR(&val);
}
}
CLEAR(oper3);
CLEAR(oper2);
CLEAR(oper1);
PushArrayRaw(nu_val);
PushArrayRaw(nu_idx);
}
void
prim_regsub(PRIM_PROTOTYPE)
{
int matches[MATCH_ARR_SIZE];
int flags = 0;
char* write_ptr = buf;
int write_left = BUFFER_LEN - 1;
muf_re* re;
char* text;
char* textstart;
const char* errstr;
int matchcnt, len;
CHECKOP(4);
oper4 = POP(); /* int:Flags */
oper3 = POP(); /* str:Replace */
oper2 = POP(); /* str:Pattern */
oper1 = POP(); /* str:Text */
if (oper1->type != PROG_STRING)
abort_interp("Non-string argument (1)");
if (oper2->type != PROG_STRING)
abort_interp("Non-string argument (2)");
if (oper3->type != PROG_STRING)
abort_interp("Non-string argument (3)");
if (oper4->type != PROG_INTEGER)
abort_interp("Non-integer argument (4)");
if (!oper2->data.string)
abort_interp("Empty string argument (2)");
if (oper4->data.number & MUF_RE_ICASE)
flags |= PCRE_CASELESS;
if (oper4->data.number & MUF_RE_EXTENDED)
flags |= PCRE_EXTENDED;
if ((re = muf_re_get(oper2->data.string, flags, &errstr)) == NULL)
abort_interp(errstr);
textstart = text = DoNullInd(oper1->data.string);
len = strlen(textstart);
while((*text != '\0') && (write_left > 0))
{
if ((matchcnt = pcre_exec(re->re, NULL, textstart, len, text-textstart, 0, matches, MATCH_ARR_SIZE)) < 0)
{
if (matchcnt != PCRE_ERROR_NOMATCH)
{
abort_interp(muf_re_error(matchcnt));
}
while((write_left > 0) && (*text != '\0'))
{
*write_ptr++ = *text++;
write_left--;
}
break;
}
else
{
int allstart = matches[0];
int allend = matches[1];
int substart = -1;
int subend = -1;
char* read_ptr = DoNullInd(oper3->data.string);
int count;
for(count = allstart-(text-textstart); (write_left > 0) && (*text != '\0') && (count > 0); count--)
{
*write_ptr++ = *text++;
write_left--;
}
while((write_left > 0) && (*read_ptr != '\0'))
{
if (*read_ptr == '\\')
{
if (!isdigit(*(++read_ptr)))
{
*write_ptr++ = *read_ptr++;
write_left--;
}
else
{
int idx = (*read_ptr++) - '0';
if ((idx < 0) || (idx >= matchcnt))
{
abort_interp("Invalid \\subexp in substitution string. (3)");
}
substart = matches[idx*2];
subend = matches[idx*2+1];
if ((substart >= 0) && (subend >= 0) && (substart < len))
{
char* ptr = &textstart[substart];
count = subend - substart;
if (count > write_left)
{
abort_interp("Operation would result in overflow");
}
for(; (write_left > 0) && (count > 0) && (*ptr != '\0'); count--)
{
*write_ptr++ = *ptr++;
write_left--;
}
}
}
}
else
{
*write_ptr++ = *read_ptr++;
write_left--;
}
}
for(count = allend - allstart; (*text != '\0') && (count > 0); count--)
text++;
if (allstart == allend && *text) {
*write_ptr++ = *text++;
write_left--;
}
}
if ((oper4->data.number & MUF_RE_ALL) == 0)
{
while((write_left > 0) && (*text != '\0'))
{
*write_ptr++ = *text++;
write_left--;
}
break;
}
}
if (*text != '\0')
abort_interp("Operation would result in overflow");
*write_ptr = '\0';
CLEAR(oper4);
CLEAR(oper3);
CLEAR(oper2);
CLEAR(oper1);
PushString(buf);
}