Changeset View
Changeset View
Standalone View
Standalone View
lib/libc/regex/engine.c
Show First 20 Lines • Show All 98 Lines • ▼ Show 20 Lines | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
/* === engine.c === */ | /* === engine.c === */ | ||||
static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); | static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); | ||||
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); | static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); | ||||
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int); | static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int); | ||||
static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast); | static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast); | ||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft); | static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags); | ||||
#define MAX_RECURSION 100 | #define MAX_RECURSION 100 | ||||
#define BOL (OUT-1) | #define BOL (OUT-1) | ||||
#define EOL (BOL-1) | #define EOL (BOL-1) | ||||
#define BOLEOL (BOL-2) | #define BOLEOL (BOL-2) | ||||
#define NOTHING (BOL-3) | #define NOTHING (BOL-3) | ||||
#define BOW (BOL-4) | #define BOW (BOL-4) | ||||
#define EOW (BOL-5) | #define EOW (BOL-5) | ||||
#define BADCHAR (BOL-6) | #define BADCHAR (BOL-6) | ||||
#define NONCHAR(c) ((c) <= OUT) | #define NONCHAR(c) ((c) <= OUT) | ||||
/* sflags */ | |||||
#define SNWBND 01 | |||||
#define SBOS 02 | |||||
#define SEOS 04 | |||||
#ifdef REDEBUG | #ifdef REDEBUG | ||||
static void print(struct match *m, const char *caption, states st, int ch, FILE *d); | static void print(struct match *m, const char *caption, states st, int ch, FILE *d); | ||||
#endif | #endif | ||||
#ifdef REDEBUG | #ifdef REDEBUG | ||||
static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst); | static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst); | ||||
#endif | #endif | ||||
#ifdef REDEBUG | #ifdef REDEBUG | ||||
static const char *pchar(int ch); | static const char *pchar(int ch); | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | if (eflags®_STARTEND) { | ||||
stop = string + pmatch[0].rm_eo; | stop = string + pmatch[0].rm_eo; | ||||
} else { | } else { | ||||
start = string; | start = string; | ||||
stop = start + strlen(start); | stop = start + strlen(start); | ||||
} | } | ||||
if (stop < start) | if (stop < start) | ||||
return(REG_INVARG); | return(REG_INVARG); | ||||
/* Trivial zero-length match on empty sub */ | |||||
if (g->iflags & EMPTBR) { | |||||
if (nmatch > 0) { | |||||
pmatch[0].rm_so = pmatch[0].rm_eo = 0; | |||||
for (i = 1; i < nmatch; i++) | |||||
pmatch[i].rm_so = pmatch[i].rm_eo = -1; | |||||
} | |||||
return(0); | |||||
} | |||||
/* prescreening; this does wonders for this rather slow code */ | /* prescreening; this does wonders for this rather slow code */ | ||||
if (g->must != NULL) { | if (g->must != NULL) { | ||||
if (g->charjump != NULL && g->matchjump != NULL) { | if (g->charjump != NULL && g->matchjump != NULL) { | ||||
mustfirst = g->must; | mustfirst = g->must; | ||||
mustlast = g->must + g->mlen - 1; | mustlast = g->must + g->mlen - 1; | ||||
charjump = g->charjump; | charjump = g->charjump; | ||||
matchjump = g->matchjump; | matchjump = g->matchjump; | ||||
pp = mustlast; | pp = mustlast; | ||||
▲ Show 20 Lines • Show All 213 Lines • ▼ Show 20 Lines | for (ss = startst; ss < stopst; ss = es) { | ||||
/* figure out what it matched */ | /* figure out what it matched */ | ||||
switch (OP(m->g->strip[ss])) { | switch (OP(m->g->strip[ss])) { | ||||
case OEND: | case OEND: | ||||
assert(nope); | assert(nope); | ||||
break; | break; | ||||
case OCHAR: | case OCHAR: | ||||
sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); | sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); | ||||
break; | break; | ||||
case OBOS: | |||||
case OEOS: | |||||
case OBOL: | case OBOL: | ||||
case OEOL: | case OEOL: | ||||
case OBOW: | case OBOW: | ||||
case OEOW: | case OEOW: | ||||
case OWBND: | |||||
case ONWBND: | |||||
break; | break; | ||||
case OANY: | case OANY: | ||||
case OANYOF: | case OANYOF: | ||||
sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); | sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); | ||||
break; | break; | ||||
case OBACK_: | case OBACK_: | ||||
case O_BACK: | case O_BACK: | ||||
assert(nope); | assert(nope); | ||||
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines | for (ss = startst; !hard && ss < stopst; ss++) | ||||
case OEOL: | case OEOL: | ||||
if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || | if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || | ||||
(sp < m->endp && *sp == '\n' && | (sp < m->endp && *sp == '\n' && | ||||
(m->g->cflags®_NEWLINE)) ) | (m->g->cflags®_NEWLINE)) ) | ||||
{ /* yes */ } | { /* yes */ } | ||||
else | else | ||||
return(NULL); | return(NULL); | ||||
break; | break; | ||||
case OBOS: | |||||
break; | |||||
if (sp == m->beginp) | |||||
{ /* yes */ } | |||||
else | |||||
return(NULL); | |||||
break; | |||||
case OEOS: | |||||
break; | |||||
if (sp == m->endp) | |||||
{ /* yes */ } | |||||
else | |||||
return(NULL); | |||||
break; | |||||
case ONWBND: | |||||
if (sp > m->beginp && sp < m->endp && | |||||
ISWORD(*(sp-1)) == ISWORD(*sp)) | |||||
{ /* yes */ } | |||||
else | |||||
return(NULL); | |||||
break; | |||||
case OWBND: | |||||
case OBOW: | case OBOW: | ||||
if (sp < m->endp && ISWORD(*sp) && | if (sp < m->endp && ISWORD(*sp) && | ||||
((sp == m->beginp && !(m->eflags®_NOTBOL)) || | ((sp == m->beginp && !(m->eflags®_NOTBOL)) || | ||||
(sp > m->offp && !ISWORD(*(sp-1))))) | (sp > m->offp && !ISWORD(*(sp-1))))) | ||||
{ /* yes */ } | { /* yes */ } | ||||
else | else if (OP(s) == OBOW) | ||||
return(NULL); | return(NULL); | ||||
break; | /* FALLTHROUGH */ | ||||
case OEOW: | case OEOW: | ||||
if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || | if (OP(s) != OBOW && | ||||
( (sp == m->endp && !(m->eflags®_NOTEOL)) || | |||||
(sp < m->endp && *sp == '\n' && | (sp < m->endp && *sp == '\n' && | ||||
(m->g->cflags®_NEWLINE)) || | (m->g->cflags®_NEWLINE)) || | ||||
(sp < m->endp && !ISWORD(*sp)) ) && | (sp < m->endp && !ISWORD(*sp)) ) && | ||||
(sp > m->beginp && ISWORD(*(sp-1))) ) | (sp > m->beginp && ISWORD(*(sp-1))) ) | ||||
{ /* yes */ } | { /* yes */ } | ||||
else | else if (OP(s) != OBOW) | ||||
return(NULL); | return(NULL); | ||||
break; | break; | ||||
case O_QUEST: | case O_QUEST: | ||||
break; | break; | ||||
case OOR1: /* matches null but needs to skip */ | case OOR1: /* matches null but needs to skip */ | ||||
ss++; | ss++; | ||||
s = m->g->strip[ss]; | s = m->g->strip[ss]; | ||||
do { | do { | ||||
Show All 27 Lines | case OBACK_: /* the vilest depths */ | ||||
if (len == 0 && rec++ > MAX_RECURSION) | if (len == 0 && rec++ > MAX_RECURSION) | ||||
return(NULL); | return(NULL); | ||||
assert(stop - m->beginp >= len); | assert(stop - m->beginp >= len); | ||||
if (sp > stop - len) | if (sp > stop - len) | ||||
return(NULL); /* not enough left to match */ | return(NULL); /* not enough left to match */ | ||||
ssp = m->offp + m->pmatch[i].rm_so; | ssp = m->offp + m->pmatch[i].rm_so; | ||||
if (memcmp(sp, ssp, len) != 0) | if (memcmp(sp, ssp, len) != 0) | ||||
return(NULL); | return(NULL); | ||||
while (m->g->strip[ss] != SOP(O_BACK, i)) | while (m->g->strip[ss] != (sop)SOP(O_BACK, i)) | ||||
ss++; | ss++; | ||||
return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); | ||||
case OQUEST_: /* to null or not */ | case OQUEST_: /* to null or not */ | ||||
dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | ||||
if (dp != NULL) | if (dp != NULL) | ||||
return(dp); /* not */ | return(dp); /* not */ | ||||
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); | return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); | ||||
case OPLUS_: | case OPLUS_: | ||||
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines | |||||
static const char * /* where it ended, or NULL */ | static const char * /* where it ended, or NULL */ | ||||
walk(struct match *m, const char *start, const char *stop, sopno startst, | walk(struct match *m, const char *start, const char *stop, sopno startst, | ||||
sopno stopst, bool fast) | sopno stopst, bool fast) | ||||
{ | { | ||||
states st = m->st; | states st = m->st; | ||||
states fresh = m->fresh; | states fresh = m->fresh; | ||||
states empty = m->empty; | states empty = m->empty; | ||||
states tmp = m->tmp; | states tmp = m->tmp; | ||||
sopno nxop; | |||||
const char *p = start; | const char *p = start; | ||||
wint_t c; | wint_t c; | ||||
wint_t lastc; /* previous c */ | wint_t lastc; /* previous c */ | ||||
wint_t flagch; | wint_t flagch; | ||||
int i; | int i; | ||||
const char *matchp; /* last p at which a match ended */ | const char *matchp; /* last p at which a match ended */ | ||||
size_t clen; | size_t clen; | ||||
int sflags = 0; | |||||
AT("slow", start, stop, startst, stopst); | AT("slow", start, stop, startst, stopst); | ||||
CLEAR(st); | CLEAR(st); | ||||
SET1(st, startst); | SET1(st, startst); | ||||
SP("sstart", st, *p); | SP("sstart", st, *p); | ||||
st = step(m->g, startst, stopst, st, NOTHING, st); | st = step(m->g, startst, stopst, st, NOTHING, st, sflags); | ||||
if (fast) | if (fast) | ||||
ASSIGN(fresh, st); | ASSIGN(fresh, st); | ||||
matchp = NULL; | matchp = NULL; | ||||
if (start == m->offp || (start == m->beginp && !(m->eflags®_NOTBOL))) | if (start == m->offp || (start == m->beginp && !(m->eflags®_NOTBOL))) | ||||
c = OUT; | c = OUT; | ||||
else { | else { | ||||
/* | /* | ||||
* XXX Wrong if the previous character was multi-byte. | * XXX Wrong if the previous character was multi-byte. | ||||
Show All 24 Lines | for (;;) { | ||||
} | } | ||||
if ( (c == '\n' && m->g->cflags®_NEWLINE) || | if ( (c == '\n' && m->g->cflags®_NEWLINE) || | ||||
(c == OUT && !(m->eflags®_NOTEOL)) ) { | (c == OUT && !(m->eflags®_NOTEOL)) ) { | ||||
flagch = (flagch == BOL) ? BOLEOL : EOL; | flagch = (flagch == BOL) ? BOLEOL : EOL; | ||||
i += m->g->neol; | i += m->g->neol; | ||||
} | } | ||||
if (i != 0) { | if (i != 0) { | ||||
for (; i > 0; i--) | for (; i > 0; i--) | ||||
st = step(m->g, startst, stopst, st, flagch, st); | st = step(m->g, startst, stopst, st, flagch, st, sflags); | ||||
SP("sboleol", st, c); | SP("sboleol", st, c); | ||||
} | } | ||||
/* how about a word boundary? */ | /* how about a word boundary? */ | ||||
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | ||||
(c != OUT && ISWORD(c)) ) { | (c != OUT && ISWORD(c)) ) { | ||||
flagch = BOW; | flagch = BOW; | ||||
} | } | ||||
if ( (lastc != OUT && ISWORD(lastc)) && | if ( (lastc != OUT && ISWORD(lastc)) && | ||||
(flagch == EOL || (c != OUT && !ISWORD(c))) ) { | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { | ||||
flagch = EOW; | flagch = EOW; | ||||
} | } | ||||
if (flagch == BOW || flagch == EOW) { | if (p == m->beginp) | ||||
st = step(m->g, startst, stopst, st, flagch, st); | sflags |= SBOS; | ||||
SP("sboweow", st, c); | if (p == m->endp) | ||||
sflags |= SEOS; | |||||
if (flagch != BOW && flagch != EOW && | |||||
lastc != OUT && c != OUT && ISWORD(lastc) == ISWORD(c)) | |||||
sflags |= SNWBND; | |||||
nxop = OP(m->g->strip[startst]); | |||||
/* Consume a match for BOW/EOW markers */ | |||||
if (flagch == BOW || flagch == EOW || | |||||
nxop == ONWBND || nxop == OBOS || nxop == OEOS) { | |||||
st = step(m->g, startst, stopst, st, flagch, st, sflags); | |||||
SP("sboweownbwnd", st, c); | |||||
} | } | ||||
/* Don't match 0-length ops elsewhere */ | |||||
sflags = 0; | |||||
/* are we done? */ | /* are we done? */ | ||||
if (ISSET(st, stopst)) { | if (ISSET(st, stopst)) { | ||||
if (fast) | if (fast) | ||||
break; | break; | ||||
else | else | ||||
matchp = p; | matchp = p; | ||||
} | } | ||||
if (EQ(st, empty) || p == stop || clen > stop - p) | if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p)) | ||||
break; /* NOTE BREAK OUT */ | break; /* NOTE BREAK OUT */ | ||||
/* no, we must deal with this character */ | /* no, we must deal with this character */ | ||||
ASSIGN(tmp, st); | ASSIGN(tmp, st); | ||||
if (fast) | if (fast) | ||||
ASSIGN(st, fresh); | ASSIGN(st, fresh); | ||||
else | else | ||||
ASSIGN(st, empty); | ASSIGN(st, empty); | ||||
assert(c != OUT); | assert(c != OUT); | ||||
st = step(m->g, startst, stopst, tmp, c, st); | st = step(m->g, startst, stopst, tmp, c, st, sflags); | ||||
SP("saft", st, c); | SP("saft", st, c); | ||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags), st)); | ||||
p += clen; | p += clen; | ||||
} | } | ||||
if (fast) { | if (fast) { | ||||
assert(matchp != NULL); | assert(matchp != NULL); | ||||
m->coldp = matchp; | m->coldp = matchp; | ||||
if (ISSET(st, stopst)) | if (ISSET(st, stopst)) | ||||
return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); | return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); | ||||
Show All 17 Lines | |||||
== #define NONCHAR(c) ((c) <= OUT) | == #define NONCHAR(c) ((c) <= OUT) | ||||
*/ | */ | ||||
static states | static states | ||||
step(struct re_guts *g, | step(struct re_guts *g, | ||||
sopno start, /* start state within strip */ | sopno start, /* start state within strip */ | ||||
sopno stop, /* state after stop state within strip */ | sopno stop, /* state after stop state within strip */ | ||||
states bef, /* states reachable before */ | states bef, /* states reachable before */ | ||||
wint_t ch, /* character or NONCHAR code */ | wint_t ch, /* character or NONCHAR code */ | ||||
states aft) /* states already known reachable after */ | states aft, /* states already known reachable after */ | ||||
int sflags) /* 0-length matching states*/ | |||||
{ | { | ||||
cset *cs; | cset *cs; | ||||
sop s; | sop s; | ||||
sopno pc; | sopno pc; | ||||
onestate here; /* note, macros know this name */ | onestate here; /* note, macros know this name */ | ||||
sopno look; | sopno look; | ||||
int i; | int i; | ||||
Show All 12 Lines | for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { | ||||
case OBOL: | case OBOL: | ||||
if (ch == BOL || ch == BOLEOL) | if (ch == BOL || ch == BOLEOL) | ||||
FWD(aft, bef, 1); | FWD(aft, bef, 1); | ||||
break; | break; | ||||
case OEOL: | case OEOL: | ||||
if (ch == EOL || ch == BOLEOL) | if (ch == EOL || ch == BOLEOL) | ||||
FWD(aft, bef, 1); | FWD(aft, bef, 1); | ||||
break; | break; | ||||
case ONWBND: | |||||
if (sflags & SNWBND) | |||||
FWD(aft, bef, 1); | |||||
break; | |||||
case OBOS: | |||||
if (sflags & SBOS) | |||||
FWD(aft, bef, 1); | |||||
break; | |||||
case OEOS: | |||||
if (sflags & SEOS) | |||||
FWD(aft, bef, 1); | |||||
break; | |||||
case OWBND: | |||||
case OBOW: | case OBOW: | ||||
if (ch == BOW) | if (ch == BOW) | ||||
FWD(aft, bef, 1); | FWD(aft, bef, 1); | ||||
break; | /* FALLTHROUGH */ | ||||
case OEOW: | case OEOW: | ||||
if (ch == EOW) | if (OP(s) != OBOW && ch == EOW) | ||||
FWD(aft, bef, 1); | FWD(aft, bef, 1); | ||||
break; | break; | ||||
case OANY: | case OANY: | ||||
if (!NONCHAR(ch)) | if (!NONCHAR(ch)) | ||||
FWD(aft, bef, 1); | FWD(aft, bef, 1); | ||||
break; | break; | ||||
case OANYOF: | case OANYOF: | ||||
cs = &g->sets[OPND(s)]; | cs = &g->sets[OPND(s)]; | ||||
▲ Show 20 Lines • Show All 155 Lines • Show Last 20 Lines |