Changeset View
Changeset View
Standalone View
Standalone View
lib/libc/regex/regcomp.c
Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | |||||
#include <ctype.h> | #include <ctype.h> | ||||
#include <limits.h> | #include <limits.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <regex.h> | #include <regex.h> | ||||
#include <stdbool.h> | #include <stdbool.h> | ||||
#include <wchar.h> | #include <wchar.h> | ||||
#include <wctype.h> | #include <wctype.h> | ||||
#ifndef LIBREGEX | |||||
#include "collate.h" | #include "collate.h" | ||||
#endif | |||||
#include "utils.h" | #include "utils.h" | ||||
#include "regex2.h" | #include "regex2.h" | ||||
#include "cname.h" | #include "cname.h" | ||||
/* | /* | ||||
* Branching context, used to keep track of branch state for all of the branch- | * Branching context, used to keep track of branch state for all of the branch- | ||||
Show All 16 Lines | |||||
/* | /* | ||||
* parse structure, passed up and down to avoid global variables and | * parse structure, passed up and down to avoid global variables and | ||||
* other clumsinesses | * other clumsinesses | ||||
*/ | */ | ||||
struct parse { | struct parse { | ||||
const char *next; /* next character in RE */ | const char *next; /* next character in RE */ | ||||
const char *end; /* end of string (-> NUL normally) */ | const char *end; /* end of string (-> NUL normally) */ | ||||
int error; /* has an error been seen? */ | int error; /* has an error been seen? */ | ||||
int gnuext; | |||||
sop *strip; /* malloced strip */ | sop *strip; /* malloced strip */ | ||||
sopno ssize; /* malloced strip size (allocated) */ | sopno ssize; /* malloced strip size (allocated) */ | ||||
sopno slen; /* malloced strip length (used) */ | sopno slen; /* malloced strip length (used) */ | ||||
int ncsalloc; /* number of csets allocated */ | int ncsalloc; /* number of csets allocated */ | ||||
struct re_guts *g; | struct re_guts *g; | ||||
# define NPAREN 10 /* we need to remember () 1-9 for back refs */ | # define NPAREN 10 /* we need to remember () 1-9 for back refs */ | ||||
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | ||||
sopno pend[NPAREN]; /* -> ) ([0] unused) */ | sopno pend[NPAREN]; /* -> ) ([0] unused) */ | ||||
Show All 18 Lines | |||||
static bool p_branch_empty(struct parse *p, struct branchc *bc); | static bool p_branch_empty(struct parse *p, struct branchc *bc); | ||||
static bool p_branch_do(struct parse *p, struct branchc *bc); | static bool p_branch_do(struct parse *p, struct branchc *bc); | ||||
static void p_bre_pre_parse(struct parse *p, struct branchc *bc); | static void p_bre_pre_parse(struct parse *p, struct branchc *bc); | ||||
static void p_bre_post_parse(struct parse *p, struct branchc *bc); | static void p_bre_post_parse(struct parse *p, struct branchc *bc); | ||||
static void p_re(struct parse *p, int end1, int end2); | static void p_re(struct parse *p, int end1, int end2); | ||||
static bool p_simp_re(struct parse *p, struct branchc *bc); | static bool p_simp_re(struct parse *p, struct branchc *bc); | ||||
static int p_count(struct parse *p); | static int p_count(struct parse *p); | ||||
static void p_bracket(struct parse *p); | static void p_bracket(struct parse *p); | ||||
static int p_range_cmp(wchar_t c1, wchar_t c2); | |||||
static void p_b_term(struct parse *p, cset *cs); | static void p_b_term(struct parse *p, cset *cs); | ||||
static int p_b_pseudoclass(struct parse *p, char c); | |||||
static void p_b_cclass(struct parse *p, cset *cs); | static void p_b_cclass(struct parse *p, cset *cs); | ||||
static void p_b_cclass_named(struct parse *p, cset *cs, const char[]); | |||||
static void p_b_eclass(struct parse *p, cset *cs); | static void p_b_eclass(struct parse *p, cset *cs); | ||||
static wint_t p_b_symbol(struct parse *p); | static wint_t p_b_symbol(struct parse *p); | ||||
static wint_t p_b_coll_elem(struct parse *p, wint_t endc); | static wint_t p_b_coll_elem(struct parse *p, wint_t endc); | ||||
static wint_t othercase(wint_t ch); | static wint_t othercase(wint_t ch); | ||||
static void bothcases(struct parse *p, wint_t ch); | static void bothcases(struct parse *p, wint_t ch); | ||||
static void ordinary(struct parse *p, wint_t ch); | static void ordinary(struct parse *p, wint_t ch); | ||||
static void nonnewline(struct parse *p); | static void nonnewline(struct parse *p); | ||||
static void repeat(struct parse *p, sopno start, int from, int to); | static void repeat(struct parse *p, sopno start, int from, int to); | ||||
Show All 32 Lines | |||||
#define PEEK2() (*(p->next+1)) | #define PEEK2() (*(p->next+1)) | ||||
#define MORE() (p->next < p->end) | #define MORE() (p->next < p->end) | ||||
#define MORE2() (p->next+1 < p->end) | #define MORE2() (p->next+1 < p->end) | ||||
#define SEE(c) (MORE() && PEEK() == (c)) | #define SEE(c) (MORE() && PEEK() == (c)) | ||||
#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) | #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) | ||||
#define SEESPEC(a) (p->bre ? SEETWO('\\', a) : SEE(a)) | #define SEESPEC(a) (p->bre ? SEETWO('\\', a) : SEE(a)) | ||||
#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) | #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) | ||||
#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) | #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) | ||||
#define EATSPEC(a) (p->bre ? EATTWO('\\', a) : EAT(a)) | |||||
#define NEXT() (p->next++) | #define NEXT() (p->next++) | ||||
#define NEXT2() (p->next += 2) | #define NEXT2() (p->next += 2) | ||||
#define NEXTn(n) (p->next += (n)) | #define NEXTn(n) (p->next += (n)) | ||||
#define GETNEXT() (*p->next++) | #define GETNEXT() (*p->next++) | ||||
#define WGETNEXT() wgetnext(p) | #define WGETNEXT() wgetnext(p) | ||||
#define SETERROR(e) seterr(p, (e)) | #define SETERROR(e) seterr(p, (e)) | ||||
#define REQUIRE(co, e) ((co) || SETERROR(e)) | #define REQUIRE(co, e) ((co) || SETERROR(e)) | ||||
#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) | #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) | ||||
▲ Show 20 Lines • Show All 90 Lines • ▼ Show 20 Lines | #endif | ||||
p->next = pattern; /* convenience; we do not modify it */ | p->next = pattern; /* convenience; we do not modify it */ | ||||
p->end = p->next + len; | p->end = p->next + len; | ||||
p->error = 0; | p->error = 0; | ||||
p->ncsalloc = 0; | p->ncsalloc = 0; | ||||
for (i = 0; i < NPAREN; i++) { | for (i = 0; i < NPAREN; i++) { | ||||
p->pbegin[i] = 0; | p->pbegin[i] = 0; | ||||
p->pend[i] = 0; | p->pend[i] = 0; | ||||
} | } | ||||
#ifdef LIBREGEX | |||||
if (cflags&REG_POSIX) { | |||||
p->gnuext = false; | |||||
p->allowbranch = (cflags & REG_EXTENDED) != 0; | |||||
} else | |||||
p->gnuext = p->allowbranch = true; | |||||
#else | |||||
p->gnuext = false; | |||||
p->allowbranch = (cflags & REG_EXTENDED) != 0; | |||||
#endif | |||||
if (cflags & REG_EXTENDED) { | if (cflags & REG_EXTENDED) { | ||||
p->allowbranch = true; | |||||
p->bre = false; | p->bre = false; | ||||
p->parse_expr = p_ere_exp; | p->parse_expr = p_ere_exp; | ||||
p->pre_parse = NULL; | p->pre_parse = NULL; | ||||
p->post_parse = NULL; | p->post_parse = NULL; | ||||
} else { | } else { | ||||
p->allowbranch = false; | |||||
p->bre = true; | p->bre = true; | ||||
p->parse_expr = p_simp_re; | p->parse_expr = p_simp_re; | ||||
p->pre_parse = p_bre_pre_parse; | p->pre_parse = p_bre_pre_parse; | ||||
p->post_parse = p_bre_post_parse; | p->post_parse = p_bre_post_parse; | ||||
} | } | ||||
g->sets = NULL; | g->sets = NULL; | ||||
g->ncsets = 0; | g->ncsets = 0; | ||||
g->cflags = cflags; | g->cflags = cflags; | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | |||||
static bool | static bool | ||||
p_ere_exp(struct parse *p, struct branchc *bc) | p_ere_exp(struct parse *p, struct branchc *bc) | ||||
{ | { | ||||
char c; | char c; | ||||
wint_t wc; | wint_t wc; | ||||
sopno pos; | sopno pos; | ||||
int count; | int count; | ||||
int count2; | int count2; | ||||
#ifdef LIBREGEX | |||||
int i; | |||||
int handled; | |||||
#endif | |||||
sopno subno; | sopno subno; | ||||
int wascaret = 0; | int wascaret = 0; | ||||
assert(MORE()); /* caller should have ensured this */ | assert(MORE()); /* caller should have ensured this */ | ||||
c = GETNEXT(); | c = GETNEXT(); | ||||
(void)bc; | |||||
#ifdef LIBREGEX | |||||
handled = 0; | |||||
#endif | |||||
pos = HERE(); | pos = HERE(); | ||||
switch (c) { | switch (c) { | ||||
case '(': | case '(': | ||||
(void)REQUIRE(MORE(), REG_EPAREN); | (void)REQUIRE(MORE(), REG_EPAREN); | ||||
p->g->nsub++; | p->g->nsub++; | ||||
subno = p->g->nsub; | subno = p->g->nsub; | ||||
if (subno < NPAREN) | if (subno < NPAREN) | ||||
p->pbegin[subno] = HERE(); | p->pbegin[subno] = HERE(); | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | else | ||||
EMIT(OANY, 0); | EMIT(OANY, 0); | ||||
break; | break; | ||||
case '[': | case '[': | ||||
p_bracket(p); | p_bracket(p); | ||||
break; | break; | ||||
case '\\': | case '\\': | ||||
(void)REQUIRE(MORE(), REG_EESCAPE); | (void)REQUIRE(MORE(), REG_EESCAPE); | ||||
wc = WGETNEXT(); | wc = WGETNEXT(); | ||||
#ifdef LIBREGEX | |||||
if (p->gnuext) { | |||||
handled = 1; | |||||
switch (wc) { | switch (wc) { | ||||
case '`': | |||||
EMIT(OBOS, 0); | |||||
break; | |||||
case '\'': | |||||
EMIT(OEOS, 0); | |||||
break; | |||||
case 'b': | |||||
EMIT(OWBND, 0); | |||||
break; | |||||
case 'B': | |||||
EMIT(ONWBND, 0); | |||||
break; | |||||
case 'W': | |||||
case 'w': | |||||
case 'S': | |||||
case 's': | |||||
p_b_pseudoclass(p, wc); | |||||
break; | |||||
case '1': | |||||
case '2': | |||||
case '3': | |||||
case '4': | |||||
case '5': | |||||
case '6': | |||||
case '7': | |||||
case '8': | |||||
case '9': | |||||
i = wc - '0'; | |||||
assert(i < NPAREN); | |||||
if (p->pend[i] != 0) { | |||||
assert(i <= p->g->nsub); | |||||
EMIT(OBACK_, i); | |||||
assert(p->pbegin[i] != 0); | |||||
assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); | |||||
assert(OP(p->strip[p->pend[i]]) == ORPAREN); | |||||
(void) dupl(p, p->pbegin[i]+1, p->pend[i]); | |||||
EMIT(O_BACK, i); | |||||
} else | |||||
SETERROR(REG_ESUBREG); | |||||
p->g->backrefs = 1; | |||||
break; | |||||
default: | |||||
handled = 0; | |||||
} | |||||
/* Don't proceed to the POSIX bits if we've already handled it */ | |||||
if (handled) | |||||
break; | |||||
} | |||||
#endif | |||||
switch (wc) { | |||||
case '<': | case '<': | ||||
EMIT(OBOW, 0); | EMIT(OBOW, 0); | ||||
break; | break; | ||||
case '>': | case '>': | ||||
EMIT(OEOW, 0); | EMIT(OEOW, 0); | ||||
break; | break; | ||||
default: | default: | ||||
ordinary(p, wc); | ordinary(p, wc); | ||||
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines | |||||
* Eat consecutive branch delimiters for the kind of expression that we are | * Eat consecutive branch delimiters for the kind of expression that we are | ||||
* parsing, return the number of delimiters that we ate. | * parsing, return the number of delimiters that we ate. | ||||
*/ | */ | ||||
static int | static int | ||||
p_branch_eat_delim(struct parse *p, struct branchc *bc) | p_branch_eat_delim(struct parse *p, struct branchc *bc) | ||||
{ | { | ||||
int nskip; | int nskip; | ||||
(void)bc; | |||||
nskip = 0; | nskip = 0; | ||||
while (EAT('|')) | while (EATSPEC('|')) | ||||
++nskip; | ++nskip; | ||||
return (nskip); | return (nskip); | ||||
} | } | ||||
/* | /* | ||||
* Insert necessary branch book-keeping operations. This emits a | * Insert necessary branch book-keeping operations. This emits a | ||||
* bogus 'next' offset, since we still have more to parse | * bogus 'next' offset, since we still have more to parse | ||||
*/ | */ | ||||
Show All 35 Lines | |||||
* in the future, be used to allow for more permissive behavior with empty | * in the future, be used to allow for more permissive behavior with empty | ||||
* branches. The return value should indicate whether parsing may continue | * branches. The return value should indicate whether parsing may continue | ||||
* or not. | * or not. | ||||
*/ | */ | ||||
static bool | static bool | ||||
p_branch_empty(struct parse *p, struct branchc *bc) | p_branch_empty(struct parse *p, struct branchc *bc) | ||||
{ | { | ||||
#if defined(LIBREGEX) && defined(NOTYET) | |||||
if (bc->outer) | |||||
p->g->iflags |= EMPTBR; | |||||
return (true); | |||||
#else | |||||
(void)bc; | |||||
SETERROR(REG_EMPTY); | SETERROR(REG_EMPTY); | ||||
return (false); | return (false); | ||||
#endif | |||||
} | } | ||||
/* | /* | ||||
* Take care of any branching requirements. This includes inserting the | * Take care of any branching requirements. This includes inserting the | ||||
* appropriate branching instructions as well as eating all of the branch | * appropriate branching instructions as well as eating all of the branch | ||||
* delimiters until we either run out of pattern or need to parse more pattern. | * delimiters until we either run out of pattern or need to parse more pattern. | ||||
*/ | */ | ||||
static bool | static bool | ||||
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines | for (;;) { | ||||
if (p->pre_parse != NULL) | if (p->pre_parse != NULL) | ||||
p->pre_parse(p, &bc); | p->pre_parse(p, &bc); | ||||
while (MORE() && (!p->allowbranch || !SEESPEC('|')) && !SEEEND()) { | while (MORE() && (!p->allowbranch || !SEESPEC('|')) && !SEEEND()) { | ||||
bc.terminate = p->parse_expr(p, &bc); | bc.terminate = p->parse_expr(p, &bc); | ||||
++bc.nchain; | ++bc.nchain; | ||||
} | } | ||||
if (p->post_parse != NULL) | if (p->post_parse != NULL) | ||||
p->post_parse(p, &bc); | p->post_parse(p, &bc); | ||||
(void) REQUIRE(HERE() != bc.start, REG_EMPTY); | (void) REQUIRE(p->gnuext || HERE() != bc.start, REG_EMPTY); | ||||
#ifdef LIBREGEX | |||||
if (HERE() == bc.start && !p_branch_empty(p, &bc)) | |||||
break; | |||||
#endif | |||||
if (!p->allowbranch) | if (!p->allowbranch) | ||||
break; | break; | ||||
/* | /* | ||||
* p_branch_do's return value indicates whether we should | * p_branch_do's return value indicates whether we should | ||||
* continue parsing or not. This is both for correctness and | * continue parsing or not. This is both for correctness and | ||||
* a slight optimization, because it will check if we've | * a slight optimization, because it will check if we've | ||||
* encountered an empty branch or the end of the string | * encountered an empty branch or the end of the string | ||||
* immediately following a branch delimiter. | * immediately following a branch delimiter. | ||||
Show All 10 Lines | |||||
/* | /* | ||||
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition | - p_simp_re - parse a simple RE, an atom possibly followed by a repetition | ||||
== static bool p_simp_re(struct parse *p, struct branchc *bc); | == static bool p_simp_re(struct parse *p, struct branchc *bc); | ||||
*/ | */ | ||||
static bool /* was the simple RE an unbackslashed $? */ | static bool /* was the simple RE an unbackslashed $? */ | ||||
p_simp_re(struct parse *p, struct branchc *bc) | p_simp_re(struct parse *p, struct branchc *bc) | ||||
{ | { | ||||
int c; | int c; | ||||
int cc; /* convenient/control character */ | |||||
int count; | int count; | ||||
int count2; | int count2; | ||||
sopno pos; | sopno pos; | ||||
bool handled; | |||||
int i; | int i; | ||||
wint_t wc; | wint_t wc; | ||||
sopno subno; | sopno subno; | ||||
# define BACKSL (1<<CHAR_BIT) | # define BACKSL (1<<CHAR_BIT) | ||||
pos = HERE(); /* repetition op, if any, covers from here */ | pos = HERE(); /* repetition op, if any, covers from here */ | ||||
handled = false; | |||||
assert(MORE()); /* caller should have ensured this */ | assert(MORE()); /* caller should have ensured this */ | ||||
c = GETNEXT(); | c = GETNEXT(); | ||||
if (c == '\\') { | if (c == '\\') { | ||||
(void)REQUIRE(MORE(), REG_EESCAPE); | (void)REQUIRE(MORE(), REG_EESCAPE); | ||||
c = BACKSL | GETNEXT(); | cc = GETNEXT(); | ||||
c = BACKSL | cc; | |||||
#ifdef LIBREGEX | |||||
if (p->gnuext) { | |||||
handled = true; | |||||
switch (c) { | |||||
case BACKSL|'`': | |||||
EMIT(OBOS, 0); | |||||
break; | |||||
case BACKSL|'\'': | |||||
EMIT(OEOS, 0); | |||||
break; | |||||
case BACKSL|'b': | |||||
EMIT(OWBND, 0); | |||||
break; | |||||
case BACKSL|'B': | |||||
EMIT(ONWBND, 0); | |||||
break; | |||||
case BACKSL|'W': | |||||
case BACKSL|'w': | |||||
case BACKSL|'S': | |||||
case BACKSL|'s': | |||||
p_b_pseudoclass(p, cc); | |||||
break; | |||||
default: | |||||
handled = false; | |||||
} | } | ||||
} | |||||
#endif | |||||
} | |||||
if (!handled) { | |||||
switch (c) { | switch (c) { | ||||
case '.': | case '.': | ||||
if (p->g->cflags&REG_NEWLINE) | if (p->g->cflags&REG_NEWLINE) | ||||
nonnewline(p); | nonnewline(p); | ||||
else | else | ||||
EMIT(OANY, 0); | EMIT(OANY, 0); | ||||
break; | break; | ||||
case '[': | case '[': | ||||
p_bracket(p); | p_bracket(p); | ||||
break; | break; | ||||
case BACKSL|'<': | case BACKSL|'<': | ||||
EMIT(OBOW, 0); | EMIT(OBOW, 0); | ||||
break; | break; | ||||
case BACKSL|'>': | case BACKSL|'>': | ||||
EMIT(OEOW, 0); | EMIT(OEOW, 0); | ||||
break; | break; | ||||
case BACKSL|'{': | case BACKSL|'{': | ||||
SETERROR(REG_BADRPT); | SETERROR(REG_BADRPT); | ||||
break; | break; | ||||
case BACKSL|'(': | case BACKSL|'(': | ||||
p->g->nsub++; | p->g->nsub++; | ||||
subno = p->g->nsub; | subno = p->g->nsub; | ||||
if (subno < NPAREN) | if (subno < NPAREN) | ||||
p->pbegin[subno] = HERE(); | p->pbegin[subno] = HERE(); | ||||
EMIT(OLPAREN, subno); | EMIT(OLPAREN, subno); | ||||
/* the MORE here is an error heuristic */ | /* the MORE here is an error heuristic */ | ||||
if (MORE() && !SEETWO('\\', ')')) | if (MORE() && !SEETWO('\\', ')')) | ||||
p_re(p, '\\', ')'); | p_re(p, '\\', ')'); | ||||
if (subno < NPAREN) { | if (subno < NPAREN) { | ||||
p->pend[subno] = HERE(); | p->pend[subno] = HERE(); | ||||
assert(p->pend[subno] != 0); | assert(p->pend[subno] != 0); | ||||
} | } | ||||
EMIT(ORPAREN, subno); | EMIT(ORPAREN, subno); | ||||
(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); | (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); | ||||
break; | break; | ||||
case BACKSL|')': /* should not get here -- must be user */ | case BACKSL|')': /* should not get here -- must be user */ | ||||
SETERROR(REG_EPAREN); | SETERROR(REG_EPAREN); | ||||
break; | break; | ||||
case BACKSL|'1': | case BACKSL|'1': | ||||
case BACKSL|'2': | case BACKSL|'2': | ||||
case BACKSL|'3': | case BACKSL|'3': | ||||
case BACKSL|'4': | case BACKSL|'4': | ||||
case BACKSL|'5': | case BACKSL|'5': | ||||
case BACKSL|'6': | case BACKSL|'6': | ||||
case BACKSL|'7': | case BACKSL|'7': | ||||
case BACKSL|'8': | case BACKSL|'8': | ||||
case BACKSL|'9': | case BACKSL|'9': | ||||
i = (c&~BACKSL) - '0'; | i = (c&~BACKSL) - '0'; | ||||
assert(i < NPAREN); | assert(i < NPAREN); | ||||
if (p->pend[i] != 0) { | if (p->pend[i] != 0) { | ||||
assert(i <= p->g->nsub); | assert(i <= p->g->nsub); | ||||
EMIT(OBACK_, i); | EMIT(OBACK_, i); | ||||
assert(p->pbegin[i] != 0); | assert(p->pbegin[i] != 0); | ||||
assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); | assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); | ||||
assert(OP(p->strip[p->pend[i]]) == ORPAREN); | assert(OP(p->strip[p->pend[i]]) == ORPAREN); | ||||
(void) dupl(p, p->pbegin[i]+1, p->pend[i]); | (void) dupl(p, p->pbegin[i]+1, p->pend[i]); | ||||
EMIT(O_BACK, i); | EMIT(O_BACK, i); | ||||
} else | } else | ||||
SETERROR(REG_ESUBREG); | SETERROR(REG_ESUBREG); | ||||
p->g->backrefs = 1; | p->g->backrefs = 1; | ||||
break; | break; | ||||
case '*': | case '*': | ||||
/* | |||||
* Ordinary if used as the first character beyond BOL anchor of | |||||
* a (sub-)expression, counts as a bad repetition operator if it | |||||
* appears otherwise. | |||||
*/ | |||||
(void)REQUIRE(bc->nchain == 0, REG_BADRPT); | (void)REQUIRE(bc->nchain == 0, REG_BADRPT); | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
default: | default: | ||||
if (p->error != 0) | |||||
return (false); /* Definitely not $... */ | |||||
p->next--; | p->next--; | ||||
wc = WGETNEXT(); | wc = WGETNEXT(); | ||||
ordinary(p, wc); | ordinary(p, wc); | ||||
break; | break; | ||||
} | } | ||||
} | |||||
if (EAT('*')) { /* implemented as +? */ | if (EAT('*')) { /* implemented as +? */ | ||||
/* this case does not require the (y|) trick, noKLUDGE */ | /* this case does not require the (y|) trick, noKLUDGE */ | ||||
INSERT(OPLUS_, pos); | INSERT(OPLUS_, pos); | ||||
ASTERN(O_PLUS, pos); | ASTERN(O_PLUS, pos); | ||||
INSERT(OQUEST_, pos); | INSERT(OQUEST_, pos); | ||||
ASTERN(O_QUEST, pos); | ASTERN(O_QUEST, pos); | ||||
#ifdef LIBREGEX | |||||
} else if (p->gnuext && EATTWO('\\', '?')) { | |||||
INSERT(OQUEST_, pos); | |||||
ASTERN(O_QUEST, pos); | |||||
} else if (p->gnuext && EATTWO('\\', '+')) { | |||||
INSERT(OPLUS_, pos); | |||||
ASTERN(O_PLUS, pos); | |||||
#endif | |||||
} else if (EATTWO('\\', '{')) { | } else if (EATTWO('\\', '{')) { | ||||
count = p_count(p); | count = p_count(p); | ||||
if (EAT(',')) { | if (EAT(',')) { | ||||
if (MORE() && isdigit((uch)PEEK())) { | if (MORE() && isdigit((uch)PEEK())) { | ||||
count2 = p_count(p); | count2 = p_count(p); | ||||
(void)REQUIRE(count <= count2, REG_BADBR); | (void)REQUIRE(count <= count2, REG_BADBR); | ||||
} else /* single number with comma */ | } else /* single number with comma */ | ||||
count2 = INFINITY; | count2 = INFINITY; | ||||
▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines | p_bracket(struct parse *p) | ||||
if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */ | if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */ | ||||
ordinary(p, ch); | ordinary(p, ch); | ||||
freeset(p, cs); | freeset(p, cs); | ||||
} else | } else | ||||
EMIT(OANYOF, (int)(cs - p->g->sets)); | EMIT(OANYOF, (int)(cs - p->g->sets)); | ||||
} | } | ||||
static int | |||||
p_range_cmp(wchar_t c1, wchar_t c2) | |||||
{ | |||||
#ifndef LIBREGEX | |||||
return __wcollate_range_cmp(c1, c2); | |||||
#else | |||||
/* Copied from libc/collate __wcollate_range_cmp */ | |||||
wchar_t s1[2], s2[2]; | |||||
s1[0] = c1; | |||||
s1[1] = L'\0'; | |||||
s2[0] = c2; | |||||
s2[1] = L'\0'; | |||||
return (wcscoll(s1, s2)); | |||||
#endif | |||||
} | |||||
/* | /* | ||||
- p_b_term - parse one term of a bracketed character list | - p_b_term - parse one term of a bracketed character list | ||||
== static void p_b_term(struct parse *p, cset *cs); | == static void p_b_term(struct parse *p, cset *cs); | ||||
*/ | */ | ||||
static void | static void | ||||
p_b_term(struct parse *p, cset *cs) | p_b_term(struct parse *p, cset *cs) | ||||
{ | { | ||||
char c; | char c; | ||||
wint_t start, finish; | wint_t start, finish; | ||||
wint_t i; | wint_t i; | ||||
#ifndef LIBREGEX | |||||
struct xlocale_collate *table = | struct xlocale_collate *table = | ||||
(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; | (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; | ||||
#endif | |||||
/* classify what we've got */ | /* classify what we've got */ | ||||
switch ((MORE()) ? PEEK() : '\0') { | switch ((MORE()) ? PEEK() : '\0') { | ||||
case '[': | case '[': | ||||
c = (MORE2()) ? PEEK2() : '\0'; | c = (MORE2()) ? PEEK2() : '\0'; | ||||
break; | break; | ||||
case '-': | case '-': | ||||
SETERROR(REG_ERANGE); | SETERROR(REG_ERANGE); | ||||
return; /* NOTE RETURN */ | return; /* NOTE RETURN */ | ||||
Show All 30 Lines | if (SEE('-') && MORE2() && PEEK2() != ']') { | ||||
finish = '-'; | finish = '-'; | ||||
else | else | ||||
finish = p_b_symbol(p); | finish = p_b_symbol(p); | ||||
} else | } else | ||||
finish = start; | finish = start; | ||||
if (start == finish) | if (start == finish) | ||||
CHadd(p, cs, start); | CHadd(p, cs, start); | ||||
else { | else { | ||||
#ifndef LIBREGEX | |||||
if (table->__collate_load_error || MB_CUR_MAX > 1) { | if (table->__collate_load_error || MB_CUR_MAX > 1) { | ||||
#else | |||||
if (MB_CUR_MAX > 1) { | |||||
#endif | |||||
(void)REQUIRE(start <= finish, REG_ERANGE); | (void)REQUIRE(start <= finish, REG_ERANGE); | ||||
CHaddrange(p, cs, start, finish); | CHaddrange(p, cs, start, finish); | ||||
} else { | } else { | ||||
(void)REQUIRE(__wcollate_range_cmp(start, finish) <= 0, REG_ERANGE); | (void)REQUIRE(p_range_cmp(start, finish) <= 0, REG_ERANGE); | ||||
for (i = 0; i <= UCHAR_MAX; i++) { | for (i = 0; i <= UCHAR_MAX; i++) { | ||||
if ( __wcollate_range_cmp(start, i) <= 0 | if (p_range_cmp(start, i) <= 0 && | ||||
&& __wcollate_range_cmp(i, finish) <= 0 | p_range_cmp(i, finish) <= 0 ) | ||||
) | |||||
CHadd(p, cs, i); | CHadd(p, cs, i); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
- p_b_pseudoclass - parse a pseudo-class (\w, \W, \s, \S) | |||||
== static int p_b_pseudoclass(struct parse *p, char c) | |||||
*/ | |||||
static int | |||||
p_b_pseudoclass(struct parse *p, char c) { | |||||
cset *cs; | |||||
if ((cs = allocset(p)) == NULL) | |||||
return(0); | |||||
if (p->g->cflags&REG_ICASE) | |||||
cs->icase = 1; | |||||
switch (c) { | |||||
case 'W': | |||||
cs->invert = 1; | |||||
/* PASSTHROUGH */ | |||||
case 'w': | |||||
p_b_cclass_named(p, cs, "alnum"); | |||||
break; | |||||
case 'S': | |||||
cs->invert = 1; | |||||
/* PASSTHROUGH */ | |||||
case 's': | |||||
p_b_cclass_named(p, cs, "space"); | |||||
break; | |||||
default: | |||||
return(0); | |||||
} | |||||
EMIT(OANYOF, (int)(cs - p->g->sets)); | |||||
return(1); | |||||
} | |||||
/* | |||||
- p_b_cclass - parse a character-class name and deal with it | - p_b_cclass - parse a character-class name and deal with it | ||||
== static void p_b_cclass(struct parse *p, cset *cs); | == static void p_b_cclass(struct parse *p, cset *cs); | ||||
*/ | */ | ||||
static void | static void | ||||
p_b_cclass(struct parse *p, cset *cs) | p_b_cclass(struct parse *p, cset *cs) | ||||
{ | { | ||||
const char *sp = p->next; | const char *sp = p->next; | ||||
size_t len; | size_t len; | ||||
wctype_t wct; | |||||
char clname[16]; | char clname[16]; | ||||
while (MORE() && isalpha((uch)PEEK())) | while (MORE() && isalpha((uch)PEEK())) | ||||
NEXT(); | NEXT(); | ||||
len = p->next - sp; | len = p->next - sp; | ||||
if (len >= sizeof(clname) - 1) { | if (len >= sizeof(clname) - 1) { | ||||
SETERROR(REG_ECTYPE); | SETERROR(REG_ECTYPE); | ||||
return; | return; | ||||
} | } | ||||
memcpy(clname, sp, len); | memcpy(clname, sp, len); | ||||
clname[len] = '\0'; | clname[len] = '\0'; | ||||
p_b_cclass_named(p, cs, clname); | |||||
} | |||||
/* | |||||
- p_b_cclass_named - deal with a named character class | |||||
== static void p_b_cclass_named(struct parse *p, cset *cs, const char []); | |||||
*/ | |||||
static void | |||||
p_b_cclass_named(struct parse *p, cset *cs, const char clname[]) { | |||||
wctype_t wct; | |||||
if ((wct = wctype(clname)) == 0) { | if ((wct = wctype(clname)) == 0) { | ||||
SETERROR(REG_ECTYPE); | SETERROR(REG_ECTYPE); | ||||
return; | return; | ||||
} | } | ||||
CHaddtype(p, cs, wct); | CHaddtype(p, cs, wct); | ||||
} | } | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 612 Lines • ▼ Show 20 Lines | case OCH_: | ||||
OP(s) != OOR2) { | OP(s) != OOR2) { | ||||
g->iflags |= BAD; | g->iflags |= BAD; | ||||
return; | return; | ||||
} | } | ||||
} while (OP(s) != O_QUEST && OP(s) != O_CH); | } while (OP(s) != O_QUEST && OP(s) != O_CH); | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
case OBOW: /* things that break a sequence */ | case OBOW: /* things that break a sequence */ | ||||
case OEOW: | case OEOW: | ||||
case OWBND: | |||||
case ONWBND: | |||||
case OBOL: | case OBOL: | ||||
case OEOL: | case OEOL: | ||||
case O_QUEST: | case O_QUEST: | ||||
case O_CH: | case O_CH: | ||||
case OEND: | case OEND: | ||||
if (newlen > g->mlen) { /* ends one */ | if (newlen > (sopno)g->mlen) { /* ends one */ | ||||
start = newstart; | start = newstart; | ||||
g->mlen = newlen; | g->mlen = newlen; | ||||
if (offset > -1) { | if (offset > -1) { | ||||
g->moffset += offset; | g->moffset += offset; | ||||
offset = newlen; | offset = newlen; | ||||
} else | } else | ||||
g->moffset = offset; | g->moffset = offset; | ||||
} else { | } else { | ||||
if (offset > -1) | if (offset > -1) | ||||
offset += newlen; | offset += newlen; | ||||
} | } | ||||
newlen = 0; | newlen = 0; | ||||
break; | break; | ||||
case OANY: | case OANY: | ||||
if (newlen > g->mlen) { /* ends one */ | if (newlen > (sopno)g->mlen) { /* ends one */ | ||||
start = newstart; | start = newstart; | ||||
g->mlen = newlen; | g->mlen = newlen; | ||||
if (offset > -1) { | if (offset > -1) { | ||||
g->moffset += offset; | g->moffset += offset; | ||||
offset = newlen; | offset = newlen; | ||||
} else | } else | ||||
g->moffset = offset; | g->moffset = offset; | ||||
} else { | } else { | ||||
if (offset > -1) | if (offset > -1) | ||||
offset += newlen; | offset += newlen; | ||||
} | } | ||||
if (offset > -1) | if (offset > -1) | ||||
offset++; | offset++; | ||||
newlen = 0; | newlen = 0; | ||||
break; | break; | ||||
case OANYOF: /* may or may not invalidate offset */ | case OANYOF: /* may or may not invalidate offset */ | ||||
/* First, everything as OANY */ | /* First, everything as OANY */ | ||||
if (newlen > g->mlen) { /* ends one */ | if (newlen > (sopno)g->mlen) { /* ends one */ | ||||
start = newstart; | start = newstart; | ||||
g->mlen = newlen; | g->mlen = newlen; | ||||
if (offset > -1) { | if (offset > -1) { | ||||
g->moffset += offset; | g->moffset += offset; | ||||
offset = newlen; | offset = newlen; | ||||
} else | } else | ||||
g->moffset = offset; | g->moffset = offset; | ||||
} else { | } else { | ||||
if (offset > -1) | if (offset > -1) | ||||
offset += newlen; | offset += newlen; | ||||
} | } | ||||
if (offset > -1) | if (offset > -1) | ||||
offset++; | offset++; | ||||
newlen = 0; | newlen = 0; | ||||
break; | break; | ||||
toohard: | toohard: | ||||
default: | default: | ||||
/* Anything here makes it impossible or too hard | /* Anything here makes it impossible or too hard | ||||
* to calculate the offset -- so we give up; | * to calculate the offset -- so we give up; | ||||
* save the last known good offset, in case the | * save the last known good offset, in case the | ||||
* must sequence doesn't occur later. | * must sequence doesn't occur later. | ||||
*/ | */ | ||||
if (newlen > g->mlen) { /* ends one */ | if (newlen > (sopno)g->mlen) { /* ends one */ | ||||
start = newstart; | start = newstart; | ||||
g->mlen = newlen; | g->mlen = newlen; | ||||
if (offset > -1) | if (offset > -1) | ||||
g->moffset += offset; | g->moffset += offset; | ||||
else | else | ||||
g->moffset = offset; | g->moffset = offset; | ||||
} | } | ||||
offset = -1; | offset = -1; | ||||
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines | case OCH_: | ||||
scan++; | scan++; | ||||
break; | break; | ||||
case OANYOF: | case OANYOF: | ||||
case OCHAR: | case OCHAR: | ||||
case OANY: | case OANY: | ||||
try++; | try++; | ||||
case OBOW: | case OBOW: | ||||
case OEOW: | case OEOW: | ||||
case OWBND: | |||||
case ONWBND: | |||||
case OLPAREN: | case OLPAREN: | ||||
case ORPAREN: | case ORPAREN: | ||||
case OOR2: | case OOR2: | ||||
break; | break; | ||||
default: | default: | ||||
try = -1; | try = -1; | ||||
break; | break; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 171 Lines • Show Last 20 Lines |