Changeset View
Changeset View
Standalone View
Standalone View
head/lib/libc/regex/regcomp.c
Show First 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | struct parse { | ||||
sopno slen; /* malloced strip length (used) */ | sopno slen; /* malloced strip length (used) */ | ||||
int ncsalloc; /* number of csets allocated */ | int ncsalloc; /* number of csets allocated */ | ||||
struct re_guts *g; | struct re_guts *g; | ||||
# define NPAREN 10 /* we need to remember () 1-9 for back refs */ | # define NPAREN 10 /* we need to remember () 1-9 for back refs */ | ||||
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | ||||
sopno pend[NPAREN]; /* -> ) ([0] unused) */ | sopno pend[NPAREN]; /* -> ) ([0] unused) */ | ||||
bool allowbranch; /* can this expression branch? */ | bool allowbranch; /* can this expression branch? */ | ||||
bool bre; /* convenience; is this a BRE? */ | bool bre; /* convenience; is this a BRE? */ | ||||
int pflags; /* other parsing flags -- legacy escapes? */ | |||||
bool (*parse_expr)(struct parse *, struct branchc *); | bool (*parse_expr)(struct parse *, struct branchc *); | ||||
void (*pre_parse)(struct parse *, struct branchc *); | void (*pre_parse)(struct parse *, struct branchc *); | ||||
void (*post_parse)(struct parse *, struct branchc *); | void (*post_parse)(struct parse *, struct branchc *); | ||||
}; | }; | ||||
#define PFLAG_LEGACY_ESC 0x00000001 | |||||
/* ========= begin header generated by ./mkh ========= */ | /* ========= begin header generated by ./mkh ========= */ | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
/* === regcomp.c === */ | /* === regcomp.c === */ | ||||
static bool p_ere_exp(struct parse *p, struct branchc *bc); | static bool p_ere_exp(struct parse *p, struct branchc *bc); | ||||
static void p_str(struct parse *p); | static void p_str(struct parse *p); | ||||
Show All 9 Lines | |||||
static int p_count(struct parse *p); | static int p_count(struct parse *p); | ||||
static void p_bracket(struct parse *p); | static void p_bracket(struct parse *p); | ||||
static int p_range_cmp(wchar_t c1, wchar_t c2); | static int p_range_cmp(wchar_t c1, wchar_t c2); | ||||
static void p_b_term(struct parse *p, cset *cs); | static void p_b_term(struct parse *p, cset *cs); | ||||
static void p_b_cclass(struct parse *p, cset *cs); | static void p_b_cclass(struct parse *p, cset *cs); | ||||
static void p_b_eclass(struct parse *p, cset *cs); | static void p_b_eclass(struct parse *p, cset *cs); | ||||
static wint_t p_b_symbol(struct parse *p); | static wint_t p_b_symbol(struct parse *p); | ||||
static wint_t p_b_coll_elem(struct parse *p, wint_t endc); | static wint_t p_b_coll_elem(struct parse *p, wint_t endc); | ||||
static bool may_escape(struct parse *p, const wint_t ch); | |||||
static wint_t othercase(wint_t ch); | static wint_t othercase(wint_t ch); | ||||
static void bothcases(struct parse *p, wint_t ch); | static void bothcases(struct parse *p, wint_t ch); | ||||
static void ordinary(struct parse *p, wint_t ch); | static void ordinary(struct parse *p, wint_t ch); | ||||
static void nonnewline(struct parse *p); | static void nonnewline(struct parse *p); | ||||
static void repeat(struct parse *p, sopno start, int from, int to); | static void repeat(struct parse *p, sopno start, int from, int to); | ||||
static int seterr(struct parse *p, int e); | static int seterr(struct parse *p, int e); | ||||
static cset *allocset(struct parse *p); | static cset *allocset(struct parse *p); | ||||
static void freeset(struct parse *p, cset *cs); | static void freeset(struct parse *p, cset *cs); | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | |||||
#define HERE() (p->slen) | #define HERE() (p->slen) | ||||
#define THERE() (p->slen - 1) | #define THERE() (p->slen - 1) | ||||
#define THERETHERE() (p->slen - 2) | #define THERETHERE() (p->slen - 2) | ||||
#define DROP(n) (p->slen -= (n)) | #define DROP(n) (p->slen -= (n)) | ||||
/* Macro used by computejump()/computematchjump() */ | /* Macro used by computejump()/computematchjump() */ | ||||
#define MIN(a,b) ((a)<(b)?(a):(b)) | #define MIN(a,b) ((a)<(b)?(a):(b)) | ||||
/* | static int /* 0 success, otherwise REG_something */ | ||||
- regcomp - interface for parser and compilation | regcomp_internal(regex_t * __restrict preg, | ||||
= extern int regcomp(regex_t *, const char *, int); | |||||
= #define REG_BASIC 0000 | |||||
= #define REG_EXTENDED 0001 | |||||
= #define REG_ICASE 0002 | |||||
= #define REG_NOSUB 0004 | |||||
= #define REG_NEWLINE 0010 | |||||
= #define REG_NOSPEC 0020 | |||||
= #define REG_PEND 0040 | |||||
= #define REG_DUMP 0200 | |||||
*/ | |||||
int /* 0 success, otherwise REG_something */ | |||||
regcomp(regex_t * __restrict preg, | |||||
const char * __restrict pattern, | const char * __restrict pattern, | ||||
int cflags) | int cflags, int pflags) | ||||
{ | { | ||||
struct parse pa; | struct parse pa; | ||||
struct re_guts *g; | struct re_guts *g; | ||||
struct parse *p = &pa; | struct parse *p = &pa; | ||||
int i; | int i; | ||||
size_t len; | size_t len; | ||||
size_t maxlen; | size_t maxlen; | ||||
#ifdef REDEBUG | #ifdef REDEBUG | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
/* set things up */ | /* set things up */ | ||||
p->g = g; | p->g = g; | ||||
p->next = pattern; /* convenience; we do not modify it */ | p->next = pattern; /* convenience; we do not modify it */ | ||||
p->end = p->next + len; | p->end = p->next + len; | ||||
p->error = 0; | p->error = 0; | ||||
p->ncsalloc = 0; | p->ncsalloc = 0; | ||||
p->pflags = pflags; | |||||
for (i = 0; i < NPAREN; i++) { | for (i = 0; i < NPAREN; i++) { | ||||
p->pbegin[i] = 0; | p->pbegin[i] = 0; | ||||
p->pend[i] = 0; | p->pend[i] = 0; | ||||
} | } | ||||
if (cflags & REG_EXTENDED) { | if (cflags & REG_EXTENDED) { | ||||
p->allowbranch = true; | p->allowbranch = true; | ||||
p->bre = false; | p->bre = false; | ||||
p->parse_expr = p_ere_exp; | p->parse_expr = p_ere_exp; | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | #endif | ||||
/* win or lose, we're done */ | /* win or lose, we're done */ | ||||
if (p->error != 0) /* lose */ | if (p->error != 0) /* lose */ | ||||
regfree(preg); | regfree(preg); | ||||
return(p->error); | return(p->error); | ||||
} | } | ||||
/* | /* | ||||
- regcomp - interface for parser and compilation | |||||
= extern int regcomp(regex_t *, const char *, int); | |||||
= #define REG_BASIC 0000 | |||||
= #define REG_EXTENDED 0001 | |||||
= #define REG_ICASE 0002 | |||||
= #define REG_NOSUB 0004 | |||||
= #define REG_NEWLINE 0010 | |||||
= #define REG_NOSPEC 0020 | |||||
= #define REG_PEND 0040 | |||||
= #define REG_DUMP 0200 | |||||
*/ | |||||
int				/* 0 success, otherwise REG_something */
regcomp(regex_t * __restrict preg,
	const char * __restrict pattern,
	int cflags)
{
	/* The public entry point requests no additional parsing flags. */
	const int pflags = 0;

	return (regcomp_internal(preg, pattern, cflags, pflags));
}
#ifndef LIBREGEX | |||||
/* | |||||
* Legacy interface that requires more lax escaping behavior. | |||||
*/ | |||||
int | |||||
freebsd12_regcomp(regex_t * __restrict preg, | |||||
const char * __restrict pattern, | |||||
int cflags, int pflags) | |||||
{ | |||||
return (regcomp_internal(preg, pattern, cflags, PFLAG_LEGACY_ESC)); | |||||
} | |||||
__sym_compat(regcomp, freebsd12_regcomp, FBSD_1.0); | |||||
#endif /* !LIBREGEX */ | |||||
/* | |||||
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op, | - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op, | ||||
- return whether we should terminate or not | - return whether we should terminate or not | ||||
== static bool p_ere_exp(struct parse *p); | == static bool p_ere_exp(struct parse *p); | ||||
*/ | */ | ||||
static bool | static bool | ||||
p_ere_exp(struct parse *p, struct branchc *bc) | p_ere_exp(struct parse *p, struct branchc *bc) | ||||
{ | { | ||||
char c; | char c; | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | case '\\': | ||||
switch (wc) { | switch (wc) { | ||||
case '<': | case '<': | ||||
EMIT(OBOW, 0); | EMIT(OBOW, 0); | ||||
break; | break; | ||||
case '>': | case '>': | ||||
EMIT(OEOW, 0); | EMIT(OEOW, 0); | ||||
break; | break; | ||||
default: | default: | ||||
if (may_escape(p, wc)) | |||||
ordinary(p, wc); | ordinary(p, wc); | ||||
else | |||||
SETERROR(REG_EESCAPE); | |||||
break; | break; | ||||
} | } | ||||
break; | break; | ||||
default: | default: | ||||
if (p->error != 0) | if (p->error != 0) | ||||
return (false); | return (false); | ||||
p->next--; | p->next--; | ||||
wc = WGETNEXT(); | wc = WGETNEXT(); | ||||
▲ Show 20 Lines • Show All 345 Lines • ▼ Show 20 Lines | case '*': | ||||
*/ | */ | ||||
(void)REQUIRE(bc->nchain == 0, REG_BADRPT); | (void)REQUIRE(bc->nchain == 0, REG_BADRPT); | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
default: | default: | ||||
if (p->error != 0) | if (p->error != 0) | ||||
return (false); /* Definitely not $... */ | return (false); /* Definitely not $... */ | ||||
p->next--; | p->next--; | ||||
wc = WGETNEXT(); | wc = WGETNEXT(); | ||||
if ((c & BACKSL) == 0 || may_escape(p, wc)) | |||||
ordinary(p, wc); | ordinary(p, wc); | ||||
else | |||||
SETERROR(REG_EESCAPE); | |||||
break; | break; | ||||
} | } | ||||
if (EAT('*')) { /* implemented as +? */ | if (EAT('*')) { /* implemented as +? */ | ||||
/* this case does not require the (y|) trick, noKLUDGE */ | /* this case does not require the (y|) trick, noKLUDGE */ | ||||
INSERT(OPLUS_, pos); | INSERT(OPLUS_, pos); | ||||
ASTERN(O_PLUS, pos); | ASTERN(O_PLUS, pos); | ||||
INSERT(OQUEST_, pos); | INSERT(OQUEST_, pos); | ||||
▲ Show 20 Lines • Show All 278 Lines • ▼ Show 20 Lines | p_b_coll_elem(struct parse *p, | ||||
memset(&mbs, 0, sizeof(mbs)); | memset(&mbs, 0, sizeof(mbs)); | ||||
if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len) | if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len) | ||||
return (wc); /* single character */ | return (wc); /* single character */ | ||||
else if (clen == (size_t)-1 || clen == (size_t)-2) | else if (clen == (size_t)-1 || clen == (size_t)-2) | ||||
SETERROR(REG_ILLSEQ); | SETERROR(REG_ILLSEQ); | ||||
else | else | ||||
SETERROR(REG_ECOLLATE); /* neither */ | SETERROR(REG_ECOLLATE); /* neither */ | ||||
return(0); | return(0); | ||||
} | |||||
/* | |||||
- may_escape - determine whether 'ch' is escape-able in the current context | |||||
== static int may_escape(struct parse *p, const wint_t ch) | |||||
*/ | |||||
static bool | |||||
may_escape(struct parse *p, const wint_t ch) | |||||
{ | |||||
if ((p->pflags & PFLAG_LEGACY_ESC) != 0) | |||||
return (true); | |||||
if (isalpha(ch) || ch == '\'' || ch == '`') | |||||
return (false); | |||||
return (true); | |||||
#ifdef NOTYET | |||||
/* | |||||
* Build a whitelist of characters that may be escaped to produce an | |||||
* ordinary in the current context. This assumes that these have not | |||||
* been otherwise interpreted as a special character. Escaping an | |||||
* ordinary character yields undefined results according to | |||||
* IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take | |||||
* advantage of this and use escaped ordinary characters to provide | |||||
* special meaning, e.g. \b, \B, \w, \W, \s, \S. | |||||
*/ | |||||
switch(ch) { | |||||
case '|': | |||||
case '+': | |||||
case '?': | |||||
/* The above characters may not be escaped in BREs */ | |||||
if (!(p->g->cflags®_EXTENDED)) | |||||
return (false); | |||||
/* Fallthrough */ | |||||
case '(': | |||||
case ')': | |||||
case '{': | |||||
case '}': | |||||
case '.': | |||||
case '[': | |||||
case ']': | |||||
case '\\': | |||||
case '*': | |||||
case '^': | |||||
case '$': | |||||
return (true); | |||||
default: | |||||
return (false); | |||||
} | |||||
#endif | |||||
} | } | ||||
/* | /* | ||||
- othercase - return the case counterpart of an alphabetic | - othercase - return the case counterpart of an alphabetic | ||||
== static wint_t othercase(wint_t ch); | == static wint_t othercase(wint_t ch); | ||||
*/ | */ | ||||
static wint_t /* if no counterpart, return ch */ | static wint_t /* if no counterpart, return ch */ | ||||
othercase(wint_t ch) | othercase(wint_t ch) | ||||
▲ Show 20 Lines • Show All 879 Lines • Show Last 20 Lines |