Index: head/contrib/netbsd-tests/lib/libc/regex/data/meta.in =================================================================== --- head/contrib/netbsd-tests/lib/libc/regex/data/meta.in +++ head/contrib/netbsd-tests/lib/libc/regex/data/meta.in @@ -4,7 +4,9 @@ a\*c & a*c a*c a\\b & a\b a\b a\\\*b & a\*b a\*b -a\bc & abc abc +# Begin FreeBSD +a\bc &C EESCAPE +# End FreeBSD a\ &C EESCAPE a\\bc & a\bc a\bc \{ bC BADRPT Index: head/contrib/netbsd-tests/lib/libc/regex/data/subexp.in =================================================================== --- head/contrib/netbsd-tests/lib/libc/regex/data/subexp.in +++ head/contrib/netbsd-tests/lib/libc/regex/data/subexp.in @@ -12,7 +12,7 @@ a(b*)c - ac ac @c (a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de # Begin FreeBSD -a\(b\|c\)d b ab|cd ab|cd b|c +a\(b|c\)d b ab|cd ab|cd b|c # End FreeBSD # the regression tester only asks for 9 subexpressions a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j Index: head/lib/libc/regex/Symbol.map =================================================================== --- head/lib/libc/regex/Symbol.map +++ head/lib/libc/regex/Symbol.map @@ -3,8 +3,11 @@ */ FBSD_1.0 { - regcomp; regerror; regexec; regfree; +}; + +FBSD_1.6 { + regcomp; }; Index: head/lib/libc/regex/regcomp.c =================================================================== --- head/lib/libc/regex/regcomp.c +++ head/lib/libc/regex/regcomp.c @@ -102,11 +102,14 @@ sopno pend[NPAREN]; /* -> ) ([0] unused) */ bool allowbranch; /* can this expression branch? */ bool bre; /* convenience; is this a BRE? */ + int pflags; /* other parsing flags -- legacy escapes? */ bool (*parse_expr)(struct parse *, struct branchc *); void (*pre_parse)(struct parse *, struct branchc *); void (*post_parse)(struct parse *, struct branchc *); }; +#define PFLAG_LEGACY_ESC 0x00000001 + /* ========= begin header generated by ./mkh ========= */ #ifdef __cplusplus extern "C" { @@ -132,6 +135,7 @@ static void p_b_eclass(struct parse *p, cset *cs); static wint_t p_b_symbol(struct parse *p); static wint_t p_b_coll_elem(struct parse *p, wint_t endc); +static bool may_escape(struct parse *p, const wint_t ch); static wint_t othercase(wint_t ch); static void bothcases(struct parse *p, wint_t ch); static void ordinary(struct parse *p, wint_t ch); @@ -199,22 +203,10 @@ /* Macro used by computejump()/computematchjump() */ #define MIN(a,b) ((a)<(b)?(a):(b)) -/* - - regcomp - interface for parser and compilation - = extern int regcomp(regex_t *, const char *, int); - = #define REG_BASIC 0000 - = #define REG_EXTENDED 0001 - = #define REG_ICASE 0002 - = #define REG_NOSUB 0004 - = #define REG_NEWLINE 0010 - = #define REG_NOSPEC 0020 - = #define REG_PEND 0040 - = #define REG_DUMP 0200 - */ -int /* 0 success, otherwise REG_something */ -regcomp(regex_t * __restrict preg, +static int /* 0 success, otherwise REG_something */ +regcomp_internal(regex_t * __restrict preg, const char * __restrict pattern, - int cflags) + int cflags, int pflags) { struct parse pa; struct re_guts *g; @@ -273,6 +265,7 @@ p->end = p->next + len; p->error = 0; p->ncsalloc = 0; + p->pflags = pflags; for (i = 0; i < NPAREN; i++) { p->pbegin[i] = 0; p->pend[i] = 0; @@ -346,6 +339,43 @@ } /* + - regcomp - interface for parser and compilation + = extern int regcomp(regex_t *, const char *, int); + = #define REG_BASIC 0000 + = #define REG_EXTENDED 0001 + = #define REG_ICASE 0002 + = #define REG_NOSUB 0004 + = #define REG_NEWLINE 0010 + = #define REG_NOSPEC 0020 + = #define REG_PEND 0040 + = #define REG_DUMP 0200 + */ +int /* 0 success, otherwise REG_something */ +regcomp(regex_t * __restrict preg, + const char * __restrict pattern, + int cflags) +{ + + return (regcomp_internal(preg, pattern, cflags, 0)); +} + +#ifndef LIBREGEX +/* + * Legacy interface that requires more lax escaping behavior. + */ +int +freebsd12_regcomp(regex_t * __restrict preg, + const char * __restrict pattern, + int cflags, int pflags) +{ + + return (regcomp_internal(preg, pattern, cflags, PFLAG_LEGACY_ESC)); +} + +__sym_compat(regcomp, freebsd12_regcomp, FBSD_1.0); +#endif /* !LIBREGEX */ + +/* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op, - return whether we should terminate or not == static bool p_ere_exp(struct parse *p); @@ -435,7 +465,10 @@ EMIT(OEOW, 0); break; default: - ordinary(p, wc); + if (may_escape(p, wc)) + ordinary(p, wc); + else + SETERROR(REG_EESCAPE); break; } break; @@ -797,7 +830,10 @@ return (false); /* Definitely not $... */ p->next--; wc = WGETNEXT(); - ordinary(p, wc); + if ((c & BACKSL) == 0 || may_escape(p, wc)) + ordinary(p, wc); + else + SETERROR(REG_EESCAPE); break; } @@ -1092,6 +1128,55 @@ else SETERROR(REG_ECOLLATE); /* neither */ return(0); +} + +/* + - may_escape - determine whether 'ch' is escape-able in the current context + == static int may_escape(struct parse *p, const wint_t ch) + */ +static bool +may_escape(struct parse *p, const wint_t ch) +{ + + if ((p->pflags & PFLAG_LEGACY_ESC) != 0) + return (true); + if (isalpha(ch) || ch == '\'' || ch == '`') + return (false); + return (true); +#ifdef NOTYET + /* + * Build a whitelist of characters that may be escaped to produce an + * ordinary in the current context. This assumes that these have not + * been otherwise interpreted as a special character. Escaping an + * ordinary character yields undefined results according to + * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take + * advantage of this and use escaped ordinary characters to provide + * special meaning, e.g. \b, \B, \w, \W, \s, \S. + */ + switch(ch) { + case '|': + case '+': + case '?': + /* The above characters may not be escaped in BREs */ + if (!(p->g->cflags®_EXTENDED)) + return (false); + /* Fallthrough */ + case '(': + case ')': + case '{': + case '}': + case '.': + case '[': + case ']': + case '\\': + case '*': + case '^': + case '$': + return (true); + default: + return (false); + } +#endif } /*