Index: contrib/netbsd-tests/lib/libc/regex/data/meta.in =================================================================== --- contrib/netbsd-tests/lib/libc/regex/data/meta.in +++ contrib/netbsd-tests/lib/libc/regex/data/meta.in @@ -4,7 +4,9 @@ a\*c & a*c a*c a\\b & a\b a\b a\\\*b & a\*b a\*b -a\bc & abc abc +# Begin FreeBSD +a\bc &C EESCAPE +# End FreeBSD a\ &C EESCAPE a\\bc & a\bc a\bc \{ bC BADRPT Index: contrib/netbsd-tests/lib/libc/regex/data/subexp.in =================================================================== --- contrib/netbsd-tests/lib/libc/regex/data/subexp.in +++ contrib/netbsd-tests/lib/libc/regex/data/subexp.in @@ -12,7 +12,7 @@ a(b*)c - ac ac @c (a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de # Begin FreeBSD -a\(b\|c\)d b ab|cd ab|cd b|c +a\(b|c\)d b ab|cd ab|cd b|c # End FreeBSD # the regression tester only asks for 9 subexpressions a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j Index: lib/libc/regex/regcomp.c =================================================================== --- lib/libc/regex/regcomp.c +++ lib/libc/regex/regcomp.c @@ -132,6 +132,7 @@ static void p_b_eclass(struct parse *p, cset *cs); static wint_t p_b_symbol(struct parse *p); static wint_t p_b_coll_elem(struct parse *p, wint_t endc); +static int may_escape(struct parse *p, const wint_t ch); static wint_t othercase(wint_t ch); static void bothcases(struct parse *p, wint_t ch); static void ordinary(struct parse *p, wint_t ch); @@ -441,7 +442,10 @@ EMIT(OEOW, 0); break; default: - ordinary(p, wc); + if (may_escape(p, wc) == 0) + ordinary(p, wc); + else + SETERROR(REG_EESCAPE); break; } break; @@ -803,7 +807,10 @@ return (false); /* Definitely not $... 
*/ p->next--; wc = WGETNEXT(); - ordinary(p, wc); + if ((c & BACKSL) == 0 || may_escape(p, wc) == 0) + ordinary(p, wc); + else + SETERROR(REG_EESCAPE); break; } @@ -1100,6 +1107,53 @@ return(0); } +/* + - may_escape - determine whether 'ch' is escape-able in the current context + == static int may_escape(struct parse *p, const wint_t ch) + */ +static int +may_escape(struct parse *p, const wint_t ch) +{ + + if (isalpha(ch) || ch == '\'' || ch == '`') + return (1); + return (0); +#ifdef NOTYET + /* + * Build a whitelist of characters that may be escaped to produce an + * ordinary in the current context. This assumes that these have not + * been otherwise interpreted as a special character. Escaping an + * ordinary character yields undefined results according to + * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take + * advantage of this and use escaped ordinary characters to provide + * special meaning, e.g. \b, \B, \w, \W, \s, \S. + */ + switch(ch) { + case '|': + case '+': + case '?': + /* The above characters may not be escaped in BREs */ + if (!(p->g->cflags&REG_EXTENDED)) + return 1; + /* Fallthrough */ + case '(': + case ')': + case '{': + case '}': + case '.': + case '[': + case ']': + case '\\': + case '*': + case '^': + case '$': + return 0; + default: + return 1; + } +#endif +} + /* - othercase - return the case counterpart of an alphabetic == static wint_t othercase(wint_t ch);