Index: contrib/netbsd-tests/lib/libc/regex/data/meta.in =================================================================== --- contrib/netbsd-tests/lib/libc/regex/data/meta.in +++ contrib/netbsd-tests/lib/libc/regex/data/meta.in @@ -4,7 +4,9 @@ a\*c & a*c a*c a\\b & a\b a\b a\\\*b & a\*b a\*b -a\bc & abc abc +# Begin FreeBSD +a\bc &C EESCAPE +# End FreeBSD a\ &C EESCAPE a\\bc & a\bc a\bc \{ bC BADRPT Index: lib/libc/regex/regcomp.c =================================================================== --- lib/libc/regex/regcomp.c +++ lib/libc/regex/regcomp.c @@ -97,6 +97,7 @@ static void p_b_eclass(struct parse *p, cset *cs); static wint_t p_b_symbol(struct parse *p); static wint_t p_b_coll_elem(struct parse *p, wint_t endc); +static int may_escape(struct parse *p, const wint_t ch); static wint_t othercase(wint_t ch); static void bothcases(struct parse *p, wint_t ch); static void ordinary(struct parse *p, wint_t ch); @@ -436,7 +437,10 @@ EMIT(OEOW, 0); break; default: - ordinary(p, wc); + if (may_escape(p, wc) == 0) + ordinary(p, wc); + else + SETERROR(REG_EESCAPE); break; } break; @@ -653,7 +657,11 @@ default: p->next--; wc = WGETNEXT(); - ordinary(p, wc); + /* Throw an error if we're not dealing with an escapable */ + if (c&BACKSL && may_escape(p, wc) != 0) + SETERROR(REG_EESCAPE); + else + ordinary(p, wc); break; } @@ -930,6 +938,46 @@ } /* + - may_escape - determine whether 'ch' is escape-able in the current context + == static int may_escape(struct parse *p, const wint_t ch) + */ +static int +may_escape(struct parse *p, const wint_t ch) +{ + /* + * Build a whitelist of characters that may be escaped to produce an + * ordinary in the current context. This assumes that these have not + * been otherwise interpreted as a special character. Escaping an + * ordinary character yields undefined results according to + * IEEE 1003.1-2008. 
Some extensions (notably, some GNU extensions) take + * advantage of this and use escaped ordinary characters to provide + * special meaning, e.g. \b, \B, \w, \W, \s, \S. + */ + switch(ch) { + case '(': + case ')': + case '+': + case '{': + case '}': + case '?': + /* The above characters may not be escaped in BREs */ + if (!(p->g->cflags&REG_EXTENDED)) + return 1; + /* Fallthrough */ + case '.': + case '[': + case ']': + case '\\': + case '*': + case '^': + case '$': + return 0; + default: + return 1; + } +} + +/* - othercase - return the case counterpart of an alphabetic == static wint_t othercase(wint_t ch); */ Index: lib/libc/regex/regerror.c =================================================================== --- lib/libc/regex/regerror.c +++ lib/libc/regex/regerror.c @@ -90,7 +90,7 @@ {REG_BADPAT, "REG_BADPAT", "invalid regular expression"}, {REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"}, {REG_ECTYPE, "REG_ECTYPE", "invalid character class"}, - {REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)"}, + {REG_EESCAPE, "REG_EESCAPE", "invalid escape sequence or trailing backslash (\\)"}, {REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"}, {REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced"}, {REG_EPAREN, "REG_EPAREN", "parentheses not balanced"},