Page Menu | Home | FreeBSD

D10510.id75148.diff
No One | Temporary

D10510.id75148.diff

Index: head/contrib/netbsd-tests/lib/libc/regex/data/meta.in
===================================================================
--- head/contrib/netbsd-tests/lib/libc/regex/data/meta.in
+++ head/contrib/netbsd-tests/lib/libc/regex/data/meta.in
@@ -4,7 +4,9 @@
a\*c & a*c a*c
a\\b & a\b a\b
a\\\*b & a\*b a\*b
-a\bc & abc abc
+# Begin FreeBSD
+a\bc &C EESCAPE
+# End FreeBSD
a\ &C EESCAPE
a\\bc & a\bc a\bc
\{ bC BADRPT
Index: head/contrib/netbsd-tests/lib/libc/regex/data/subexp.in
===================================================================
--- head/contrib/netbsd-tests/lib/libc/regex/data/subexp.in
+++ head/contrib/netbsd-tests/lib/libc/regex/data/subexp.in
@@ -12,7 +12,7 @@
a(b*)c - ac ac @c
(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de
# Begin FreeBSD
-a\(b\|c\)d b ab|cd ab|cd b|c
+a\(b|c\)d b ab|cd ab|cd b|c
# End FreeBSD
# the regression tester only asks for 9 subexpressions
a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j
Index: head/lib/libc/regex/Symbol.map
===================================================================
--- head/lib/libc/regex/Symbol.map
+++ head/lib/libc/regex/Symbol.map
@@ -3,8 +3,11 @@
*/
FBSD_1.0 {
- regcomp;
regerror;
regexec;
regfree;
+};
+
+FBSD_1.6 {
+ regcomp;
};
Index: head/lib/libc/regex/regcomp.c
===================================================================
--- head/lib/libc/regex/regcomp.c
+++ head/lib/libc/regex/regcomp.c
@@ -102,11 +102,14 @@
sopno pend[NPAREN]; /* -> ) ([0] unused) */
bool allowbranch; /* can this expression branch? */
bool bre; /* convenience; is this a BRE? */
+ int pflags; /* other parsing flags -- legacy escapes? */
bool (*parse_expr)(struct parse *, struct branchc *);
void (*pre_parse)(struct parse *, struct branchc *);
void (*post_parse)(struct parse *, struct branchc *);
};
+#define PFLAG_LEGACY_ESC 0x00000001
+
/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C" {
@@ -132,6 +135,7 @@
static void p_b_eclass(struct parse *p, cset *cs);
static wint_t p_b_symbol(struct parse *p);
static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
+static bool may_escape(struct parse *p, const wint_t ch);
static wint_t othercase(wint_t ch);
static void bothcases(struct parse *p, wint_t ch);
static void ordinary(struct parse *p, wint_t ch);
@@ -199,22 +203,10 @@
/* Macro used by computejump()/computematchjump() */
#define MIN(a,b) ((a)<(b)?(a):(b))
-/*
- - regcomp - interface for parser and compilation
- = extern int regcomp(regex_t *, const char *, int);
- = #define REG_BASIC 0000
- = #define REG_EXTENDED 0001
- = #define REG_ICASE 0002
- = #define REG_NOSUB 0004
- = #define REG_NEWLINE 0010
- = #define REG_NOSPEC 0020
- = #define REG_PEND 0040
- = #define REG_DUMP 0200
- */
-int /* 0 success, otherwise REG_something */
-regcomp(regex_t * __restrict preg,
+static int /* 0 success, otherwise REG_something */
+regcomp_internal(regex_t * __restrict preg,
const char * __restrict pattern,
- int cflags)
+ int cflags, int pflags)
{
struct parse pa;
struct re_guts *g;
@@ -273,6 +265,7 @@
p->end = p->next + len;
p->error = 0;
p->ncsalloc = 0;
+ p->pflags = pflags;
for (i = 0; i < NPAREN; i++) {
p->pbegin[i] = 0;
p->pend[i] = 0;
@@ -346,6 +339,43 @@
}
/*
+ - regcomp - interface for parser and compilation
+ = extern int regcomp(regex_t *, const char *, int);
+ = #define REG_BASIC 0000
+ = #define REG_EXTENDED 0001
+ = #define REG_ICASE 0002
+ = #define REG_NOSUB 0004
+ = #define REG_NEWLINE 0010
+ = #define REG_NOSPEC 0020
+ = #define REG_PEND 0040
+ = #define REG_DUMP 0200
+ */
+int /* 0 success, otherwise REG_something */
+regcomp(regex_t * __restrict preg,
+ const char * __restrict pattern,
+ int cflags)
+{
+
+ /*
+ * Pass pflags == 0: PFLAG_LEGACY_ESC stays clear, so may_escape()
+ * applies its escape restrictions for callers of this entry point.
+ */
+ return (regcomp_internal(preg, pattern, cflags, 0));
+}
+
+#ifndef LIBREGEX
+/*
+ * Legacy interface that requires more lax escaping behavior.
+ *
+ * The __sym_compat() line that follows binds this function to the FBSD_1.0
+ * version of regcomp, so it must take exactly regcomp()'s three arguments:
+ * existing binaries never pass a fourth.  The legacy-escape flag is supplied
+ * here, not by the caller.
+ *
+ * Returns whatever regcomp_internal() returns (0 on success, otherwise a
+ * REG_* error code).
+ */
+int
+freebsd12_regcomp(regex_t * __restrict preg,
+ const char * __restrict pattern,
+ int cflags)
+{
+
+ return (regcomp_internal(preg, pattern, cflags, PFLAG_LEGACY_ESC));
+}
+
+__sym_compat(regcomp, freebsd12_regcomp, FBSD_1.0);
+#endif /* !LIBREGEX */
+
+/*
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op,
- return whether we should terminate or not
== static bool p_ere_exp(struct parse *p);
@@ -435,7 +465,10 @@
EMIT(OEOW, 0);
break;
default:
- ordinary(p, wc);
+ if (may_escape(p, wc))
+ ordinary(p, wc);
+ else
+ SETERROR(REG_EESCAPE);
break;
}
break;
@@ -797,7 +830,10 @@
return (false); /* Definitely not $... */
p->next--;
wc = WGETNEXT();
- ordinary(p, wc);
+ if ((c & BACKSL) == 0 || may_escape(p, wc))
+ ordinary(p, wc);
+ else
+ SETERROR(REG_EESCAPE);
break;
}
@@ -1092,6 +1128,55 @@
else
SETERROR(REG_ECOLLATE); /* neither */
return(0);
+}
+
+/*
+ - may_escape - determine whether 'ch' is escape-able in the current context
+ == static bool may_escape(struct parse *p, const wint_t ch);
+ */
+static bool
+may_escape(struct parse *p, const wint_t ch)
+{
+
+ /* Legacy (PFLAG_LEGACY_ESC) parses accept a backslash before anything. */
+ if ((p->pflags & PFLAG_LEGACY_ESC) != 0)
+ return (true);
+ /*
+ * Otherwise reject escaped alphabetic characters, "'" and "`"; the
+ * callers turn a false return into REG_EESCAPE.  Every other character
+ * may still be escaped to produce an ordinary.
+ */
+ if (isalpha(ch) || ch == '\'' || ch == '`')
+ return (false);
+ return (true);
+ /*
+ * The stricter whitelist below is compiled only when NOTYET is defined,
+ * and even then it follows the unconditional return above.
+ */
+#ifdef NOTYET
+ /*
+ * Build a whitelist of characters that may be escaped to produce an
+ * ordinary in the current context. This assumes that these have not
+ * been otherwise interpreted as a special character. Escaping an
+ * ordinary character yields undefined results according to
+ * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take
+ * advantage of this and use escaped ordinary characters to provide
+ * special meaning, e.g. \b, \B, \w, \W, \s, \S.
+ */
+ switch(ch) {
+ case '|':
+ case '+':
+ case '?':
+ /* The above characters may not be escaped in BREs */
+ if (!(p->g->cflags&REG_EXTENDED))
+ return (false);
+ /* Fallthrough */
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ case '.':
+ case '[':
+ case ']':
+ case '\\':
+ case '*':
+ case '^':
+ case '$':
+ return (true);
+ default:
+ return (false);
+ }
+#endif
}
/*

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 21, 10:13 PM (21 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25773249
Default Alt Text
D10510.id75148.diff (6 KB)

Event Timeline