Page MenuHomeFreeBSD

D18302.id51748.diff
No OneTemporary

D18302.id51748.diff

Index: lib/libc/regex/regcomp.c
===================================================================
--- lib/libc/regex/regcomp.c
+++ lib/libc/regex/regcomp.c
@@ -1841,21 +1841,29 @@
{
int ch;
int mindex;
+ int cmin, cmax;
+
+ /*
+ * In multibyte locales (MB_CUR_MAX > 1, e.g. UTF-8) we process only the
+ * first 128 characters, which correspond to the POSIX locale.
+ */
+ cmin = MB_CUR_MAX == 1 ? CHAR_MIN : 0;
+ cmax = MB_CUR_MAX == 1 ? CHAR_MAX : 127;
/* Avoid making errors worse */
if (p->error != 0)
return;
- g->charjump = (int*) malloc((NC + 1) * sizeof(int));
+ g->charjump = (int *)malloc((cmax - cmin + 1) * sizeof(int));
if (g->charjump == NULL) /* Not a fatal error */
return;
/* Adjust for signed chars, if necessary */
- g->charjump = &g->charjump[-(CHAR_MIN)];
+ g->charjump = &g->charjump[-(cmin)];
/* If the character does not exist in the pattern, the jump
* is equal to the number of characters in the pattern.
*/
- for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
+ for (ch = cmin; ch < cmax + 1; ch++)
g->charjump[ch] = g->mlen;
/* If the character does exist, compute the jump that would
Index: lib/libc/regex/regex2.h
===================================================================
--- lib/libc/regex/regex2.h
+++ lib/libc/regex/regex2.h
@@ -113,7 +113,7 @@
wint_t max;
} crange;
typedef struct {
- unsigned char bmp[NC / 8];
+ unsigned char bmp[NC_MAX / 8];
wctype_t *types;
unsigned int ntypes;
wint_t *wides;
@@ -133,9 +133,14 @@
if (ch < NC)
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
cs->invert);
- for (i = 0; i < cs->nwides; i++)
- if (ch == cs->wides[i])
+ for (i = 0; i < cs->nwides; i++) {
+ if (cs->icase) {
+ if (ch == towlower(cs->wides[i]) ||
+ ch == towupper(cs->wides[i]))
+ return (!cs->invert);
+ } else if (ch == cs->wides[i])
return (!cs->invert);
+ }
for (i = 0; i < cs->nranges; i++)
if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max)
return (!cs->invert);
Index: lib/libc/regex/utils.h
===================================================================
--- lib/libc/regex/utils.h
+++ lib/libc/regex/utils.h
@@ -39,7 +39,9 @@
/* utility definitions */
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
#define INFINITY (DUPMAX + 1)
-#define NC (CHAR_MAX - CHAR_MIN + 1)
+
+#define NC_MAX (CHAR_MAX - CHAR_MIN + 1)
+#define NC ((MB_CUR_MAX) == 1 ? (NC_MAX) : (128))
typedef unsigned char uch;
/* switch off assertions (if not already off) if no REDEBUG */
Index: lib/libc/tests/regex/multibyte.sh
===================================================================
--- lib/libc/tests/regex/multibyte.sh
+++ lib/libc/tests/regex/multibyte.sh
@@ -1,11 +1,11 @@
# $FreeBSD$
-atf_test_case multibyte
-multibyte_head()
+atf_test_case bmpat
+bmpat_head()
{
atf_set "descr" "Check matching multibyte characters (PR153502)"
}
-multibyte_body()
+bmpat_body()
{
export LC_CTYPE="C.UTF-8"
@@ -29,7 +29,25 @@
sed -ne '/.a./p'
}
+atf_test_case icase
+icase_head()
+{
+ atf_set "descr" "Check case-insensitive matching for characters 128-255"
+}
+icase_body()
+{
+ export LC_CTYPE="C.UTF-8"
+
+ a=$(printf '\302\265\n') # U+00B5
+ b=$(printf '\316\234\n') # U+039C
+ c=$(printf '\316\274\n') # U+03BC
+
+ echo $b | atf_check -o "inline:$b\n" sed -ne "/$a/Ip"
+ echo $c | atf_check -o "inline:$c\n" sed -ne "/$a/Ip"
+}
+
atf_init_test_cases()
{
- atf_add_test_case multibyte
+ atf_add_test_case bmpat
+ atf_add_test_case icase
}

File Metadata

Mime Type
text/plain
Expires
Wed, Jan 21, 9:18 AM (3 h, 47 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27798847
Default Alt Text
D18302.id51748.diff (3 KB)

Event Timeline