/*
 * Patch summary: libc regex handling of multibyte locales and
 * case-insensitive matching of wide characters in bracket sets.
 *
 * regcomp.c
 *   The character-jump table is bounded by two new locals, cmin and cmax,
 *   instead of CHAR_MIN and CHAR_MAX.  When MB_CUR_MAX == 1 the bounds are
 *   CHAR_MIN..CHAR_MAX; otherwise they are 0..127.  The table is allocated
 *   with (cmax - cmin + 1) ints (previously NC + 1), the pointer is biased
 *   by -cmin, and every slot in cmin..cmax is initialised to g->mlen.
 *   NOTE(review): the added comment says "For UTF-8", but the condition is
 *   MB_CUR_MAX != 1, which covers every multibyte locale.
 *   NOTE(review): the code that fills and later reads g->charjump is outside
 *   this hunk.  Confirm it never indexes outside 0..127 when MB_CUR_MAX > 1;
 *   a pattern or input byte >= 0x80 (negative where char is signed) would
 *   land outside the allocation.
 *
 * regex2.h
 *   The set bitmap bmp is sized with NC_MAX / 8 instead of NC / 8, because
 *   NC now depends on MB_CUR_MAX (see utils.h below).
 *   In the loop over cs->wides, when cs->icase is set, ch is compared with
 *   both towlower() and towupper() of each stored wide character; otherwise
 *   the comparison is exact as before.
 *   NOTE(review): the cs->ranges loop shown in the hunk is unchanged and
 *   does not consult cs->icase; confirm that is intended.
 *
 * utils.h
 *   NC_MAX takes over the old definition of NC (CHAR_MAX - CHAR_MIN + 1).
 *   NC becomes NC_MAX when MB_CUR_MAX == 1 and 128 otherwise.
 *   NOTE(review): any remaining use of NC as an array size or in another
 *   constant expression needs checking; none is visible here.
 *
 * tests/regex/multibyte.sh
 *   The existing test case "multibyte" is renamed to "bmpat".
 *   A new test case "icase" exports LC_CTYPE=C.UTF-8 and runs
 *   sed -ne "/$a/Ip" with a = U+00B5, feeding it U+039C and then U+03BC,
 *   and expects each input line to be printed back.
 */
Index: head/lib/libc/regex/regcomp.c =================================================================== --- head/lib/libc/regex/regcomp.c +++ head/lib/libc/regex/regcomp.c @@ -1841,21 +1841,29 @@ { int ch; int mindex; + int cmin, cmax; + /* + * For UTF-8 we process only the first 128 characters corresponding to + * the POSIX locale. + */ + cmin = MB_CUR_MAX == 1 ? CHAR_MIN : 0; + cmax = MB_CUR_MAX == 1 ? CHAR_MAX : 127; + /* Avoid making errors worse */ if (p->error != 0) return; - g->charjump = (int*) malloc((NC + 1) * sizeof(int)); + g->charjump = (int *)malloc((cmax - cmin + 1) * sizeof(int)); if (g->charjump == NULL) /* Not a fatal error */ return; /* Adjust for signed chars, if necessary */ - g->charjump = &g->charjump[-(CHAR_MIN)]; + g->charjump = &g->charjump[-(cmin)]; /* If the character does not exist in the pattern, the jump * is equal to the number of characters in the pattern. */ - for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++) + for (ch = cmin; ch < cmax + 1; ch++) g->charjump[ch] = g->mlen; /* If the character does exist, compute the jump that would Index: head/lib/libc/regex/regex2.h =================================================================== --- head/lib/libc/regex/regex2.h +++ head/lib/libc/regex/regex2.h @@ -113,7 +113,7 @@ wint_t max; } crange; typedef struct { - unsigned char bmp[NC / 8]; + unsigned char bmp[NC_MAX / 8]; wctype_t *types; unsigned int ntypes; wint_t *wides; @@ -133,9 +133,14 @@ if (ch < NC) return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^ cs->invert); - for (i = 0; i < cs->nwides; i++) - if (ch == cs->wides[i]) + for (i = 0; i < cs->nwides; i++) { + if (cs->icase) { + if (ch == towlower(cs->wides[i]) || + ch == towupper(cs->wides[i])) + return (!cs->invert); + } else if (ch == cs->wides[i]) return (!cs->invert); + } for (i = 0; i < cs->nranges; i++) if (cs->ranges[i].min <= ch && ch <= cs->ranges[i].max) return (!cs->invert); Index: head/lib/libc/regex/utils.h 
=================================================================== --- head/lib/libc/regex/utils.h +++ head/lib/libc/regex/utils.h @@ -39,7 +39,9 @@ /* utility definitions */ #define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */ #define INFINITY (DUPMAX + 1) -#define NC (CHAR_MAX - CHAR_MIN + 1) + +#define NC_MAX (CHAR_MAX - CHAR_MIN + 1) +#define NC ((MB_CUR_MAX) == 1 ? (NC_MAX) : (128)) typedef unsigned char uch; /* switch off assertions (if not already off) if no REDEBUG */ Index: head/lib/libc/tests/regex/multibyte.sh =================================================================== --- head/lib/libc/tests/regex/multibyte.sh +++ head/lib/libc/tests/regex/multibyte.sh @@ -1,11 +1,11 @@ # $FreeBSD$ -atf_test_case multibyte -multibyte_head() +atf_test_case bmpat +bmpat_head() { atf_set "descr" "Check matching multibyte characters (PR153502)" } -multibyte_body() +bmpat_body() { export LC_CTYPE="C.UTF-8" @@ -29,7 +29,25 @@ sed -ne '/.a./p' } +atf_test_case icase +icase_head() +{ + atf_set "descr" "Check case-insensitive matching for characters 128-255" +} +icase_body() +{ + export LC_CTYPE="C.UTF-8" + + a=$(printf '\302\265\n') # U+00B5 + b=$(printf '\316\234\n') # U+039C + c=$(printf '\316\274\n') # U+03BC + + echo $b | atf_check -o "inline:$b\n" sed -ne "/$a/Ip" + echo $c | atf_check -o "inline:$c\n" sed -ne "/$a/Ip" +} + atf_init_test_cases() { - atf_add_test_case multibyte + atf_add_test_case bmpat + atf_add_test_case icase }