# Summary of this patch (leading commentary; not part of any hunk):
#
# * lib/libc/regex/regcomp.c
#   The early return that precedes the "find the longest OCHAR sequence
#   in strip" step used to fire only when MB_CUR_MAX > 1 and the current
#   rune locale's encoding was not "UTF-8".  After this change it fires
#   whenever MB_CUR_MAX > 1, so UTF-8 is no longer exempted, and the
#   comment is reworded to say the ``char'' substring search is not safe
#   on multibyte strings.
#
# * lib/libc/tests/regex/Makefile.inc
#   Appends "multibyte" to ATF_TESTS_SH so the new shell test is listed.
#
# * lib/libc/tests/regex/multibyte.sh (new file)
#   One ATF test case, "multibyte", described as covering PR153502.  It
#   exports LC_CTYPE="C.UTF-8" and pipes short strings containing 'é'
#   through sed, using atf_check to require that each input is printed
#   back for patterns built from '.', '.*', '^' and '$'.
#
# NOTE(review): this patch arrived with all newlines collapsed.  Line
# breaks were restored to agree with the hunk headers (12/10, 6/9 and
# 0/35 lines); tab-vs-space indentation is reconstructed by convention
# and should be confirmed against the target tree before applying.
#
Index: lib/libc/regex/regcomp.c
===================================================================
--- lib/libc/regex/regcomp.c
+++ lib/libc/regex/regcomp.c
@@ -1606,12 +1606,10 @@
 		return;
 
 	/*
-	 * It's not generally safe to do a ``char'' substring search on
-	 * multibyte character strings, but it's safe for at least
-	 * UTF-8 (see RFC 3629).
+	 * It's not safe to do a ``char'' substring search on
+	 * multibyte character strings.
 	 */
-	if (MB_CUR_MAX > 1 &&
-	    strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
+	if (MB_CUR_MAX > 1)
 		return;
 
 	/* find the longest OCHAR sequence in strip */
Index: lib/libc/tests/regex/Makefile.inc
===================================================================
--- lib/libc/tests/regex/Makefile.inc
+++ lib/libc/tests/regex/Makefile.inc
@@ -4,6 +4,9 @@
 
 BINDIR?=	${TESTSDIR}
 
+# local test cases
+ATF_TESTS_SH+=	multibyte
+
 # SKIP_LEFTASSOC -> these testcases fail on FreeBSD.
 IMPLEMENTATION?=	-DREGEX_SPENCER -DSKIP_LEFTASSOC
 
Index: lib/libc/tests/regex/multibyte.sh
===================================================================
--- /dev/null
+++ lib/libc/tests/regex/multibyte.sh
@@ -0,0 +1,35 @@
+# $FreeBSD$
+
+atf_test_case multibyte
+multibyte_head()
+{
+	atf_set "descr" "Check matching multibyte characters (PR153502)"
+}
+multibyte_body()
+{
+	export LC_CTYPE="C.UTF-8"
+
+	printf 'é' | atf_check -o "inline:é" \
+	    sed -ne '/^.$/p'
+	printf 'éé' | atf_check -o "inline:éé" \
+	    sed -ne '/^..$/p'
+	printf 'aéa' | atf_check -o "inline:aéa" \
+	    sed -ne '/a.a/p'
+	printf 'aéa'| atf_check -o "inline:aéa" \
+	    sed -ne '/a.*a/p'
+	printf 'aaéaa' | atf_check -o "inline:aaéaa" \
+	    sed -ne '/aa.aa/p'
+	printf 'aéaéa' | atf_check -o "inline:aéaéa" \
+	    sed -ne '/a.a.a/p'
+	printf 'éa' | atf_check -o "inline:éa" \
+	    sed -ne '/.a/p'
+	printf 'aéaa' | atf_check -o "inline:aéaa" \
+	    sed -ne '/a.aa/p'
+	printf 'éaé' | atf_check -o "inline:éaé" \
+	    sed -ne '/.a./p'
+}
+
+atf_init_test_cases()
+{
+	atf_add_test_case multibyte
+}