Index: head/lib/libc/regex/engine.c
===================================================================
--- head/lib/libc/regex/engine.c
+++ head/lib/libc/regex/engine.c
@@ -48,6 +48,7 @@
  */
 
 #ifdef SNAMES
+#define	stepback sstepback
 #define	matcher	smatcher
 #define	walk	swalk
 #define	dissect	sdissect
@@ -58,6 +59,7 @@
 #define	match	smat
 #endif
 #ifdef LNAMES
+#define	stepback lstepback
 #define	matcher	lmatcher
 #define	walk	lwalk
 #define	dissect	ldissect
@@ -68,6 +70,7 @@
 #define	match	lmat
 #endif
 #ifdef MNAMES
+#define	stepback mstepback
 #define	matcher	mmatcher
 #define	walk	mwalk
 #define	dissect	mdissect
@@ -142,6 +145,49 @@
 #endif
 
 /*
+ * Given a multibyte string pointed to by start, step back nchar characters
+ * from current position pointed to by cur.
+ *
+ * Returns the resulting position, or NULL if fewer than nchar characters
+ * precede cur or the bytes before cur cannot be decoded.  Distances are
+ * measured from start so that no pointer below start is ever formed.
+ */
+static const char *
+stepback(const char *start, const char *cur, int nchar)
+{
+	const char *ret;
+	int wc, mbc;
+	mbstate_t mbs;
+	size_t clen;
+
+	/* Single-byte locale: one character is exactly one byte. */
+	if (MB_CUR_MAX == 1)
+		return (cur - start >= nchar ? cur - nchar : NULL);
+
+	ret = cur;
+	for (wc = nchar; wc > 0; wc--) {
+		/*
+		 * Try ever longer byte sequences ending at ret until one
+		 * of them decodes from a fresh conversion state.
+		 */
+		for (mbc = 1; mbc <= MB_CUR_MAX; mbc++) {
+			if (ret - start < mbc)
+				return (NULL);
+			memset(&mbs, 0, sizeof(mbs));
+			clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
+			if (clen != (size_t)-1 && clen != (size_t)-2)
+				break;
+		}
+		/* Nothing up to MB_CUR_MAX bytes long decoded; give up. */
+		if (mbc > MB_CUR_MAX)
+			return (NULL);
+		ret -= mbc;
+	}
+
+	return (ret);
+}
+
+/*
  - matcher - the actual matching engine
  == static int matcher(struct re_guts *g, const char *string, \
  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
@@ -244,9 +290,14 @@
 	ZAPSTATE(&m->mbs);
 
 	/* Adjust start according to moffset, to speed things up */
-	if (dp != NULL && g->moffset > -1)
-		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
+	if (dp != NULL && g->moffset > -1) {
+		const char *nstart;
 
+		nstart = stepback(start, dp, g->moffset);
+		if (nstart != NULL)
+			start = nstart;
+	}
+
 	SP("mloop", m->st, *start);
 
 	/* this loop does only one repetition except for backrefs */
@@ -1083,6 +1134,7 @@
 #endif
 #endif
 
+#undef	stepback
 #undef	matcher
 #undef	walk
 #undef	dissect
Index: head/lib/libc/tests/regex/Makefile
===================================================================
--- head/lib/libc/tests/regex/Makefile
+++ head/lib/libc/tests/regex/Makefile
@@ -2,6 +2,9 @@
 
 PACKAGE=	tests
 
+# local test cases
+ATF_TESTS_SH+=	multibyte
+
 .include "Makefile.inc"
 .include "${.CURDIR:H}/Makefile.netbsd-tests"
 .include <bsd.test.mk>
Index: head/lib/libc/tests/regex/multibyte.sh
===================================================================
--- head/lib/libc/tests/regex/multibyte.sh
+++ head/lib/libc/tests/regex/multibyte.sh
@@ -0,0 +1,35 @@
+# $FreeBSD$
+
+atf_test_case multibyte
+multibyte_head()
+{
+	atf_set "descr" "Check matching multibyte characters (PR153502)"
+}
+multibyte_body()
+{
+	export LC_CTYPE="C.UTF-8"
+
+	printf 'é' | atf_check -o "inline:é" \
+	    sed -ne '/^.$/p'
+	printf 'éé' | atf_check -o "inline:éé" \
+	    sed -ne '/^..$/p'
+	printf 'aéa' | atf_check -o "inline:aéa" \
+	    sed -ne '/a.a/p'
+	printf 'aéa' | atf_check -o "inline:aéa" \
+	    sed -ne '/a.*a/p'
+	printf 'aaéaa' | atf_check -o "inline:aaéaa" \
+	    sed -ne '/aa.aa/p'
+	printf 'aéaéa' | atf_check -o "inline:aéaéa" \
+	    sed -ne '/a.a.a/p'
+	printf 'éa' | atf_check -o "inline:éa" \
+	    sed -ne '/.a/p'
+	printf 'aéaa' | atf_check -o "inline:aéaa" \
+	    sed -ne '/a.aa/p'
+	printf 'éaé' | atf_check -o "inline:éaé" \
+	    sed -ne '/.a./p'
+}
+
+atf_init_test_cases()
+{
+	atf_add_test_case multibyte
+}