fnmatch(3): support collating symbols, equivalence classes and character
classes inside bracket expressions.

rangematch() now also receives the remainder of the subject string and
its conversion state, so that a multi-character collating element can
consume more than one character of the string.  The position reached in
the string is handed back to fnmatch1() through *news.

Review fixes folded into this revision:
 * reset the string conversion state with memset(strmbs, ...) rather
   than zeroing the local pointer variable itself;
 * only look for "[.", "[=" or "[:" at the end point of a range when the
   end point really is '[', so `special' is never read while stale or
   uninitialized (e.g. "[[:alpha:]0-9]" used to be rejected);
 * compare the size_t result of __collate_equiv_match() with (size_t)-1
   instead of "< 0" (assumed to be its error value, as it is for
   __collate_collating_symbol() -- confirm against collate.c);
 * drop an unreachable `len == 1' test inside the `len > 1' branch;
 * while skipping to the closing ']' after a match, keep applying the
   backslash-escape and FNM_PATHNAME slash rules of the matching loop;
 * tests: require setlocale() to succeed and add a check for a
   character class followed by an ordinary range.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch.  Apply with whitespace-tolerant
options and confirm the context lines (including the header names in
fnmatch_test.c) against the tree.

diff --git a/lib/libc/gen/fnmatch.3 b/lib/libc/gen/fnmatch.3
--- a/lib/libc/gen/fnmatch.3
+++ b/lib/libc/gen/fnmatch.3
@@ -27,7 +27,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd April 2, 2022
+.Dd April 7, 2025
 .Dt FNMATCH 3
 .Os
 .Sh NAME
@@ -129,12 +129,8 @@
 .Sh STANDARDS
 The current implementation of the
 .Fn fnmatch
-function
-.Em does not
-conform to
+function is expected to conform to
 .St -p1003.2 .
-Collating symbol expressions, equivalence class expressions and
-character class expressions are not supported.
 .Sh HISTORY
 A predecessor to
 .Fn fnmatch ,
diff --git a/lib/libc/gen/fnmatch.c b/lib/libc/gen/fnmatch.c
--- a/lib/libc/gen/fnmatch.c
+++ b/lib/libc/gen/fnmatch.c
@@ -67,7 +67,8 @@
 #define RANGE_NOMATCH   0
 #define RANGE_ERROR     (-1)
 
-static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
+static int rangematch(const char *, wchar_t, const char *, int, char **,
+    char **, mbstate_t *, mbstate_t *);
 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
 		mbstate_t);
 
@@ -85,7 +86,7 @@
 {
 	const char *bt_pattern, *bt_string;
 	mbstate_t bt_patmbs, bt_strmbs;
-	char *newp;
+	char *newp, *news;
 	char c;
 	wchar_t pc, sc;
 	size_t pclen, sclen;
@@ -164,17 +165,17 @@
 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
 				goto backtrack;
 
-			switch (rangematch(pattern, sc, flags, &newp,
-			    &patmbs)) {
+			switch (rangematch(pattern, sc, string + sclen, flags,
+			    &newp, &news, &patmbs, &strmbs)) {
 			case RANGE_ERROR:
 				goto norm;
 			case RANGE_MATCH:
 				pattern = newp;
+				string = news;
 				break;
 			case RANGE_NOMATCH:
 				goto backtrack;
 			}
-			string += sclen;
 			break;
 		case '\\':
 			if (!(flags & FNM_NOESCAPE)) {
@@ -218,8 +219,10 @@
 				if (sc == '/' && flags & FNM_PATHNAME)
 					return (FNM_NOMATCH);
 				bt_string += sclen;
-				pattern = bt_pattern, patmbs = bt_patmbs;
-				string = bt_string, strmbs = bt_strmbs;
+				pattern = bt_pattern;
+				patmbs = bt_patmbs;
+				string = bt_string;
+				strmbs = bt_strmbs;
 			}
 			break;
 		}
@@ -228,15 +231,20 @@
 }
 
 static int
-rangematch(const char *pattern, wchar_t test, int flags, char **newp,
-    mbstate_t *patmbs)
+rangematch(const char *pattern, wchar_t test, const char *string, int flags,
+    char **newp, char **news, mbstate_t *patmbs, mbstate_t *strmbs)
 {
 	int negate, ok;
 	wchar_t c, c2;
 	size_t pclen;
 	const char *origpat;
 	struct xlocale_collate *table =
-		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
+	    (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE];
+	wchar_t buf[COLLATE_STR_LEN];	/* STR_LEN defined in collate.h */
+	const char *cp, *savestring;
+	int special;
+	mbstate_t save;
+	size_t sclen, len;
 
 	/*
 	 * A bracket expression starting with an unquoted circumflex
@@ -259,20 +267,128 @@
 	ok = 0;
 	origpat = pattern;
 	for (;;) {
+		c = 0;
 		if (*pattern == ']' && pattern > origpat) {
-			pattern++;
 			break;
 		} else if (*pattern == '\0') {
 			return (RANGE_ERROR);
 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
 			return (RANGE_NOMATCH);
-		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
+		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
 			pattern++;
-		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
-		if (pclen == (size_t)-1 || pclen == (size_t)-2)
-			return (RANGE_NOMATCH);
-		pattern += pclen;
+		} else if (*pattern == '[' &&
+		    ((special = *(pattern + 1)) == '.' ||
+		    special == '=' || special == ':')) {
+			cp = (pattern += 2);
+			while ((cp = strchr(cp, special))) {
+				if (*(cp + 1) == ']')
+					break;
+				cp++;
+			}
+			if (!cp)
+				return (RANGE_ERROR);
+			if (special == '.') {
+treat_like_collating_symbol:
+				len = __collate_collating_symbol(buf,
+				    COLLATE_STR_LEN, pattern,
+				    cp - pattern, patmbs);
+				if (len == (size_t)-1 || len == 0)
+					return (RANGE_ERROR);
+				pattern = cp + 2;
+				if (len > 1) {
+					wchar_t *wp, sc;
+					/*
+					 * No multi-character collation
+					 * symbols as start of range.
+					 */
+					if (*(cp + 2) == '-' &&
+					    *(cp + 3) != EOS &&
+					    *(cp + 3) != ']')
+						return (RANGE_ERROR);
+					wp = buf;
+					if (test != *wp++)
+						continue;
+					memcpy(&save, strmbs, sizeof(save));
+					savestring = string;
+					while (--len > 0) {
+						sclen = mbrtowc(&sc, string,
+						    MB_LEN_MAX, strmbs);
+						if (sclen == (size_t)-1 ||
+						    sclen == (size_t)-2) {
+							sc = (unsigned char)*string;
+							sclen = 1;
+							memset(strmbs, 0,
+							    sizeof(*strmbs));
+						}
+						if (sc != *wp++) {
+							memcpy(strmbs, &save,
+							    sizeof(save));
+							string = savestring;
+							break;
+						}
+						string += sclen;
+					}
+					if (len == 0) {
+						ok = 1;
+						break;
+					}
+					continue; /* no match */
+				}
+				c = *buf;
+			} else if (special == '=') {
+				int ec;
+				memcpy(&save, patmbs, sizeof(save));
+				ec = __collate_equiv_class(pattern,
+				    cp - pattern, patmbs);
+				if (ec < 0)
+					return (RANGE_ERROR);
+				if (ec == 0) {
+					memcpy(patmbs, &save, sizeof(save));
+					goto treat_like_collating_symbol;
+				}
+				pattern = cp + 2;
+				/* no equivalence classes as start of range */
+				if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+				    *(cp + 3) != ']')
+					return (RANGE_ERROR);
+				len = __collate_equiv_match(ec, NULL, 0, test,
+				    string, strlen(string), strmbs, &sclen);
+				if (len == (size_t)-1)
+					return (RANGE_ERROR);
+				if (len > 0) {
+					ok = 1;
+					string += sclen;
+					break;
+				}
+				continue;
+			} else { /* special == ':' */
+				wctype_t charclass;
+				char name[CHARCLASS_NAME_MAX + 1];
+				/* no character classes as start of range */
+				if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+				    *(cp + 3) != ']')
+					return (RANGE_ERROR);
+				/* assume character class names are ascii */
+				if (cp - pattern > CHARCLASS_NAME_MAX)
+					return (RANGE_ERROR);
+				strlcpy(name, pattern, cp - pattern + 1);
+				pattern = cp + 2;
+				if ((charclass = wctype(name)) == 0)
+					return (RANGE_ERROR);
+				if (iswctype(test, charclass)) {
+					ok = 1;
+					break;
+				}
+				continue;
+			}
+		}
+		if (!c) {
+			pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
+			if (pclen == (size_t)-1 || pclen == (size_t)-2)
+				return (RANGE_NOMATCH);
+			pattern += pclen;
+		}
 
 		if (flags & FNM_CASEFOLD)
 			c = towlower(c);
 
@@ -288,6 +404,37 @@
 			if (c2 == EOS)
 				return (RANGE_ERROR);
 
+			if (c2 == '[' && ((special = *pattern) == '.' ||
+			    special == '=' || special == ':')) {
+
+				/*
+				 * No equivalence classes or character
+				 * classes as end of range.
+				 */
+				if (special == '=' || special == ':')
+					return (RANGE_ERROR);
+				cp = ++pattern;
+				while ((cp = strchr(cp, special))) {
+					if (*(cp + 1) == ']')
+						break;
+					cp++;
+				}
+				if (!cp)
+					return (RANGE_ERROR);
+				len = __collate_collating_symbol(buf,
+				    COLLATE_STR_LEN, pattern,
+				    cp - pattern, patmbs);
+
+				/*
+				 * No multi-character collation symbols
+				 * as end of range.
+				 */
+				if (len != 1)
+					return (RANGE_ERROR);
+				pattern = cp + 2;
+				c2 = *buf;
+			}
+
 			if (flags & FNM_CASEFOLD)
 				c2 = towlower(c2);
 
@@ -295,12 +442,51 @@
 			    c <= test && test <= c2 :
 			       __wcollate_range_cmp(c, test) <= 0
 			    && __wcollate_range_cmp(test, c2) <= 0
-			   )
+			   ) {
 				ok = 1;
-		} else if (c == test)
+				break;
+			}
+		} else if (c == test) {
 			ok = 1;
+			break;
+		}
 	}
 
-	*newp = (char *)pattern;
+	/* go to end of bracket expression */
+	special = 0;
+	while (*pattern != ']') {
+		if (*pattern == 0)
+			return (RANGE_ERROR);
+		if (*pattern == special) {
+			if (*++pattern == ']') {
+				special = 0;
+				pattern++;
+			}
+			continue;
+		}
+		if (!special && *pattern == '[') {
+			special = *++pattern;
+			if (special != '.' && special != '=' && special != ':')
+				special = 0;
+			else
+				pattern++;
+			continue;
+		}
+		if (!special) {
+			/* Same slash and escape rules as the matching loop. */
+			if (*pattern == '/' && (flags & FNM_PATHNAME))
+				return (RANGE_NOMATCH);
+			if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
+				pattern++;
+		}
+		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
+		if (pclen == (size_t)-1 || pclen == (size_t)-2)
+			return (RANGE_NOMATCH);
+		pattern += pclen;
+	}
+
+	*newp = (char *)++pattern;
+	*news = (char *)string;
+
 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
 }
diff --git a/lib/libc/tests/gen/fnmatch_test.c b/lib/libc/tests/gen/fnmatch_test.c
--- a/lib/libc/tests/gen/fnmatch_test.c
+++ b/lib/libc/tests/gen/fnmatch_test.c
@@ -26,6 +26,7 @@
 
 #include <sys/param.h>
 #include <errno.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -176,10 +177,93 @@
 
 }
 
+ATF_TC(fnmatch_characterclass);
+ATF_TC_HEAD(fnmatch_characterclass, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test fnmatch with character classes");
+}
+
+ATF_TC_BODY(fnmatch_characterclass, tc)
+{
+	ATF_CHECK(fnmatch("[[:alnum:]]", "a", 0) == 0);
+	ATF_CHECK(fnmatch("[[:cntrl:]]", "\a", 0) == 0);
+	ATF_CHECK(fnmatch("[[:lower:]]", "a", 0) == 0);
+	ATF_CHECK(fnmatch("[[:space:]]", " ", 0) == 0);
+	ATF_CHECK(fnmatch("[[:alpha:]]", "a", 0) == 0);
+	ATF_CHECK(fnmatch("[[:digit:]]", "0", 0) == 0);
+	ATF_CHECK(fnmatch("[[:print:]]", "a", 0) == 0);
+	ATF_CHECK(fnmatch("[[:upper:]]", "A", 0) == 0);
+	ATF_CHECK(fnmatch("[[:blank:]]", " ", 0) == 0);
+	ATF_CHECK(fnmatch("[[:graph:]]", "a", 0) == 0);
+	ATF_CHECK(fnmatch("[[:punct:]]", ".", 0) == 0);
+	ATF_CHECK(fnmatch("[[:xdigit:]]", "f", 0) == 0);
+
+	/* A character class may be followed by an ordinary range. */
+	ATF_CHECK(fnmatch("[[:alpha:]0-9]", "5", 0) == 0);
+
+	/*
+	 * POSIX.1, section 9.3.5. states that '[:' and ':]'
+	 * should be interpreted as a character class only
+	 * when part of a bracket expression.
+	 */
+	ATF_CHECK(fnmatch("[:alnum:]", "a", 0) == 0);
+	ATF_CHECK(fnmatch("[:alnum:]", ":", 0) == 0);
+	ATF_CHECK(fnmatch("[:alnum:]", "1", 0) != 0);
+}
+
+ATF_TC(fnmatch_collsym);
+ATF_TC_HEAD(fnmatch_collsym, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test fnmatch with collating symbols");
+}
+
+ATF_TC_BODY(fnmatch_collsym, tc)
+{
+	ATF_REQUIRE(setlocale(LC_ALL, "cs_CZ.UTF-8") != NULL);
+	ATF_CHECK(fnmatch("[ch]", "ch", 0) != 0);
+	ATF_CHECK(fnmatch("[[.ch.]]", "ch", 0) == 0);
+	ATF_CHECK(fnmatch("[[.ch.]]h", "chh", 0) == 0);
+
+	/*
+	 * POSIX.1, section 9.3.5. states that '[.' and '.]'
+	 * should be interpreted as a collating symbol only
+	 * when part of a bracket expression.
+	 */
+	ATF_CHECK(fnmatch("[.ch.]", "c", 0) == 0);
+	ATF_CHECK(fnmatch("[.ch.]", "h", 0) == 0);
+	ATF_CHECK(fnmatch("[.ch.]", ".", 0) == 0);
+}
+
+ATF_TC(fnmatch_equivclass);
+ATF_TC_HEAD(fnmatch_equivclass, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test fnmatch with equivalence classes");
+}
+
+ATF_TC_BODY(fnmatch_equivclass, tc)
+{
+	ATF_REQUIRE(setlocale(LC_ALL, "en_US.UTF-8") != NULL);
+	ATF_CHECK(fnmatch("[[=a=]]b", "ab", 0) == 0);
+	ATF_CHECK(fnmatch("[[=a=]]b", "Ab", 0) == 0);
+	ATF_CHECK(fnmatch("[[=à=]]b", "ab", 0) == 0);
+	ATF_CHECK(fnmatch("[[=a=]]b", "àb", 0) == 0);
+
+	/*
+	 * POSIX.1, section 9.3.5. states that '[=' and '=]'
+	 * should be interpreted as an equivalence class only
+	 * when part of a bracket expression.
+	 */
+	ATF_CHECK(fnmatch("[=a=]b", "=b", 0) == 0);
+	ATF_CHECK(fnmatch("[=a=]b", "ab", 0) == 0);
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 
 	ATF_TP_ADD_TC(tp, fnmatch_test);
+	ATF_TP_ADD_TC(tp, fnmatch_collsym);
+	ATF_TP_ADD_TC(tp, fnmatch_characterclass);
+	ATF_TP_ADD_TC(tp, fnmatch_equivclass);
 
 	return (atf_no_error());
 }