Page MenuHomeFreeBSD

D49660.id153247.diff
No OneTemporary

D49660.id153247.diff

diff --git a/lib/libc/gen/fnmatch.3 b/lib/libc/gen/fnmatch.3
--- a/lib/libc/gen/fnmatch.3
+++ b/lib/libc/gen/fnmatch.3
@@ -27,7 +27,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd April 2, 2022
+.Dd April 7, 2025
.Dt FNMATCH 3
.Os
.Sh NAME
@@ -133,8 +133,6 @@
.Em does not
conform to
.St -p1003.2 .
-Collating symbol expressions, equivalence class expressions and
-character class expressions are not supported.
.Sh HISTORY
A predecessor to
.Fn fnmatch ,
diff --git a/lib/libc/gen/fnmatch.c b/lib/libc/gen/fnmatch.c
--- a/lib/libc/gen/fnmatch.c
+++ b/lib/libc/gen/fnmatch.c
@@ -67,7 +67,8 @@
#define RANGE_NOMATCH 0
#define RANGE_ERROR (-1)
-static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
+static int rangematch(const char *, wchar_t, const char *, int, char **,
+ char **, mbstate_t *, mbstate_t *);
static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
mbstate_t);
@@ -85,7 +86,7 @@
{
const char *bt_pattern, *bt_string;
mbstate_t bt_patmbs, bt_strmbs;
- char *newp;
+ char *newp, *news;
char c;
wchar_t pc, sc;
size_t pclen, sclen;
@@ -164,17 +165,17 @@
((flags & FNM_PATHNAME) && *(string - 1) == '/')))
goto backtrack;
- switch (rangematch(pattern, sc, flags, &newp,
- &patmbs)) {
+ switch (rangematch(pattern, sc, string + sclen, flags,
+ &newp, &news, &patmbs, &strmbs)) {
case RANGE_ERROR:
goto norm;
case RANGE_MATCH:
pattern = newp;
+ string = news;
break;
case RANGE_NOMATCH:
goto backtrack;
}
- string += sclen;
break;
case '\\':
if (!(flags & FNM_NOESCAPE)) {
@@ -218,8 +219,10 @@
if (sc == '/' && flags & FNM_PATHNAME)
return (FNM_NOMATCH);
bt_string += sclen;
- pattern = bt_pattern, patmbs = bt_patmbs;
- string = bt_string, strmbs = bt_strmbs;
+ pattern = bt_pattern;
+ patmbs = bt_patmbs;
+ string = bt_string;
+ strmbs = bt_strmbs;
}
break;
}
@@ -228,15 +231,20 @@
}
static int
-rangematch(const char *pattern, wchar_t test, int flags, char **newp,
- mbstate_t *patmbs)
+rangematch(const char *pattern, wchar_t test, const char *string, int flags,
+ char **newp, char **news, mbstate_t *patmbs, mbstate_t *strmbs)
{
int negate, ok;
wchar_t c, c2;
size_t pclen;
const char *origpat;
struct xlocale_collate *table =
- (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
+ (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE];
+ wchar_t buf[COLLATE_STR_LEN]; /* COLLATE_STR_LEN is defined in collate.h */
+ const char *cp, *savestring; /* cp: closing delimiter of a "[." "[=" "[:" element; savestring: rollback point in string */
+ int special; /* '.', '=' or ':' following a '[' inside the bracket expression */
+ mbstate_t save; /* conversion-state snapshot, restored when a tentative match fails */
+ size_t sclen, len; /* sclen: bytes consumed from string; len: result of the collate helpers */
/*
* A bracket expression starting with an unquoted circumflex
@@ -259,20 +267,132 @@
ok = 0;
origpat = pattern;
for (;;) {
+ c = 0; /* stays 0 unless a one-character collating symbol supplies it below */
if (*pattern == ']' && pattern > origpat) {
- pattern++;
break;
} else if (*pattern == '\0') {
return (RANGE_ERROR);
} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
return (RANGE_NOMATCH);
- } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
+ } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
pattern++;
- pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
- if (pclen == (size_t)-1 || pclen == (size_t)-2)
- return (RANGE_NOMATCH);
- pattern += pclen;
+ } else if (*pattern == '[' &&
+ ((special = *(pattern + 1)) == '.' ||
+ special == '=' || special == ':')) {
+ cp = (pattern += 2);
+ while ((cp = strchr(cp, special))) {
+ if (*(cp + 1) == ']')
+ break;
+ cp++;
+ }
+ if (!cp)
+ return (RANGE_ERROR);
+ if (special == '.') {
+treat_like_collating_symbol:
+ len = __collate_collating_symbol(buf,
+ COLLATE_STR_LEN, pattern,
+ cp - pattern, patmbs);
+ if (len == (size_t)-1 || len == 0)
+ return (RANGE_ERROR);
+ pattern = cp + 2;
+ if (len > 1) {
+ wchar_t *wp, sc;
+ /*
+ * No multi-character collation
+ * symbols as start of range.
+ */
+ if (*(cp + 2) == '-' &&
+ *(cp + 3) != EOS &&
+ *(cp + 3) != ']')
+ return (RANGE_ERROR);
+ wp = buf;
+ if (test != *wp++)
+ continue;
+ /*
+ * Remember the string position and state so
+ * that a partial match can be rolled back.
+ */
+ memcpy(&save, strmbs, sizeof(save));
+ savestring = string;
+ while (--len > 0) {
+ sclen = mbrtowc(&sc, string,
+ MB_LEN_MAX, strmbs);
+ if (sclen == (size_t)-1 ||
+ sclen == (size_t)-2) {
+ sc = (unsigned char)*string;
+ sclen = 1;
+ memset(strmbs, 0,
+ sizeof(*strmbs));
+ }
+ if (sc != *wp++) {
+ memcpy(strmbs, &save,
+ sizeof(save));
+ string = savestring;
+ break;
+ }
+ string += sclen;
+ }
+ if (len == 0) {
+ ok = 1;
+ break;
+ }
+ continue; /* no match */
+ }
+ c = *buf;
+ } else if (special == '=') {
+ int ec;
+ memcpy(&save, patmbs, sizeof(save));
+ ec = __collate_equiv_class(pattern,
+ cp - pattern, patmbs);
+ if (ec < 0)
+ return (RANGE_ERROR);
+ if (ec == 0) {
+ memcpy(patmbs, &save, sizeof(save));
+ goto treat_like_collating_symbol;
+ }
+ pattern = cp + 2;
+ /* no equivalence classes as start of range */
+ if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+ *(cp + 3) != ']')
+ return (RANGE_ERROR);
+ len = __collate_equiv_match(ec, NULL, 0, test,
+ string, strlen(string), strmbs, &sclen);
+ if (len == (size_t)-1) /* len is unsigned, "< 0" never fires; NOTE(review): assumes -1 signals failure, confirm */
+ return (RANGE_ERROR);
+ if (len > 0) {
+ ok = 1;
+ string += sclen;
+ break;
+ }
+ continue;
+ } else { /* special == ':' */
+ wctype_t charclass;
+ char name[CHARCLASS_NAME_MAX + 1];
+ /* no character classes as start of range */
+ if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+ *(cp + 3) != ']')
+ return (RANGE_ERROR);
+ /* assume character class names are ascii */
+ if (cp - pattern > CHARCLASS_NAME_MAX)
+ return (RANGE_ERROR);
+ strlcpy(name, pattern, cp - pattern + 1);
+ pattern = cp + 2;
+ if ((charclass = wctype(name)) == 0)
+ return (RANGE_ERROR);
+ if (iswctype(test, charclass)) {
+ ok = 1;
+ break;
+ }
+ continue;
+ }
+ }
+ if (!c) {
+ pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
+ if (pclen == (size_t)-1 || pclen == (size_t)-2)
+ return (RANGE_NOMATCH);
+ pattern += pclen;
+ }
if (flags & FNM_CASEFOLD)
c = towlower(c);
@@ -288,6 +408,37 @@
if (c2 == EOS)
return (RANGE_ERROR);
+ if (c2 == '[' && ((special = *pattern) == '.' ||
+ special == '=' || special == ':')) {
+ /*
+ * The range end is itself a bracketed element.
+ * No equivalence classes or character classes
+ * as end of range.
+ */
+ if (special == '=' || special == ':')
+ return (RANGE_ERROR);
+ cp = ++pattern;
+ while ((cp = strchr(cp, special))) {
+ if (*(cp + 1) == ']')
+ break;
+ cp++;
+ }
+ if (!cp)
+ return (RANGE_ERROR);
+ len = __collate_collating_symbol(buf,
+ COLLATE_STR_LEN, pattern,
+ cp - pattern, patmbs);
+
+ /*
+ * No multi-character collation symbols
+ * as end of range.
+ */
+ if (len != 1)
+ return (RANGE_ERROR);
+ pattern = cp + 2;
+ c2 = *buf;
+ }
+
if (flags & FNM_CASEFOLD)
c2 = towlower(c2);
@@ -295,12 +446,48 @@
c <= test && test <= c2 :
__wcollate_range_cmp(c, test) <= 0
&& __wcollate_range_cmp(test, c2) <= 0
- )
+ ) {
ok = 1;
- } else if (c == test)
+ break;
+ }
+ } else if (c == test) {
ok = 1;
+ break;
+ }
}
- *newp = (char *)pattern;
+ /* go to end of bracket expression, stepping over the unexamined elements */
+ special = 0;
+ while (*pattern != ']') {
+ if (*pattern == 0)
+ return (RANGE_ERROR);
+ if (*pattern == special) {
+ if (*++pattern == ']') {
+ special = 0;
+ pattern++;
+ }
+ continue;
+ }
+ if (!special && *pattern == '\\' && !(flags & FNM_NOESCAPE)) {
+ /* as in the matching loop, an escaped character (even ']') is a literal */
+ if (*++pattern == 0)
+ return (RANGE_ERROR);
+ } else if (!special && *pattern == '[') {
+ special = *++pattern;
+ if (special != '.' && special != '=' && special != ':')
+ special = 0;
+ else
+ pattern++;
+ continue;
+ }
+ pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
+ if (pclen == (size_t)-1 || pclen == (size_t)-2)
+ return (RANGE_NOMATCH);
+ pattern += pclen;
+ }
+
+ *newp = (char *)++pattern;
+ *news = (char *)string;
+
return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
}
diff --git a/lib/libc/tests/gen/fnmatch_test.c b/lib/libc/tests/gen/fnmatch_test.c
--- a/lib/libc/tests/gen/fnmatch_test.c
+++ b/lib/libc/tests/gen/fnmatch_test.c
@@ -26,6 +26,7 @@
#include <sys/param.h>
#include <errno.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -176,10 +177,90 @@
}
+ATF_TC(fnmatch_characterclass);
+ATF_TC_HEAD(fnmatch_characterclass, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test fnmatch with character classes");
+}
+
+ATF_TC_BODY(fnmatch_characterclass, tc)
+{
+ ATF_CHECK(fnmatch("[[:alnum:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:cntrl:]]", "\a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:lower:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:space:]]", " ", 0) == 0);
+ ATF_CHECK(fnmatch("[[:alpha:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:digit:]]", "0", 0) == 0);
+ ATF_CHECK(fnmatch("[[:print:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:upper:]]", "A", 0) == 0);
+ ATF_CHECK(fnmatch("[[:blank:]]", " ", 0) == 0);
+ ATF_CHECK(fnmatch("[[:graph:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:punct:]]", ".", 0) == 0);
+ ATF_CHECK(fnmatch("[[:xdigit:]]", "f", 0) == 0);
+
+ /*
+ * POSIX.1, section 9.3.5. states that '[:' and ':]' denote a
+ * character class only when part of a bracket expression, so a
+ * bare "[:alnum:]" is just the set of ':', 'a', 'l', 'n', 'u', 'm'.
+ */
+ ATF_CHECK(fnmatch("[:alnum:]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[:alnum:]", ":", 0) == 0);
+ ATF_CHECK(fnmatch("[:alnum:]", "1", 0) != 0);
+}
+
+ATF_TC(fnmatch_collsym);
+ATF_TC_HEAD(fnmatch_collsym, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test fnmatch with collating symbols");
+}
+
+ATF_TC_BODY(fnmatch_collsym, tc)
+{
+ ATF_REQUIRE(setlocale(LC_ALL, "cs_CZ.UTF-8") != NULL); /* stop here rather than test in the wrong locale */
+ ATF_CHECK(fnmatch("[ch]", "ch", 0) != 0);
+ ATF_CHECK(fnmatch("[[.ch.]]", "ch", 0) == 0);
+ ATF_CHECK(fnmatch("[[.ch.]]h", "chh", 0) == 0);
+
+ /*
+ * POSIX.1, section 9.3.5. states that '[.' and '.]' denote a
+ * collating symbol only when part of a bracket expression, so a
+ * bare "[.ch.]" is just the set of the characters '.', 'c', 'h'.
+ */
+ ATF_CHECK(fnmatch("[.ch.]", "c", 0) == 0);
+ ATF_CHECK(fnmatch("[.ch.]", "h", 0) == 0);
+ ATF_CHECK(fnmatch("[.ch.]", ".", 0) == 0);
+}
+
+ATF_TC(fnmatch_equivclass);
+ATF_TC_HEAD(fnmatch_equivclass, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test fnmatch with equivalence classes");
+}
+
+ATF_TC_BODY(fnmatch_equivclass, tc)
+{
+ ATF_REQUIRE(setlocale(LC_ALL, "en_US.UTF-8") != NULL); /* stop here rather than test in the wrong locale */
+ ATF_CHECK(fnmatch("[[=a=]]b", "ab", 0) == 0);
+ ATF_CHECK(fnmatch("[[=a=]]b", "Ab", 0) == 0);
+ ATF_CHECK(fnmatch("[[=à=]]b", "ab", 0) == 0);
+ ATF_CHECK(fnmatch("[[=a=]]b", "àb", 0) == 0);
+
+ /*
+ * POSIX.1, section 9.3.5. states that '[=' and '=]' denote an
+ * equivalence class only when part of a bracket expression, so
+ * a bare "[=a=]" is just the set of the characters '=' and 'a'.
+ */
+ ATF_CHECK(fnmatch("[=a=]b", "=b", 0) == 0);
+ ATF_CHECK(fnmatch("[=a=]b", "ab", 0) == 0);
+}
+
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, fnmatch_test);
+ ATF_TP_ADD_TC(tp, fnmatch_collsym); /* switches to the cs_CZ.UTF-8 locale */
+ ATF_TP_ADD_TC(tp, fnmatch_characterclass); /* does not call setlocale itself */
+ ATF_TP_ADD_TC(tp, fnmatch_equivclass); /* switches to the en_US.UTF-8 locale */
return (atf_no_error());
}

File Metadata

Mime Type
text/plain
Expires
Mon, Feb 9, 3:10 PM (1 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28592307
Default Alt Text
D49660.id153247.diff (11 KB)

Event Timeline