Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144471904
D49660.id153247.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D49660.id153247.diff
View Options
diff --git a/lib/libc/gen/fnmatch.3 b/lib/libc/gen/fnmatch.3
--- a/lib/libc/gen/fnmatch.3
+++ b/lib/libc/gen/fnmatch.3
@@ -27,7 +27,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd April 2, 2022
+.Dd April 7, 2025
.Dt FNMATCH 3
.Os
.Sh NAME
@@ -133,8 +133,6 @@
.Em does not
conform to
.St -p1003.2 .
-Collating symbol expressions, equivalence class expressions and
-character class expressions are not supported.
.Sh HISTORY
A predecessor to
.Fn fnmatch ,
diff --git a/lib/libc/gen/fnmatch.c b/lib/libc/gen/fnmatch.c
--- a/lib/libc/gen/fnmatch.c
+++ b/lib/libc/gen/fnmatch.c
@@ -67,7 +67,8 @@
#define RANGE_NOMATCH 0
#define RANGE_ERROR (-1)
-static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
+static int rangematch(const char *, wchar_t, const char *, int, char **,
+ char **, mbstate_t *, mbstate_t *);
static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
mbstate_t);
@@ -85,7 +86,7 @@
{
const char *bt_pattern, *bt_string;
mbstate_t bt_patmbs, bt_strmbs;
- char *newp;
+ char *newp, *news;
char c;
wchar_t pc, sc;
size_t pclen, sclen;
@@ -164,17 +165,17 @@
((flags & FNM_PATHNAME) && *(string - 1) == '/')))
goto backtrack;
- switch (rangematch(pattern, sc, flags, &newp,
- &patmbs)) {
+ switch (rangematch(pattern, sc, string + sclen, flags,
+ &newp, &news, &patmbs, &strmbs)) {
case RANGE_ERROR:
goto norm;
case RANGE_MATCH:
pattern = newp;
+ string = news;
break;
case RANGE_NOMATCH:
goto backtrack;
}
- string += sclen;
break;
case '\\':
if (!(flags & FNM_NOESCAPE)) {
@@ -218,8 +219,10 @@
if (sc == '/' && flags & FNM_PATHNAME)
return (FNM_NOMATCH);
bt_string += sclen;
- pattern = bt_pattern, patmbs = bt_patmbs;
- string = bt_string, strmbs = bt_strmbs;
+ pattern = bt_pattern;
+ patmbs = bt_patmbs;
+ string = bt_string;
+ strmbs = bt_strmbs;
}
break;
}
@@ -228,15 +231,20 @@
}
static int
-rangematch(const char *pattern, wchar_t test, int flags, char **newp,
- mbstate_t *patmbs)
+rangematch(const char *pattern, wchar_t test, const char *string, int flags,
+ char **newp, char **news, mbstate_t *patmbs, mbstate_t *strmbs)
{
int negate, ok;
wchar_t c, c2;
size_t pclen;
const char *origpat;
struct xlocale_collate *table =
- (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
+ (struct xlocale_collate *)__get_locale()->components[XLC_COLLATE];
+ wchar_t buf[COLLATE_STR_LEN]; /* STR_LEN defined in collate.h */
+ const char *cp, *savestring;
+ int special;
+ mbstate_t save;
+ size_t sclen, len;
/*
* A bracket expression starting with an unquoted circumflex
@@ -259,20 +267,132 @@
ok = 0;
origpat = pattern;
for (;;) {
+ c = 0;
if (*pattern == ']' && pattern > origpat) {
- pattern++;
break;
} else if (*pattern == '\0') {
return (RANGE_ERROR);
} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
return (RANGE_NOMATCH);
- } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
+ } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
pattern++;
- pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
- if (pclen == (size_t)-1 || pclen == (size_t)-2)
- return (RANGE_NOMATCH);
- pattern += pclen;
+ } else if (*pattern == '[' &&
+ ((special = *(pattern + 1)) == '.' ||
+ special == '=' || special == ':')) {
+ cp = (pattern += 2);
+ while ((cp = strchr(cp, special))) {
+ if (*(cp + 1) == ']')
+ break;
+ cp++;
+ }
+ if (!cp)
+ return (RANGE_ERROR);
+ if (special == '.') {
+treat_like_collating_symbol:
+ len = __collate_collating_symbol(buf,
+ COLLATE_STR_LEN, pattern,
+ cp - pattern, patmbs);
+ if (len == (size_t)-1 || len == 0)
+ return (RANGE_ERROR);
+ pattern = cp + 2;
+ if (len > 1) {
+ wchar_t *wp, sc;
+ /*
+ * No multi-character collation
+ * symbols as start of range.
+ */
+ if (*(cp + 2) == '-' &&
+ *(cp + 3) != EOS &&
+ *(cp + 3) != ']')
+ return (RANGE_ERROR);
+ wp = buf;
+ if (test != *wp++)
+ continue;
+ memcpy(&save, strmbs, sizeof(save));
+ savestring = string;
+ while (--len > 0) {
+ sclen = mbrtowc(&sc, string,
+ MB_LEN_MAX, strmbs);
+ if (sclen == (size_t)-1 ||
+ sclen == (size_t)-2) {
+ /*
+ * Invalid sequence: compare the raw byte and
+ * reset the state that strmbs points at.
+ */
+ sc = (unsigned char)*string;
+ sclen = 1;
+ memset(strmbs, 0,
+ sizeof(*strmbs));
+ }
+ if (sc != *wp++) {
+ memcpy(strmbs, &save,
+ sizeof(save));
+ string = savestring;
+ break;
+ }
+ string += sclen;
+ }
+ if (len == 0) {
+ ok = 1;
+ break;
+ }
+ continue; /* no match */
+ }
+ c = *buf;
+ } else if (special == '=') {
+ int ec;
+ memcpy(&save, patmbs, sizeof(save));
+ ec = __collate_equiv_class(pattern,
+ cp - pattern, patmbs);
+ if (ec < 0)
+ return (RANGE_ERROR);
+ if (ec == 0) {
+ memcpy(patmbs, &save, sizeof(save));
+ goto treat_like_collating_symbol;
+ }
+ pattern = cp + 2;
+ /* no equivalence classes as start of range */
+ if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+ *(cp + 3) != ']')
+ return (RANGE_ERROR);
+ len = __collate_equiv_match(ec, NULL, 0, test,
+ string, strlen(string), strmbs, &sclen);
+ if (len == (size_t)-1) /* len is size_t; assumes -1 on failure, TODO confirm */
+ return (RANGE_ERROR);
+ if (len > 0) {
+ ok = 1;
+ string += sclen;
+ break;
+ }
+ continue;
+ } else { /* special == ':' */
+ wctype_t charclass;
+ char name[CHARCLASS_NAME_MAX + 1];
+ /* no character classes as start of range */
+ if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
+ *(cp + 3) != ']')
+ return (RANGE_ERROR);
+ /* assume character class names are ascii */
+ if (cp - pattern > CHARCLASS_NAME_MAX)
+ return (RANGE_ERROR);
+ strlcpy(name, pattern, cp - pattern + 1);
+ pattern = cp + 2;
+ if ((charclass = wctype(name)) == 0)
+ return (RANGE_ERROR);
+ if (iswctype(test, charclass)) {
+ ok = 1;
+ break;
+ }
+ continue;
+ }
+ }
+ if (!c) {
+ pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
+ if (pclen == (size_t)-1 || pclen == (size_t)-2)
+ return (RANGE_NOMATCH);
+ pattern += pclen;
+ }
if (flags & FNM_CASEFOLD)
c = towlower(c);
@@ -288,6 +408,37 @@
if (c2 == EOS)
return (RANGE_ERROR);
+ if (c2 == '[' && ((special = *pattern) == '.' ||
+ special == '=' || special == ':')) {
+ /* special is assigned and tested only when c2 is '[' */
+ /*
+ * No equivalence classes or character
+ * classes as end of range.
+ */
+ if (special == '=' || special == ':')
+ return (RANGE_ERROR);
+ cp = ++pattern;
+ while ((cp = strchr(cp, special))) {
+ if (*(cp + 1) == ']')
+ break;
+ cp++;
+ }
+ if (!cp)
+ return (RANGE_ERROR);
+ len = __collate_collating_symbol(buf,
+ COLLATE_STR_LEN, pattern,
+ cp - pattern, patmbs);
+
+ /*
+ * No multi-character collation symbols
+ * as end of range.
+ */
+ if (len != 1)
+ return (RANGE_ERROR);
+ pattern = cp + 2;
+ c2 = *buf;
+ }
+
if (flags & FNM_CASEFOLD)
c2 = towlower(c2);
@@ -295,12 +446,55 @@
c <= test && test <= c2 :
__wcollate_range_cmp(c, test) <= 0
&& __wcollate_range_cmp(test, c2) <= 0
- )
+ ) {
ok = 1;
- } else if (c == test)
+ break;
+ }
+ } else if (c == test) {
ok = 1;
+ break;
+ }
}
- *newp = (char *)pattern;
+ /*
+ * Go to the end of the bracket expression.  The matching loop may
+ * have stopped early on a match, so '/' (with FNM_PATHNAME) and
+ * backslash escapes are honoured here as they are in that loop.
+ */
+ special = 0;
+ while (*pattern != ']') {
+ if (*pattern == 0)
+ return (RANGE_ERROR);
+ if (*pattern == special) {
+ if (*++pattern == ']') {
+ special = 0;
+ pattern++;
+ }
+ continue;
+ }
+ if (!special && *pattern == '[') {
+ special = *++pattern;
+ if (special != '.' && special != '=' && special != ':')
+ special = 0;
+ else
+ pattern++;
+ continue;
+ }
+ if (!special && *pattern == '/' && (flags & FNM_PATHNAME))
+ return (RANGE_NOMATCH);
+ if (!special && *pattern == '\\' && !(flags & FNM_NOESCAPE)) {
+ /* drop the backslash; the escaped character is skipped below */
+ if (*++pattern == 0)
+ return (RANGE_ERROR);
+ }
+ pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
+ if (pclen == (size_t)-1 || pclen == (size_t)-2)
+ return (RANGE_NOMATCH);
+ pattern += pclen;
+ }
+
+ *newp = (char *)++pattern;
+ *news = (char *)string;
+
return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
}
diff --git a/lib/libc/tests/gen/fnmatch_test.c b/lib/libc/tests/gen/fnmatch_test.c
--- a/lib/libc/tests/gen/fnmatch_test.c
+++ b/lib/libc/tests/gen/fnmatch_test.c
@@ -26,6 +26,7 @@
#include <sys/param.h>
#include <errno.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -176,10 +177,90 @@
}
+ATF_TC(fnmatch_characterclass);
+ATF_TC_HEAD(fnmatch_characterclass, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test fnmatch with character classes");
+}
+
+ATF_TC_BODY(fnmatch_characterclass, tc)
+{
+ ATF_CHECK(fnmatch("[[:alnum:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:cntrl:]]", "\a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:lower:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:space:]]", " ", 0) == 0);
+ ATF_CHECK(fnmatch("[[:alpha:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:digit:]]", "0", 0) == 0);
+ ATF_CHECK(fnmatch("[[:print:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:upper:]]", "A", 0) == 0);
+ ATF_CHECK(fnmatch("[[:blank:]]", " ", 0) == 0);
+ ATF_CHECK(fnmatch("[[:graph:]]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[[:punct:]]", ".", 0) == 0);
+ ATF_CHECK(fnmatch("[[:xdigit:]]", "f", 0) == 0);
+
+ /*
+ * POSIX.1, section 9.3.5, states that '[:' and ':]'
+ * should be interpreted as a character class only
+ * when part of a bracket expression.
+ */
+ ATF_CHECK(fnmatch("[:alnum:]", "a", 0) == 0);
+ ATF_CHECK(fnmatch("[:alnum:]", ":", 0) == 0);
+ ATF_CHECK(fnmatch("[:alnum:]", "1", 0) != 0);
+}
+
+ATF_TC(fnmatch_collsym);
+ATF_TC_HEAD(fnmatch_collsym, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test fnmatch with collating symbols");
+}
+
+ATF_TC_BODY(fnmatch_collsym, tc)
+{
+ ATF_REQUIRE(setlocale(LC_ALL, "cs_CZ.UTF-8") != NULL);
+ ATF_CHECK(fnmatch("[ch]", "ch", 0) != 0);
+ ATF_CHECK(fnmatch("[[.ch.]]", "ch", 0) == 0);
+ ATF_CHECK(fnmatch("[[.ch.]]h", "chh", 0) == 0);
+
+ /*
+ * POSIX.1, section 9.3.5, states that '[.' and '.]'
+ * should be interpreted as a collating symbol only
+ * when part of a bracket expression.
+ */
+ ATF_CHECK(fnmatch("[.ch.]", "c", 0) == 0);
+ ATF_CHECK(fnmatch("[.ch.]", "h", 0) == 0);
+ ATF_CHECK(fnmatch("[.ch.]", ".", 0) == 0);
+}
+
+ATF_TC(fnmatch_equivclass);
+ATF_TC_HEAD(fnmatch_equivclass, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test fnmatch with equivalence classes");
+}
+
+ATF_TC_BODY(fnmatch_equivclass, tc)
+{
+ ATF_REQUIRE(setlocale(LC_ALL, "en_US.UTF-8") != NULL);
+ ATF_CHECK(fnmatch("[[=a=]]b", "ab", 0) == 0);
+ ATF_CHECK(fnmatch("[[=a=]]b", "Ab", 0) == 0);
+ ATF_CHECK(fnmatch("[[=à=]]b", "ab", 0) == 0);
+ ATF_CHECK(fnmatch("[[=a=]]b", "àb", 0) == 0);
+
+ /*
+ * POSIX.1, section 9.3.5, states that '[=' and '=]'
+ * should be interpreted as an equivalence class only
+ * when part of a bracket expression.
+ */
+ ATF_CHECK(fnmatch("[=a=]b", "=b", 0) == 0);
+ ATF_CHECK(fnmatch("[=a=]b", "ab", 0) == 0);
+}
+
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, fnmatch_test);
+ ATF_TP_ADD_TC(tp, fnmatch_collsym);
+ ATF_TP_ADD_TC(tp, fnmatch_characterclass);
+ ATF_TP_ADD_TC(tp, fnmatch_equivclass);
return (atf_no_error());
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Feb 9, 3:10 PM (1 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28592307
Default Alt Text
D49660.id153247.diff (11 KB)
Attached To
Mode
D49660: fnmatch: Add support for collating symbols, equivalence classes, and character classes
Attached
Detach File
Event Timeline
Log In to Comment