Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F134566275
D18531.id52464.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
6 KB
Referenced Files
None
Subscribers
None
D18531.id52464.diff
View Options
Index: lib/libc/locale/Symbol.map
===================================================================
--- lib/libc/locale/Symbol.map
+++ lib/libc/locale/Symbol.map
@@ -212,6 +212,7 @@
FBSDprivate_1.0 {
_PathLocale;
__detect_path_locale;
+ __collate_equiv_value;
__collate_load_error;
__collate_range_cmp;
};
Index: lib/libc/locale/collate.h
===================================================================
--- lib/libc/locale/collate.h
+++ lib/libc/locale/collate.h
@@ -127,7 +127,7 @@
__BEGIN_DECLS
int __collate_load_tables(const char *);
-int __collate_equiv_value(locale_t, const wchar_t *, size_t);
+int __collate_equiv_value(const wchar_t);
void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *,
int, const int **);
int __collate_range_cmp(char, char);
Index: lib/libc/locale/collate.c
===================================================================
--- lib/libc/locale/collate.c
+++ lib/libc/locale/collate.c
@@ -659,53 +659,33 @@
/*
* __collate_equiv_value returns the primary collation value for the given
- * collating symbol specified by str and len. Zero or negative is returned
- * if the collating symbol was not found. This function is used by bracket
- * code in the TRE regex library.
+ * collating symbol specified by wc. Zero or negative is returned if the
+ * collating symbol was not found.
*/
int
-__collate_equiv_value(locale_t locale, const wchar_t *str, size_t len)
+__collate_equiv_value(const wchar_t wc)
{
+ locale_t locale;
int32_t e;
- if (len < 1 || len >= COLLATE_STR_LEN)
- return (-1);
-
+ locale = __get_locale();
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
if (table->__collate_load_error)
- return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1);
-
- if (len == 1) {
- e = -1;
- if (*str <= UCHAR_MAX)
- e = table->char_pri_table[*str].pri[0];
- else if (table->info->large_count > 0) {
- collate_large_t *match_large;
- match_large = largesearch(table, *str);
- if (match_large)
- e = match_large->pri.pri[0];
- }
- if (e == 0)
- return (1);
- return (e > 0 ? e : 0);
- }
- if (table->info->chain_count > 0) {
- wchar_t name[COLLATE_STR_LEN];
- collate_chain_t *match_chain;
- int clen;
-
- wcsncpy (name, str, len);
- name[len] = 0;
- match_chain = chainsearch(table, name, &clen);
- if (match_chain) {
- e = match_chain->pri[0];
- if (e == 0)
- return (1);
- return (e < 0 ? -e : e);
- }
+ return (wc <= UCHAR_MAX ? wc : -1);
+
+ e = -1;
+ if (wc <= UCHAR_MAX)
+ e = table->char_pri_table[wc].pri[0];
+ else if (table->info->large_count > 0) {
+ collate_large_t *match_large;
+ match_large = largesearch(table, wc);
+ if (match_large)
+ e = match_large->pri.pri[0];
}
- return (0);
+ if (e == 0)
+ return (1);
+ return (e > 0 ? e : 0);
}
Index: lib/libc/regex/regcomp.c
===================================================================
--- lib/libc/regex/regcomp.c
+++ lib/libc/regex/regcomp.c
@@ -129,7 +129,6 @@
static int p_range_cmp(wchar_t c1, wchar_t c2);
static void p_b_term(struct parse *p, cset *cs);
static void p_b_cclass(struct parse *p, cset *cs);
-static void p_b_eclass(struct parse *p, cset *cs);
static wint_t p_b_symbol(struct parse *p);
static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
static wint_t othercase(wint_t ch);
@@ -900,7 +899,8 @@
if (cs->invert && p->g->cflags&REG_NEWLINE)
cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
- if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */
+ /* Optimize singleton sets, except for equivalence-class sets */
+ if (!cs->cequiv && (ch = singleton(cs)) != OUT) {
ordinary(p, ch);
freeset(p, cs);
} else
@@ -966,7 +966,7 @@
(void)REQUIRE(MORE(), REG_EBRACK);
c = PEEK();
(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
- p_b_eclass(p, cs);
+ cs->cequiv = __collate_equiv_value(p_b_coll_elem(p, '='));
(void)REQUIRE(MORE(), REG_EBRACK);
(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
break;
@@ -1032,21 +1032,6 @@
CHaddtype(p, cs, wct);
}
-/*
- - p_b_eclass - parse an equivalence-class name and deal with it
- == static void p_b_eclass(struct parse *p, cset *cs);
- *
- * This implementation is incomplete. xxx
- */
-static void
-p_b_eclass(struct parse *p, cset *cs)
-{
- wint_t c;
-
- c = p_b_coll_elem(p, '=');
- CHadd(p, cs, c);
-}
-
/*
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
== static wint_t p_b_symbol(struct parse *p);
Index: lib/libc/regex/regex2.h
===================================================================
--- lib/libc/regex/regex2.h
+++ lib/libc/regex/regex2.h
@@ -36,6 +36,8 @@
* $FreeBSD$
*/
+extern int __collate_equiv_value(const wchar_t);
+
/*
* First, the stuff that ends up in the outside-world include file
= typedef off_t regoff_t;
@@ -122,6 +124,7 @@
unsigned int nranges;
int invert;
int icase;
+ int cequiv;
} cset;
static int
@@ -155,6 +158,9 @@
{
assert(ch >= 0);
+
+ if (cs->cequiv > 0 && cs->cequiv == __collate_equiv_value(ch))
+ return (!cs->invert);
if (ch < NC)
return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^
cs->invert);
Index: lib/libc/tests/regex/Makefile
===================================================================
--- lib/libc/tests/regex/Makefile
+++ lib/libc/tests/regex/Makefile
@@ -3,6 +3,7 @@
PACKAGE= tests
# local test cases
+ATF_TESTS_SH+= cequiv
ATF_TESTS_SH+= multibyte
.include "Makefile.inc"
Index: lib/libc/tests/regex/cequiv.sh
===================================================================
--- /dev/null
+++ lib/libc/tests/regex/cequiv.sh
@@ -0,0 +1,33 @@
+# $FreeBSD$
+
+atf_test_case cequiv
+cequiv_head()
+{
+ atf_set "descr" "Check equivalence-class handling"
+}
+cequiv_body()
+{
+ export LC_ALL="en_US.UTF-8"
+
+ printf 'á' | atf_check -o "inline:á" \
+ sed -ne '/[[=a=]]/p'
+ printf 'é' | atf_check -o "inline:é" \
+ sed -ne '/[[=e=]]/p'
+ printf 'è' | atf_check -o "inline:è" \
+ sed -ne '/[[=e=]]/p'
+ printf 'ê' | atf_check -o "inline:ê" \
+ sed -ne '/[[=e=]]/p'
+ printf 'é' | atf_check -o "inline:é" \
+ sed -ne '/[[=E=]]/p'
+ printf 'è' | atf_check -o "inline:è" \
+ sed -ne '/[[=E=]]/p'
+ printf 'ê' | atf_check -o "inline:ê" \
+ sed -ne '/[[=E=]]/p'
+ printf 'ô' | atf_check -o "inline:ô" \
+ sed -ne '/[[=o=]]/p'
+}
+
+atf_init_test_cases()
+{
+ atf_add_test_case cequiv
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Nov 4, 6:19 AM (16 h, 34 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
24736621
Default Alt Text
D18531.id52464.diff (6 KB)
Attached To
Mode
D18531: PR208117: regex(3): implement equivalence classes
Attached
Detach File
Event Timeline
Log In to Comment