Index: lib/libc/locale/collate.h =================================================================== --- lib/libc/locale/collate.h +++ lib/libc/locale/collate.h @@ -127,7 +127,7 @@ __BEGIN_DECLS int __collate_load_tables(const char *); -int __collate_equiv_value(locale_t, const wchar_t *, size_t); +int __collate_equiv_value(const wchar_t); void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *, int, const int **); int __collate_range_cmp(char, char); Index: lib/libc/locale/collate.c =================================================================== --- lib/libc/locale/collate.c +++ lib/libc/locale/collate.c @@ -659,53 +659,33 @@ /* * __collate_equiv_value returns the primary collation value for the given - * collating symbol specified by str and len. Zero or negative is returned - * if the collating symbol was not found. This function is used by bracket - * code in the TRE regex library. + * collating symbol specified by wc. Zero or negative is returned if the + * collating symbol was not found. */ int -__collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) +__collate_equiv_value(const wchar_t wc) { + locale_t locale; int32_t e; - if (len < 1 || len >= COLLATE_STR_LEN) - return (-1); - + locale = __get_locale(); FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; if (table->__collate_load_error) - return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); - - if (len == 1) { - e = -1; - if (*str <= UCHAR_MAX) - e = table->char_pri_table[*str].pri[0]; - else if (table->info->large_count > 0) { - collate_large_t *match_large; - match_large = largesearch(table, *str); - if (match_large) - e = match_large->pri.pri[0]; - } - if (e == 0) - return (1); - return (e > 0 ? 
e : 0); - } - if (table->info->chain_count > 0) { - wchar_t name[COLLATE_STR_LEN]; - collate_chain_t *match_chain; - int clen; - - wcsncpy (name, str, len); - name[len] = 0; - match_chain = chainsearch(table, name, &clen); - if (match_chain) { - e = match_chain->pri[0]; - if (e == 0) - return (1); - return (e < 0 ? -e : e); - } + return (wc <= UCHAR_MAX ? wc : -1); + + e = -1; + if (wc <= UCHAR_MAX) + e = table->char_pri_table[wc].pri[0]; + else if (table->info->large_count > 0) { + collate_large_t *match_large; + match_large = largesearch(table, wc); + if (match_large) + e = match_large->pri.pri[0]; } - return (0); + if (e == 0) + return (1); + return (e > 0 ? e : 0); } Index: lib/libc/locale/xlocale_private.h =================================================================== --- lib/libc/locale/xlocale_private.h +++ lib/libc/locale/xlocale_private.h @@ -87,7 +87,7 @@ */ struct xlocale_refcounted { /** Number of references to this component. */ - long retain_count; + u_long retain_count; /** Function used to destroy this component, if one is required*/ void(*destructor)(void*); }; Index: lib/libc/regex/regcomp.c =================================================================== --- lib/libc/regex/regcomp.c +++ lib/libc/regex/regcomp.c @@ -129,7 +129,6 @@ static int p_range_cmp(wchar_t c1, wchar_t c2); static void p_b_term(struct parse *p, cset *cs); static void p_b_cclass(struct parse *p, cset *cs); -static void p_b_eclass(struct parse *p, cset *cs); static wint_t p_b_symbol(struct parse *p); static wint_t p_b_coll_elem(struct parse *p, wint_t endc); static wint_t othercase(wint_t ch); @@ -900,7 +899,8 @@ if (cs->invert && p->g->cflags&REG_NEWLINE) cs->bmp['\n' >> 3] |= 1 << ('\n' & 7); - if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */ + /* Optimize singleton set if it's not equivalence-class one */ + if (!cs->cequiv && (ch = singleton(cs)) != OUT) { ordinary(p, ch); freeset(p, cs); } else @@ -966,7 +966,7 @@ (void)REQUIRE(MORE(), REG_EBRACK); c = 
PEEK(); (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); - p_b_eclass(p, cs); + cs->cequiv = __collate_equiv_value(p_b_coll_elem(p, '=')); (void)REQUIRE(MORE(), REG_EBRACK); (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); break; @@ -1032,21 +1032,6 @@ CHaddtype(p, cs, wct); } -/* - - p_b_eclass - parse an equivalence-class name and deal with it - == static void p_b_eclass(struct parse *p, cset *cs); - * - * This implementation is incomplete. xxx - */ -static void -p_b_eclass(struct parse *p, cset *cs) -{ - wint_t c; - - c = p_b_coll_elem(p, '='); - CHadd(p, cs, c); -} - /* - p_b_symbol - parse a character or [..]ed multicharacter collating symbol == static wint_t p_b_symbol(struct parse *p); Index: lib/libc/regex/regex2.h =================================================================== --- lib/libc/regex/regex2.h +++ lib/libc/regex/regex2.h @@ -36,6 +36,8 @@ * $FreeBSD$ */ +#include "../locale/collate.h" + /* * First, the stuff that ends up in the outside-world include file = typedef off_t regoff_t; @@ -122,6 +124,7 @@ unsigned int nranges; int invert; int icase; + int cequiv; } cset; static int @@ -155,6 +158,9 @@ { assert(ch >= 0); + + if (cs->cequiv > 0 && cs->cequiv == __collate_equiv_value(ch)) + return (!cs->invert); if (ch < NC) return (((cs->bmp[ch >> 3] & (1 << (ch & 7))) != 0) ^ cs->invert); Index: lib/libc/tests/regex/Makefile =================================================================== --- lib/libc/tests/regex/Makefile +++ lib/libc/tests/regex/Makefile @@ -3,6 +3,7 @@ PACKAGE= tests # local test cases +ATF_TESTS_SH+= cequiv ATF_TESTS_SH+= multibyte .include "Makefile.inc" Index: lib/libc/tests/regex/cequiv.sh =================================================================== --- /dev/null +++ lib/libc/tests/regex/cequiv.sh @@ -0,0 +1,33 @@ +# $FreeBSD$ + +atf_test_case cequiv +cequiv_head() +{ + atf_set "descr" "Check equivalence-class handling" +} +cequiv_body() +{ + export LC_ALL="en_US.UTF-8" + + printf 'á' | atf_check -o "inline:á" \ + 
sed -ne '/[[=a=]]/p' + printf 'é' | atf_check -o "inline:é" \ + sed -ne '/[[=e=]]/p' + printf 'è' | atf_check -o "inline:è" \ + sed -ne '/[[=e=]]/p' + printf 'ê' | atf_check -o "inline:ê" \ + sed -ne '/[[=e=]]/p' + printf 'é' | atf_check -o "inline:é" \ + sed -ne '/[[=E=]]/p' + printf 'è' | atf_check -o "inline:è" \ + sed -ne '/[[=E=]]/p' + printf 'ê' | atf_check -o "inline:ê" \ + sed -ne '/[[=E=]]/p' + printf 'ô' | atf_check -o "inline:ô" \ + sed -ne '/[[=o=]]/p' +} + +atf_init_test_cases() +{ + atf_add_test_case cequiv +}