Index: stable/11/lib/libc/string/wcscoll.c =================================================================== --- stable/11/lib/libc/string/wcscoll.c (revision 317646) +++ stable/11/lib/libc/string/wcscoll.c (revision 317647) @@ -1,223 +1,227 @@ /*- - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2002 Tim J. Robbins * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "collate.h" int wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale) { - int len1, len2, pri1, pri2, ret; + int len1, len2, pri1, pri2; wchar_t *tr1 = NULL, *tr2 = NULL; int direc, pass; + int ret = wcscmp(ws1, ws2); FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; - if (table->__collate_load_error) - /* - * Locale has no special collating order or could not be - * loaded, do a fast binary comparison. - */ - return (wcscmp(ws1, ws2)); + if (table->__collate_load_error || ret == 0) + return (ret); - ret = 0; + if (*ws1 == 0 && *ws2 != 0) + return (-1); + if (*ws1 != 0 && *ws2 == 0) + return (1); /* * Once upon a time we had code to try to optimize this, but * it turns out that you really can't make many assumptions * safely. You absolutely have to run this pass by pass, * because some passes will be ignored for a given character, * while others will not. Simpler locales will benefit from * having fewer passes, and most comparisons should resolve * during the primary pass anyway. * * Note that we do one final extra pass at the end to pick * up UNDEFINED elements. There is special handling for them. */ for (pass = 0; pass <= table->info->directive_count; pass++) { const int32_t *st1 = NULL; const int32_t *st2 = NULL; const wchar_t *w1 = ws1; const wchar_t *w2 = ws2; - int check1, check2; /* special pass for UNDEFINED */ if (pass == table->info->directive_count) { - direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; + direc = DIRECTIVE_FORWARD; } else { direc = table->info->directive[pass]; } if (direc & DIRECTIVE_BACKWARD) { wchar_t *bp, *fp, c; + free(tr1); if ((tr1 = wcsdup(w1)) == NULL) - goto fail; + goto end; bp = tr1; fp = tr1 + wcslen(tr1) - 1; while (bp < fp) { c = *bp; *bp++ = *fp; *fp-- = c; } + free(tr2); if ((tr2 = wcsdup(w2)) == NULL) - goto fail; + goto end; bp = tr2; fp = tr2 + wcslen(tr2) - 1; while (bp < fp) { c = *bp; *bp++ = *fp; *fp-- = c; } w1 = tr1; w2 = tr2; } if (direc & DIRECTIVE_POSITION) { + int check1, check2; while (*w1 && *w2) { pri1 = pri2 = 0; check1 = check2 = 1; while ((pri1 == pri2) && (check1 || check2)) { if (check1) { _collate_lookup(table, w1, &len1, &pri1, pass, &st1); if (pri1 < 0) { errno = EINVAL; - goto fail; + goto end; } if (!pri1) { pri1 = COLLATE_MAX_PRIORITY; st1 = NULL; } check1 = (st1 != NULL); } if (check2) { _collate_lookup(table, w2, &len2, &pri2, pass, &st2); if (pri2 < 0) { errno = EINVAL; - goto fail; + goto end; } if (!pri2) { pri2 = COLLATE_MAX_PRIORITY; st2 = NULL; } check2 = (st2 != NULL); } } if (pri1 != pri2) { ret = pri1 - pri2; goto end; } w1 += len1; w2 += len2; } + if (!*w1) { + if (*w2) { + ret = -(int)*w2; + goto end; + } + } else { + ret = *w1; + goto end; + } } else { - while (*w1 && *w2) { - pri1 = pri2 = 0; - check1 = check2 = 1; - while ((pri1 == pri2) && (check1 || check2)) { - while (check1 && *w1) { - _collate_lookup(table, w1, - &len1, &pri1, pass, &st1); - if (pri1 > 0) - break; - if (pri1 < 0) { - errno = EINVAL; - goto fail; - } - st1 = NULL; - w1 += 1; + int vpri1 = 0, vpri2 = 0; + while (*w1 || *w2 || st1 || st2) { + pri1 = 1; + while (*w1 || st1) { + _collate_lookup(table, w1, &len1, &pri1, + pass, &st1); + w1 += len1; + if (pri1 > 0) { + vpri1++; + break; } - check1 = (st1 != NULL); - while (check2 && *w2) { - _collate_lookup(table, w2, - &len2, &pri2, pass, &st2); - if (pri2 > 0) - break; - if (pri2 < 0) { - errno = EINVAL; - goto fail; - } - st2 = NULL; - w2 += 1; + + if (pri1 < 0) { + errno = EINVAL; + goto end; } - check2 = (st2 != NULL); - if (!pri1 || !pri2) + st1 = NULL; + } + pri2 = 1; + while (*w2 || st2) { + _collate_lookup(table, w2, &len2, &pri2, + pass, &st2); + w2 += len2; + if (pri2 > 0) { + vpri2++; break; + } + if (pri2 < 0) { + errno = EINVAL; + goto end; + } + st2 = NULL; } - if (!pri1 || !pri2) + if ((!pri1 || !pri2) && (vpri1 == vpri2)) break; if (pri1 != pri2) { ret = pri1 - pri2; goto end; } - w1 += len1; - w2 += len2; } - } - if (!*w1) { - if (*w2) { - ret = -(int)*w2; + if (vpri1 && !vpri2) { + ret = 1; goto end; } - } else { - ret = *w1; - goto end; + if (!vpri1 && vpri2) { + ret = -1; + goto end; + } } } ret = 0; end: free(tr1); free(tr2); return (ret); - -fail: - ret = wcscmp(ws1, ws2); - goto end; } int wcscoll(const wchar_t *ws1, const wchar_t *ws2) { return wcscoll_l(ws1, ws2, __get_locale()); } Index: stable/11/lib/libc/tests/string/wcscoll_test.c =================================================================== --- stable/11/lib/libc/tests/string/wcscoll_test.c (revision 317646) +++ stable/11/lib/libc/tests/string/wcscoll_test.c (revision 317647) @@ -1,63 +1,156 @@ /*- * Copyright (c) 2016 Baptiste Daroussin + * Copyright 2016 Tom Lane + * Copyright 2017 Nexenta Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include static int cmp(const void *a, const void *b) { const wchar_t wa[2] = { *(const wchar_t *)a, 0 }; const wchar_t wb[2] = { *(const wchar_t *)b, 0 }; return (wcscoll(wa, wb)); } ATF_TC_WITHOUT_HEAD(russian_collation); ATF_TC_BODY(russian_collation, tc) { wchar_t c[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё"; wchar_t res[] = L"aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZаАбБвВгГдДеЕёЁжЖзЗиИйЙкКлЛмМнНоОпПрРсСтТуУфФхХцЦчЧшШщЩъЪыЫьЬэЭюЮяЯ"; ATF_CHECK_MSG(setlocale(LC_ALL, "ru_RU.UTF-8") != NULL, "Fail to set locale to \"ru_RU.UTF-8\""); qsort(c, wcslen(c), sizeof(wchar_t), cmp); ATF_CHECK_MSG(wcscmp(c, res) == 0, "Bad collation, expected: '%ls' got '%ls'", res, c); } +#define NSTRINGS 2000 +#define MAXSTRLEN 20 +#define MAXXFRMLEN (MAXSTRLEN * 20) + +typedef struct { + char sval[MAXSTRLEN]; + char xval[MAXXFRMLEN]; +} cstr; + +ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm); +ATF_TC_BODY(strcoll_vs_strxfrm, tc) +{ + cstr data[NSTRINGS]; + char *curloc; + int i, j; + + curloc = setlocale(LC_ALL, "en_US.UTF-8"); + ATF_CHECK_MSG(curloc != NULL, "Fail to set locale"); + + /* Ensure new random() values on every run */ + srandom((unsigned int) time(NULL)); + + /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */ + for (i = 0; i < NSTRINGS; i++) { + char *p; + int len; + +again: + p = data[i].sval; + len = 1 + (random() % (MAXSTRLEN - 1)); + while (len > 0) { + int c; + /* + * Generate random printable char in ISO8859-1 range. + * Bias towards producing a lot of spaces. + */ + + if ((random() % 16) < 3) { + c = ' '; + } else { + do { + c = random() & 0xFF; + } while (!((c >= ' ' && c <= 127) || + (c >= 0xA0 && c <= 0xFF))); + } + + if (c <= 127) { + *p++ = c; + len--; + } else { + if (len < 2) + break; + /* Poor man's utf8-ification */ + *p++ = 0xC0 + (c >> 6); + len--; + *p++ = 0x80 + (c & 0x3F); + len--; + } + } + *p = '\0'; + /* strxfrm() each string as we produce it */ + errno = 0; + ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval, + MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length " + " string exceeded %d bytes", (int)strlen(data[i].sval), + MAXXFRMLEN); + + /* + * Amend strxfrm() failing on certain characters to be fixed and + * test later + */ + if (errno != 0) + goto again; + } + + for (i = 0; i < NSTRINGS; i++) { + for (j = 0; j < NSTRINGS; j++) { + int sr = strcoll(data[i].sval, data[j].sval); + int sx = strcmp(data[i].xval, data[j].xval); + + ATF_CHECK_MSG(!((sr * sx < 0) || + (sr * sx == 0 && sr + sx != 0)), + "%s: diff for \"%s\" and \"%s\"", + curloc, data[i].sval, data[j].sval); + } + } +} + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, russian_collation); + ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm); return (atf_no_error()); } Index: stable/11 =================================================================== --- stable/11 (revision 317646) +++ stable/11 (revision 317647) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r317034