Changeset View
Standalone View
lib/libc/string/strcoll.c
/*- | /*- | ||||
* Copyright 2010 Nexenta Systems, Inc. All rights reserved. | |||||
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> | * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> | ||||
* at Electronni Visti IA, Kiev, Ukraine. | * at Electronni Visti IA, Kiev, Ukraine. | ||||
* All rights reserved. | * All rights reserved. | ||||
* | * | ||||
* Copyright (c) 2011 The FreeBSD Foundation | * Copyright (c) 2011 The FreeBSD Foundation | ||||
* All rights reserved. | * All rights reserved. | ||||
* Portions of this software were developed by David Chisnall | * Portions of this software were developed by David Chisnall | ||||
* under sponsorship from the FreeBSD Foundation. | * under sponsorship from the FreeBSD Foundation. | ||||
Show All 20 Lines | |||||
* SUCH DAMAGE. | * SUCH DAMAGE. | ||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include <errno.h> | |||||
#include <wchar.h> | |||||
#include "collate.h" | #include "collate.h" | ||||
#include <stdio.h> | |||||
/* | |||||
* In order to properly handle multibyte locales, it's easiest to just | |||||
jilles: easiest | |||||
* convert to wide characters and then use wcscoll. However, if an | |||||
* error occurs, we gracefully fall back to simple strcmp. The caller | |||||
* should check errno. | |||||
Not Done Inline ActionsHmm, this means that strcoll_l() is not transitive, and therefore not suitable for use as a comparison function for qsort(), for example. One fix is to make strings that cannot be converted to wide characters compare greater than strings that can be converted, ordering two strings that cannot be converted by strcmp(). A corresponding change (prepending a byte indicating whether the string could be converted) is needed in strxfrm_l(). jilles: Hmm, this means that strcoll_l() is not transitive, and therefore not suitable for use as a… | |||||
Not Done Inline ActionsSorry, but I do not really understand what you mean here. Would it be possible for you to show me example code that would trigger the issue or, even better, to make that change directly in the collation branch? bapt: Sorry, but I do not really understand what you mean here. Would it be possible for you to show… | |||||
*/ | |||||
int | int | ||||
strcoll_l(const char *s, const char *s2, locale_t locale) | strcoll_l(const char *s, const char *s2, locale_t locale) | ||||
{ | { | ||||
int len, len2, prim, prim2, sec, sec2, ret, ret2; | int ret; | ||||
const char *t, *t2; | wchar_t *t1 = NULL, *t2 = NULL; | ||||
char *tt, *tt2; | wchar_t *w1 = NULL, *w2 = NULL; | ||||
const char *cs1, *cs2; | |||||
mbstate_t mbs1; | |||||
mbstate_t mbs2; | |||||
size_t sz1, sz2; | |||||
memset(&mbs1, 0, sizeof (mbstate_t)); | |||||
memset(&mbs2, 0, sizeof (mbstate_t)); | |||||
/* | |||||
* The mbsrtowcs_l function can set the src pointer to null upon | |||||
* failure, so it should act on a copy to avoid: | |||||
* - sending null pointer to strcmp | |||||
* - having strcoll/strcoll_l change *s or *s2 to null | |||||
*/ | |||||
cs1 = s; | |||||
cs2 = s2; | |||||
FIX_LOCALE(locale); | FIX_LOCALE(locale); | ||||
struct xlocale_collate *table = | struct xlocale_collate *table = | ||||
(struct xlocale_collate*)locale->components[XLC_COLLATE]; | (struct xlocale_collate*)locale->components[XLC_COLLATE]; | ||||
if (table->__collate_load_error) | if (table->__collate_load_error) | ||||
return strcmp(s, s2); | goto error; | ||||
len = len2 = 1; | sz1 = strlen(s) + 1; | ||||
ret = ret2 = 0; | sz2 = strlen(s2) + 1; | ||||
if (table->__collate_substitute_nontrivial) { | |||||
t = tt = __collate_substitute(table, s); | |||||
t2 = tt2 = __collate_substitute(table, s2); | |||||
} else { | |||||
tt = tt2 = NULL; | |||||
t = s; | |||||
t2 = s2; | |||||
} | |||||
while(*t && *t2) { | |||||
prim = prim2 = 0; | |||||
while(*t && !prim) { | |||||
__collate_lookup(table, t, &len, &prim, &sec); | |||||
t += len; | |||||
} | |||||
while(*t2 && !prim2) { | |||||
__collate_lookup(table, t2, &len2, &prim2, &sec2); | |||||
t2 += len2; | |||||
} | |||||
if(!prim || !prim2) | |||||
break; | |||||
if(prim != prim2) { | |||||
ret = prim - prim2; | |||||
goto end; | |||||
} | |||||
if(!ret2) | |||||
ret2 = sec - sec2; | |||||
} | |||||
if(!*t && *t2) | |||||
ret = -(int)((u_char)*t2); | |||||
else if(*t && !*t2) | |||||
ret = (u_char)*t; | |||||
else if(!*t && !*t2) | |||||
ret = ret2; | |||||
end: | |||||
free(tt); | |||||
free(tt2); | |||||
return ret; | /* | ||||
* Simple assumption: conversion to wide format is strictly | |||||
* reducing, i.e. a single byte (or multibyte character) | |||||
* cannot result in multiple wide characters. | |||||
*/ | |||||
if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL) | |||||
goto error; | |||||
w1 = t1; | |||||
if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL) | |||||
goto error; | |||||
w2 = t2; | |||||
if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1) | |||||
goto error; | |||||
if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1) | |||||
goto error; | |||||
ret = wcscoll_l(w1, w2, locale); | |||||
free(t1); | |||||
free(t2); | |||||
return (ret); | |||||
error: | |||||
free(t1); | |||||
free(t2); | |||||
return (strcmp(s, s2)); | |||||
} | } | ||||
int | int | ||||
Done Inline Actionsfree(NULL) does nothing and I think it is OK to rely on that. Unless Illumos code style is different, I think the ifs should be removed. The ifs in the normal path are always true, even. jilles: `free(NULL)` does nothing and I think it is OK to rely on that. Unless Illumos code style is… | |||||
strcoll(const char *s, const char *s2) | strcoll(const char *s, const char *s2) | ||||
{ | { | ||||
return strcoll_l(s, s2, __get_locale()); | return strcoll_l(s, s2, __get_locale()); | ||||
} | } | ||||
easiest