Index: projects/collation/lib/libc/locale/ascii.c =================================================================== --- projects/collation/lib/libc/locale/ascii.c (revision 286458) +++ projects/collation/lib/libc/locale/ascii.c (nonexistent) @@ -1,192 +0,0 @@ -/*- - * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. - * Copyright (c) 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Paul Borman at Krystal Technologies. - * - * Copyright (c) 2011 The FreeBSD Foundation - * All rights reserved. - * Portions of this software were developed by David Chisnall - * under sponsorship from the FreeBSD Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include "mblocal.h" - -static size_t _ascii_mbrtowc(wchar_t * __restrict, const char * __restrict, - size_t, mbstate_t * __restrict); -static int _ascii_mbsinit(const mbstate_t *); -static size_t _ascii_mbsnrtowcs(wchar_t * __restrict dst, - const char ** __restrict src, size_t nms, size_t len, - mbstate_t * __restrict ps __unused); -static size_t _ascii_wcrtomb(char * __restrict, wchar_t, - mbstate_t * __restrict); -static size_t _ascii_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, - size_t, size_t, mbstate_t * __restrict); - -int -_ascii_init(struct xlocale_ctype *l,_RuneLocale *rl) -{ - - l->__mbrtowc = _ascii_mbrtowc; - l->__mbsinit = _ascii_mbsinit; - l->__mbsnrtowcs = _ascii_mbsnrtowcs; - l->__wcrtomb = _ascii_wcrtomb; - l->__wcsnrtombs = _ascii_wcsnrtombs; - l->runes = rl; - l->__mb_cur_max = 1; - l->__mb_sb_limit = 128; - return(0); -} - -static int -_ascii_mbsinit(const mbstate_t *ps __unused) -{ - - /* - * Encoding is not state dependent - we are always in the - * initial state. 
- */ - return (1); -} - -static size_t -_ascii_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) -{ - - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); - if (n == 0) - /* Incomplete multibyte sequence */ - return ((size_t)-2); - if (*s & 0x80) { - errno = EILSEQ; - return ((size_t)-1); - } - if (pwc != NULL) - *pwc = (unsigned char)*s; - return (*s == '\0' ? 0 : 1); -} - -static size_t -_ascii_wcrtomb(char * __restrict s, wchar_t wc, - mbstate_t * __restrict ps __unused) -{ - - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (1); - if (wc < 0 || wc > 127) { - errno = EILSEQ; - return ((size_t)-1); - } - *s = (unsigned char)wc; - return (1); -} - -static size_t -_ascii_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, - size_t nms, size_t len, mbstate_t * __restrict ps __unused) -{ - const char *s; - size_t nchr; - - if (dst == NULL) { - for (s = *src; nms > 0 && *s != '\0'; s++, nms--) { - if (*s & 0x80) { - errno = EILSEQ; - return ((size_t)-1); - } - } - return (s - *src); - } - - s = *src; - nchr = 0; - while (len-- > 0 && nms-- > 0) { - if (*s & 0x80) { - errno = EILSEQ; - return ((size_t)-1); - } - if ((*dst++ = (unsigned char)*s++) == L'\0') { - *src = NULL; - return (nchr); - } - nchr++; - } - *src = s; - return (nchr); -} - -static size_t -_ascii_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, - size_t nwc, size_t len, mbstate_t * __restrict ps __unused) -{ - const wchar_t *s; - size_t nchr; - - if (dst == NULL) { - for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { - if (*s < 0 || *s > 127) { - errno = EILSEQ; - return ((size_t)-1); - } - } - return (s - *src); - } - - s = *src; - nchr = 0; - while (len-- > 0 && nwc-- > 0) { - if (*s < 0 || *s > 127) { - errno = EILSEQ; - return ((size_t)-1); - } - if ((*dst++ = *s++) == '\0') { - *src = NULL; - return (nchr); - } - nchr++; - } - *src = s; - return (nchr); -} - 
Property changes on: projects/collation/lib/libc/locale/ascii.c ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/collation/lib/libc/locale/Makefile.inc =================================================================== --- projects/collation/lib/libc/locale/Makefile.inc (revision 286458) +++ projects/collation/lib/libc/locale/Makefile.inc (revision 286459) @@ -1,89 +1,89 @@ # from @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 # $FreeBSD$ # locale sources .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/locale ${LIBC_SRCTOP}/locale -SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ +SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ mbrtowc.c mbsinit.c mbsnrtowcs.c \ mbsrtowcs.c mbtowc.c mbstowcs.c \ mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rpmatch.c \ rune.c \ runetype.c setlocale.c setrunelocale.c \ table.c \ tolower.c toupper.c utf8.c wcrtomb.c wcsnrtombs.c \ wcsrtombs.c wcsftime.c \ wcstof.c wcstod.c \ wcstoimax.c wcstol.c wcstold.c wcstoll.c \ wcstombs.c \ wcstoul.c wcstoull.c wcstoumax.c wctob.c wctomb.c wctrans.c wctype.c \ wcwidth.c\ xlocale.c .if ${MK_ICONV} != "no" SRCS+= c16rtomb_iconv.c c32rtomb_iconv.c mbrtoc16_iconv.c mbrtoc32_iconv.c .else SRCS+= c16rtomb.c c32rtomb.c mbrtoc16.c mbrtoc32.c .endif SYM_MAPS+=${LIBC_SRCTOP}/locale/Symbol.map MAN+= btowc.3 \ ctype_l.3 \ ctype.3 digittoint.3 isalnum.3 isalpha.3 isascii.3 isblank.3 iscntrl.3 \ isdigit.3 isgraph.3 isideogram.3 islower.3 isphonogram.3 isprint.3 \ ispunct.3 isrune.3 isspace.3 isspecial.3 \ isupper.3 iswalnum.3 iswalnum_l.3 isxdigit.3 \ localeconv.3 mblen.3 mbrlen.3 \ mbrtowc.3 \ mbsinit.3 \ mbsrtowcs.3 mbstowcs.3 mbtowc.3 multibyte.3 \ nextwctype.3 nl_langinfo.3 rpmatch.3 \ setlocale.3 toascii.3 tolower.3 toupper.3 
towlower.3 towupper.3 \ wcsftime.3 \ wcrtomb.3 \ wcsrtombs.3 wcstod.3 wcstol.3 wcstombs.3 wctomb.3 \ wctrans.3 wctype.3 wcwidth.3 \ duplocale.3 freelocale.3 newlocale.3 querylocale.3 uselocale.3 xlocale.3 MAN+= big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf8.5 MLINKS+=btowc.3 wctob.3 MLINKS+=isdigit.3 isnumber.3 MLINKS+=isgraph.3 isgraph_l.3 MLINKS+=islower.3 islower_l.3 MLINKS+=ispunct.3 ispunct_l.3 MLINKS+=isspace.3 isspace_l.3 MLINKS+=nl_langinfo.3 nl_langinfo_l.3 MLINKS+=iswalnum.3 iswalpha.3 iswalnum.3 iswascii.3 iswalnum.3 iswblank.3 \ iswalnum.3 iswcntrl.3 iswalnum.3 iswdigit.3 iswalnum.3 iswgraph.3 \ iswalnum.3 iswhexnumber.3 \ iswalnum.3 iswideogram.3 iswalnum.3 iswlower.3 iswalnum.3 iswnumber.3 \ iswalnum.3 iswphonogram.3 iswalnum.3 iswprint.3 iswalnum.3 iswpunct.3 \ iswalnum.3 iswrune.3 iswalnum.3 iswspace.3 iswalnum.3 iswspecial.3 \ iswalnum.3 iswupper.3 iswalnum.3 iswxdigit.3 MLINKS+=iswalnum_l.3 iswalpha_l.3 iswalnum_l.3 iswcntrl_l.3 \ iswalnum_l.3 iswctype_l.3 iswalnum_l.3 iswdigit_l.3 \ iswalnum_l.3 iswgraph_l.3 iswalnum_l.3 iswlower_l.3 \ iswalnum_l.3 iswprint_l.3 iswalnum_l.3 iswpunct_l.3 \ iswalnum_l.3 iswspace_l.3 iswalnum_l.3 iswupper_l.3 \ iswalnum_l.3 iswxdigit_l.3 iswalnum_l.3 towlower_l.3 \ iswalnum_l.3 towupper_l.3 iswalnum_l.3 wctype_l.3 \ iswalnum_l.3 iswblank_l.3 iswalnum_l.3 iswhexnumber_l.3 \ iswalnum_l.3 iswideogram_l.3 iswalnum_l.3 iswnumber_l.3 \ iswalnum_l.3 iswphonogram_l.3 iswalnum_l.3 iswrune_l.3 \ iswalnum_l.3 iswspecial_l.3 iswalnum_l.3 nextwctype_l.3 \ iswalnum_l.3 towctrans_l.3 iswalnum_l.3 wctrans_l.3 MLINKS+=isxdigit.3 ishexnumber.3 MLINKS+=mbrtowc.3 mbrtoc16.3 mbrtowc.3 mbrtoc32.3 MLINKS+=mbsrtowcs.3 mbsnrtowcs.3 MLINKS+=wcrtomb.3 c16rtomb.3 wcrtomb.3 c32rtomb.3 MLINKS+=wcsrtombs.3 wcsnrtombs.3 MLINKS+=wcstod.3 wcstof.3 wcstod.3 wcstold.3 MLINKS+=wcstol.3 wcstoul.3 wcstol.3 wcstoll.3 wcstol.3 wcstoull.3 \ wcstol.3 wcstoimax.3 wcstol.3 wcstoumax.3 MLINKS+=wctrans.3 towctrans.3 MLINKS+=wctype.3 iswctype.3 Index: 
projects/collation/lib/libc/locale/big5.c =================================================================== --- projects/collation/lib/libc/locale/big5.c (revision 286458) +++ projects/collation/lib/libc/locale/big5.c (revision 286459) @@ -1,180 +1,200 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)big5.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" extern int __mb_sb_limit; static size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _BIG5_mbsinit(const mbstate_t *); static size_t _BIG5_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _BIG5_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _BIG5_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; } _BIG5State; int _BIG5_init(struct xlocale_ctype *l, _RuneLocale *rl) { l->__mbrtowc = _BIG5_mbrtowc; l->__wcrtomb = _BIG5_wcrtomb; + l->__mbsnrtowcs = _BIG5_mbsnrtowcs; + l->__wcsnrtombs = _BIG5_wcsnrtombs; l->__mbsinit = _BIG5_mbsinit; l->runes = rl; l->__mb_cur_max = 2; l->__mb_sb_limit = 128; return (0); } static int _BIG5_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _BIG5State *)ps)->ch == 0); } static __inline int 
_big5_check(u_int c) { c &= 0xff; return ((c >= 0xa1 && c <= 0xfe) ? 2 : 1); } static size_t _BIG5_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _BIG5State *bs; wchar_t wc; size_t len; bs = (_BIG5State *)ps; if ((bs->ch & ~0xFF) != 0) { /* Bad conversion state. */ errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (bs->ch != 0) { if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (bs->ch << 8) | (*s & 0xFF); if (pwc != NULL) *pwc = wc; bs->ch = 0; return (1); } len = (size_t)_big5_check(*s); wc = *s++ & 0xff; if (len == 2) { if (n < 2) { /* Incomplete multibyte sequence */ bs->ch = wc; return ((size_t)-2); } if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; - return (2); + return (2); } else { if (pwc != NULL) *pwc = wc; return (wc == L'\0' ? 0 : 1); } } static size_t _BIG5_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _BIG5State *bs; bs = (_BIG5State *)ps; if (bs->ch != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc & 0x8000) { *s++ = (wc >> 8) & 0xff; *s = wc & 0xff; return (2); } *s = wc & 0xff; return (1); +} + +static size_t +_BIG5_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _BIG5_mbrtowc)); +} + +static size_t +_BIG5_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _BIG5_wcrtomb)); } Index: projects/collation/lib/libc/locale/collate.c =================================================================== --- projects/collation/lib/libc/locale/collate.c (revision 286458) +++ 
projects/collation/lib/libc/locale/collate.c (revision 286459) @@ -1,701 +1,702 @@ /*- + * Copyright 2014 Garrett D'Amore * Copright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * Adapted to xlocale by John Marino */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "collate.h" #include "setlocale.h" #include "ldpart.h" struct xlocale_collate __xlocale_global_collate = { {{0}, "C"}, 1, 0, 0, 0 }; struct xlocale_collate __xlocale_C_collate = { {{0}, "C"}, 1, 0, 0, 0 }; #include "libc_private.h" int __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); static void destruct_collate(void *t) { struct xlocale_collate *table = t; if (table->map && (table->maplen > 0)) { (void) munmap(table->map, table->maplen); } free(t); } void * __collate_load(const char *encoding, __unused locale_t unused) { if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { return &__xlocale_C_collate; } struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); table->header.header.destructor = destruct_collate; // FIXME: Make sure that _LDP_CACHE is never returned. We should be doing // the caching outside of this section if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { xlocale_release(table); return NULL; } return table; } /** * Load the collation tables for the specified encoding into the global table. */ int __collate_load_tables(const char *encoding) { int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate); return ret; } int __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) { int i, chains, z; char buf[PATH_MAX]; char *TMP; char *map; collate_info_t *info; struct stat sbuf; int fd; /* 'encoding' must be already checked. 
*/ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { table->__collate_load_error = 1; return (_LDP_CACHE); } (void) snprintf(buf, sizeof (buf), "%s/%s/LC_COLLATE", _PathLocale, encoding); if ((fd = _open(buf, O_RDONLY)) < 0) return (_LDP_ERROR); if (_fstat(fd, &sbuf) < 0) { (void) _close(fd); return (_LDP_ERROR); } if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { (void) _close(fd); errno = EINVAL; return (_LDP_ERROR); } map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); (void) _close(fd); if ((TMP = map) == NULL) { return (_LDP_ERROR); } if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } TMP += COLLATE_STR_LEN; info = (void *)TMP; TMP += sizeof (*info); if ((info->directive_count < 1) || (info->directive_count >= COLL_WEIGHTS_MAX) || ((chains = info->chain_count) < 0)) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + (sizeof (collate_chain_t) * chains) + (sizeof (collate_large_t) * info->large_count); for (z = 0; z < (info->directive_count); z++) { i += sizeof (collate_subst_t) * info->subst_count[z]; } if (i != (sbuf.st_size - (TMP - map))) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } table->char_pri_table = (void *)TMP; TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); for (z = 0; z < info->directive_count; z++) { if (info->subst_count[z] > 0) { table->subst_table[z] = (void *)TMP; TMP += info->subst_count[z] * sizeof (collate_subst_t); } else { table->subst_table[z] = NULL; } } if (chains > 0) { table->chain_pri_table = (void *)TMP; TMP += chains * sizeof (collate_chain_t); } else table->chain_pri_table = NULL; if (info->large_count > 0) table->large_pri_table = (void *)TMP; else table->large_pri_table = NULL; table->info = info; table->__collate_load_error = 0; return (_LDP_LOADED); } /* * Note: for performance reasons, we have expanded 
bsearch here. This avoids * function call overhead with each comparison. */ static int32_t * substsearch(struct xlocale_collate *table, const wchar_t key, int pass) { collate_subst_t *p; int n = table->info->subst_count[pass]; if (n == 0) return (NULL); if (pass >= table->info->directive_count) return (NULL); if (!(key & COLLATE_SUBST_PRIORITY)) return (NULL); p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); return (p->pri); } static collate_chain_t * chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) { int low; int high; int next, compar, l; collate_chain_t *p; collate_chain_t *tab; if (table->info->chain_count == 0) return (NULL); low = 0; high = table->info->chain_count - 1; tab = table->chain_pri_table; while (low <= high) { next = (low + high) / 2; p = tab + next; compar = *key - *p->str; if (compar == 0) { l = wcsnlen(p->str, COLLATE_STR_LEN); compar = wcsncmp(key, p->str, l); if (compar == 0) { *len = l; return (p); } } if (compar > 0) low = next + 1; else high = next - 1; } return (NULL); } static collate_large_t * largesearch(struct xlocale_collate *table, const wchar_t key) { int low = 0; int high = table->info->large_count - 1; int next, compar; collate_large_t *p; collate_large_t *tab = table->large_pri_table; if (table->info->large_count == 0) return (NULL); while (low <= high) { next = (low + high) / 2; p = tab + next; compar = key - p->val; if (compar == 0) return (p); if (compar > 0) low = next + 1; else high = next - 1; } return (NULL); } void _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, int *pri, int which, const int **state) { collate_chain_t *p2; collate_large_t *match; int p, l; const int *sptr; /* * If this is the "last" pass for the UNDEFINED, then * we just return the priority itself. */ if (which >= table->info->directive_count) { *pri = *t; *len = 1; *state = NULL; return; } /* * If we have remaining substitution data from a previous * call, consume it first. 
*/ if ((sptr = *state) != NULL) { *pri = *sptr; sptr++; *state = *sptr ? sptr : NULL; *len = 0; return; } /* No active substitutions */ *len = 1; /* * Check for composites such as dipthongs that collate as a * single element (aka chains or collating-elements). */ if (((p2 = chainsearch(table, t, &l)) != NULL) && ((p = p2->pri[which]) >= 0)) { *len = l; *pri = p; } else if (*t <= UCHAR_MAX) { /* * Character is a small (8-bit) character. * We just look these up directly for speed. */ *pri = table->char_pri_table[*t].pri[which]; } else if ((table->info->large_count > 0) && ((match = largesearch(table, *t)) != NULL)) { /* * Character was found in the extended table. */ *pri = match->pri.pri[which]; } else { /* * Character lacks a specific definition. */ if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { /* Mask off sign bit to prevent ordering confusion. */ *pri = (*t & COLLATE_MAX_PRIORITY); } else { *pri = table->info->undef_pri[which]; } /* No substitutions for undefined characters! */ return; } /* * Try substituting (expanding) the character. We are * currently doing this *after* the chain compression. I * think it should not matter, but this way might be slightly * faster. * * We do this after the priority search, as this will help us * to identify a single key value. In order for this to work, * its important that the priority assigned to a given element * to be substituted be unique for that level. The localedef * code ensures this for us. */ if ((sptr = substsearch(table, *pri, which)) != NULL) { if ((*pri = *sptr) != 0) { sptr++; *state = *sptr ? sptr : NULL; } } } /* * This is the meaty part of wcsxfrm & strxfrm. Note that it does * NOT NULL terminate. That is left to the caller. 
*/
size_t
_collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf,
    size_t room)
{
	int		pri;
	int		len;
	const wchar_t	*t;
	wchar_t		*tr = NULL;
	int		direc;
	int		pass;
	const int32_t	*state;
	size_t		want = 0;
	size_t		need = 0;

	/*
	 * One pass per collation level, plus a final pass that emits the
	 * raw character values (the "UNDEFINED" pass).  At most 'room'
	 * elements are stored through 'xf'; the return value counts the
	 * elements through the last priority emitted, so a trailing level
	 * separator is not included.  Returns (size_t)-1 with errno set on
	 * failure.
	 */
	for (pass = 0; pass <= table->info->directive_count; pass++) {

		state = NULL;

		if (pass != 0) {
			/* insert level separator from the previous pass */
			if (room) {
				*xf++ = 1;
				room--;
			}
			want++;
		}

		/* special pass for undefined */
		if (pass == table->info->directive_count) {
			direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
		} else {
			direc = table->info->directive[pass];
		}

		t = src;

		if (direc & DIRECTIVE_BACKWARD) {
			wchar_t *bp, *fp, c;
			size_t trlen;

			/* free(NULL) is a no-op, so no guard is needed. */
			free(tr);
			if ((tr = wcsdup(t)) == NULL) {
				errno = ENOMEM;
				goto fail;
			}
			/*
			 * Reverse the copy in place.  Skip strings shorter
			 * than two characters: nothing to swap, and for an
			 * empty string "tr + 0 - 1" would point before the
			 * buffer.
			 */
			trlen = wcslen(tr);
			if (trlen > 1) {
				bp = tr;
				fp = tr + trlen - 1;
				while (bp < fp) {
					c = *bp;
					*bp++ = *fp;
					*fp-- = c;
				}
			}
			t = (const wchar_t *)tr;
		}

		if (direc & DIRECTIVE_POSITION) {
			/* Positional: ignored elements still take a slot. */
			while (*t || state) {
				_collate_lookup(table, t, &len, &pri, pass,
				    &state);
				t += len;
				if (pri <= 0) {
					if (pri < 0) {
						errno = EINVAL;
						goto fail;
					}
					pri = COLLATE_MAX_PRIORITY;
				}
				if (room) {
					*xf++ = pri;
					room--;
				}
				want++;
				need = want;
			}
		} else {
			/* Non-positional: zero priorities are skipped. */
			while (*t || state) {
				_collate_lookup(table, t, &len, &pri, pass,
				    &state);
				t += len;
				if (pri <= 0) {
					if (pri < 0) {
						errno = EINVAL;
						goto fail;
					}
					continue;
				}
				if (room) {
					*xf++ = pri;
					room--;
				}
				want++;
				need = want;
			}
		}
	}
	free(tr);
	return (need);

fail:
	free(tr);
	return ((size_t)(-1));
}

/* * In the non-POSIX case, we transform each character into a string of * characters representing the character's priority. Since char is usually * signed, we are limited by 7 bits per byte. To avoid zero, we need to add * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 * bits per byte. * * It turns out that we sometimes have real priorities that are * 31-bits wide. (But: be careful using priorities where the high * order bit is set -- i.e. the priority is negative. The sort order * may be surprising!)
* * TODO: This would be a good area to optimize somewhat. It turns out * that real prioririties *except for the last UNDEFINED pass* are generally * very small. We need the localedef code to precalculate the max * priority for us, and ideally also give us a mask, and then we could * severely limit what we expand to. */ #define XFRM_BYTES 6 #define XFRM_OFFSET ('0') /* make all printable characters */ #define XFRM_SHIFT 6 #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ static int xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) { /* we use unsigned to ensure zero fill on right shift */ uint32_t val = (uint32_t)table->info->pri_count[pass]; int nc = 0; while (val) { *p = (pri & XFRM_MASK) + XFRM_OFFSET; pri >>= XFRM_SHIFT; val >>= XFRM_SHIFT; p++; nc++; } return (nc); } size_t _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, size_t room) { int pri; int len; const wchar_t *t; wchar_t *tr = NULL; int direc; int pass; const int32_t *state; size_t want = 0; size_t need = 0; int b; uint8_t buf[XFRM_BYTES]; for (pass = 0; pass <= table->info->directive_count; pass++) { state = NULL; if (pass != 0) { /* insert level separator from the previous pass */ if (room) { *xf++ = XFRM_SEP; room--; } want++; } /* special pass for undefined */ if (pass == table->info->directive_count) { direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; } else { direc = table->info->directive[pass]; } t = src; if (direc & DIRECTIVE_BACKWARD) { wchar_t *bp, *fp, c; if (tr) free(tr); if ((tr = wcsdup(t)) == NULL) { errno = ENOMEM; goto fail; } bp = tr; fp = tr + wcslen(tr) - 1; while (bp < fp) { c = *bp; *bp++ = *fp; *fp-- = c; } t = (const wchar_t *)tr; } if (direc & DIRECTIVE_POSITION) { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } pri = COLLATE_MAX_PRIORITY; } b = xfrm(table, buf, pri, pass); want 
+= b; if (room) { while (b) { b--; if (room) { *xf++ = buf[b]; room--; } } } need = want; } } else { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } continue; } b = xfrm(table, buf, pri, pass); want += b; if (room) { while (b) { b--; if (room) { *xf++ = buf[b]; room--; } } } need = want; } } } if (tr) free(tr); return (need); fail: if (tr) free(tr); return ((size_t)(-1)); } /* * __collate_equiv_value returns the primary collation value for the given * collating symbol specified by str and len. Zero or negative is returned * if the collating symbol was not found. This function is used by bracket * code in the TRE regex library. */ int __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) { int32_t e; if (len < 1 || len >= COLLATE_STR_LEN) return (-1); FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; if (table->__collate_load_error) return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); if (len == 1) { e = -1; if (*str <= UCHAR_MAX) e = table->char_pri_table[*str].pri[0]; else if (table->info->large_count > 0) { collate_large_t *match_large; match_large = largesearch(table, *str); if (match_large) e = match_large->pri.pri[0]; } if (e == 0) return (1); return (e > 0 ? e : 0); } if (table->info->chain_count > 0) { wchar_t name[COLLATE_STR_LEN]; collate_chain_t *match_chain; int clen; wcsncpy (name, str, len); name[len] = 0; match_chain = chainsearch(table, name, &clen); if (match_chain) { e = match_chain->pri[0]; if (e == 0) return (1); return (e < 0 ? -e : e); } } return (0); } Index: projects/collation/lib/libc/locale/collate.h =================================================================== --- projects/collation/lib/libc/locale/collate.h (revision 286458) +++ projects/collation/lib/libc/locale/collate.h (revision 286459) @@ -1,137 +1,138 @@ /*- + * Copyright 2010 Nexenta Systmes, Inc. 
All rights reserved. * Copyright (c) 1995 Alex Tatmanjants * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _COLLATE_H_ #define _COLLATE_H_ #include #include #include #include "xlocale_private.h" /* * Work around buildworld bootstrapping from older systems whos limits.h * sets COLL_WEIGHTS_MAX to 0. 
*/ #if COLL_WEIGHTS_MAX == 0 #undef COLL_WEIGHTS_MAX #define COLL_WEIGHTS_MAX 10 #endif #define COLLATE_STR_LEN 24 /* should be 64-bit multiple */ #define COLLATE_VERSION "BSD 1.0\n" #define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */ #define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */ #define DIRECTIVE_UNDEF 0x00 #define DIRECTIVE_FORWARD 0x01 #define DIRECTIVE_BACKWARD 0x02 #define DIRECTIVE_POSITION 0x04 #define DIRECTIVE_UNDEFINED 0x08 /* special last weight for UNDEFINED */ #define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD) /* * The collate file format is as follows: * * char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION * collate_info_t info; // see below, includes padding * collate_char_t char_data[256]; // 8 bit char values * collate_subst_t subst[*]; // 0 or more substitutions * collate_chain_t chains[*]; // 0 or more chains * collate_large_t large[*]; // extended char priorities * * Note that all structures must be 32-bit aligned, as each structure * contains 32-bit member fields. The entire file is mmap'd, so it's * critical that alignment be observed. It is not generally safe to * use any 64-bit values in the structures. 
*/ typedef struct collate_info { uint8_t directive_count; uint8_t directive[COLL_WEIGHTS_MAX]; int32_t pri_count[COLL_WEIGHTS_MAX]; int32_t flags; int32_t chain_count; int32_t large_count; int32_t subst_count[COLL_WEIGHTS_MAX]; int32_t undef_pri[COLL_WEIGHTS_MAX]; } collate_info_t; typedef struct collate_char { int32_t pri[COLL_WEIGHTS_MAX]; } collate_char_t; typedef struct collate_chain { wchar_t str[COLLATE_STR_LEN]; int32_t pri[COLL_WEIGHTS_MAX]; } collate_chain_t; typedef struct collate_large { int32_t val; collate_char_t pri; } collate_large_t; typedef struct collate_subst { int32_t key; int32_t pri[COLLATE_STR_LEN]; } collate_subst_t; struct xlocale_collate { struct xlocale_component header; int __collate_load_error; char * map; size_t maplen; collate_info_t *info; collate_char_t *char_pri_table; collate_large_t *large_pri_table; collate_chain_t *chain_pri_table; collate_subst_t *subst_table[COLL_WEIGHTS_MAX]; }; __BEGIN_DECLS int __collate_load_tables(const char *); int __collate_equiv_value(locale_t, const wchar_t *, size_t); void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *, int, const int **); int __collate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t); size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *, size_t); size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *, size_t); __END_DECLS #endif /* !_COLLATE_H_ */ Index: projects/collation/lib/libc/locale/euc.c =================================================================== --- projects/collation/lib/libc/locale/euc.c (revision 286458) +++ projects/collation/lib/libc/locale/euc.c (revision 286459) @@ -1,273 +1,453 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" extern int __mb_sb_limit; -static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, +static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); +static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t, + mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); + +static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); -static int _EUC_mbsinit(const mbstate_t *); -static size_t _EUC_wcrtomb(char * __restrict, wchar_t, +static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); + +static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_TW_wcrtomb(char * 
__restrict, wchar_t, + mbstate_t * __restrict); -typedef struct { - int count[4]; - wchar_t bits[4]; - wchar_t mask; -} _EucInfo; +static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_CN_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_JP_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_KR_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_TW_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + +static int _EUC_mbsinit(const mbstate_t *); + typedef struct { wchar_t ch; int set; int want; } _EucState; +static int +_EUC_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _EucState *)ps)->want == 0); +} + +/* + * EUC-CN uses CS0, CS1 and CS2 (4 bytes). 
+ */ int -_EUC_init(struct xlocale_ctype *l, _RuneLocale *rl) +_EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl) { - _EucInfo *ei; - int x, new__mb_cur_max; - char *v, *e; + l->__mbrtowc = _EUC_CN_mbrtowc; + l->__wcrtomb = _EUC_CN_wcrtomb; + l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs; + l->__wcsnrtombs = _EUC_CN_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; - if (rl->__variable == NULL) - return (EFTYPE); + l->runes = rl; + l->__mb_cur_max = 4; + l->__mb_sb_limit = 256; + return (0); +} - v = (char *)rl->__variable; +static size_t +_EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) +{ + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); +} - while (*v == ' ' || *v == '\t') - ++v; +static size_t +_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc)); +} - if ((ei = malloc(sizeof(_EucInfo))) == NULL) - return (errno == 0 ? 
ENOMEM : errno); +static size_t +_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); +} - new__mb_cur_max = 0; - for (x = 0; x < 4; ++x) { - ei->count[x] = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - if (new__mb_cur_max < ei->count[x]) - new__mb_cur_max = ei->count[x]; - while (*v == ' ' || *v == '\t') - ++v; - ei->bits[x] = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - while (*v == ' ' || *v == '\t') - ++v; - } - ei->mask = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - rl->__variable = ei; - rl->__variable_len = sizeof(_EucInfo); - l->runes = rl; - l->__mb_cur_max = new__mb_cur_max; - l->__mbrtowc = _EUC_mbrtowc; - l->__wcrtomb = _EUC_wcrtomb; +static size_t +_EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb)); +} + +/* + * EUC-KR uses only CS0 and CS1. 
+ */ +int +_EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_KR_mbrtowc; + l->__wcrtomb = _EUC_KR_wcrtomb; + l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs; + l->__wcsnrtombs = _EUC_KR_wcsnrtombs; l->__mbsinit = _EUC_mbsinit; - l->__mb_sb_limit = 256; + + l->runes = rl; + l->__mb_cur_max = 2; + l->__mb_sb_limit = 128; return (0); } -static int -_EUC_mbsinit(const mbstate_t *ps) +static size_t +_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) { + return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0)); +} - return (ps == NULL || ((const _EucState *)ps)->want == 0); +static size_t +_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc)); } -#define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) +static size_t +_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0)); +} -#define _SS2 0x008e -#define _SS3 0x008f +static size_t +_EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb)); +} -#define GR_BITS 0x80808080 /* XXX: to be fixed */ +/* + * EUC-JP uses CS0, CS1, CS2, and CS3. 
+ */ +int +_EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_JP_mbrtowc; + l->__wcrtomb = _EUC_JP_wcrtomb; + l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs; + l->__wcsnrtombs = _EUC_JP_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; -static __inline int -_euc_set(u_int c) + l->runes = rl; + l->__mb_cur_max = 3; + l->__mb_sb_limit = 196; + return (0); +} + +static size_t +_EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) { + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3)); +} - c &= 0xff; - return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); +static size_t +_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc)); } static size_t -_EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, +_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3)); +} + +static size_t +_EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb)); +} + +/* + * EUC-TW uses CS0, CS1, and CS2. 
+ */ +int +_EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_TW_mbrtowc; + l->__wcrtomb = _EUC_TW_wcrtomb; + l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs; + l->__wcsnrtombs = _EUC_TW_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; + + l->runes = rl; + l->__mb_cur_max = 4; + l->__mb_sb_limit = 256; + return (0); +} + +static size_t +_EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) +{ + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); +} + +static size_t +_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc)); +} + +static size_t +_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); +} + +static size_t +_EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb)); +} + +/* + * Common EUC code. 
+ */ + +static size_t +_EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps, + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) +{ _EucState *es; - int i, set, want; + int i, want; wchar_t wc; - const char *os; + unsigned char ch; es = (_EucState *)ps; - if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || - es->set > 3) { + if (es->want < 0 || es->want > MB_CUR_MAX) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); - os = s; - if (es->want == 0) { - want = CEI->count[set = _euc_set(*s)]; - if (set == 2 || set == 3) { - --want; - if (--n == 0) { - /* Incomplete multibyte sequence */ - es->set = set; - es->want = want; - es->ch = 0; - return ((size_t)-2); - } - ++s; - if (*s == '\0') { - errno = EILSEQ; - return ((size_t)-1); - } + /* Fast path for plain ASCII (CS0) */ + if (((ch = (unsigned char)*s) & 0x80) == 0) { + if (pwc != NULL) + *pwc = ch; + return (ch != '\0' ? 1 : 0); } - wc = (unsigned char)*s++; + + if (ch >= 0xa1) { + /* CS1 */ + want = 2; + } else if (ch == cs2) { + want = cs2width; + } else if (ch == cs3) { + want = cs3width; + } else { + errno = EILSEQ; + return ((size_t)-1); + } + + + es->want = want; + es->ch = 0; } else { - set = es->set; want = es->want; wc = es->ch; } - for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { - if (*s == '\0') { - errno = EILSEQ; - return ((size_t)-1); - } - wc = (wc << 8) | (unsigned char)*s++; + + for (i = 0; i < MIN(want, n); i++) { + wc <<= 8; + wc |= *s; + s++; } if (i < want) { /* Incomplete multibyte sequence */ - es->set = set; es->want = want - i; es->ch = wc; return ((size_t)-2); } - wc = (wc & ~CEI->mask) | CEI->bits[set]; if (pwc != NULL) *pwc = wc; es->want = 0; - return (wc == L'\0' ? 0 : s - os); + return (wc == L'\0' ? 
0 : want); } static size_t -_EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) +_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps, + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) { _EucState *es; - wchar_t m, nm; int i, len; + wchar_t nm; es = (_EucState *)ps; if (es->want != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); - m = wc & CEI->mask; - nm = wc & ~m; + if ((wc & ~0x7f) == 0) { + /* Fast path for plain ASCII (CS0) */ + *s = (char)wc; + return (1); + } - if (m == CEI->bits[1]) { -CodeSet1: - /* Codeset 1: The first byte must have 0x80 in it. */ - i = len = CEI->count[1]; - while (i-- > 0) - *s++ = (nm >> (i << 3)) | 0x80; + /* Determine the "length" */ + if ((unsigned)wc > 0xffffff) { + len = 4; + } else if ((unsigned)wc > 0xffff) { + len = 3; + } else if ((unsigned)wc > 0xff) { + len = 2; } else { - if (m == CEI->bits[0]) - i = len = CEI->count[0]; - else if (m == CEI->bits[2]) { - i = len = CEI->count[2]; - *s++ = _SS2; - --i; - /* SS2 designates G2 into GR */ - nm |= GR_BITS; - } else if (m == CEI->bits[3]) { - i = len = CEI->count[3]; - *s++ = _SS3; - --i; - /* SS3 designates G3 into GR */ - nm |= GR_BITS; - } else - goto CodeSet1; /* Bletch */ - while (i-- > 0) - *s++ = (nm >> (i << 3)) & 0xff; + len = 1; + } + + if (len > MB_CUR_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + + /* This first check excludes CS1, which is implicitly valid. 
*/ + if ((wc < 0xa100) || (wc > 0xffff)) { + /* Check for valid CS2 or CS3 */ + nm = (wc >> ((len - 1) * 8)); + if (nm == cs2) { + if (len != cs2width) { + errno = EILSEQ; + return ((size_t)-1); + } + } else if (nm == cs3) { + if (len != cs3width) { + errno = EILSEQ; + return ((size_t)-1); + } + } else { + errno = EILSEQ; + return ((size_t)-1); + } + } + + /* Stash the bytes, least significant last */ + for (i = len - 1; i >= 0; i--) { + s[i] = (wc & 0xff); + wc >>= 8; } return (len); } Index: projects/collation/lib/libc/locale/gb18030.c =================================================================== --- projects/collation/lib/libc/locale/gb18030.c (revision 286458) +++ projects/collation/lib/libc/locale/gb18030.c (revision 286459) @@ -1,224 +1,252 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + /* * PRC National Standard GB 18030-2000 encoding of Chinese text. * * See gb18030(5) for details. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "mblocal.h" static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB18030_mbsinit(const mbstate_t *); static size_t _GB18030_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GB18030_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GB18030_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + typedef struct { int count; u_char bytes[4]; } _GB18030State; int _GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl) { l->__mbrtowc = _GB18030_mbrtowc; l->__wcrtomb = _GB18030_wcrtomb; l->__mbsinit = _GB18030_mbsinit; + l->__mbsnrtowcs = _GB18030_mbsnrtowcs; + l->__wcsnrtombs = _GB18030_wcsnrtombs; l->runes = rl; l->__mb_cur_max = 4; l->__mb_sb_limit = 128; return (0); } static int _GB18030_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _GB18030State *)ps)->count == 0); } static size_t _GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _GB18030State *gs; wchar_t wch; int ch, len, ocount; size_t ncopy; gs = (_GB18030State *)ps; if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { errno = 
EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); memcpy(gs->bytes + gs->count, s, ncopy); ocount = gs->count; gs->count += ncopy; s = (char *)gs->bytes; n = gs->count; if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); /* * Single byte: [00-7f] * Two byte: [81-fe][40-7e,80-fe] * Four byte: [81-fe][30-39][81-fe][30-39] */ ch = (unsigned char)*s++; if (ch <= 0x7f) { len = 1; wch = ch; } else if (ch >= 0x81 && ch <= 0xfe) { wch = ch; if (n < 2) return ((size_t)-2); ch = (unsigned char)*s++; if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) { wch = (wch << 8) | ch; len = 2; } else if (ch >= 0x30 && ch <= 0x39) { /* * Strip high bit off the wide character we will * eventually output so that it is positive when * cast to wint_t on 32-bit twos-complement machines. */ wch = ((wch & 0x7f) << 8) | ch; if (n < 3) return ((size_t)-2); ch = (unsigned char)*s++; if (ch < 0x81 || ch > 0xfe) goto ilseq; wch = (wch << 8) | ch; if (n < 4) return ((size_t)-2); ch = (unsigned char)*s++; if (ch < 0x30 || ch > 0x39) goto ilseq; wch = (wch << 8) | ch; len = 4; } else goto ilseq; } else goto ilseq; if (pwc != NULL) *pwc = wch; gs->count = 0; return (wch == L'\0' ? 0 : len - ocount); ilseq: errno = EILSEQ; return ((size_t)-1); } static size_t _GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _GB18030State *gs; size_t len; int c; gs = (_GB18030State *)ps; if (gs->count != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if ((wc & ~0x7fffffff) != 0) goto ilseq; if (wc & 0x7f000000) { /* Replace high bit that mbrtowc() removed. 
*/ wc |= 0x80000000; c = (wc >> 24) & 0xff; if (c < 0x81 || c > 0xfe) goto ilseq; *s++ = c; c = (wc >> 16) & 0xff; if (c < 0x30 || c > 0x39) goto ilseq; *s++ = c; c = (wc >> 8) & 0xff; if (c < 0x81 || c > 0xfe) goto ilseq; *s++ = c; c = wc & 0xff; if (c < 0x30 || c > 0x39) goto ilseq; *s++ = c; len = 4; } else if (wc & 0x00ff0000) goto ilseq; else if (wc & 0x0000ff00) { c = (wc >> 8) & 0xff; if (c < 0x81 || c > 0xfe) goto ilseq; *s++ = c; c = wc & 0xff; if (c < 0x40 || c == 0x7f || c == 0xff) goto ilseq; *s++ = c; len = 2; } else if (wc <= 0x7f) { *s++ = wc; len = 1; } else goto ilseq; return (len); ilseq: errno = EILSEQ; return ((size_t)-1); +} + +static size_t +_GB18030_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc)); +} + +static size_t +_GB18030_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, size_t len, + mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb)); } Index: projects/collation/lib/libc/locale/gb2312.c =================================================================== --- projects/collation/lib/libc/locale/gb2312.c (revision 286458) +++ projects/collation/lib/libc/locale/gb2312.c (revision 286459) @@ -1,160 +1,187 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2004 Tim J. Robbins. All rights reserved. * Copyright (c) 2003 David Xu * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "mblocal.h" static size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB2312_mbsinit(const mbstate_t *); static size_t _GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GB2312_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GB2312_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + typedef struct { int count; u_char bytes[2]; } _GB2312State; int _GB2312_init(struct xlocale_ctype *l, _RuneLocale *rl) { l->runes = rl; l->__mbrtowc = _GB2312_mbrtowc; l->__wcrtomb = _GB2312_wcrtomb; l->__mbsinit = _GB2312_mbsinit; + l->__mbsnrtowcs = _GB2312_mbsnrtowcs; + l->__wcsnrtombs = _GB2312_wcsnrtombs; l->__mb_cur_max = 2; l->__mb_sb_limit = 128; return (0); } static int _GB2312_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _GB2312State *)ps)->count == 0); } -static __inline int +static int _GB2312_check(const char *str, size_t n) { const u_char *s = (const u_char *)str; if (n == 0) /* Incomplete multibyte sequence */ return (-2); if (s[0] >= 0xa1 && s[0] <= 0xfe) { if (n < 2) /* Incomplete multibyte sequence */ return (-2); if (s[1] < 0xa1 || s[1] > 0xfe) /* Invalid multibyte sequence */ return (-1); return (2); } else if (s[0] & 0x80) { /* Invalid multibyte sequence */ return (-1); - } + } return (1); } static size_t _GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _GB2312State *gs; wchar_t wc; int i, len, ocount; size_t ncopy; gs = (_GB2312State *)ps; if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); memcpy(gs->bytes + gs->count, s, ncopy); 
ocount = gs->count; gs->count += ncopy; s = (char *)gs->bytes; n = gs->count; if ((len = _GB2312_check(s, n)) < 0) return ((size_t)len); wc = 0; i = len; while (i-- > 0) wc = (wc << 8) | (unsigned char)*s++; if (pwc != NULL) *pwc = wc; gs->count = 0; return (wc == L'\0' ? 0 : len - ocount); } static size_t _GB2312_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _GB2312State *gs; gs = (_GB2312State *)ps; if (gs->count != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc & 0x8000) { *s++ = (wc >> 8) & 0xff; *s = wc & 0xff; return (2); } *s = wc & 0xff; return (1); +} + +static size_t +_GB2312_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc)); +} + +static size_t +_GB2312_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, size_t len, + mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb)); } Index: projects/collation/lib/libc/locale/gbk.c =================================================================== --- projects/collation/lib/libc/locale/gbk.c (revision 286458) +++ projects/collation/lib/libc/locale/gbk.c (revision 286459) @@ -1,173 +1,197 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" extern int __mb_sb_limit; static size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GBK_mbsinit(const mbstate_t *); static size_t _GBK_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GBK_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GBK_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; } _GBKState; int _GBK_init(struct xlocale_ctype *l, _RuneLocale *rl) { l->__mbrtowc = _GBK_mbrtowc; l->__wcrtomb = _GBK_wcrtomb; l->__mbsinit = _GBK_mbsinit; + l->__mbsnrtowcs = _GBK_mbsnrtowcs; + l->__wcsnrtombs = _GBK_wcsnrtombs; l->runes = rl; l->__mb_cur_max = 2; l->__mb_sb_limit = 128; return (0); } static int _GBK_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _GBKState *)ps)->ch == 0); } -static __inline int +static int _gbk_check(u_int c) { c &= 0xff; return ((c >= 0x81 && c <= 0xfe) ? 2 : 1); } static size_t _GBK_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _GBKState *gs; wchar_t wc; size_t len; gs = (_GBKState *)ps; if ((gs->ch & ~0xFF) != 0) { /* Bad conversion state. 
*/ errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (gs->ch != 0) { if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (gs->ch << 8) | (*s & 0xFF); if (pwc != NULL) *pwc = wc; gs->ch = 0; return (1); } len = (size_t)_gbk_check(*s); wc = *s++ & 0xff; if (len == 2) { if (n < 2) { /* Incomplete multibyte sequence */ gs->ch = wc; return ((size_t)-2); } if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; - return (2); + return (2); } else { if (pwc != NULL) *pwc = wc; return (wc == L'\0' ? 0 : 1); } } static size_t _GBK_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _GBKState *gs; gs = (_GBKState *)ps; if (gs->ch != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc & 0x8000) { *s++ = (wc >> 8) & 0xff; *s = wc & 0xff; return (2); } *s = wc & 0xff; return (1); +} + +static size_t +_GBK_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GBK_mbrtowc)); +} + +static size_t +_GBK_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GBK_wcrtomb)); } Index: projects/collation/lib/libc/locale/mblocal.h =================================================================== --- projects/collation/lib/libc/locale/mblocal.h (revision 286458) +++ projects/collation/lib/libc/locale/mblocal.h (revision 286459) @@ -1,79 +1,89 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2004 Tim J. Robbins. * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. 
* Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MBLOCAL_H_ #define _MBLOCAL_H_ #include #include "xlocale_private.h" +#define SS2 0x008e +#define SS3 0x008f /* * Conversion function pointers for current encoding. 
*/ struct xlocale_ctype { struct xlocale_component header; _RuneLocale *runes; size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); int (*__mbsinit)(const mbstate_t *); size_t (*__mbsnrtowcs)(wchar_t * __restrict, const char ** __restrict, size_t, size_t, mbstate_t * __restrict); size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); size_t (*__wcsnrtombs)(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict); int __mb_cur_max; int __mb_sb_limit; }; #define XLOCALE_CTYPE(x) ((struct xlocale_ctype*)(x)->components[XLC_CTYPE]) extern struct xlocale_ctype __xlocale_global_ctype; /* * Rune initialization function prototypes. */ int _none_init(struct xlocale_ctype *, _RuneLocale *); -int _ascii_init(struct xlocale_ctype *, _RuneLocale *); int _UTF8_init(struct xlocale_ctype *, _RuneLocale *); -int _EUC_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_CN_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_JP_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_KR_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_TW_init(struct xlocale_ctype *, _RuneLocale *); int _GB18030_init(struct xlocale_ctype *, _RuneLocale *); int _GB2312_init(struct xlocale_ctype *, _RuneLocale *); int _GBK_init(struct xlocale_ctype *, _RuneLocale *); int _BIG5_init(struct xlocale_ctype *, _RuneLocale *); int _MSKanji_init(struct xlocale_ctype *, _RuneLocale *); -extern size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict, - size_t, size_t, mbstate_t * __restrict); -extern size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict, - size_t, size_t, mbstate_t * __restrict); +typedef size_t (*mbrtowc_pfn_t)(wchar_t * __restrict, + const char * __restrict, size_t, mbstate_t * __restrict); +typedef size_t (*wcrtomb_pfn_t)(char * __restrict, wchar_t, + mbstate_t * __restrict); +size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict, + size_t, 
size_t, mbstate_t * __restrict, mbrtowc_pfn_t); +size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict, + size_t, size_t, mbstate_t * __restrict, wcrtomb_pfn_t); #endif /* _MBLOCAL_H_ */ Index: projects/collation/lib/libc/locale/mbsnrtowcs.c =================================================================== --- projects/collation/lib/libc/locale/mbsnrtowcs.c (revision 286458) +++ projects/collation/lib/libc/locale/mbsnrtowcs.c (revision 286459) @@ -1,102 +1,104 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "mblocal.h" size_t mbsnrtowcs_l(wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, mbstate_t * __restrict ps, locale_t locale) { FIX_LOCALE(locale); if (ps == NULL) ps = &locale->mbsnrtowcs; return (XLOCALE_CTYPE(locale)->__mbsnrtowcs(dst, src, nms, len, ps)); } size_t mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, mbstate_t * __restrict ps) { return mbsnrtowcs_l(dst, src, nms, len, ps, __get_locale()); } size_t __mbsnrtowcs_std(wchar_t * __restrict dst, const char ** __restrict src, - size_t nms, size_t len, mbstate_t * __restrict ps) + size_t nms, size_t len, mbstate_t * __restrict ps, + mbrtowc_pfn_t pmbrtowc) { const char *s; size_t nchr; wchar_t wc; size_t nb; - struct xlocale_ctype *ct = XLOCALE_CTYPE(__get_locale()); s = *src; nchr = 0; if (dst == NULL) { for (;;) { - if ((nb = ct->__mbrtowc(&wc, s, nms, ps)) == (size_t)-1) + if ((nb = pmbrtowc(&wc, s, nms, ps)) == (size_t)-1) /* Invalid sequence - mbrtowc() sets errno. 
*/ return ((size_t)-1); else if (nb == 0 || nb == (size_t)-2) return (nchr); s += nb; nms -= nb; nchr++; } /*NOTREACHED*/ } while (len-- > 0) { - if ((nb = ct->__mbrtowc(dst, s, nms, ps)) == (size_t)-1) { + if ((nb = pmbrtowc(dst, s, nms, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } else if (nb == (size_t)-2) { *src = s + nms; return (nchr); } else if (nb == 0) { *src = NULL; return (nchr); } s += nb; nms -= nb; nchr++; dst++; } *src = s; return (nchr); } Index: projects/collation/lib/libc/locale/mskanji.c =================================================================== --- projects/collation/lib/libc/locale/mskanji.c (revision 286458) +++ projects/collation/lib/libc/locale/mskanji.c (revision 286459) @@ -1,165 +1,191 @@ /* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * * ja_JP.SJIS locale table for BSD4.4/rune * version 1.0 * (C) Sin'ichiro MIYATANI / Phase One, Inc * May 12, 1995 * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Phase One, Inc. * 4. The name of Phase One, Inc. 
may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)mskanji.c 1.0 (Phase One) 5/5/95"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" extern int __mb_sb_limit; static size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _MSKanji_mbsinit(const mbstate_t *); static size_t _MSKanji_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _MSKanji_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _MSKanji_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; } _MSKanjiState; int _MSKanji_init(struct xlocale_ctype *l, _RuneLocale *rl) { l->__mbrtowc = _MSKanji_mbrtowc; l->__wcrtomb = _MSKanji_wcrtomb; + l->__mbsnrtowcs = _MSKanji_mbsnrtowcs; + l->__wcsnrtombs = _MSKanji_wcsnrtombs; l->__mbsinit = _MSKanji_mbsinit; l->runes = rl; l->__mb_cur_max = 2; l->__mb_sb_limit = 256; return (0); } static int _MSKanji_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _MSKanjiState *)ps)->ch == 0); } static size_t _MSKanji_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _MSKanjiState *ms; wchar_t wc; ms = (_MSKanjiState *)ps; if ((ms->ch & ~0xFF) != 0) { /* Bad conversion state. 
*/ errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (ms->ch != 0) { if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (ms->ch << 8) | (*s & 0xFF); if (pwc != NULL) *pwc = wc; ms->ch = 0; return (1); } wc = *s++ & 0xff; if ((wc > 0x80 && wc < 0xa0) || (wc >= 0xe0 && wc < 0xfd)) { if (n < 2) { /* Incomplete multibyte sequence */ ms->ch = wc; return ((size_t)-2); } if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; return (2); } else { if (pwc != NULL) *pwc = wc; return (wc == L'\0' ? 0 : 1); } } static size_t _MSKanji_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _MSKanjiState *ms; int len, i; ms = (_MSKanjiState *)ps; if (ms->ch != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); len = (wc > 0x100) ? 2 : 1; for (i = len; i-- > 0; ) *s++ = wc >> (i << 3); return (len); +} + +static size_t +_MSKanji_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, + size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _MSKanji_mbrtowc)); +} + +static size_t +_MSKanji_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, + size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _MSKanji_wcrtomb)); } Index: projects/collation/lib/libc/locale/none.c =================================================================== --- projects/collation/lib/libc/locale/none.c (revision 286458) +++ projects/collation/lib/libc/locale/none.c (revision 286459) @@ -1,222 +1,214 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 
* Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)none.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "mblocal.h" static size_t _none_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _none_mbsinit(const mbstate_t *); static size_t _none_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, mbstate_t * __restrict ps __unused); static size_t _none_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); static size_t _none_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict); /* setup defaults */ int __mb_cur_max = 1; int __mb_sb_limit = 256; /* Expected to be <= _CACHED_RUNES */ int _none_init(struct xlocale_ctype *l, _RuneLocale *rl) { l->__mbrtowc = _none_mbrtowc; l->__mbsinit = _none_mbsinit; l->__mbsnrtowcs = _none_mbsnrtowcs; l->__wcrtomb = _none_wcrtomb; l->__wcsnrtombs = _none_wcsnrtombs; l->runes = rl; l->__mb_cur_max = 1; l->__mb_sb_limit = 256; return(0); } static int _none_mbsinit(const mbstate_t *ps __unused) { /* * Encoding is not state dependent - we are always in the * initial state. */ return (1); } static size_t _none_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps __unused) { if (s == NULL) /* Reset to initial shift state (no-op) */ return (0); if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (pwc != NULL) *pwc = (unsigned char)*s; return (*s == '\0' ? 
0 : 1); } static size_t _none_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps __unused) { if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc < 0 || wc > UCHAR_MAX) { errno = EILSEQ; return ((size_t)-1); } *s = (unsigned char)wc; return (1); } static size_t _none_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, mbstate_t * __restrict ps __unused) { const char *s; size_t nchr; if (dst == NULL) { s = memchr(*src, '\0', nms); return (s != NULL ? s - *src : nms); } s = *src; nchr = 0; while (len-- > 0 && nms-- > 0) { if ((*dst++ = (unsigned char)*s++) == L'\0') { *src = NULL; return (nchr); } nchr++; } *src = s; return (nchr); } static size_t _none_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, size_t nwc, size_t len, mbstate_t * __restrict ps __unused) { const wchar_t *s; size_t nchr; if (dst == NULL) { for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { if (*s < 0 || *s > UCHAR_MAX) { errno = EILSEQ; return ((size_t)-1); } } return (s - *src); } s = *src; nchr = 0; while (len-- > 0 && nwc-- > 0) { if (*s < 0 || *s > UCHAR_MAX) { errno = EILSEQ; return ((size_t)-1); } if ((*dst++ = *s++) == '\0') { *src = NULL; return (nchr); } nchr++; } *src = s; return (nchr); } /* setup defaults */ - -size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, - mbstate_t * __restrict) = _none_mbrtowc; -int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; -size_t (*__mbsnrtowcs)(wchar_t * __restrict, const char ** __restrict, - size_t, size_t, mbstate_t * __restrict) = _none_mbsnrtowcs; -size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict) = - _none_wcrtomb; -size_t (*__wcsnrtombs)(char * __restrict, const wchar_t ** __restrict, - size_t, size_t, mbstate_t * __restrict) = _none_wcsnrtombs; struct xlocale_ctype __xlocale_global_ctype = { {{0}, "C"}, (_RuneLocale*)&_DefaultRuneLocale, _none_mbrtowc, _none_mbsinit, _none_mbsnrtowcs, 
_none_wcrtomb, _none_wcsnrtombs, 1, /* __mb_cur_max, */ 256 /* __mb_sb_limit */ }; struct xlocale_ctype __xlocale_C_ctype = { {{0}, "C"}, (_RuneLocale*)&_DefaultRuneLocale, _none_mbrtowc, _none_mbsinit, _none_mbsnrtowcs, _none_wcrtomb, _none_wcsnrtombs, 1, /* __mb_cur_max, */ 256 /* __mb_sb_limit */ }; Index: projects/collation/lib/libc/locale/rune.c =================================================================== --- projects/collation/lib/libc/locale/rune.c (revision 286458) +++ projects/collation/lib/libc/locale/rune.c (revision 286459) @@ -1,286 +1,251 @@ /*- + * Copyright 2014 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include #include #include +#include +#include +#include #include "un-namespace.h" #include "runefile.h" -_RuneLocale *_Read_RuneMagi(FILE *); - _RuneLocale * -_Read_RuneMagi(FILE *fp) +_Read_RuneMagi(const char *fname) { char *fdata, *data; void *lastp; _FileRuneLocale *frl; _RuneLocale *rl; _FileRuneEntry *frr; _RuneEntry *rr; struct stat sb; int x, saverr; void *variable; _FileRuneEntry *runetype_ext_ranges; _FileRuneEntry *maplower_ext_ranges; _FileRuneEntry *mapupper_ext_ranges; int runetype_ext_len = 0; + int fd; - if (_fstat(fileno(fp), &sb) < 0) + if ((fd = _open(fname, O_RDONLY)) < 0) { + errno = EINVAL; return (NULL); + } - if ((size_t)sb.st_size < sizeof(_FileRuneLocale)) { - errno = EFTYPE; + if (_fstat(fd, &sb) < 0) { + (void) _close(fd); + errno = EINVAL; return (NULL); } - if ((fdata = malloc(sb.st_size)) == NULL) + if ((size_t)sb.st_size < sizeof (_FileRuneLocale)) { + (void) _close(fd); + errno = EINVAL; return (NULL); - - errno = 0; - rewind(fp); /* Someone might have read the magic number once already */ - if (errno) { - saverr = errno; - free(fdata); - errno = saverr; - return (NULL); } - if (fread(fdata, sb.st_size, 1, fp) != 1) { - saverr = errno; - free(fdata); - errno = saverr; + + fdata = 
mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + (void) _close(fd); + if (fdata == NULL) { + errno = EINVAL; return (NULL); } - frl = (_FileRuneLocale *)fdata; + frl = (_FileRuneLocale *)(void *)fdata; lastp = fdata + sb.st_size; variable = frl + 1; - if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof(frl->magic))) { - free(fdata); - errno = EFTYPE; - return (NULL); + if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof (frl->magic))) { + goto invalid; } - frl->variable_len = ntohl(frl->variable_len); - frl->runetype_ext_nranges = ntohl(frl->runetype_ext_nranges); - frl->maplower_ext_nranges = ntohl(frl->maplower_ext_nranges); - frl->mapupper_ext_nranges = ntohl(frl->mapupper_ext_nranges); - - for (x = 0; x < _CACHED_RUNES; ++x) { - frl->runetype[x] = ntohl(frl->runetype[x]); - frl->maplower[x] = ntohl(frl->maplower[x]); - frl->mapupper[x] = ntohl(frl->mapupper[x]); - } - runetype_ext_ranges = (_FileRuneEntry *)variable; variable = runetype_ext_ranges + frl->runetype_ext_nranges; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } maplower_ext_ranges = (_FileRuneEntry *)variable; variable = maplower_ext_ranges + frl->maplower_ext_nranges; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } mapupper_ext_ranges = (_FileRuneEntry *)variable; variable = mapupper_ext_ranges + frl->mapupper_ext_nranges; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } frr = runetype_ext_ranges; for (x = 0; x < frl->runetype_ext_nranges; ++x) { uint32_t *types; - frr[x].min = ntohl(frr[x].min); - frr[x].max = ntohl(frr[x].max); - frr[x].map = ntohl(frr[x].map); if (frr[x].map == 0) { int len = frr[x].max - frr[x].min + 1; types = variable; variable = types + len; runetype_ext_len += len; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } - while (len-- > 0) - types[len] = ntohl(types[len]); } } - frr = maplower_ext_ranges; 
- for (x = 0; x < frl->maplower_ext_nranges; ++x) { - frr[x].min = ntohl(frr[x].min); - frr[x].max = ntohl(frr[x].max); - frr[x].map = ntohl(frr[x].map); - } - - frr = mapupper_ext_ranges; - for (x = 0; x < frl->mapupper_ext_nranges; ++x) { - frr[x].min = ntohl(frr[x].min); - frr[x].max = ntohl(frr[x].max); - frr[x].map = ntohl(frr[x].map); - } if ((char *)variable + frl->variable_len > (char *)lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } /* * Convert from disk format to host format. */ data = malloc(sizeof(_RuneLocale) + (frl->runetype_ext_nranges + frl->maplower_ext_nranges + frl->mapupper_ext_nranges) * sizeof(_RuneEntry) + runetype_ext_len * sizeof(*rr->__types) + frl->variable_len); if (data == NULL) { saverr = errno; - free(fdata); + munmap(fdata, sb.st_size); errno = saverr; return (NULL); } rl = (_RuneLocale *)data; rl->__variable = rl + 1; memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof(rl->__magic)); memcpy(rl->__encoding, frl->encoding, sizeof(rl->__encoding)); - rl->__invalid_rune = 0; rl->__variable_len = frl->variable_len; rl->__runetype_ext.__nranges = frl->runetype_ext_nranges; rl->__maplower_ext.__nranges = frl->maplower_ext_nranges; rl->__mapupper_ext.__nranges = frl->mapupper_ext_nranges; for (x = 0; x < _CACHED_RUNES; ++x) { rl->__runetype[x] = frl->runetype[x]; rl->__maplower[x] = frl->maplower[x]; rl->__mapupper[x] = frl->mapupper[x]; } rl->__runetype_ext.__ranges = (_RuneEntry *)rl->__variable; rl->__variable = rl->__runetype_ext.__ranges + rl->__runetype_ext.__nranges; rl->__maplower_ext.__ranges = (_RuneEntry *)rl->__variable; rl->__variable = rl->__maplower_ext.__ranges + rl->__maplower_ext.__nranges; rl->__mapupper_ext.__ranges = (_RuneEntry *)rl->__variable; rl->__variable = rl->__mapupper_ext.__ranges + rl->__mapupper_ext.__nranges; variable = mapupper_ext_ranges + frl->mapupper_ext_nranges; frr = runetype_ext_ranges; rr = rl->__runetype_ext.__ranges; for (x = 0; x < rl->__runetype_ext.__nranges; ++x) { 
uint32_t *types; rr[x].__min = frr[x].min; rr[x].__max = frr[x].max; rr[x].__map = frr[x].map; if (rr[x].__map == 0) { int len = rr[x].__max - rr[x].__min + 1; types = variable; variable = types + len; rr[x].__types = rl->__variable; rl->__variable = rr[x].__types + len; while (len-- > 0) rr[x].__types[len] = types[len]; } else rr[x].__types = NULL; } frr = maplower_ext_ranges; rr = rl->__maplower_ext.__ranges; for (x = 0; x < rl->__maplower_ext.__nranges; ++x) { rr[x].__min = frr[x].min; rr[x].__max = frr[x].max; rr[x].__map = frr[x].map; } frr = mapupper_ext_ranges; rr = rl->__mapupper_ext.__ranges; for (x = 0; x < rl->__mapupper_ext.__nranges; ++x) { rr[x].__min = frr[x].min; rr[x].__max = frr[x].max; rr[x].__map = frr[x].map; } memcpy(rl->__variable, variable, rl->__variable_len); - free(fdata); + munmap(fdata, sb.st_size); /* * Go out and zero pointers that should be zero. */ if (!rl->__variable_len) rl->__variable = NULL; if (!rl->__runetype_ext.__nranges) rl->__runetype_ext.__ranges = NULL; if (!rl->__maplower_ext.__nranges) rl->__maplower_ext.__ranges = NULL; if (!rl->__mapupper_ext.__nranges) rl->__mapupper_ext.__ranges = NULL; return (rl); + +invalid: + munmap(fdata, sb.st_size); + errno = EINVAL; + return (NULL); } Index: projects/collation/lib/libc/locale/setrunelocale.c =================================================================== --- projects/collation/lib/libc/locale/setrunelocale.c (revision 286458) +++ projects/collation/lib/libc/locale/setrunelocale.c (revision 286459) @@ -1,219 +1,208 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __RUNETYPE_INTERNAL 1 #include #include #include #include #include #include #include #include #include "ldpart.h" #include "mblocal.h" #include "setlocale.h" #undef _CurrentRuneLocale extern _RuneLocale const *_CurrentRuneLocale; #ifndef __NO_TLS /* * A cached version of the runes for this thread. 
Used by ctype.h */ _Thread_local const _RuneLocale *_ThreadRuneLocale; #endif extern int __mb_sb_limit; -extern _RuneLocale *_Read_RuneMagi(FILE *); +extern _RuneLocale *_Read_RuneMagi(const char *); static int __setrunelocale(struct xlocale_ctype *l, const char *); static void destruct_ctype(void *v) { struct xlocale_ctype *l = v; - if (strcmp(l->runes->__encoding, "EUC") == 0) - free(l->runes->__variable); if (&_DefaultRuneLocale != l->runes) free(l->runes); free(l); } const _RuneLocale * __getCurrentRuneLocale(void) { return XLOCALE_CTYPE(__get_locale())->runes; } static void free_runes(_RuneLocale *rl) { - - /* FIXME: The "EUC" check here is a hideous abstraction violation. */ if ((rl != &_DefaultRuneLocale) && (rl)) { - if (strcmp(rl->__encoding, "EUC") == 0) { - free(rl->__variable); - } free(rl); } } static int __setrunelocale(struct xlocale_ctype *l, const char *encoding) { - FILE *fp; - char name[PATH_MAX]; _RuneLocale *rl; - int saverr, ret; + int ret; + char path[PATH_MAX]; struct xlocale_ctype saved = *l; /* * The "C" and "POSIX" locale are always here. */ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { free_runes(saved.runes); (void) _none_init(l, (_RuneLocale*)&_DefaultRuneLocale); return (0); } /* Range checking not needed, encoding length already checked before */ - (void) strcpy(name, _PathLocale); - (void) strcat(name, "/"); - (void) strcat(name, encoding); - (void) strcat(name, "/LC_CTYPE"); + (void) snprintf(path, sizeof (path), "%s/%s/LC_CTYPE", + _PathLocale, encoding); - if ((fp = fopen(name, "re")) == NULL) - return (errno == 0 ? ENOENT : errno); - - if ((rl = _Read_RuneMagi(fp)) == NULL) { - saverr = (errno == 0 ? 
EFTYPE : errno); - (void)fclose(fp); - return (saverr); + if ((rl = _Read_RuneMagi(path)) == NULL) { + errno = EINVAL; + return (errno); } - (void)fclose(fp); l->__mbrtowc = NULL; l->__mbsinit = NULL; - l->__mbsnrtowcs = __mbsnrtowcs_std; + l->__mbsnrtowcs = NULL; l->__wcrtomb = NULL; - l->__wcsnrtombs = __wcsnrtombs_std; + l->__wcsnrtombs = NULL; rl->__sputrune = NULL; rl->__sgetrune = NULL; if (strcmp(rl->__encoding, "NONE") == 0) ret = _none_init(l, rl); - else if (strcmp(rl->__encoding, "ASCII") == 0) - ret = _ascii_init(l, rl); else if (strcmp(rl->__encoding, "UTF-8") == 0) ret = _UTF8_init(l, rl); - else if (strcmp(rl->__encoding, "EUC") == 0) - ret = _EUC_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-CN") == 0) + ret = _EUC_CN_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-JP") == 0) + ret = _EUC_JP_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-KR") == 0) + ret = _EUC_KR_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-TW") == 0) + ret = _EUC_TW_init(l, rl); else if (strcmp(rl->__encoding, "GB18030") == 0) ret = _GB18030_init(l, rl); else if (strcmp(rl->__encoding, "GB2312") == 0) ret = _GB2312_init(l, rl); else if (strcmp(rl->__encoding, "GBK") == 0) ret = _GBK_init(l, rl); else if (strcmp(rl->__encoding, "BIG5") == 0) ret = _BIG5_init(l, rl); else if (strcmp(rl->__encoding, "MSKanji") == 0) ret = _MSKanji_init(l, rl); else ret = EFTYPE; if (ret == 0) { /* Free the old runes if it exists. */ free_runes(saved.runes); } else { /* Restore the saved version if this failed. 
*/ memcpy(l, &saved, sizeof(struct xlocale_ctype)); free(rl); } return (ret); } int __wrap_setrunelocale(const char *locale) { int ret = __setrunelocale(&__xlocale_global_ctype, locale); if (ret != 0) { errno = ret; return (_LDP_ERROR); } __mb_cur_max = __xlocale_global_ctype.__mb_cur_max; __mb_sb_limit = __xlocale_global_ctype.__mb_sb_limit; _CurrentRuneLocale = __xlocale_global_ctype.runes; return (_LDP_LOADED); } #ifndef __NO_TLS void __set_thread_rune_locale(locale_t loc) { if (loc == NULL) { _ThreadRuneLocale = &_DefaultRuneLocale; } else if (loc == LC_GLOBAL_LOCALE) { _ThreadRuneLocale = 0; } else { _ThreadRuneLocale = XLOCALE_CTYPE(loc)->runes; } } #endif void * -__ctype_load(const char *locale, locale_t unused) +__ctype_load(const char *locale, locale_t unused __unused) { struct xlocale_ctype *l = calloc(sizeof(struct xlocale_ctype), 1); l->header.header.destructor = destruct_ctype; if (__setrunelocale(l, locale)) { free(l); return NULL; } return l; } Index: projects/collation/lib/libc/locale/utf8.c =================================================================== --- projects/collation/lib/libc/locale/utf8.c (revision 286458) +++ projects/collation/lib/libc/locale/utf8.c (revision 286459) @@ -1,432 +1,434 @@ /*- + * Copyright 2013 Garrett D'Amore * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" extern int __mb_sb_limit; static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _UTF8_mbsinit(const mbstate_t *); static size_t _UTF8_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict, size_t, size_t, mbstate_t * __restrict); static size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); static size_t _UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict); typedef struct { wchar_t ch; int want; wchar_t lbound; } _UTF8State; int _UTF8_init(struct xlocale_ctype *l, _RuneLocale *rl) { l->__mbrtowc = _UTF8_mbrtowc; l->__wcrtomb = _UTF8_wcrtomb; l->__mbsinit = _UTF8_mbsinit; l->__mbsnrtowcs = _UTF8_mbsnrtowcs; l->__wcsnrtombs = _UTF8_wcsnrtombs; l->runes = rl; - l->__mb_cur_max = 6; + l->__mb_cur_max = 4; /* * UCS-4 encoding used as the internal representation, so * slots 
0x0080-0x00FF are occupied and must be excluded * from the single byte ctype by setting the limit. */ l->__mb_sb_limit = 128; return (0); } static int _UTF8_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _UTF8State *)ps)->want == 0); } static size_t _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _UTF8State *us; int ch, i, mask, want; wchar_t lbound, wch; us = (_UTF8State *)ps; if (us->want < 0 || us->want > 6) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (us->want == 0) { /* * Determine the number of octets that make up this character * from the first octet, and a mask that extracts the * interesting bits of the first octet. We already know * the character is at least two bytes long. * * We also specify a lower bound for the character code to * detect redundant, non-"shortest form" encodings. For * example, the sequence C0 80 is _not_ a legal representation * of the null character. This enforces a 1-to-1 mapping * between character codes and their multibyte representations. */ ch = (unsigned char)*s; if ((ch & 0x80) == 0) { /* Fast path for plain ASCII characters. */ if (pwc != NULL) *pwc = ch; return (ch != '\0' ? 1 : 0); } if ((ch & 0xe0) == 0xc0) { mask = 0x1f; want = 2; lbound = 0x80; } else if ((ch & 0xf0) == 0xe0) { mask = 0x0f; want = 3; lbound = 0x800; } else if ((ch & 0xf8) == 0xf0) { mask = 0x07; want = 4; lbound = 0x10000; +#if 0 + /* These would be illegal in the UTF-8 space */ + } else if ((ch & 0xfc) == 0xf8) { mask = 0x03; want = 5; lbound = 0x200000; } else if ((ch & 0xfe) == 0xfc) { mask = 0x01; want = 6; lbound = 0x4000000; +#endif } else { /* * Malformed input; input is not UTF-8.
*/ errno = EILSEQ; return ((size_t)-1); } } else { want = us->want; lbound = us->lbound; } /* * Decode the octet sequence representing the character in chunks * of 6 bits, most significant first. */ if (us->want == 0) wch = (unsigned char)*s++ & mask; else wch = us->ch; + for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) { if ((*s & 0xc0) != 0x80) { /* * Malformed input; bad characters in the middle * of a character. */ errno = EILSEQ; return ((size_t)-1); } wch <<= 6; wch |= *s++ & 0x3f; } if (i < want) { /* Incomplete multibyte sequence. */ us->want = want - i; us->lbound = lbound; us->ch = wch; return ((size_t)-2); } if (wch < lbound) { /* * Malformed input; redundant encoding. */ errno = EILSEQ; return ((size_t)-1); } - if (wch >= 0xd800 && wch <= 0xdfff) { - /* - * Malformed input; invalid code points. - */ - errno = EILSEQ; - return ((size_t)-1); - } if (pwc != NULL) *pwc = wch; us->want = 0; return (wch == L'\0' ? 0 : want); } static size_t _UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, mbstate_t * __restrict ps) { _UTF8State *us; const char *s; size_t nchr; wchar_t wc; size_t nb; us = (_UTF8State *)ps; s = *src; nchr = 0; if (dst == NULL) { /* * The fast path in the loop below is not safe if an ASCII * character appears as anything but the first byte of a * multibyte sequence. Check now to avoid doing it in the loop. */ if (nms > 0 && us->want > 0 && (signed char)*s > 0) { errno = EILSEQ; return ((size_t)-1); } for (;;) { if (nms > 0 && (signed char)*s > 0) /* * Fast path for plain ASCII characters * excluding NUL. */ nb = 1; else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) == (size_t)-1) /* Invalid sequence - mbrtowc() sets errno. */ return ((size_t)-1); else if (nb == 0 || nb == (size_t)-2) return (nchr); s += nb; nms -= nb; nchr++; } /*NOTREACHED*/ } /* * The fast path in the loop below is not safe if an ASCII * character appears as anything but the first byte of a * multibyte sequence. 
Check now to avoid doing it in the loop. */ if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) { errno = EILSEQ; return ((size_t)-1); } while (len-- > 0) { if (nms > 0 && (signed char)*s > 0) { /* * Fast path for plain ASCII characters * excluding NUL. */ *dst = (wchar_t)*s; nb = 1; } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } else if (nb == (size_t)-2) { *src = s + nms; return (nchr); } else if (nb == 0) { *src = NULL; return (nchr); } s += nb; nms -= nb; nchr++; dst++; } *src = s; return (nchr); } static size_t _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _UTF8State *us; unsigned char lead; int i, len; us = (_UTF8State *)ps; if (us->want != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); /* * Determine the number of octets needed to represent this character. * We always output the shortest sequence possible. Also specify the * first few bits of the first octet, which contains the information * about the sequence length. */ if ((wc & ~0x7f) == 0) { /* Fast path for plain ASCII characters. */ *s = (char)wc; return (1); } else if ((wc & ~0x7ff) == 0) { lead = 0xc0; len = 2; } else if ((wc & ~0xffff) == 0) { lead = 0xe0; len = 3; } else if ((wc & ~0x1fffff) == 0) { lead = 0xf0; len = 4; +#if 0 + /* Again, 5 and 6 byte encodings are simply not permitted */ } else if ((wc & ~0x3ffffff) == 0) { lead = 0xf8; len = 5; } else if ((wc & ~0x7fffffff) == 0) { lead = 0xfc; len = 6; +#endif } else { errno = EILSEQ; return ((size_t)-1); } /* * Output the octets representing the character in chunks * of 6 bits, least significant last. The first octet is * a special case because it contains the sequence length * information. 
*/ for (i = len - 1; i > 0; i--) { s[i] = (wc & 0x3f) | 0x80; wc >>= 6; } *s = (wc & 0xff) | lead; return (len); } static size_t _UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, size_t nwc, size_t len, mbstate_t * __restrict ps) { _UTF8State *us; char buf[MB_LEN_MAX]; const wchar_t *s; size_t nbytes; size_t nb; us = (_UTF8State *)ps; if (us->want != 0) { errno = EINVAL; return ((size_t)-1); } s = *src; nbytes = 0; if (dst == NULL) { while (nwc-- > 0) { if (0 <= *s && *s < 0x80) /* Fast path for plain ASCII characters. */ nb = 1; else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) /* Invalid character - wcrtomb() sets errno. */ return ((size_t)-1); if (*s == L'\0') return (nbytes + nb - 1); s++; nbytes += nb; } return (nbytes); } while (len > 0 && nwc-- > 0) { if (0 <= *s && *s < 0x80) { /* Fast path for plain ASCII characters. */ nb = 1; *dst = *s; } else if (len > (size_t)MB_CUR_MAX) { /* Enough space to translate in-place. */ if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } } else { /* * May not be enough space; use temp. buffer. */ if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } if (nb > (int)len) /* MB sequence for character won't fit. */ break; memcpy(dst, buf, nb); } if (*s == L'\0') { *src = NULL; return (nbytes + nb - 1); } s++; dst += nb; len -= nb; nbytes += nb; } *src = s; return (nbytes); } Index: projects/collation/lib/libc/locale/wcsnrtombs.c =================================================================== --- projects/collation/lib/libc/locale/wcsnrtombs.c (revision 286458) +++ projects/collation/lib/libc/locale/wcsnrtombs.c (revision 286459) @@ -1,123 +1,125 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. 
* Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "mblocal.h" size_t wcsnrtombs_l(char * __restrict dst, const wchar_t ** __restrict src, size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t locale) { FIX_LOCALE(locale); if (ps == NULL) ps = &locale->wcsnrtombs; return (XLOCALE_CTYPE(locale)->__wcsnrtombs(dst, src, nwc, len, ps)); } size_t wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, size_t nwc, size_t len, mbstate_t * __restrict ps) { return wcsnrtombs_l(dst, src, nwc, len, ps, __get_locale()); } size_t __wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src, - size_t nwc, size_t len, mbstate_t * __restrict ps) + size_t nwc, size_t len, mbstate_t * __restrict ps, + wcrtomb_pfn_t pwcrtomb) { mbstate_t mbsbak; char buf[MB_LEN_MAX]; const wchar_t *s; size_t nbytes; size_t nb; - struct xlocale_ctype *l = XLOCALE_CTYPE(__get_locale()); s = *src; nbytes = 0; if (dst == NULL) { while (nwc-- > 0) { - if ((nb = l->__wcrtomb(buf, *s, ps)) == (size_t)-1) + if ((nb = pwcrtomb(buf, *s, ps)) == (size_t)-1) /* Invalid character - wcrtomb() sets errno. */ return ((size_t)-1); else if (*s == L'\0') return (nbytes + nb - 1); s++; nbytes += nb; } return (nbytes); } while (len > 0 && nwc-- > 0) { if (len > (size_t)MB_CUR_MAX) { /* Enough space to translate in-place. */ - if ((nb = l->__wcrtomb(dst, *s, ps)) == (size_t)-1) { + if ((nb = pwcrtomb(dst, *s, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } } else { /* * May not be enough space; use temp. buffer. * * We need to save a copy of the conversion state * here so we can restore it if the multibyte * character is too long for the buffer. */ mbsbak = *ps; - if ((nb = l->__wcrtomb(buf, *s, ps)) == (size_t)-1) { + if ((nb = pwcrtomb(buf, *s, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } if (nb > (int)len) { /* MB sequence for character won't fit. 
*/ *ps = mbsbak; break; } memcpy(dst, buf, nb); } if (*s == L'\0') { *src = NULL; return (nbytes + nb - 1); } s++; dst += nb; len -= nb; nbytes += nb; } *src = s; return (nbytes); }