Index: head/contrib/netbsd-tests/lib/libc/locale/t_mbstowcs.c =================================================================== --- head/contrib/netbsd-tests/lib/libc/locale/t_mbstowcs.c (revision 306781) +++ head/contrib/netbsd-tests/lib/libc/locale/t_mbstowcs.c (revision 306782) @@ -1,211 +1,211 @@ /* $NetBSD: t_mbstowcs.c,v 1.1 2011/07/15 07:35:21 jruoho Exp $ */ /*- * Copyright (c) 2011 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c)2003 Citrus Project, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __COPYRIGHT("@(#) Copyright (c) 2011\ The NetBSD Foundation, inc. All rights reserved."); __RCSID("$NetBSD: t_mbstowcs.c,v 1.1 2011/07/15 07:35:21 jruoho Exp $"); #include #include #include #include #include #include #include #include #define REQUIRE_ERRNO(x, v) \ ATF_REQUIRE_MSG((x) != (v), "%s: %s", #x, strerror(errno)) #define SIZE 256 static struct test { const char *locale; const char *data; wchar_t wchars[64]; int widths[64]; int width; } tests[] = { { "en_US.UTF-8", "[\001\177][\302\200\337\277][\340\240\200\357\277\277][\360\220\200" "\200\364\217\277\277]", { 0x5B, 0x01, 0x7F, 0x5D, 0x5B, 0x80, 0x07FF, 0x5D, 0x5B, 0x0800, 0xFFFF, 0x5D, 0x5B, 0x10000, 0x10FFFF, 0x5D, 0x0A }, #ifdef __FreeBSD__ - { 1, -1, -1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 1, -1, -1, + { 1, -1, -1, 1, 1, -1, -1, 1, 1, 1, -1, 1, 1, -1, -1, #else { 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, #endif 1, 1, -1, -1, 1, 1, -1, -1, 1, -1 }, -1 }, { "ja_JP.ISO2022-JP", "\033$B#J#I#S$G$9!#\033(Baaaa\033$B$\"$$$&$($*\033(B", { 0x4200234A, 0x42002349, 0x42002353, 0x42002447, 0x42002439, 0x42002123, 0x61, 0x61, 0x61, 0x61, 0x42002422, 0x42002424, 0x42002426, 0x42002428, 0x4200242A, 0x0A }, { 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1 }, 26 }, { "ja_JP.SJIS", "\202r\202i\202h\202r\202\305\202\267\201Baaaa\202\240\202\242" "\202\244\202\246\202\250", { 0x8272, 0x8269, 0x8268, 0x8272, 0x82C5, 0x82B7, 0x8142, 0x61, 0x61, 0x61, 0x61, 0x82A0, 0x82A2, 0x82A4, 0x82A6, 0x82A8, 0x0A }, { 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1 }, 28 }, { "ja_JP.eucJP", "\243\305\243\325\243\303\244\307\244\271\241\243aaaa\244\242\244" "\244\244\246\244\250\244\252", { 0xA3C5, 0xA3D5, 0xA3C3, 0xA4C7, 0xA4B9, 0xA1A3, 0x61, 0x61, 0x61, 0x61, 0xA4A2, 0xA4A4, 0xA4A6, 0xA4A8, 0xA4AA, 0x0A }, { 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1 }, 26 }, { NULL, NULL, {}, {}, 0 } }; ATF_TC(mbstowcs_basic); ATF_TC_HEAD(mbstowcs_basic, tc) { atf_tc_set_md_var(tc, "descr", "Checks wide character functions with different locales"); } ATF_TC_BODY(mbstowcs_basic, tc) { struct test *t; for (t = &tests[0]; t->data != NULL; ++t) { wchar_t wbuf[SIZE]; char buf[SIZE]; char visbuf[SIZE]; char *str; int i; ATF_REQUIRE_STREQ(setlocale(LC_ALL, "C"), "C"); #ifdef __NetBSD__ ATF_REQUIRE(setlocale(LC_CTYPE, t->locale) != NULL); #else if (setlocale(LC_CTYPE, t->locale) == NULL) { fprintf(stderr, "Locale %s not found.\n", t->locale); continue; } #endif (void)strvis(visbuf, t->data, VIS_WHITE | VIS_OCTAL); (void)printf("Checking string: \"%s\"\n", visbuf); ATF_REQUIRE((str = setlocale(LC_ALL, NULL)) != NULL); (void)printf("Using locale: %s\n", str); REQUIRE_ERRNO((ssize_t)mbstowcs(wbuf, t->data, SIZE-1), -1); REQUIRE_ERRNO((ssize_t)wcstombs(buf, wbuf, SIZE-1), -1); if (strcmp(buf, t->data) != 0) { (void)strvis(visbuf, buf, VIS_WHITE | VIS_OCTAL); (void)printf("Conversion to wcs and back failed: " "\"%s\"\n", visbuf); atf_tc_fail("Test failed"); } /* The output here is implementation-dependent. */ for (i = 0; wbuf[i] != 0; ++i) { if (wbuf[i] == t->wchars[i] && wcwidth(wbuf[i]) == t->widths[i]) continue; (void)printf("At position %d:\n", i); (void)printf(" expected: 0x%04X (%d)\n", t->wchars[i], t->widths[i]); (void)printf(" got : 0x%04X (%d)\n", wbuf[i], wcwidth(wbuf[i])); atf_tc_fail("Test failed"); } if (wcswidth(wbuf, SIZE-1) != t->width) { (void)printf("Incorrect wcswidth:\n"); (void)printf(" expected: %d\n", t->width); (void)printf(" got : %d\n", wcswidth(wbuf, SIZE-1)); atf_tc_fail("Test failed"); } (void)printf("Ok.\n"); } } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, mbstowcs_basic); return atf_no_error(); } Index: head/usr.bin/localedef/ctype.c =================================================================== --- head/usr.bin/localedef/ctype.c (revision 306781) +++ head/usr.bin/localedef/ctype.c (revision 306782) @@ -1,463 +1,463 @@ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2012 Garrett D'Amore All rights reserved. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * LC_CTYPE database generation routines for localedef. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "localedef.h" #include "parser.h" #include "runefile.h" /* Needed for bootstrapping, _CTYPE_N */ #ifndef _CTYPE_N #define _CTYPE_N 0x00400000L #endif #define _ISUPPER _CTYPE_U #define _ISLOWER _CTYPE_L #define _ISDIGIT _CTYPE_D #define _ISXDIGIT _CTYPE_X #define _ISSPACE _CTYPE_S #define _ISBLANK _CTYPE_B #define _ISALPHA _CTYPE_A #define _ISPUNCT _CTYPE_P #define _ISGRAPH _CTYPE_G #define _ISPRINT _CTYPE_R #define _ISCNTRL _CTYPE_C #define _E1 _CTYPE_Q #define _E2 _CTYPE_I #define _E3 0 #define _E4 _CTYPE_N #define _E5 _CTYPE_T static wchar_t last_ctype; static int ctype_compare(const void *n1, const void *n2); typedef struct ctype_node { wchar_t wc; int32_t ctype; int32_t toupper; int32_t tolower; RB_ENTRY(ctype_node) entry; } ctype_node_t; static RB_HEAD(ctypes, ctype_node) ctypes; RB_GENERATE_STATIC(ctypes, ctype_node, entry, ctype_compare); static int ctype_compare(const void *n1, const void *n2) { const ctype_node_t *c1 = n1; const ctype_node_t *c2 = n2; return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0); } void init_ctype(void) { RB_INIT(&ctypes); } static void add_ctype_impl(ctype_node_t *ctn) { switch (last_kw) { case T_ISUPPER: ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT); break; case T_ISLOWER: ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT); break; case T_ISALPHA: ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT); break; case T_ISDIGIT: ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4); break; case T_ISSPACE: ctn->ctype |= _ISSPACE; break; case T_ISCNTRL: ctn->ctype |= _ISCNTRL; break; case T_ISGRAPH: ctn->ctype |= (_ISGRAPH | _ISPRINT); break; case T_ISPRINT: ctn->ctype |= _ISPRINT; break; case T_ISPUNCT: ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT); break; case T_ISXDIGIT: ctn->ctype |= (_ISXDIGIT | _ISPRINT); break; case T_ISBLANK: ctn->ctype |= (_ISBLANK | _ISSPACE); break; case T_ISPHONOGRAM: ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH); break; case T_ISIDEOGRAM: ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH); break; case T_ISENGLISH: ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH); break; case T_ISNUMBER: ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH); break; case T_ISSPECIAL: ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH); break; case T_ISALNUM: /* * We can't do anything with this. The character * should already be specified as a digit or alpha. */ break; default: errf("not a valid character class"); } } static ctype_node_t * get_ctype(wchar_t wc) { ctype_node_t srch; ctype_node_t *ctn; srch.wc = wc; if ((ctn = RB_FIND(ctypes, &ctypes, &srch)) == NULL) { if ((ctn = calloc(1, sizeof (*ctn))) == NULL) { errf("out of memory"); return (NULL); } ctn->wc = wc; RB_INSERT(ctypes, &ctypes, ctn); } return (ctn); } void add_ctype(int val) { ctype_node_t *ctn; if ((ctn = get_ctype(val)) == NULL) { INTERR; return; } add_ctype_impl(ctn); last_ctype = ctn->wc; } void add_ctype_range(wchar_t end) { ctype_node_t *ctn; wchar_t cur; if (end < last_ctype) { errf("malformed character range (%u ... %u))", last_ctype, end); return; } for (cur = last_ctype + 1; cur <= end; cur++) { if ((ctn = get_ctype(cur)) == NULL) { INTERR; return; } add_ctype_impl(ctn); } last_ctype = end; } /* * A word about widths: if the width mask is specified, then libc * unconditionally honors it. Otherwise, it assumes printable * characters have width 1, and non-printable characters have width * -1 (except for NULL which is special with with 0). Hence, we have * no need to inject defaults here -- the "default" unset value of 0 * indicates that libc should use its own logic in wcwidth as described. */ void add_width(int wc, int width) { ctype_node_t *ctn; if ((ctn = get_ctype(wc)) == NULL) { INTERR; return; } ctn->ctype &= ~(_CTYPE_SWM); switch (width) { case 0: ctn->ctype |= _CTYPE_SW0; break; case 1: ctn->ctype |= _CTYPE_SW1; break; case 2: ctn->ctype |= _CTYPE_SW2; break; case 3: ctn->ctype |= _CTYPE_SW3; break; } } void add_width_range(int start, int end, int width) { for (; start <= end; start++) { add_width(start, width); } } void add_caseconv(int val, int wc) { ctype_node_t *ctn; ctn = get_ctype(val); if (ctn == NULL) { INTERR; return; } switch (last_kw) { case T_TOUPPER: ctn->toupper = wc; break; case T_TOLOWER: ctn->tolower = wc; break; default: INTERR; break; } } void dump_ctype(void) { FILE *f; _FileRuneLocale rl; ctype_node_t *ctn, *last_ct, *last_lo, *last_up; _FileRuneEntry *ct = NULL; _FileRuneEntry *lo = NULL; _FileRuneEntry *up = NULL; wchar_t wc; (void) memset(&rl, 0, sizeof (rl)); last_ct = NULL; last_lo = NULL; last_up = NULL; if ((f = open_category()) == NULL) return; (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8); (void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding)); /* * Initialize the identity map. */ for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) { rl.maplower[wc] = wc; rl.mapupper[wc] = wc; } RB_FOREACH(ctn, ctypes, &ctypes) { int conflict = 0; wc = ctn->wc; /* * POSIX requires certain portable characters have * certain types. Add them if they are missing. */ if ((wc >= 1) && (wc <= 127)) { if ((wc >= 'A') && (wc <= 'Z')) ctn->ctype |= _ISUPPER; if ((wc >= 'a') && (wc <= 'z')) ctn->ctype |= _ISLOWER; if ((wc >= '0') && (wc <= '9')) ctn->ctype |= _ISDIGIT; if (wc == ' ') ctn->ctype |= _ISPRINT; if (strchr(" \f\n\r\t\v", (char)wc) != NULL) ctn->ctype |= _ISSPACE; if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL) ctn->ctype |= _ISXDIGIT; if (strchr(" \t", (char)wc)) ctn->ctype |= _ISBLANK; /* * Technically these settings are only * required for the C locale. However, it * turns out that because of the historical * version of isprint(), we need them for all * locales as well. Note that these are not * necessarily valid punctation characters in * the current language, but ispunct() needs * to return TRUE for them. */ if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~", (char)wc)) ctn->ctype |= _ISPUNCT; } /* * POSIX also requires that certain types imply * others. Add any inferred types here. */ if (ctn->ctype & (_ISUPPER |_ISLOWER)) ctn->ctype |= _ISALPHA; if (ctn->ctype & _ISDIGIT) ctn->ctype |= _ISXDIGIT; if (ctn->ctype & _ISBLANK) ctn->ctype |= _ISSPACE; if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT)) ctn->ctype |= _ISGRAPH; if (ctn->ctype & _ISGRAPH) ctn->ctype |= _ISPRINT; /* * Finally, POSIX requires that certain combinations * are invalid. We don't flag this as a fatal error, * but we will warn about. */ if ((ctn->ctype & _ISALPHA) && (ctn->ctype & (_ISPUNCT|_ISDIGIT))) conflict++; if ((ctn->ctype & _ISPUNCT) & (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT))) conflict++; if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH)) conflict++; if ((ctn->ctype & _ISCNTRL) & _ISPRINT) conflict++; if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH))) conflict++; if (conflict) { warn("conflicting classes for character 0x%x (%x)", wc, ctn->ctype); } /* * Handle the lower 256 characters using the simple * optimization. Note that if we have not defined the * upper/lower case, then we identity map it. */ if ((unsigned)wc < _CACHED_RUNES) { rl.runetype[wc] = ctn->ctype; if (ctn->tolower) rl.maplower[wc] = ctn->tolower; if (ctn->toupper) rl.mapupper[wc] = ctn->toupper; continue; } - if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) { + if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) && + (last_ct->wc + 1 == wc)) { ct[rl.runetype_ext_nranges-1].max = wc; - last_ct = ctn; } else { rl.runetype_ext_nranges++; ct = realloc(ct, sizeof (*ct) * rl.runetype_ext_nranges); ct[rl.runetype_ext_nranges - 1].min = wc; ct[rl.runetype_ext_nranges - 1].max = wc; ct[rl.runetype_ext_nranges - 1].map = ctn->ctype; - last_ct = ctn; } + last_ct = ctn; if (ctn->tolower == 0) { last_lo = NULL; } else if ((last_lo != NULL) && (last_lo->tolower + 1 == ctn->tolower)) { lo[rl.maplower_ext_nranges-1].max = wc; last_lo = ctn; } else { rl.maplower_ext_nranges++; lo = realloc(lo, sizeof (*lo) * rl.maplower_ext_nranges); lo[rl.maplower_ext_nranges - 1].min = wc; lo[rl.maplower_ext_nranges - 1].max = wc; lo[rl.maplower_ext_nranges - 1].map = ctn->tolower; last_lo = ctn; } if (ctn->toupper == 0) { last_up = NULL; } else if ((last_up != NULL) && (last_up->toupper + 1 == ctn->toupper)) { up[rl.mapupper_ext_nranges-1].max = wc; last_up = ctn; } else { rl.mapupper_ext_nranges++; up = realloc(up, sizeof (*up) * rl.mapupper_ext_nranges); up[rl.mapupper_ext_nranges - 1].min = wc; up[rl.mapupper_ext_nranges - 1].max = wc; up[rl.mapupper_ext_nranges - 1].map = ctn->toupper; last_up = ctn; } } if ((wr_category(&rl, sizeof (rl), f) < 0) || (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) || (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) || (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) { return; } close_category(f); }