Index: head/lib/libc/locale/endian.h =================================================================== --- head/lib/libc/locale/endian.h (revision 339488) +++ head/lib/libc/locale/endian.h (nonexistent) @@ -1,52 +0,0 @@ -/*- - * Copyright (c) 2016 Ruslan Bukin - * All rights reserved. - * - * Portions of this software were developed by SRI International and the - * University of Cambridge Computer Laboratory under DARPA/AFRL contract - * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. - * - * Portions of this software were developed by the University of Cambridge - * Computer Laboratory as part of the CTSRD Project, with support from the - * UK Higher Education Innovation Fund (HEIF). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include - -/* - * We assume locale files were generated on EL machine - * (e.g. during cross build on amd64 host), but used on EB - * machine (e.g. MIPS64EB), so convert it to host endianness. - * - * TODO: detect host endianness on the build machine and use - * correct macros here. - */ - -#if BYTE_ORDER == BIG_ENDIAN && defined(__mips__) -#define BSWAP(x) le32toh(x) -#else -#define BSWAP(x) x -#endif Property changes on: head/lib/libc/locale/endian.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/lib/libc/locale/collate.c =================================================================== --- head/lib/libc/locale/collate.c (revision 339488) +++ head/lib/libc/locale/collate.c (revision 339489) @@ -1,710 +1,709 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2014 Garrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * Adapted to xlocale by John Marino */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" -#include "endian.h" #include "collate.h" #include "setlocale.h" #include "ldpart.h" #include "libc_private.h" struct xlocale_collate __xlocale_global_collate = { {{0}, "C"}, 1, 0, 0, 0 }; struct xlocale_collate __xlocale_C_collate = { {{0}, "C"}, 1, 0, 0, 0 }; static int __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); static void destruct_collate(void *t) { struct xlocale_collate *table = t; if (table->map && (table->maplen > 0)) { (void) munmap(table->map, table->maplen); } free(t); } void * __collate_load(const char *encoding, __unused locale_t unused) { if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { return &__xlocale_C_collate; } struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); table->header.header.destructor = destruct_collate; // FIXME: Make sure that _LDP_CACHE is never returned. We should be doing // the caching outside of this section if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { xlocale_release(table); return NULL; } return table; } /** * Load the collation tables for the specified encoding into the global table. */ int __collate_load_tables(const char *encoding) { return (__collate_load_tables_l(encoding, &__xlocale_global_collate)); } int __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) { int i, chains, z; char *buf; char *TMP; char *map; collate_info_t *info; struct stat sbuf; int fd; table->__collate_load_error = 1; /* 'encoding' must be already checked. 
*/ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { return (_LDP_CACHE); } if (asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding) == -1) return (_LDP_ERROR); if ((fd = _open(buf, O_RDONLY)) < 0) { free(buf); return (_LDP_ERROR); } free(buf); if (_fstat(fd, &sbuf) < 0) { (void) _close(fd); return (_LDP_ERROR); } if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { (void) _close(fd); errno = EINVAL; return (_LDP_ERROR); } map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); (void) _close(fd); if ((TMP = map) == NULL) { return (_LDP_ERROR); } if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } TMP += COLLATE_STR_LEN; info = (void *)TMP; TMP += sizeof (*info); if ((info->directive_count < 1) || (info->directive_count >= COLL_WEIGHTS_MAX) || - ((chains = BSWAP(info->chain_count)) < 0)) { + ((chains = info->chain_count) < 0)) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + (sizeof (collate_chain_t) * chains) + - (sizeof (collate_large_t) * BSWAP(info->large_count)); + (sizeof (collate_large_t) * info->large_count); for (z = 0; z < info->directive_count; z++) { - i += sizeof (collate_subst_t) * BSWAP(info->subst_count[z]); + i += sizeof (collate_subst_t) * info->subst_count[z]; } if (i != (sbuf.st_size - (TMP - map))) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } table->info = info; table->char_pri_table = (void *)TMP; TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); for (z = 0; z < info->directive_count; z++) { - if (BSWAP(info->subst_count[z]) > 0) { + if (info->subst_count[z] > 0) { table->subst_table[z] = (void *)TMP; - TMP += BSWAP(info->subst_count[z]) * sizeof (collate_subst_t); + TMP += info->subst_count[z] * sizeof (collate_subst_t); } else { table->subst_table[z] = NULL; } } if (chains > 0) { table->chain_pri_table = (void *)TMP; TMP += 
chains * sizeof (collate_chain_t); } else table->chain_pri_table = NULL; - if (BSWAP(info->large_count) > 0) + if (info->large_count > 0) table->large_pri_table = (void *)TMP; else table->large_pri_table = NULL; table->__collate_load_error = 0; return (_LDP_LOADED); } static const int32_t * substsearch(struct xlocale_collate *table, const wchar_t key, int pass) { const collate_subst_t *p; - int n = BSWAP(table->info->subst_count[pass]); + int n = table->info->subst_count[pass]; if (n == 0) return (NULL); if (pass >= table->info->directive_count) return (NULL); if (!(key & COLLATE_SUBST_PRIORITY)) return (NULL); p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); - assert(BSWAP(p->key) == key); + assert(p->key == key); return (p->pri); } static collate_chain_t * chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) { int low = 0; - int high = BSWAP(table->info->chain_count) - 1; + int high = table->info->chain_count - 1; int next, compar, l; collate_chain_t *p; collate_chain_t *tab = table->chain_pri_table; if (high < 0) return (NULL); while (low <= high) { next = (low + high) / 2; p = tab + next; - compar = *key - le16toh(*p->str); + compar = *key - *p->str; if (compar == 0) { l = wcsnlen(p->str, COLLATE_STR_LEN); compar = wcsncmp(key, p->str, l); if (compar == 0) { *len = l; return (p); } } if (compar > 0) low = next + 1; else high = next - 1; } return (NULL); } static collate_large_t * largesearch(struct xlocale_collate *table, const wchar_t key) { int low = 0; - int high = BSWAP(table->info->large_count) - 1; + int high = table->info->large_count - 1; int next, compar; collate_large_t *p; collate_large_t *tab = table->large_pri_table; if (high < 0) return (NULL); while (low <= high) { next = (low + high) / 2; p = tab + next; - compar = key - BSWAP(p->val); + compar = key - p->val; if (compar == 0) return (p); if (compar > 0) low = next + 1; else high = next - 1; } return (NULL); } void _collate_lookup(struct xlocale_collate *table, 
const wchar_t *t, int *len, int *pri, int which, const int **state) { collate_chain_t *p2; collate_large_t *match; int p, l; const int *sptr; /* * If this is the "last" pass for the UNDEFINED, then * we just return the priority itself. */ if (which >= table->info->directive_count) { *pri = *t; *len = 1; *state = NULL; return; } /* * If we have remaining substitution data from a previous * call, consume it first. */ if ((sptr = *state) != NULL) { *pri = *sptr; sptr++; if ((sptr == *state) || (sptr == NULL)) *state = NULL; else *state = sptr; *len = 0; return; } /* No active substitutions */ *len = 1; /* * Check for composites such as diphthongs that collate as a * single element (aka chains or collating-elements). */ if (((p2 = chainsearch(table, t, &l)) != NULL) && ((p = p2->pri[which]) >= 0)) { *len = l; *pri = p; } else if (*t <= UCHAR_MAX) { /* * Character is a small (8-bit) character. * We just look these up directly for speed. */ - *pri = BSWAP(table->char_pri_table[*t].pri[which]); + *pri = table->char_pri_table[*t].pri[which]; - } else if ((BSWAP(table->info->large_count) > 0) && + } else if ((table->info->large_count > 0) && ((match = largesearch(table, *t)) != NULL)) { /* * Character was found in the extended table. */ - *pri = BSWAP(match->pri.pri[which]); + *pri = match->pri.pri[which]; } else { /* * Character lacks a specific definition. */ if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { /* Mask off sign bit to prevent ordering confusion. */ *pri = (*t & COLLATE_MAX_PRIORITY); } else { - *pri = BSWAP(table->info->undef_pri[which]); + *pri = table->info->undef_pri[which]; } /* No substitutions for undefined characters! */ return; } /* * Try substituting (expanding) the character. We are * currently doing this *after* the chain compression. I * think it should not matter, but this way might be slightly * faster. * * We do this after the priority search, as this will help us * to identify a single key value. 
In order for this to work, * its important that the priority assigned to a given element * to be substituted be unique for that level. The localedef * code ensures this for us. */ if ((sptr = substsearch(table, *pri, which)) != NULL) { - if ((*pri = BSWAP(*sptr)) > 0) { + if ((*pri = *sptr) > 0) { sptr++; - *state = BSWAP(*sptr) ? sptr : NULL; + *state = *sptr ? sptr : NULL; } } } /* * This is the meaty part of wcsxfrm & strxfrm. Note that it does * NOT NULL terminate. That is left to the caller. */ size_t _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, size_t room) { int pri; int len; const wchar_t *t; wchar_t *tr = NULL; int direc; int pass; const int32_t *state; size_t want = 0; size_t need = 0; int ndir = table->info->directive_count; assert(src); for (pass = 0; pass <= ndir; pass++) { state = NULL; if (pass != 0) { /* insert level separator from the previous pass */ if (room) { *xf++ = 1; room--; } want++; } /* special pass for undefined */ if (pass == ndir) { direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; } else { direc = table->info->directive[pass]; } t = src; if (direc & DIRECTIVE_BACKWARD) { wchar_t *bp, *fp, c; free(tr); if ((tr = wcsdup(t)) == NULL) { errno = ENOMEM; goto fail; } bp = tr; fp = tr + wcslen(tr) - 1; while (bp < fp) { c = *bp; *bp++ = *fp; *fp-- = c; } t = (const wchar_t *)tr; } if (direc & DIRECTIVE_POSITION) { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; pri = COLLATE_MAX_PRIORITY; } if (room) { *xf++ = pri; room--; } want++; need = want; } } else { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; continue; } if (room) { *xf++ = pri; room--; } want++; need = want; } } } free(tr); return (need); fail: free(tr); return ((size_t)(-1)); } /* * In the non-POSIX case, we transform each 
character into a string of * characters representing the character's priority. Since char is usually * signed, we are limited by 7 bits per byte. To avoid zero, we need to add * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 * bits per byte. * * It turns out that we sometimes have real priorities that are * 31-bits wide. (But: be careful using priorities where the high * order bit is set -- i.e. the priority is negative. The sort order * may be surprising!) * * TODO: This would be a good area to optimize somewhat. It turns out * that real prioririties *except for the last UNDEFINED pass* are generally * very small. We need the localedef code to precalculate the max * priority for us, and ideally also give us a mask, and then we could * severely limit what we expand to. */ #define XFRM_BYTES 6 #define XFRM_OFFSET ('0') /* make all printable characters */ #define XFRM_SHIFT 6 #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ static int xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) { /* we use unsigned to ensure zero fill on right shift */ - uint32_t val = BSWAP((uint32_t)table->info->pri_count[pass]); + uint32_t val = (uint32_t)table->info->pri_count[pass]; int nc = 0; while (val) { *p = (pri & XFRM_MASK) + XFRM_OFFSET; pri >>= XFRM_SHIFT; val >>= XFRM_SHIFT; p++; nc++; } return (nc); } size_t _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, size_t room) { int pri; int len; const wchar_t *t; wchar_t *tr = NULL; int direc; int pass; const int32_t *state; size_t want = 0; size_t need = 0; int b; uint8_t buf[XFRM_BYTES]; int ndir = table->info->directive_count; assert(src); for (pass = 0; pass <= ndir; pass++) { state = NULL; if (pass != 0) { /* insert level separator from the previous pass */ if (room) { *xf++ = XFRM_SEP; room--; } want++; } /* special pass for undefined */ if (pass == ndir) { direc = DIRECTIVE_FORWARD | 
DIRECTIVE_UNDEFINED; } else { direc = table->info->directive[pass]; } t = src; if (direc & DIRECTIVE_BACKWARD) { wchar_t *bp, *fp, c; free(tr); if ((tr = wcsdup(t)) == NULL) { errno = ENOMEM; goto fail; } bp = tr; fp = tr + wcslen(tr) - 1; while (bp < fp) { c = *bp; *bp++ = *fp; *fp-- = c; } t = (const wchar_t *)tr; } if (direc & DIRECTIVE_POSITION) { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; pri = COLLATE_MAX_PRIORITY; } b = xfrm(table, buf, pri, pass); want += b; if (room) { while (b) { b--; if (room) { *xf++ = buf[b]; room--; } } } need = want; } } else { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; continue; } b = xfrm(table, buf, pri, pass); want += b; if (room) { while (b) { b--; if (room) { *xf++ = buf[b]; room--; } } } need = want; } } } free(tr); return (need); fail: free(tr); return ((size_t)(-1)); } /* * __collate_equiv_value returns the primary collation value for the given * collating symbol specified by str and len. Zero or negative is returned * if the collating symbol was not found. This function is used by bracket * code in the TRE regex library. */ int __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) { int32_t e; if (len < 1 || len >= COLLATE_STR_LEN) return (-1); FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; if (table->__collate_load_error) return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); if (len == 1) { e = -1; if (*str <= UCHAR_MAX) e = table->char_pri_table[*str].pri[0]; - else if (BSWAP(table->info->large_count) > 0) { + else if (table->info->large_count > 0) { collate_large_t *match_large; match_large = largesearch(table, *str); if (match_large) e = match_large->pri.pri[0]; } if (e == 0) return (1); return (e > 0 ? 
e : 0); } - if (BSWAP(table->info->chain_count) > 0) { + if (table->info->chain_count > 0) { wchar_t name[COLLATE_STR_LEN]; collate_chain_t *match_chain; int clen; wcsncpy (name, str, len); name[len] = 0; match_chain = chainsearch(table, name, &clen); if (match_chain) { e = match_chain->pri[0]; if (e == 0) return (1); return (e < 0 ? -e : e); } } return (0); } Index: head/lib/libc/locale/rune.c =================================================================== --- head/lib/libc/locale/rune.c (revision 339488) +++ head/lib/libc/locale/rune.c (revision 339489) @@ -1,254 +1,252 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright 2014 Garrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" -#include "endian.h" #include "runefile.h" _RuneLocale * _Read_RuneMagi(const char *fname) { char *fdata, *data; void *lastp; _FileRuneLocale *frl; _RuneLocale *rl; _FileRuneEntry *frr; _RuneEntry *rr; struct stat sb; int x, saverr; void *variable; _FileRuneEntry *runetype_ext_ranges; _FileRuneEntry *maplower_ext_ranges; _FileRuneEntry *mapupper_ext_ranges; int runetype_ext_len = 0; int fd; if ((fd = _open(fname, O_RDONLY)) < 0) { errno = EINVAL; return (NULL); } if (_fstat(fd, &sb) < 0) { (void) _close(fd); errno = EINVAL; return (NULL); } if ((size_t)sb.st_size < sizeof (_FileRuneLocale)) { (void) _close(fd); errno = EINVAL; return (NULL); } fdata = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); (void) _close(fd); if (fdata == NULL) { errno = EINVAL; return (NULL); } frl = (_FileRuneLocale *)(void *)fdata; lastp = fdata + sb.st_size; variable = frl + 1; if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof (frl->magic))) { goto invalid; } runetype_ext_ranges = (_FileRuneEntry *)variable; - variable = runetype_ext_ranges + BSWAP(frl->runetype_ext_nranges); + variable = runetype_ext_ranges + frl->runetype_ext_nranges; if (variable > 
lastp) { goto invalid; } maplower_ext_ranges = (_FileRuneEntry *)variable; - variable = maplower_ext_ranges + BSWAP(frl->maplower_ext_nranges); + variable = maplower_ext_ranges + frl->maplower_ext_nranges; if (variable > lastp) { goto invalid; } mapupper_ext_ranges = (_FileRuneEntry *)variable; - variable = mapupper_ext_ranges + BSWAP(frl->mapupper_ext_nranges); + variable = mapupper_ext_ranges + frl->mapupper_ext_nranges; if (variable > lastp) { goto invalid; } frr = runetype_ext_ranges; - for (x = 0; x < BSWAP(frl->runetype_ext_nranges); ++x) { + for (x = 0; x < frl->runetype_ext_nranges; ++x) { uint32_t *types; - if (BSWAP(frr[x].map) == 0) { - int len = BSWAP(frr[x].max) - BSWAP(frr[x].min) + 1; + if (frr[x].map == 0) { + int len = frr[x].max - frr[x].min + 1; types = variable; variable = types + len; runetype_ext_len += len; if (variable > lastp) { goto invalid; } } } - if ((char *)variable + BSWAP(frl->variable_len) > (char *)lastp) { + if ((char *)variable + frl->variable_len > (char *)lastp) { goto invalid; } /* * Convert from disk format to host format. 
*/ data = malloc(sizeof(_RuneLocale) + - (BSWAP(frl->runetype_ext_nranges) + BSWAP(frl->maplower_ext_nranges) + - BSWAP(frl->mapupper_ext_nranges)) * sizeof(_RuneEntry) + - runetype_ext_len * sizeof(*rr->__types) + - BSWAP(frl->variable_len)); + (frl->runetype_ext_nranges + frl->maplower_ext_nranges + + frl->mapupper_ext_nranges) * sizeof(_RuneEntry) + + runetype_ext_len * sizeof(*rr->__types) + frl->variable_len); if (data == NULL) { saverr = errno; munmap(fdata, sb.st_size); errno = saverr; return (NULL); } rl = (_RuneLocale *)data; rl->__variable = rl + 1; memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof(rl->__magic)); memcpy(rl->__encoding, frl->encoding, sizeof(rl->__encoding)); - rl->__variable_len = BSWAP(frl->variable_len); - rl->__runetype_ext.__nranges = BSWAP(frl->runetype_ext_nranges); - rl->__maplower_ext.__nranges = BSWAP(frl->maplower_ext_nranges); - rl->__mapupper_ext.__nranges = BSWAP(frl->mapupper_ext_nranges); + rl->__variable_len = frl->variable_len; + rl->__runetype_ext.__nranges = frl->runetype_ext_nranges; + rl->__maplower_ext.__nranges = frl->maplower_ext_nranges; + rl->__mapupper_ext.__nranges = frl->mapupper_ext_nranges; for (x = 0; x < _CACHED_RUNES; ++x) { - rl->__runetype[x] = BSWAP(frl->runetype[x]); - rl->__maplower[x] = BSWAP(frl->maplower[x]); - rl->__mapupper[x] = BSWAP(frl->mapupper[x]); + rl->__runetype[x] = frl->runetype[x]; + rl->__maplower[x] = frl->maplower[x]; + rl->__mapupper[x] = frl->mapupper[x]; } rl->__runetype_ext.__ranges = (_RuneEntry *)rl->__variable; rl->__variable = rl->__runetype_ext.__ranges + rl->__runetype_ext.__nranges; rl->__maplower_ext.__ranges = (_RuneEntry *)rl->__variable; rl->__variable = rl->__maplower_ext.__ranges + rl->__maplower_ext.__nranges; rl->__mapupper_ext.__ranges = (_RuneEntry *)rl->__variable; rl->__variable = rl->__mapupper_ext.__ranges + rl->__mapupper_ext.__nranges; - variable = mapupper_ext_ranges + BSWAP(frl->mapupper_ext_nranges); + variable = mapupper_ext_ranges + 
frl->mapupper_ext_nranges; frr = runetype_ext_ranges; rr = rl->__runetype_ext.__ranges; for (x = 0; x < rl->__runetype_ext.__nranges; ++x) { uint32_t *types; - rr[x].__min = BSWAP(frr[x].min); - rr[x].__max = BSWAP(frr[x].max); - rr[x].__map = BSWAP(frr[x].map); + rr[x].__min = frr[x].min; + rr[x].__max = frr[x].max; + rr[x].__map = frr[x].map; if (rr[x].__map == 0) { int len = rr[x].__max - rr[x].__min + 1; types = variable; variable = types + len; rr[x].__types = rl->__variable; rl->__variable = rr[x].__types + len; while (len-- > 0) rr[x].__types[len] = types[len]; } else rr[x].__types = NULL; } frr = maplower_ext_ranges; rr = rl->__maplower_ext.__ranges; for (x = 0; x < rl->__maplower_ext.__nranges; ++x) { - rr[x].__min = BSWAP(frr[x].min); - rr[x].__max = BSWAP(frr[x].max); - rr[x].__map = BSWAP(frr[x].map); + rr[x].__min = frr[x].min; + rr[x].__max = frr[x].max; + rr[x].__map = frr[x].map; } frr = mapupper_ext_ranges; rr = rl->__mapupper_ext.__ranges; for (x = 0; x < rl->__mapupper_ext.__nranges; ++x) { - rr[x].__min = BSWAP(frr[x].min); - rr[x].__max = BSWAP(frr[x].max); - rr[x].__map = BSWAP(frr[x].map); + rr[x].__min = frr[x].min; + rr[x].__max = frr[x].max; + rr[x].__map = frr[x].map; } memcpy(rl->__variable, variable, rl->__variable_len); munmap(fdata, sb.st_size); /* * Go out and zero pointers that should be zero. */ if (!rl->__variable_len) rl->__variable = NULL; if (!rl->__runetype_ext.__nranges) rl->__runetype_ext.__ranges = NULL; if (!rl->__maplower_ext.__nranges) rl->__maplower_ext.__ranges = NULL; if (!rl->__mapupper_ext.__nranges) rl->__mapupper_ext.__ranges = NULL; return (rl); invalid: munmap(fdata, sb.st_size); errno = EINVAL; return (NULL); } Index: head/share/colldef/Makefile =================================================================== --- head/share/colldef/Makefile (revision 339488) +++ head/share/colldef/Makefile (revision 339489) @@ -1,231 +1,233 @@ # $FreeBSD$ # Warning: Do not edit. 
This file is automatically generated from the # tools in /usr/src/tools/tools/locale. LOCALEDIR= ${SHAREDIR}/locale FILESNAME= LC_COLLATE .SUFFIXES: .src .LC_COLLATE MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps +.include + .src.LC_COLLATE: - localedef -D -U -i ${.IMPSRC} \ + localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.IMPSRC} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} ${.OBJDIR}/${.IMPSRC:T:R} LOCALES+= af_ZA.UTF-8 LOCALES+= am_ET.UTF-8 LOCALES+= ar_SA.UTF-8 LOCALES+= be_BY.UTF-8 LOCALES+= ca_AD.UTF-8 LOCALES+= cs_CZ.UTF-8 LOCALES+= da_DK.UTF-8 LOCALES+= el_GR.UTF-8 LOCALES+= en_US.UTF-8 LOCALES+= es_MX.UTF-8 LOCALES+= et_EE.UTF-8 LOCALES+= fi_FI.UTF-8 LOCALES+= fr_CA.UTF-8 LOCALES+= he_IL.UTF-8 LOCALES+= hi_IN.UTF-8 LOCALES+= hu_HU.UTF-8 LOCALES+= hy_AM.UTF-8 LOCALES+= is_IS.UTF-8 LOCALES+= ja_JP.UTF-8 LOCALES+= kk_KZ.UTF-8 LOCALES+= ko_KR.UTF-8 LOCALES+= lt_LT.UTF-8 LOCALES+= lv_LV.UTF-8 LOCALES+= nn_NO.UTF-8 LOCALES+= pl_PL.UTF-8 LOCALES+= ro_RO.UTF-8 LOCALES+= ru_RU.UTF-8 LOCALES+= se_NO.UTF-8 LOCALES+= sk_SK.UTF-8 LOCALES+= sl_SI.UTF-8 LOCALES+= sr_RS.UTF-8 LOCALES+= sr_RS.UTF-8@latin LOCALES+= sv_SE.UTF-8 LOCALES+= tr_TR.UTF-8 LOCALES+= uk_UA.UTF-8 LOCALES+= zh_CN.UTF-8 LOCALES+= zh_TW.UTF-8 LOCALES_MAPPED+= af_ZA.UTF-8 af_ZA.ISO8859-15 LOCALES_MAPPED+= af_ZA.UTF-8 af_ZA.ISO8859-1 LOCALES_MAPPED+= be_BY.UTF-8 be_BY.ISO8859-5 LOCALES_MAPPED+= be_BY.UTF-8 be_BY.CP1251 LOCALES_MAPPED+= be_BY.UTF-8 be_BY.CP1131 LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.KOI8-R LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.ISO8859-5 LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.CP866 LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.CP1251 LOCALES_MAPPED+= ru_RU.UTF-8 bg_BG.CP1251 LOCALES_MAPPED+= ca_AD.UTF-8 ca_IT.ISO8859-15 LOCALES_MAPPED+= ca_AD.UTF-8 ca_IT.ISO8859-1 LOCALES_MAPPED+= ca_AD.UTF-8 ca_FR.ISO8859-15 LOCALES_MAPPED+= ca_AD.UTF-8 ca_FR.ISO8859-1 LOCALES_MAPPED+= ca_AD.UTF-8 ca_ES.ISO8859-15 LOCALES_MAPPED+= ca_AD.UTF-8 ca_ES.ISO8859-1 LOCALES_MAPPED+= ca_AD.UTF-8 ca_AD.ISO8859-15 LOCALES_MAPPED+= 
ca_AD.UTF-8 ca_AD.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 pt_PT.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 pt_PT.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 pt_BR.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 nl_NL.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 nl_NL.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 nl_BE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 nl_BE.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 it_IT.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 it_IT.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 it_CH.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 it_CH.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 fr_FR.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 fr_FR.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 fr_CH.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 fr_CH.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 fr_BE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 fr_BE.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 eu_ES.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 eu_ES.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_ZA.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_ZA.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_ZA.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_US.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_US.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_US.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_SG.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_NZ.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_NZ.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_NZ.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_IE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_IE.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_HK.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_GB.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_GB.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_GB.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_CA.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_CA.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_CA.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_AU.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_AU.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_AU.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 de_DE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 de_DE.ISO8859-1 LOCALES_MAPPED+= 
en_US.UTF-8 de_CH.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 de_CH.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 de_AT.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 de_AT.ISO8859-1 LOCALES_MAPPED+= el_GR.UTF-8 el_GR.ISO8859-7 LOCALES_MAPPED+= es_MX.UTF-8 es_MX.ISO8859-1 LOCALES_MAPPED+= es_MX.UTF-8 es_ES.ISO8859-15 LOCALES_MAPPED+= es_MX.UTF-8 es_ES.ISO8859-1 LOCALES_MAPPED+= es_MX.UTF-8 es_AR.ISO8859-1 LOCALES_MAPPED+= et_EE.UTF-8 et_EE.ISO8859-15 LOCALES_MAPPED+= et_EE.UTF-8 et_EE.ISO8859-1 LOCALES_MAPPED+= fi_FI.UTF-8 fi_FI.ISO8859-15 LOCALES_MAPPED+= fi_FI.UTF-8 fi_FI.ISO8859-1 LOCALES_MAPPED+= fr_CA.UTF-8 fr_CA.ISO8859-15 LOCALES_MAPPED+= fr_CA.UTF-8 fr_CA.ISO8859-1 LOCALES_MAPPED+= hi_IN.UTF-8 hi_IN.ISCII-DEV LOCALES_MAPPED+= sr_RS.UTF-8@latin hr_HR.UTF-8 LOCALES_MAPPED+= hy_AM.UTF-8 hy_AM.ARMSCII-8 LOCALES_MAPPED+= is_IS.UTF-8 is_IS.ISO8859-15 LOCALES_MAPPED+= is_IS.UTF-8 is_IS.ISO8859-1 LOCALES_MAPPED+= ja_JP.UTF-8 ja_JP.SJIS LOCALES_MAPPED+= ko_KR.UTF-8 ko_KR.eucKR LOCALES_MAPPED+= lt_LT.UTF-8 lt_LT.ISO8859-13 LOCALES_MAPPED+= lv_LV.UTF-8 lv_LV.ISO8859-13 LOCALES_MAPPED+= pl_PL.UTF-8 pl_PL.ISO8859-2 LOCALES_MAPPED+= ro_RO.UTF-8 ro_RO.ISO8859-2 LOCALES_MAPPED+= sl_SI.UTF-8 sl_SI.ISO8859-2 LOCALES_MAPPED+= sv_SE.UTF-8 sv_SE.ISO8859-15 LOCALES_MAPPED+= sv_SE.UTF-8 sv_SE.ISO8859-1 LOCALES_MAPPED+= sv_SE.UTF-8 sv_FI.ISO8859-15 LOCALES_MAPPED+= sv_SE.UTF-8 sv_FI.ISO8859-1 LOCALES_MAPPED+= tr_TR.UTF-8 tr_TR.ISO8859-9 LOCALES_MAPPED+= uk_UA.UTF-8 uk_UA.KOI8-U LOCALES_MAPPED+= uk_UA.UTF-8 uk_UA.ISO8859-5 LOCALES_MAPPED+= uk_UA.UTF-8 uk_UA.CP1251 LOCALES+= cs_CZ.ISO8859-2 LOCALES+= da_DK.ISO8859-1 LOCALES+= da_DK.ISO8859-15 LOCALES+= hr_HR.ISO8859-2 LOCALES+= hu_HU.ISO8859-2 LOCALES+= nb_NO.ISO8859-1 LOCALES+= nb_NO.ISO8859-15 LOCALES+= sk_SK.ISO8859-2 LOCALES+= sr_RS.ISO8859-2 LOCALES+= sr_RS.ISO8859-5 LOCALES+= zh_CN.GB2312 LOCALES+= zh_CN.eucCN LOCALES+= zh_TW.Big5 LOCALES+= zh_CN.GB18030 LOCALES+= zh_CN.GBK LOCALES+= ja_JP.eucJP LOCALES+= nn_NO.ISO8859-15 LOCALES+= 
nn_NO.ISO8859-1 SAME+= ar_SA.UTF-8 ar_QA.UTF-8 SAME+= ar_SA.UTF-8 ar_MA.UTF-8 SAME+= ar_SA.UTF-8 ar_JO.UTF-8 SAME+= ar_SA.UTF-8 ar_EG.UTF-8 SAME+= ar_SA.UTF-8 ar_AE.UTF-8 SAME+= ru_RU.UTF-8 mn_MN.UTF-8 SAME+= ru_RU.UTF-8 bg_BG.UTF-8 SAME+= ca_AD.UTF-8 ca_IT.UTF-8 SAME+= ca_AD.UTF-8 ca_FR.UTF-8 SAME+= ca_AD.UTF-8 ca_ES.UTF-8 SAME+= en_US.UTF-8 pt_PT.UTF-8 SAME+= en_US.UTF-8 pt_BR.UTF-8 SAME+= en_US.UTF-8 nl_NL.UTF-8 SAME+= en_US.UTF-8 nl_BE.UTF-8 SAME+= en_US.UTF-8 it_IT.UTF-8 SAME+= en_US.UTF-8 it_CH.UTF-8 SAME+= en_US.UTF-8 fr_FR.UTF-8 SAME+= en_US.UTF-8 fr_CH.UTF-8 SAME+= en_US.UTF-8 fr_BE.UTF-8 SAME+= en_US.UTF-8 eu_ES.UTF-8 SAME+= en_US.UTF-8 en_ZA.UTF-8 SAME+= en_US.UTF-8 en_SG.UTF-8 SAME+= en_US.UTF-8 en_PH.UTF-8 SAME+= en_US.UTF-8 en_NZ.UTF-8 SAME+= en_US.UTF-8 en_IE.UTF-8 SAME+= en_US.UTF-8 en_HK.UTF-8 SAME+= en_US.UTF-8 en_GB.UTF-8 SAME+= en_US.UTF-8 en_CA.UTF-8 SAME+= en_US.UTF-8 en_AU.UTF-8 SAME+= en_US.UTF-8 de_DE.UTF-8 SAME+= en_US.UTF-8 de_CH.UTF-8 SAME+= en_US.UTF-8 de_AT.UTF-8 SAME+= es_MX.UTF-8 es_ES.UTF-8 SAME+= es_MX.UTF-8 es_CR.UTF-8 SAME+= es_MX.UTF-8 es_AR.UTF-8 SAME+= nn_NO.UTF-8 nb_NO.UTF-8 SAME+= se_NO.UTF-8 se_FI.UTF-8 SAME+= sv_SE.UTF-8 sv_FI.UTF-8 SAME+= zh_TW.UTF-8 zh_HK.UTF-8 SAME+= ko_KR.eucKR ko_KR.CP949 # legacy (same charset) FILES= ${LOCALES:S/$/.LC_COLLATE/} CLEANFILES= ${FILES} .for f t in ${SAME} SYMLINKS+= ../$f/${FILESNAME} \ ${LOCALEDIR}/$t/${FILESNAME} .endfor .for f in ${LOCALES} FILESDIR_${f}.LC_COLLATE= ${LOCALEDIR}/${f} .endfor .for f t in ${LOCALES_MAPPED} FILES+= $t.LC_COLLATE FILESDIR_$t.LC_COLLATE= ${LOCALEDIR}/$t $t.LC_COLLATE: ${.CURDIR}/$f.src - localedef -D -U -i ${.ALLSRC} \ + localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.ALLSRC} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} \ ${.OBJDIR}/${.TARGET:T:R} .endfor .include Index: head/share/ctypedef/Makefile =================================================================== --- head/share/ctypedef/Makefile (revision 339488) +++ head/share/ctypedef/Makefile (revision 
339489) @@ -1,246 +1,248 @@ # $FreeBSD$ # Warning: Do not edit. This file is automatically generated from the # tools in /usr/src/tools/tools/locale. LOCALEDIR= ${SHAREDIR}/locale FILESNAME= LC_CTYPE .SUFFIXES: .src .LC_CTYPE MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps +.include + .src.LC_CTYPE: - localedef -D -U -c -w ${MAPLOC}/widths.txt \ + localedef ${LOCALEDEF_ENDIAN} -D -U -c -w ${MAPLOC}/widths.txt \ -f ${MAPLOC}/map.${.IMPSRC:T:R:E} \ -i ${.IMPSRC} ${.OBJDIR}/${.IMPSRC:T:R} || true LOCALES+= be_BY.CP1131 LOCALES+= ca_IT.ISO8859-1 LOCALES+= ca_IT.ISO8859-15 LOCALES+= el_GR.ISO8859-7 LOCALES+= en_US.ISO8859-1 LOCALES+= en_US.ISO8859-15 LOCALES+= en_US.US-ASCII LOCALES+= en_US.UTF-8 LOCALES+= hi_IN.ISCII-DEV LOCALES+= hy_AM.ARMSCII-8 LOCALES+= ja_JP.SJIS LOCALES+= ja_JP.eucJP LOCALES+= ko_KR.eucKR LOCALES+= lv_LV.ISO8859-13 LOCALES+= ru_RU.CP1251 LOCALES+= ru_RU.CP866 LOCALES+= ru_RU.ISO8859-5 LOCALES+= ru_RU.KOI8-R LOCALES+= sr_RS.ISO8859-2 LOCALES+= tr_TR.ISO8859-9 LOCALES+= uk_UA.CP1251 LOCALES+= uk_UA.ISO8859-5 LOCALES+= uk_UA.KOI8-U LOCALES+= zh_CN.GB18030 LOCALES+= zh_CN.GB2312 LOCALES+= zh_CN.GBK LOCALES+= zh_CN.eucCN LOCALES+= zh_TW.Big5 SAME+= en_US.UTF-8 ru_RU.UTF-8 SAME+= en_US.UTF-8 zh_TW.UTF-8 SAME+= en_US.UTF-8 zh_HK.UTF-8 SAME+= en_US.UTF-8 zh_CN.UTF-8 SAME+= en_US.UTF-8 uk_UA.UTF-8 SAME+= en_US.UTF-8 tr_TR.UTF-8 SAME+= en_US.UTF-8 sv_SE.UTF-8 SAME+= en_US.UTF-8 sv_FI.UTF-8 SAME+= en_US.UTF-8 sr_RS.UTF-8@latin SAME+= en_US.UTF-8 sr_RS.UTF-8 SAME+= en_US.UTF-8 sl_SI.UTF-8 SAME+= en_US.UTF-8 sk_SK.UTF-8 SAME+= en_US.UTF-8 se_NO.UTF-8 SAME+= en_US.UTF-8 se_FI.UTF-8 SAME+= en_US.UTF-8 ro_RO.UTF-8 SAME+= en_US.UTF-8 pt_PT.UTF-8 SAME+= en_US.UTF-8 pt_BR.UTF-8 SAME+= en_US.UTF-8 pl_PL.UTF-8 SAME+= en_US.UTF-8 nn_NO.UTF-8 SAME+= en_US.UTF-8 nl_NL.UTF-8 SAME+= en_US.UTF-8 nl_BE.UTF-8 SAME+= en_US.UTF-8 nb_NO.UTF-8 SAME+= en_US.UTF-8 mn_MN.UTF-8 SAME+= en_US.UTF-8 lv_LV.UTF-8 SAME+= en_US.UTF-8 lt_LT.UTF-8 SAME+= en_US.UTF-8 ko_KR.UTF-8 
SAME+= en_US.UTF-8 kk_KZ.UTF-8 SAME+= en_US.UTF-8 ja_JP.UTF-8 SAME+= en_US.UTF-8 it_IT.UTF-8 SAME+= en_US.UTF-8 it_CH.UTF-8 SAME+= en_US.UTF-8 is_IS.UTF-8 SAME+= en_US.UTF-8 hy_AM.UTF-8 SAME+= en_US.UTF-8 hu_HU.UTF-8 SAME+= en_US.UTF-8 hr_HR.UTF-8 SAME+= en_US.UTF-8 hi_IN.UTF-8 SAME+= en_US.UTF-8 he_IL.UTF-8 SAME+= en_US.UTF-8 fr_FR.UTF-8 SAME+= en_US.UTF-8 fr_CH.UTF-8 SAME+= en_US.UTF-8 fr_CA.UTF-8 SAME+= en_US.UTF-8 fr_BE.UTF-8 SAME+= en_US.UTF-8 fi_FI.UTF-8 SAME+= en_US.UTF-8 eu_ES.UTF-8 SAME+= en_US.UTF-8 et_EE.UTF-8 SAME+= en_US.UTF-8 es_MX.UTF-8 SAME+= en_US.UTF-8 es_ES.UTF-8 SAME+= en_US.UTF-8 es_CR.UTF-8 SAME+= en_US.UTF-8 es_AR.UTF-8 SAME+= en_US.UTF-8 en_ZA.UTF-8 SAME+= en_US.UTF-8 en_SG.UTF-8 SAME+= en_US.UTF-8 en_PH.UTF-8 SAME+= en_US.UTF-8 en_NZ.UTF-8 SAME+= en_US.UTF-8 en_IE.UTF-8 SAME+= en_US.UTF-8 en_HK.UTF-8 SAME+= en_US.UTF-8 en_GB.UTF-8 SAME+= en_US.UTF-8 en_CA.UTF-8 SAME+= en_US.UTF-8 en_AU.UTF-8 SAME+= en_US.UTF-8 el_GR.UTF-8 SAME+= en_US.UTF-8 de_DE.UTF-8 SAME+= en_US.UTF-8 de_CH.UTF-8 SAME+= en_US.UTF-8 de_AT.UTF-8 SAME+= en_US.UTF-8 da_DK.UTF-8 SAME+= en_US.UTF-8 cs_CZ.UTF-8 SAME+= en_US.UTF-8 ca_IT.UTF-8 SAME+= en_US.UTF-8 ca_FR.UTF-8 SAME+= en_US.UTF-8 ca_ES.UTF-8 SAME+= en_US.UTF-8 ca_AD.UTF-8 SAME+= en_US.UTF-8 bg_BG.UTF-8 SAME+= en_US.UTF-8 be_BY.UTF-8 SAME+= en_US.UTF-8 ar_SA.UTF-8 SAME+= en_US.UTF-8 ar_QA.UTF-8 SAME+= en_US.UTF-8 ar_MA.UTF-8 SAME+= en_US.UTF-8 ar_JO.UTF-8 SAME+= en_US.UTF-8 ar_EG.UTF-8 SAME+= en_US.UTF-8 ar_AE.UTF-8 SAME+= en_US.UTF-8 am_ET.UTF-8 SAME+= en_US.UTF-8 af_ZA.UTF-8 SAME+= en_US.ISO8859-1 sv_SE.ISO8859-1 SAME+= en_US.ISO8859-1 sv_FI.ISO8859-1 SAME+= en_US.ISO8859-1 pt_PT.ISO8859-1 SAME+= en_US.ISO8859-1 pt_BR.ISO8859-1 SAME+= en_US.ISO8859-1 nn_NO.ISO8859-1 SAME+= en_US.ISO8859-1 nl_NL.ISO8859-1 SAME+= en_US.ISO8859-1 nl_BE.ISO8859-1 SAME+= en_US.ISO8859-1 nb_NO.ISO8859-1 SAME+= en_US.ISO8859-1 it_IT.ISO8859-1 SAME+= en_US.ISO8859-1 it_CH.ISO8859-1 SAME+= en_US.ISO8859-1 is_IS.ISO8859-1 SAME+= 
en_US.ISO8859-1 fr_FR.ISO8859-1 SAME+= en_US.ISO8859-1 fr_CH.ISO8859-1 SAME+= en_US.ISO8859-1 fr_CA.ISO8859-1 SAME+= en_US.ISO8859-1 fr_BE.ISO8859-1 SAME+= en_US.ISO8859-1 fi_FI.ISO8859-1 SAME+= en_US.ISO8859-1 eu_ES.ISO8859-1 SAME+= en_US.ISO8859-1 et_EE.ISO8859-1 SAME+= en_US.ISO8859-1 es_MX.ISO8859-1 SAME+= en_US.ISO8859-1 es_ES.ISO8859-1 SAME+= en_US.ISO8859-1 es_AR.ISO8859-1 SAME+= en_US.ISO8859-1 en_ZA.ISO8859-1 SAME+= en_US.ISO8859-1 en_SG.ISO8859-1 SAME+= en_US.ISO8859-1 en_NZ.ISO8859-1 SAME+= en_US.ISO8859-1 en_IE.ISO8859-1 SAME+= en_US.ISO8859-1 en_HK.ISO8859-1 SAME+= en_US.ISO8859-1 en_GB.ISO8859-1 SAME+= en_US.ISO8859-1 en_CA.ISO8859-1 SAME+= en_US.ISO8859-1 en_AU.ISO8859-1 SAME+= en_US.ISO8859-1 de_DE.ISO8859-1 SAME+= en_US.ISO8859-1 de_CH.ISO8859-1 SAME+= en_US.ISO8859-1 de_AT.ISO8859-1 SAME+= en_US.ISO8859-1 da_DK.ISO8859-1 SAME+= en_US.ISO8859-1 af_ZA.ISO8859-1 SAME+= en_US.ISO8859-15 en_GB.ISO8859-15 SAME+= en_US.ISO8859-15 sv_SE.ISO8859-15 SAME+= en_US.ISO8859-15 sv_FI.ISO8859-15 SAME+= en_US.ISO8859-15 pt_PT.ISO8859-15 SAME+= en_US.ISO8859-15 nn_NO.ISO8859-15 SAME+= en_US.ISO8859-15 nl_NL.ISO8859-15 SAME+= en_US.ISO8859-15 nl_BE.ISO8859-15 SAME+= en_US.ISO8859-15 nb_NO.ISO8859-15 SAME+= en_US.ISO8859-15 it_IT.ISO8859-15 SAME+= en_US.ISO8859-15 it_CH.ISO8859-15 SAME+= en_US.ISO8859-15 is_IS.ISO8859-15 SAME+= en_US.ISO8859-15 fr_FR.ISO8859-15 SAME+= en_US.ISO8859-15 fr_CH.ISO8859-15 SAME+= en_US.ISO8859-15 fr_CA.ISO8859-15 SAME+= en_US.ISO8859-15 fr_BE.ISO8859-15 SAME+= en_US.ISO8859-15 fi_FI.ISO8859-15 SAME+= en_US.ISO8859-15 eu_ES.ISO8859-15 SAME+= en_US.ISO8859-15 et_EE.ISO8859-15 SAME+= en_US.ISO8859-15 es_ES.ISO8859-15 SAME+= en_US.ISO8859-15 en_ZA.ISO8859-15 SAME+= en_US.ISO8859-15 en_NZ.ISO8859-15 SAME+= en_US.ISO8859-15 en_IE.ISO8859-15 SAME+= en_US.ISO8859-15 en_CA.ISO8859-15 SAME+= en_US.ISO8859-15 en_AU.ISO8859-15 SAME+= en_US.ISO8859-15 de_DE.ISO8859-15 SAME+= en_US.ISO8859-15 de_CH.ISO8859-15 SAME+= en_US.ISO8859-15 de_AT.ISO8859-15 
SAME+= en_US.ISO8859-15 da_DK.ISO8859-15 SAME+= en_US.ISO8859-15 af_ZA.ISO8859-15 SAME+= ru_RU.CP1251 bg_BG.CP1251 SAME+= ru_RU.CP1251 be_BY.CP1251 SAME+= ru_RU.ISO8859-5 sr_RS.ISO8859-5 SAME+= ru_RU.ISO8859-5 be_BY.ISO8859-5 SAME+= ca_IT.ISO8859-1 ca_FR.ISO8859-1 SAME+= ca_IT.ISO8859-1 ca_ES.ISO8859-1 SAME+= ca_IT.ISO8859-1 ca_AD.ISO8859-1 SAME+= ca_IT.ISO8859-15 ca_FR.ISO8859-15 SAME+= ca_IT.ISO8859-15 ca_ES.ISO8859-15 SAME+= ca_IT.ISO8859-15 ca_AD.ISO8859-15 SAME+= sr_RS.ISO8859-2 sl_SI.ISO8859-2 SAME+= sr_RS.ISO8859-2 sk_SK.ISO8859-2 SAME+= sr_RS.ISO8859-2 ro_RO.ISO8859-2 SAME+= sr_RS.ISO8859-2 pl_PL.ISO8859-2 SAME+= sr_RS.ISO8859-2 hu_HU.ISO8859-2 SAME+= sr_RS.ISO8859-2 hr_HR.ISO8859-2 SAME+= sr_RS.ISO8859-2 cs_CZ.ISO8859-2 SAME+= en_US.US-ASCII en_ZA.US-ASCII SAME+= en_US.US-ASCII en_NZ.US-ASCII SAME+= en_US.US-ASCII en_GB.US-ASCII SAME+= en_US.US-ASCII en_CA.US-ASCII SAME+= en_US.US-ASCII en_AU.US-ASCII SAME+= lv_LV.ISO8859-13 lt_LT.ISO8859-13 SAME+= ko_KR.eucKR ko_KR.CP949 # legacy (same charset) FILES= ${LOCALES:S/$/.LC_CTYPE/} CLEANFILES= ${FILES} .for f t in ${SAME} SYMLINKS+= ../$f/${FILESNAME} \ ${LOCALEDIR}/$t/${FILESNAME} .endfor .for f in ${LOCALES} FILESDIR_${f}.LC_CTYPE= ${LOCALEDIR}/${f} .endfor SYMPAIRS+= ja_JP.eucJP.src ja_JP.SJIS.src SYMPAIRS+= en_US.ISO8859-1.src en_US.ISO8859-15.src SYMPAIRS+= en_US.ISO8859-1.src en_US.US-ASCII.src SYMPAIRS+= en_US.ISO8859-1.src lv_LV.ISO8859-13.src SYMPAIRS+= en_US.ISO8859-1.src sr_RS.ISO8859-2.src SYMPAIRS+= en_US.ISO8859-1.src tr_TR.ISO8859-9.src SYMPAIRS+= zh_CN.eucCN.src zh_CN.GB18030.src SYMPAIRS+= zh_CN.eucCN.src zh_CN.GB2312.src SYMPAIRS+= zh_CN.eucCN.src zh_CN.GBK.src SYMPAIRS+= uk_UA.CP1251.src uk_UA.ISO8859-5.src SYMPAIRS+= uk_UA.CP1251.src uk_UA.KOI8-U.src SYMPAIRS+= ca_IT.ISO8859-1.src ca_IT.ISO8859-15.src SYMPAIRS+= be_BY.CP1131.src ru_RU.CP1251.src SYMPAIRS+= be_BY.CP1131.src ru_RU.CP866.src SYMPAIRS+= be_BY.CP1131.src ru_RU.ISO8859-5.src SYMPAIRS+= be_BY.CP1131.src ru_RU.KOI8-R.src .for s t 
in ${SYMPAIRS} ${t:S/src$/LC_CTYPE/}: $s - localedef -D -U -c -w ${MAPLOC}/widths.txt \ + localedef ${LOCALEDEF_ENDIAN} -D -U -c -w ${MAPLOC}/widths.txt \ -f ${MAPLOC}/map.${.TARGET:T:R:C/^.*\.//} \ -i ${.ALLSRC} ${.OBJDIR}/${.TARGET:T:R} || true .endfor .include Index: head/share/mk/bsd.endian.mk =================================================================== --- head/share/mk/bsd.endian.mk (revision 339488) +++ head/share/mk/bsd.endian.mk (revision 339489) @@ -1,19 +1,21 @@ # $FreeBSD$ .if ${MACHINE_ARCH} == "aarch64" || \ ${MACHINE_ARCH} == "amd64" || \ ${MACHINE_ARCH} == "i386" || \ (${MACHINE} == "arm" && ${MACHINE_ARCH:Marm*eb*} == "") || \ ${MACHINE_CPUARCH} == "riscv" || \ ${MACHINE_ARCH:Mmips*el*} != "" TARGET_ENDIANNESS= 1234 CAP_MKDB_ENDIAN= -l +LOCALEDEF_ENDIAN= -l .elif ${MACHINE_ARCH} == "powerpc" || \ ${MACHINE_ARCH} == "powerpc64" || \ ${MACHINE_ARCH} == "powerpcspe" || \ ${MACHINE_ARCH} == "sparc64" || \ (${MACHINE} == "arm" && ${MACHINE_ARCH:Marm*eb*} != "") || \ ${MACHINE_ARCH:Mmips*} != "" TARGET_ENDIANNESS= 4321 CAP_MKDB_ENDIAN= -b +LOCALEDEF_ENDIAN= -b .endif Index: head/targets/pseudo/userland/Makefile.depend =================================================================== --- head/targets/pseudo/userland/Makefile.depend (revision 339488) +++ head/targets/pseudo/userland/Makefile.depend (revision 339489) @@ -1,880 +1,881 @@ # $FreeBSD$ # This file is not autogenerated - take care! 
.include DIRDEPS= DIRDEPS+= \ bin/cat \ bin/chflags \ bin/chio \ bin/chmod \ bin/cp \ bin/csh \ bin/date \ bin/dd \ bin/df \ bin/domainname \ bin/echo \ bin/ed \ bin/expr \ bin/freebsd-version \ bin/getfacl \ bin/hostname \ bin/kenv \ bin/kill \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/pax \ bin/pkill \ bin/ps \ bin/pwait \ bin/pwd \ bin/rcp \ bin/realpath \ bin/rm \ bin/rmail \ bin/rmdir \ bin/setfacl \ bin/sh \ bin/sleep \ bin/stty \ bin/sync \ bin/test \ bin/uuidgen \ sbin/adjkerntz \ sbin/atacontrol \ sbin/atm/atmconfig \ sbin/camcontrol \ sbin/ccdconfig \ sbin/clri \ sbin/comcontrol \ sbin/conscontrol \ sbin/decryptcore \ sbin/ddb \ sbin/devd \ sbin/devfs \ sbin/dhclient \ sbin/dmesg \ sbin/dump \ sbin/dumpfs \ sbin/dumpon \ sbin/etherswitchcfg \ sbin/ffsinfo \ sbin/fsck \ sbin/fsck_ffs \ sbin/fsck_msdosfs \ sbin/fsdb \ sbin/fsirand \ sbin/gbde \ sbin/geom/core \ sbin/ggate/ggatec \ sbin/ggate/ggated \ sbin/ggate/ggatel \ sbin/growfs \ sbin/gvinum \ sbin/hastctl \ sbin/hastd \ sbin/ifconfig \ sbin/init \ sbin/ipf/ipf \ sbin/ipf/ipfs \ sbin/ipf/ipfstat \ sbin/ipf/ipmon \ sbin/ipf/ipnat \ sbin/ipf/ippool \ sbin/ipf/libipf \ sbin/ipfw \ sbin/iscontrol \ sbin/kldconfig \ sbin/kldload \ sbin/kldstat \ sbin/kldunload \ sbin/ldconfig \ sbin/md5 \ sbin/mdconfig \ sbin/mdmfs \ sbin/mknod \ sbin/mksnap_ffs \ sbin/mount \ sbin/mount_cd9660 \ sbin/mount_fusefs \ sbin/mount_msdosfs \ sbin/mount_nfs \ sbin/mount_nullfs \ sbin/mount_udf \ sbin/mount_unionfs \ sbin/natd \ sbin/newfs \ sbin/newfs_msdos \ sbin/nfsiod \ sbin/nos-tun \ sbin/pfctl \ sbin/pflogd \ sbin/ping \ sbin/ping6 \ sbin/quotacheck \ sbin/rcorder \ sbin/reboot \ sbin/recoverdisk \ sbin/resolvconf \ sbin/restore \ sbin/route \ sbin/routed \ sbin/routed/rtquery \ sbin/rtsol \ sbin/savecore \ sbin/setkey \ sbin/shutdown \ sbin/spppcontrol \ sbin/swapon \ sbin/sysctl \ sbin/tunefs \ sbin/umount \ usr.bin/alias \ usr.bin/apply \ usr.bin/ar \ usr.bin/asa \ usr.bin/at \ usr.bin/atf/atf-config \ 
usr.bin/atf/atf-report \ usr.bin/atf/atf-run \ usr.bin/atf/atf-version \ usr.bin/atm/sscop \ usr.bin/awk \ usr.bin/banner \ usr.bin/basename \ usr.bin/bc \ usr.bin/biff \ usr.bin/bluetooth/bthost \ usr.bin/bluetooth/btsockstat \ usr.bin/bluetooth/rfcomm_sppd \ usr.bin/bmake \ usr.bin/brandelf \ usr.bin/bsdiff/bsdiff \ usr.bin/bsdiff/bspatch \ usr.bin/bsdcat \ usr.bin/bzip2 \ usr.bin/bzip2recover \ usr.bin/c89 \ usr.bin/c99 \ usr.bin/calendar \ usr.bin/cap_mkdb \ usr.bin/chat \ usr.bin/checknr \ usr.bin/chkey \ usr.bin/chpass \ usr.bin/cksum \ usr.bin/cmp \ usr.bin/col \ usr.bin/colcrt \ usr.bin/colldef \ usr.bin/colrm \ usr.bin/column \ usr.bin/comm \ usr.bin/compile_et \ usr.bin/compress \ usr.bin/cpio \ usr.bin/cpuset \ usr.bin/csplit \ usr.bin/ctags \ usr.bin/ctlstat \ usr.bin/cut \ usr.bin/dc \ usr.bin/dig \ usr.bin/dirname \ usr.bin/dpv \ usr.bin/drill \ usr.bin/du \ usr.bin/ee \ usr.bin/elf2aout \ usr.bin/elfdump \ usr.bin/enigma \ usr.bin/env \ usr.bin/expand \ usr.bin/false \ usr.bin/fetch \ usr.bin/file \ usr.bin/file2c \ usr.bin/find \ usr.bin/finger \ usr.bin/fmt \ usr.bin/fold \ usr.bin/from \ usr.bin/fstat \ usr.bin/fsync \ usr.bin/ftp \ usr.bin/gcore \ usr.bin/gencat \ usr.bin/getaddrinfo \ usr.bin/getconf \ usr.bin/getent \ usr.bin/getopt \ usr.bin/gprof \ usr.bin/grep \ usr.bin/gzip \ usr.bin/head \ usr.bin/hexdump \ usr.bin/host \ usr.bin/iconv \ usr.bin/id \ usr.bin/ident \ usr.bin/indent \ usr.bin/ipcrm \ usr.bin/ipcs \ usr.bin/iscsictl \ usr.bin/join \ usr.bin/jot \ usr.bin/kdump \ usr.bin/keylogin \ usr.bin/keylogout \ usr.bin/killall \ usr.bin/ktrace \ usr.bin/ktrdump \ usr.bin/lam \ usr.bin/last \ usr.bin/lastcomm \ usr.bin/lex \ usr.bin/ldd \ usr.bin/leave \ usr.bin/less \ usr.bin/lessecho \ usr.bin/lesskey \ usr.bin/lex/lib \ usr.bin/limits \ usr.bin/locale \ + usr.bin/localedef \ usr.bin/locate/bigram \ usr.bin/locate/code \ usr.bin/locate/locate \ usr.bin/lock \ usr.bin/lockf \ usr.bin/logger \ usr.bin/login \ usr.bin/logins \ 
usr.bin/logname \ usr.bin/look \ usr.bin/lorder \ usr.bin/lsvfs \ usr.bin/lzmainfo \ usr.bin/m4 \ usr.bin/mail \ usr.bin/man \ usr.bin/mandoc \ usr.bin/mesg \ usr.bin/minigzip \ usr.bin/ministat \ usr.bin/mkcsmapper \ usr.bin/mkdep \ usr.bin/mkesdb \ usr.bin/mkfifo \ usr.bin/mkimg \ usr.bin/mklocale \ usr.bin/mkstr \ usr.bin/mktemp \ usr.bin/mkuzip \ usr.bin/msgs \ usr.bin/mt \ usr.bin/nc \ usr.bin/ncal \ usr.bin/netstat \ usr.bin/newgrp \ usr.bin/newkey \ usr.bin/nfsstat \ usr.bin/nice \ usr.bin/nl \ usr.bin/nohup \ usr.bin/opieinfo \ usr.bin/opiekey \ usr.bin/opiepasswd \ usr.bin/pagesize \ usr.bin/passwd \ usr.bin/paste \ usr.bin/patch \ usr.bin/pathchk \ usr.bin/perror \ usr.bin/pr \ usr.bin/printenv \ usr.bin/printf \ usr.bin/proccontrol \ usr.bin/procstat \ usr.bin/protect \ usr.bin/quota \ usr.bin/rctl \ usr.bin/renice \ usr.bin/resizewin \ usr.bin/rev \ usr.bin/revoke \ usr.bin/rlogin \ usr.bin/rpcgen \ usr.bin/rpcinfo \ usr.bin/rs \ usr.bin/rsh \ usr.bin/rup \ usr.bin/ruptime \ usr.bin/rusers \ usr.bin/rwall \ usr.bin/rwho \ usr.bin/script \ usr.bin/sdiff \ usr.bin/sed \ usr.bin/seq \ usr.bin/shar \ usr.bin/showmount \ usr.bin/smbutil \ usr.bin/sockstat \ usr.bin/soelim \ usr.bin/sort \ usr.bin/split \ usr.bin/ssh-copy-id \ usr.bin/stat \ usr.bin/stdbuf \ usr.bin/su \ usr.bin/svn/svn \ usr.bin/svn/svnadmin \ usr.bin/svn/svnbench \ usr.bin/svn/svndumpfilter \ usr.bin/svn/svnfsfs \ usr.bin/svn/svnlook \ usr.bin/svn/svnmucc \ usr.bin/svn/svnrdump \ usr.bin/svn/svnserve \ usr.bin/svn/svnsync \ usr.bin/svn/svnversion \ usr.bin/systat \ usr.bin/tabs \ usr.bin/tail \ usr.bin/talk \ usr.bin/tar \ usr.bin/tcopy \ usr.bin/tee \ usr.bin/telnet \ usr.bin/tftp \ usr.bin/time \ usr.bin/timeout \ usr.bin/tip/tip \ usr.bin/top \ usr.bin/touch \ usr.bin/tput \ usr.bin/tr \ usr.bin/true \ usr.bin/truncate \ usr.bin/truss \ usr.bin/tset \ usr.bin/tsort \ usr.bin/tty \ usr.bin/ul \ usr.bin/uname \ usr.bin/unexpand \ usr.bin/unifdef \ usr.bin/uniq \ usr.bin/units \ 
usr.bin/unvis \ usr.bin/unzip \ usr.bin/usbhidaction \ usr.bin/usbhidctl \ usr.bin/users \ usr.bin/uudecode \ usr.bin/uuencode \ usr.bin/vacation \ usr.bin/vgrind \ usr.bin/vi \ usr.bin/vi/catalog \ usr.bin/vis \ usr.bin/vmstat \ usr.bin/vtfontcvt \ usr.bin/w \ usr.bin/wall \ usr.bin/wc \ usr.bin/what \ usr.bin/whereis \ usr.bin/which \ usr.bin/who \ usr.bin/whois \ usr.bin/write \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xo \ usr.bin/xstr \ usr.bin/xz \ usr.bin/xzdec \ usr.bin/yacc \ usr.bin/yes \ usr.bin/ypcat \ usr.bin/ypmatch \ usr.bin/ypwhich \ usr.bin/zstd \ usr.sbin/IPXrouted \ usr.sbin/ac \ usr.sbin/accton \ usr.sbin/adduser \ usr.sbin/amd/amd \ usr.sbin/amd/amq \ usr.sbin/amd/doc \ usr.sbin/amd/fixmount \ usr.sbin/amd/fsinfo \ usr.sbin/amd/hlfsd \ usr.sbin/amd/include \ usr.sbin/amd/libamu \ usr.sbin/amd/mk-amd-map \ usr.sbin/amd/pawd \ usr.sbin/amd/scripts \ usr.sbin/amd/wire-test \ usr.sbin/ancontrol \ usr.sbin/apm \ usr.sbin/arp \ usr.sbin/arpaname \ usr.sbin/audit \ usr.sbin/auditd \ usr.sbin/auditdistd \ usr.sbin/auditreduce \ usr.sbin/authpf \ usr.sbin/autofs \ usr.sbin/binmiscctl \ usr.sbin/bluetooth/ath3kfw \ usr.sbin/bluetooth/bcmfw \ usr.sbin/bluetooth/bt3cfw \ usr.sbin/bluetooth/bthidcontrol \ usr.sbin/bluetooth/bthidd \ usr.sbin/bluetooth/btpand \ usr.sbin/bluetooth/hccontrol \ usr.sbin/bluetooth/hcsecd \ usr.sbin/bluetooth/hcseriald \ usr.sbin/bluetooth/l2control \ usr.sbin/bluetooth/l2ping \ usr.sbin/bluetooth/rfcomm_pppd \ usr.sbin/bluetooth/sdpcontrol \ usr.sbin/bluetooth/sdpd \ usr.sbin/bootparamd/bootparamd \ usr.sbin/bootparamd/callbootd \ usr.sbin/bsdconfig \ usr.sbin/bsdconfig/console \ usr.sbin/bsdconfig/console/include \ usr.sbin/bsdconfig/diskmgmt \ usr.sbin/bsdconfig/diskmgmt/include \ usr.sbin/bsdconfig/docsinstall \ usr.sbin/bsdconfig/docsinstall/include \ usr.sbin/bsdconfig/dot \ usr.sbin/bsdconfig/dot/include \ usr.sbin/bsdconfig/examples \ usr.sbin/bsdconfig/include \ usr.sbin/bsdconfig/includes \ 
usr.sbin/bsdconfig/includes/include \ usr.sbin/bsdconfig/mouse \ usr.sbin/bsdconfig/mouse/include \ usr.sbin/bsdconfig/networking \ usr.sbin/bsdconfig/networking/include \ usr.sbin/bsdconfig/networking/share \ usr.sbin/bsdconfig/packages \ usr.sbin/bsdconfig/packages/include \ usr.sbin/bsdconfig/password \ usr.sbin/bsdconfig/password/include \ usr.sbin/bsdconfig/password/share \ usr.sbin/bsdconfig/security \ usr.sbin/bsdconfig/security/include \ usr.sbin/bsdconfig/share \ usr.sbin/bsdconfig/share/media \ usr.sbin/bsdconfig/share/packages \ usr.sbin/bsdconfig/startup \ usr.sbin/bsdconfig/startup/include \ usr.sbin/bsdconfig/startup/share \ usr.sbin/bsdconfig/timezone \ usr.sbin/bsdconfig/timezone/include \ usr.sbin/bsdconfig/timezone/share \ usr.sbin/bsdconfig/ttys \ usr.sbin/bsdconfig/ttys/include \ usr.sbin/bsdconfig/usermgmt \ usr.sbin/bsdconfig/usermgmt/include \ usr.sbin/bsdconfig/usermgmt/share \ usr.sbin/bsdinstall/distextract \ usr.sbin/bsdinstall/distfetch \ usr.sbin/bsdinstall/partedit \ usr.sbin/bsdinstall/scripts \ usr.sbin/bsnmpd/bsnmpd \ usr.sbin/bsnmpd/gensnmptree \ usr.sbin/bsnmpd/modules/snmp_atm \ usr.sbin/bsnmpd/modules/snmp_bridge \ usr.sbin/bsnmpd/modules/snmp_hast \ usr.sbin/bsnmpd/modules/snmp_hostres \ usr.sbin/bsnmpd/modules/snmp_lm75 \ usr.sbin/bsnmpd/modules/snmp_mibII \ usr.sbin/bsnmpd/modules/snmp_netgraph \ usr.sbin/bsnmpd/modules/snmp_pf \ usr.sbin/bsnmpd/modules/snmp_target \ usr.sbin/bsnmpd/modules/snmp_usm \ usr.sbin/bsnmpd/modules/snmp_vacm \ usr.sbin/bsnmpd/modules/snmp_wlan \ usr.sbin/bsnmpd/tools/bsnmptools \ usr.sbin/bsnmpd/tools/libbsnmptools \ usr.sbin/cdcontrol \ usr.sbin/chkgrp \ usr.sbin/chown \ usr.sbin/chroot \ usr.sbin/ckdist \ usr.sbin/clear_locks \ usr.sbin/config \ usr.sbin/crashinfo \ usr.sbin/cron/cron \ usr.sbin/cron/crontab \ usr.sbin/cron/lib \ usr.sbin/crunch/crunchgen \ usr.sbin/crunch/crunchide \ usr.sbin/ctladm \ usr.sbin/ctld \ usr.sbin/ctm/ctm \ usr.sbin/ctm/ctm_dequeue \ usr.sbin/ctm/ctm_rmail \ 
usr.sbin/ctm/ctm_smail \ usr.sbin/daemon \ usr.sbin/dconschat \ usr.sbin/ddns-confgen \ usr.sbin/devctl \ usr.sbin/devinfo \ usr.sbin/digictl \ usr.sbin/diskinfo \ usr.sbin/dnssec-dsfromkey \ usr.sbin/dnssec-keyfromlabel \ usr.sbin/dnssec-keygen \ usr.sbin/dnssec-revoke \ usr.sbin/dnssec-settime \ usr.sbin/dnssec-signzone \ usr.sbin/dumpcis \ usr.sbin/editmap \ usr.sbin/edquota \ usr.sbin/etcupdate \ usr.sbin/extattr \ usr.sbin/extattrctl \ usr.sbin/fdcontrol \ usr.sbin/fdformat \ usr.sbin/fdread \ usr.sbin/fdwrite \ usr.sbin/fifolog/fifolog_create \ usr.sbin/fifolog/fifolog_reader \ usr.sbin/fifolog/fifolog_writer \ usr.sbin/fifolog/lib \ usr.sbin/flowctl \ usr.sbin/fmtree \ usr.sbin/freebsd-update \ usr.sbin/fstyp \ usr.sbin/ftp-proxy \ usr.sbin/fwcontrol \ usr.sbin/genrandom \ usr.sbin/getfmac \ usr.sbin/getpmac \ usr.sbin/gpioctl \ usr.sbin/gssd \ usr.sbin/gstat \ usr.sbin/i2c \ usr.sbin/ifmcstat \ usr.sbin/inetd \ usr.sbin/iostat \ usr.sbin/iovctl \ usr.sbin/ip6addrctl \ usr.sbin/ipfwpcap \ usr.sbin/isc-hmac-fixup \ usr.sbin/iscsid \ usr.sbin/isfctl \ usr.sbin/jail \ usr.sbin/jexec \ usr.sbin/jls \ usr.sbin/kbdcontrol \ usr.sbin/kbdmap \ usr.sbin/keyserv \ usr.sbin/kldxref \ usr.sbin/lastlogin \ usr.sbin/lmcconfig \ usr.sbin/lpr/chkprintcap \ usr.sbin/lpr/common_source \ usr.sbin/lpr/filters \ usr.sbin/lpr/filters.ru/koi2855 \ usr.sbin/lpr/filters.ru/koi2alt \ usr.sbin/lpr/lp \ usr.sbin/lpr/lpc \ usr.sbin/lpr/lpd \ usr.sbin/lpr/lpq \ usr.sbin/lpr/lpr \ usr.sbin/lpr/lprm \ usr.sbin/lpr/lptest \ usr.sbin/lpr/pac \ usr.sbin/mailstats \ usr.sbin/mailwrapper \ usr.sbin/makefs \ usr.sbin/makemap \ usr.sbin/manctl \ usr.sbin/memcontrol \ usr.sbin/mergemaster \ usr.sbin/mfiutil \ usr.sbin/mixer \ usr.sbin/mld6query \ usr.sbin/mlxcontrol \ usr.sbin/mount_smbfs \ usr.sbin/mountd \ usr.sbin/moused \ usr.sbin/mpsutil \ usr.sbin/mptutil \ usr.sbin/mtest \ usr.sbin/named \ usr.sbin/named-checkconf \ usr.sbin/named-checkzone \ usr.sbin/named-journalprint \ usr.sbin/ndp \ 
usr.sbin/newsyslog \ usr.sbin/nfscbd \ usr.sbin/nfsd \ usr.sbin/nfsdumpstate \ usr.sbin/nfsrevoke \ usr.sbin/nfsuserd \ usr.sbin/ngctl \ usr.sbin/nghook \ usr.sbin/nmtree \ usr.sbin/nologin \ usr.sbin/nscd \ usr.sbin/nsec3hash \ usr.sbin/ntp/doc \ usr.sbin/ntp/doc/drivers/icons \ usr.sbin/ntp/doc/drivers/scripts \ usr.sbin/ntp/doc/drivers \ usr.sbin/ntp/doc/hints \ usr.sbin/ntp/doc/icons \ usr.sbin/ntp/doc/pic \ usr.sbin/ntp/doc/scripts \ usr.sbin/ntp/libntp \ usr.sbin/ntp/libopts \ usr.sbin/ntp/libparse \ usr.sbin/ntp/ntp-keygen \ usr.sbin/ntp/ntpd \ usr.sbin/ntp/ntpdate \ usr.sbin/ntp/ntpdc \ usr.sbin/ntp/ntpq \ usr.sbin/ntp/ntptime \ usr.sbin/ntp/sntp \ usr.sbin/pc-sysinstall/backend \ usr.sbin/pc-sysinstall/backend-partmanager \ usr.sbin/pc-sysinstall/backend-query \ usr.sbin/pc-sysinstall/conf \ usr.sbin/pc-sysinstall/doc \ usr.sbin/pc-sysinstall/examples \ usr.sbin/pc-sysinstall/pc-sysinstall \ usr.sbin/pciconf \ usr.sbin/periodic \ usr.sbin/pkg \ usr.sbin/pmcannotate \ usr.sbin/pmccontrol \ usr.sbin/pmcstat \ usr.sbin/pmcstudy \ usr.sbin/portsnap/make_index \ usr.sbin/portsnap/phttpget \ usr.sbin/portsnap/portsnap \ usr.sbin/powerd \ usr.sbin/ppp \ usr.sbin/pppctl \ usr.sbin/praliases \ usr.sbin/praudit \ usr.sbin/prometheus_sysctl_exporter \ usr.sbin/pstat \ usr.sbin/pw \ usr.sbin/pwd_mkdb \ usr.sbin/quot \ usr.sbin/quotaon \ usr.sbin/rarpd \ usr.sbin/repquota \ usr.sbin/rip6query \ usr.sbin/rmt \ usr.sbin/rndc \ usr.sbin/rndc-confgen \ usr.sbin/route6d \ usr.sbin/rpc.lockd \ usr.sbin/rpc.statd \ usr.sbin/rpc.umntall \ usr.sbin/rpc.yppasswdd \ usr.sbin/rpc.ypupdated \ usr.sbin/rpc.ypxfrd \ usr.sbin/rpcbind \ usr.sbin/rrenumd \ usr.sbin/rtadvctl \ usr.sbin/rtadvd \ usr.sbin/rtprio \ usr.sbin/rtsold \ usr.sbin/rwhod \ usr.sbin/sa \ usr.sbin/sendmail \ usr.sbin/service \ usr.sbin/services_mkdb \ usr.sbin/sesutil \ usr.sbin/setfib \ usr.sbin/setfmac \ usr.sbin/setpmac \ usr.sbin/smbmsg \ usr.sbin/snapinfo \ usr.sbin/spray \ usr.sbin/syslogd \ usr.sbin/sysrc \ 
usr.sbin/tcpdchk \ usr.sbin/tcpdmatch \ usr.sbin/tcpdrop \ usr.sbin/tcpdump/tcpdump \ usr.sbin/timed/timed \ usr.sbin/timed/timedc \ usr.sbin/traceroute \ usr.sbin/traceroute6 \ usr.sbin/trpt \ usr.sbin/tzsetup \ usr.sbin/uathload \ usr.sbin/uefisign \ usr.sbin/ugidfw \ usr.sbin/uhsoctl \ usr.sbin/unbound/anchor \ usr.sbin/unbound/checkconf \ usr.sbin/unbound/control \ usr.sbin/unbound/daemon \ usr.sbin/unbound/local-setup \ usr.sbin/usbconfig \ usr.sbin/usbdump \ usr.sbin/utx \ usr.sbin/vidcontrol \ usr.sbin/vigr \ usr.sbin/vipw \ usr.sbin/wake \ usr.sbin/watch \ usr.sbin/watchdogd \ usr.sbin/wlandebug \ usr.sbin/wpa/hostapd \ usr.sbin/wpa/hostapd_cli \ usr.sbin/wpa/ndis_events \ usr.sbin/wpa/wpa_cli \ usr.sbin/wpa/wpa_passphrase \ usr.sbin/wpa/wpa_supplicant \ usr.sbin/yp_mkdb \ usr.sbin/ypbind \ usr.sbin/ypldap \ usr.sbin/yppoll \ usr.sbin/yppush \ usr.sbin/ypserv \ usr.sbin/ypset \ usr.sbin/zic/zdump \ usr.sbin/zic/zic \ usr.sbin/zonectl \ ${DEP_RELDIR}/cddl \ ${DEP_RELDIR}/gnu \ ${DEP_RELDIR}/include \ ${DEP_RELDIR}/kerberos5 \ ${DEP_RELDIR}/lib \ ${DEP_RELDIR}/libexec \ ${DEP_RELDIR}/misc \ ${DEP_RELDIR}/secure \ ${DEP_RELDIR}/share \ .if ${MK_EFI} != "no" DIRDEPS+= usr.sbin/efidp .endif .if ${MK_NAND} != "no" DIRDEPS+= \ sbin/nandfs \ sbin/newfs_nandfs \ usr.sbin/nandsim \ usr.sbin/nandtool \ .endif DIRDEPS.amd64= \ sbin/bsdlabel \ sbin/fdisk \ sbin/nvmecontrol \ usr.sbin/acpi/acpiconf \ usr.sbin/acpi/acpidb \ usr.sbin/acpi/acpidump \ usr.sbin/acpi/iasl \ usr.sbin/apm \ usr.sbin/bhyve \ usr.sbin/bhyvectl \ usr.sbin/bhyveload \ usr.sbin/boot0cfg \ usr.sbin/btxld \ usr.sbin/camdd \ usr.sbin/cpucontrol \ usr.sbin/hyperv/tools \ usr.sbin/hyperv/tools/kvp \ usr.sbin/hyperv/tools/vss \ usr.sbin/kgmon \ usr.sbin/lptcontrol \ usr.sbin/mptable \ usr.sbin/ndiscvt \ usr.sbin/spkrtest \ usr.sbin/sade \ usr.sbin/zzz DIRDEPS.arm= \ sbin/bsdlabel \ sbin/fdisk \ usr.sbin/ofwdump \ usr.sbin/kgmon DIRDEPS.i386= \ sbin/bsdlabel \ sbin/fdisk \ sbin/nvmecontrol \ sbin/sconfig \ 
usr.sbin/apm \ usr.sbin/apmd \ usr.sbin/btxld \ usr.sbin/cpucontrol \ usr.sbin/hyperv/tools \ usr.sbin/hyperv/tools/kvp \ usr.sbin/hyperv/tools/vss \ usr.sbin/kgmon \ usr.sbin/kgzip \ usr.sbin/lptcontrol \ usr.sbin/mptable \ usr.sbin/ndiscvt \ usr.sbin/pnpinfo \ usr.sbin/sade \ usr.sbin/spkrtest \ usr.sbin/zzz \ usr.sbin/acpi \ usr.sbin/boot0cfg DIRDEPS.arm64= \ usr.sbin/acpi \ usr.sbin/ofwdump DIRDEPS.mips= \ sbin/bsdlabel \ sbin/fdisk DIRDEPS.sparc64= \ sbin/bsdlabel \ sbin/sunlabel \ usr.sbin/eeprom \ usr.sbin/ofwdump \ usr.sbin/sade DIRDEPS.powerpc= \ usr.sbin/nvram \ usr.sbin/ofwdump .if ${MK_BLACKLIST_SUPPORT} != "no" DIRDEPS+= \ usr.sbin/blacklistctl \ usr.sbin/blacklistd .endif .if ${MK_CXGBETOOL} != "no" DIRDEPS+= usr.sbin/cxgbetool .endif .if ${MK_MLX5TOOL} != "no" DIRDEPS+= usr.sbin/mlx5tool .endif .if ${MK_GPL_DTC} != "yes" DIRDEPS+= usr.bin/dtc .endif DIRDEPS+= ${DIRDEPS.${MACHINE}:U} .include Index: head/tools/tools/locale/tools/cldr2def.pl =================================================================== --- head/tools/tools/locale/tools/cldr2def.pl (revision 339488) +++ head/tools/tools/locale/tools/cldr2def.pl (revision 339489) @@ -1,1032 +1,1038 @@ #!/usr/local/bin/perl -wC # $FreeBSD$ use strict; use File::Copy; use XML::Parser; use Tie::IxHash; use Text::Iconv; use Data::Dumper; use Getopt::Long; use Digest::SHA qw(sha1_hex); require "charmaps.pm"; if ($#ARGV < 2) { print "Usage: $0 --cldr= --unidata= --etc= --type= [--lc=]\n"; exit(1); } my $DEFENCODING = "UTF-8"; my @filter = (); my $CLDRDIR = undef; my $UNIDATADIR = undef; my $ETCDIR = undef; my $TYPE = undef; my $doonly = undef; my $result = GetOptions ( "cldr=s" => \$CLDRDIR, "unidata=s" => \$UNIDATADIR, "etc=s" => \$ETCDIR, "type=s" => \$TYPE, "lc=s" => \$doonly ); my %convertors = (); my %ucd = (); my %values = (); my %hashtable = (); my %languages = (); my %translations = (); my %encodings = (); my %alternativemonths = (); get_languages(); my %utf8map = (); my %utf8aliases = (); 
get_unidata($UNIDATADIR); get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm"); get_encodings("$ETCDIR/charmaps"); my %keys = (); tie(%keys, "Tie::IxHash"); tie(%hashtable, "Tie::IxHash"); my %FILESNAMES = ( "monetdef" => "LC_MONETARY", "timedef" => "LC_TIME", "msgdef" => "LC_MESSAGES", "numericdef" => "LC_NUMERIC", "colldef" => "LC_COLLATE", "ctypedef" => "LC_CTYPE" ); my %callback = ( mdorder => \&callback_mdorder, altmon => \&callback_altmon, cformat => \&callback_cformat, dformat => \&callback_dformat, dtformat => \&callback_dtformat, cbabmon => \&callback_abmon, cbampm => \&callback_ampm, data => undef, ); my %DESC = ( # numericdef "decimal_point" => "decimal_point", "thousands_sep" => "thousands_sep", "grouping" => "grouping", # monetdef "int_curr_symbol" => "int_curr_symbol (last character always " . "SPACE)", "currency_symbol" => "currency_symbol", "mon_decimal_point" => "mon_decimal_point", "mon_thousands_sep" => "mon_thousands_sep", "mon_grouping" => "mon_grouping", "positive_sign" => "positive_sign", "negative_sign" => "negative_sign", "int_frac_digits" => "int_frac_digits", "frac_digits" => "frac_digits", "p_cs_precedes" => "p_cs_precedes", "p_sep_by_space" => "p_sep_by_space", "n_cs_precedes" => "n_cs_precedes", "n_sep_by_space" => "n_sep_by_space", "p_sign_posn" => "p_sign_posn", "n_sign_posn" => "n_sign_posn", # msgdef "yesexpr" => "yesexpr", "noexpr" => "noexpr", "yesstr" => "yesstr", "nostr" => "nostr", # timedef "abmon" => "Short month names", "mon" => "Long month names (as in a date)", "abday" => "Short weekday names", "day" => "Long weekday names", "t_fmt" => "X_fmt", "d_fmt" => "x_fmt", "c_fmt" => "c_fmt", "am_pm" => "AM/PM", "d_t_fmt" => "date_fmt", "altmon" => "Long month names (without case ending)", "md_order" => "md_order", "t_fmt_ampm" => "ampm_fmt", ); if ($TYPE eq "colldef") { transform_collation(); make_makefile(); } if ($TYPE eq "ctypedef") { transform_ctypes(); make_makefile(); } if ($TYPE eq "numericdef") { %keys = ( "decimal_point" => 
"s", "thousands_sep" => "s", "grouping" => "ai", ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "monetdef") { %keys = ( "int_curr_symbol" => "s", "currency_symbol" => "s", "mon_decimal_point" => "s", "mon_thousands_sep" => "s", "mon_grouping" => "ai", "positive_sign" => "s", "negative_sign" => "s", "int_frac_digits" => "i", "frac_digits" => "i", "p_cs_precedes" => "i", "p_sep_by_space" => "i", "n_cs_precedes" => "i", "n_sep_by_space" => "i", "p_sign_posn" => "i", "n_sign_posn" => "i" ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "msgdef") { %keys = ( "yesexpr" => "s", "noexpr" => "s", "yesstr" => "s", "nostr" => "s" ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "timedef") { %keys = ( "abmon" => " "as", "abday" => "as", "day" => "as", "t_fmt" => "s", "d_fmt" => " " " " " " "s", ); get_fields(); print_fields(); make_makefile(); } sub callback_ampm { my $s = shift; my $nl = $callback{data}{l} . "_" . $callback{data}{c}; my $enc = $callback{data}{e}; if ($nl eq 'ru_RU') { if ($enc eq 'UTF-8') { $s = 'дп;пп'; } else { my $converter = Text::Iconv->new("utf-8", "$enc"); $s = $converter->convert("дп;пп"); } } return $s; } sub callback_cformat { my $s = shift; my $nl = $callback{data}{l} . "_" . $callback{data}{c}; if ($nl eq 'ko_KR') { $s =~ s/(> )(%p)/$1%A $2/; } $s =~ s/\.,/\./; $s =~ s/ %Z//; $s =~ s/ %z//; $s =~ s/^"%e\./%A %e/; $s =~ s/^"(%B %e, )/"%A, $1/; $s =~ s/^"(%e %B )/"%A $1/; return $s; }; sub callback_dformat { my $s = shift; $s =~ s/(%m(|[-.]))%e/$1%d/; $s =~ s/%e((|[-.])%m)/%d$1/; return $s; }; sub callback_dtformat { my $s = shift; my $nl = $callback{data}{l} . "_" . 
$callback{data}{c}; if ($nl eq 'ja_JP') { $s =~ s/(> )(%H)/$1%A $2/; } elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') { if ($nl ne 'ko_KR') { $s =~ s/%m/%_m/; } $s =~ s/(> )(%p)/$1%A $2/; } $s =~ s/\.,/\./; $s =~ s/^"%e\./%A %e/; $s =~ s/^"(%B %e, )/"%A, $1/; $s =~ s/^"(%e %B )/"%A $1/; return $s; }; sub callback_mdorder { my $s = shift; return undef if (!defined $s); $s =~ s/[^dem]//g; $s =~ s/e/d/g; return $s; }; sub callback_altmon { # if the language/country is known in %alternative months then # return that, otherwise repeat mon my $s = shift; if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) { my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}}); my @cleaned; foreach (@altnames) { $_ =~ s/^\s+//; $_ =~ s/\s+$//; push @cleaned, $_; } return join(";",@cleaned); } return $s; } sub callback_abmon { # for specified CJK locales, pad result with a space to enable # columns to line up (style established in FreeBSD in 2001) my $s = shift; my $nl = $callback{data}{l} . "_" . 
$callback{data}{c}; if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_HK' || $nl eq 'zh_TW') { my @monthnames = split(";", $s); my @cleaned; foreach (@monthnames) { if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ || ($_ =~ /^"/ && $_ !~ /^"(||)/)) { $_ =~ s/^"/"/; } push @cleaned, $_; } return join(";",@cleaned); } return $s; } ############################ sub get_unidata { my $directory = shift; open(FIN, "$directory/UnicodeData.txt") or die("Cannot open $directory/UnicodeData.txt");; my @lines = ; chomp(@lines); close(FIN); foreach my $l (@lines) { my @a = split(/;/, $l); $ucd{code2name}{"$a[0]"} = $a[1]; # Unicode name $ucd{name2code}{"$a[1]"} = $a[0]; # Unicode code } } sub get_utf8map { my $file = shift; open(FIN, $file); my @lines = ; close(FIN); chomp(@lines); my $prev_k = undef; my $prev_v = ""; my $incharmap = 0; foreach my $l (@lines) { $l =~ s/\r//; next if ($l =~ /^\#/); next if ($l eq ""); if ($l eq "CHARMAP") { $incharmap = 1; next; } next if (!$incharmap); last if ($l eq "END CHARMAP"); $l =~ /^<([^\s]+)>\s+(.*)/; my $k = $1; my $v = $2; $k =~ s/_/ /g; # unicode char string $v =~ s/\\x//g; # UTF-8 char code $utf8map{$k} = $v; $utf8aliases{$k} = $prev_k if ($prev_v eq $v); $prev_v = $v; $prev_k = $k; } } sub get_encodings { my $dir = shift; foreach my $e (sort(keys(%encodings))) { if (!open(FIN, "$dir/$e.TXT")) { print "Cannot open charmap for $e\n"; next; } $encodings{$e} = 1; my @lines = ; close(FIN); chomp(@lines); foreach my $l (@lines) { $l =~ s/\r//; next if ($l =~ /^\#/); next if ($l eq ""); my @a = split(" ", $l); next if ($#a < 1); $a[0] =~ s/^0[xX]//; # local char code $a[1] =~ s/^0[xX]//; # unicode char code $convertors{$e}{uc($a[1])} = uc($a[0]); } } } sub get_languages { my %data = get_xmldata($ETCDIR); %languages = %{$data{L}}; %translations = %{$data{T}}; %alternativemonths = %{$data{AM}}; %encodings = %{$data{E}}; return if (!defined $doonly); my @a = split(/_/, $doonly); if ($#a == 1) { $filter[0] = 
$a[0]; $filter[1] = "x"; $filter[2] = $a[1]; } elsif ($#a == 2) { $filter[0] = $a[0]; $filter[1] = $a[1]; $filter[2] = $a[2]; } print Dumper(@filter); return; } sub transform_ctypes { foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { next if ($#filter == 2 && ($filter[0] ne $l || $filter[1] ne $f || $filter[2] ne $c)); next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread my $file; $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; my $actfile = $file; my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src"; if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; } open(FIN, "$filename"); print "Reading from $filename for ${l}_${f}_${c}\n"; $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read my @lines; my $shex; my $uhex; while () { push @lines, $_; } close(FIN); $shex = sha1_hex(join("\n", @lines)); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex; $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src"); print FOUT @lines; close(FOUT); foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { next if ($enc eq $DEFENCODING); $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"; if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; } @lines = (); open(FIN, "$filename"); while () { if ((/^comment_char\s/) || (/^escape_char\s/)){ push @lines, $_; } if (/^LC_CTYPE/../^END LC_CTYPE/) { push @lines, $_; } } close(FIN); $uhex = sha1_hex(join("\n", @lines) . 
$enc); $languages{$l}{$f}{data}{$c}{$enc} = $uhex; $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$enc.src"); print FOUT <) { if ((/^comment_char\s/) || (/^escape_char\s/)){ push @lines, $_; } if (/^LC_COLLATE/../^END LC_COLLATE/) { $_ =~ s/[ ]+/ /g; push @lines, $_; } } close(FIN); $shex = sha1_hex(join("\n", @lines)); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex; $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src"); print FOUT <; chomp(@lines); close(FIN); my $continue = 0; foreach my $k (keys(%keys)) { foreach my $line (@lines) { $line =~ s/\r//; next if (!$continue && $line !~ /^$k\s/); if ($continue) { $line =~ s/^\s+//; } else { $line =~ s/^$k\s+//; } $values{$l}{$f}{$c}{$k} = "" if (!defined $values{$l}{$f}{$c}{$k}); $continue = ($line =~ /\/$/); $line =~ s/\/$// if ($continue); while ($line =~ /_/) { $line =~ s/\<([^>_]+)_([^>]+)\>/<$1 $2>/; } die "_ in data - $line" if ($line =~ /_/); $values{$l}{$f}{$c}{$k} .= $line; last if (!$continue); } } } } } } sub decodecldr { my $e = shift; my $s = shift; my $v = undef; if ($e eq "UTF-8") { # # Conversion to UTF-8 can be done from the Unicode name to # the UTF-8 character code. # $v = $utf8map{$s}; die "Cannot convert $s in $e (charmap)" if (!defined $v); } else { # # Conversion to these encodings can be done from the Unicode # name to Unicode code to the encodings code. 
# my $ucc = undef; $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s}); $ucc = $ucd{name2code}{$utf8aliases{$s}} if (!defined $ucc && $utf8aliases{$s} && defined $ucd{name2code}{$utf8aliases{$s}}); if (!defined $ucc) { if (defined $translations{$e}{$s}{hex}) { $v = $translations{$e}{$s}{hex}; $ucc = 0; } elsif (defined $translations{$e}{$s}{ucc}) { $ucc = $translations{$e}{$s}{ucc}; } } die "Cannot convert $s in $e (ucd string)" if (!defined $ucc); $v = $convertors{$e}{$ucc} if (!defined $v); $v = $translations{$e}{$s}{hex} if (!defined $v && defined $translations{$e}{$s}{hex}); if (!defined $v && defined $translations{$e}{$s}{unicode}) { my $ucn = $translations{$e}{$s}{unicode}; $ucc = $ucd{name2code}{$ucn} if (defined $ucd{name2code}{$ucn}); $ucc = $ucd{name2code}{$utf8aliases{$ucn}} if (!defined $ucc && defined $ucd{name2code}{$utf8aliases{$ucn}}); $v = $convertors{$e}{$ucc}; } die "Cannot convert $s in $e (charmap)" if (!defined $v); } return pack("C", hex($v)) if (length($v) == 2); return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2))) if (length($v) == 4); return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)), hex(substr($v, 4, 2))) if (length($v) == 6); print STDERR "Cannot convert $e $s\n"; return "length = " . length($v); } sub translate { my $enc = shift; my $v = shift; return $translations{$enc}{$v} if (defined $translations{$enc}{$v}); return undef; } sub print_fields { foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { next if ($#filter == 2 && ($filter[0] ne $l || $filter[1] ne $f || $filter[2] ne $c)); next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") { print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") . "${c} - not read\n"; next; } my $file = $l; $file .= "_" . 
$f if ($f ne "x"); $file .= "_" . $c; print "Writing to $file in $enc\n"; if ($enc ne $DEFENCODING && !defined $convertors{$enc}) { print "Failed! Cannot convert to $enc.\n"; next; }; open(FOUT, ">$TYPE.draft/$file.$enc.new"); my $okay = 1; my $output = ""; print FOUT </) { $k = substr($g, 1); $g = $keys{$k}; } # Callback function if ($g =~ /^\(.*)/) { my $p1 = $1; $cm = $2; my $p3 = $3; my $rv = decodecldr($enc, $cm); # $rv = translate($enc, $cm) # if (!defined $rv); if (!defined $rv) { print STDERR "Could not convert $k ($cm) from $DEFENCODING to $enc\n"; $okay = 0; next; } $v = $p1 . $rv . $p3; } $output .= "$v\n"; next; } if ($g eq "as") { foreach my $v (split(/;/, $v)) { $v =~ s/^"//; $v =~ s/"$//; my $cm = ""; while ($v =~ /^(.*?)<(.*?)>(.*)/) { my $p1 = $1; $cm = $2; my $p3 = $3; my $rv = decodecldr($enc, $cm); # $rv = translate($enc, # $cm) # if (!defined $rv); if (!defined $rv) { print STDERR "Could not convert $k ($cm) from $DEFENCODING to $enc\n"; $okay = 0; next; } $v = $1 . $rv . $3; } $output .= "$v\n"; } next; } die("$k is '$g'"); } $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output); $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1; print FOUT "$output# EOF\n"; close(FOUT); if ($okay) { rename("$TYPE.draft/$file.$enc.new", "$TYPE.draft/$file.$enc.src"); } else { rename("$TYPE.draft/$file.$enc.new", "$TYPE.draft/$file.$enc.failed"); } } } } } } sub make_makefile { return if ($#filter > -1); print "Creating Makefile for $TYPE\n"; my $SRCOUT; my $SRCOUT2; my $SRCOUT3 = ""; my $SRCOUT4 = ""; my $MAPLOC; if ($TYPE eq "colldef") { - $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" . + $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " . + "-i \${.IMPSRC} \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " . "\${.OBJDIR}/\${.IMPSRC:T:R}"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . "locale/etc/final-maps\n"; $SRCOUT2 = "LC_COLLATE"; $SRCOUT3 = "" . ".for f t in \${LOCALES_MAPPED}\n" . "FILES+=\t\$t.LC_COLLATE\n" . 
"FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" . "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" . - "\tlocaledef -D -U -i \${.ALLSRC} \\\n" . + "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " . + "-i \${.ALLSRC} \\\n" . "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" . "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" . ".endfor\n\n"; $SRCOUT4 = "## LOCALES_MAPPED\n"; } elsif ($TYPE eq "ctypedef") { - $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" . + $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " . + "-w \${MAPLOC}/widths.txt \\\n" . "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " . "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " . " || true"; $SRCOUT2 = "LC_CTYPE"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . "locale/etc/final-maps\n"; $SRCOUT3 = "## SYMPAIRS\n\n" . ".for s t in \${SYMPAIRS}\n" . "\${t:S/src\$/LC_CTYPE/}: " . "\$s\n" . - "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" . + "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " . + "-w \${MAPLOC}/widths.txt \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " . "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " . " || true\n" . 
".endfor\n\n"; } else { $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}"; $SRCOUT2 = "out"; $MAPLOC = ""; } open(FOUT, ">$TYPE.draft/Makefile"); print FOUT < + .src.${SRCOUT2}: $SRCOUT ## PLACEHOLDER ${SRCOUT4} EOF foreach my $hash (keys(%hashtable)) { # For colldef, weight LOCALES to UTF-8 # Sort as upper-case and reverse to achieve it # Make en_US, ru_RU, and ca_AD preferred my @files; if ($TYPE eq "colldef") { @files = sort { if ($a eq 'en_x_US.UTF-8' || $a eq 'ru_x_RU.UTF-8' || $a eq 'ca_x_AD.UTF-8') { return -1; } elsif ($b eq 'en_x_US.UTF-8' || $b eq 'ru_x_RU.UTF-8' || $b eq 'ca_x_AD.UTF-8') { return 1; } else { return uc($b) cmp uc($a); } } keys(%{$hashtable{$hash}}); } elsif ($TYPE eq "ctypedef") { @files = sort { if ($a eq 'en_x_US.UTF-8') { return -1; } elsif ($b eq 'en_x_US.UTF-8') { return 1; } if ($a =~ /^en_x_US/) { return -1; } elsif ($b =~ /^en_x_US/) { return 1; } if ($a =~ /^en_x_GB.ISO8859-15/ || $a =~ /^ru_x_RU/) { return -1; } elsif ($b =~ /^en_x_GB.ISO8859-15/ || $b =~ /ru_x_RU/) { return 1; } else { return uc($b) cmp uc($a); } } keys(%{$hashtable{$hash}}); } else { @files = sort { if ($a =~ /_Comm_/ || $b eq 'en_x_US.UTF-8') { return 1; } elsif ($b =~ /_Comm_/ || $a eq 'en_x_US.UTF-8') { return -1; } else { return uc($b) cmp uc($a); } } keys(%{$hashtable{$hash}}); } if ($#files > 0) { my $link = shift(@files); $link =~ s/_x_/_/; # strip family if none there foreach my $file (@files) { my @a = split(/_/, $file); my @b = split(/\./, $a[-1]); $file =~ s/_x_/_/; print FOUT "SAME+=\t\t$link $file\n"; undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]}); } } } foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { next if ($#filter == 2 && ($filter[0] ne $l || $filter[1] ne $f || $filter[2] ne $c)); next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); if (defined 
$languages{$l}{$f}{data}{$c}{$DEFENCODING} && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") { print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") . "${c} - not read\n"; next; } foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { my $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; next if (!defined $languages{$l}{$f}{data}{$c}{$e}); print FOUT "LOCALES+=\t$file.$e\n"; } if (defined $languages{$l}{$f}{nc_link}) { foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { my $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n"; } } if (defined $languages{$l}{$f}{e_link}) { foreach my $el (split(" ", $languages{$l}{$f}{e_link})) { my @a = split(/:/, $el); my $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n"; } } } } } print FOUT < EOF close(FOUT); } Index: head/usr.bin/localedef/collate.c =================================================================== --- head/usr.bin/localedef/collate.c (revision 339488) +++ head/usr.bin/localedef/collate.c (revision 339489) @@ -1,1313 +1,1323 @@ /*- - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * LC_COLLATE database generation routines for localedef. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "localedef.h" #include "parser.h" #include "collate.h" /* * Design notes. * * It will be extremely helpful to the reader if they have access to * the localedef and locale file format specifications available. * Latest versions of these are available from www.opengroup.org. * * The design for the collation code is a bit complex. The goal is a * single collation database as described in collate.h (in * libc/port/locale). However, there are some other tidbits: * * a) The substitution entries are now a directly indexable array. A * priority elsewhere in the table is taken as an index into the * substitution table if it has a high bit (COLLATE_SUBST_PRIORITY) * set. (The bit is cleared and the result is the index into the * table. * * b) We eliminate duplicate entries into the substitution table. * This saves a lot of space. * * c) The priorities for each level are "compressed", so that each * sorting level has consecutively numbered priorities starting at 1. * (O is reserved for the ignore priority.) 
This means sort levels * which only have a few distinct priorities can represent the * priority level in fewer bits, which makes the strxfrm output * smaller. * * d) We record the total number of priorities so that strxfrm can * figure out how many bytes to expand a numeric priority into. * * e) For the UNDEFINED pass (the last pass), we record the maximum * number of bits needed to uniquely prioritize these entries, so that * the last pass can also use smaller strxfrm output when possible. * * f) Priorities with the sign bit set are verboten. This works out * because no active character set needs that bit to carry significant * information once the character is in wide form. * * To process the entire data to make the database, we actually run * multiple passes over the data. * * The first pass, which is done at parse time, identifies elements, * substitutions, and such, and records them in priority order. As * some priorities can refer to other priorities, using forward * references, we use a table of references indicating whether the * priority's value has been resolved, or whether it is still a * reference. * * The second pass walks over all the items in priority order, noting * that they are used directly, and not just an indirect reference. * This is done by creating a "weight" structure for the item. The * weights are stashed in an RB tree sorted by relative "priority". * * The third pass walks over all the weight structures, in priority * order, and assigns a new monotonically increasing (per sort level) * weight value to them. These are the values that will actually be * written to the file. * * The fourth pass just writes the data out. */ /* * In order to resolve the priorities, we create a table of priorities. * Entries in the table can be in one of three states. * * UNKNOWN is for newly allocated entries, and indicates that nothing * is known about the priority. 
(For example, when new entries are created * for collating-symbols, this is the value assigned for them until the * collating symbol's order has been determined. * * RESOLVED is used for an entry where the priority indicates the final * numeric weight. * * REFER is used for entries that reference other entries. Typically * this is used for forward references. A collating-symbol can never * have this value. * * The "pass" field is used during final resolution to aid in detection * of referencing loops. (For example depends on , but has its * priority dependent on .) */ typedef enum { UNKNOWN, /* priority is totally unknown */ RESOLVED, /* priority value fully resolved */ REFER /* priority is a reference (index) */ } res_t; typedef struct weight { int32_t pri; int opt; RB_ENTRY(weight) entry; } weight_t; typedef struct priority { res_t res; int32_t pri; int pass; int lineno; } collpri_t; #define NUM_WT collinfo.directive_count /* * These are the abstract collating symbols, which are just a symbolic * way to reference a priority. */ struct collsym { char *name; int32_t ref; RB_ENTRY(collsym) entry; }; /* * These are also abstract collating symbols, but we allow them to have * different priorities at different levels. */ typedef struct collundef { char *name; int32_t ref[COLL_WEIGHTS_MAX]; RB_ENTRY(collundef) entry; } collundef_t; /* * These are called "chains" in libc. This records the fact that two * more characters should be treated as a single collating entity when * they appear together. For example, in Spanish gets collated * as a character between and . */ struct collelem { char *symbol; wchar_t *expand; int32_t ref[COLL_WEIGHTS_MAX]; RB_ENTRY(collelem) rb_bysymbol; RB_ENTRY(collelem) rb_byexpand; }; /* * Individual characters have a sequence of weights as well. */ typedef struct collchar { wchar_t wc; int32_t ref[COLL_WEIGHTS_MAX]; RB_ENTRY(collchar) entry; } collchar_t; /* * Substitution entries. The key is itself a priority. 
Note that * when we create one of these, we *automatically* wind up with a * fully resolved priority for the key, because creation of * substitutions creates a resolved priority at the same time. */ typedef struct subst{ int32_t key; int32_t ref[COLLATE_STR_LEN]; RB_ENTRY(subst) entry; RB_ENTRY(subst) entry_ref; } subst_t; static RB_HEAD(collsyms, collsym) collsyms; static RB_HEAD(collundefs, collundef) collundefs; static RB_HEAD(elem_by_symbol, collelem) elem_by_symbol; static RB_HEAD(elem_by_expand, collelem) elem_by_expand; static RB_HEAD(collchars, collchar) collchars; static RB_HEAD(substs, subst) substs[COLL_WEIGHTS_MAX]; static RB_HEAD(substs_ref, subst) substs_ref[COLL_WEIGHTS_MAX]; static RB_HEAD(weights, weight) weights[COLL_WEIGHTS_MAX]; static int32_t nweight[COLL_WEIGHTS_MAX]; /* * This is state tracking for the ellipsis token. Note that we start * the initial values so that the ellipsis logic will think we got a * magic starting value of NUL. It starts at minus one because the * starting point is exclusive -- i.e. the starting point is not * itself handled by the ellipsis code. */ static int currorder = EOF; static int lastorder = EOF; static collelem_t *currelem; static collchar_t *currchar; static collundef_t *currundef; static wchar_t ellipsis_start = 0; static int32_t ellipsis_weights[COLL_WEIGHTS_MAX]; /* * We keep a running tally of weights. */ static int nextpri = 1; static int nextsubst[COLL_WEIGHTS_MAX] = { 0 }; /* * This array collects up the weights for each level. */ static int32_t order_weights[COLL_WEIGHTS_MAX]; static int curr_weight = 0; static int32_t subst_weights[COLLATE_STR_LEN]; static int curr_subst = 0; /* * Some initial priority values. 
*/ static int32_t pri_undefined[COLL_WEIGHTS_MAX]; static int32_t pri_ignore; static collate_info_t collinfo; +static int32_t subst_count[COLL_WEIGHTS_MAX]; +static int32_t chain_count; +static int32_t large_count; static collpri_t *prilist = NULL; static int numpri = 0; static int maxpri = 0; static void start_order(int); static int32_t new_pri(void) { int i; if (numpri >= maxpri) { maxpri = maxpri ? maxpri * 2 : 1024; prilist = realloc(prilist, sizeof (collpri_t) * maxpri); if (prilist == NULL) { fprintf(stderr,"out of memory"); return (-1); } for (i = numpri; i < maxpri; i++) { prilist[i].res = UNKNOWN; prilist[i].pri = 0; prilist[i].pass = 0; } } return (numpri++); } static collpri_t * get_pri(int32_t ref) { if ((ref < 0) || (ref > numpri)) { INTERR; return (NULL); } return (&prilist[ref]); } static void set_pri(int32_t ref, int32_t v, res_t res) { collpri_t *pri; pri = get_pri(ref); if ((res == REFER) && ((v < 0) || (v >= numpri))) { INTERR; } /* Resolve self references */ if ((res == REFER) && (ref == v)) { v = nextpri; res = RESOLVED; } if (pri->res != UNKNOWN) { warn("repeated item in order list (first on %d)", pri->lineno); return; } pri->lineno = lineno; pri->pri = v; pri->res = res; } static int32_t resolve_pri(int32_t ref) { collpri_t *pri; static int32_t pass = 0; pri = get_pri(ref); pass++; while (pri->res == REFER) { if (pri->pass == pass) { /* report a line with the circular symbol */ lineno = pri->lineno; fprintf(stderr,"circular reference in order list"); return (-1); } if ((pri->pri < 0) || (pri->pri >= numpri)) { INTERR; return (-1); } pri->pass = pass; pri = &prilist[pri->pri]; } if (pri->res == UNKNOWN) { return (-1); } if (pri->res != RESOLVED) INTERR; return (pri->pri); } static int weight_compare(const void *n1, const void *n2) { int32_t k1 = ((const weight_t *)n1)->pri; int32_t k2 = ((const weight_t *)n2)->pri; return (k1 < k2 ? -1 : k1 > k2 ? 
1 : 0); } RB_GENERATE_STATIC(weights, weight, entry, weight_compare); static int collsym_compare(const void *n1, const void *n2) { const collsym_t *c1 = n1; const collsym_t *c2 = n2; int rv; rv = strcmp(c1->name, c2->name); return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); } RB_GENERATE_STATIC(collsyms, collsym, entry, collsym_compare); static int collundef_compare(const void *n1, const void *n2) { const collundef_t *c1 = n1; const collundef_t *c2 = n2; int rv; rv = strcmp(c1->name, c2->name); return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); } RB_GENERATE_STATIC(collundefs, collundef, entry, collundef_compare); static int element_compare_symbol(const void *n1, const void *n2) { const collelem_t *c1 = n1; const collelem_t *c2 = n2; int rv; rv = strcmp(c1->symbol, c2->symbol); return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); } RB_GENERATE_STATIC(elem_by_symbol, collelem, rb_bysymbol, element_compare_symbol); static int element_compare_expand(const void *n1, const void *n2) { const collelem_t *c1 = n1; const collelem_t *c2 = n2; int rv; rv = wcscmp(c1->expand, c2->expand); return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); } RB_GENERATE_STATIC(elem_by_expand, collelem, rb_byexpand, element_compare_expand); static int collchar_compare(const void *n1, const void *n2) { wchar_t k1 = ((const collchar_t *)n1)->wc; wchar_t k2 = ((const collchar_t *)n2)->wc; return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); } RB_GENERATE_STATIC(collchars, collchar, entry, collchar_compare); static int subst_compare(const void *n1, const void *n2) { int32_t k1 = ((const subst_t *)n1)->key; int32_t k2 = ((const subst_t *)n2)->key; return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); } RB_GENERATE_STATIC(substs, subst, entry, subst_compare); static int subst_compare_ref(const void *n1, const void *n2) { const wchar_t *c1 = ((const subst_t *)n1)->ref; const wchar_t *c2 = ((const subst_t *)n2)->ref; int rv; rv = wcscmp(c1, c2); return ((rv < 0) ? -1 : (rv > 0) ? 
1 : 0); } RB_GENERATE_STATIC(substs_ref, subst, entry_ref, subst_compare_ref); void init_collate(void) { int i; RB_INIT(&collsyms); RB_INIT(&collundefs); RB_INIT(&elem_by_symbol); RB_INIT(&elem_by_expand); RB_INIT(&collchars); for (i = 0; i < COLL_WEIGHTS_MAX; i++) { RB_INIT(&substs[i]); RB_INIT(&substs_ref[i]); RB_INIT(&weights[i]); nweight[i] = 1; } (void) memset(&collinfo, 0, sizeof (collinfo)); /* allocate some initial priorities */ pri_ignore = new_pri(); set_pri(pri_ignore, 0, RESOLVED); for (i = 0; i < COLL_WEIGHTS_MAX; i++) { pri_undefined[i] = new_pri(); /* we will override this later */ set_pri(pri_undefined[i], COLLATE_MAX_PRIORITY, UNKNOWN); } } void define_collsym(char *name) { collsym_t *sym; if ((sym = calloc(1, sizeof(*sym))) == NULL) { fprintf(stderr,"out of memory"); return; } sym->name = name; sym->ref = new_pri(); if (RB_FIND(collsyms, &collsyms, sym) != NULL) { /* * This should never happen because we are only called * for undefined symbols. */ free(sym); INTERR; return; } RB_INSERT(collsyms, &collsyms, sym); } collsym_t * lookup_collsym(char *name) { collsym_t srch; srch.name = name; return (RB_FIND(collsyms, &collsyms, &srch)); } collelem_t * lookup_collelem(char *symbol) { collelem_t srch; srch.symbol = symbol; return (RB_FIND(elem_by_symbol, &elem_by_symbol, &srch)); } static collundef_t * get_collundef(char *name) { collundef_t srch; collundef_t *ud; int i; srch.name = name; if ((ud = RB_FIND(collundefs, &collundefs, &srch)) == NULL) { if (((ud = calloc(1, sizeof(*ud))) == NULL) || ((ud->name = strdup(name)) == NULL)) { fprintf(stderr,"out of memory"); free(ud); return (NULL); } for (i = 0; i < NUM_WT; i++) { ud->ref[i] = new_pri(); } RB_INSERT(collundefs, &collundefs, ud); } add_charmap_undefined(name); return (ud); } static collchar_t * get_collchar(wchar_t wc, int create) { collchar_t srch; collchar_t *cc; int i; srch.wc = wc; cc = RB_FIND(collchars, &collchars, &srch); if ((cc == NULL) && create) { if ((cc = calloc(1, sizeof(*cc))) == 
NULL) { fprintf(stderr, "out of memory"); return (NULL); } for (i = 0; i < NUM_WT; i++) { cc->ref[i] = new_pri(); } cc->wc = wc; RB_INSERT(collchars, &collchars, cc); } return (cc); } void end_order_collsym(collsym_t *sym) { start_order(T_COLLSYM); /* update the weight */ set_pri(sym->ref, nextpri, RESOLVED); nextpri++; } void end_order(void) { int i; int32_t pri; int32_t ref; collpri_t *p; /* advance the priority/weight */ pri = nextpri; switch (currorder) { case T_CHAR: for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { /* unspecified weight is a self reference */ set_pri(currchar->ref[i], pri, RESOLVED); } else { set_pri(currchar->ref[i], ref, REFER); } order_weights[i] = -1; } /* leave a cookie trail in case next symbol is ellipsis */ ellipsis_start = currchar->wc + 1; currchar = NULL; break; case T_ELLIPSIS: /* save off the weights were we can find them */ for (i = 0; i < NUM_WT; i++) { ellipsis_weights[i] = order_weights[i]; order_weights[i] = -1; } break; case T_COLLELEM: if (currelem == NULL) { INTERR; } else { for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(currelem->ref[i], pri, RESOLVED); } else { set_pri(currelem->ref[i], ref, REFER); } order_weights[i] = -1; } } break; case T_UNDEFINED: for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(pri_undefined[i], -1, RESOLVED); } else { set_pri(pri_undefined[i], ref, REFER); } order_weights[i] = -1; } break; case T_SYMBOL: for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(currundef->ref[i], pri, RESOLVED); } else { set_pri(currundef->ref[i], ref, REFER); } order_weights[i] = -1; } break; default: INTERR; } nextpri++; } static void start_order(int type) { int i; lastorder = currorder; currorder = type; /* this is used 
to protect ELLIPSIS processing */ if ((lastorder == T_ELLIPSIS) && (type != T_CHAR)) { fprintf(stderr, "character value expected"); } for (i = 0; i < COLL_WEIGHTS_MAX; i++) { order_weights[i] = -1; } curr_weight = 0; } void start_order_undefined(void) { start_order(T_UNDEFINED); } void start_order_symbol(char *name) { currundef = get_collundef(name); start_order(T_SYMBOL); } void start_order_char(wchar_t wc) { collchar_t *cc; int32_t ref; start_order(T_CHAR); /* * If we last saw an ellipsis, then we need to close the range. * Handle that here. Note that we have to be careful because the * items *inside* the range are treated exclusiveley to the items * outside of the range. The ends of the range can have quite * different weights than the range members. */ if (lastorder == T_ELLIPSIS) { int i; if (wc < ellipsis_start) { fprintf(stderr, "malformed range!"); return; } while (ellipsis_start < wc) { /* * pick all of the saved weights for the * ellipsis. note that -1 encodes for the * ellipsis itself, which means to take the * current relative priority. 
*/ if ((cc = get_collchar(ellipsis_start, 1)) == NULL) { INTERR; return; } for (i = 0; i < NUM_WT; i++) { collpri_t *p; if (((ref = ellipsis_weights[i]) == -1) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(cc->ref[i], nextpri, RESOLVED); } else { set_pri(cc->ref[i], ref, REFER); } ellipsis_weights[i] = 0; } ellipsis_start++; nextpri++; } } currchar = get_collchar(wc, 1); } void start_order_collelem(collelem_t *e) { start_order(T_COLLELEM); currelem = e; } void start_order_ellipsis(void) { int i; start_order(T_ELLIPSIS); if (lastorder != T_CHAR) { fprintf(stderr, "illegal starting point for range"); return; } for (i = 0; i < NUM_WT; i++) { ellipsis_weights[i] = order_weights[i]; } } void define_collelem(char *name, wchar_t *wcs) { collelem_t *e; int i; if (wcslen(wcs) >= COLLATE_STR_LEN) { fprintf(stderr,"expanded collation element too long"); return; } if ((e = calloc(1, sizeof(*e))) == NULL) { fprintf(stderr, "out of memory"); return; } e->expand = wcs; e->symbol = name; /* * This is executed before the order statement, so we don't * know how many priorities we *really* need. We allocate one * for each possible weight. Not a big deal, as collating-elements * prove to be quite rare. */ for (i = 0; i < COLL_WEIGHTS_MAX; i++) { e->ref[i] = new_pri(); } /* A character sequence can only reduce to one element. 
*/ if ((RB_FIND(elem_by_symbol, &elem_by_symbol, e) != NULL) || (RB_FIND(elem_by_expand, &elem_by_expand, e) != NULL)) { fprintf(stderr, "duplicate collating element definition"); free(e); return; } RB_INSERT(elem_by_symbol, &elem_by_symbol, e); RB_INSERT(elem_by_expand, &elem_by_expand, e); } void add_order_bit(int kw) { uint8_t bit = DIRECTIVE_UNDEF; switch (kw) { case T_FORWARD: bit = DIRECTIVE_FORWARD; break; case T_BACKWARD: bit = DIRECTIVE_BACKWARD; break; case T_POSITION: bit = DIRECTIVE_POSITION; break; default: INTERR; break; } collinfo.directive[collinfo.directive_count] |= bit; } void add_order_directive(void) { if (collinfo.directive_count >= COLL_WEIGHTS_MAX) { fprintf(stderr,"too many directives (max %d)", COLL_WEIGHTS_MAX); } collinfo.directive_count++; } static void add_order_pri(int32_t ref) { if (curr_weight >= NUM_WT) { fprintf(stderr,"too many weights (max %d)", NUM_WT); return; } order_weights[curr_weight] = ref; curr_weight++; } void add_order_collsym(collsym_t *s) { add_order_pri(s->ref); } void add_order_char(wchar_t wc) { collchar_t *cc; if ((cc = get_collchar(wc, 1)) == NULL) { INTERR; return; } add_order_pri(cc->ref[curr_weight]); } void add_order_collelem(collelem_t *e) { add_order_pri(e->ref[curr_weight]); } void add_order_ignore(void) { add_order_pri(pri_ignore); } void add_order_symbol(char *sym) { collundef_t *c; if ((c = get_collundef(sym)) == NULL) { INTERR; return; } add_order_pri(c->ref[curr_weight]); } void add_order_ellipsis(void) { /* special NULL value indicates self reference */ add_order_pri(0); } void add_order_subst(void) { subst_t srch; subst_t *s; int i; (void) memset(&srch, 0, sizeof (srch)); for (i = 0; i < curr_subst; i++) { srch.ref[i] = subst_weights[i]; subst_weights[i] = 0; } s = RB_FIND(substs_ref, &substs_ref[curr_weight], &srch); if (s == NULL) { if ((s = calloc(1, sizeof(*s))) == NULL) { fprintf(stderr,"out of memory"); return; } s->key = new_pri(); /* * We use a self reference for our key, but we set a * 
high bit to indicate that this is a substitution * reference. This will expedite table lookups later, * and prevent table lookups for situations that don't * require it. (In short, its a big win, because we * can skip a lot of binary searching.) */ set_pri(s->key, (nextsubst[curr_weight] | COLLATE_SUBST_PRIORITY), RESOLVED); nextsubst[curr_weight] += 1; for (i = 0; i < curr_subst; i++) { s->ref[i] = srch.ref[i]; } RB_INSERT(substs_ref, &substs_ref[curr_weight], s); if (RB_FIND(substs, &substs[curr_weight], s) != NULL) { INTERR; return; } RB_INSERT(substs, &substs[curr_weight], s); } curr_subst = 0; /* * We are using the current (unique) priority as a search key * in the substitution table. */ add_order_pri(s->key); } static void add_subst_pri(int32_t ref) { if (curr_subst >= COLLATE_STR_LEN) { fprintf(stderr,"substitution string is too long"); return; } subst_weights[curr_subst] = ref; curr_subst++; } void add_subst_char(wchar_t wc) { collchar_t *cc; if (((cc = get_collchar(wc, 1)) == NULL) || (cc->wc != wc)) { INTERR; return; } /* we take the weight for the character at that position */ add_subst_pri(cc->ref[curr_weight]); } void add_subst_collelem(collelem_t *e) { add_subst_pri(e->ref[curr_weight]); } void add_subst_collsym(collsym_t *s) { add_subst_pri(s->ref); } void add_subst_symbol(char *ptr) { collundef_t *cu; if ((cu = get_collundef(ptr)) != NULL) { add_subst_pri(cu->ref[curr_weight]); } } void add_weight(int32_t ref, int pass) { weight_t srch; weight_t *w; srch.pri = resolve_pri(ref); /* No translation of ignores */ if (srch.pri == 0) return; /* Substitution priorities are not weights */ if (srch.pri & COLLATE_SUBST_PRIORITY) return; if (RB_FIND(weights, &weights[pass], &srch) != NULL) return; if ((w = calloc(1, sizeof(*w))) == NULL) { fprintf(stderr, "out of memory"); return; } w->pri = srch.pri; RB_INSERT(weights, &weights[pass], w); } void add_weights(int32_t *refs) { int i; for (i = 0; i < NUM_WT; i++) { add_weight(refs[i], i); } } int32_t 
get_weight(int32_t ref, int pass) { weight_t srch; weight_t *w; int32_t pri; pri = resolve_pri(ref); if (pri & COLLATE_SUBST_PRIORITY) { return (pri); } if (pri <= 0) { return (pri); } srch.pri = pri; if ((w = RB_FIND(weights, &weights[pass], &srch)) == NULL) { INTERR; return (-1); } return (w->opt); } wchar_t * wsncpy(wchar_t *s1, const wchar_t *s2, size_t n) { wchar_t *os1 = s1; n++; - while (--n > 0 && (*s1++ = *s2++) != 0) + while (--n > 0 && (*s1++ = htote(*s2++)) != 0) continue; if (n > 0) while (--n > 0) *s1++ = 0; return (os1); } #define RB_COUNT(x, name, head, cnt) do { \ (cnt) = 0; \ RB_FOREACH(x, name, (head)) { \ (cnt)++; \ } \ } while (0) #define RB_NUMNODES(type, name, head, cnt) do { \ type *t; \ cnt = 0; \ RB_FOREACH(t, name, head) { \ cnt++; \ } \ } while (0) void dump_collate(void) { FILE *f; int i, j, n; size_t sz; int32_t pri; collelem_t *ce; collchar_t *cc; subst_t *sb; char vers[COLLATE_STR_LEN]; collate_char_t chars[UCHAR_MAX + 1]; collate_large_t *large; collate_subst_t *subst[COLL_WEIGHTS_MAX]; collate_chain_t *chain; /* * We have to run through a preliminary pass to identify all the * weights that we use for each sorting level. */ for (i = 0; i < NUM_WT; i++) { add_weight(pri_ignore, i); } for (i = 0; i < NUM_WT; i++) { RB_FOREACH(sb, substs, &substs[i]) { for (j = 0; sb->ref[j]; j++) { add_weight(sb->ref[j], i); } } } RB_FOREACH(ce, elem_by_expand, &elem_by_expand) { add_weights(ce->ref); } RB_FOREACH(cc, collchars, &collchars) { add_weights(cc->ref); } /* * Now we walk the entire set of weights, removing the gaps * in the weights. This gives us optimum usage. The walk * occurs in priority. */ for (i = 0; i < NUM_WT; i++) { weight_t *w; RB_FOREACH(w, weights, &weights[i]) { w->opt = nweight[i]; nweight[i] += 1; } } (void) memset(&chars, 0, sizeof (chars)); (void) memset(vers, 0, COLLATE_STR_LEN); (void) strlcpy(vers, COLLATE_VERSION, sizeof (vers)); /* * We need to make sure we arrange for the UNDEFINED field * to show up. 
Also, set the total weight counts. */ for (i = 0; i < NUM_WT; i++) { if (resolve_pri(pri_undefined[i]) == -1) { set_pri(pri_undefined[i], -1, RESOLVED); /* they collate at the end of everything else */ - collinfo.undef_pri[i] = COLLATE_MAX_PRIORITY; + collinfo.undef_pri[i] = htote(COLLATE_MAX_PRIORITY); } - collinfo.pri_count[i] = nweight[i]; + collinfo.pri_count[i] = htote(nweight[i]); } - collinfo.pri_count[NUM_WT] = max_wide(); - collinfo.undef_pri[NUM_WT] = COLLATE_MAX_PRIORITY; + collinfo.pri_count[NUM_WT] = htote(max_wide()); + collinfo.undef_pri[NUM_WT] = htote(COLLATE_MAX_PRIORITY); collinfo.directive[NUM_WT] = DIRECTIVE_UNDEFINED; /* * Ordinary character priorities */ for (i = 0; i <= UCHAR_MAX; i++) { if ((cc = get_collchar(i, 0)) != NULL) { for (j = 0; j < NUM_WT; j++) { - chars[i].pri[j] = get_weight(cc->ref[j], j); + chars[i].pri[j] = + htote(get_weight(cc->ref[j], j)); } } else { for (j = 0; j < NUM_WT; j++) { chars[i].pri[j] = - get_weight(pri_undefined[j], j); + htote(get_weight(pri_undefined[j], j)); } /* * Per POSIX, for undefined characters, we * also have to add a last item, which is the * character code. */ - chars[i].pri[NUM_WT] = i; + chars[i].pri[NUM_WT] = htote(i); } } /* * Substitution tables */ for (i = 0; i < NUM_WT; i++) { collate_subst_t *st = NULL; subst_t *temp; RB_COUNT(temp, substs, &substs[i], n); - collinfo.subst_count[i] = n; + subst_count[i] = n; if ((st = calloc(n, sizeof(collate_subst_t))) == NULL) { fprintf(stderr, "out of memory"); return; } n = 0; RB_FOREACH(sb, substs, &substs[i]) { if ((st[n].key = resolve_pri(sb->key)) < 0) { /* by definition these resolve! */ INTERR; } if (st[n].key != (n | COLLATE_SUBST_PRIORITY)) { INTERR; } + st[n].key = htote(st[n].key); for (j = 0; sb->ref[j]; j++) { - st[n].pri[j] = get_weight(sb->ref[j], i); + st[n].pri[j] = htote(get_weight(sb->ref[j], + i)); } n++; } - if (n != collinfo.subst_count[i]) + if (n != subst_count[i]) INTERR; subst[i] = st; } /* * Chains, i.e. 
collating elements */ - RB_NUMNODES(collelem_t, elem_by_expand, &elem_by_expand, - collinfo.chain_count); - chain = calloc(collinfo.chain_count, sizeof(collate_chain_t)); + RB_NUMNODES(collelem_t, elem_by_expand, &elem_by_expand, chain_count); + chain = calloc(chain_count, sizeof(collate_chain_t)); if (chain == NULL) { fprintf(stderr, "out of memory"); return; } n = 0; RB_FOREACH(ce, elem_by_expand, &elem_by_expand) { (void) wsncpy(chain[n].str, ce->expand, COLLATE_STR_LEN); for (i = 0; i < NUM_WT; i++) { - chain[n].pri[i] = get_weight(ce->ref[i], i); + chain[n].pri[i] = htote(get_weight(ce->ref[i], i)); } n++; } - if (n != collinfo.chain_count) + if (n != chain_count) INTERR; /* * Large (> UCHAR_MAX) character priorities */ RB_NUMNODES(collchar_t, collchars, &collchars, n); large = calloc(n, sizeof(collate_large_t)); if (large == NULL) { fprintf(stderr, "out of memory"); return; } i = 0; RB_FOREACH(cc, collchars, &collchars) { int undef = 0; /* we already gathered those */ if (cc->wc <= UCHAR_MAX) continue; for (j = 0; j < NUM_WT; j++) { if ((pri = get_weight(cc->ref[j], j)) < 0) { undef = 1; } if (undef && (pri >= 0)) { /* if undefined, then all priorities are */ INTERR; } else { - large[i].pri.pri[j] = pri; + large[i].pri.pri[j] = htote(pri); } } if (!undef) { - large[i].val = cc->wc; - collinfo.large_count = i++; + large[i].val = htote(cc->wc); + large_count = i++; } } if ((f = open_category()) == NULL) { return; } /* Time to write the entire data set out */ + for (i = 0; i < NUM_WT; i++) + collinfo.subst_count[i] = htote(subst_count[i]); + collinfo.chain_count = htote(chain_count); + collinfo.large_count = htote(large_count); + if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) || (wr_category(&collinfo, sizeof (collinfo), f) < 0) || (wr_category(&chars, sizeof (chars), f) < 0)) { return; } for (i = 0; i < NUM_WT; i++) { - sz = sizeof (collate_subst_t) * collinfo.subst_count[i]; + sz = sizeof (collate_subst_t) * subst_count[i]; if (wr_category(subst[i], sz, f) < 
0) { return; } } - sz = sizeof (collate_chain_t) * collinfo.chain_count; + sz = sizeof (collate_chain_t) * chain_count; if (wr_category(chain, sz, f) < 0) { return; } - sz = sizeof (collate_large_t) * collinfo.large_count; + sz = sizeof (collate_large_t) * large_count; if (wr_category(large, sz, f) < 0) { return; } close_category(f); } Index: head/usr.bin/localedef/ctype.c =================================================================== --- head/usr.bin/localedef/ctype.c (revision 339488) +++ head/usr.bin/localedef/ctype.c (revision 339489) @@ -1,463 +1,472 @@ /*- - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. * Copyright 2012 Garrett D'Amore All rights reserved. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * LC_CTYPE database generation routines for localedef. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "localedef.h" #include "parser.h" #include "runefile.h" /* Needed for bootstrapping, _CTYPE_N */ #ifndef _CTYPE_N #define _CTYPE_N 0x00400000L #endif #define _ISUPPER _CTYPE_U #define _ISLOWER _CTYPE_L #define _ISDIGIT _CTYPE_D #define _ISXDIGIT _CTYPE_X #define _ISSPACE _CTYPE_S #define _ISBLANK _CTYPE_B #define _ISALPHA _CTYPE_A #define _ISPUNCT _CTYPE_P #define _ISGRAPH _CTYPE_G #define _ISPRINT _CTYPE_R #define _ISCNTRL _CTYPE_C #define _E1 _CTYPE_Q #define _E2 _CTYPE_I #define _E3 0 #define _E4 _CTYPE_N #define _E5 _CTYPE_T static wchar_t last_ctype; static int ctype_compare(const void *n1, const void *n2); typedef struct ctype_node { wchar_t wc; int32_t ctype; int32_t toupper; int32_t tolower; RB_ENTRY(ctype_node) entry; } ctype_node_t; static RB_HEAD(ctypes, ctype_node) ctypes; RB_GENERATE_STATIC(ctypes, ctype_node, entry, ctype_compare); static int ctype_compare(const void *n1, const void *n2) { const ctype_node_t *c1 = n1; const ctype_node_t *c2 = n2; return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 
1 : 0); } void init_ctype(void) { RB_INIT(&ctypes); } static void add_ctype_impl(ctype_node_t *ctn) { switch (last_kw) { case T_ISUPPER: ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT); break; case T_ISLOWER: ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT); break; case T_ISALPHA: ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT); break; case T_ISDIGIT: ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4); break; case T_ISSPACE: ctn->ctype |= _ISSPACE; break; case T_ISCNTRL: ctn->ctype |= _ISCNTRL; break; case T_ISGRAPH: ctn->ctype |= (_ISGRAPH | _ISPRINT); break; case T_ISPRINT: ctn->ctype |= _ISPRINT; break; case T_ISPUNCT: ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT); break; case T_ISXDIGIT: ctn->ctype |= (_ISXDIGIT | _ISPRINT); break; case T_ISBLANK: ctn->ctype |= (_ISBLANK | _ISSPACE); break; case T_ISPHONOGRAM: ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH); break; case T_ISIDEOGRAM: ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH); break; case T_ISENGLISH: ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH); break; case T_ISNUMBER: ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH); break; case T_ISSPECIAL: ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH); break; case T_ISALNUM: /* * We can't do anything with this. The character * should already be specified as a digit or alpha. */ break; default: errf("not a valid character class"); } } static ctype_node_t * get_ctype(wchar_t wc) { ctype_node_t srch; ctype_node_t *ctn; srch.wc = wc; if ((ctn = RB_FIND(ctypes, &ctypes, &srch)) == NULL) { if ((ctn = calloc(1, sizeof (*ctn))) == NULL) { errf("out of memory"); return (NULL); } ctn->wc = wc; RB_INSERT(ctypes, &ctypes, ctn); } return (ctn); } void add_ctype(int val) { ctype_node_t *ctn; if ((ctn = get_ctype(val)) == NULL) { INTERR; return; } add_ctype_impl(ctn); last_ctype = ctn->wc; } void add_ctype_range(wchar_t end) { ctype_node_t *ctn; wchar_t cur; if (end < last_ctype) { errf("malformed character range (%u ... 
%u))", last_ctype, end); return; } for (cur = last_ctype + 1; cur <= end; cur++) { if ((ctn = get_ctype(cur)) == NULL) { INTERR; return; } add_ctype_impl(ctn); } last_ctype = end; } /* * A word about widths: if the width mask is specified, then libc * unconditionally honors it. Otherwise, it assumes printable * characters have width 1, and non-printable characters have width * -1 (except for NULL which is special with width 0). Hence, we have * no need to inject defaults here -- the "default" unset value of 0 * indicates that libc should use its own logic in wcwidth as described. */ void add_width(int wc, int width) { ctype_node_t *ctn; if ((ctn = get_ctype(wc)) == NULL) { INTERR; return; } ctn->ctype &= ~(_CTYPE_SWM); switch (width) { case 0: ctn->ctype |= _CTYPE_SW0; break; case 1: ctn->ctype |= _CTYPE_SW1; break; case 2: ctn->ctype |= _CTYPE_SW2; break; case 3: ctn->ctype |= _CTYPE_SW3; break; } } void add_width_range(int start, int end, int width) { for (; start <= end; start++) { add_width(start, width); } } void add_caseconv(int val, int wc) { ctype_node_t *ctn; ctn = get_ctype(val); if (ctn == NULL) { INTERR; return; } switch (last_kw) { case T_TOUPPER: ctn->toupper = wc; break; case T_TOLOWER: ctn->tolower = wc; break; default: INTERR; break; } } void dump_ctype(void) { FILE *f; _FileRuneLocale rl; ctype_node_t *ctn, *last_ct, *last_lo, *last_up; _FileRuneEntry *ct = NULL; _FileRuneEntry *lo = NULL; _FileRuneEntry *up = NULL; wchar_t wc; + uint32_t runetype_ext_nranges; + uint32_t maplower_ext_nranges; + uint32_t mapupper_ext_nranges; (void) memset(&rl, 0, sizeof (rl)); + runetype_ext_nranges = 0; last_ct = NULL; + maplower_ext_nranges = 0; last_lo = NULL; + mapupper_ext_nranges = 0; last_up = NULL; if ((f = open_category()) == NULL) return; (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8); (void) strlcpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding)); /* * Initialize the identity map. 
*/ for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) { - rl.maplower[wc] = wc; - rl.mapupper[wc] = wc; + rl.maplower[wc] = htote(wc); + rl.mapupper[wc] = htote(wc); } RB_FOREACH(ctn, ctypes, &ctypes) { int conflict = 0; wc = ctn->wc; /* * POSIX requires certain portable characters have * certain types. Add them if they are missing. */ if ((wc >= 1) && (wc <= 127)) { if ((wc >= 'A') && (wc <= 'Z')) ctn->ctype |= _ISUPPER; if ((wc >= 'a') && (wc <= 'z')) ctn->ctype |= _ISLOWER; if ((wc >= '0') && (wc <= '9')) ctn->ctype |= _ISDIGIT; if (wc == ' ') ctn->ctype |= _ISPRINT; if (strchr(" \f\n\r\t\v", (char)wc) != NULL) ctn->ctype |= _ISSPACE; if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL) ctn->ctype |= _ISXDIGIT; if (strchr(" \t", (char)wc)) ctn->ctype |= _ISBLANK; /* * Technically these settings are only * required for the C locale. However, it * turns out that because of the historical * version of isprint(), we need them for all * locales as well. Note that these are not * necessarily valid punctation characters in * the current language, but ispunct() needs * to return TRUE for them. */ if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~", (char)wc)) ctn->ctype |= _ISPUNCT; } /* * POSIX also requires that certain types imply * others. Add any inferred types here. */ if (ctn->ctype & (_ISUPPER |_ISLOWER)) ctn->ctype |= _ISALPHA; if (ctn->ctype & _ISDIGIT) ctn->ctype |= _ISXDIGIT; if (ctn->ctype & _ISBLANK) ctn->ctype |= _ISSPACE; if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT)) ctn->ctype |= _ISGRAPH; if (ctn->ctype & _ISGRAPH) ctn->ctype |= _ISPRINT; /* * Finally, POSIX requires that certain combinations * are invalid. We don't flag this as a fatal error, * but we will warn about. 
*/ if ((ctn->ctype & _ISALPHA) && (ctn->ctype & (_ISPUNCT|_ISDIGIT))) conflict++; if ((ctn->ctype & _ISPUNCT) && (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT))) conflict++; if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH)) conflict++; if ((ctn->ctype & _ISCNTRL) && (ctn->ctype & _ISPRINT)) conflict++; if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH))) conflict++; if (conflict) { warn("conflicting classes for character 0x%x (%x)", wc, ctn->ctype); } /* * Handle the lower 256 characters using the simple * optimization. Note that if we have not defined the * upper/lower case, then we identity map it. */ if ((unsigned)wc < _CACHED_RUNES) { - rl.runetype[wc] = ctn->ctype; + rl.runetype[wc] = htote(ctn->ctype); if (ctn->tolower) - rl.maplower[wc] = ctn->tolower; + rl.maplower[wc] = htote(ctn->tolower); if (ctn->toupper) - rl.mapupper[wc] = ctn->toupper; + rl.mapupper[wc] = htote(ctn->toupper); continue; } if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) && (last_ct->wc + 1 == wc)) { - ct[rl.runetype_ext_nranges-1].max = wc; + ct[runetype_ext_nranges - 1].max = htote(wc); } else { - rl.runetype_ext_nranges++; - ct = realloc(ct, - sizeof (*ct) * rl.runetype_ext_nranges); - ct[rl.runetype_ext_nranges - 1].min = wc; - ct[rl.runetype_ext_nranges - 1].max = wc; - ct[rl.runetype_ext_nranges - 1].map = ctn->ctype; + runetype_ext_nranges++; + ct = realloc(ct, sizeof (*ct) * runetype_ext_nranges); + ct[runetype_ext_nranges - 1].min = htote(wc); + ct[runetype_ext_nranges - 1].max = htote(wc); + ct[runetype_ext_nranges - 1].map = + htote(ctn->ctype); } last_ct = ctn; if (ctn->tolower == 0) { last_lo = NULL; } else if ((last_lo != NULL) && (last_lo->tolower + 1 == ctn->tolower)) { - lo[rl.maplower_ext_nranges-1].max = wc; + lo[maplower_ext_nranges - 1].max = htote(wc); last_lo = ctn; } else { - rl.maplower_ext_nranges++; - lo = realloc(lo, - sizeof (*lo) * rl.maplower_ext_nranges); - lo[rl.maplower_ext_nranges - 1].min = wc; - lo[rl.maplower_ext_nranges - 1].max = wc; 
- lo[rl.maplower_ext_nranges - 1].map = ctn->tolower; + maplower_ext_nranges++; + lo = realloc(lo, sizeof (*lo) * maplower_ext_nranges); + lo[maplower_ext_nranges - 1].min = htote(wc); + lo[maplower_ext_nranges - 1].max = htote(wc); + lo[maplower_ext_nranges - 1].map = + htote(ctn->tolower); last_lo = ctn; } if (ctn->toupper == 0) { last_up = NULL; } else if ((last_up != NULL) && (last_up->toupper + 1 == ctn->toupper)) { - up[rl.mapupper_ext_nranges-1].max = wc; + up[mapupper_ext_nranges-1].max = htote(wc); last_up = ctn; } else { - rl.mapupper_ext_nranges++; - up = realloc(up, - sizeof (*up) * rl.mapupper_ext_nranges); - up[rl.mapupper_ext_nranges - 1].min = wc; - up[rl.mapupper_ext_nranges - 1].max = wc; - up[rl.mapupper_ext_nranges - 1].map = ctn->toupper; + mapupper_ext_nranges++; + up = realloc(up, sizeof (*up) * mapupper_ext_nranges); + up[mapupper_ext_nranges - 1].min = htote(wc); + up[mapupper_ext_nranges - 1].max = htote(wc); + up[mapupper_ext_nranges - 1].map = + htote(ctn->toupper); last_up = ctn; } } + rl.runetype_ext_nranges = htote(runetype_ext_nranges); + rl.maplower_ext_nranges = htote(maplower_ext_nranges); + rl.mapupper_ext_nranges = htote(mapupper_ext_nranges); if ((wr_category(&rl, sizeof (rl), f) < 0) || - (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) || - (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) || - (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) { + (wr_category(ct, sizeof (*ct) * runetype_ext_nranges, f) < 0) || + (wr_category(lo, sizeof (*lo) * maplower_ext_nranges, f) < 0) || + (wr_category(up, sizeof (*up) * mapupper_ext_nranges, f) < 0)) { return; } close_category(f); } Index: head/usr.bin/localedef/localedef.1 =================================================================== --- head/usr.bin/localedef/localedef.1 (revision 339488) +++ head/usr.bin/localedef/localedef.1 (revision 339489) @@ -1,267 +1,271 @@ .\" Copyright (c) 1992, X/Open Company Limited All Rights Reserved 
.\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved .\" Portions Copyright 2013 DEY Storage Systems, Inc. .\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for .\" permission to reproduce portions of its copyrighted documentation. .\" Original documentation from The Open Group can be obtained online at .\" http://www.opengroup.org/bookstore/. .\" The Institute of Electrical and Electronics Engineers and The Open Group, .\" have given us permission to reprint portions of their documentation. In .\" the following statement, the phrase "this text" refers to portions of the .\" system documentation. Portions of this text are reprinted and reproduced .\" in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, .\" 2004 Edition, Standard for Information Technology -- Portable Operating .\" System Interface (POSIX), The Open Group Base Specifications Issue 6, .\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics .\" Engineers, Inc and The Open Group. In the event of any discrepancy between .\" these versions and the original IEEE and The Open Group Standard, the .\" original IEEE and The Open Group Standard is the referee document. The .\" original Standard can be obtained online at .\" http://www.opengroup.org/unix/online.html. .\" This notice shall appear on any product containing this material. .\" The contents of this file are subject to the terms of the Common .\" Development and Distribution License (the "License"). You may not use .\" this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or .\" http://www.opensolaris.org/os/licensing. See the License for the specific .\" language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and .\" include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, .\" add the following below this CDDL HEADER, with the fields enclosed by .\" brackets "[]" replaced with your own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" .\" $FreeBSD$ .\" -.Dd July 28, 2015 +.Dd October 18, 2018 .Dt LOCALEDEF 1 .Os .Sh NAME .Nm localedef .Nd define locale environment .Sh SYNOPSIS .Nm -.Op Fl cDUv +.Op Fl bcDlUv .Op Fl f Ar charmap .Op Fl i Ar sourcefile .Op Fl u Ar codeset .Op Fl w Ar widthfile .Ar localename .Sh DESCRIPTION The .Nm utility converts source definitions for locale categories into a format usable by the functions and utilities whose operational behavior is determined by the setting of the locale environment variables; see .Xr environ 7 . .Pp The utility reads source definitions for one or more locale categories belonging to the same locale from the file named in the .Fl i option (if specified) or from standard input. .Pp Each category source definition is identified by the corresponding environment variable name and terminated by an .Sy END .Em category-name statement. The following categories are supported: .Bl -tag -width ".Ev LC_MONETARY" .It Ev LC_CTYPE Defines character classification and case conversion. .It Ev LC_COLLATE Defines collation rules. .It Ev LC_MONETARY Defines the format and symbols used in formatting of monetary information. .It Ev LC_NUMERIC Defines the decimal delimiter, grouping and grouping symbol for non-monetary numeric editing. .It Ev LC_TIME Defines the format and content of date and time information. .It Ev LC_MESSAGES Defines the format and values of affirmative and negative responses. .El .Pp The following options are supported: .Bl -tag -width indent +.It Fl b +Use big-endian byte order for output. .It Fl c Creates permanent output even if warning messages have been issued. .It Fl D BSD-style output. 
Rather than the default of creating the .Ar localename directory and creating files like .Pa LC_CTYPE , .Pa LC_COLLATE , etc.\& in that directory, the output file names have the format .Dq Ar localename Ns . Ns Ar category and are dumped to the current directory. .It Fl f Ar charmap Specifies the pathname of a file containing a mapping of character symbols and collating element symbols to actual character encodings. This option must be specified if symbolic names (other than collating symbols defined in a .Sy collating-symbol keyword) are used. If the .Fl f option is not present, the default character mapping will be used. .It Fl i Ar sourcefile The path name of a file containing the source definitions. If this option is not present, source definitions will be read from standard input. +.It Fl l +Use little-endian byte order for output. .It Fl u Ar codeset Specifies the name of a codeset used as the target mapping of character symbols and collating element symbols whose encoding values are defined in terms of the ISO/IEC 10646-1:2000 standard position constant values. See .Sx NOTES . .It Fl U Ignore the presence of character symbols that have no matching character definition. This facilitates the use of a common locale definition file to be used across multiple encodings, even when some symbols are not present in a given encoding. .It Fl v Emit verbose debugging output on standard output. .It Fl w Ar widthfile The path name of the file containing character screen width definitions. If not supplied, then default screen widths will be assumed, which will generally not account for East Asian encodings requiring more than a single character cell to display, nor for combining or accent marks that occupy no additional screen width. .El .Pp The following operands are required: .Bl -tag -width ".Ar localename" .It Ar localename Identifies the locale. If the name contains one or more slash characters, .Ar localename will be interpreted as a path name where the created locale definitions will be stored.
This capability may be restricted to users with appropriate privileges. (As a consequence of specifying one .Ar localename , although several categories can be processed in one execution, only categories belonging to the same locale can be processed.) .El .Sh OUTPUT .Nm creates a directory of files that represents the locale's data, unless instructed otherwise by the .Fl D ( BSD output) option. The contents of this directory should generally be copied into the appropriate subdirectory of .Pa /usr/share/locale in order for the definitions to be visible to programs linked with libc. .Sh ENVIRONMENT See .Xr environ 7 for definitions of the following environment variables that affect the execution of .Nm : .Ev LANG , .Ev LC_ALL , .Ev LC_COLLATE , .Ev LC_CTYPE , .Ev LC_MESSAGES , .Ev LC_MONETARY , .Ev LC_NUMERIC , .Ev LC_TIME , and .Ev NLSPATH . .Sh EXIT STATUS The following exit values are returned: .Bl -tag -width XX .It 0 No errors occurred and the locales were successfully created. .It 1 Warnings occurred and the locales were successfully created. .It 2 The locale specification exceeded implementation limits or the coded character set or sets used were not supported by the implementation, and no locale was created. .It >3 Warnings or errors occurred and no output was created. .El .Pp If an error is detected, no permanent output will be created. .Sh SEE ALSO .Xr locale 1 , .Xr iconv_open 3 , .Xr nl_langinfo 3 , .Xr strftime 3 , .Xr environ 7 .Sh WARNINGS If warnings occur, permanent output will be created if the .Fl c option was specified. The following conditions will cause warning messages to be issued: .Bl -bullet .It If a symbolic name not found in the .Pa charmap file is used for the descriptions of the .Sy LC_CTYPE or .Sy LC_COLLATE categories (for other categories, this will be an error condition). .It If optional keywords not supported by the implementation are present in the source.
.El .Sh NOTES When the .Fl u option is used, the .Ar codeset option-argument is interpreted as a name of a codeset to which the ISO/IEC 10646-1:2000 standard position constant values are converted. Both the ISO/IEC 10646-1:2000 standard position constant values and other formats (decimal, hexadecimal, or octal) are valid as encoding values within the charmap file. The .Ar codeset can be any codeset that is supported by the .Fn iconv_open 3 function. .Pp When conflicts occur between the charmap specification of .Ar codeset , .Em mb_cur_max , or .Em mb_cur_min and the corresponding value for the codeset represented by the .Fl u option-argument .Ar codeset , the .Nm utility fails with an error. .Pp When conflicts occur between the charmap encoding values specified for symbolic names of characters of the portable character set and the character encoding values defined by the US-ASCII, the result is unspecified. .Sh HISTORY .Nm first appeared in .Fx 11 . .Pp It was written by .An Garrett D'Amore .Aq Mt garrett@nexenta.com for Illumos. .An John Marino .Aq Mt draco@marino.st provided the alterations necessary to compile cleanly on .Dx . .An Baptiste Daroussin .Aq Mt bapt@FreeBSD.org ported it to .Fx and converted it to .Xr tree 3 . Index: head/usr.bin/localedef/localedef.c =================================================================== --- head/usr.bin/localedef/localedef.c (revision 339488) +++ head/usr.bin/localedef/localedef.c (revision 339489) @@ -1,345 +1,368 @@ /*- - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * POSIX localedef. 
*/ #include __FBSDID("$FreeBSD$"); +#include +#include +#include + #include #include #include -#include -#include #include #include #include #include #include #include #include #include "localedef.h" #include "parser.h" #ifndef TEXT_DOMAIN #define TEXT_DOMAIN "SYS_TEST" #endif static int bsd = 0; +static int byteorder = 0; int verbose = 0; int undefok = 0; int warnok = 0; static char *locname = NULL; static char locpath[PATH_MAX]; const char * category_name(void) { switch (get_category()) { case T_CHARMAP: return ("CHARMAP"); case T_WIDTH: return ("WIDTH"); case T_COLLATE: return ("LC_COLLATE"); case T_CTYPE: return ("LC_CTYPE"); case T_MESSAGES: return ("LC_MESSAGES"); case T_MONETARY: return ("LC_MONETARY"); case T_NUMERIC: return ("LC_NUMERIC"); case T_TIME: return ("LC_TIME"); default: INTERR; return (NULL); } } static char * category_file(void) { if (bsd) (void) snprintf(locpath, sizeof (locpath), "%s.%s", locname, category_name()); else (void) snprintf(locpath, sizeof (locpath), "%s/%s", locname, category_name()); return (locpath); } FILE * open_category(void) { FILE *file; if (verbose) { (void) printf("Writing category %s: ", category_name()); (void) fflush(stdout); } /* make the parent directory */ if (!bsd) (void) mkdir(dirname(category_file()), 0755); /* * note that we have to regenerate the file name, as dirname * clobbered it. */ file = fopen(category_file(), "w"); if (file == NULL) { errf("%s", strerror(errno)); return (NULL); } return (file); } void close_category(FILE *f) { if (fchmod(fileno(f), 0644) < 0) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); } if (fclose(f) < 0) { (void) unlink(category_file()); errf("%s", strerror(errno)); } if (verbose) { (void) fprintf(stdout, "done.\n"); (void) fflush(stdout); } } /* * This function is used when copying the category from another * locale. Note that the copy is actually performed using a hard * link for efficiency. 
*/ void copy_category(char *src) { char srcpath[PATH_MAX]; int rv; (void) snprintf(srcpath, sizeof (srcpath), "%s/%s", src, category_name()); rv = access(srcpath, R_OK); if ((rv != 0) && (strchr(srcpath, '/') == NULL)) { /* Maybe we should try the system locale */ (void) snprintf(srcpath, sizeof (srcpath), "/usr/lib/locale/%s/%s", src, category_name()); rv = access(srcpath, R_OK); } if (rv != 0) { fprintf(stderr,"source locale data unavailable: %s", src); return; } if (verbose > 1) { (void) printf("Copying category %s from %s: ", category_name(), src); (void) fflush(stdout); } /* make the parent directory */ if (!bsd) (void) mkdir(dirname(category_file()), 0755); if (link(srcpath, category_file()) != 0) { fprintf(stderr,"unable to copy locale data: %s", strerror(errno)); return; } if (verbose > 1) { (void) printf("done.\n"); } } int putl_category(const char *s, FILE *f) { if (s && fputs(s, f) == EOF) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); return (EOF); } if (fputc('\n', f) == EOF) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); return (EOF); } return (0); } int wr_category(void *buf, size_t sz, FILE *f) { if (!sz) { return (0); } if (fwrite(buf, sz, 1, f) < 1) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); return (EOF); } return (0); } +uint32_t +htote(uint32_t arg) +{ + + if (byteorder == 4321) + return (htobe32(arg)); + else if (byteorder == 1234) + return (htole32(arg)); + else + return (arg); +} + int yyparse(void); static void usage(void) { (void) fprintf(stderr, "Usage: localedef [options] localename\n"); (void) fprintf(stderr, "[options] are:\n"); (void) fprintf(stderr, " -D : BSD-style output\n"); + (void) fprintf(stderr, " -b : big-endian output\n"); (void) fprintf(stderr, " -c : ignore warnings\n"); + (void) fprintf(stderr, " -l : little-endian output\n"); (void) fprintf(stderr, " -v : verbose output\n"); (void) fprintf(stderr, " -U : ignore 
undefined symbols\n"); (void) fprintf(stderr, " -f charmap : use given charmap file\n"); (void) fprintf(stderr, " -u encoding : assume encoding\n"); (void) fprintf(stderr, " -w widths : use screen widths file\n"); (void) fprintf(stderr, " -i locsrc : source file for locale\n"); exit(4); } int main(int argc, char **argv) { int c; char *lfname = NULL; char *cfname = NULL; char *wfname = NULL; DIR *dir; init_charmap(); init_collate(); init_ctype(); init_messages(); init_monetary(); init_numeric(); init_time(); yydebug = 0; (void) setlocale(LC_ALL, ""); - while ((c = getopt(argc, argv, "w:i:cf:u:vUD")) != -1) { + while ((c = getopt(argc, argv, "blw:i:cf:u:vUD")) != -1) { switch (c) { case 'D': bsd = 1; + break; + case 'b': + case 'l': + if (byteorder != 0) + usage(); + byteorder = c == 'b' ? 4321 : 1234; break; case 'v': verbose++; break; case 'i': lfname = optarg; break; case 'u': set_wide_encoding(optarg); break; case 'f': cfname = optarg; break; case 'U': undefok++; break; case 'c': warnok++; break; case 'w': wfname = optarg; break; case '?': usage(); break; } } if ((argc - 1) != (optind)) { usage(); } locname = argv[argc - 1]; if (verbose) { (void) printf("Processing locale %s.\n", locname); } if (cfname) { if (verbose) (void) printf("Loading charmap %s.\n", cfname); reset_scanner(cfname); (void) yyparse(); } if (wfname) { if (verbose) (void) printf("Loading widths %s.\n", wfname); reset_scanner(wfname); (void) yyparse(); } if (verbose) { (void) printf("Loading POSIX portable characters.\n"); } add_charmap_posix(); if (lfname) { reset_scanner(lfname); } else { reset_scanner(NULL); } /* make the directory for the locale if not already present */ if (!bsd) { while ((dir = opendir(locname)) == NULL) { if ((errno != ENOENT) || (mkdir(locname, 0755) < 0)) { errf("%s", strerror(errno)); } } (void) closedir(dir); (void) mkdir(dirname(category_file()), 0755); } (void) yyparse(); if (verbose) { (void) printf("All done.\n"); } return (warnings ? 
1 : 0); } Index: head/usr.bin/localedef/localedef.h =================================================================== --- head/usr.bin/localedef/localedef.h (revision 339488) +++ head/usr.bin/localedef/localedef.h (revision 339489) @@ -1,173 +1,175 @@ /*- - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * POSIX localedef. */ /* Common header files. 
*/ #include #include #include #include #include extern int com_char; extern int esc_char; extern int mb_cur_max; extern int mb_cur_min; extern int last_kw; extern int verbose; extern int yydebug; extern int lineno; extern int undefok; /* mostly ignore undefined symbols */ extern int warnok; extern int warnings; int yylex(void); void yyerror(const char *); _Noreturn void errf(const char *, ...) __printflike(1, 2); void warn(const char *, ...) __printflike(1, 2); int putl_category(const char *, FILE *); int wr_category(void *, size_t, FILE *); FILE *open_category(void); void close_category(FILE *); void copy_category(char *); const char *category_name(void); int get_category(void); int get_symbol(void); int get_escaped(int); int get_wide(void); void reset_scanner(const char *); void scan_to_eol(void); void add_wcs(wchar_t); void add_tok(int); wchar_t *get_wcs(void); + +uint32_t htote(uint32_t); /* charmap.c - CHARMAP handling */ void init_charmap(void); void add_charmap(const char *, int); void add_charmap_undefined(char *); void add_charmap_posix(void); void add_charmap_range(char *, char *, int); void add_charmap_char(const char *name, int val); int lookup_charmap(const char *, wchar_t *); int check_charmap_undefined(char *); int check_charmap(wchar_t); /* collate.o - LC_COLLATE handling */ typedef struct collelem collelem_t; typedef struct collsym collsym_t; void init_collate(void); void define_collsym(char *); void define_collelem(char *, wchar_t *); void add_order_directive(void); void add_order_bit(int); void dump_collate(void); collsym_t *lookup_collsym(char *); collelem_t *lookup_collelem(char *); void start_order_collelem(collelem_t *); void start_order_undefined(void); void start_order_symbol(char *); void start_order_char(wchar_t); void start_order_ellipsis(void); void end_order_collsym(collsym_t *); void end_order(void); void add_weight(int32_t, int); void add_weights(int32_t *); void add_weight_num(int); void add_order_collelem(collelem_t *); void 
add_order_collsym(collsym_t *); void add_order_char(wchar_t); void add_order_ignore(void); void add_order_ellipsis(void); void add_order_symbol(char *); void add_order_subst(void); void add_subst_char(wchar_t); void add_subst_collsym(collsym_t *); void add_subst_collelem(collelem_t *); void add_subst_symbol(char *); int32_t get_weight(int32_t, int); wchar_t * wsncpy(wchar_t *, const wchar_t *, size_t); /* ctype.c - LC_CTYPE handling */ void init_ctype(void); void add_ctype(int); void add_ctype_range(wchar_t); void add_width(int, int); void add_width_range(int, int, int); void add_caseconv(int, int); void dump_ctype(void); /* messages.c - LC_MESSAGES handling */ void init_messages(void); void add_message(wchar_t *); void dump_messages(void); /* monetary.c - LC_MONETARY handling */ void init_monetary(void); void add_monetary_str(wchar_t *); void add_monetary_num(int); void reset_monetary_group(void); void add_monetary_group(int); void dump_monetary(void); /* numeric.c - LC_NUMERIC handling */ void init_numeric(void); void add_numeric_str(wchar_t *); void reset_numeric_group(void); void add_numeric_group(int); void dump_numeric(void); /* time.c - LC_TIME handling */ void init_time(void); void add_time_str(wchar_t *); void reset_time_list(void); void add_time_list(wchar_t *); void check_time_list(void); void dump_time(void); /* wide.c - Wide character handling. */ int to_wide(wchar_t *, const char *); int to_mbs(char *, wchar_t); int to_mb(char *, wchar_t); char *to_mb_string(const wchar_t *); void set_wide_encoding(const char *); void werr(const char *, ...); const char *get_wide_encoding(void); int max_wide(void); //#define _(x) gettext(x) #define INTERR fprintf(stderr,"internal fault (%s:%d)", __FILE__, __LINE__)