Index: head/include/xlocale/_locale.h =================================================================== --- head/include/xlocale/_locale.h (revision 367475) +++ head/include/xlocale/_locale.h (revision 367476) @@ -1,59 +1,60 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011, 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by David Chisnall under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _XLOCALE_LOCALE_H #define _XLOCALE_LOCALE_H /* Bit shifting order of LC_*_MASK should match XLC_* and LC_* order. 
*/ #define LC_COLLATE_MASK (1<<0) #define LC_CTYPE_MASK (1<<1) #define LC_MONETARY_MASK (1<<2) #define LC_NUMERIC_MASK (1<<3) #define LC_TIME_MASK (1<<4) #define LC_MESSAGES_MASK (1<<5) #define LC_ALL_MASK (LC_COLLATE_MASK | LC_CTYPE_MASK | LC_MESSAGES_MASK | \ LC_MONETARY_MASK | LC_NUMERIC_MASK | LC_TIME_MASK) +#define LC_VERSION_MASK (1<<6) #define LC_GLOBAL_LOCALE ((locale_t)-1) #ifndef _LOCALE_T_DEFINED #define _LOCALE_T_DEFINED typedef struct _xlocale *locale_t; #endif locale_t duplocale(locale_t base); void freelocale(locale_t loc); locale_t newlocale(int mask, const char *locale, locale_t base); const char *querylocale(int mask, locale_t loc); locale_t uselocale(locale_t loc); #endif /* _XLOCALE_LOCALE_H */ Index: head/lib/libc/locale/collate.c =================================================================== --- head/lib/libc/locale/collate.c (revision 367475) +++ head/lib/libc/locale/collate.c (revision 367476) @@ -1,716 +1,720 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2014 Garrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Adapted to xlocale by John Marino */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "collate.h" #include "setlocale.h" #include "ldpart.h" #include "libc_private.h" struct xlocale_collate __xlocale_global_collate = { {{0}, "C"}, 1, 0, 0, 0 }; struct xlocale_collate __xlocale_C_collate = { {{0}, "C"}, 1, 0, 0, 0 }; static int __collate_load_tables_l(const char *encoding, struct xlocale_collate *table); static void destruct_collate(void *t) { struct xlocale_collate *table = t; if (table->map && (table->maplen > 0)) { (void) munmap(table->map, table->maplen); } free(t); } void * __collate_load(const char *encoding, __unused locale_t unused) { if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || strncmp(encoding, "C.", 2) == 0) { return &__xlocale_C_collate; } struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); table->header.header.destructor = destruct_collate; // FIXME: Make sure that _LDP_CACHE is never returned. 
We should be doing // the caching outside of this section if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { xlocale_release(table); return NULL; } return table; } /** * Load the collation tables for the specified encoding into the global table. */ int __collate_load_tables(const char *encoding) { return (__collate_load_tables_l(encoding, &__xlocale_global_collate)); } static int __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) { int i, chains, z; char *buf; char *TMP; char *map; collate_info_t *info; struct stat sbuf; int fd; table->__collate_load_error = 1; /* 'encoding' must be already checked. */ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || strncmp(encoding, "C.", 2) == 0) { return (_LDP_CACHE); } if (asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding) == -1) return (_LDP_ERROR); if ((fd = _open(buf, O_RDONLY)) < 0) { free(buf); return (_LDP_ERROR); } free(buf); if (_fstat(fd, &sbuf) < 0) { (void) _close(fd); return (_LDP_ERROR); } - if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) { + if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN + + XLOCALE_DEF_VERSION_LEN + + sizeof (info))) { (void) _close(fd); errno = EINVAL; return (_LDP_ERROR); } map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); (void) _close(fd); if ((TMP = map) == MAP_FAILED) { return (_LDP_ERROR); } - if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) { + if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } - TMP += COLLATE_STR_LEN; + TMP += COLLATE_FMT_VERSION_LEN; + strlcat(table->header.version, TMP, sizeof (table->header.version)); + TMP += XLOCALE_DEF_VERSION_LEN; info = (void *)TMP; TMP += sizeof (*info); if ((info->directive_count < 1) || (info->directive_count >= COLL_WEIGHTS_MAX) || ((chains = info->chain_count) < 0)) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } i = (sizeof 
(collate_char_t) * (UCHAR_MAX + 1)) + (sizeof (collate_chain_t) * chains) + (sizeof (collate_large_t) * info->large_count); for (z = 0; z < info->directive_count; z++) { i += sizeof (collate_subst_t) * info->subst_count[z]; } if (i != (sbuf.st_size - (TMP - map))) { (void) munmap(map, sbuf.st_size); errno = EINVAL; return (_LDP_ERROR); } if (table->map && (table->maplen > 0)) { (void) munmap(table->map, table->maplen); } table->map = map; table->maplen = sbuf.st_size; table->info = info; table->char_pri_table = (void *)TMP; TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); for (z = 0; z < info->directive_count; z++) { if (info->subst_count[z] > 0) { table->subst_table[z] = (void *)TMP; TMP += info->subst_count[z] * sizeof (collate_subst_t); } else { table->subst_table[z] = NULL; } } if (chains > 0) { table->chain_pri_table = (void *)TMP; TMP += chains * sizeof (collate_chain_t); } else table->chain_pri_table = NULL; if (info->large_count > 0) table->large_pri_table = (void *)TMP; else table->large_pri_table = NULL; table->__collate_load_error = 0; return (_LDP_LOADED); } static const int32_t * substsearch(struct xlocale_collate *table, const wchar_t key, int pass) { const collate_subst_t *p; int n = table->info->subst_count[pass]; if (n == 0) return (NULL); if (pass >= table->info->directive_count) return (NULL); if (!(key & COLLATE_SUBST_PRIORITY)) return (NULL); p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); assert(p->key == key); return (p->pri); } static collate_chain_t * chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) { int low = 0; int high = table->info->chain_count - 1; int next, compar, l; collate_chain_t *p; collate_chain_t *tab = table->chain_pri_table; if (high < 0) return (NULL); while (low <= high) { next = (low + high) / 2; p = tab + next; compar = *key - *p->str; if (compar == 0) { l = wcsnlen(p->str, COLLATE_STR_LEN); compar = wcsncmp(key, p->str, l); if (compar == 0) { *len = l; return (p); } } if (compar 
> 0) low = next + 1; else high = next - 1; } return (NULL); } static collate_large_t * largesearch(struct xlocale_collate *table, const wchar_t key) { int low = 0; int high = table->info->large_count - 1; int next, compar; collate_large_t *p; collate_large_t *tab = table->large_pri_table; if (high < 0) return (NULL); while (low <= high) { next = (low + high) / 2; p = tab + next; compar = key - p->val; if (compar == 0) return (p); if (compar > 0) low = next + 1; else high = next - 1; } return (NULL); } void _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, int *pri, int which, const int **state) { collate_chain_t *p2; collate_large_t *match; int p, l; const int *sptr; /* * If this is the "last" pass for the UNDEFINED, then * we just return the priority itself. */ if (which >= table->info->directive_count) { *pri = *t; *len = 1; *state = NULL; return; } /* * If we have remaining substitution data from a previous * call, consume it first. */ if ((sptr = *state) != NULL) { *pri = *sptr; sptr++; if ((sptr == *state) || (sptr == NULL)) *state = NULL; else *state = sptr; *len = 0; return; } /* No active substitutions */ *len = 1; /* * Check for composites such as diphthongs that collate as a * single element (aka chains or collating-elements). */ if (((p2 = chainsearch(table, t, &l)) != NULL) && ((p = p2->pri[which]) >= 0)) { *len = l; *pri = p; } else if (*t <= UCHAR_MAX) { /* * Character is a small (8-bit) character. * We just look these up directly for speed. */ *pri = table->char_pri_table[*t].pri[which]; } else if ((table->info->large_count > 0) && ((match = largesearch(table, *t)) != NULL)) { /* * Character was found in the extended table. */ *pri = match->pri.pri[which]; } else { /* * Character lacks a specific definition. */ if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { /* Mask off sign bit to prevent ordering confusion. 
*/ *pri = (*t & COLLATE_MAX_PRIORITY); } else { *pri = table->info->undef_pri[which]; } /* No substitutions for undefined characters! */ return; } /* * Try substituting (expanding) the character. We are * currently doing this *after* the chain compression. I * think it should not matter, but this way might be slightly * faster. * * We do this after the priority search, as this will help us * to identify a single key value. In order for this to work, * its important that the priority assigned to a given element * to be substituted be unique for that level. The localedef * code ensures this for us. */ if ((sptr = substsearch(table, *pri, which)) != NULL) { if ((*pri = *sptr) > 0) { sptr++; *state = *sptr ? sptr : NULL; } } } /* * This is the meaty part of wcsxfrm & strxfrm. Note that it does * NOT NULL terminate. That is left to the caller. */ size_t _collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, size_t room) { int pri; int len; const wchar_t *t; wchar_t *tr = NULL; int direc; int pass; const int32_t *state; size_t want = 0; size_t need = 0; int ndir = table->info->directive_count; assert(src); for (pass = 0; pass <= ndir; pass++) { state = NULL; if (pass != 0) { /* insert level separator from the previous pass */ if (room) { *xf++ = 1; room--; } want++; } /* special pass for undefined */ if (pass == ndir) { direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; } else { direc = table->info->directive[pass]; } t = src; if (direc & DIRECTIVE_BACKWARD) { wchar_t *bp, *fp, c; free(tr); if ((tr = wcsdup(t)) == NULL) { errno = ENOMEM; goto fail; } bp = tr; fp = tr + wcslen(tr) - 1; while (bp < fp) { c = *bp; *bp++ = *fp; *fp-- = c; } t = (const wchar_t *)tr; } if (direc & DIRECTIVE_POSITION) { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; pri = COLLATE_MAX_PRIORITY; } if (room) { *xf++ = pri; room--; } want++; need = want; } } else { 
while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; continue; } if (room) { *xf++ = pri; room--; } want++; need = want; } } } free(tr); return (need); fail: free(tr); return ((size_t)(-1)); } /* * In the non-POSIX case, we transform each character into a string of * characters representing the character's priority. Since char is usually * signed, we are limited by 7 bits per byte. To avoid zero, we need to add * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 * bits per byte. * * It turns out that we sometimes have real priorities that are * 31-bits wide. (But: be careful using priorities where the high * order bit is set -- i.e. the priority is negative. The sort order * may be surprising!) * * TODO: This would be a good area to optimize somewhat. It turns out * that real prioririties *except for the last UNDEFINED pass* are generally * very small. We need the localedef code to precalculate the max * priority for us, and ideally also give us a mask, and then we could * severely limit what we expand to. 
*/ #define XFRM_BYTES 6 #define XFRM_OFFSET ('0') /* make all printable characters */ #define XFRM_SHIFT 6 #define XFRM_MASK ((1 << XFRM_SHIFT) - 1) #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ static int xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) { /* we use unsigned to ensure zero fill on right shift */ uint32_t val = (uint32_t)table->info->pri_count[pass]; int nc = 0; while (val) { *p = (pri & XFRM_MASK) + XFRM_OFFSET; pri >>= XFRM_SHIFT; val >>= XFRM_SHIFT; p++; nc++; } return (nc); } size_t _collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, size_t room) { int pri; int len; const wchar_t *t; wchar_t *tr = NULL; int direc; int pass; const int32_t *state; size_t want = 0; size_t need = 0; int b; uint8_t buf[XFRM_BYTES]; int ndir = table->info->directive_count; assert(src); for (pass = 0; pass <= ndir; pass++) { state = NULL; if (pass != 0) { /* insert level separator from the previous pass */ if (room) { *xf++ = XFRM_SEP; room--; } want++; } /* special pass for undefined */ if (pass == ndir) { direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; } else { direc = table->info->directive[pass]; } t = src; if (direc & DIRECTIVE_BACKWARD) { wchar_t *bp, *fp, c; free(tr); if ((tr = wcsdup(t)) == NULL) { errno = ENOMEM; goto fail; } bp = tr; fp = tr + wcslen(tr) - 1; while (bp < fp) { c = *bp; *bp++ = *fp; *fp-- = c; } t = (const wchar_t *)tr; } if (direc & DIRECTIVE_POSITION) { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; pri = COLLATE_MAX_PRIORITY; } b = xfrm(table, buf, pri, pass); want += b; if (room) { while (b) { b--; if (room) { *xf++ = buf[b]; room--; } } } need = want; } } else { while (*t || state) { _collate_lookup(table, t, &len, &pri, pass, &state); t += len; if (pri <= 0) { if (pri < 0) { errno = EINVAL; goto fail; } state = NULL; continue; } b = xfrm(table, buf, pri, pass); 
want += b; if (room) { while (b) { b--; if (room) { *xf++ = buf[b]; room--; } } } need = want; } } } free(tr); return (need); fail: free(tr); return ((size_t)(-1)); } /* * __collate_equiv_value returns the primary collation value for the given * collating symbol specified by str and len. Zero or negative is returned * if the collating symbol was not found. This function is used by bracket * code in the TRE regex library. */ int __collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) { int32_t e; if (len < 1 || len >= COLLATE_STR_LEN) return (-1); FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; if (table->__collate_load_error) return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); if (len == 1) { e = -1; if (*str <= UCHAR_MAX) e = table->char_pri_table[*str].pri[0]; else if (table->info->large_count > 0) { collate_large_t *match_large; match_large = largesearch(table, *str); if (match_large) e = match_large->pri.pri[0]; } if (e == 0) return (1); return (e > 0 ? e : 0); } if (table->info->chain_count > 0) { wchar_t name[COLLATE_STR_LEN]; collate_chain_t *match_chain; int clen; wcsncpy (name, str, len); name[len] = 0; match_chain = chainsearch(table, name, &clen); if (match_chain) { e = match_chain->pri[0]; if (e == 0) return (1); return (e < 0 ? -e : e); } } return (0); } Index: head/lib/libc/locale/collate.h =================================================================== --- head/lib/libc/locale/collate.h (revision 367475) +++ head/lib/libc/locale/collate.h (revision 367476) @@ -1,141 +1,144 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _COLLATE_H_ #define _COLLATE_H_ #include #include #include #include "xlocale_private.h" /* * Work around buildworld bootstrapping from older systems whose limits.h * sets COLL_WEIGHTS_MAX to 0. 
*/ #if COLL_WEIGHTS_MAX == 0 #undef COLL_WEIGHTS_MAX #define COLL_WEIGHTS_MAX 10 #endif #define COLLATE_STR_LEN 24 /* should be 64-bit multiple */ -#define COLLATE_VERSION "BSD 1.0\n" +#define COLLATE_FMT_VERSION_LEN 12 +#define COLLATE_FMT_VERSION "BSD 1.0\n" + #define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */ #define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */ #define DIRECTIVE_UNDEF 0x00 #define DIRECTIVE_FORWARD 0x01 #define DIRECTIVE_BACKWARD 0x02 #define DIRECTIVE_POSITION 0x04 #define DIRECTIVE_UNDEFINED 0x08 /* special last weight for UNDEFINED */ #define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD) /* * The collate file format is as follows: * - * char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION + * char fmt_version[COLLATE_FMT_VERSION_LEN]; // must be COLLATE_FMT_VERSION + * char def_version[XLOCALE_DEF_VERSION_LEN]; // NUL-terminated, may be empty * collate_info_t info; // see below, includes padding * collate_char_pri_t char_data[256]; // 8 bit char values * collate_subst_t subst[*]; // 0 or more substitutions * collate_chain_pri_t chains[*]; // 0 or more chains * collate_large_pri_t large[*]; // extended char priorities * * Note that all structures must be 32-bit aligned, as each structure * contains 32-bit member fields. The entire file is mmap'd, so its * critical that alignment be observed. It is not generally safe to * use any 64-bit values in the structures. 
*/ typedef struct collate_info { uint8_t directive_count; uint8_t directive[COLL_WEIGHTS_MAX]; int32_t pri_count[COLL_WEIGHTS_MAX]; int32_t flags; int32_t chain_count; int32_t large_count; int32_t subst_count[COLL_WEIGHTS_MAX]; int32_t undef_pri[COLL_WEIGHTS_MAX]; } collate_info_t; typedef struct collate_char { int32_t pri[COLL_WEIGHTS_MAX]; } collate_char_t; typedef struct collate_chain { wchar_t str[COLLATE_STR_LEN]; int32_t pri[COLL_WEIGHTS_MAX]; } collate_chain_t; typedef struct collate_large { int32_t val; collate_char_t pri; } collate_large_t; typedef struct collate_subst { int32_t key; int32_t pri[COLLATE_STR_LEN]; } collate_subst_t; struct xlocale_collate { struct xlocale_component header; int __collate_load_error; char * map; size_t maplen; collate_info_t *info; collate_char_t *char_pri_table; collate_large_t *large_pri_table; collate_chain_t *chain_pri_table; collate_subst_t *subst_table[COLL_WEIGHTS_MAX]; }; __BEGIN_DECLS int __collate_load_tables(const char *); int __collate_equiv_value(locale_t, const wchar_t *, size_t); void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *, int, const int **); int __collate_range_cmp(char, char); int __wcollate_range_cmp(wchar_t, wchar_t); size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *, size_t); size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *, size_t); __END_DECLS #endif /* !_COLLATE_H_ */ Index: head/lib/libc/locale/querylocale.3 =================================================================== --- head/lib/libc/locale/querylocale.3 (revision 367475) +++ head/lib/libc/locale/querylocale.3 (revision 367476) @@ -1,54 +1,74 @@ .\" Copyright (c) 2011 The FreeBSD Foundation .\" All rights reserved. .\" .\" This documentation was written by David Chisnall under sponsorship from .\" the FreeBSD Foundation. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd May 3, 2013 +.Dd November 8, 2020 .Dt QUERYLOCALE 3 .Os .Sh NAME .Nm querylocale -.Nd Look up the locale name for a specified category +.Nd Look up the locale name or version for a specified category .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In locale.h .Ft const char * .Fn querylocale "int mask" "locale_t locale" .Sh DESCRIPTION -Returns the name of the locale for the category specified by +Returns the name or version of the locale for the category specified by .Fa mask . -This possible values for the mask are the same as those in -.Xr newlocale 3 . -If more than one bit in the mask is set, the returned value is undefined. 
+The possible values for the mask are the same as those in +.Xr newlocale 3 , +when requesting the locale name. +Specify the bitwise OR of +.Fa LC_VERSION_MASK +and another mask value to request a version string. +Version strings can be compared to detect changes to the locale's definition. +The structure of the version string is unspecified. +Currently, version information is only available for +.Fa LC_COLLATE_MASK , +and an empty string is returned for other categories. +If more than one bit in the mask is set, not counting +.Fa LC_VERSION_MASK , +the returned value is undefined. .Sh SEE ALSO .Xr duplocale 3 , .Xr freelocale 3 , .Xr localeconv 3 , .Xr newlocale 3 , .Xr uselocale 3 , .Xr xlocale 3 +.Sh HISTORY +The +.Fn querylocale +function first appeared in +.Fx 9.1 , +and is based on the function of the same name in Darwin. +.Fa LC_VERSION_MASK +first appeared in +.Fx 13.0 . Index: head/lib/libc/locale/xlocale.c =================================================================== --- head/lib/libc/locale/xlocale.c (revision 367475) +++ head/lib/libc/locale/xlocale.c (revision 367476) @@ -1,373 +1,382 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * This software was developed by David Chisnall under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include "libc_private.h" #include "xlocale_private.h" /** * Each locale loader declares a global component. This is used by setlocale() * and also by xlocale with LC_GLOBAL_LOCALE.. */ extern struct xlocale_component __xlocale_global_collate; extern struct xlocale_component __xlocale_global_ctype; extern struct xlocale_component __xlocale_global_monetary; extern struct xlocale_component __xlocale_global_numeric; extern struct xlocale_component __xlocale_global_time; extern struct xlocale_component __xlocale_global_messages; /* * And another version for the statically-allocated C locale. We only have * components for the parts that are expected to be sensible. */ extern struct xlocale_component __xlocale_C_collate; extern struct xlocale_component __xlocale_C_ctype; #ifndef __NO_TLS /* * The locale for this thread. */ _Thread_local locale_t __thread_locale; #endif /* * Flag indicating that one or more per-thread locales exist. */ int __has_thread_locale; /* * Private functions in setlocale.c. 
*/ const char * __get_locale_env(int category); int __detect_path_locale(void); struct _xlocale __xlocale_global_locale = { {0}, { &__xlocale_global_collate, &__xlocale_global_ctype, &__xlocale_global_monetary, &__xlocale_global_numeric, &__xlocale_global_time, &__xlocale_global_messages }, 1, 0, 1, 0 }; struct _xlocale __xlocale_C_locale = { {0}, { &__xlocale_C_collate, &__xlocale_C_ctype, 0, 0, 0, 0 }, 1, 0, 1, 0 }; static void*(*constructors[])(const char*, locale_t) = { __collate_load, __ctype_load, __monetary_load, __numeric_load, __time_load, __messages_load }; static pthread_key_t locale_info_key; static int fake_tls; static locale_t thread_local_locale; static void init_key(void) { pthread_key_create(&locale_info_key, xlocale_release); pthread_setspecific(locale_info_key, (void*)42); if (pthread_getspecific(locale_info_key) == (void*)42) { pthread_setspecific(locale_info_key, 0); } else { fake_tls = 1; } /* At least one per-thread locale has now been set. */ __has_thread_locale = 1; __detect_path_locale(); } static pthread_once_t once_control = PTHREAD_ONCE_INIT; static locale_t get_thread_locale(void) { _once(&once_control, init_key); return (fake_tls ? thread_local_locale : pthread_getspecific(locale_info_key)); } #ifdef __NO_TLS locale_t __get_locale(void) { locale_t l = get_thread_locale(); return (l ? l : &__xlocale_global_locale); } #endif static void set_thread_locale(locale_t loc) { locale_t l = (loc == LC_GLOBAL_LOCALE) ? 0 : loc; _once(&once_control, init_key); if (NULL != l) { xlocale_retain((struct xlocale_refcounted*)l); } locale_t old = get_thread_locale(); if ((NULL != old) && (l != old)) { xlocale_release((struct xlocale_refcounted*)old); } if (fake_tls) { thread_local_locale = l; } else { pthread_setspecific(locale_info_key, l); } #ifndef __NO_TLS __thread_locale = l; __set_thread_rune_locale(loc); #endif } /** * Clean up a locale, once its reference count reaches zero. 
This function is * called by xlocale_release(), it should not be called directly. */ static void destruct_locale(void *l) { locale_t loc = l; for (int type=0 ; typecomponents[type]) { xlocale_release(loc->components[type]); } } if (loc->csym) { free(loc->csym); } free(l); } /** * Allocates a new, uninitialised, locale. */ static locale_t alloc_locale(void) { locale_t new = calloc(sizeof(struct _xlocale), 1); new->header.destructor = destruct_locale; new->monetary_locale_changed = 1; new->numeric_locale_changed = 1; return (new); } static void copyflags(locale_t new, locale_t old) { new->using_monetary_locale = old->using_monetary_locale; new->using_numeric_locale = old->using_numeric_locale; new->using_time_locale = old->using_time_locale; new->using_messages_locale = old->using_messages_locale; } static int dupcomponent(int type, locale_t base, locale_t new) { /* Always copy from the global locale, since it has mutable components. */ struct xlocale_component *src = base->components[type]; if (&__xlocale_global_locale == base) { new->components[type] = constructors[type](src->locale, new); if (new->components[type]) { strncpy(new->components[type]->locale, src->locale, ENCODING_LEN); + strncpy(new->components[type]->version, src->version, + XLOCALE_DEF_VERSION_LEN); } } else if (base->components[type]) { new->components[type] = xlocale_retain(base->components[type]); } else { /* If the component was NULL, return success - if base is a * valid locale then the flag indicating that this isn't * present should be set. If it isn't a valid locale, then * we're stuck anyway. */ return 1; } return (0 != new->components[type]); } /* * Public interfaces. These are the five public functions described by the * xlocale interface. 
*/ locale_t newlocale(int mask, const char *locale, locale_t base) { locale_t orig_base; int type; const char *realLocale = locale; int useenv = 0; int success = 1; _once(&once_control, init_key); locale_t new = alloc_locale(); if (NULL == new) { return (NULL); } orig_base = base; FIX_LOCALE(base); copyflags(new, base); if (NULL == locale) { realLocale = "C"; } else if ('\0' == locale[0]) { useenv = 1; } for (type=0 ; typecomponents[type] = constructors[type](realLocale, new); if (new->components[type]) { strncpy(new->components[type]->locale, realLocale, ENCODING_LEN); } else { success = 0; break; } } else { if (!dupcomponent(type, base, new)) { success = 0; break; } } mask >>= 1; } if (0 == success) { xlocale_release(new); new = NULL; } else if (base == orig_base) { xlocale_release(base); } return (new); } locale_t duplocale(locale_t base) { locale_t new = alloc_locale(); int type; _once(&once_control, init_key); if (NULL == new) { return (NULL); } FIX_LOCALE(base); copyflags(new, base); for (type=0 ; type= XLC_LAST) return (NULL); - if (loc->components[type]) - return (loc->components[type]->locale); - return ("C"); + if (mask & LC_VERSION_MASK) { + if (loc->components[type]) + return (loc->components[type]->version); + return (""); + } else { + if (loc->components[type]) + return (loc->components[type]->locale); + return ("C"); + } } /* * Installs the specified locale_t as this thread's locale. */ locale_t uselocale(locale_t loc) { locale_t old = get_thread_locale(); if (NULL != loc) { set_thread_locale(loc); } return (old ? old : LC_GLOBAL_LOCALE); } Index: head/lib/libc/locale/xlocale_private.h =================================================================== --- head/lib/libc/locale/xlocale_private.h (revision 367475) +++ head/lib/libc/locale/xlocale_private.h (revision 367476) @@ -1,226 +1,231 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. 
* * This software was developed by David Chisnall under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _XLOCALE_PRIVATE__H_ #define _XLOCALE_PRIVATE__H_ #include #include #include #include #include #include #include "setlocale.h" /** * The XLC_ values are indexes into the components array. They are defined in * the same order as the LC_ values in locale.h, but without the LC_ALL zero * value. Translating from LC_X to XLC_X is done by subtracting one. * * Any reordering of this enum should ensure that these invariants are not * violated. 
*/ enum { XLC_COLLATE = 0, XLC_CTYPE, XLC_MONETARY, XLC_NUMERIC, XLC_TIME, XLC_MESSAGES, XLC_LAST }; _Static_assert(XLC_LAST - XLC_COLLATE == 6, "XLC values should be contiguous"); _Static_assert(XLC_COLLATE == LC_COLLATE - 1, "XLC_COLLATE doesn't match the LC_COLLATE value."); _Static_assert(XLC_CTYPE == LC_CTYPE - 1, "XLC_CTYPE doesn't match the LC_CTYPE value."); _Static_assert(XLC_MONETARY == LC_MONETARY - 1, "XLC_MONETARY doesn't match the LC_MONETARY value."); _Static_assert(XLC_NUMERIC == LC_NUMERIC - 1, "XLC_NUMERIC doesn't match the LC_NUMERIC value."); _Static_assert(XLC_TIME == LC_TIME - 1, "XLC_TIME doesn't match the LC_TIME value."); _Static_assert(XLC_MESSAGES == LC_MESSAGES - 1, "XLC_MESSAGES doesn't match the LC_MESSAGES value."); /** * Header used for objects that are reference counted. Objects may optionally * have a destructor associated, which is responsible for destroying the * structure. Global / static versions of the structure should have no * destructor set - they can then have their reference counts manipulated as * normal, but will not do anything with them. * * The header stores a retain count - objects are assumed to have a reference * count of 1 when they are created, but the retain count is 0. When the * retain count is less than 0, they are freed. */ struct xlocale_refcounted { /** Number of references to this component. */ long retain_count; /** Function used to destroy this component, if one is required*/ void(*destructor)(void*); }; + +#define XLOCALE_DEF_VERSION_LEN 12 + /** * Header for a locale component. All locale components must begin with this * header. */ struct xlocale_component { struct xlocale_refcounted header; /** Name of the locale used for this component. */ char locale[ENCODING_LEN+1]; + /** Version of the definition for this component. */ + char version[XLOCALE_DEF_VERSION_LEN]; }; /** * xlocale structure, stores per-thread locale information. 
*/ struct _xlocale { struct xlocale_refcounted header; /** Components for the locale. */ struct xlocale_component *components[XLC_LAST]; /** Flag indicating if components[XLC_MONETARY] has changed since the * last call to localeconv_l() with this locale. */ int monetary_locale_changed; /** Flag indicating whether this locale is actually using a locale for * LC_MONETARY (1), or if it should use the C default instead (0). */ int using_monetary_locale; /** Flag indicating if components[XLC_NUMERIC] has changed since the * last call to localeconv_l() with this locale. */ int numeric_locale_changed; /** Flag indicating whether this locale is actually using a locale for * LC_NUMERIC (1), or if it should use the C default instead (0). */ int using_numeric_locale; /** Flag indicating whether this locale is actually using a locale for * LC_TIME (1), or if it should use the C default instead (0). */ int using_time_locale; /** Flag indicating whether this locale is actually using a locale for * LC_MESSAGES (1), or if it should use the C default instead (0). */ int using_messages_locale; /** The structure to be returned from localeconv_l() for this locale. */ struct lconv lconv; /** Buffer used by nl_langinfo_l() */ char *csym; }; /** * Increments the reference count of a reference-counted structure. */ __attribute__((unused)) static void* xlocale_retain(void *val) { struct xlocale_refcounted *obj = val; atomic_add_long(&(obj->retain_count), 1); return (val); } /** * Decrements the reference count of a reference-counted structure, freeing it * if this is the last reference, calling its destructor if it has one. */ __attribute__((unused)) static void xlocale_release(void *val) { struct xlocale_refcounted *obj = val; long count; count = atomic_fetchadd_long(&(obj->retain_count), -1) - 1; if (count < 0 && obj->destructor != NULL) obj->destructor(obj); } /** * Load functions. Each takes the name of a locale and a pointer to the data * to be initialised as arguments. 
Two special values are allowed for the */ extern void* __collate_load(const char*, locale_t); extern void* __ctype_load(const char*, locale_t); extern void* __messages_load(const char*, locale_t); extern void* __monetary_load(const char*, locale_t); extern void* __numeric_load(const char*, locale_t); extern void* __time_load(const char*, locale_t); extern struct _xlocale __xlocale_global_locale; extern struct _xlocale __xlocale_C_locale; /** * Caches the rune table in TLS for fast access. */ void __set_thread_rune_locale(locale_t loc); /** * Flag indicating whether a per-thread locale has been set. If no per-thread * locale has ever been set, then we always use the global locale. */ extern int __has_thread_locale; #ifndef __NO_TLS /** * The per-thread locale. Avoids the need to use pthread lookup functions when * getting the per-thread locale. */ extern _Thread_local locale_t __thread_locale; /** * Returns the current locale for this thread, or the global locale if none is * set. The caller does not have to free the locale. The return value from * this call is not guaranteed to remain valid after the locale changes. As * such, this should only be called within libc functions. */ static inline locale_t __get_locale(void) { if (!__has_thread_locale) { return (&__xlocale_global_locale); } return (__thread_locale ? __thread_locale : &__xlocale_global_locale); } #else locale_t __get_locale(void); #endif /** * Two magic values are allowed for locale_t objects. NULL and -1. This * function maps those to the real locales that they represent. */ static inline locale_t get_real_locale(locale_t locale) { switch ((intptr_t)locale) { case 0: return (&__xlocale_C_locale); case -1: return (&__xlocale_global_locale); default: return (locale); } } /** * Replace a placeholder locale with the real global or thread-local locale_t. 
*/ #define FIX_LOCALE(l) (l = get_real_locale(l)) #endif Index: head/share/colldef/Makefile =================================================================== --- head/share/colldef/Makefile (revision 367475) +++ head/share/colldef/Makefile (revision 367476) @@ -1,234 +1,238 @@ # $FreeBSD$ # Warning: Do not edit. This file is automatically generated from the # tools in /usr/src/tools/tools/locale. LOCALEDIR= ${SHAREDIR}/locale FILESNAME= LC_COLLATE .SUFFIXES: .src .LC_COLLATE MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps +CLDR_VERSION= "34.0" + .include .src.LC_COLLATE: localedef ${LOCALEDEF_ENDIAN} -D -U -i ${.IMPSRC} \ + -V ${CLDR_VERSION} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} ${.OBJDIR}/${.IMPSRC:T:R} LOCALES+= af_ZA.UTF-8 LOCALES+= am_ET.UTF-8 LOCALES+= ar_SA.UTF-8 LOCALES+= be_BY.UTF-8 LOCALES+= ca_AD.UTF-8 LOCALES+= cs_CZ.UTF-8 LOCALES+= da_DK.UTF-8 LOCALES+= el_GR.UTF-8 LOCALES+= en_US.UTF-8 LOCALES+= es_MX.UTF-8 LOCALES+= et_EE.UTF-8 LOCALES+= fi_FI.UTF-8 LOCALES+= fr_CA.UTF-8 LOCALES+= he_IL.UTF-8 LOCALES+= hi_IN.UTF-8 LOCALES+= hu_HU.UTF-8 LOCALES+= hy_AM.UTF-8 LOCALES+= is_IS.UTF-8 LOCALES+= ja_JP.UTF-8 LOCALES+= kk_KZ.UTF-8 LOCALES+= ko_KR.UTF-8 LOCALES+= lt_LT.UTF-8 LOCALES+= lv_LV.UTF-8 LOCALES+= nn_NO.UTF-8 LOCALES+= pl_PL.UTF-8 LOCALES+= ro_RO.UTF-8 LOCALES+= ru_RU.UTF-8 LOCALES+= se_NO.UTF-8 LOCALES+= sk_SK.UTF-8 LOCALES+= sl_SI.UTF-8 LOCALES+= sr_RS.UTF-8 LOCALES+= sr_RS.UTF-8@latin LOCALES+= sv_SE.UTF-8 LOCALES+= tr_TR.UTF-8 LOCALES+= uk_UA.UTF-8 LOCALES+= zh_CN.UTF-8 LOCALES+= zh_TW.UTF-8 LOCALES_MAPPED+= af_ZA.UTF-8 af_ZA.ISO8859-15 LOCALES_MAPPED+= af_ZA.UTF-8 af_ZA.ISO8859-1 LOCALES_MAPPED+= be_BY.UTF-8 be_BY.ISO8859-5 LOCALES_MAPPED+= be_BY.UTF-8 be_BY.CP1251 LOCALES_MAPPED+= be_BY.UTF-8 be_BY.CP1131 LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.KOI8-R LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.ISO8859-5 LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.CP866 LOCALES_MAPPED+= ru_RU.UTF-8 ru_RU.CP1251 LOCALES_MAPPED+= ru_RU.UTF-8 bg_BG.CP1251 
LOCALES_MAPPED+= ca_AD.UTF-8 ca_IT.ISO8859-15 LOCALES_MAPPED+= ca_AD.UTF-8 ca_IT.ISO8859-1 LOCALES_MAPPED+= ca_AD.UTF-8 ca_FR.ISO8859-15 LOCALES_MAPPED+= ca_AD.UTF-8 ca_FR.ISO8859-1 LOCALES_MAPPED+= ca_AD.UTF-8 ca_ES.ISO8859-15 LOCALES_MAPPED+= ca_AD.UTF-8 ca_ES.ISO8859-1 LOCALES_MAPPED+= ca_AD.UTF-8 ca_AD.ISO8859-15 LOCALES_MAPPED+= ca_AD.UTF-8 ca_AD.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 pt_PT.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 pt_PT.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 pt_BR.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 nl_NL.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 nl_NL.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 nl_BE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 nl_BE.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 it_IT.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 it_IT.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 it_CH.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 it_CH.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 fr_FR.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 fr_FR.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 fr_CH.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 fr_CH.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 fr_BE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 fr_BE.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 eu_ES.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 eu_ES.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_ZA.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_ZA.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_ZA.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_US.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_US.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_US.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_SG.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_NZ.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_NZ.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_NZ.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_IE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_IE.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_HK.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_GB.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_GB.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_GB.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_CA.US-ASCII 
LOCALES_MAPPED+= en_US.UTF-8 en_CA.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_CA.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 en_AU.US-ASCII LOCALES_MAPPED+= en_US.UTF-8 en_AU.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 en_AU.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 de_DE.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 de_DE.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 de_CH.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 de_CH.ISO8859-1 LOCALES_MAPPED+= en_US.UTF-8 de_AT.ISO8859-15 LOCALES_MAPPED+= en_US.UTF-8 de_AT.ISO8859-1 LOCALES_MAPPED+= el_GR.UTF-8 el_GR.ISO8859-7 LOCALES_MAPPED+= es_MX.UTF-8 es_MX.ISO8859-1 LOCALES_MAPPED+= es_MX.UTF-8 es_ES.ISO8859-15 LOCALES_MAPPED+= es_MX.UTF-8 es_ES.ISO8859-1 LOCALES_MAPPED+= es_MX.UTF-8 es_AR.ISO8859-1 LOCALES_MAPPED+= et_EE.UTF-8 et_EE.ISO8859-15 LOCALES_MAPPED+= et_EE.UTF-8 et_EE.ISO8859-1 LOCALES_MAPPED+= fi_FI.UTF-8 fi_FI.ISO8859-15 LOCALES_MAPPED+= fi_FI.UTF-8 fi_FI.ISO8859-1 LOCALES_MAPPED+= fr_CA.UTF-8 fr_CA.ISO8859-15 LOCALES_MAPPED+= fr_CA.UTF-8 fr_CA.ISO8859-1 LOCALES_MAPPED+= hi_IN.UTF-8 hi_IN.ISCII-DEV LOCALES_MAPPED+= sr_RS.UTF-8@latin hr_HR.UTF-8 LOCALES_MAPPED+= hy_AM.UTF-8 hy_AM.ARMSCII-8 LOCALES_MAPPED+= is_IS.UTF-8 is_IS.ISO8859-15 LOCALES_MAPPED+= is_IS.UTF-8 is_IS.ISO8859-1 LOCALES_MAPPED+= ja_JP.UTF-8 ja_JP.SJIS LOCALES_MAPPED+= ko_KR.UTF-8 ko_KR.eucKR LOCALES_MAPPED+= lt_LT.UTF-8 lt_LT.ISO8859-13 LOCALES_MAPPED+= lv_LV.UTF-8 lv_LV.ISO8859-13 LOCALES_MAPPED+= pl_PL.UTF-8 pl_PL.ISO8859-2 LOCALES_MAPPED+= ro_RO.UTF-8 ro_RO.ISO8859-2 LOCALES_MAPPED+= sl_SI.UTF-8 sl_SI.ISO8859-2 LOCALES_MAPPED+= sv_SE.UTF-8 sv_SE.ISO8859-15 LOCALES_MAPPED+= sv_SE.UTF-8 sv_SE.ISO8859-1 LOCALES_MAPPED+= sv_SE.UTF-8 sv_FI.ISO8859-15 LOCALES_MAPPED+= sv_SE.UTF-8 sv_FI.ISO8859-1 LOCALES_MAPPED+= tr_TR.UTF-8 tr_TR.ISO8859-9 LOCALES_MAPPED+= uk_UA.UTF-8 uk_UA.KOI8-U LOCALES_MAPPED+= uk_UA.UTF-8 uk_UA.ISO8859-5 LOCALES_MAPPED+= uk_UA.UTF-8 uk_UA.CP1251 LOCALES+= cs_CZ.ISO8859-2 LOCALES+= da_DK.ISO8859-1 LOCALES+= da_DK.ISO8859-15 LOCALES+= hr_HR.ISO8859-2 
LOCALES+= hu_HU.ISO8859-2 LOCALES+= nb_NO.ISO8859-1 LOCALES+= nb_NO.ISO8859-15 LOCALES+= sk_SK.ISO8859-2 LOCALES+= sr_RS.ISO8859-2 LOCALES+= sr_RS.ISO8859-5 LOCALES+= zh_CN.GB2312 LOCALES+= zh_CN.eucCN LOCALES+= zh_TW.Big5 LOCALES+= zh_CN.GB18030 LOCALES+= zh_CN.GBK LOCALES+= ja_JP.eucJP LOCALES+= nn_NO.ISO8859-15 LOCALES+= nn_NO.ISO8859-1 SAME+= ar_SA.UTF-8 ar_QA.UTF-8 SAME+= ar_SA.UTF-8 ar_MA.UTF-8 SAME+= ar_SA.UTF-8 ar_JO.UTF-8 SAME+= ar_SA.UTF-8 ar_EG.UTF-8 SAME+= ar_SA.UTF-8 ar_AE.UTF-8 SAME+= ru_RU.UTF-8 mn_MN.UTF-8 SAME+= ru_RU.UTF-8 bg_BG.UTF-8 SAME+= ca_AD.UTF-8 ca_IT.UTF-8 SAME+= ca_AD.UTF-8 ca_FR.UTF-8 SAME+= ca_AD.UTF-8 ca_ES.UTF-8 SAME+= en_US.UTF-8 pt_PT.UTF-8 SAME+= en_US.UTF-8 pt_BR.UTF-8 SAME+= en_US.UTF-8 nl_NL.UTF-8 SAME+= en_US.UTF-8 nl_BE.UTF-8 SAME+= en_US.UTF-8 it_IT.UTF-8 SAME+= en_US.UTF-8 it_CH.UTF-8 SAME+= en_US.UTF-8 ga_IE.UTF-8 SAME+= en_US.UTF-8 fr_FR.UTF-8 SAME+= en_US.UTF-8 fr_CH.UTF-8 SAME+= en_US.UTF-8 fr_BE.UTF-8 SAME+= en_US.UTF-8 eu_ES.UTF-8 SAME+= en_US.UTF-8 en_ZA.UTF-8 SAME+= en_US.UTF-8 en_SG.UTF-8 SAME+= en_US.UTF-8 en_PH.UTF-8 SAME+= en_US.UTF-8 en_NZ.UTF-8 SAME+= en_US.UTF-8 en_IE.UTF-8 SAME+= en_US.UTF-8 en_HK.UTF-8 SAME+= en_US.UTF-8 en_GB.UTF-8 SAME+= en_US.UTF-8 en_CA.UTF-8 SAME+= en_US.UTF-8 en_AU.UTF-8 SAME+= en_US.UTF-8 de_DE.UTF-8 SAME+= en_US.UTF-8 de_CH.UTF-8 SAME+= en_US.UTF-8 de_AT.UTF-8 SAME+= es_MX.UTF-8 es_ES.UTF-8 SAME+= es_MX.UTF-8 es_CR.UTF-8 SAME+= es_MX.UTF-8 es_AR.UTF-8 SAME+= nn_NO.UTF-8 nb_NO.UTF-8 SAME+= se_NO.UTF-8 se_FI.UTF-8 SAME+= sv_SE.UTF-8 sv_FI.UTF-8 SAME+= zh_TW.UTF-8 zh_HK.UTF-8 SAME+= ko_KR.eucKR ko_KR.CP949 # legacy (same charset) FILES= ${LOCALES:S/$/.LC_COLLATE/} CLEANFILES= ${FILES} .for f t in ${SAME} SYMLINKS+= ../$f/${FILESNAME} \ ${LOCALEDIR}/$t/${FILESNAME} .endfor .for f in ${LOCALES} FILESDIR_${f}.LC_COLLATE= ${LOCALEDIR}/${f} .endfor .for f t in ${LOCALES_MAPPED} FILES+= $t.LC_COLLATE FILESDIR_$t.LC_COLLATE= ${LOCALEDIR}/$t $t.LC_COLLATE: ${.CURDIR}/$f.src localedef 
${LOCALEDEF_ENDIAN} -D -U -i ${.ALLSRC} \ + -V ${CLDR_VERSION} \ -f ${MAPLOC}/map.${.TARGET:T:R:E:C/@.*//} \ ${.OBJDIR}/${.TARGET:T:R} .endfor .include Index: head/tools/tools/locale/Makefile =================================================================== --- head/tools/tools/locale/Makefile (revision 367475) +++ head/tools/tools/locale/Makefile (revision 367476) @@ -1,240 +1,242 @@ # $FreeBSD$ # See https://wiki.freebsd.org/LocaleNewApproach # Taken from FreeBSD svn [base]/user/edwin/locale/cldr # # needs: # devel/p5-Tie-IxHash # # Modified by John Marino to suit DragonFly needs # .if ${.CURDIR} == ${.OBJDIR} .error Do make obj first. .endif LOCALESRCDIR?= ${DESTDIR}/usr/src/share TMPDIR?= /tmp BASEDIR= ${.CURDIR} ETCDIR= ${BASEDIR}/etc TOOLSDIR= ${BASEDIR}/tools PATCHDIR= ${BASEDIR}/patch UNIDIR= ${.OBJDIR:tA}/unicode PKGS= openjdk8 \ apache-ant \ p5-XML-Parser \ p5-Tie-IxHash \ p5-Text-Iconv tools-test: pkg info -e ${PKGS} @echo tools ok. KNOWN= monetdef numericdef msgdef colldef ctypedef # timedef TYPES?= ${KNOWN} COLLATION_SPECIAL?= \ cs_CZ ISO8859-2 \ da_DK ISO8859-1 \ da_DK ISO8859-15 \ hr_HR ISO8859-2 \ hu_HU ISO8859-2 \ nb_NO ISO8859-1 \ nb_NO ISO8859-15 \ sk_SK ISO8859-2 \ sr_Latn_RS ISO8859-2 \ sr_Cyrl_RS ISO8859-5 \ zh_Hans_CN GB2312 \ zh_Hans_CN eucCN \ zh_Hant_TW Big5 \ zh_Hans_CN GB18030 \ zh_Hans_CN GBK \ ja_JP eucJP \ nn_NO ISO8859-15 \ nn_NO ISO8859-1 .for area enc in ${COLLATION_SPECIAL} COLLATIONS_SPECIAL_ENV+= ${area}.${enc} .endfor SETENV= env -i \ PATH="${PATH}" \ TMPDIR="${TMPDIR}" \ COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" \ UNIDIR="${UNIDIR}" \ BASEDIR="${BASEDIR}" \ TOOLSDIR="${TOOLSDIR}" \ ETCDIR="${ETCDIR}" all: posix build afterbuild .ORDER: posix build afterbuild afterbuild: build @echo "" @find . -name *failed .for t in ${TYPES} . if ${KNOWN:M${t}} build: build-${t} .ORDER: build-${t} afterbuild . endif .endfor diff: .for t in ${TYPES} . 
if ${KNOWN:M${t}} diff: diff-${t} diff-${t}: -/usr/bin/diff -ruN -x Makefile -x Makefile.depend \ ${LOCALESRCDIR}/${t} ${t} . endif .endfor install: .for t in ${TYPES} . if ${KNOWN:M${t}} install: install-${t} install-${t}: cd ${LOCALESRCDIR}/${t} && \ rm -f Makefile *.src && \ install -c ${t}/* ${LOCALESRCDIR}/${t} . endif .endfor post-install: .for t in ${TYPES} . if ${KNOWN:M${t}} cd ${LOCALSRCDIR}/${t} && \ make && make install && make clean . endif .endfor .for t in ${TYPES} CLEANDIRS+= ${t} ${t}.draft ${t}: mkdir -p ${t} ${t}.draft && \ perl -I ${TOOLSDIR} ${TOOLSDIR}/cldr2def.pl \ --unidir=${UNIDIR:tA} \ --etc=${ETCDIR:tA} \ --type=${t} build-${t}: ${t} ${SETENV} OUTBASEDIR="${.OBJDIR}/${t}" ${TOOLSDIR}/finalize ${t} .endfor static-colldef: colldef build-colldef: static-colldef static-colldef: .for area enc in ${COLLATION_SPECIAL} colldef.draft/${area}.${enc}.src: posix/${area}.${enc}.src awk -f ${TOOLSDIR}/extract-colldef.awk \ ${.ALLSRC} > ${.TARGET} || (rm -f ${.TARGET} && false) .endfor BASE_LOCALES_OF_INTEREST?= \ af_ZA am_ET ar_AE ar_EG ar_JO ar_MA ar_QA ar_SA \ be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \ cs_CZ da_DK de_AT de_CH de_DE el_GR en_AU en_CA \ en_GB en_HK en_IE en_NZ en_PH en_SG en_US en_ZA \ es_AR es_CR es_ES es_MX et_EE eu_ES fi_FI fr_BE \ fr_CA fr_CH fr_FR ga_IE he_IL hi_IN hr_HR hu_HU hy_AM \ is_IS it_CH it_IT ja_JP ko_KR lt_LT lv_LV \ nb_NO nl_BE nl_NL nn_NO pl_PL pt_BR pt_PT ro_RO \ ru_RU se_FI se_NO sk_SK sl_SI sv_FI sv_SE tr_TR \ uk_UA \ kk_KZ mn_MN sr_Cyrl_RS sr_Latn_RS \ zh_Hans_CN zh_Hant_HK zh_Hant_TW \ bn_IN gu_IN or_IN ta_IN te_IN kn_IN ml_IN si_LK \ th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \ km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN ENCODINGS= Big5 \ CP1251 \ CP866 \ CP949 \ eucCN \ eucJP \ eucKR \ GB18030 \ GB2312 \ GBK \ ISO8859-1 \ ISO8859-13 \ ISO8859-15 \ ISO8859-2 \ ISO8859-5 \ ISO8859-7 \ ISO8859-9 \ KOI8-R \ KOI8-U \ SJIS \ US-ASCII \ UTF-8 # CLDR files CLDRFILES_CORE= https://unicode.org/Public/cldr/35/core.zip 
CLDRFILES_KEY= https://unicode.org/Public/cldr/35/keyboards.zip CLDRFILES_TOOLS=https://unicode.org/Public/cldr/35/tools.zip CLDRFILES_UCD= http://www.unicode.org/Public/zipped/latest/UCD.zip # fetch and extract targets ${UNIDIR}: mkdir -p ${UNIDIR} .for N in CORE KEY TOOLS UCD ${CLDRFILES_${N}:T}: fetch ${CLDRFILES_${N}} fetch: ${CLDRFILES_${N}:T} extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UNIDIR} cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T} extract: extract-${CLDRFILES_${N}:T} .endfor + grep 'name="version"' ${UNIDIR}/tools/build.xml | \ + sed 's/.* value="//;s/".*//' > ${UNIDIR}/cldr-version patch:: .if exists(${PATCHDIR}) cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch .endif .if !exists(${UNIDIR}/tools/java/cldr.jar) .ORDER: extract patch build-tools: extract patch tools-test ${UNIDIR} cd ${UNIDIR}/tools/java && ${SETENV} ant all jar .else build-tools: @echo cldr.jar is ready. .endif JAVA_CLDR= java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar posix: posixcm post-posixcm posixsrc posixcol .ORDER: posixcm post-posixcm posixsrc posixcol ${UNIDIR}/posix: ln -s -f ../posix ${.TARGET} clean-posix: rm -rf posix ${UNIDIR}/posix post-posixcm: ${UNIDIR}/posix perl -I ${TOOLSDIR} ${TOOLSDIR}/utf8-rollup.pl \ --unidir=${UNIDIR} .for enc in ${ENCODINGS} posixcm: build-tools posix/${enc}.cm .ORDER: build-tools posix/${enc}.cm posix/${enc}.cm: mkdir -p posix && \ ${JAVA_CLDR} org.unicode.cldr.posix.GenerateCharmap \ -d posix -c ${enc} .endfor .for area in ${BASE_LOCALES_OF_INTEREST} posixsrc: build-tools posix/${area}.UTF-8.src .ORDER: build-tools posix/${area}.UTF-8.src posix/${area}.UTF-8.src: mkdir -p posix && \ ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ -d posix -m ${area} -c UTF-8 .endfor .for area encoding in ${COLLATION_SPECIAL} posixcol: build-tools posix/${area}.${encoding}.src .ORDER: build-tools posix/${area}.${encoding}.src posix/${area}.${encoding}.src: mkdir -p posix && \ ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ -d 
posix -m ${area} -c ${encoding} .endfor .include Index: head/tools/tools/locale/tools/cldr2def.pl =================================================================== --- head/tools/tools/locale/tools/cldr2def.pl (revision 367475) +++ head/tools/tools/locale/tools/cldr2def.pl (revision 367476) @@ -1,1049 +1,1068 @@ #!/usr/local/bin/perl -wC # SPDX-License-Identifier: BSD-2-Clause-FreeBSD # # Copyright 2009 Edwin Groothuis # Copyright 2015 John Marino # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. 
# # $FreeBSD$ use strict; use File::Copy; use XML::Parser; use Tie::IxHash; use Text::Iconv; #use Data::Dumper; use Getopt::Long; use Digest::SHA qw(sha1_hex); require "charmaps.pm"; if ($#ARGV < 2) { print "Usage: $0 --unidir= --etc= --type=\n"; exit(1); } my $DEFENCODING = "UTF-8"; my $UNIDIR = undef; my $ETCDIR = undef; my $TYPE = undef; +my $CLDR_VERSION = undef; + my $result = GetOptions ( "unidir=s" => \$UNIDIR, "etc=s" => \$ETCDIR, "type=s" => \$TYPE, ); my %convertors = (); my %ucd = (); my %values = (); my %hashtable = (); my %languages = (); my %translations = (); my %encodings = (); my %alternativemonths = (); get_languages(); my %utf8map = (); my %utf8aliases = (); get_unidata($UNIDIR); get_utf8map("$UNIDIR/posix/$DEFENCODING.cm"); get_encodings("$ETCDIR/charmaps"); my %keys = (); tie(%keys, "Tie::IxHash"); tie(%hashtable, "Tie::IxHash"); my %FILESNAMES = ( "monetdef" => "LC_MONETARY", "timedef" => "LC_TIME", "msgdef" => "LC_MESSAGES", "numericdef" => "LC_NUMERIC", "colldef" => "LC_COLLATE", "ctypedef" => "LC_CTYPE" ); my %callback = ( mdorder => \&callback_mdorder, altmon => \&callback_altmon, cformat => \&callback_cformat, dformat => \&callback_dformat, dtformat => \&callback_dtformat, cbabmon => \&callback_abmon, cbampm => \&callback_ampm, data => undef, ); my %DESC = ( # numericdef "decimal_point" => "decimal_point", "thousands_sep" => "thousands_sep", "grouping" => "grouping", # monetdef "int_curr_symbol" => "int_curr_symbol (last character always " . 
"SPACE)", "currency_symbol" => "currency_symbol", "mon_decimal_point" => "mon_decimal_point", "mon_thousands_sep" => "mon_thousands_sep", "mon_grouping" => "mon_grouping", "positive_sign" => "positive_sign", "negative_sign" => "negative_sign", "int_frac_digits" => "int_frac_digits", "frac_digits" => "frac_digits", "p_cs_precedes" => "p_cs_precedes", "p_sep_by_space" => "p_sep_by_space", "n_cs_precedes" => "n_cs_precedes", "n_sep_by_space" => "n_sep_by_space", "p_sign_posn" => "p_sign_posn", "n_sign_posn" => "n_sign_posn", # msgdef "yesexpr" => "yesexpr", "noexpr" => "noexpr", "yesstr" => "yesstr", "nostr" => "nostr", # timedef "abmon" => "Short month names", "mon" => "Long month names (as in a date)", "abday" => "Short weekday names", "day" => "Long weekday names", "t_fmt" => "X_fmt", "d_fmt" => "x_fmt", "c_fmt" => "c_fmt", "am_pm" => "AM/PM", "d_t_fmt" => "date_fmt", "altmon" => "Long month names (without case ending)", "md_order" => "md_order", "t_fmt_ampm" => "ampm_fmt", ); if ($TYPE eq "colldef") { transform_collation(); make_makefile(); } if ($TYPE eq "ctypedef") { transform_ctypes(); make_makefile(); } if ($TYPE eq "numericdef") { %keys = ( "decimal_point" => "s", "thousands_sep" => "s", "grouping" => "ai", ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "monetdef") { %keys = ( "int_curr_symbol" => "s", "currency_symbol" => "s", "mon_decimal_point" => "s", "mon_thousands_sep" => "s", "mon_grouping" => "ai", "positive_sign" => "s", "negative_sign" => "s", "int_frac_digits" => "i", "frac_digits" => "i", "p_cs_precedes" => "i", "p_sep_by_space" => "i", "n_cs_precedes" => "i", "n_sep_by_space" => "i", "p_sign_posn" => "i", "n_sign_posn" => "i" ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "msgdef") { %keys = ( "yesexpr" => "s", "noexpr" => "s", "yesstr" => "s", "nostr" => "s" ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "timedef") { %keys = ( "abmon" => " "as", "abday" => "as", "day" => "as", "t_fmt" => "s", 
"d_fmt" => " " " " " " "s", ); get_fields(); print_fields(); make_makefile(); } sub callback_ampm { my $s = shift; my $nl = $callback{data}{l} . "_" . $callback{data}{c}; my $enc = $callback{data}{e}; if ($nl eq 'ru_RU') { if ($enc eq 'UTF-8') { $s = 'дп;пп'; } else { my $converter = Text::Iconv->new("utf-8", "$enc"); $s = $converter->convert("дп;пп"); } } return $s; } sub callback_cformat { my $s = shift; my $nl = $callback{data}{l} . "_" . $callback{data}{c}; if ($nl eq 'ko_KR') { $s =~ s/(> )(%p)/$1%A $2/; } $s =~ s/\.,/\./; $s =~ s/ %Z//; $s =~ s/ %z//; $s =~ s/^"%e\./%A %e/; $s =~ s/^"(%B %e, )/"%A, $1/; $s =~ s/^"(%e %B )/"%A $1/; return $s; }; sub callback_dformat { my $s = shift; $s =~ s/(%m(|[-.]))%e/$1%d/; $s =~ s/%e((|[-.])%m)/%d$1/; return $s; }; sub callback_dtformat { my $s = shift; my $nl = $callback{data}{l} . "_" . $callback{data}{c}; if ($nl eq 'ja_JP') { $s =~ s/(> )(%H)/$1%A $2/; } elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') { if ($nl ne 'ko_KR') { $s =~ s/%m/%_m/; } $s =~ s/(> )(%p)/$1%A $2/; } $s =~ s/\.,/\./; $s =~ s/^"%e\./%A %e/; $s =~ s/^"(%B %e, )/"%A, $1/; $s =~ s/^"(%e %B )/"%A $1/; return $s; }; sub callback_mdorder { my $s = shift; return undef if (!defined $s); $s =~ s/[^dem]//g; $s =~ s/e/d/g; return $s; }; sub callback_altmon { # if the language/country is known in %alternative months then # return that, otherwise repeat mon my $s = shift; if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) { my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}}); my @cleaned; foreach (@altnames) { $_ =~ s/^\s+//; $_ =~ s/\s+$//; push @cleaned, $_; } return join(";",@cleaned); } return $s; } sub callback_abmon { # for specified CJK locales, pad result with a space to enable # columns to line up (style established in FreeBSD in 2001) my $s = shift; my $nl = $callback{data}{l} . "_" . 
$callback{data}{c}; if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_HK' || $nl eq 'zh_TW') { my @monthnames = split(";", $s); my @cleaned; foreach (@monthnames) { if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ || ($_ =~ /^"/ && $_ !~ /^"(||)/)) { $_ =~ s/^"/"/; } push @cleaned, $_; } return join(";",@cleaned); } return $s; } ############################ sub get_unidata { my $directory = shift; open(FIN, "$directory/UnicodeData.txt") or die("Cannot open $directory/UnicodeData.txt");; my @lines = ; chomp(@lines); close(FIN); foreach my $l (@lines) { my @a = split(/;/, $l); $ucd{code2name}{"$a[0]"} = $a[1]; # Unicode name $ucd{name2code}{"$a[1]"} = $a[0]; # Unicode code } } sub get_utf8map { my $file = shift; open(FIN, $file); my @lines = ; close(FIN); chomp(@lines); my $prev_k = undef; my $prev_v = ""; my $incharmap = 0; foreach my $l (@lines) { $l =~ s/\r//; next if ($l =~ /^\#/); next if ($l eq ""); if ($l eq "CHARMAP") { $incharmap = 1; next; } next if (!$incharmap); last if ($l eq "END CHARMAP"); $l =~ /^<([^\s]+)>\s+(.*)/; my $k = $1; my $v = $2; $k =~ s/_/ /g; # unicode char string $v =~ s/\\x//g; # UTF-8 char code $utf8map{$k} = $v; $utf8aliases{$k} = $prev_k if ($prev_v eq $v); $prev_v = $v; $prev_k = $k; } } sub get_encodings { my $dir = shift; foreach my $e (sort(keys(%encodings))) { if (!open(FIN, "$dir/$e.TXT")) { print "Cannot open charmap for $e\n"; next; } $encodings{$e} = 1; my @lines = ; close(FIN); chomp(@lines); foreach my $l (@lines) { $l =~ s/\r//; next if ($l =~ /^\#/); next if ($l eq ""); my @a = split(" ", $l); next if ($#a < 1); $a[0] =~ s/^0[xX]//; # local char code $a[1] =~ s/^0[xX]//; # unicode char code $convertors{$e}{uc($a[1])} = uc($a[0]); } } } sub get_languages { my %data = get_xmldata($ETCDIR); %languages = %{$data{L}}; %translations = %{$data{T}}; %alternativemonths = %{$data{AM}}; %encodings = %{$data{E}}; } sub transform_ctypes { # Add the C.UTF-8 $languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = 
undef; foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread my $file = $l; $file .= "_" . $f if ($f ne "x"); $file .= "_" . $c if ($c ne "x"); my $actfile = $file; my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src"; if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; } open(FIN, "$filename"); print "Reading from $filename for ${l}_${f}_${c}\n"; $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read my @lines; my $shex; my $uhex; while () { push @lines, $_; } close(FIN); $shex = sha1_hex(join("\n", @lines)); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex; $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src"); print FOUT @lines; close(FOUT); foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { next if ($enc eq $DEFENCODING); $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"; if ($file eq 'ja_JP') { # Override $filename for ja_JP because # its CTYPE is not compatible with UTF-8. $filename = "$UNIDIR/posix/$file.eucJP.src"; } if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; } @lines = (); open(FIN, "$filename"); while () { if ((/^comment_char\s/) || (/^escape_char\s/)){ push @lines, $_; } if (/^LC_CTYPE/../^END LC_CTYPE/) { push @lines, $_; } } close(FIN); $uhex = sha1_hex(join("\n", @lines) . 
$enc); $languages{$l}{$f}{data}{$c}{$enc} = $uhex; $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$enc.src"); print FOUT <) { if ((/^comment_char\s/) || (/^escape_char\s/)){ push @lines, $_; } if (/^LC_COLLATE/../^END LC_COLLATE/) { $_ =~ s/[ ]+/ /g; push @lines, $_; } } close(FIN); $shex = sha1_hex(join("\n", @lines)); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex; $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src"); print FOUT <; chomp(@lines); close(FIN); my $continue = 0; foreach my $k (keys(%keys)) { foreach my $line (@lines) { $line =~ s/\r//; next if (!$continue && $line !~ /^$k\s/); if ($continue) { $line =~ s/^\s+//; } else { $line =~ s/^$k\s+//; } $values{$l}{$f}{$c}{$k} = "" if (!defined $values{$l}{$f}{$c}{$k}); $continue = ($line =~ /\/$/); $line =~ s/\/$// if ($continue); while ($line =~ /_/) { $line =~ s/\<([^>_]+)_([^>]+)\>/<$1 $2>/; } die "_ in data - $line" if ($line =~ /_/); $values{$l}{$f}{$c}{$k} .= $line; last if (!$continue); } } } } } } sub decodecldr { my $e = shift; my $s = shift; my $v = undef; if ($e eq "UTF-8") { # # Conversion to UTF-8 can be done from the Unicode name to # the UTF-8 character code. # $v = $utf8map{$s}; die "Cannot convert $s in $e (charmap)" if (!defined $v); } else { # # Conversion to these encodings can be done from the Unicode # name to Unicode code to the encodings code. 
# my $ucc = undef; $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s}); $ucc = $ucd{name2code}{$utf8aliases{$s}} if (!defined $ucc && $utf8aliases{$s} && defined $ucd{name2code}{$utf8aliases{$s}}); if (!defined $ucc) { if (defined $translations{$e}{$s}{hex}) { $v = $translations{$e}{$s}{hex}; $ucc = 0; } elsif (defined $translations{$e}{$s}{ucc}) { $ucc = $translations{$e}{$s}{ucc}; } } die "Cannot convert $s in $e (ucd string)" if (!defined $ucc); $v = $convertors{$e}{$ucc} if (!defined $v); $v = $translations{$e}{$s}{hex} if (!defined $v && defined $translations{$e}{$s}{hex}); if (!defined $v && defined $translations{$e}{$s}{unicode}) { my $ucn = $translations{$e}{$s}{unicode}; $ucc = $ucd{name2code}{$ucn} if (defined $ucd{name2code}{$ucn}); $ucc = $ucd{name2code}{$utf8aliases{$ucn}} if (!defined $ucc && defined $ucd{name2code}{$utf8aliases{$ucn}}); $v = $convertors{$e}{$ucc}; } die "Cannot convert $s in $e (charmap)" if (!defined $v); } return pack("C", hex($v)) if (length($v) == 2); return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2))) if (length($v) == 4); return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)), hex(substr($v, 4, 2))) if (length($v) == 6); print STDERR "Cannot convert $e $s\n"; return "length = " . length($v); } sub translate { my $enc = shift; my $v = shift; return $translations{$enc}{$v} if (defined $translations{$enc}{$v}); return undef; } sub print_fields { foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") { print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") . "${c} - not read\n"; next; } my $file = $l; $file .= "_" . $f if ($f ne "x"); $file .= "_" . 
$c; print "Writing to $file in $enc\n"; if ($enc ne $DEFENCODING && !defined $convertors{$enc}) { print "Failed! Cannot convert to $enc.\n"; next; }; open(FOUT, ">$TYPE.draft/$file.$enc.new"); my $okay = 1; my $output = ""; print FOUT </) { $k = substr($g, 1); $g = $keys{$k}; } # Callback function if ($g =~ /^\(.*)/) { my $p1 = $1; $cm = $2; my $p3 = $3; my $rv = decodecldr($enc, $cm); # $rv = translate($enc, $cm) # if (!defined $rv); if (!defined $rv) { print STDERR "Could not convert $k ($cm) from $DEFENCODING to $enc\n"; $okay = 0; next; } $v = $p1 . $rv . $p3; } $output .= "$v\n"; next; } if ($g eq "as") { foreach my $v (split(/;/, $v)) { $v =~ s/^"//; $v =~ s/"$//; my $cm = ""; while ($v =~ /^(.*?)<(.*?)>(.*)/) { my $p1 = $1; $cm = $2; my $p3 = $3; my $rv = decodecldr($enc, $cm); # $rv = translate($enc, # $cm) # if (!defined $rv); if (!defined $rv) { print STDERR "Could not convert $k ($cm) from $DEFENCODING to $enc\n"; $okay = 0; next; } $v = $1 . $rv . $3; } $output .= "$v\n"; } next; } die("$k is '$g'"); } $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output); $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1; print FOUT "$output# EOF\n"; close(FOUT); if ($okay) { rename("$TYPE.draft/$file.$enc.new", "$TYPE.draft/$file.$enc.src"); } else { rename("$TYPE.draft/$file.$enc.new", "$TYPE.draft/$file.$enc.failed"); } } } } } } sub make_makefile { print "Creating Makefile for $TYPE\n"; my $SRCOUT; my $SRCOUT2; my $SRCOUT3 = ""; my $SRCOUT4 = ""; my $MAPLOC; if ($TYPE eq "colldef") { + # In future, we might want to try to put the CLDR version into + # the .src files with some new syntax, instead of the makefile. $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.IMPSRC} \\\n" . + "\t-V \${CLDR_VERSION} \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " . "\${.OBJDIR}/\${.IMPSRC:T:R}"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . "locale/etc/final-maps\n"; $SRCOUT2 = "LC_COLLATE"; $SRCOUT3 = "" . ".for f t in \${LOCALES_MAPPED}\n" . 
"FILES+=\t\$t.LC_COLLATE\n" . "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" . "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" . "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.ALLSRC} \\\n" . + "\t-V \${CLDR_VERSION} \\\n" . "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" . "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" . ".endfor\n\n"; $SRCOUT4 = "## LOCALES_MAPPED\n"; } elsif ($TYPE eq "ctypedef") { $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " . "-w \${MAPLOC}/widths.txt \\\n" . "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " . "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " . " || true"; $SRCOUT2 = "LC_CTYPE"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . "locale/etc/final-maps\n"; $SRCOUT3 = "## SYMPAIRS\n\n" . ".for s t in \${SYMPAIRS}\n" . "\${t:S/src\$/LC_CTYPE/}: " . "\$s\n" . "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " . "-w \${MAPLOC}/widths.txt \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " . "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " . " || true\n" . ".endfor\n\n"; } else { $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}"; $SRCOUT2 = "out"; $MAPLOC = ""; } open(FOUT, ">$TYPE.draft/Makefile"); print FOUT < EOF } print FOUT < 0) { my $link = shift(@files); $link =~ s/_x_x//; # special case for C $link =~ s/_x_/_/; # strip family if none there foreach my $file (@files) { my @a = split(/_/, $file); my @b = split(/\./, $a[-1]); $file =~ s/_x_/_/; print FOUT "SAME+=\t\t$link $file\n"; undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]}); } } } foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING} && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") { print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") . 
"${c} - not read\n"; next; } foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { my $file = $l; $file .= "_" . $f if ($f ne "x"); $file .= "_" . $c if ($c ne "x"); next if (!defined $languages{$l}{$f}{data}{$c}{$e}); print FOUT "LOCALES+=\t$file.$e\n"; } if (defined $languages{$l}{$f}{nc_link}) { foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { my $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n"; } } if (defined $languages{$l}{$f}{e_link}) { foreach my $el (split(" ", $languages{$l}{$f}{e_link})) { my @a = split(/:/, $el); my $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n"; } } } } } print FOUT < EOF close(FOUT); } Index: head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h =================================================================== --- head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h (revision 367475) +++ head/usr.bin/localedef/bootstrap/bootstrap_xlocale_private.h (revision 367476) @@ -1,54 +1,57 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 2018-2020 Alex Richardson * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory (Department of Computer Science and * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the * DARPA SSITH research programme. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H #define _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H /* * Avoid pulling in anything from the real xlocale_private.h. * Unfortunately, we can't simply add a local xlocale_private.h to include * it instead of the real file, since xlocale_private.h is included with double * quotes from the same directory and therefore the real file will be selected. */ #define _XLOCALE_PRIVATE__H_ typedef struct _localedef_bootstrap_xlocale* localedef_bootstrap_locale_t; struct localedef_bootstrap_xlocale_component { char unused; }; +/* This must agree with the definition in xlocale_private.h. 
*/ +#define XLOCALE_DEF_VERSION_LEN 12 + #endif /* _LOCALDEF_BOOTSTRAP_XLOCALE_PRIVATE_H */ Index: head/usr.bin/localedef/collate.c =================================================================== --- head/usr.bin/localedef/collate.c (revision 367475) +++ head/usr.bin/localedef/collate.c (revision 367476) @@ -1,1326 +1,1331 @@ /*- * Copyright 2018 Nexenta Systems, Inc. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * LC_COLLATE database generation routines for localedef. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "localedef.h" #include "parser.h" #include "collate.h" _Static_assert(COLL_WEIGHTS_MAX == 10, "This code assumes a value of 10"); /* * Design notes. * * It will be extremely helpful to the reader if they have access to * the localedef and locale file format specifications available. * Latest versions of these are available from www.opengroup.org. * * The design for the collation code is a bit complex. The goal is a * single collation database as described in collate.h (in * libc/port/locale). However, there are some other tidbits: * * a) The substitution entries are now a directly indexable array. A * priority elsewhere in the table is taken as an index into the * substitution table if it has a high bit (COLLATE_SUBST_PRIORITY) * set. (The bit is cleared and the result is the index into the * table. * * b) We eliminate duplicate entries into the substitution table. * This saves a lot of space. * * c) The priorities for each level are "compressed", so that each * sorting level has consecutively numbered priorities starting at 1. * (O is reserved for the ignore priority.) This means sort levels * which only have a few distinct priorities can represent the * priority level in fewer bits, which makes the strxfrm output * smaller. * * d) We record the total number of priorities so that strxfrm can * figure out how many bytes to expand a numeric priority into. * * e) For the UNDEFINED pass (the last pass), we record the maximum * number of bits needed to uniquely prioritize these entries, so that * the last pass can also use smaller strxfrm output when possible. * * f) Priorities with the sign bit set are verboten. This works out * because no active character set needs that bit to carry significant * information once the character is in wide form. 
* * To process the entire data to make the database, we actually run * multiple passes over the data. * * The first pass, which is done at parse time, identifies elements, * substitutions, and such, and records them in priority order. As * some priorities can refer to other priorities, using forward * references, we use a table of references indicating whether the * priority's value has been resolved, or whether it is still a * reference. * * The second pass walks over all the items in priority order, noting * that they are used directly, and not just an indirect reference. * This is done by creating a "weight" structure for the item. The * weights are stashed in an RB tree sorted by relative "priority". * * The third pass walks over all the weight structures, in priority * order, and assigns a new monotonically increasing (per sort level) * weight value to them. These are the values that will actually be * written to the file. * * The fourth pass just writes the data out. */ /* * In order to resolve the priorities, we create a table of priorities. * Entries in the table can be in one of three states. * * UNKNOWN is for newly allocated entries, and indicates that nothing * is known about the priority. (For example, when new entries are created * for collating-symbols, this is the value assigned for them until the * collating symbol's order has been determined. * * RESOLVED is used for an entry where the priority indicates the final * numeric weight. * * REFER is used for entries that reference other entries. Typically * this is used for forward references. A collating-symbol can never * have this value. * * The "pass" field is used during final resolution to aid in detection * of referencing loops. (For example depends on , but has its * priority dependent on .) 
*/ typedef enum { UNKNOWN, /* priority is totally unknown */ RESOLVED, /* priority value fully resolved */ REFER /* priority is a reference (index) */ } res_t; typedef struct weight { int32_t pri; int opt; RB_ENTRY(weight) entry; } weight_t; typedef struct priority { res_t res; int32_t pri; int pass; int lineno; } collpri_t; #define NUM_WT collinfo.directive_count /* * These are the abstract collating symbols, which are just a symbolic * way to reference a priority. */ struct collsym { char *name; int32_t ref; RB_ENTRY(collsym) entry; }; /* * These are also abstract collating symbols, but we allow them to have * different priorities at different levels. */ typedef struct collundef { char *name; int32_t ref[COLL_WEIGHTS_MAX]; RB_ENTRY(collundef) entry; } collundef_t; /* * These are called "chains" in libc. This records the fact that two * more characters should be treated as a single collating entity when * they appear together. For example, in Spanish gets collated * as a character between and . */ struct collelem { char *symbol; wchar_t *expand; int32_t ref[COLL_WEIGHTS_MAX]; RB_ENTRY(collelem) rb_bysymbol; RB_ENTRY(collelem) rb_byexpand; }; /* * Individual characters have a sequence of weights as well. */ typedef struct collchar { wchar_t wc; int32_t ref[COLL_WEIGHTS_MAX]; RB_ENTRY(collchar) entry; } collchar_t; /* * Substitution entries. The key is itself a priority. Note that * when we create one of these, we *automatically* wind up with a * fully resolved priority for the key, because creation of * substitutions creates a resolved priority at the same time. 
*/ typedef struct subst{ int32_t key; int32_t ref[COLLATE_STR_LEN]; RB_ENTRY(subst) entry; RB_ENTRY(subst) entry_ref; } subst_t; static RB_HEAD(collsyms, collsym) collsyms; static RB_HEAD(collundefs, collundef) collundefs; static RB_HEAD(elem_by_symbol, collelem) elem_by_symbol; static RB_HEAD(elem_by_expand, collelem) elem_by_expand; static RB_HEAD(collchars, collchar) collchars; static RB_HEAD(substs, subst) substs[COLL_WEIGHTS_MAX]; static RB_HEAD(substs_ref, subst) substs_ref[COLL_WEIGHTS_MAX]; static RB_HEAD(weights, weight) weights[COLL_WEIGHTS_MAX]; static int32_t nweight[COLL_WEIGHTS_MAX]; /* * This is state tracking for the ellipsis token. Note that we start * the initial values so that the ellipsis logic will think we got a * magic starting value of NUL. It starts at minus one because the * starting point is exclusive -- i.e. the starting point is not * itself handled by the ellipsis code. */ static int currorder = EOF; static int lastorder = EOF; static collelem_t *currelem; static collchar_t *currchar; static collundef_t *currundef; static wchar_t ellipsis_start = 0; static int32_t ellipsis_weights[COLL_WEIGHTS_MAX]; /* * We keep a running tally of weights. */ static int nextpri = 1; static int nextsubst[COLL_WEIGHTS_MAX] = { 0 }; /* * This array collects up the weights for each level. */ static int32_t order_weights[COLL_WEIGHTS_MAX]; static int curr_weight = 0; static int32_t subst_weights[COLLATE_STR_LEN]; static int curr_subst = 0; /* * Some initial priority values. */ static int32_t pri_undefined[COLL_WEIGHTS_MAX]; static int32_t pri_ignore; static collate_info_t collinfo; static int32_t subst_count[COLL_WEIGHTS_MAX]; static int32_t chain_count; static int32_t large_count; static collpri_t *prilist = NULL; static int numpri = 0; static int maxpri = 0; static void start_order(int); static int32_t new_pri(void) { int i; if (numpri >= maxpri) { maxpri = maxpri ? 
maxpri * 2 : 1024; prilist = realloc(prilist, sizeof (collpri_t) * maxpri); if (prilist == NULL) { fprintf(stderr,"out of memory"); return (-1); } for (i = numpri; i < maxpri; i++) { prilist[i].res = UNKNOWN; prilist[i].pri = 0; prilist[i].pass = 0; } } return (numpri++); } static collpri_t * get_pri(int32_t ref) { if ((ref < 0) || (ref > numpri)) { INTERR; return (NULL); } return (&prilist[ref]); } static void set_pri(int32_t ref, int32_t v, res_t res) { collpri_t *pri; pri = get_pri(ref); if ((res == REFER) && ((v < 0) || (v >= numpri))) { INTERR; } /* Resolve self references */ if ((res == REFER) && (ref == v)) { v = nextpri; res = RESOLVED; } if (pri->res != UNKNOWN) { warn("repeated item in order list (first on %d)", pri->lineno); return; } pri->lineno = lineno; pri->pri = v; pri->res = res; } static int32_t resolve_pri(int32_t ref) { collpri_t *pri; static int32_t pass = 0; pri = get_pri(ref); pass++; while (pri->res == REFER) { if (pri->pass == pass) { /* report a line with the circular symbol */ lineno = pri->lineno; fprintf(stderr,"circular reference in order list"); return (-1); } if ((pri->pri < 0) || (pri->pri >= numpri)) { INTERR; return (-1); } pri->pass = pass; pri = &prilist[pri->pri]; } if (pri->res == UNKNOWN) { return (-1); } if (pri->res != RESOLVED) INTERR; return (pri->pri); } static int weight_compare(const void *n1, const void *n2) { int32_t k1 = ((const weight_t *)n1)->pri; int32_t k2 = ((const weight_t *)n2)->pri; return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); } RB_GENERATE_STATIC(weights, weight, entry, weight_compare); static int collsym_compare(const void *n1, const void *n2) { const collsym_t *c1 = n1; const collsym_t *c2 = n2; int rv; rv = strcmp(c1->name, c2->name); return ((rv < 0) ? -1 : (rv > 0) ? 
1 : 0); } RB_GENERATE_STATIC(collsyms, collsym, entry, collsym_compare); static int collundef_compare(const void *n1, const void *n2) { const collundef_t *c1 = n1; const collundef_t *c2 = n2; int rv; rv = strcmp(c1->name, c2->name); return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); } RB_GENERATE_STATIC(collundefs, collundef, entry, collundef_compare); static int element_compare_symbol(const void *n1, const void *n2) { const collelem_t *c1 = n1; const collelem_t *c2 = n2; int rv; rv = strcmp(c1->symbol, c2->symbol); return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); } RB_GENERATE_STATIC(elem_by_symbol, collelem, rb_bysymbol, element_compare_symbol); static int element_compare_expand(const void *n1, const void *n2) { const collelem_t *c1 = n1; const collelem_t *c2 = n2; int rv; rv = wcscmp(c1->expand, c2->expand); return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); } RB_GENERATE_STATIC(elem_by_expand, collelem, rb_byexpand, element_compare_expand); static int collchar_compare(const void *n1, const void *n2) { wchar_t k1 = ((const collchar_t *)n1)->wc; wchar_t k2 = ((const collchar_t *)n2)->wc; return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); } RB_GENERATE_STATIC(collchars, collchar, entry, collchar_compare); static int subst_compare(const void *n1, const void *n2) { int32_t k1 = ((const subst_t *)n1)->key; int32_t k2 = ((const subst_t *)n2)->key; return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0); } RB_GENERATE_STATIC(substs, subst, entry, subst_compare); static int subst_compare_ref(const void *n1, const void *n2) { const wchar_t *c1 = ((const subst_t *)n1)->ref; const wchar_t *c2 = ((const subst_t *)n2)->ref; int rv; rv = wcscmp(c1, c2); return ((rv < 0) ? -1 : (rv > 0) ? 
1 : 0); } RB_GENERATE_STATIC(substs_ref, subst, entry_ref, subst_compare_ref); void init_collate(void) { int i; RB_INIT(&collsyms); RB_INIT(&collundefs); RB_INIT(&elem_by_symbol); RB_INIT(&elem_by_expand); RB_INIT(&collchars); for (i = 0; i < COLL_WEIGHTS_MAX; i++) { RB_INIT(&substs[i]); RB_INIT(&substs_ref[i]); RB_INIT(&weights[i]); nweight[i] = 1; } (void) memset(&collinfo, 0, sizeof (collinfo)); /* allocate some initial priorities */ pri_ignore = new_pri(); set_pri(pri_ignore, 0, RESOLVED); for (i = 0; i < COLL_WEIGHTS_MAX; i++) { pri_undefined[i] = new_pri(); /* we will override this later */ set_pri(pri_undefined[i], COLLATE_MAX_PRIORITY, UNKNOWN); } } void define_collsym(char *name) { collsym_t *sym; if ((sym = calloc(1, sizeof(*sym))) == NULL) { fprintf(stderr,"out of memory"); return; } sym->name = name; sym->ref = new_pri(); if (RB_FIND(collsyms, &collsyms, sym) != NULL) { /* * This should never happen because we are only called * for undefined symbols. */ free(sym); INTERR; return; } RB_INSERT(collsyms, &collsyms, sym); } collsym_t * lookup_collsym(char *name) { collsym_t srch; srch.name = name; return (RB_FIND(collsyms, &collsyms, &srch)); } collelem_t * lookup_collelem(char *symbol) { collelem_t srch; srch.symbol = symbol; return (RB_FIND(elem_by_symbol, &elem_by_symbol, &srch)); } static collundef_t * get_collundef(char *name) { collundef_t srch; collundef_t *ud; int i; srch.name = name; if ((ud = RB_FIND(collundefs, &collundefs, &srch)) == NULL) { if (((ud = calloc(1, sizeof(*ud))) == NULL) || ((ud->name = strdup(name)) == NULL)) { fprintf(stderr,"out of memory"); free(ud); return (NULL); } for (i = 0; i < NUM_WT; i++) { ud->ref[i] = new_pri(); } RB_INSERT(collundefs, &collundefs, ud); } add_charmap_undefined(name); return (ud); } static collchar_t * get_collchar(wchar_t wc, int create) { collchar_t srch; collchar_t *cc; int i; srch.wc = wc; cc = RB_FIND(collchars, &collchars, &srch); if ((cc == NULL) && create) { if ((cc = calloc(1, sizeof(*cc))) == 
NULL) { fprintf(stderr, "out of memory"); return (NULL); } for (i = 0; i < NUM_WT; i++) { cc->ref[i] = new_pri(); } cc->wc = wc; RB_INSERT(collchars, &collchars, cc); } return (cc); } void end_order_collsym(collsym_t *sym) { start_order(T_COLLSYM); /* update the weight */ set_pri(sym->ref, nextpri, RESOLVED); nextpri++; } void end_order(void) { int i; int32_t pri; int32_t ref; collpri_t *p; /* advance the priority/weight */ pri = nextpri; switch (currorder) { case T_CHAR: for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { /* unspecified weight is a self reference */ set_pri(currchar->ref[i], pri, RESOLVED); } else { set_pri(currchar->ref[i], ref, REFER); } order_weights[i] = -1; } /* leave a cookie trail in case next symbol is ellipsis */ ellipsis_start = currchar->wc + 1; currchar = NULL; break; case T_ELLIPSIS: /* save off the weights were we can find them */ for (i = 0; i < NUM_WT; i++) { ellipsis_weights[i] = order_weights[i]; order_weights[i] = -1; } break; case T_COLLELEM: if (currelem == NULL) { INTERR; } else { for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(currelem->ref[i], pri, RESOLVED); } else { set_pri(currelem->ref[i], ref, REFER); } order_weights[i] = -1; } } break; case T_UNDEFINED: for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(pri_undefined[i], -1, RESOLVED); } else { set_pri(pri_undefined[i], ref, REFER); } order_weights[i] = -1; } break; case T_SYMBOL: for (i = 0; i < NUM_WT; i++) { if (((ref = order_weights[i]) < 0) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(currundef->ref[i], pri, RESOLVED); } else { set_pri(currundef->ref[i], ref, REFER); } order_weights[i] = -1; } break; default: INTERR; } nextpri++; } static void start_order(int type) { int i; lastorder = currorder; currorder = type; /* this is used 
to protect ELLIPSIS processing */ if ((lastorder == T_ELLIPSIS) && (type != T_CHAR)) { fprintf(stderr, "character value expected"); } for (i = 0; i < COLL_WEIGHTS_MAX; i++) { order_weights[i] = -1; } curr_weight = 0; } void start_order_undefined(void) { start_order(T_UNDEFINED); } void start_order_symbol(char *name) { currundef = get_collundef(name); start_order(T_SYMBOL); } void start_order_char(wchar_t wc) { collchar_t *cc; int32_t ref; start_order(T_CHAR); /* * If we last saw an ellipsis, then we need to close the range. * Handle that here. Note that we have to be careful because the * items *inside* the range are treated exclusiveley to the items * outside of the range. The ends of the range can have quite * different weights than the range members. */ if (lastorder == T_ELLIPSIS) { int i; if (wc < ellipsis_start) { fprintf(stderr, "malformed range!"); return; } while (ellipsis_start < wc) { /* * pick all of the saved weights for the * ellipsis. note that -1 encodes for the * ellipsis itself, which means to take the * current relative priority. 
*/ if ((cc = get_collchar(ellipsis_start, 1)) == NULL) { INTERR; return; } for (i = 0; i < NUM_WT; i++) { collpri_t *p; if (((ref = ellipsis_weights[i]) == -1) || ((p = get_pri(ref)) == NULL) || (p->pri == -1)) { set_pri(cc->ref[i], nextpri, RESOLVED); } else { set_pri(cc->ref[i], ref, REFER); } ellipsis_weights[i] = 0; } ellipsis_start++; nextpri++; } } currchar = get_collchar(wc, 1); } void start_order_collelem(collelem_t *e) { start_order(T_COLLELEM); currelem = e; } void start_order_ellipsis(void) { int i; start_order(T_ELLIPSIS); if (lastorder != T_CHAR) { fprintf(stderr, "illegal starting point for range"); return; } for (i = 0; i < NUM_WT; i++) { ellipsis_weights[i] = order_weights[i]; } } void define_collelem(char *name, wchar_t *wcs) { collelem_t *e; int i; if (wcslen(wcs) >= COLLATE_STR_LEN) { fprintf(stderr,"expanded collation element too long"); return; } if ((e = calloc(1, sizeof(*e))) == NULL) { fprintf(stderr, "out of memory"); return; } e->expand = wcs; e->symbol = name; /* * This is executed before the order statement, so we don't * know how many priorities we *really* need. We allocate one * for each possible weight. Not a big deal, as collating-elements * prove to be quite rare. */ for (i = 0; i < COLL_WEIGHTS_MAX; i++) { e->ref[i] = new_pri(); } /* A character sequence can only reduce to one element. 
*/ if ((RB_FIND(elem_by_symbol, &elem_by_symbol, e) != NULL) || (RB_FIND(elem_by_expand, &elem_by_expand, e) != NULL)) { fprintf(stderr, "duplicate collating element definition"); free(e); return; } RB_INSERT(elem_by_symbol, &elem_by_symbol, e); RB_INSERT(elem_by_expand, &elem_by_expand, e); } void add_order_bit(int kw) { uint8_t bit = DIRECTIVE_UNDEF; switch (kw) { case T_FORWARD: bit = DIRECTIVE_FORWARD; break; case T_BACKWARD: bit = DIRECTIVE_BACKWARD; break; case T_POSITION: bit = DIRECTIVE_POSITION; break; default: INTERR; break; } collinfo.directive[collinfo.directive_count] |= bit; } void add_order_directive(void) { if (collinfo.directive_count >= COLL_WEIGHTS_MAX) { fprintf(stderr, "too many directives (max %d)\n", COLL_WEIGHTS_MAX); return; } collinfo.directive_count++; } static void add_order_pri(int32_t ref) { if (curr_weight >= NUM_WT) { fprintf(stderr, "too many weights (max %d)\n", NUM_WT); return; } order_weights[curr_weight] = ref; curr_weight++; } void add_order_collsym(collsym_t *s) { add_order_pri(s->ref); } void add_order_char(wchar_t wc) { collchar_t *cc; if ((cc = get_collchar(wc, 1)) == NULL) { INTERR; return; } add_order_pri(cc->ref[curr_weight]); } void add_order_collelem(collelem_t *e) { add_order_pri(e->ref[curr_weight]); } void add_order_ignore(void) { add_order_pri(pri_ignore); } void add_order_symbol(char *sym) { collundef_t *c; if ((c = get_collundef(sym)) == NULL) { INTERR; return; } add_order_pri(c->ref[curr_weight]); } void add_order_ellipsis(void) { /* special NULL value indicates self reference */ add_order_pri(0); } void add_order_subst(void) { subst_t srch; subst_t *s; int i; (void) memset(&srch, 0, sizeof (srch)); for (i = 0; i < curr_subst; i++) { srch.ref[i] = subst_weights[i]; subst_weights[i] = 0; } s = RB_FIND(substs_ref, &substs_ref[curr_weight], &srch); if (s == NULL) { if ((s = calloc(1, sizeof(*s))) == NULL) { fprintf(stderr,"out of memory"); return; } s->key = new_pri(); /* * We use a self reference for our key, but 
we set a * high bit to indicate that this is a substitution * reference. This will expedite table lookups later, * and prevent table lookups for situations that don't * require it. (In short, its a big win, because we * can skip a lot of binary searching.) */ set_pri(s->key, (nextsubst[curr_weight] | COLLATE_SUBST_PRIORITY), RESOLVED); nextsubst[curr_weight] += 1; for (i = 0; i < curr_subst; i++) { s->ref[i] = srch.ref[i]; } RB_INSERT(substs_ref, &substs_ref[curr_weight], s); if (RB_FIND(substs, &substs[curr_weight], s) != NULL) { INTERR; return; } RB_INSERT(substs, &substs[curr_weight], s); } curr_subst = 0; /* * We are using the current (unique) priority as a search key * in the substitution table. */ add_order_pri(s->key); } static void add_subst_pri(int32_t ref) { if (curr_subst >= COLLATE_STR_LEN) { fprintf(stderr,"substitution string is too long"); return; } subst_weights[curr_subst] = ref; curr_subst++; } void add_subst_char(wchar_t wc) { collchar_t *cc; if (((cc = get_collchar(wc, 1)) == NULL) || (cc->wc != wc)) { INTERR; return; } /* we take the weight for the character at that position */ add_subst_pri(cc->ref[curr_weight]); } void add_subst_collelem(collelem_t *e) { add_subst_pri(e->ref[curr_weight]); } void add_subst_collsym(collsym_t *s) { add_subst_pri(s->ref); } void add_subst_symbol(char *ptr) { collundef_t *cu; if ((cu = get_collundef(ptr)) != NULL) { add_subst_pri(cu->ref[curr_weight]); } } void add_weight(int32_t ref, int pass) { weight_t srch; weight_t *w; srch.pri = resolve_pri(ref); /* No translation of ignores */ if (srch.pri == 0) return; /* Substitution priorities are not weights */ if (srch.pri & COLLATE_SUBST_PRIORITY) return; if (RB_FIND(weights, &weights[pass], &srch) != NULL) return; if ((w = calloc(1, sizeof(*w))) == NULL) { fprintf(stderr, "out of memory"); return; } w->pri = srch.pri; RB_INSERT(weights, &weights[pass], w); } void add_weights(int32_t *refs) { int i; for (i = 0; i < NUM_WT; i++) { add_weight(refs[i], i); } } int32_t 
get_weight(int32_t ref, int pass) { weight_t srch; weight_t *w; int32_t pri; pri = resolve_pri(ref); if (pri & COLLATE_SUBST_PRIORITY) { return (pri); } if (pri <= 0) { return (pri); } srch.pri = pri; if ((w = RB_FIND(weights, &weights[pass], &srch)) == NULL) { INTERR; return (-1); } return (w->opt); } wchar_t * wsncpy(wchar_t *s1, const wchar_t *s2, size_t n) { wchar_t *os1 = s1; n++; while (--n > 0 && (*s1++ = htote(*s2++)) != 0) continue; if (n > 0) while (--n > 0) *s1++ = 0; return (os1); } #define RB_COUNT(x, name, head, cnt) do { \ (cnt) = 0; \ RB_FOREACH(x, name, (head)) { \ (cnt)++; \ } \ } while (0) #define RB_NUMNODES(type, name, head, cnt) do { \ type *t; \ cnt = 0; \ RB_FOREACH(t, name, head) { \ cnt++; \ } \ } while (0) void dump_collate(void) { FILE *f; int i, j, n; size_t sz; int32_t pri; collelem_t *ce; collchar_t *cc; subst_t *sb; - char vers[COLLATE_STR_LEN]; + char fmt_version[COLLATE_FMT_VERSION_LEN]; + char def_version[XLOCALE_DEF_VERSION_LEN]; collate_char_t chars[UCHAR_MAX + 1]; collate_large_t *large; collate_subst_t *subst[COLL_WEIGHTS_MAX]; collate_chain_t *chain; /* * We have to run through a preliminary pass to identify all the * weights that we use for each sorting level. */ for (i = 0; i < NUM_WT; i++) { add_weight(pri_ignore, i); } for (i = 0; i < NUM_WT; i++) { RB_FOREACH(sb, substs, &substs[i]) { for (j = 0; sb->ref[j]; j++) { add_weight(sb->ref[j], i); } } } RB_FOREACH(ce, elem_by_expand, &elem_by_expand) { add_weights(ce->ref); } RB_FOREACH(cc, collchars, &collchars) { add_weights(cc->ref); } /* * Now we walk the entire set of weights, removing the gaps * in the weights. This gives us optimum usage. The walk * occurs in priority. 
*/ for (i = 0; i < NUM_WT; i++) { weight_t *w; RB_FOREACH(w, weights, &weights[i]) { w->opt = nweight[i]; nweight[i] += 1; } } (void) memset(&chars, 0, sizeof (chars)); - (void) memset(vers, 0, COLLATE_STR_LEN); - (void) strlcpy(vers, COLLATE_VERSION, sizeof (vers)); + (void) memset(fmt_version, 0, COLLATE_FMT_VERSION_LEN); + (void) strlcpy(fmt_version, COLLATE_FMT_VERSION, sizeof (fmt_version)); + (void) memset(def_version, 0, XLOCALE_DEF_VERSION_LEN); + if (version) + (void) strlcpy(def_version, version, sizeof (def_version)); /* * We need to make sure we arrange for the UNDEFINED field * to show up. Also, set the total weight counts. */ for (i = 0; i < NUM_WT; i++) { if (resolve_pri(pri_undefined[i]) == -1) { set_pri(pri_undefined[i], -1, RESOLVED); /* they collate at the end of everything else */ collinfo.undef_pri[i] = htote(COLLATE_MAX_PRIORITY); } collinfo.pri_count[i] = htote(nweight[i]); } collinfo.pri_count[NUM_WT] = htote(max_wide()); collinfo.undef_pri[NUM_WT] = htote(COLLATE_MAX_PRIORITY); collinfo.directive[NUM_WT] = DIRECTIVE_UNDEFINED; /* * Ordinary character priorities */ for (i = 0; i <= UCHAR_MAX; i++) { if ((cc = get_collchar(i, 0)) != NULL) { for (j = 0; j < NUM_WT; j++) { chars[i].pri[j] = htote(get_weight(cc->ref[j], j)); } } else { for (j = 0; j < NUM_WT; j++) { chars[i].pri[j] = htote(get_weight(pri_undefined[j], j)); } /* * Per POSIX, for undefined characters, we * also have to add a last item, which is the * character code. */ chars[i].pri[NUM_WT] = htote(i); } } /* * Substitution tables */ for (i = 0; i < NUM_WT; i++) { collate_subst_t *st = NULL; subst_t *temp; RB_COUNT(temp, substs, &substs[i], n); subst_count[i] = n; if ((st = calloc(n, sizeof(collate_subst_t))) == NULL) { fprintf(stderr, "out of memory"); return; } n = 0; RB_FOREACH(sb, substs, &substs[i]) { if ((st[n].key = resolve_pri(sb->key)) < 0) { /* by definition these resolve! 
*/ INTERR; } if (st[n].key != (n | COLLATE_SUBST_PRIORITY)) { INTERR; } st[n].key = htote(st[n].key); for (j = 0; sb->ref[j]; j++) { st[n].pri[j] = htote(get_weight(sb->ref[j], i)); } n++; } if (n != subst_count[i]) INTERR; subst[i] = st; } /* * Chains, i.e. collating elements */ RB_NUMNODES(collelem_t, elem_by_expand, &elem_by_expand, chain_count); chain = calloc(chain_count, sizeof(collate_chain_t)); if (chain == NULL) { fprintf(stderr, "out of memory"); return; } n = 0; RB_FOREACH(ce, elem_by_expand, &elem_by_expand) { (void) wsncpy(chain[n].str, ce->expand, COLLATE_STR_LEN); for (i = 0; i < NUM_WT; i++) { chain[n].pri[i] = htote(get_weight(ce->ref[i], i)); } n++; } if (n != chain_count) INTERR; /* * Large (> UCHAR_MAX) character priorities */ RB_NUMNODES(collchar_t, collchars, &collchars, n); large = calloc(n, sizeof(collate_large_t)); if (large == NULL) { fprintf(stderr, "out of memory"); return; } i = 0; RB_FOREACH(cc, collchars, &collchars) { int undef = 0; /* we already gathered those */ if (cc->wc <= UCHAR_MAX) continue; for (j = 0; j < NUM_WT; j++) { if ((pri = get_weight(cc->ref[j], j)) < 0) { undef = 1; } if (undef && (pri >= 0)) { /* if undefined, then all priorities are */ INTERR; } else { large[i].pri.pri[j] = htote(pri); } } if (!undef) { large[i].val = htote(cc->wc); large_count = i++; } } if ((f = open_category()) == NULL) { return; } /* Time to write the entire data set out */ for (i = 0; i < NUM_WT; i++) collinfo.subst_count[i] = htote(subst_count[i]); collinfo.chain_count = htote(chain_count); collinfo.large_count = htote(large_count); - if ((wr_category(vers, COLLATE_STR_LEN, f) < 0) || + if ((wr_category(fmt_version, COLLATE_FMT_VERSION_LEN, f) < 0) || + (wr_category(def_version, XLOCALE_DEF_VERSION_LEN, f) < 0) || (wr_category(&collinfo, sizeof (collinfo), f) < 0) || (wr_category(&chars, sizeof (chars), f) < 0)) { return; } for (i = 0; i < NUM_WT; i++) { sz = sizeof (collate_subst_t) * subst_count[i]; if (wr_category(subst[i], sz, f) < 0) { 
return; } } sz = sizeof (collate_chain_t) * chain_count; if (wr_category(chain, sz, f) < 0) { return; } sz = sizeof (collate_large_t) * large_count; if (wr_category(large, sz, f) < 0) { return; } close_category(f); } Index: head/usr.bin/localedef/localedef.1 =================================================================== --- head/usr.bin/localedef/localedef.1 (revision 367475) +++ head/usr.bin/localedef/localedef.1 (revision 367476) @@ -1,271 +1,280 @@ .\" Copyright (c) 1992, X/Open Company Limited All Rights Reserved .\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved .\" Portions Copyright 2013 DEY Storage Systems, Inc. .\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for .\" permission to reproduce portions of its copyrighted documentation. .\" Original documentation from The Open Group can be obtained online at .\" http://www.opengroup.org/bookstore/. .\" The Institute of Electrical and Electronics Engineers and The Open Group, .\" have given us permission to reprint portions of their documentation. In .\" the following statement, the phrase "this text" refers to portions of the .\" system documentation. Portions of this text are reprinted and reproduced .\" in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, .\" 2004 Edition, Standard for Information Technology -- Portable Operating .\" System Interface (POSIX), The Open Group Base Specifications Issue 6, .\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics .\" Engineers, Inc and The Open Group. In the event of any discrepancy between .\" these versions and the original IEEE and The Open Group Standard, the .\" original IEEE and The Open Group Standard is the referee document. The .\" original Standard can be obtained online at .\" http://www.opengroup.org/unix/online.html. .\" This notice shall appear on any product containing this material. 
.\" The contents of this file are subject to the terms of the Common .\" Development and Distribution License (the "License"). You may not use .\" this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or .\" http://www.opensolaris.org/os/licensing. See the License for the specific .\" language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and .\" include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, .\" add the following below this CDDL HEADER, with the fields enclosed by .\" brackets "[]" replaced with your own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" .\" $FreeBSD$ .\" -.Dd October 18, 2018 +.Dd November 8, 2020 .Dt LOCALEDEF 1 .Os .Sh NAME .Nm localedef .Nd define locale environment .Sh SYNOPSIS .Nm .Op Fl bcDlUv .Op Fl f Ar charmap .Op Fl i Ar sourcefile .Op Fl u Ar codeset .Op Fl w Ar widthfile .Ar localename .Sh DESCRIPTION The .Nm utility converts source definitions for locale categories into a format usable by the functions and utilities whose operational behavior is determined by the setting of the locale environment variables; see .Xr environ 7 . .Pp The utility reads source definitions for one or more locale categories belonging to the same locale from the file named in the .Fl i option (if specified) or from standard input. .Pp Each category source definition is identified by the corresponding environment variable name and terminated by an .Sy END .Em category-name statement. The following categories are supported: .Bl -tag -width ".Ev LC_MONETARY" .It Ev LC_CTYPE Defines character classification and case conversion. .It Ev LC_COLLATE Defines collation rules. .It Ev LC_MONETARY Defines the format and symbols used in formatting of monetary information. 
.It Ev LC_NUMERIC Defines the decimal delimiter, grouping and grouping symbol for non-monetary numeric editing. .It Ev LC_TIME Defines the format and content of date and time information. .It Ev LC_MESSAGES Defines the format and values of affirmative and negative responses. .El .Pp The following options are supported: .Bl -tag -width indent .It Fl b Use big-endian byte order for output. .It Fl c Creates permanent output even if warning messages have been issued. .It Fl D BSD-style output. Rather than the default of creating the .Ar localename directory and creating files like .Pa LC_CTYPE , .Pa LC_COLLATE , etc.\& in that directory, the output file names have the format .Dq <localename>.<category> and are dumped to the current directory. .It Fl f Ar charmap Specifies the pathname of a file containing a mapping of character symbols and collating element symbols to actual character encodings. This option must be specified if symbolic names (other than collating symbols defined in a .Sy collating-symbol keyword) are used. If the .Fl f option is not present, the default character mapping will be used. .It Fl i Ar sourcefile The path name of a file containing the source definitions. If this option is not present, source definitions will be read from standard input. .It Fl l Use little-endian byte order for output. .It Fl u Ar codeset Specifies the name of a codeset used as the target mapping of character symbols and collating element symbols whose encoding values are defined in terms of the ISO/IEC 10646-1:2000 standard position constant values. See .Sx NOTES . .It Fl U Ignore the presence of character symbols that have no matching character definition. This facilitates the use of a common locale definition file to be used across multiple encodings, even when some symbols are not present in a given encoding. .It Fl v Emit verbose debugging output on standard output. .It Fl w Ar widthfile The path name of the file containing character screen width definitions.
If not supplied, then default screen widths will be assumed, which will generally not account for East Asian encodings requiring more than a single character cell to display, nor for combining or accent marks that occupy no additional screen width. +.It Fl V Ar version +Specifies a version string describing the version of the locale definition. +This string can be retrieved with +.Xr querylocale 3 , +and is intended to allow applications to detect locale definition changes. +Currently it is stored only for the +.Sy LC_COLLATE +category. .El .Pp The following operands are required: .Bl -tag -width ".Ar localename" .It Ar localename Identifies the locale. If the name contains one or more slash characters, .Ar localename will be interpreted as a path name where the created locale definitions will be stored. This capability may be restricted to users with appropriate privileges. (As a consequence of specifying one .Ar localename , although several categories can be processed in one execution, only categories belonging to the same locale can be processed.) .El .Sh OUTPUT .Nm creates a directory of files that represents the locale's data, unless instructed otherwise by the .Fl D ( BSD output) option. The contents of this directory should generally be copied into the appropriate subdirectory of .Pa /usr/share/locale in order for the definitions to be visible to programs linked with libc. .Sh ENVIRONMENT See .Xr environ 7 for definitions of the following environment variables that affect the execution of .Nm : .Ev LANG , .Ev LC_ALL , .Ev LC_COLLATE , .Ev LC_CTYPE , .Ev LC_MESSAGES , .Ev LC_MONETARY , .Ev LC_NUMERIC , .Ev LC_TIME , and .Ev NLSPATH . .Sh EXIT STATUS The following exit values are returned: .Bl -tag -width XX .It 0 No errors occurred and the locales were successfully created. .It 1 Warnings occurred and the locales were successfully created.
.It 2 The locale specification exceeded implementation limits or the coded character set or sets used were not supported by the implementation, and no locale was created. .It >3 Warnings or errors occurred and no output was created. .El .Pp If an error is detected, no permanent output will be created. .Sh SEE ALSO .Xr locale 1 , .Xr iconv_open 3 , .Xr nl_langinfo 3 , +.Xr querylocale 3 , .Xr strftime 3 , .Xr environ 7 .Sh WARNINGS If warnings occur, permanent output will be created if the .Fl c option was specified. The following conditions will cause warning messages to be issued: .Bl -bullet .It If a symbolic name not found in the .Pa charmap file is used for the descriptions of the .Sy LC_CTYPE or .Sy LC_COLLATE categories (for other categories, this will be an error condition). .It If optional keywords not supported by the implementation are present in the source. .El .Sh NOTES When the .Fl u option is used, the .Ar codeset option-argument is interpreted as a name of a codeset to which the ISO/IEC 10646-1:2000 standard position constant values are converted. Both the ISO/IEC 10646-1:2000 standard position constant values and other formats (decimal, hexadecimal, or octal) are valid as encoding values within the charmap file. The .Ar codeset can be any codeset that is supported by the .Fn iconv_open 3 function. .Pp When conflicts occur between the charmap specification of .Ar codeset , .Em mb_cur_max , or .Em mb_cur_min and the corresponding value for the codeset represented by the .Fl u option-argument .Ar codeset , the .Nm utility fails with an error. .Pp When conflicts occur between the charmap encoding values specified for symbolic names of characters of the portable character set and the character encoding values defined by the US-ASCII, the result is unspecified. .Sh HISTORY .Nm first appeared in .Fx 11 . .Pp It was written by .An Garrett D'Amore .Aq Mt garrett@nexenta.com for Illumos. 
.An John Marino .Aq Mt draco@marino.st provided the alterations necessary to compile cleanly on .Dx . .An Baptiste Daroussin .Aq Mt bapt@FreeBSD.org ported it to .Fx and converted it to .Xr tree 3 . Index: head/usr.bin/localedef/localedef.c =================================================================== --- head/usr.bin/localedef/localedef.c (revision 367475) +++ head/usr.bin/localedef/localedef.c (revision 367476) @@ -1,370 +1,381 @@ /*- * Copyright 2018 Nexenta Systems, Inc. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * POSIX localedef.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include +#include "collate.h" #include "localedef.h" #include "parser.h" #ifndef TEXT_DOMAIN #define TEXT_DOMAIN "SYS_TEST" #endif static int bsd = 0; static int byteorder = 0; int verbose = 0; int undefok = 0; int warnok = 0; static char *locname = NULL; static char locpath[PATH_MAX]; +char *version = NULL; const char * category_name(void) { switch (get_category()) { case T_CHARMAP: return ("CHARMAP"); case T_WIDTH: return ("WIDTH"); case T_COLLATE: return ("LC_COLLATE"); case T_CTYPE: return ("LC_CTYPE"); case T_MESSAGES: return ("LC_MESSAGES"); case T_MONETARY: return ("LC_MONETARY"); case T_NUMERIC: return ("LC_NUMERIC"); case T_TIME: return ("LC_TIME"); default: INTERR; return (NULL); } } static char * category_file(void) { if (bsd) (void) snprintf(locpath, sizeof (locpath), "%s.%s", locname, category_name()); else (void) snprintf(locpath, sizeof (locpath), "%s/%s", locname, category_name()); return (locpath); } FILE * open_category(void) { FILE *file; if (verbose) { (void) printf("Writing category %s: ", category_name()); (void) fflush(stdout); } /* make the parent directory */ if (!bsd) (void) mkdir(dirname(category_file()), 0755); /* * note that we have to regenerate the file name, as dirname * clobbered it. */ file = fopen(category_file(), "w"); if (file == NULL) { errf("%s", strerror(errno)); return (NULL); } return (file); } void close_category(FILE *f) { if (fchmod(fileno(f), 0644) < 0) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); } if (fclose(f) < 0) { (void) unlink(category_file()); errf("%s", strerror(errno)); } if (verbose) { (void) fprintf(stdout, "done.\n"); (void) fflush(stdout); } } /* * This function is used when copying the category from another * locale. Note that the copy is actually performed using a hard * link for efficiency. 
*/ void copy_category(char *src) { char srcpath[PATH_MAX]; int rv; (void) snprintf(srcpath, sizeof (srcpath), "%s/%s", src, category_name()); rv = access(srcpath, R_OK); if ((rv != 0) && (strchr(srcpath, '/') == NULL)) { /* Maybe we should try the system locale */ (void) snprintf(srcpath, sizeof (srcpath), "/usr/lib/locale/%s/%s", src, category_name()); rv = access(srcpath, R_OK); } if (rv != 0) { fprintf(stderr,"source locale data unavailable: %s", src); return; } if (verbose > 1) { (void) printf("Copying category %s from %s: ", category_name(), src); (void) fflush(stdout); } /* make the parent directory */ if (!bsd) (void) mkdir(dirname(category_file()), 0755); if (link(srcpath, category_file()) != 0) { fprintf(stderr,"unable to copy locale data: %s", strerror(errno)); return; } if (verbose > 1) { (void) printf("done.\n"); } } int putl_category(const char *s, FILE *f) { if (s && fputs(s, f) == EOF) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); return (EOF); } if (fputc('\n', f) == EOF) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); return (EOF); } return (0); } int wr_category(void *buf, size_t sz, FILE *f) { if (!sz) { return (0); } if (fwrite(buf, sz, 1, f) < 1) { (void) fclose(f); (void) unlink(category_file()); errf("%s", strerror(errno)); return (EOF); } return (0); } uint32_t htote(uint32_t arg) { if (byteorder == 4321) return (htobe32(arg)); else if (byteorder == 1234) return (htole32(arg)); else return (arg); } int yyparse(void); static void usage(void) { (void) fprintf(stderr, "Usage: localedef [options] localename\n"); (void) fprintf(stderr, "[options] are:\n"); (void) fprintf(stderr, " -D : BSD-style output\n"); (void) fprintf(stderr, " -b : big-endian output\n"); (void) fprintf(stderr, " -c : ignore warnings\n"); (void) fprintf(stderr, " -l : little-endian output\n"); (void) fprintf(stderr, " -v : verbose output\n"); (void) fprintf(stderr, " -U : ignore undefined symbols\n"); (void) 
fprintf(stderr, " -f charmap : use given charmap file\n"); (void) fprintf(stderr, " -u encoding : assume encoding\n"); (void) fprintf(stderr, " -w widths : use screen widths file\n"); (void) fprintf(stderr, " -i locsrc : source file for locale\n"); + (void) fprintf(stderr, " -V version : version string for locale\n"); exit(4); } int main(int argc, char **argv) { int c; char *lfname = NULL; char *cfname = NULL; char *wfname = NULL; DIR *dir; init_charmap(); init_collate(); init_ctype(); init_messages(); init_monetary(); init_numeric(); init_time(); #if YYDEBUG yydebug = 0; #endif (void) setlocale(LC_ALL, ""); - while ((c = getopt(argc, argv, "blw:i:cf:u:vUD")) != -1) { + while ((c = getopt(argc, argv, "blw:i:cf:u:vUDV:")) != -1) { switch (c) { case 'D': bsd = 1; break; case 'b': case 'l': if (byteorder != 0) usage(); byteorder = c == 'b' ? 4321 : 1234; break; case 'v': verbose++; break; case 'i': lfname = optarg; break; case 'u': set_wide_encoding(optarg); break; case 'f': cfname = optarg; break; case 'U': undefok++; break; case 'c': warnok++; break; case 'w': wfname = optarg; break; case '?': usage(); break; + case 'V': + version = optarg; + break; } } if ((argc - 1) != (optind)) { usage(); } locname = argv[argc - 1]; if (verbose) { (void) printf("Processing locale %s.\n", locname); + } + + if (version && strlen(version) >= XLOCALE_DEF_VERSION_LEN) { + (void) fprintf(stderr, "Version string too long.\n"); + exit(1); } if (cfname) { if (verbose) (void) printf("Loading charmap %s.\n", cfname); reset_scanner(cfname); (void) yyparse(); } if (wfname) { if (verbose) (void) printf("Loading widths %s.\n", wfname); reset_scanner(wfname); (void) yyparse(); } if (verbose) { (void) printf("Loading POSIX portable characters.\n"); } add_charmap_posix(); if (lfname) { reset_scanner(lfname); } else { reset_scanner(NULL); } /* make the directory for the locale if not already present */ if (!bsd) { while ((dir = opendir(locname)) == NULL) { if ((errno != ENOENT) || (mkdir(locname, 
0755) < 0)) { errf("%s", strerror(errno)); } } (void) closedir(dir); (void) mkdir(dirname(category_file()), 0755); } (void) yyparse(); if (verbose) { (void) printf("All done.\n"); } return (warnings ? 1 : 0); } Index: head/usr.bin/localedef/localedef.h =================================================================== --- head/usr.bin/localedef/localedef.h (revision 367475) +++ head/usr.bin/localedef/localedef.h (revision 367476) @@ -1,177 +1,179 @@ /*- * Copyright 2018 Nexenta Systems, Inc. * Copyright 2015 John Marino * * This source code is derived from the illumos localedef command, and * provided under BSD-style license terms by Nexenta Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * POSIX localedef. 
*/ /* Common header files. */ #include #include #include #include #include extern int com_char; extern int esc_char; extern int mb_cur_max; extern int mb_cur_min; extern int last_kw; extern int verbose; #if YYDEBUG extern int yydebug; #endif extern int lineno; extern int undefok; /* mostly ignore undefined symbols */ extern int warnok; extern int warnings; +extern char *version; + int yylex(void); void yyerror(const char *); _Noreturn void errf(const char *, ...) __printflike(1, 2); void warn(const char *, ...) __printflike(1, 2); int putl_category(const char *, FILE *); int wr_category(void *, size_t, FILE *); FILE *open_category(void); void close_category(FILE *); void copy_category(char *); const char *category_name(void); int get_category(void); int get_symbol(void); int get_escaped(int); int get_wide(void); void reset_scanner(const char *); void scan_to_eol(void); void add_wcs(wchar_t); void add_tok(int); wchar_t *get_wcs(void); uint32_t htote(uint32_t); /* charmap.c - CHARMAP handling */ void init_charmap(void); void add_charmap(const char *, int); void add_charmap_undefined(char *); void add_charmap_posix(void); void add_charmap_range(char *, char *, int); void add_charmap_char(const char *name, int val); int lookup_charmap(const char *, wchar_t *); int check_charmap_undefined(char *); int check_charmap(wchar_t); /* collate.o - LC_COLLATE handling */ typedef struct collelem collelem_t; typedef struct collsym collsym_t; void init_collate(void); void define_collsym(char *); void define_collelem(char *, wchar_t *); void add_order_directive(void); void add_order_bit(int); void dump_collate(void); collsym_t *lookup_collsym(char *); collelem_t *lookup_collelem(char *); void start_order_collelem(collelem_t *); void start_order_undefined(void); void start_order_symbol(char *); void start_order_char(wchar_t); void start_order_ellipsis(void); void end_order_collsym(collsym_t *); void end_order(void); void add_weight(int32_t, int); void add_weights(int32_t *); void 
add_weight_num(int); void add_order_collelem(collelem_t *); void add_order_collsym(collsym_t *); void add_order_char(wchar_t); void add_order_ignore(void); void add_order_ellipsis(void); void add_order_symbol(char *); void add_order_subst(void); void add_subst_char(wchar_t); void add_subst_collsym(collsym_t *); void add_subst_collelem(collelem_t *); void add_subst_symbol(char *); int32_t get_weight(int32_t, int); wchar_t * wsncpy(wchar_t *, const wchar_t *, size_t); /* ctype.c - LC_CTYPE handling */ void init_ctype(void); void add_ctype(int); void add_ctype_range(wchar_t); void add_width(int, int); void add_width_range(int, int, int); void add_caseconv(int, int); void dump_ctype(void); /* messages.c - LC_MESSAGES handling */ void init_messages(void); void add_message(wchar_t *); void dump_messages(void); /* monetary.c - LC_MONETARY handling */ void init_monetary(void); void add_monetary_str(wchar_t *); void add_monetary_num(int); void reset_monetary_group(void); void add_monetary_group(int); void dump_monetary(void); /* numeric.c - LC_NUMERIC handling */ void init_numeric(void); void add_numeric_str(wchar_t *); void reset_numeric_group(void); void add_numeric_group(int); void dump_numeric(void); /* time.c - LC_TIME handling */ void init_time(void); void add_time_str(wchar_t *); void reset_time_list(void); void add_time_list(wchar_t *); void check_time_list(void); void dump_time(void); /* wide.c - Wide character handling. */ int to_wide(wchar_t *, const char *); int to_mbs(char *, wchar_t); int to_mb(char *, wchar_t); char *to_mb_string(const wchar_t *); void set_wide_encoding(const char *); void werr(const char *, ...); const char *get_wide_encoding(void); int max_wide(void); //#define _(x) gettext(x) #define INTERR fprintf(stderr,"internal fault (%s:%d)", __FILE__, __LINE__)