Index: head/include/_ctype.h =================================================================== --- head/include/_ctype.h (revision 172618) +++ head/include/_ctype.h (revision 172619) @@ -1,157 +1,190 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From @(#)ctype.h 8.4 (Berkeley) 1/21/94 * From FreeBSD: src/include/ctype.h,v 1.27 2004/06/23 07:11:39 tjr Exp * $FreeBSD$ */ #ifndef __CTYPE_H_ #define __CTYPE_H_ #include #include #define _CTYPE_A 0x00000100L /* Alpha */ #define _CTYPE_C 0x00000200L /* Control */ #define _CTYPE_D 0x00000400L /* Digit */ #define _CTYPE_G 0x00000800L /* Graph */ #define _CTYPE_L 0x00001000L /* Lower */ #define _CTYPE_P 0x00002000L /* Punct */ #define _CTYPE_S 0x00004000L /* Space */ #define _CTYPE_U 0x00008000L /* Upper */ #define _CTYPE_X 0x00010000L /* X digit */ #define _CTYPE_B 0x00020000L /* Blank */ #define _CTYPE_R 0x00040000L /* Print */ #define _CTYPE_I 0x00080000L /* Ideogram */ #define _CTYPE_T 0x00100000L /* Special */ #define _CTYPE_Q 0x00200000L /* Phonogram */ #define _CTYPE_SW0 0x20000000L /* 0 width character */ #define _CTYPE_SW1 0x40000000L /* 1 width character */ #define _CTYPE_SW2 0x80000000L /* 2 width character */ #define _CTYPE_SW3 0xc0000000L /* 3 width character */ #define _CTYPE_SWM 0xe0000000L /* Mask for screen width data */ #define _CTYPE_SWS 30 /* Bits to shift to get width */ /* See comments in about __ct_rune_t. */ __BEGIN_DECLS unsigned long ___runetype(__ct_rune_t) __pure; __ct_rune_t ___tolower(__ct_rune_t) __pure; __ct_rune_t ___toupper(__ct_rune_t) __pure; __END_DECLS /* * _EXTERNALIZE_CTYPE_INLINES_ is defined in locale/nomacros.c to tell us * to generate code for extern versions of all our inline functions. 
*/ #ifdef _EXTERNALIZE_CTYPE_INLINES_ #define _USE_CTYPE_INLINE_ #define static #define __inline #endif +extern int __mb_sb_limit; + /* * Use inline functions if we are allowed to and the compiler supports them. */ #if !defined(_DONT_USE_CTYPE_INLINE_) && \ (defined(_USE_CTYPE_INLINE_) || defined(__GNUC__) || defined(__cplusplus)) #include static __inline int __maskrune(__ct_rune_t _c, unsigned long _f) { return ((_c < 0 || _c >= _CACHED_RUNES) ? ___runetype(_c) : _CurrentRuneLocale->__runetype[_c]) & _f; } static __inline int +__sbmaskrune(__ct_rune_t _c, unsigned long _f) +{ + return (_c < 0 || _c >= __mb_sb_limit) ? 0 : + _CurrentRuneLocale->__runetype[_c] & _f; +} + +static __inline int __istype(__ct_rune_t _c, unsigned long _f) { return (!!__maskrune(_c, _f)); } static __inline int +__sbistype(__ct_rune_t _c, unsigned long _f) +{ + return (!!__sbmaskrune(_c, _f)); +} + +static __inline int __isctype(__ct_rune_t _c, unsigned long _f) { - return (_c < 0 || _c >= _CACHED_RUNES) ? 0 : + return (_c < 0 || _c >= __mb_sb_limit) ? 0 : !!(_DefaultRuneLocale.__runetype[_c] & _f); } static __inline __ct_rune_t __toupper(__ct_rune_t _c) { return (_c < 0 || _c >= _CACHED_RUNES) ? ___toupper(_c) : _CurrentRuneLocale->__mapupper[_c]; } static __inline __ct_rune_t +__sbtoupper(__ct_rune_t _c) +{ + return (_c < 0 || _c >= __mb_sb_limit) ? _c : + _CurrentRuneLocale->__mapupper[_c]; +} + +static __inline __ct_rune_t __tolower(__ct_rune_t _c) { return (_c < 0 || _c >= _CACHED_RUNES) ? ___tolower(_c) : _CurrentRuneLocale->__maplower[_c]; } +static __inline __ct_rune_t +__sbtolower(__ct_rune_t _c) +{ + return (_c < 0 || _c >= __mb_sb_limit) ? _c : + _CurrentRuneLocale->__maplower[_c]; +} + static __inline int __wcwidth(__ct_rune_t _c) { unsigned int _x; if (_c == 0) return (0); _x = (unsigned int)__maskrune(_c, _CTYPE_SWM|_CTYPE_R); if ((_x & _CTYPE_SWM) != 0) return ((_x & _CTYPE_SWM) >> _CTYPE_SWS); return ((_x & _CTYPE_R) != 0 ? 
1 : -1); } #else /* not using inlines */ __BEGIN_DECLS int __maskrune(__ct_rune_t, unsigned long); +int __sbmaskrune(__ct_rune_t, unsigned long); int __istype(__ct_rune_t, unsigned long); +int __sbistype(__ct_rune_t, unsigned long); int __isctype(__ct_rune_t, unsigned long); __ct_rune_t __toupper(__ct_rune_t); +__ct_rune_t __sbtoupper(__ct_rune_t); __ct_rune_t __tolower(__ct_rune_t); +__ct_rune_t __sbtolower(__ct_rune_t); int __wcwidth(__ct_rune_t); __END_DECLS #endif /* using inlines */ #endif /* !__CTYPE_H_ */ Index: head/include/ctype.h =================================================================== --- head/include/ctype.h (revision 172618) +++ head/include/ctype.h (revision 172619) @@ -1,135 +1,135 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ctype.h 8.4 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _CTYPE_H_ #define _CTYPE_H_ #include #include #include <_ctype.h> __BEGIN_DECLS int isalnum(int); int isalpha(int); int iscntrl(int); int isdigit(int); int isgraph(int); int islower(int); int isprint(int); int ispunct(int); int isspace(int); int isupper(int); int isxdigit(int); int tolower(int); int toupper(int); #if __XSI_VISIBLE int _tolower(int); int _toupper(int); int isascii(int); int toascii(int); #endif #if __ISO_C_VISIBLE >= 1999 int isblank(int); #endif #if __BSD_VISIBLE int digittoint(int); int ishexnumber(int); int isideogram(int); int isnumber(int); int isphonogram(int); int isrune(int); int isspecial(int); #endif __END_DECLS -#define isalnum(c) __istype((c), _CTYPE_A|_CTYPE_D) -#define isalpha(c) __istype((c), _CTYPE_A) -#define iscntrl(c) __istype((c), _CTYPE_C) +#define isalnum(c) __sbistype((c), _CTYPE_A|_CTYPE_D) +#define isalpha(c) __sbistype((c), _CTYPE_A) +#define iscntrl(c) __sbistype((c), _CTYPE_C) #define isdigit(c) __isctype((c), _CTYPE_D) /* ANSI -- 
locale independent */ -#define isgraph(c) __istype((c), _CTYPE_G) -#define islower(c) __istype((c), _CTYPE_L) -#define isprint(c) __istype((c), _CTYPE_R) -#define ispunct(c) __istype((c), _CTYPE_P) -#define isspace(c) __istype((c), _CTYPE_S) -#define isupper(c) __istype((c), _CTYPE_U) +#define isgraph(c) __sbistype((c), _CTYPE_G) +#define islower(c) __sbistype((c), _CTYPE_L) +#define isprint(c) __sbistype((c), _CTYPE_R) +#define ispunct(c) __sbistype((c), _CTYPE_P) +#define isspace(c) __sbistype((c), _CTYPE_S) +#define isupper(c) __sbistype((c), _CTYPE_U) #define isxdigit(c) __isctype((c), _CTYPE_X) /* ANSI -- locale independent */ -#define tolower(c) __tolower(c) -#define toupper(c) __toupper(c) +#define tolower(c) __sbtolower(c) +#define toupper(c) __sbtoupper(c) #if __XSI_VISIBLE /* * POSIX.1-2001 specifies _tolower() and _toupper() to be macros equivalent to * tolower() and toupper() respectively, minus extra checking to ensure that * the argument is a lower or uppercase letter respectively. We've chosen to * implement these macros with the same error checking as tolower() and * toupper() since this doesn't violate the specification itself, only its * intent. We purposely leave _tolower() and _toupper() undocumented to * discourage their use. * * XXX isascii() and toascii() should similarly be undocumented. 
*/ -#define _tolower(c) __tolower(c) -#define _toupper(c) __toupper(c) +#define _tolower(c) __sbtolower(c) +#define _toupper(c) __sbtoupper(c) #define isascii(c) (((c) & ~0x7F) == 0) #define toascii(c) ((c) & 0x7F) #endif #if __ISO_C_VISIBLE >= 1999 -#define isblank(c) __istype((c), _CTYPE_B) +#define isblank(c) __sbistype((c), _CTYPE_B) #endif #if __BSD_VISIBLE -#define digittoint(c) __maskrune((c), 0xFF) -#define ishexnumber(c) __istype((c), _CTYPE_X) -#define isideogram(c) __istype((c), _CTYPE_I) -#define isnumber(c) __istype((c), _CTYPE_D) -#define isphonogram(c) __istype((c), _CTYPE_Q) -#define isrune(c) __istype((c), 0xFFFFFF00L) -#define isspecial(c) __istype((c), _CTYPE_T) +#define digittoint(c) __sbmaskrune((c), 0xFF) +#define ishexnumber(c) __sbistype((c), _CTYPE_X) +#define isideogram(c) __sbistype((c), _CTYPE_I) +#define isnumber(c) __sbistype((c), _CTYPE_D) +#define isphonogram(c) __sbistype((c), _CTYPE_Q) +#define isrune(c) __sbistype((c), 0xFFFFFF00L) +#define isspecial(c) __sbistype((c), _CTYPE_T) #endif #endif /* !_CTYPE_H_ */ Index: head/include/wctype.h =================================================================== --- head/include/wctype.h (revision 172618) +++ head/include/wctype.h (revision 172619) @@ -1,118 +1,118 @@ /*- * Copyright (c)1999 Citrus Project, * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * citrus Id: wctype.h,v 1.4 2000/12/21 01:50:21 itojun Exp * $NetBSD: wctype.h,v 1.3 2000/12/22 14:16:16 itojun Exp $ * $FreeBSD$ */ #ifndef _WCTYPE_H_ #define _WCTYPE_H_ #include #include #include <_ctype.h> #ifndef _WCTRANS_T typedef int wctrans_t; #define _WCTRANS_T #endif #ifndef _WCTYPE_T typedef unsigned long wctype_t; #define _WCTYPE_T #endif #ifndef _WINT_T_DECLARED typedef __wint_t wint_t; #define _WINT_T_DECLARED #endif #ifndef WEOF #define WEOF ((wint_t)-1) #endif __BEGIN_DECLS int iswalnum(wint_t); int iswalpha(wint_t); int iswblank(wint_t); int iswcntrl(wint_t); int iswctype(wint_t, wctype_t); int iswdigit(wint_t); int iswgraph(wint_t); int iswlower(wint_t); int iswprint(wint_t); int iswpunct(wint_t); int iswspace(wint_t); int iswupper(wint_t); int iswxdigit(wint_t); wint_t towctrans(wint_t, wctrans_t); wint_t towlower(wint_t); wint_t towupper(wint_t); wctrans_t wctrans(const char *); wctype_t wctype(const char *); #if __BSD_VISIBLE wint_t iswascii(wint_t); wint_t iswhexnumber(wint_t); wint_t iswideogram(wint_t); wint_t iswnumber(wint_t); wint_t iswphonogram(wint_t); wint_t iswrune(wint_t); wint_t iswspecial(wint_t); wint_t nextwctype(wint_t, wctype_t); #endif __END_DECLS #define iswalnum(wc) 
__istype((wc), _CTYPE_A|_CTYPE_D) #define iswalpha(wc) __istype((wc), _CTYPE_A) #define iswblank(wc) __istype((wc), _CTYPE_B) #define iswcntrl(wc) __istype((wc), _CTYPE_C) #define iswctype(wc, charclass) __istype((wc), (charclass)) #define iswdigit(wc) __isctype((wc), _CTYPE_D) #define iswgraph(wc) __istype((wc), _CTYPE_G) #define iswlower(wc) __istype((wc), _CTYPE_L) #define iswprint(wc) __istype((wc), _CTYPE_R) #define iswpunct(wc) __istype((wc), _CTYPE_P) #define iswspace(wc) __istype((wc), _CTYPE_S) #define iswupper(wc) __istype((wc), _CTYPE_U) #define iswxdigit(wc) __isctype((wc), _CTYPE_X) #define towlower(wc) __tolower(wc) #define towupper(wc) __toupper(wc) #if __BSD_VISIBLE -#define iswascii(wc) (((wc) & ~0x7F) == 0) +#define iswascii(wc) ((wc) < 0x80) #define iswhexnumber(wc) __istype((wc), _CTYPE_X) #define iswideogram(wc) __istype((wc), _CTYPE_I) #define iswnumber(wc) __istype((wc), _CTYPE_D) #define iswphonogram(wc) __istype((wc), _CTYPE_Q) #define iswrune(wc) __istype((wc), 0xFFFFFF00L) #define iswspecial(wc) __istype((wc), _CTYPE_T) #endif #endif /* _WCTYPE_H_ */ Index: head/lib/libc/locale/Symbol.map =================================================================== --- head/lib/libc/locale/Symbol.map (revision 172618) +++ head/lib/libc/locale/Symbol.map (revision 172619) @@ -1,104 +1,109 @@ /* * $FreeBSD$ */ FBSD_1.0 { btowc; digittoint; isalnum; isalpha; isascii; isblank; iscntrl; isdigit; isgraph; ishexnumber; isideogram; islower; isnumber; isphonogram; isprint; ispunct; isrune; isspace; isspecial; isupper; isxdigit; toascii; tolower; toupper; iswalnum; iswalpha; iswascii; iswblank; iswcntrl; iswdigit; iswgraph; iswhexnumber; iswideogram; iswlower; iswnumber; iswphonogram; iswprint; iswpunct; iswrune; iswspace; iswspecial; iswupper; iswxdigit; towlower; towupper; localeconv; mblen; mbrlen; mbrtowc; mbsinit; mbsnrtowcs; mbsrtowcs; mbstowcs; mbtowc; nextwctype; nl_langinfo; __maskrune; + __sbmaskrune; __istype; + __sbistype; __isctype; __toupper; + 
__sbtoupper; __tolower; + __sbtolower; __wcwidth; __mb_cur_max; + __mb_sb_limit; rpmatch; ___runetype; setlocale; _DefaultRuneLocale; _CurrentRuneLocale; ___tolower; ___toupper; wcrtomb; wcsftime; wcsnrtombs; wcsrtombs; wcstod; wcstof; wcstoimax; wcstol; wcstold; wcstoll; wcstombs; wcstoul; wcstoull; wcstoumax; wctob; wctomb; towctrans; wctrans; iswctype; wctype; wcwidth; }; FBSDprivate_1.0 { _PathLocale; __detect_path_locale; __collate_load_error; __collate_range_cmp; }; Index: head/lib/libc/locale/big5.c =================================================================== --- head/lib/libc/locale/big5.c (revision 172618) +++ head/lib/libc/locale/big5.c (revision 172619) @@ -1,172 +1,175 @@ /*- * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)big5.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _BIG5_mbsinit(const mbstate_t *); static size_t _BIG5_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); typedef struct { wchar_t ch; } _BIG5State; int _BIG5_init(_RuneLocale *rl) { __mbrtowc = _BIG5_mbrtowc; __wcrtomb = _BIG5_wcrtomb; __mbsinit = _BIG5_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } static int _BIG5_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _BIG5State *)ps)->ch == 0); } static __inline int _big5_check(u_int c) { c &= 0xff; return ((c >= 0xa1 && c <= 0xfe) ? 2 : 1); } static size_t _BIG5_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _BIG5State *bs; wchar_t wc; size_t len; bs = (_BIG5State *)ps; if ((bs->ch & ~0xFF) != 0) { /* Bad conversion state. 
*/ errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (bs->ch != 0) { if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (bs->ch << 8) | (*s & 0xFF); if (pwc != NULL) *pwc = wc; bs->ch = 0; return (1); } len = (size_t)_big5_check(*s); wc = *s++ & 0xff; if (len == 2) { if (n < 2) { /* Incomplete multibyte sequence */ bs->ch = wc; return ((size_t)-2); } if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; return (2); } else { if (pwc != NULL) *pwc = wc; return (wc == L'\0' ? 0 : 1); } } static size_t _BIG5_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _BIG5State *bs; bs = (_BIG5State *)ps; if (bs->ch != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc & 0x8000) { *s++ = (wc >> 8) & 0xff; *s = wc & 0xff; return (2); } *s = wc & 0xff; return (1); } Index: head/lib/libc/locale/euc.c =================================================================== --- head/lib/libc/locale/euc.c (revision 172618) +++ head/lib/libc/locale/euc.c (revision 172619) @@ -1,265 +1,268 @@ /*- * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _EUC_mbsinit(const mbstate_t *); static size_t _EUC_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); typedef struct { int count[4]; wchar_t bits[4]; wchar_t mask; } _EucInfo; typedef struct { wchar_t ch; int set; int want; } _EucState; int _EUC_init(_RuneLocale *rl) { _EucInfo *ei; int x, new__mb_cur_max; char *v, *e; if (rl->__variable == NULL) return (EFTYPE); v = (char *)rl->__variable; while (*v == ' ' || *v == '\t') ++v; if ((ei = malloc(sizeof(_EucInfo))) == NULL) return (errno == 0 ? ENOMEM : errno); new__mb_cur_max = 0; for (x = 0; x < 4; ++x) { ei->count[x] = (int)strtol(v, &e, 0); if (v == e || !(v = e)) { free(ei); return (EFTYPE); } if (new__mb_cur_max < ei->count[x]) new__mb_cur_max = ei->count[x]; while (*v == ' ' || *v == '\t') ++v; ei->bits[x] = (int)strtol(v, &e, 0); if (v == e || !(v = e)) { free(ei); return (EFTYPE); } while (*v == ' ' || *v == '\t') ++v; } ei->mask = (int)strtol(v, &e, 0); if (v == e || !(v = e)) { free(ei); return (EFTYPE); } rl->__variable = ei; rl->__variable_len = sizeof(_EucInfo); _CurrentRuneLocale = rl; __mb_cur_max = new__mb_cur_max; __mbrtowc = _EUC_mbrtowc; __wcrtomb = _EUC_wcrtomb; __mbsinit = _EUC_mbsinit; + __mb_sb_limit = 256; return (0); } static int _EUC_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _EucState *)ps)->want == 0); } #define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) #define _SS2 0x008e #define _SS3 0x008f #define GR_BITS 0x80808080 /* XXX: to be fixed */ static __inline int _euc_set(u_int c) { c &= 0xff; return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 
2 : 1 : 0); } static size_t _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _EucState *es; int i, set, want; wchar_t wc; const char *os; es = (_EucState *)ps; if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || es->set > 3) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); os = s; if (es->want == 0) { want = CEI->count[set = _euc_set(*s)]; if (set == 2 || set == 3) { --want; if (--n == 0) { /* Incomplete multibyte sequence */ es->set = set; es->want = want; es->ch = 0; return ((size_t)-2); } ++s; if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } } wc = (unsigned char)*s++; } else { set = es->set; want = es->want; wc = es->ch; } for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (wc << 8) | (unsigned char)*s++; } if (i < want) { /* Incomplete multibyte sequence */ es->set = set; es->want = want - i; es->ch = wc; return ((size_t)-2); } wc = (wc & ~CEI->mask) | CEI->bits[set]; if (pwc != NULL) *pwc = wc; es->want = 0; return (wc == L'\0' ? 0 : s - os); } static size_t _EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _EucState *es; wchar_t m, nm; int i, len; es = (_EucState *)ps; if (es->want != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); m = wc & CEI->mask; nm = wc & ~m; if (m == CEI->bits[1]) { CodeSet1: /* Codeset 1: The first byte must have 0x80 in it. 
*/ i = len = CEI->count[1]; while (i-- > 0) *s++ = (nm >> (i << 3)) | 0x80; } else { if (m == CEI->bits[0]) i = len = CEI->count[0]; else if (m == CEI->bits[2]) { i = len = CEI->count[2]; *s++ = _SS2; --i; /* SS2 designates G2 into GR */ nm |= GR_BITS; } else if (m == CEI->bits[3]) { i = len = CEI->count[3]; *s++ = _SS3; --i; /* SS3 designates G3 into GR */ nm |= GR_BITS; } else goto CodeSet1; /* Bletch */ while (i-- > 0) *s++ = (nm >> (i << 3)) & 0xff; } return (len); } Index: head/lib/libc/locale/gb18030.c =================================================================== --- head/lib/libc/locale/gb18030.c (revision 172618) +++ head/lib/libc/locale/gb18030.c (revision 172619) @@ -1,218 +1,221 @@ /*- * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * PRC National Standard GB 18030-2000 encoding of Chinese text. * * See gb18030(5) for details. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB18030_mbsinit(const mbstate_t *); static size_t _GB18030_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); typedef struct { int count; u_char bytes[4]; } _GB18030State; int _GB18030_init(_RuneLocale *rl) { __mbrtowc = _GB18030_mbrtowc; __wcrtomb = _GB18030_wcrtomb; __mbsinit = _GB18030_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 4; + __mb_sb_limit = 128; return (0); } static int _GB18030_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _GB18030State *)ps)->count == 0); } static size_t _GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _GB18030State *gs; wchar_t wch; int ch, len, ocount; size_t ncopy; gs = (_GB18030State *)ps; if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); memcpy(gs->bytes + gs->count, s, ncopy); ocount = gs->count; gs->count += ncopy; s = (char *)gs->bytes; n = gs->count; if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); /* * Single 
byte: [00-7f] * Two byte: [81-fe][40-7e,80-fe] * Four byte: [81-fe][30-39][81-fe][30-39] */ ch = (unsigned char)*s++; if (ch <= 0x7f) { len = 1; wch = ch; } else if (ch >= 0x81 && ch <= 0xfe) { wch = ch; if (n < 2) return ((size_t)-2); ch = (unsigned char)*s++; if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) { wch = (wch << 8) | ch; len = 2; } else if (ch >= 0x30 && ch <= 0x39) { /* * Strip high bit off the wide character we will * eventually output so that it is positive when * cast to wint_t on 32-bit twos-complement machines. */ wch = ((wch & 0x7f) << 8) | ch; if (n < 3) return ((size_t)-2); ch = (unsigned char)*s++; if (ch < 0x81 || ch > 0xfe) goto ilseq; wch = (wch << 8) | ch; if (n < 4) return ((size_t)-2); ch = (unsigned char)*s++; if (ch < 0x30 || ch > 0x39) goto ilseq; wch = (wch << 8) | ch; len = 4; } else goto ilseq; } else goto ilseq; if (pwc != NULL) *pwc = wch; gs->count = 0; return (wch == L'\0' ? 0 : len - ocount); ilseq: errno = EILSEQ; return ((size_t)-1); } static size_t _GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _GB18030State *gs; size_t len; int c; gs = (_GB18030State *)ps; if (gs->count != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if ((wc & ~0x7fffffff) != 0) goto ilseq; if (wc & 0x7f000000) { /* Replace high bit that mbrtowc() removed. 
*/ wc |= 0x80000000; c = (wc >> 24) & 0xff; if (c < 0x81 || c > 0xfe) goto ilseq; *s++ = c; c = (wc >> 16) & 0xff; if (c < 0x30 || c > 0x39) goto ilseq; *s++ = c; c = (wc >> 8) & 0xff; if (c < 0x81 || c > 0xfe) goto ilseq; *s++ = c; c = wc & 0xff; if (c < 0x30 || c > 0x39) goto ilseq; *s++ = c; len = 4; } else if (wc & 0x00ff0000) goto ilseq; else if (wc & 0x0000ff00) { c = (wc >> 8) & 0xff; if (c < 0x81 || c > 0xfe) goto ilseq; *s++ = c; c = wc & 0xff; if (c < 0x40 || c == 0x7f || c == 0xff) goto ilseq; *s++ = c; len = 2; } else if (wc <= 0x7f) { *s++ = wc; len = 1; } else goto ilseq; return (len); ilseq: errno = EILSEQ; return ((size_t)-1); } Index: head/lib/libc/locale/gb2312.c =================================================================== --- head/lib/libc/locale/gb2312.c (revision 172618) +++ head/lib/libc/locale/gb2312.c (revision 172619) @@ -1,154 +1,157 @@ /*- * Copyright (c) 2004 Tim J. Robbins. All rights reserved. * Copyright (c) 2003 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB2312_mbsinit(const mbstate_t *); static size_t _GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); typedef struct { int count; u_char bytes[2]; } _GB2312State; int _GB2312_init(_RuneLocale *rl) { _CurrentRuneLocale = rl; __mbrtowc = _GB2312_mbrtowc; __wcrtomb = _GB2312_wcrtomb; __mbsinit = _GB2312_mbsinit; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } static int _GB2312_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _GB2312State *)ps)->count == 0); } static __inline int _GB2312_check(const char *str, size_t n) { const u_char *s = (const u_char *)str; if (n == 0) /* Incomplete multibyte sequence */ return (-2); if (s[0] >= 0xa1 && s[0] <= 0xfe) { if (n < 2) /* Incomplete multibyte sequence */ return (-2); if (s[1] < 0xa1 || s[1] > 0xfe) /* Invalid multibyte sequence */ return (-1); return (2); } else if (s[0] & 0x80) { /* Invalid multibyte sequence */ return (-1); } return (1); } static size_t _GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _GB2312State *gs; wchar_t wc; int i, len, ocount; size_t ncopy; gs = (_GB2312State *)ps; if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { errno = EINVAL; return 
((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); memcpy(gs->bytes + gs->count, s, ncopy); ocount = gs->count; gs->count += ncopy; s = (char *)gs->bytes; n = gs->count; if ((len = _GB2312_check(s, n)) < 0) return ((size_t)len); wc = 0; i = len; while (i-- > 0) wc = (wc << 8) | (unsigned char)*s++; if (pwc != NULL) *pwc = wc; gs->count = 0; return (wc == L'\0' ? 0 : len - ocount); } static size_t _GB2312_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _GB2312State *gs; gs = (_GB2312State *)ps; if (gs->count != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc & 0x8000) { *s++ = (wc >> 8) & 0xff; *s = wc & 0xff; return (2); } *s = wc & 0xff; return (1); } Index: head/lib/libc/locale/gbk.c =================================================================== --- head/lib/libc/locale/gbk.c (revision 172618) +++ head/lib/libc/locale/gbk.c (revision 172619) @@ -1,165 +1,168 @@ /*- * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GBK_mbsinit(const mbstate_t *); static size_t _GBK_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); typedef struct { wchar_t ch; } _GBKState; int _GBK_init(_RuneLocale *rl) { __mbrtowc = _GBK_mbrtowc; __wcrtomb = _GBK_wcrtomb; __mbsinit = _GBK_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } static int _GBK_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _GBKState *)ps)->ch == 0); } static __inline int _gbk_check(u_int c) { c &= 0xff; return ((c >= 0x81 && c <= 0xfe) ? 2 : 1); } static size_t _GBK_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _GBKState *gs; wchar_t wc; size_t len; gs = (_GBKState *)ps; if ((gs->ch & ~0xFF) != 0) { /* Bad conversion state. 
*/ errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (gs->ch != 0) { if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (gs->ch << 8) | (*s & 0xFF); if (pwc != NULL) *pwc = wc; gs->ch = 0; return (1); } len = (size_t)_gbk_check(*s); wc = *s++ & 0xff; if (len == 2) { if (n < 2) { /* Incomplete multibyte sequence */ gs->ch = wc; return ((size_t)-2); } if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; return (2); } else { if (pwc != NULL) *pwc = wc; return (wc == L'\0' ? 0 : 1); } } static size_t _GBK_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _GBKState *gs; gs = (_GBKState *)ps; if (gs->ch != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc & 0x8000) { *s++ = (wc >> 8) & 0xff; *s = wc & 0xff; return (2); } *s = wc & 0xff; return (1); } Index: head/lib/libc/locale/isctype.c =================================================================== --- head/lib/libc/locale/isctype.c (revision 172618) +++ head/lib/libc/locale/isctype.c (revision 172619) @@ -1,229 +1,229 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)isctype.c 8.3 (Berkeley) 2/24/94"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #undef digittoint int digittoint(c) int c; { - return (__maskrune(c, 0xFF)); + return (__sbmaskrune(c, 0xFF)); } #undef isalnum int isalnum(c) int c; { - return (__istype(c, _CTYPE_A|_CTYPE_D)); + return (__sbistype(c, _CTYPE_A|_CTYPE_D)); } #undef isalpha int isalpha(c) int c; { - return (__istype(c, _CTYPE_A)); + return (__sbistype(c, _CTYPE_A)); } #undef isascii int isascii(c) int c; { return ((c & ~0x7F) == 0); } #undef isblank int isblank(c) int c; { - return (__istype(c, _CTYPE_B)); + return (__sbistype(c, _CTYPE_B)); } #undef iscntrl int iscntrl(c) int c; { - return (__istype(c, _CTYPE_C)); + return (__sbistype(c, _CTYPE_C)); } #undef isdigit int isdigit(c) int c; { return (__isctype(c, _CTYPE_D)); } #undef isgraph int isgraph(c) int c; { - return (__istype(c, _CTYPE_G)); + return (__sbistype(c, _CTYPE_G)); } #undef ishexnumber int ishexnumber(c) int c; { - return (__istype(c, _CTYPE_X)); + return (__sbistype(c, _CTYPE_X)); } #undef isideogram int isideogram(c) int c; { - return (__istype(c, _CTYPE_I)); + return (__sbistype(c, _CTYPE_I)); } #undef islower int islower(c) int c; { - return (__istype(c, _CTYPE_L)); + return (__sbistype(c, _CTYPE_L)); } #undef isnumber int isnumber(c) int c; { - return (__istype(c, _CTYPE_D)); + return (__sbistype(c, _CTYPE_D)); } #undef isphonogram int isphonogram(c) int c; { - return (__istype(c, _CTYPE_Q)); + return (__sbistype(c, _CTYPE_Q)); } #undef isprint int isprint(c) int c; { - return (__istype(c, _CTYPE_R)); + return (__sbistype(c, _CTYPE_R)); } #undef ispunct int ispunct(c) int c; { - return (__istype(c, _CTYPE_P)); + return (__sbistype(c, _CTYPE_P)); } #undef isrune int isrune(c) int c; { - return (__istype(c, 0xFFFFFF00L)); + return (__sbistype(c, 0xFFFFFF00L)); } #undef isspace int isspace(c) int c; { - return (__istype(c, 
_CTYPE_S)); + return (__sbistype(c, _CTYPE_S)); } #undef isspecial int isspecial(c) int c; { - return (__istype(c, _CTYPE_T)); + return (__sbistype(c, _CTYPE_T)); } #undef isupper int isupper(c) int c; { - return (__istype(c, _CTYPE_U)); + return (__sbistype(c, _CTYPE_U)); } #undef isxdigit int isxdigit(c) int c; { return (__isctype(c, _CTYPE_X)); } #undef toascii int toascii(c) int c; { return (c & 0x7F); } #undef tolower int tolower(c) int c; { - return (__tolower(c)); + return (__sbtolower(c)); } #undef toupper int toupper(c) int c; { - return (__toupper(c)); + return (__sbtoupper(c)); } Index: head/lib/libc/locale/iswctype.c =================================================================== --- head/lib/libc/locale/iswctype.c (revision 172618) +++ head/lib/libc/locale/iswctype.c (revision 172619) @@ -1,210 +1,210 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #undef iswalnum int iswalnum(wc) wint_t wc; { return (__istype(wc, _CTYPE_A|_CTYPE_D)); } #undef iswalpha int iswalpha(wc) wint_t wc; { return (__istype(wc, _CTYPE_A)); } #undef iswascii int iswascii(wc) wint_t wc; { - return ((wc & ~0x7F) == 0); + return (wc < 0x80); } #undef iswblank int iswblank(wc) wint_t wc; { return (__istype(wc, _CTYPE_B)); } #undef iswcntrl int iswcntrl(wc) wint_t wc; { return (__istype(wc, _CTYPE_C)); } #undef iswdigit int iswdigit(wc) wint_t wc; { return (__isctype(wc, _CTYPE_D)); } #undef iswgraph int iswgraph(wc) wint_t wc; { return (__istype(wc, _CTYPE_G)); } #undef iswhexnumber int iswhexnumber(wc) wint_t wc; { return (__istype(wc, _CTYPE_X)); } #undef iswideogram int iswideogram(wc) wint_t wc; { return (__istype(wc, _CTYPE_I)); } #undef iswlower int iswlower(wc) wint_t wc; { return (__istype(wc, _CTYPE_L)); } #undef iswnumber int iswnumber(wc) wint_t wc; { return (__istype(wc, _CTYPE_D)); } #undef iswphonogram int iswphonogram(wc) wint_t wc; { return 
(__istype(wc, _CTYPE_Q)); } #undef iswprint int iswprint(wc) wint_t wc; { return (__istype(wc, _CTYPE_R)); } #undef iswpunct int iswpunct(wc) wint_t wc; { return (__istype(wc, _CTYPE_P)); } #undef iswrune int iswrune(wc) wint_t wc; { return (__istype(wc, 0xFFFFFF00L)); } #undef iswspace int iswspace(wc) wint_t wc; { return (__istype(wc, _CTYPE_S)); } #undef iswspecial int iswspecial(wc) wint_t wc; { return (__istype(wc, _CTYPE_T)); } #undef iswupper int iswupper(wc) wint_t wc; { return (__istype(wc, _CTYPE_U)); } #undef iswxdigit int iswxdigit(wc) wint_t wc; { return (__isctype(wc, _CTYPE_X)); } #undef towlower wint_t towlower(wc) wint_t wc; { return (__tolower(wc)); } #undef towupper wint_t towupper(wc) wint_t wc; { return (__toupper(wc)); } Index: head/lib/libc/locale/mskanji.c =================================================================== --- head/lib/libc/locale/mskanji.c (revision 172618) +++ head/lib/libc/locale/mskanji.c (revision 172619) @@ -1,157 +1,160 @@ /* * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * * ja_JP.SJIS locale table for BSD4.4/rune * version 1.0 * (C) Sin'ichiro MIYATANI / Phase One, Inc * May 12, 1995 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Phase One, Inc. * 4. The name of Phase One, Inc. may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)mskanji.c 1.0 (Phase One) 5/5/95"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _MSKanji_mbsinit(const mbstate_t *); static size_t _MSKanji_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); typedef struct { wchar_t ch; } _MSKanjiState; int _MSKanji_init(_RuneLocale *rl) { __mbrtowc = _MSKanji_mbrtowc; __wcrtomb = _MSKanji_wcrtomb; __mbsinit = _MSKanji_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 256; return (0); } static int _MSKanji_mbsinit(const mbstate_t *ps) { return (ps == NULL || ((const _MSKanjiState *)ps)->ch == 0); } static size_t _MSKanji_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps) { _MSKanjiState *ms; wchar_t wc; ms = (_MSKanjiState *)ps; if ((ms->ch & ~0xFF) != 0) { /* Bad conversion state. 
*/ errno = EINVAL; return ((size_t)-1); } if (s == NULL) { s = ""; n = 1; pwc = NULL; } if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (ms->ch != 0) { if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (ms->ch << 8) | (*s & 0xFF); if (pwc != NULL) *pwc = wc; ms->ch = 0; return (1); } wc = *s++ & 0xff; if ((wc > 0x80 && wc < 0xa0) || (wc >= 0xe0 && wc < 0xfd)) { if (n < 2) { /* Incomplete multibyte sequence */ ms->ch = wc; return ((size_t)-2); } if (*s == '\0') { errno = EILSEQ; return ((size_t)-1); } wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; return (2); } else { if (pwc != NULL) *pwc = wc; return (wc == L'\0' ? 0 : 1); } } static size_t _MSKanji_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { _MSKanjiState *ms; int len, i; ms = (_MSKanjiState *)ps; if (ms->ch != 0) { errno = EINVAL; return ((size_t)-1); } if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); len = (wc > 0x100) ? 2 : 1; for (i = len; i-- > 0; ) *s++ = wc >> (i << 3); return (len); } Index: head/lib/libc/locale/none.c =================================================================== --- head/lib/libc/locale/none.c (revision 172618) +++ head/lib/libc/locale/none.c (revision 172619) @@ -1,189 +1,194 @@ /*- * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)none.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "mblocal.h" static size_t _none_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _none_mbsinit(const mbstate_t *); static size_t _none_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, mbstate_t * __restrict ps __unused); static size_t _none_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); static size_t _none_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict); +/* setup defaults */ + +int __mb_cur_max = 1; +int __mb_sb_limit = 256; /* Expected to be <= _CACHED_RUNES */ + int _none_init(_RuneLocale *rl) { __mbrtowc = _none_mbrtowc; __mbsinit = _none_mbsinit; __mbsnrtowcs = _none_mbsnrtowcs; __wcrtomb = _none_wcrtomb; __wcsnrtombs = _none_wcsnrtombs; _CurrentRuneLocale = rl; __mb_cur_max = 1; + __mb_sb_limit = 256; return(0); } static int _none_mbsinit(const mbstate_t *ps __unused) { /* * Encoding is not state dependent - we are always in the * initial state. */ return (1); } static size_t _none_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps __unused) { if (s == NULL) /* Reset to initial shift state (no-op) */ return (0); if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); if (pwc != NULL) *pwc = (unsigned char)*s; return (*s == '\0' ? 
0 : 1); } static size_t _none_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps __unused) { if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); if (wc < 0 || wc > UCHAR_MAX) { errno = EILSEQ; return ((size_t)-1); } *s = (unsigned char)wc; return (1); } static size_t _none_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, size_t nms, size_t len, mbstate_t * __restrict ps __unused) { const char *s; size_t nchr; if (dst == NULL) { s = memchr(*src, '\0', nms); return (s != NULL ? s - *src : nms); } s = *src; nchr = 0; while (len-- > 0 && nms-- > 0) { if ((*dst++ = (unsigned char)*s++) == L'\0') { *src = NULL; return (nchr); } nchr++; } *src = s; return (nchr); } static size_t _none_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, size_t nwc, size_t len, mbstate_t * __restrict ps __unused) { const wchar_t *s; size_t nchr; if (dst == NULL) { for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { if (*s < 0 || *s > UCHAR_MAX) { errno = EILSEQ; return ((size_t)-1); } } return (s - *src); } s = *src; nchr = 0; while (len-- > 0 && nwc-- > 0) { if (*s < 0 || *s > UCHAR_MAX) { errno = EILSEQ; return ((size_t)-1); } if ((*dst++ = *s++) == '\0') { *src = NULL; return (nchr); } nchr++; } *src = s; return (nchr); } /* setup defaults */ -int __mb_cur_max = 1; size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict) = _none_mbrtowc; int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; size_t (*__mbsnrtowcs)(wchar_t * __restrict, const char ** __restrict, size_t, size_t, mbstate_t * __restrict) = _none_mbsnrtowcs; size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict) = _none_wcrtomb; size_t (*__wcsnrtombs)(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict) = _none_wcsnrtombs; Index: head/lib/libc/locale/setrunelocale.c =================================================================== --- 
head/lib/libc/locale/setrunelocale.c (revision 172618) +++ head/lib/libc/locale/setrunelocale.c (revision 172619) @@ -1,173 +1,178 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Paul Borman at Krystal Technologies. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "ldpart.h" #include "mblocal.h" #include "setlocale.h" +extern int __mb_sb_limit; + extern _RuneLocale *_Read_RuneMagi(FILE *); static int __setrunelocale(const char *); static int __setrunelocale(const char *encoding) { FILE *fp; char name[PATH_MAX]; _RuneLocale *rl; int saverr, ret; static char ctype_encoding[ENCODING_LEN + 1]; static _RuneLocale *CachedRuneLocale; static int Cached__mb_cur_max; + static int Cached__mb_sb_limit; static size_t (*Cached__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static size_t (*Cached__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); static int (*Cached__mbsinit)(const mbstate_t *); static size_t (*Cached__mbsnrtowcs)(wchar_t * __restrict, const char ** __restrict, size_t, size_t, mbstate_t * __restrict); static size_t (*Cached__wcsnrtombs)(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict); /* * The "C" and "POSIX" locale are always here. */ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { _none_init(&_DefaultRuneLocale); return (0); } /* * If the locale name is the same as our cache, use the cache. */ if (CachedRuneLocale != NULL && strcmp(encoding, ctype_encoding) == 0) { _CurrentRuneLocale = CachedRuneLocale; __mb_cur_max = Cached__mb_cur_max; + __mb_sb_limit = Cached__mb_sb_limit; __mbrtowc = Cached__mbrtowc; __mbsinit = Cached__mbsinit; __mbsnrtowcs = Cached__mbsnrtowcs; __wcrtomb = Cached__wcrtomb; __wcsnrtombs = Cached__wcsnrtombs; return (0); } /* * Slurp the locale file into the cache. */ /* Range checking not needed, encoding length already checked before */ (void) strcpy(name, _PathLocale); (void) strcat(name, "/"); (void) strcat(name, encoding); (void) strcat(name, "/LC_CTYPE"); if ((fp = fopen(name, "r")) == NULL) return (errno == 0 ? 
ENOENT : errno); if ((rl = _Read_RuneMagi(fp)) == NULL) { saverr = (errno == 0 ? EFTYPE : errno); (void)fclose(fp); return (saverr); } (void)fclose(fp); __mbrtowc = NULL; __mbsinit = NULL; __mbsnrtowcs = __mbsnrtowcs_std; __wcrtomb = NULL; __wcsnrtombs = __wcsnrtombs_std; rl->__sputrune = NULL; rl->__sgetrune = NULL; if (strcmp(rl->__encoding, "NONE") == 0) ret = _none_init(rl); else if (strcmp(rl->__encoding, "UTF-8") == 0) ret = _UTF8_init(rl); else if (strcmp(rl->__encoding, "EUC") == 0) ret = _EUC_init(rl); else if (strcmp(rl->__encoding, "GB18030") == 0) ret = _GB18030_init(rl); else if (strcmp(rl->__encoding, "GB2312") == 0) ret = _GB2312_init(rl); else if (strcmp(rl->__encoding, "GBK") == 0) ret = _GBK_init(rl); else if (strcmp(rl->__encoding, "BIG5") == 0) ret = _BIG5_init(rl); else if (strcmp(rl->__encoding, "MSKanji") == 0) ret = _MSKanji_init(rl); else ret = EFTYPE; if (ret == 0) { if (CachedRuneLocale != NULL) { /* See euc.c */ if (strcmp(CachedRuneLocale->__encoding, "EUC") == 0) free(CachedRuneLocale->__variable); free(CachedRuneLocale); } CachedRuneLocale = _CurrentRuneLocale; Cached__mb_cur_max = __mb_cur_max; + Cached__mb_sb_limit = __mb_sb_limit; Cached__mbrtowc = __mbrtowc; Cached__mbsinit = __mbsinit; Cached__mbsnrtowcs = __mbsnrtowcs; Cached__wcrtomb = __wcrtomb; Cached__wcsnrtombs = __wcsnrtombs; (void)strcpy(ctype_encoding, encoding); } else free(rl); return (ret); } int __wrap_setrunelocale(const char *locale) { int ret = __setrunelocale(locale); if (ret != 0) { errno = ret; return (_LDP_ERROR); } return (_LDP_LOADED); } Index: head/lib/libc/locale/utf8.c =================================================================== --- head/lib/libc/locale/utf8.c (revision 172618) +++ head/lib/libc/locale/utf8.c (revision 172619) @@ -1,421 +1,424 @@ /*- * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

/*
 * NOTE(review): the operands of the bare #include directives below are
 * missing in this copy of the file; restore the header names from the
 * repository before attempting to build.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include "mblocal.h"

+extern int __mb_sb_limit;
+
static size_t	_UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static int	_UTF8_mbsinit(const mbstate_t *);
static size_t	_UTF8_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_UTF8_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
		    size_t, size_t, mbstate_t * __restrict);

/*
 * Conversion state overlaid on the caller's mbstate_t.  It records a
 * multibyte character whose decoding was interrupted by the end of input.
 */
typedef struct {
	wchar_t	ch;		/* Bits accumulated so far */
	int	want;		/* Octets still missing; 0 = initial state */
	wchar_t	lbound;		/* Smallest code valid for this length */
} _UTF8State;

/*
 * Install the UTF-8 conversion routines in the global conversion hooks,
 * make `rl' the current rune locale, and set __mb_cur_max to 6 and
 * __mb_sb_limit to 128.  Always returns 0.
 */
int
_UTF8_init(_RuneLocale *rl)
{

	__mbrtowc = _UTF8_mbrtowc;
	__wcrtomb = _UTF8_wcrtomb;
	__mbsinit = _UTF8_mbsinit;
	__mbsnrtowcs = _UTF8_mbsnrtowcs;
	__wcsnrtombs = _UTF8_wcsnrtombs;
	_CurrentRuneLocale = rl;
	__mb_cur_max = 6;
+	__mb_sb_limit = 128;

	return (0);
}

/*
 * Return non-zero if `ps' is NULL or describes the initial conversion state
 * (no partially decoded character pending).
 */
static int
_UTF8_mbsinit(const mbstate_t *ps)
{

	return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
}

/*
 * Restartable conversion of one UTF-8 sequence (1 to 6 octets) to a wide
 * character.
 *
 * At most `n' octets are examined starting at `s'; the decoded character is
 * stored through `pwc' unless `pwc' is NULL.  A NULL `s' is treated as a
 * request to convert the empty string with the result discarded.
 *
 * Returns:
 *   0           a null wide character was decoded;
 *   > 0         number of octets consumed by this call to finish a character;
 *   (size_t)-2  the input ended before the character was complete; the
 *               partial result is saved in `ps' for the next call;
 *   (size_t)-1  error: errno is EINVAL if the state in `ps' is out of range,
 *               or EILSEQ for an invalid lead octet, an invalid continuation
 *               octet, or a non-shortest-form encoding.
 *
 * NOTE(review): `ps' is dereferenced unconditionally — presumably callers
 * substitute an internal state object for NULL; confirm against the callers.
 */
static size_t
_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
    mbstate_t * __restrict ps)
{
	_UTF8State *us;
	int ch, i, mask, want;
	wchar_t lbound, wch;

	us = (_UTF8State *)ps;

	/* Reject a state that this function could not have produced. */
	if (us->want < 0 || us->want > 6) {
		errno = EINVAL;
		return ((size_t)-1);
	}

	if (s == NULL) {
		s = "";
		n = 1;
		pwc = NULL;
	}

	if (n == 0)
		/* Incomplete multibyte sequence */
		return ((size_t)-2);

	if (us->want == 0 && ((ch = (unsigned char)*s) & ~0x7f) == 0) {
		/* Fast path for plain ASCII characters. */
		if (pwc != NULL)
			*pwc = ch;
		return (ch != '\0' ? 1 : 0);
	}

	if (us->want == 0) {
		/*
		 * Determine the number of octets that make up this character
		 * from the first octet, and a mask that extracts the
		 * interesting bits of the first octet. We already know
		 * the character is at least two bytes long.
		 *
		 * We also specify a lower bound for the character code to
		 * detect redundant, non-"shortest form" encodings. For
		 * example, the sequence C0 80 is _not_ a legal representation
		 * of the null character. This enforces a 1-to-1 mapping
		 * between character codes and their multibyte representations.
		 */
		ch = (unsigned char)*s;
		/*
		 * NOTE(review): the first test below cannot succeed here,
		 * since octets with the high bit clear were already returned
		 * by the ASCII fast path above.
		 */
		if ((ch & 0x80) == 0) {
			mask = 0x7f;
			want = 1;
			lbound = 0;
		} else if ((ch & 0xe0) == 0xc0) {
			mask = 0x1f;
			want = 2;
			lbound = 0x80;
		} else if ((ch & 0xf0) == 0xe0) {
			mask = 0x0f;
			want = 3;
			lbound = 0x800;
		} else if ((ch & 0xf8) == 0xf0) {
			mask = 0x07;
			want = 4;
			lbound = 0x10000;
		} else if ((ch & 0xfc) == 0xf8) {
			mask = 0x03;
			want = 5;
			lbound = 0x200000;
		} else if ((ch & 0xfe) == 0xfc) {
			mask = 0x01;
			want = 6;
			lbound = 0x4000000;
		} else {
			/*
			 * Malformed input; input is not UTF-8.
			 */
			errno = EILSEQ;
			return ((size_t)-1);
		}
	} else {
		/* Resuming: pick up where the previous call stopped. */
		want = us->want;
		lbound = us->lbound;
	}

	/*
	 * Decode the octet sequence representing the character in chunks
	 * of 6 bits, most significant first.
	 */
	if (us->want == 0)
		wch = (unsigned char)*s++ & mask;
	else
		wch = us->ch;
	/*
	 * On a fresh character the lead octet was consumed just above, so
	 * counting starts at 1; on a resumed character `want' holds only the
	 * octets still missing, so counting starts at 0.
	 */
	for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
		if ((*s & 0xc0) != 0x80) {
			/*
			 * Malformed input; bad characters in the middle
			 * of a character.
			 */
			errno = EILSEQ;
			return ((size_t)-1);
		}
		wch <<= 6;
		wch |= *s++ & 0x3f;
	}
	if (i < want) {
		/* Incomplete multibyte sequence. */
		us->want = want - i;
		us->lbound = lbound;
		us->ch = wch;
		return ((size_t)-2);
	}
	if (wch < lbound) {
		/*
		 * Malformed input; redundant encoding.
		 */
		errno = EILSEQ;
		return ((size_t)-1);
	}
	if (pwc != NULL)
		*pwc = wch;
	us->want = 0;
	return (wch == L'\0' ? 0 : want);
}

/*
 * Convert a UTF-8 string to wide characters, reading at most `nms' octets
 * from `*src'.
 *
 * With `dst' == NULL nothing is stored, `len' is ignored and `*src' is left
 * untouched; the function only counts the characters that would be produced.
 * Otherwise at most `len' wide characters are written to `dst' and `*src' is
 * updated: it is set to NULL when a terminating null character was
 * converted, to the end of the input when the input ran out in the middle of
 * a character, and to the next unconverted octet otherwise (including on
 * error).
 *
 * Returns the number of wide characters produced, not counting a
 * terminating null, or (size_t)-1 with errno set on an invalid sequence.
 */
static size_t
_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
    size_t nms, size_t len, mbstate_t * __restrict ps)
{
	_UTF8State *us;
	const char *s;
	size_t nchr;
	wchar_t wc;
	size_t nb;

	us = (_UTF8State *)ps;

	s = *src;
	nchr = 0;

	if (dst == NULL) {
		/*
		 * The fast path in the loop below is not safe if an ASCII
		 * character appears as anything but the first byte of a
		 * multibyte sequence. Check now to avoid doing it in the loop.
		 */
		if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
			errno = EILSEQ;
			return ((size_t)-1);
		}
		for (;;) {
			if (nms > 0 && (signed char)*s > 0)
				/*
				 * Fast path for plain ASCII characters
				 * excluding NUL.
				 */
				nb = 1;
			else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
			    (size_t)-1)
				/* Invalid sequence - mbrtowc() sets errno. */
				return ((size_t)-1);
			else if (nb == 0 || nb == (size_t)-2)
				/* Terminating null or input exhausted. */
				return (nchr);
			s += nb;
			nms -= nb;
			nchr++;
		}
		/*NOTREACHED*/
	}

	/*
	 * The fast path in the loop below is not safe if an ASCII
	 * character appears as anything but the first byte of a
	 * multibyte sequence. Check now to avoid doing it in the loop.
	 */
	if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
		errno = EILSEQ;
		return ((size_t)-1);
	}
	while (len-- > 0) {
		if (nms > 0 && (signed char)*s > 0) {
			/*
			 * Fast path for plain ASCII characters
			 * excluding NUL.
			 */
			*dst = (wchar_t)*s;
			nb = 1;
		} else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
		    (size_t)-1) {
			/* Leave *src at the start of the offending sequence. */
			*src = s;
			return ((size_t)-1);
		} else if (nb == (size_t)-2) {
			/* Input ended mid-character; all of it was consumed. */
			*src = s + nms;
			return (nchr);
		} else if (nb == 0) {
			/* Terminating null converted. */
			*src = NULL;
			return (nchr);
		}
		s += nb;
		nms -= nb;
		nchr++;
		dst++;
	}
	*src = s;
	return (nchr);
}

/*
 * Encode the wide character `wc' as UTF-8 into the buffer `s'.
 *
 * Returns the number of octets written (1 to 6).  A NULL `s' writes nothing
 * and returns 1.  Returns (size_t)-1 with errno set to EINVAL if `ps' holds a
 * partially decoded character, or to EILSEQ if `wc' does not pass any of the
 * range tests below.
 */
static size_t
_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
{
	_UTF8State *us;
	unsigned char lead;
	int i, len;

	us = (_UTF8State *)ps;

	if (us->want != 0) {
		errno = EINVAL;
		return ((size_t)-1);
	}

	if (s == NULL)
		/* Reset to initial shift state (no-op) */
		return (1);

	if ((wc & ~0x7f) == 0) {
		/* Fast path for plain ASCII characters. */
		*s = (char)wc;
		return (1);
	}

	/*
	 * Determine the number of octets needed to represent this character.
	 * We always output the shortest sequence possible. Also specify the
	 * first few bits of the first octet, which contains the information
	 * about the sequence length.
	 *
	 * NOTE(review): the first test below repeats the fast-path condition
	 * above, which has already returned, so that branch cannot be taken.
	 */
	if ((wc & ~0x7f) == 0) {
		lead = 0;
		len = 1;
	} else if ((wc & ~0x7ff) == 0) {
		lead = 0xc0;
		len = 2;
	} else if ((wc & ~0xffff) == 0) {
		lead = 0xe0;
		len = 3;
	} else if ((wc & ~0x1fffff) == 0) {
		lead = 0xf0;
		len = 4;
	} else if ((wc & ~0x3ffffff) == 0) {
		lead = 0xf8;
		len = 5;
	} else if ((wc & ~0x7fffffff) == 0) {
		lead = 0xfc;
		len = 6;
	} else {
		errno = EILSEQ;
		return ((size_t)-1);
	}

	/*
	 * Output the octets representing the character in chunks
	 * of 6 bits, least significant last. The first octet is
	 * a special case because it contains the sequence length
	 * information.
	 */
	for (i = len - 1; i > 0; i--) {
		s[i] = (wc & 0x3f) | 0x80;
		wc >>= 6;
	}
	*s = (wc & 0xff) | lead;

	return (len);
}

/*
 * Convert at most `nwc' wide characters from `*src' to UTF-8.
 *
 * With `dst' == NULL nothing is stored, `len' is ignored and `*src' is left
 * untouched; the function only counts the octets that would be produced.
 * Otherwise at most `len' octets are written to `dst' and `*src' is updated:
 * it is set to NULL when a terminating null was converted, and to the next
 * unconverted wide character otherwise (including on error).
 *
 * Returns the number of octets produced, not counting a terminating null,
 * or (size_t)-1 with errno set if `ps' is not in the initial state (EINVAL)
 * or a character cannot be encoded (errno set by _UTF8_wcrtomb()).
 */
static size_t
_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
    size_t nwc, size_t len, mbstate_t * __restrict ps)
{
	_UTF8State *us;
	char buf[MB_LEN_MAX];
	const wchar_t *s;
	size_t nbytes;
	size_t nb;

	us = (_UTF8State *)ps;

	if (us->want != 0) {
		errno = EINVAL;
		return ((size_t)-1);
	}

	s = *src;
	nbytes = 0;

	if (dst == NULL) {
		while (nwc-- > 0) {
			if (0 <= *s && *s < 0x80)
				/* Fast path for plain ASCII characters. */
				nb = 1;
			else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
			    (size_t)-1)
				/* Invalid character - wcrtomb() sets errno. */
				return ((size_t)-1);
			if (*s == L'\0')
				/* Do not count the terminating null octet. */
				return (nbytes + nb - 1);
			s++;
			nbytes += nb;
		}
		return (nbytes);
	}

	while (len > 0 && nwc-- > 0) {
		if (0 <= *s && *s < 0x80) {
			/* Fast path for plain ASCII characters. */
			nb = 1;
			*dst = *s;
		} else if (len > (size_t)MB_CUR_MAX) {
			/* Enough space to translate in-place. */
			if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
				*src = s;
				return ((size_t)-1);
			}
		} else {
			/*
			 * May not be enough space; use temp. buffer.
			 */
			if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
				*src = s;
				return ((size_t)-1);
			}
			if (nb > (int)len)
				/* MB sequence for character won't fit. */
				break;
			memcpy(dst, buf, nb);
		}
		if (*s == L'\0') {
			/* Do not count the terminating null octet. */
			*src = NULL;
			return (nbytes + nb - 1);
		}
		s++;
		dst += nb;
		len -= nb;
		nbytes += nb;
	}
	*src = s;
	return (nbytes);
}