diff --git a/lib/libc/stdio/printf.3 b/lib/libc/stdio/printf.3 index 110851e2a421..a7b0c7399e2e 100644 --- a/lib/libc/stdio/printf.3 +++ b/lib/libc/stdio/printf.3 @@ -1,936 +1,938 @@ .\" Copyright (c) 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" Chris Torek and the American National Standards Committee X3, .\" on Information Processing Systems. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" @(#)printf.3 8.1 (Berkeley) 6/4/93 .\" -.Dd August 21, 2023 +.Dd September 5, 2023 .Dt PRINTF 3 .Os .Sh NAME .Nm printf , .Nm fprintf , .Nm sprintf , .Nm snprintf , .Nm asprintf , .Nm dprintf , .Nm vprintf , .Nm vfprintf , .Nm vsprintf , .Nm vsnprintf , .Nm vasprintf , .Nm vdprintf .Nd formatted output conversion .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In stdio.h .Ft int .Fn printf "const char * restrict format" ... .Ft int .Fn fprintf "FILE * restrict stream" "const char * restrict format" ... .Ft int .Fn sprintf "char * restrict str" "const char * restrict format" ... .Ft int .Fn snprintf "char * restrict str" "size_t size" "const char * restrict format" ... .Ft int .Fn asprintf "char **ret" "const char *format" ... .Ft int .Fn dprintf "int fd" "const char * restrict format" ... .In stdarg.h .Ft int .Fn vprintf "const char * restrict format" "va_list ap" .Ft int .Fn vfprintf "FILE * restrict stream" "const char * restrict format" "va_list ap" .Ft int .Fn vsprintf "char * restrict str" "const char * restrict format" "va_list ap" .Ft int .Fn vsnprintf "char * restrict str" "size_t size" "const char * restrict format" "va_list ap" .Ft int .Fn vasprintf "char **ret" "const char *format" "va_list ap" .Ft int .Fn vdprintf "int fd" "const char * restrict format" "va_list ap" .Sh DESCRIPTION The .Fn printf family of functions produces output according to a .Fa format as described below. The .Fn printf and .Fn vprintf functions write output to .Dv stdout , the standard output stream; .Fn fprintf and .Fn vfprintf write output to the given output .Fa stream ; .Fn dprintf and .Fn vdprintf write output to the given file descriptor; .Fn sprintf , .Fn snprintf , .Fn vsprintf , and .Fn vsnprintf write to the character string .Fa str ; and .Fn asprintf and .Fn vasprintf dynamically allocate a new string with .Xr malloc 3 . 
.Pp These functions write the output under the control of a .Fa format string that specifies how subsequent arguments (or arguments accessed via the variable-length argument facilities of .Xr stdarg 3 ) are converted for output. .Pp The .Fn asprintf and .Fn vasprintf functions set .Fa *ret to be a pointer to a buffer sufficiently large to hold the formatted string. This pointer should be passed to .Xr free 3 to release the allocated storage when it is no longer needed. If sufficient space cannot be allocated, .Fn asprintf and .Fn vasprintf will return \-1 and set .Fa *ret to be a .Dv NULL pointer. .Pp The .Fn snprintf and .Fn vsnprintf functions will write at most .Fa size Ns \-1 of the characters printed into the output string (the .Fa size Ns 'th character then gets the terminating .Ql \e0 ) ; if the return value is greater than or equal to the .Fa size argument, the string was too short and some of the printed characters were discarded. The output is always null-terminated, unless .Fa size is 0. .Pp The .Fn sprintf and .Fn vsprintf functions effectively assume a .Fa size of .Dv INT_MAX + 1. .Pp The format string is composed of zero or more directives: ordinary .\" multibyte characters (not .Cm % ) , which are copied unchanged to the output stream; and conversion specifications, each of which results in fetching zero or more subsequent arguments. Each conversion specification is introduced by the .Cm % character. The arguments must correspond properly (after type promotion) with the conversion specifier. After the .Cm % , the following appear in sequence: .Bl -bullet .It An optional field, consisting of a decimal digit string followed by a .Cm $ , specifying the next argument to access. If this field is not provided, the argument following the last argument accessed will be used. Arguments are numbered starting at .Cm 1 . If unaccessed arguments in the format string are interspersed with ones that are accessed the results will be indeterminate. 
.It Zero or more of the following flags: .Bl -tag -width ".So \ Sc (space)" .It Sq Cm # The value should be converted to an .Dq alternate form . For .Cm c , d , i , n , p , s , and .Cm u conversions, this option has no effect. For .Cm b and .Cm B conversions, a non-zero result has the string .Ql 0b (or .Ql 0B for .Cm B conversions) prepended to it. For .Cm o conversions, the precision of the number is increased to force the first character of the output string to a zero. For .Cm x and .Cm X conversions, a non-zero result has the string .Ql 0x (or .Ql 0X for .Cm X conversions) prepended to it. For .Cm a , A , e , E , f , F , g , and .Cm G conversions, the result will always contain a decimal point, even if no digits follow it (normally, a decimal point appears in the results of those conversions only if a digit follows). For .Cm g and .Cm G conversions, trailing zeros are not removed from the result as they would otherwise be. .It So Cm 0 Sc (zero) Zero padding. For all conversions except .Cm n , the converted value is padded on the left with zeros rather than blanks. If a precision is given with a numeric conversion .Cm ( b , B , d , i , o , u , x , and .Cm X ) , the .Cm 0 flag is ignored. .It Sq Cm \- A negative field width flag; the converted value is to be left adjusted on the field boundary. Except for .Cm n conversions, the converted value is padded on the right with blanks, rather than on the left with blanks or zeros. A .Cm \- overrides a .Cm 0 if both are given. .It So "\ " Sc (space) A blank should be left before a positive number produced by a signed conversion .Cm ( a , A , d , e , E , f , F , g , G , or .Cm i ) . .It Sq Cm + A sign must always be placed before a number produced by a signed conversion. A .Cm + overrides a space if both are used. 
.It So "'" Sc (apostrophe) Decimal conversions .Cm ( d , u , or .Cm i ) or the integral portion of a floating point conversion .Cm ( f or .Cm F ) should be grouped and separated by thousands using the non-monetary separator returned by .Xr localeconv 3 . .El .It An optional decimal digit string specifying a minimum field width. If the converted value has fewer characters than the field width, it will be padded with spaces on the left (or right, if the left-adjustment flag has been given) to fill out the field width. .It An optional precision, in the form of a period .Cm \&. followed by an optional digit string. If the digit string is omitted, the precision is taken as zero. This gives the minimum number of digits to appear for .Cm b , B , d , i , o , u , x , and .Cm X conversions, the number of digits to appear after the decimal-point for .Cm a , A , e , E , f , and .Cm F conversions, the maximum number of significant digits for .Cm g and .Cm G conversions, or the maximum number of characters to be printed from a string for .Cm s conversions. .It An optional length modifier, that specifies the size of the argument. 
The following length modifiers are valid for the .Cm b , B , d , i , n , o , u , x , or .Cm X conversion: .Bl -column ".Cm q Em (deprecated)" ".Vt signed char" ".Vt unsigned long long" ".Vt long long *" .It Sy Modifier Ta Cm d , i Ta Cm b , B , o , u , x , X Ta Cm n .It Cm hh Ta Vt "signed char" Ta Vt "unsigned char" Ta Vt "signed char *" .It Cm h Ta Vt short Ta Vt "unsigned short" Ta Vt "short *" .It Cm l No (ell) Ta Vt long Ta Vt "unsigned long" Ta Vt "long *" .It Cm ll No (ell ell) Ta Vt "long long" Ta Vt "unsigned long long" Ta Vt "long long *" .It Cm j Ta Vt intmax_t Ta Vt uintmax_t Ta Vt "intmax_t *" .It Cm t Ta Vt ptrdiff_t Ta (see note) Ta Vt "ptrdiff_t *" +.It Cm w Ns Ar N Ta Vt intN_t Ta Vt uintN_t Ta Vt "intN_t *" +.It Cm wf Ns Ar N Ta Vt int_fastN_t Ta Vt uint_fastN_t Ta Vt "int_fastN_t *" .It Cm z Ta (see note) Ta Vt size_t Ta (see note) .It Cm q Em (deprecated) Ta Vt quad_t Ta Vt u_quad_t Ta Vt "quad_t *" .El .Pp Note: the .Cm t modifier, when applied to a .Cm b , B , o , u , x , or .Cm X conversion, indicates that the argument is of an unsigned type equivalent in size to a .Vt ptrdiff_t . The .Cm z modifier, when applied to a .Cm d or .Cm i conversion, indicates that the argument is of a signed type equivalent in size to a .Vt size_t . Similarly, when applied to an .Cm n conversion, it indicates that the argument is a pointer to a signed type equivalent in size to a .Vt size_t . 
.Pp The following length modifier is valid for the .Cm a , A , e , E , f , F , g , or .Cm G conversion: .Bl -column ".Sy Modifier" ".Cm a , A , e , E , f , F , g , G" .It Sy Modifier Ta Cm a , A , e , E , f , F , g , G .It Cm l No (ell) Ta Vt double (ignored, same behavior as without it) .It Cm L Ta Vt "long double" .El .Pp The following length modifier is valid for the .Cm c or .Cm s conversion: .Bl -column ".Sy Modifier" ".Vt wint_t" ".Vt wchar_t *" .It Sy Modifier Ta Cm c Ta Cm s .It Cm l No (ell) Ta Vt wint_t Ta Vt "wchar_t *" .El .It A character that specifies the type of conversion to be applied. .El .Pp A field width or precision, or both, may be indicated by an asterisk .Ql * or an asterisk followed by one or more decimal digits and a .Ql $ instead of a digit string. In this case, an .Vt int argument supplies the field width or precision. A negative field width is treated as a left adjustment flag followed by a positive field width; a negative precision is treated as though it were missing. If a single format directive mixes positional .Pq Li nn$ and non-positional arguments, the results are undefined. .Pp The conversion specifiers and their meanings are: .Bl -tag -width ".Cm bBdiouxX" .It Cm bBdiouxX The .Vt int (or appropriate variant) argument is converted to unsigned binary .Cm ( b and .Cm B ) , signed decimal .Cm ( d and .Cm i ) , unsigned octal .Pq Cm o , unsigned decimal .Pq Cm u , or unsigned hexadecimal .Cm ( x and .Cm X ) notation. The letters .Dq Li abcdef are used for .Cm x conversions; the letters .Dq Li ABCDEF are used for .Cm X conversions. The precision, if any, gives the minimum number of digits that must appear; if the converted value requires fewer digits, it is padded on the left with zeros. .It Cm DOU The .Vt "long int" argument is converted to signed decimal, unsigned octal, or unsigned decimal, as if the format had been .Cm ld , lo , or .Cm lu respectively. These conversion characters are deprecated, and will eventually disappear. 
.It Cm eE The .Vt double argument is rounded and converted in the style .Sm off .Oo \- Oc Ar d Li \&. Ar ddd Li e \(+- Ar dd .Sm on where there is one digit before the decimal-point character and the number of digits after it is equal to the precision; if the precision is missing, it is taken as 6; if the precision is zero, no decimal-point character appears. An .Cm E conversion uses the letter .Ql E (rather than .Ql e ) to introduce the exponent. The exponent always contains at least two digits; if the value is zero, the exponent is 00. .Pp For .Cm a , A , e , E , f , F , g , and .Cm G conversions, positive and negative infinity are represented as .Li inf and .Li -inf respectively when using the lowercase conversion character, and .Li INF and .Li -INF respectively when using the uppercase conversion character. Similarly, NaN is represented as .Li nan when using the lowercase conversion, and .Li NAN when using the uppercase conversion. .It Cm fF The .Vt double argument is rounded and converted to decimal notation in the style .Sm off .Oo \- Oc Ar ddd Li \&. Ar ddd , .Sm on where the number of digits after the decimal-point character is equal to the precision specification. If the precision is missing, it is taken as 6; if the precision is explicitly zero, no decimal-point character appears. If a decimal point appears, at least one digit appears before it. .It Cm gG The .Vt double argument is converted in style .Cm f or .Cm e (or .Cm F or .Cm E for .Cm G conversions). The precision specifies the number of significant digits. If the precision is missing, 6 digits are given; if the precision is zero, it is treated as 1. Style .Cm e is used if the exponent from its conversion is less than \-4 or greater than or equal to the precision. Trailing zeros are removed from the fractional part of the result; a decimal point appears only if it is followed by at least one digit. 
.It Cm aA The .Vt double argument is rounded and converted to hexadecimal notation in the style .Sm off .Oo \- Oc Li 0x Ar h Li \&. Ar hhhp Oo \(+- Oc Ar d , .Sm on where the number of digits after the hexadecimal-point character is equal to the precision specification. If the precision is missing, it is taken as enough to represent the floating-point number exactly, and no rounding occurs. If the precision is zero, no hexadecimal-point character appears. The .Cm p is a literal character .Ql p , and the exponent consists of a positive or negative sign followed by a decimal number representing an exponent of 2. The .Cm A conversion uses the prefix .Dq Li 0X (rather than .Dq Li 0x ) , the letters .Dq Li ABCDEF (rather than .Dq Li abcdef ) to represent the hex digits, and the letter .Ql P (rather than .Ql p ) to separate the mantissa and exponent. .Pp Note that there may be multiple valid ways to represent floating-point numbers in this hexadecimal format. For example, .Li 0x1.92p+1 , 0x3.24p+0 , 0x6.48p-1 , and .Li 0xc.9p-2 are all equivalent. .Fx 8.0 and later always prints finite non-zero numbers using .Ql 1 as the digit before the hexadecimal point. Zeroes are always represented with a mantissa of 0 (preceded by a .Ql - if appropriate) and an exponent of .Li +0 . .It Cm C Treated as .Cm c with the .Cm l (ell) modifier. .It Cm c The .Vt int argument is converted to an .Vt "unsigned char" , and the resulting character is written. .Pp If the .Cm l (ell) modifier is used, the .Vt wint_t argument shall be converted to a .Vt wchar_t , and the (potentially multi-byte) sequence representing the single wide character is written, including any shift sequences. If a shift sequence is used, the shift state is also restored to the original state after the character. .It Cm S Treated as .Cm s with the .Cm l (ell) modifier. .It Cm s The .Vt "char *" argument is expected to be a pointer to an array of character type (pointer to a string). 
Characters from the array are written up to (but not including) a terminating .Dv NUL character; if a precision is specified, no more than the number specified are written. If a precision is given, no null character need be present; if the precision is not specified, or is greater than the size of the array, the array must contain a terminating .Dv NUL character. .Pp If the .Cm l (ell) modifier is used, the .Vt "wchar_t *" argument is expected to be a pointer to an array of wide characters (pointer to a wide string). For each wide character in the string, the (potentially multi-byte) sequence representing the wide character is written, including any shift sequences. If any shift sequence is used, the shift state is also restored to the original state after the string. Wide characters from the array are written up to (but not including) a terminating wide .Dv NUL character; if a precision is specified, no more than the number of bytes specified are written (including shift sequences). Partial characters are never written. If a precision is given, no null character need be present; if the precision is not specified, or is greater than the number of bytes required to render the multibyte representation of the string, the array must contain a terminating wide .Dv NUL character. .It Cm p The .Vt "void *" pointer argument is printed in hexadecimal (as if by .Ql %#x or .Ql %#lx ) . .It Cm n The number of characters written so far is stored into the integer indicated by the .Vt "int *" (or variant) pointer argument. No argument is converted. .It Cm m Print the string representation of the error code stored in the .Dv errno variable at the beginning of the call, as returned by .Xr strerror 3 . No argument is taken. .It Cm % A .Ql % is written. No argument is converted. The complete conversion specification is .Ql %% . .El .Pp The decimal point character is defined in the program's locale (category .Dv LC_NUMERIC ) . 
.Pp In no case does a non-existent or small field width cause truncation of a numeric field; if the result of a conversion is wider than the field width, the field is expanded to contain the conversion result. .Sh RETURN VALUES These functions return the number of characters printed (not including the trailing .Ql \e0 used to end output to strings), except for .Fn snprintf and .Fn vsnprintf , which return the number of characters that would have been printed if the .Fa size were unlimited (again, not including the final .Ql \e0 ) . These functions return a negative value if an error occurs. .Sh EXAMPLES To print a date and time in the form .Dq Li "Sunday, July 3, 10:02" , where .Fa weekday and .Fa month are pointers to strings: .Bd -literal -offset indent #include <stdio.h> fprintf(stdout, "%s, %s %d, %.2d:%.2d\en", weekday, month, day, hour, min); .Ed .Pp To print \*(Pi to five decimal places: .Bd -literal -offset indent #include <math.h> #include <stdio.h> fprintf(stdout, "pi = %.5f\en", 4 * atan(1.0)); .Ed .Pp To allocate a 128 byte string and print into it: .Bd -literal -offset indent #include <stdio.h> #include <stdlib.h> #include <stdarg.h> char *newfmt(const char *fmt, ...) { char *p; va_list ap; if ((p = malloc(128)) == NULL) return (NULL); va_start(ap, fmt); (void) vsnprintf(p, 128, fmt, ap); va_end(ap); return (p); } .Ed .Sh COMPATIBILITY The conversion formats .Cm \&%D , \&%O , and .Cm \&%U are not standard and are provided only for backward compatibility. The conversion format .Cm \&%m is also not standard and provides the popular extension from the .Tn GNU C library. .Pp The effect of padding the .Cm %p format with zeros (either by the .Cm 0 flag or by specifying a precision), and the benign effect (i.e., none) of the .Cm # flag on .Cm %n and .Cm %p conversions, as well as other nonsensical combinations such as .Cm %Ld , are not standard; such combinations should be avoided. 
.Sh ERRORS In addition to the errors documented for the .Xr write 2 system call, the .Fn printf family of functions may fail if: .Bl -tag -width Er .It Bq Er EILSEQ An invalid wide character code was encountered. .It Bq Er ENOMEM Insufficient storage space is available. .It Bq Er EOVERFLOW The .Fa size argument exceeds .Dv INT_MAX + 1 , or the return value would be too large to be represented by an .Vt int . .El .Sh SEE ALSO .Xr printf 1 , .Xr errno 2 , .Xr fmtcheck 3 , .Xr scanf 3 , .Xr setlocale 3 , .Xr strerror 3 , .Xr wprintf 3 .Sh STANDARDS Subject to the caveats noted in the .Sx BUGS section below, the .Fn fprintf , .Fn printf , .Fn sprintf , .Fn vprintf , .Fn vfprintf , and .Fn vsprintf functions conform to .St -ansiC and .St -isoC-99 . With the same reservation, the .Fn snprintf and .Fn vsnprintf functions conform to .St -isoC-99 , while .Fn dprintf and .Fn vdprintf conform to .St -p1003.1-2008 . .Sh HISTORY The functions .Fn asprintf and .Fn vasprintf first appeared in the .Tn GNU C library. These were implemented by .An Peter Wemm Aq Mt peter@FreeBSD.org in .Fx 2.2 , but were later replaced with a different implementation from .Ox 2.3 by .An Todd C. Miller Aq Mt Todd.Miller@courtesan.com . The .Fn dprintf and .Fn vdprintf functions were added in .Fx 8.0 . The .Cm \&%m format extension first appeared in the .Tn GNU C library, and was implemented in .Fx 12.0 . .Sh BUGS The .Nm family of functions do not correctly handle multibyte characters in the .Fa format argument. .Sh SECURITY CONSIDERATIONS The .Fn sprintf and .Fn vsprintf functions are easily misused in a manner which enables malicious users to arbitrarily change a running program's functionality through a buffer overflow attack. Because .Fn sprintf and .Fn vsprintf assume an infinitely long string, callers must be careful not to overflow the actual space; this is often hard to assure. For safety, programmers should use the .Fn snprintf interface instead. 
For example: .Bd -literal void foo(const char *arbitrary_string, const char *and_another) { char onstack[8]; #ifdef BAD /* * This first sprintf is bad behavior. Do not use sprintf! */ sprintf(onstack, "%s, %s", arbitrary_string, and_another); #else /* * The following two lines demonstrate better use of * snprintf(). */ snprintf(onstack, sizeof(onstack), "%s, %s", arbitrary_string, and_another); #endif } .Ed .Pp The .Fn printf and .Fn sprintf family of functions are also easily misused in a manner allowing malicious users to arbitrarily change a running program's functionality by either causing the program to print potentially sensitive data .Dq "left on the stack" , or causing it to generate a memory fault or bus error by dereferencing an invalid pointer. .Pp .Cm %n can be used to write arbitrary data to potentially carefully-selected addresses. Programmers are therefore strongly advised to never pass untrusted strings as the .Fa format argument, as an attacker can put format specifiers in the string to mangle your stack, leading to a possible security hole. This holds true even if the string was built using a function like .Fn snprintf , as the resulting string may still contain user-supplied conversion specifiers for later interpolation by .Fn printf . .Pp Always use the proper secure idiom: .Pp .Dl "snprintf(buffer, sizeof(buffer), \*q%s\*q, string);" diff --git a/lib/libc/stdio/printflocal.h b/lib/libc/stdio/printflocal.h index 3594f75d0050..f3d0d3e9e216 100644 --- a/lib/libc/stdio/printflocal.h +++ b/lib/libc/stdio/printflocal.h @@ -1,94 +1,95 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Chris Torek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Flags used during conversion. 
*/ #define ALT 0x001 /* alternate form */ #define LADJUST 0x004 /* left adjustment */ #define LONGDBL 0x008 /* long double */ #define LONGINT 0x010 /* long integer */ #define LLONGINT 0x020 /* long long integer */ #define SHORTINT 0x040 /* short integer */ #define ZEROPAD 0x080 /* zero (as opposed to blank) pad */ #define FPT 0x100 /* Floating point number */ #define GROUPING 0x200 /* use grouping ("'" flag) */ /* C99 additional size modifiers: */ #define SIZET 0x400 /* size_t */ #define PTRDIFFT 0x800 /* ptrdiff_t */ #define INTMAXT 0x1000 /* intmax_t */ #define CHARINT 0x2000 /* print char using int format */ +#define FASTINT 0x4000 /* int_fastN_t */ /* * Macros for converting digits to letters and vice versa */ #define to_digit(c) ((c) - '0') #define is_digit(c) ((unsigned)to_digit(c) <= 9) #define to_char(n) ((n) + '0') /* Size of the static argument table. */ #define STATIC_ARG_TBL_SIZE 8 union arg { int intarg; u_int uintarg; long longarg; u_long ulongarg; long long longlongarg; unsigned long long ulonglongarg; ptrdiff_t ptrdiffarg; size_t sizearg; intmax_t intmaxarg; uintmax_t uintmaxarg; void *pvoidarg; char *pchararg; signed char *pschararg; short *pshortarg; int *pintarg; long *plongarg; long long *plonglongarg; ptrdiff_t *pptrdiffarg; ssize_t *pssizearg; intmax_t *pintmaxarg; #ifndef NO_FLOATING_POINT double doublearg; long double longdoublearg; #endif wint_t wintarg; wchar_t *pwchararg; }; /* Handle positional parameters. */ int __find_arguments(const char *, va_list, union arg **); int __find_warguments(const wchar_t *, va_list, union arg **); diff --git a/lib/libc/stdio/scanf.3 b/lib/libc/stdio/scanf.3 index 6cefdb133983..b8d2360c5e76 100644 --- a/lib/libc/stdio/scanf.3 +++ b/lib/libc/stdio/scanf.3 @@ -1,536 +1,564 @@ .\" Copyright (c) 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. 
.\" .\" This code is derived from software contributed to Berkeley by .\" Chris Torek and the American National Standards Committee X3, .\" on Information Processing Systems. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)scanf.3 8.2 (Berkeley) 12/11/93 .\" -.Dd August 21, 2023 +.Dd September 5, 2023 .Dt SCANF 3 .Os .Sh NAME .Nm scanf , .Nm fscanf , .Nm sscanf , .Nm vscanf , .Nm vsscanf , .Nm vfscanf .Nd input format conversion .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In stdio.h .Ft int .Fn scanf "const char * restrict format" ... 
.Ft int .Fn fscanf "FILE * restrict stream" "const char * restrict format" ... .Ft int .Fn sscanf "const char * restrict str" "const char * restrict format" ... .In stdarg.h .Ft int .Fn vscanf "const char * restrict format" "va_list ap" .Ft int .Fn vsscanf "const char * restrict str" "const char * restrict format" "va_list ap" .Ft int .Fn vfscanf "FILE * restrict stream" "const char * restrict format" "va_list ap" .Sh DESCRIPTION The .Fn scanf family of functions scans input according to a .Fa format as described below. This format may contain .Em conversion specifiers ; the results from such conversions, if any, are stored through the .Em pointer arguments. The .Fn scanf function reads input from the standard input stream .Dv stdin , .Fn fscanf reads input from the stream pointer .Fa stream , and .Fn sscanf reads its input from the character string pointed to by .Fa str . The .Fn vfscanf function is analogous to .Xr vfprintf 3 and reads input from the stream pointer .Fa stream using a variable argument list of pointers (see .Xr stdarg 3 ) . The .Fn vscanf function scans a variable argument list from the standard input and the .Fn vsscanf function scans it from a string; these are analogous to the .Fn vprintf and .Fn vsprintf functions respectively. Each successive .Em pointer argument must correspond properly with each successive conversion specifier (but see the .Cm * conversion below). All conversions are introduced by the .Cm % (percent sign) character. The .Fa format string may also contain other characters. White space (such as blanks, tabs, or newlines) in the .Fa format string match any amount of white space, including none, in the input. Everything else matches only itself. Scanning stops when an input character does not match such a format character. Scanning also stops when an input conversion cannot be made (see below). 
.Sh CONVERSIONS Following the .Cm % character introducing a conversion there may be a number of .Em flag characters, as follows: .Bl -tag -width ".Cm l No (ell)" .It Cm * Suppresses assignment. The conversion that follows occurs as usual, but no pointer is used; the result of the conversion is simply discarded. .It Cm hh Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to a .Vt char (rather than .Vt int ) . .It Cm h Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to a .Vt "short int" (rather than .Vt int ) . .It Cm l No (ell) Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to a .Vt "long int" (rather than .Vt int ) , that the conversion will be one of .Cm a , e , f , or .Cm g and the next pointer is a pointer to .Vt double (rather than .Vt float ) , or that the conversion will be one of .Cm c , .Cm s or .Cm \&[ and the next pointer is a pointer to an array of .Vt wchar_t (rather than .Vt char ) . .It Cm ll No (ell ell) Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to a .Vt "long long int" (rather than .Vt int ) . .It Cm L Indicates that the conversion will be one of .Cm a , e , f , or .Cm g and the next pointer is a pointer to .Vt "long double" . .It Cm j Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to an .Vt intmax_t (rather than .Vt int ) . .It Cm t Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to a .Vt ptrdiff_t (rather than .Vt int ) . +.It Cm w Ns Ar N +.Po +where +.Ar N +is 8, 16, 32, or 64 +.Pc +Indicates that the conversion will be one of +.Cm bdioux +or +.Cm n +and the next pointer is a pointer to an +.Vt intN_t +(rather than +.Vt int ) . 
+.It Cm wf Ns Ar N +.Po +where +.Ar N +is 8, 16, 32, or 64 +.Pc +Indicates that the conversion will be one of +.Cm bdioux +or +.Cm n +and the next pointer is a pointer to a +.Vt int_fastN_t +(rather than +.Vt int ) . .It Cm z Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to a .Vt size_t (rather than .Vt int ) . .It Cm q (deprecated.) Indicates that the conversion will be one of .Cm bdioux or .Cm n and the next pointer is a pointer to a .Vt "long long int" (rather than .Vt int ) . .El .Pp In addition to these flags, there may be an optional maximum field width, expressed as a decimal integer, between the .Cm % and the conversion. If no width is given, a default of .Dq infinity is used (with one exception, below); otherwise at most this many bytes are scanned in processing the conversion. In the case of the .Cm lc , .Cm ls and .Cm l[ conversions, the field width specifies the maximum number of multibyte characters that will be scanned. Before conversion begins, most conversions skip white space; this white space is not counted against the field width. .Pp The following conversions are available: .Bl -tag -width XXXX .It Cm % Matches a literal .Ql % . That is, .Dq Li %% in the format string matches a single input .Ql % character. No conversion is done, and assignment does not occur. .It Cm b , B Matches an optionally signed binary integer; the next pointer must be a pointer to .Vt "unsigned int" . .It Cm d Matches an optionally signed decimal integer; the next pointer must be a pointer to .Vt int . .It Cm i Matches an optionally signed integer; the next pointer must be a pointer to .Vt int . The integer is read in base 2 if it begins with .Ql 0b or .Ql 0B , in base 16 if it begins with .Ql 0x or .Ql 0X , in base 8 if it begins with .Ql 0 , and in base 10 otherwise. Only characters that correspond to the base are used. .It Cm o Matches an octal integer; the next pointer must be a pointer to .Vt "unsigned int" . 
.It Cm u Matches an optionally signed decimal integer; the next pointer must be a pointer to .Vt "unsigned int" . .It Cm x , X Matches an optionally signed hexadecimal integer; the next pointer must be a pointer to .Vt "unsigned int" . .It Cm a , A , e , E , f , F , g , G Matches a floating-point number in the style of .Xr strtod 3 . The next pointer must be a pointer to .Vt float (unless .Cm l or .Cm L is specified.) .It Cm s Matches a sequence of non-white-space characters; the next pointer must be a pointer to .Vt char , and the array must be large enough to accept all the sequence and the terminating .Dv NUL character. The input string stops at white space or at the maximum field width, whichever occurs first. .Pp If an .Cm l qualifier is present, the next pointer must be a pointer to .Vt wchar_t , into which the input will be placed after conversion by .Xr mbrtowc 3 . .It Cm S The same as .Cm ls . .It Cm c Matches a sequence of .Em width count characters (default 1); the next pointer must be a pointer to .Vt char , and there must be enough room for all the characters (no terminating .Dv NUL is added). The usual skip of leading white space is suppressed. To skip white space first, use an explicit space in the format. .Pp If an .Cm l qualifier is present, the next pointer must be a pointer to .Vt wchar_t , into which the input will be placed after conversion by .Xr mbrtowc 3 . .It Cm C The same as .Cm lc . .It Cm \&[ Matches a nonempty sequence of characters from the specified set of accepted characters; the next pointer must be a pointer to .Vt char , and there must be enough room for all the characters in the string, plus a terminating .Dv NUL character. The usual skip of leading white space is suppressed. The string is to be made up of characters in (or not in) a particular set; the set is defined by the characters between the open bracket .Cm \&[ character and a close bracket .Cm \&] character. 
The set .Em excludes those characters if the first character after the open bracket is a circumflex .Cm ^ . To include a close bracket in the set, make it the first character after the open bracket or the circumflex; any other position will end the set. The hyphen character .Cm - is also special; when placed between two other characters, it adds all intervening characters to the set. To include a hyphen, make it the last character before the final close bracket. For instance, .Ql [^]0-9-] means the set .Dq "everything except close bracket, zero through nine, and hyphen" . The string ends with the appearance of a character not in the (or, with a circumflex, in) set or when the field width runs out. .Pp If an .Cm l qualifier is present, the next pointer must be a pointer to .Vt wchar_t , into which the input will be placed after conversion by .Xr mbrtowc 3 . .It Cm p Matches a pointer value (as printed by .Ql %p in .Xr printf 3 ) ; the next pointer must be a pointer to .Vt void . .It Cm n Nothing is expected; instead, the number of characters consumed thus far from the input is stored through the next pointer, which must be a pointer to .Vt int . This is .Em not a conversion, although it can be suppressed with the .Cm * flag. .El .Pp The decimal point character is defined in the program's locale (category .Dv LC_NUMERIC ) . .Pp For backwards compatibility, a .Dq conversion of .Ql %\e0 causes an immediate return of .Dv EOF . .Sh RETURN VALUES These functions return the number of input items assigned, which can be fewer than provided for, or even zero, in the event of a matching failure. Zero indicates that, while there was input available, no conversions were assigned; typically this is due to an invalid input character, such as an alphabetic character for a .Ql %d conversion. The value .Dv EOF is returned if an input failure, such as an end-of-file, occurs before any conversion.
If an error or end-of-file occurs after conversion has begun, the number of conversions which were successfully completed is returned. .Sh SEE ALSO .Xr getc 3 , .Xr mbrtowc 3 , .Xr printf 3 , .Xr strtod 3 , .Xr strtol 3 , .Xr strtoul 3 , .Xr wscanf 3 .Sh STANDARDS The functions .Fn fscanf , .Fn scanf , .Fn sscanf , .Fn vfscanf , .Fn vscanf and .Fn vsscanf conform to .St -isoC-99 . .Sh HISTORY The functions .Fn scanf , .Fn fscanf , and .Fn sscanf first appeared in .At v7 , and .Fn vscanf , .Fn vsscanf , and .Fn vfscanf in .Bx 4.3 Reno . .Sh BUGS Earlier implementations of .Nm treated .Cm \&%D , \&%E , \&%F , \&%O and .Cm \&%X as their lowercase equivalents with an .Cm l modifier. In addition, .Nm treated an unknown conversion character as .Cm \&%d or .Cm \&%D , depending on its case. This functionality has been removed. .Pp Numerical strings are truncated to 512 characters; for example, .Cm %f and .Cm %d are implicitly .Cm %512f and .Cm %512d . .Pp The .Cm %n$ modifiers for positional arguments are not implemented. .Pp The .Nm family of functions do not correctly handle multibyte characters in the .Fa format argument. diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c index 5e5a9b5e31c1..8ce77626fb6f 100644 --- a/lib/libc/stdio/vfprintf.c +++ b/lib/libc/stdio/vfprintf.c @@ -1,1065 +1,1109 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Chris Torek. * * Copyright (c) 2011 The FreeBSD Foundation * * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)vfprintf.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include /* * Actual printf innards. * * This code is large and complicated... 
*/ #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xlocale_private.h" #include "un-namespace.h" #include "libc_private.h" #include "local.h" #include "fvwrite.h" #include "printflocal.h" static int __sprint(FILE *, struct __suio *, locale_t); static int __sbprintf(FILE *, locale_t, const char *, va_list) __printflike(3, 0) __noinline; static char *__wcsconv(wchar_t *, int); #define CHAR char #include "printfcommon.h" struct grouping_state { char *thousands_sep; /* locale-specific thousands separator */ int thousep_len; /* length of thousands_sep */ const char *grouping; /* locale-specific numeric grouping rules */ int lead; /* sig figs before decimal or group sep */ int nseps; /* number of group separators with ' */ int nrepeats; /* number of repeats of the last group */ }; /* * Initialize the thousands' grouping state in preparation to print a * number with ndigits digits. This routine returns the total number * of bytes that will be needed. */ static int grouping_init(struct grouping_state *gs, int ndigits, locale_t loc) { struct lconv *locale; locale = localeconv_l(loc); gs->grouping = locale->grouping; gs->thousands_sep = locale->thousands_sep; gs->thousep_len = strlen(gs->thousands_sep); gs->nseps = gs->nrepeats = 0; gs->lead = ndigits; while (*gs->grouping != CHAR_MAX) { if (gs->lead <= *gs->grouping) break; gs->lead -= *gs->grouping; if (*(gs->grouping+1)) { gs->nseps++; gs->grouping++; } else gs->nrepeats++; } return ((gs->nseps + gs->nrepeats) * gs->thousep_len); } /* * Print a number with thousands' separators. 
*/ static int grouping_print(struct grouping_state *gs, struct io_state *iop, const CHAR *cp, const CHAR *ep, locale_t locale) { const CHAR *cp0 = cp; if (io_printandpad(iop, cp, ep, gs->lead, zeroes, locale)) return (-1); cp += gs->lead; while (gs->nseps > 0 || gs->nrepeats > 0) { if (gs->nrepeats > 0) gs->nrepeats--; else { gs->grouping--; gs->nseps--; } if (io_print(iop, gs->thousands_sep, gs->thousep_len, locale)) return (-1); if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes, locale)) return (-1); cp += *gs->grouping; } if (cp > ep) cp = ep; return (cp - cp0); } /* * Flush out all the vectors defined by the given uio, * then reset it so that it can be reused. */ static int __sprint(FILE *fp, struct __suio *uio, locale_t locale) { int err; if (uio->uio_resid == 0) { uio->uio_iovcnt = 0; return (0); } err = __sfvwrite(fp, uio); uio->uio_resid = 0; uio->uio_iovcnt = 0; return (err); } /* * Helper function for `fprintf to unbuffered unix file': creates a * temporary buffer. We only work on write-only files; this avoids * worries about ungetc buffers and so forth. */ static int __sbprintf(FILE *fp, locale_t locale, const char *fmt, va_list ap) { int ret; FILE fake = FAKE_FILE; unsigned char buf[BUFSIZ]; /* XXX This is probably not needed. */ if (prepwrite(fp) != 0) return (EOF); /* copy the important variables */ fake._flags = fp->_flags & ~__SNBF; fake._file = fp->_file; fake._cookie = fp->_cookie; fake._write = fp->_write; fake._orientation = fp->_orientation; fake._mbstate = fp->_mbstate; /* set up the buffer */ fake._bf._base = fake._p = buf; fake._bf._size = fake._w = sizeof(buf); fake._lbfsize = 0; /* not actually used, but Just In Case */ /* do the work, then copy any error status */ ret = __vfprintf(&fake, locale, fmt, ap); if (ret >= 0 && __fflush(&fake)) ret = EOF; if (fake._flags & __SERR) fp->_flags |= __SERR; return (ret); } /* * Convert a wide character string argument for the %ls format to a multibyte * string representation. 
If not -1, prec specifies the maximum number of * bytes to output, and also means that we can't assume that the wide char. * string ends is null-terminated. */ static char * __wcsconv(wchar_t *wcsarg, int prec) { static const mbstate_t initial; mbstate_t mbs; char buf[MB_LEN_MAX]; wchar_t *p; char *convbuf; size_t clen, nbytes; /* Allocate space for the maximum number of bytes we could output. */ if (prec < 0) { p = wcsarg; mbs = initial; nbytes = wcsrtombs(NULL, (const wchar_t **)&p, 0, &mbs); if (nbytes == (size_t)-1) return (NULL); } else { /* * Optimisation: if the output precision is small enough, * just allocate enough memory for the maximum instead of * scanning the string. */ if (prec < 128) nbytes = prec; else { nbytes = 0; p = wcsarg; mbs = initial; for (;;) { clen = wcrtomb(buf, *p++, &mbs); if (clen == 0 || clen == (size_t)-1 || nbytes + clen > prec) break; nbytes += clen; } } } if ((convbuf = malloc(nbytes + 1)) == NULL) return (NULL); /* Fill the output buffer. */ p = wcsarg; mbs = initial; if ((nbytes = wcsrtombs(convbuf, (const wchar_t **)&p, nbytes, &mbs)) == (size_t)-1) { free(convbuf); return (NULL); } convbuf[nbytes] = '\0'; return (convbuf); } /* * MT-safe version */ int vfprintf_l(FILE * __restrict fp, locale_t locale, const char * __restrict fmt0, va_list ap) { int ret; FIX_LOCALE(locale); FLOCKFILE_CANCELSAFE(fp); /* optimise fprintf(stderr) (and other unbuffered Unix files) */ if ((fp->_flags & (__SNBF|__SWR|__SRW)) == (__SNBF|__SWR) && fp->_file >= 0) ret = __sbprintf(fp, locale, fmt0, ap); else ret = __vfprintf(fp, locale, fmt0, ap); FUNLOCKFILE_CANCELSAFE(); return (ret); } int vfprintf(FILE * __restrict fp, const char * __restrict fmt0, va_list ap) { return vfprintf_l(fp, __get_locale(), fmt0, ap); } /* * The size of the buffer we use as scratch space for integer * conversions, among other things. We need enough space to * write a uintmax_t in octal (plus one byte). 
*/ #if UINTMAX_MAX <= UINT64_MAX #define BUF 32 #else #error "BUF must be large enough to format a uintmax_t" #endif /* * Non-MT-safe version: this function does no stream locking of its own * (vfprintf_l() locks fp before calling it). Unless __use_xprintf is * enabled, in which case the call is handed to __xvprintf(), it formats * fmt0 with the arguments from ap, writes the result to fp, and returns * the value accumulated in ret, or EOF once an error has been flagged. */ int __vfprintf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) { char *fmt; /* format string */ int ch; /* character from fmt */ int n, n2; /* handy integer (short term usage) */ char *cp; /* handy char pointer (short term usage) */ int flags; /* flags as above */ int ret; /* return value accumulator */ int width; /* width from format (%8d), or 0 */ int prec; /* precision from format; <0 for N/A */ int saved_errno; /* errno at entry; %m formats this value */ char sign; /* sign prefix (' ', '+', '-', or \0) */ struct grouping_state gs; /* thousands' grouping info */ #ifndef NO_FLOATING_POINT /* * We can decompose the printed representation of floating * point numbers into several parts, some of which may be empty: * * [+|-| ] [0x|0X] MMM . NNN [e|E|p|P] [+|-] ZZ * A B ---C--- D E F * * A: 'sign' holds this value if present; '\0' otherwise * B: ox[1] holds the 'x' or 'X'; '\0' if not hexadecimal * C: cp points to the string MMMNNN. Leading and trailing * zeros are not in the string and must be added. * D: expchar holds this character; '\0' if no exponent, e.g.
%f * F: at least two digits for decimal, at least one digit for hex */ char *decimal_point; /* locale specific decimal point */ int decpt_len; /* length of decimal_point */ int signflag; /* true if float is negative */ union { /* floating point arguments %[aAeEfFgG] */ double dbl; long double ldbl; } fparg; int expt; /* integer value of exponent */ char expchar; /* exponent character: [eEpP\0] */ char *dtoaend; /* pointer to end of converted digits */ int expsize; /* character count for expstr */ int ndig; /* actual number of digits returned by dtoa */ char expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */ char *dtoaresult; /* buffer allocated by dtoa */ #endif u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ int base; /* base for [diouxX] conversion */ int dprec; /* a copy of prec if [diouxX], 0 otherwise */ int realsz; /* field size expanded by dprec, sign, etc */ int size; /* size of converted field or string */ int prsize; /* max size of printed field */ const char *xdigs; /* digits for %[xX] conversion */ struct io_state io; /* I/O buffering state */ char buf[BUF]; /* buffer with space for digits of uintmax_t */ char ox[2]; /* space for 0x; ox[1] is either x, X, or \0 */ union arg *argtable; /* args, built due to positional arg */ union arg statargtable [STATIC_ARG_TBL_SIZE]; int nextarg; /* 1-based argument index */ va_list orgap; /* original argument pointer */ char *convbuf; /* wide to multibyte conversion result */ int savserr; /* __SERR bit of fp->_flags as it was at entry */ static const char xdigs_lower[16] = "0123456789abcdef"; static const char xdigs_upper[16] = "0123456789ABCDEF"; /* BEWARE, these `goto error' on error.
*/ #define PRINT(ptr, len) { \ if (io_print(&io, (ptr), (len), locale)) \ goto error; \ } #define PAD(howmany, with) { \ if (io_pad(&io, (howmany), (with), locale)) \ goto error; \ } #define PRINTANDPAD(p, ep, len, with) { \ if (io_printandpad(&io, (p), (ep), (len), (with), locale)) \ goto error; \ } #define FLUSH() { \ if (io_flush(&io, locale)) \ goto error; \ } /* * Get the argument indexed by nextarg. If the argument table is * built, use it to get the argument. If it's not, get the next * argument (and arguments must be gotten sequentially). */ #define GETARG(type) \ ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ (nextarg++, va_arg(ap, type))) /* * To extend shorts properly, we need both signed and unsigned * argument extraction methods. */ #define SARG() \ (flags&LONGINT ? GETARG(long) : \ flags&SHORTINT ? (long)(short)GETARG(int) : \ flags&CHARINT ? (long)(signed char)GETARG(int) : \ (long)GETARG(int)) #define UARG() \ (flags&LONGINT ? GETARG(u_long) : \ flags&SHORTINT ? (u_long)(u_short)GETARG(int) : \ flags&CHARINT ? (u_long)(u_char)GETARG(int) : \ (u_long)GETARG(u_int)) #define INTMAX_SIZE (INTMAXT|SIZET|PTRDIFFT|LLONGINT) #define SJARG() \ (flags&INTMAXT ? GETARG(intmax_t) : \ flags&SIZET ? (intmax_t)GETARG(ssize_t) : \ flags&PTRDIFFT ? (intmax_t)GETARG(ptrdiff_t) : \ (intmax_t)GETARG(long long)) #define UJARG() \ (flags&INTMAXT ? GETARG(uintmax_t) : \ flags&SIZET ? (uintmax_t)GETARG(size_t) : \ flags&PTRDIFFT ? (uintmax_t)GETARG(ptrdiff_t) : \ (uintmax_t)GETARG(unsigned long long)) /* * Get * arguments, including the form *nn$. Preserve the nextarg * that the argument can be gotten once the type is determined.
*/ #define GETASTER(val) \ n2 = 0; \ cp = fmt; \ while (is_digit(*cp)) { \ n2 = 10 * n2 + to_digit(*cp); \ cp++; \ } \ if (*cp == '$') { \ int hold = nextarg; \ if (argtable == NULL) { \ argtable = statargtable; \ if (__find_arguments (fmt0, orgap, &argtable)) { \ ret = EOF; \ goto error; \ } \ } \ nextarg = n2; \ val = GETARG (int); \ nextarg = hold; \ fmt = ++cp; \ } else { \ val = GETARG (int); \ } if (__use_xprintf == 0 && getenv("USE_XPRINTF")) __use_xprintf = 1; if (__use_xprintf > 0) return (__xvprintf(fp, fmt0, ap)); /* sorry, fprintf(read_only_file, "") returns EOF, not 0 */ if (prepwrite(fp) != 0) { errno = EBADF; return (EOF); } savserr = fp->_flags & __SERR; fp->_flags &= ~__SERR; saved_errno = errno; convbuf = NULL; fmt = (char *)fmt0; argtable = NULL; nextarg = 1; va_copy(orgap, ap); io_init(&io, fp); ret = 0; #ifndef NO_FLOATING_POINT dtoaresult = NULL; decimal_point = localeconv_l(locale)->decimal_point; /* The overwhelmingly common case is decpt_len == 1. */ decpt_len = (decimal_point[1] == '\0' ? 1 : strlen(decimal_point)); #endif /* * Scan the format for conversions (`%' character). */ for (;;) { for (cp = fmt; (ch = *fmt) != '\0' && ch != '%'; fmt++) /* void */; if ((n = fmt - cp) != 0) { if ((unsigned)ret + n > INT_MAX) { ret = EOF; errno = EOVERFLOW; goto error; } PRINT(cp, n); ret += n; } if (ch == '\0') goto done; fmt++; /* skip over '%' */ flags = 0; dprec = 0; width = 0; prec = -1; gs.grouping = NULL; sign = '\0'; ox[1] = '\0'; rflag: ch = *fmt++; reswitch: switch (ch) { case ' ': /*- * ``If the space and + flags both appear, the space * flag will be ignored.'' * -- ANSI X3J11 */ if (!sign) sign = ' '; goto rflag; case '#': flags |= ALT; goto rflag; case '*': /*- * ``A negative field width argument is taken as a * - flag followed by a positive field width.'' * -- ANSI X3J11 * They don't exclude field widths read from args.
*/ GETASTER (width); if (width >= 0) goto rflag; width = -width; /* FALLTHROUGH */ case '-': flags |= LADJUST; goto rflag; case '+': sign = '+'; goto rflag; case '\'': flags |= GROUPING; goto rflag; case '.': if ((ch = *fmt++) == '*') { GETASTER (prec); goto rflag; } prec = 0; while (is_digit(ch)) { prec = 10 * prec + to_digit(ch); ch = *fmt++; } goto reswitch; case '0': /*- * ``Note that 0 is taken as a flag, not as the * beginning of a field width.'' * -- ANSI X3J11 */ flags |= ZEROPAD; goto rflag; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = 0; do { n = 10 * n + to_digit(ch); ch = *fmt++; } while (is_digit(ch)); if (ch == '$') { nextarg = n; if (argtable == NULL) { argtable = statargtable; if (__find_arguments (fmt0, orgap, &argtable)) { ret = EOF; goto error; } } goto rflag; } width = n; goto reswitch; #ifndef NO_FLOATING_POINT case 'L': flags |= LONGDBL; goto rflag; #endif case 'h': if (flags & SHORTINT) { flags &= ~SHORTINT; flags |= CHARINT; } else flags |= SHORTINT; goto rflag; case 'j': flags |= INTMAXT; goto rflag; case 'l': if (flags & LONGINT) { flags &= ~LONGINT; flags |= LLONGINT; } else flags |= LONGINT; goto rflag; case 'q': flags |= LLONGINT; /* not necessarily */ goto rflag; case 't': flags |= PTRDIFFT; goto rflag; + case 'w': + /* + * Fixed-width integer types. On all platforms we + * support, int8_t is equivalent to char, int16_t + * is equivalent to short, int32_t is equivalent + * to int, int64_t is equivalent to long long int. + * Furthermore, int_fast8_t, int_fast16_t and + * int_fast32_t are equivalent to int, and + * int_fast64_t is equivalent to long long int.
+ */ + flags &= ~(CHARINT|SHORTINT|LONGINT|LLONGINT|INTMAXT); + if (fmt[0] == 'f') { + flags |= FASTINT; + fmt++; + } else { + flags &= ~FASTINT; + } + if (fmt[0] == '8') { + if (!(flags & FASTINT)) + flags |= CHARINT; + else + /* no flag set = 32 */ ; + fmt += 1; + } else if (fmt[0] == '1' && fmt[1] == '6') { + if (!(flags & FASTINT)) + flags |= SHORTINT; + else + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '3' && fmt[1] == '2') { + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '6' && fmt[1] == '4') { + flags |= LLONGINT; + fmt += 2; + } else { /* unrecognized width: give back the 'f' consumed above (if any), then emit the 'w' itself through the invalid-conversion path */ + if (flags & FASTINT) { + flags &= ~FASTINT; + fmt--; + } + goto invalid; + } + goto rflag; case 'z': flags |= SIZET; goto rflag; case 'B': case 'b': if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 2; /* leading 0b/B only if non-zero */ if (flags & ALT && (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0)) ox[1] = ch; goto nosign; break; case 'C': flags |= LONGINT; /*FALLTHROUGH*/ case 'c': if (flags & LONGINT) { static const mbstate_t initial; mbstate_t mbs; size_t mbseqlen; mbs = initial; mbseqlen = wcrtomb(cp = buf, (wchar_t)GETARG(wint_t), &mbs); if (mbseqlen == (size_t)-1) { fp->_flags |= __SERR; goto error; } size = (int)mbseqlen; } else { *(cp = buf) = GETARG(int); size = 1; } sign = '\0'; break; case 'D': flags |= LONGINT; /*FALLTHROUGH*/ case 'd': case 'i': if (flags & INTMAX_SIZE) { ujval = SJARG(); if ((intmax_t)ujval < 0) { ujval = -ujval; sign = '-'; } } else { ulval = SARG(); if ((long)ulval < 0) { ulval = -ulval; sign = '-'; } } base = 10; goto number; #ifndef NO_FLOATING_POINT case 'a': case 'A': if (ch == 'a') { ox[1] = 'x'; xdigs = xdigs_lower; expchar = 'p'; } else { ox[1] = 'X'; xdigs = xdigs_upper; expchar = 'P'; } if (prec >= 0) prec++; if (dtoaresult != NULL) freedtoa(dtoaresult); if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = cp = __hldtoa(fparg.ldbl, xdigs, prec, &expt, &signflag, &dtoaend); } else { fparg.dbl = GETARG(double);
dtoaresult = cp = __hdtoa(fparg.dbl, xdigs, prec, &expt, &signflag, &dtoaend); } if (prec < 0) prec = dtoaend - cp; if (expt == INT_MAX) ox[1] = '\0'; goto fp_common; case 'e': case 'E': expchar = ch; if (prec < 0) /* account for digit before decpt */ prec = DEFPREC + 1; else prec++; goto fp_begin; case 'f': case 'F': expchar = '\0'; goto fp_begin; case 'g': case 'G': expchar = ch - ('g' - 'e'); if (prec == 0) prec = 1; fp_begin: if (prec < 0) prec = DEFPREC; if (dtoaresult != NULL) freedtoa(dtoaresult); if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = cp = __ldtoa(&fparg.ldbl, expchar ? 2 : 3, prec, &expt, &signflag, &dtoaend); } else { fparg.dbl = GETARG(double); dtoaresult = cp = dtoa(fparg.dbl, expchar ? 2 : 3, prec, &expt, &signflag, &dtoaend); if (expt == 9999) expt = INT_MAX; } fp_common: if (signflag) sign = '-'; if (expt == INT_MAX) { /* inf or nan */ if (*cp == 'N') { cp = (ch >= 'a') ? "nan" : "NAN"; sign = '\0'; } else cp = (ch >= 'a') ? "inf" : "INF"; size = 3; flags &= ~ZEROPAD; break; } flags |= FPT; ndig = dtoaend - cp; if (ch == 'g' || ch == 'G') { if (expt > -4 && expt <= prec) { /* Make %[gG] smell like %[fF] */ expchar = '\0'; if (flags & ALT) prec -= expt; else prec = ndig - expt; if (prec < 0) prec = 0; } else { /* * Make %[gG] smell like %[eE], but * trim trailing zeroes if no # flag. */ if (!(flags & ALT)) prec = ndig; } } if (expchar) { expsize = exponent(expstr, expt - 1, expchar); size = expsize + prec; if (prec > 1 || flags & ALT) size += decpt_len; } else { /* space for digits before decimal point */ if (expt > 0) size = expt; else /* "0" */ size = 1; /* space for decimal pt and following digits */ if (prec || flags & ALT) size += prec + decpt_len; if ((flags & GROUPING) && expt > 0) size += grouping_init(&gs, expt, locale); } break; #endif /* !NO_FLOATING_POINT */ case 'm': cp = strerror(saved_errno); size = (prec >= 0) ?
strnlen(cp, prec) : strlen(cp); sign = '\0'; break; case 'n': /* * Assignment-like behavior is specified if the * value overflows or is otherwise unrepresentable. * C99 says to use `signed char' for %hhn conversions. */ if (flags & LLONGINT) *GETARG(long long *) = ret; else if (flags & SIZET) *GETARG(ssize_t *) = (ssize_t)ret; else if (flags & PTRDIFFT) *GETARG(ptrdiff_t *) = ret; else if (flags & INTMAXT) *GETARG(intmax_t *) = ret; else if (flags & LONGINT) *GETARG(long *) = ret; else if (flags & SHORTINT) *GETARG(short *) = ret; else if (flags & CHARINT) *GETARG(signed char *) = ret; else *GETARG(int *) = ret; continue; /* no output */ case 'O': flags |= LONGINT; /*FALLTHROUGH*/ case 'o': if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 8; goto nosign; case 'p': /*- * ``The argument shall be a pointer to void. The * value of the pointer is converted to a sequence * of printable characters, in an implementation- * defined manner.'' * -- ANSI X3J11 */ ujval = (uintmax_t)(uintptr_t)GETARG(void *); base = 16; xdigs = xdigs_lower; flags = flags | INTMAXT; ox[1] = 'x'; goto nosign; case 'S': flags |= LONGINT; /*FALLTHROUGH*/ case 's': if (flags & LONGINT) { wchar_t *wcp; if (convbuf != NULL) free(convbuf); if ((wcp = GETARG(wchar_t *)) == NULL) cp = "(null)"; else { convbuf = __wcsconv(wcp, prec); if (convbuf == NULL) { fp->_flags |= __SERR; goto error; } cp = convbuf; } } else if ((cp = GETARG(char *)) == NULL) cp = "(null)"; size = (prec >= 0) ? strnlen(cp, prec) : strlen(cp); sign = '\0'; break; case 'U': flags |= LONGINT; /*FALLTHROUGH*/ case 'u': if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 10; goto nosign; case 'X': xdigs = xdigs_upper; goto hex; case 'x': xdigs = xdigs_lower; hex: if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 16; /* leading 0x/X only if non-zero */ if (flags & ALT && (flags & INTMAX_SIZE ?
ujval != 0 : ulval != 0)) ox[1] = ch; flags &= ~GROUPING; /* unsigned conversions */ nosign: sign = '\0'; /*- * ``... diouXx conversions ... if a precision is * specified, the 0 flag will be ignored.'' * -- ANSI X3J11 */ number: if ((dprec = prec) >= 0) flags &= ~ZEROPAD; /*- * ``The result of converting a zero value with an * explicit precision of zero is no characters.'' * -- ANSI X3J11 * * ``The C Standard is clear enough as is. The call * printf("%#.0o", 0) should print 0.'' * -- Defect Report #151 */ cp = buf + BUF; if (flags & INTMAX_SIZE) { if (ujval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ujtoa(ujval, cp, base, flags & ALT, xdigs); } else { if (ulval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ultoa(ulval, cp, base, flags & ALT, xdigs); } size = buf + BUF - cp; if (size > BUF) /* should never happen */ abort(); if ((flags & GROUPING) && size != 0) size += grouping_init(&gs, size, locale); break; default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') goto done; +invalid: /* pretend it was %c with argument ch */ cp = buf; *cp = ch; size = 1; sign = '\0'; break; } /* * All reasonable formats wind up here. At this point, `cp' * points to a string which (if not flags&LADJUST) should be * padded out to `width' places. If flags&ZEROPAD, it should * first be prefixed by any sign or other prefix; otherwise, * it should be blank padded before the prefix is emitted. * After any left-hand padding and prefixing, emit zeroes * required by a decimal [diouxX] precision, then print the * string proper, then emit zeroes required by any leftover * floating precision; finally, if LADJUST, pad with blanks. * * Compute actual size, so we know how much to pad. * size excludes decimal prec; realsz includes it. */ realsz = dprec > size ? dprec : size; if (sign) realsz++; if (ox[1]) realsz += 2; prsize = width > realsz ?
width : realsz; if ((unsigned)ret + prsize > INT_MAX) { ret = EOF; errno = EOVERFLOW; goto error; } /* right-adjusting blank padding */ if ((flags & (LADJUST|ZEROPAD)) == 0) PAD(width - realsz, blanks); /* prefix */ if (sign) PRINT(&sign, 1); if (ox[1]) { /* ox[1] is either x, X, or \0 */ ox[0] = '0'; PRINT(ox, 2); } /* right-adjusting zero padding */ if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD) PAD(width - realsz, zeroes); /* the string or number proper */ #ifndef NO_FLOATING_POINT if ((flags & FPT) == 0) { #endif /* leading zeroes from decimal precision */ PAD(dprec - size, zeroes); if (gs.grouping) { if (grouping_print(&gs, &io, cp, buf+BUF, locale) < 0) goto error; } else { PRINT(cp, size); } #ifndef NO_FLOATING_POINT } else { /* glue together f_p fragments */ if (!expchar) { /* %[fF] or sufficiently short %[gG] */ if (expt <= 0) { PRINT(zeroes, 1); if (prec || flags & ALT) PRINT(decimal_point,decpt_len); PAD(-expt, zeroes); /* already handled initial 0's */ prec += expt; } else { if (gs.grouping) { n = grouping_print(&gs, &io, cp, dtoaend, locale); if (n < 0) goto error; cp += n; } else { PRINTANDPAD(cp, dtoaend, expt, zeroes); cp += expt; } if (prec || flags & ALT) PRINT(decimal_point,decpt_len); } PRINTANDPAD(cp, dtoaend, prec, zeroes); } else { /* %[eE] or sufficiently long %[gG] */ if (prec > 1 || flags & ALT) { PRINT(cp++, 1); PRINT(decimal_point, decpt_len); PRINT(cp, ndig-1); PAD(prec - ndig, zeroes); } else /* XeYYY */ PRINT(cp, 1); PRINT(expstr, expsize); } } #endif /* left-adjusting padding (always blank) */ if (flags & LADJUST) PAD(width - realsz, blanks); /* finally, adjust ret */ ret += prsize; FLUSH(); /* copy out the I/O vectors */ } done: FLUSH(); error: va_end(orgap); #ifndef NO_FLOATING_POINT if (dtoaresult != NULL) freedtoa(dtoaresult); #endif if (convbuf != NULL) free(convbuf); /* report EOF if an error is flagged on fp; otherwise put back the __SERR bit that was saved and cleared at entry */ if (__sferror(fp)) ret = EOF; else fp->_flags |= savserr; if ((argtable != NULL) && (argtable != statargtable)) free (argtable); return (ret); /* NOTREACHED */ } diff
--git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c index 9727c9e70c34..fa2e865f33f9 100644 --- a/lib/libc/stdio/vfscanf.c +++ b/lib/libc/stdio/vfscanf.c @@ -1,1121 +1,1161 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Copyright (c) 2011 The FreeBSD Foundation * * Copyright (c) 2023 Dag-Erling Smørgrav * * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * This code is derived from software contributed to Berkeley by * Chris Torek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include #include "namespace.h" #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "collate.h" #include "libc_private.h" #include "local.h" #include "xlocale_private.h" #ifndef NO_FLOATING_POINT #include #endif #define BUF 513 /* Maximum length of numeric string. */ /* * Flags used during conversion. */ #define LONG 0x01 /* l: long or double */ #define LONGDBL 0x02 /* L: long double */ #define SHORT 0x04 /* h: short */ #define SUPPRESS 0x08 /* *: suppress assignment */ #define POINTER 0x10 /* p: void * (as hex) */ #define NOSKIP 0x20 /* [ or c: do not skip blanks */ +#define FASTINT 0x200 /* wfN: int_fastN_t */ #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ #define INTMAXT 0x800 /* j: intmax_t */ #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ #define SIZET 0x2000 /* z: size_t */ #define SHORTSHORT 0x4000 /* hh: char */ #define UNSIGNED 0x8000 /* %[oupxX] conversions */ /* * Conversion types. */ #define CT_CHAR 0 /* %c conversion */ #define CT_CCL 1 /* %[...] 
conversion */ #define CT_STRING 2 /* %s conversion */ #define CT_INT 3 /* %[dioupxX] conversion */ #define CT_FLOAT 4 /* %[efgEFG] conversion */ static const u_char *__sccl(char *, const u_char *); #ifndef NO_FLOATING_POINT static int parsefloat(FILE *, char *, char *, locale_t); #endif __weak_reference(__vfscanf, vfscanf); /* * Conversion functions are passed a pointer to this object instead of * a real parameter to indicate that the assignment-suppression (*) * flag was specified. We could use a NULL pointer to indicate this, * but that would mask bugs in applications that call scanf() with a * NULL pointer. */ static const int suppress; #define SUPPRESS_PTR ((void *)&suppress) static const mbstate_t initial_mbs; /* * The following conversion functions return the number of characters consumed, * or -1 on input failure. Character class conversion returns 0 on match * failure. */ static __inline int convert_char(FILE *fp, char * p, int width) { int n; if (p == SUPPRESS_PTR) { size_t sum = 0; for (;;) { if ((n = fp->_r) < width) { sum += n; width -= n; fp->_p += n; if (__srefill(fp)) { if (sum == 0) return (-1); break; } } else { sum += width; fp->_r -= width; fp->_p += width; break; } } return (sum); } else { size_t r = __fread(p, 1, width, fp); if (r == 0) return (-1); return (r); } } static __inline int convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) { mbstate_t mbs; int n, nread; wint_t wi; mbs = initial_mbs; n = 0; while (width-- != 0 && (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { if (wcp != SUPPRESS_PTR) *wcp++ = (wchar_t)wi; n += nread; } if (n == 0) return (-1); return (n); } static __inline int convert_ccl(FILE *fp, char * p, int width, const char *ccltab) { char *p0; int n; if (p == SUPPRESS_PTR) { n = 0; while (ccltab[*fp->_p]) { n++, fp->_r--, fp->_p++; if (--width == 0) break; if (fp->_r <= 0 && __srefill(fp)) { if (n == 0) return (-1); break; } } } else { p0 = p; while (ccltab[*fp->_p]) { fp->_r--; *p++ = *fp->_p++; if 
(--width == 0) break; if (fp->_r <= 0 && __srefill(fp)) { if (p == p0) return (-1); break; } } n = p - p0; if (n == 0) return (0); *p = 0; } return (n); } static __inline int convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, locale_t locale) { mbstate_t mbs; wint_t wi; int n, nread; mbs = initial_mbs; n = 0; if (wcp == SUPPRESS_PTR) { while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && width-- != 0 && ccltab[wctob(wi)]) n += nread; if (wi != WEOF) __ungetwc(wi, fp, __get_locale()); } else { while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && width-- != 0 && ccltab[wctob(wi)]) { *wcp++ = (wchar_t)wi; n += nread; } if (wi != WEOF) __ungetwc(wi, fp, __get_locale()); if (n == 0) return (0); *wcp = 0; } return (n); } static __inline int convert_string(FILE *fp, char * p, int width) { char *p0; int n; if (p == SUPPRESS_PTR) { n = 0; while (!isspace(*fp->_p)) { n++, fp->_r--, fp->_p++; if (--width == 0) break; if (fp->_r <= 0 && __srefill(fp)) break; } } else { p0 = p; while (!isspace(*fp->_p)) { fp->_r--; *p++ = *fp->_p++; if (--width == 0) break; if (fp->_r <= 0 && __srefill(fp)) break; } *p = 0; n = p - p0; } return (n); } static __inline int convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) { mbstate_t mbs; wint_t wi; int n, nread; mbs = initial_mbs; n = 0; if (wcp == SUPPRESS_PTR) { while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && width-- != 0 && !iswspace(wi)) n += nread; if (wi != WEOF) __ungetwc(wi, fp, __get_locale()); } else { while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && width-- != 0 && !iswspace(wi)) { *wcp++ = (wchar_t)wi; n += nread; } if (wi != WEOF) __ungetwc(wi, fp, __get_locale()); *wcp = '\0'; } return (n); } enum parseint_state { begin, havesign, havezero, haveprefix, any, }; static __inline int parseint_fsm(int c, enum parseint_state *state, int *base) { switch (c) { case '+': case '-': if (*state == begin) { *state = havesign; return 1; } break; case '0': if 
(*state == begin || *state == havesign) { *state = havezero; } else { /* * A zero that follows the initial zero while the base is still * being auto-detected (base 0) means the number is octal, as it * would be for strtol() with base 0. Without this the base * stays 0 and every later digit is rejected, so "007" would be * scanned as "00" with the 7 left unread. */ if (*state == havezero && *base == 0) { *base = 8; } *state = any; } return 1; case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* a nonzero octal digit right after a leading zero selects base 8 */ if (*state == havezero && *base == 0) { *base = 8; } /* FALL THROUGH */ case '8': case '9': /* a leading nonzero digit selects base 10 when auto-detecting */ if (*state == begin || *state == havesign) { if (*base == 0) { *base = 10; } } if (*state == begin || *state == havesign || *state == havezero || *state == haveprefix || *state == any) { /* accept the digit only if it is valid in the current base */ if (*base > c - '0') { *state = any; return 1; } } break; case 'b': /* 'b' right after a leading zero is a binary prefix if the base allows it; otherwise it is tried as a hex digit below */ if (*state == havezero) { if (*base == 0 || *base == 2) { *state = haveprefix; *base = 2; return 1; } } /* FALL THROUGH */ case 'a': case 'c': case 'd': case 'e': case 'f': if (*state == begin || *state == havesign || *state == havezero || *state == haveprefix || *state == any) { if (*base > c - 'a' + 10) { *state = any; return 1; } } break; case 'B': /* upper-case counterpart of the 'b' case above */ if (*state == havezero) { if (*base == 0 || *base == 2) { *state = haveprefix; *base = 2; return 1; } } /* FALL THROUGH */ case 'A': case 'C': case 'D': case 'E': case 'F': if (*state == begin || *state == havesign || *state == havezero || *state == haveprefix || *state == any) { if (*base > c - 'A' + 10) { *state = any; return 1; } } break; case 'x': case 'X': /* hex prefix: only valid right after a leading zero, and only if the base is 0 or 16 */ if (*state == havezero) { if (*base == 0 || *base == 16) { *state = haveprefix; *base = 16; return 1; } } break; } /* any other character, or one rejected above, ends the number */ return 0; } /* * Read an integer, storing it in buf. * * Return 0 on a match failure, and the number of characters read * otherwise. */ static __inline int parseint(FILE *fp, char * __restrict buf, int width, int base) { enum parseint_state state = begin; char *p; int c; for (p = buf; width; width--) { c = __sgetc(fp); if (c == EOF) break; if (!parseint_fsm(c, &state, &base)) break; *p++ = c; } /* * If we only had a sign, push it back. If we only had a 0b or 0x * prefix (possibly preceded by a sign), we view it as "0" and * push back the letter. In all other cases, if we stopped * because we read a non-number character, push it back. 
*/ if (state == havesign) { p--; (void) __ungetc(*(u_char *)p, fp); } else if (state == haveprefix) { p--; (void) __ungetc(c, fp); } else if (width && c != EOF) { (void) __ungetc(c, fp); } return (p - buf); } /* * __vfscanf - MT-safe version */ int __vfscanf(FILE *fp, char const *fmt0, va_list ap) { int ret; FLOCKFILE_CANCELSAFE(fp); ret = __svfscanf(fp, __get_locale(), fmt0, ap); FUNLOCKFILE_CANCELSAFE(); return (ret); } int vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap) { int ret; FIX_LOCALE(locale); FLOCKFILE_CANCELSAFE(fp); ret = __svfscanf(fp, locale, fmt0, ap); FUNLOCKFILE_CANCELSAFE(); return (ret); } /* * __svfscanf - non-MT-safe version of __vfscanf */ int __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap) { #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) const u_char *fmt = (const u_char *)fmt0; int c; /* character from format, or conversion */ size_t width; /* field width, or 0 */ int flags; /* flags as defined above */ int nassigned; /* number of fields assigned */ int nconversions; /* number of conversions */ int nr; /* characters read by the current conversion */ int nread; /* number of characters consumed from fp */ int base; /* base argument to conversion function */ char ccltab[256]; /* character class table for %[...] */ char buf[BUF]; /* buffer for numeric conversions */ ORIENT(fp, -1); nassigned = 0; nconversions = 0; nread = 0; for (;;) { c = *fmt++; if (c == 0) return (nassigned); if (isspace(c)) { while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) nread++, fp->_r--, fp->_p++; continue; } if (c != '%') goto literal; width = 0; flags = 0; /* * switch on the format. continue if done; * break once format type is derived. 
*/ again: c = *fmt++; switch (c) { case '%': literal: if (fp->_r <= 0 && __srefill(fp)) goto input_failure; if (*fp->_p != c) goto match_failure; fp->_r--, fp->_p++; nread++; continue; case '*': flags |= SUPPRESS; goto again; case 'j': flags |= INTMAXT; goto again; case 'l': if (flags & LONG) { flags &= ~LONG; flags |= LONGLONG; } else flags |= LONG; goto again; case 'q': flags |= LONGLONG; /* not quite */ goto again; case 't': flags |= PTRDIFFT; goto again; + case 'w': + /* + * Fixed-width integer types. On all platforms we + * support, int8_t is equivalent to char, int16_t + * is equivalent to short, int32_t is equivalent + * to int, int64_t is equivalent to long long int. + * Furthermore, int_fast8_t, int_fast16_t and + * int_fast32_t are equivalent to int, and + * int_fast64_t is equivalent to long long int. + */ + flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT); + if (fmt[0] == 'f') { + flags |= FASTINT; + fmt++; + } else { + flags &= ~FASTINT; + } + if (fmt[0] == '8') { + if (!(flags & FASTINT)) + flags |= SHORTSHORT; + else + /* no flag set = 32 */ ; + fmt += 1; + } else if (fmt[0] == '1' && fmt[1] == '6') { + if (!(flags & FASTINT)) + flags |= SHORT; + else + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '3' && fmt[1] == '2') { + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '6' && fmt[1] == '4') { + flags |= LONGLONG; + fmt += 2; + } else { + goto match_failure; + } + goto again; case 'z': flags |= SIZET; goto again; case 'L': flags |= LONGDBL; goto again; case 'h': if (flags & SHORT) { flags &= ~SHORT; flags |= SHORTSHORT; } else flags |= SHORT; goto again; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': width = width * 10 + c - '0'; goto again; /* * Conversions. 
*/ case 'B': case 'b': c = CT_INT; flags |= UNSIGNED; base = 2; break; case 'd': c = CT_INT; base = 10; break; case 'i': /* base 0: parseint() picks the base from the input's prefix */ c = CT_INT; base = 0; break; case 'o': c = CT_INT; flags |= UNSIGNED; base = 8; break; case 'u': c = CT_INT; flags |= UNSIGNED; base = 10; break; case 'X': case 'x': c = CT_INT; flags |= UNSIGNED; base = 16; break; #ifndef NO_FLOATING_POINT case 'A': case 'E': case 'F': case 'G': case 'a': case 'e': case 'f': case 'g': c = CT_FLOAT; break; #endif case 'S': /* %S is handled as %s with the LONG flag set */ flags |= LONG; /* FALLTHROUGH */ case 's': c = CT_STRING; break; case '[': /* __sccl() fills ccltab and returns the format position past the scanset */ fmt = __sccl(ccltab, fmt); flags |= NOSKIP; c = CT_CCL; break; case 'C': /* %C is handled as %c with the LONG flag set */ flags |= LONG; /* FALLTHROUGH */ case 'c': flags |= NOSKIP; c = CT_CHAR; break; case 'p': /* pointer format is like hex */ flags |= POINTER; c = CT_INT; /* assumes sizeof(uintmax_t) */ flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ base = 16; break; case 'n': /* * %n consumes no input: it stores the number of characters * read so far (nread) through a pointer whose type is chosen * by the length modifier, then moves on to the next directive * without counting as an assignment or a conversion. */ if (flags & SUPPRESS) /* ??? */ continue; if (flags & SHORTSHORT) *va_arg(ap, char *) = nread; else if (flags & SHORT) *va_arg(ap, short *) = nread; else if (flags & LONG) *va_arg(ap, long *) = nread; else if (flags & LONGLONG) *va_arg(ap, long long *) = nread; else if (flags & INTMAXT) *va_arg(ap, intmax_t *) = nread; else if (flags & SIZET) *va_arg(ap, size_t *) = nread; else if (flags & PTRDIFFT) *va_arg(ap, ptrdiff_t *) = nread; else *va_arg(ap, int *) = nread; continue; default: goto match_failure; /* * Disgusting backwards compatibility hack. XXX */ case '\0': /* compat */ return (EOF); } /* * We have a conversion that requires input. */ if (fp->_r <= 0 && __srefill(fp)) goto input_failure; /* * Consume leading white space, except for formats * that suppress this. */ if ((flags & NOSKIP) == 0) { while (isspace(*fp->_p)) { nread++; if (--fp->_r > 0) fp->_p++; else if (__srefill(fp)) goto input_failure; } /* * Note that there is at least one character in * the buffer, so conversions that do not set NOSKIP * can no longer result in an input failure. */ } /* * Do the conversion. 
*/ switch (c) { case CT_CHAR: /* scan arbitrary characters (sets NOSKIP) */ if (width == 0) width = 1; if (flags & LONG) { nr = convert_wchar(fp, GETARG(wchar_t *), width, locale); } else { nr = convert_char(fp, GETARG(char *), width); } if (nr < 0) goto input_failure; break; case CT_CCL: /* scan a (nonempty) character class (sets NOSKIP) */ if (width == 0) width = (size_t)~0; /* `infinity' */ if (flags & LONG) { nr = convert_wccl(fp, GETARG(wchar_t *), width, ccltab, locale); } else { nr = convert_ccl(fp, GETARG(char *), width, ccltab); } if (nr <= 0) { if (nr < 0) goto input_failure; else /* nr == 0 */ goto match_failure; } break; case CT_STRING: /* like CCL, but zero-length string OK, & no NOSKIP */ if (width == 0) width = (size_t)~0; if (flags & LONG) { nr = convert_wstring(fp, GETARG(wchar_t *), width, locale); } else { nr = convert_string(fp, GETARG(char *), width); } if (nr < 0) goto input_failure; break; case CT_INT: /* scan an integer as if by the conversion function */ #ifdef hardway if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1; #else /* size_t is unsigned, hence this optimisation */ if (--width > sizeof(buf) - 2) width = sizeof(buf) - 2; width++; #endif nr = parseint(fp, buf, width, base); if (nr == 0) goto match_failure; if ((flags & SUPPRESS) == 0) { uintmax_t res; buf[nr] = '\0'; if ((flags & UNSIGNED) == 0) res = strtoimax_l(buf, (char **)NULL, base, locale); else res = strtoumax_l(buf, (char **)NULL, base, locale); if (flags & POINTER) *va_arg(ap, void **) = (void *)(uintptr_t)res; else if (flags & SHORTSHORT) *va_arg(ap, char *) = res; else if (flags & SHORT) *va_arg(ap, short *) = res; else if (flags & LONG) *va_arg(ap, long *) = res; else if (flags & LONGLONG) *va_arg(ap, long long *) = res; else if (flags & INTMAXT) *va_arg(ap, intmax_t *) = res; else if (flags & PTRDIFFT) *va_arg(ap, ptrdiff_t *) = res; else if (flags & SIZET) *va_arg(ap, size_t *) = res; else *va_arg(ap, int *) = res; } break; #ifndef NO_FLOATING_POINT 
case CT_FLOAT: /* scan a floating point number as if by strtod */ if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1; nr = parsefloat(fp, buf, buf + width, locale); if (nr == 0) goto match_failure; if ((flags & SUPPRESS) == 0) { if (flags & LONGDBL) { long double res = strtold_l(buf, NULL, locale); *va_arg(ap, long double *) = res; } else if (flags & LONG) { double res = strtod_l(buf, NULL, locale); *va_arg(ap, double *) = res; } else { float res = strtof_l(buf, NULL, locale); *va_arg(ap, float *) = res; } } break; #endif /* !NO_FLOATING_POINT */ } if (!(flags & SUPPRESS)) nassigned++; nread += nr; nconversions++; } input_failure: return (nconversions != 0 ? nassigned : EOF); match_failure: return (nassigned); } /* * Fill in the given table from the scanset at the given format * (just after `['). Return a pointer to the character past the * closing `]'. The table has a 1 wherever characters should be * considered part of the scanset. */ static const u_char * __sccl(char *tab, const u_char *fmt) { int c, n, v, i; struct xlocale_collate *table = (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; /* first `clear' the whole table */ c = *fmt++; /* first char hat => negated scanset */ if (c == '^') { v = 1; /* default => accept */ c = *fmt++; /* get new first char */ } else v = 0; /* default => reject */ /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ (void) memset(tab, v, 256); if (c == 0) return (fmt - 1);/* format ended before closing ] */ /* * Now set the entries corresponding to the actual scanset * to the opposite of the above. * * The first character may be ']' (or '-') without being special; * the last character may be '-'. 
*/ v = 1 - v; for (;;) { tab[c] = v; /* take character c */ doswitch: n = *fmt++; /* and examine the next */ switch (n) { case 0: /* format ended too soon */ return (fmt - 1); case '-': /* * A scanset of the form * [01+-] * is defined as `the digit 0, the digit 1, * the character +, the character -', but * the effect of a scanset such as * [a-zA-Z0-9] * is implementation defined. The V7 Unix * scanf treats `a-z' as `the letters a through * z', but treats `a-a' as `the letter a, the * character -, and the letter a'. * * For compatibility, the `-' is not considered * to define a range if the character following * it is either a close bracket (required by ANSI) * or is not numerically greater than the character * we just stored in the table (c). */ n = *fmt; if (n == ']' || (table->__collate_load_error ? n < c : __collate_range_cmp(n, c) < 0 ) ) { c = '-'; break; /* resume the for(;;) */ } fmt++; /* fill in the range */ if (table->__collate_load_error) { do { tab[++c] = v; } while (c < n); } else { for (i = 0; i < 256; i ++) if (__collate_range_cmp(c, i) <= 0 && __collate_range_cmp(i, n) <= 0 ) tab[i] = v; } #if 1 /* XXX another disgusting compatibility hack */ c = n; /* * Alas, the V7 Unix scanf also treats formats * such as [a-c-e] as `the letters a through e'. * This too is permitted by the standard.... 
*/ goto doswitch; #else c = *fmt++; if (c == 0) return (fmt - 1); if (c == ']') return (fmt); #endif break; case ']': /* end of scanset */ return (fmt); default: /* just another character */ c = n; break; } } /* NOTREACHED */ } #ifndef NO_FLOATING_POINT static int parsefloat(FILE *fp, char *buf, char *end, locale_t locale) { char *commit, *p; int infnanpos = 0, decptpos = 0; enum { S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS } state = S_START; unsigned char c; const char *decpt = localeconv_l(locale)->decimal_point; _Bool gotmantdig = 0, ishex = 0; /* * We set commit = p whenever the string we have read so far * constitutes a valid representation of a floating point * number by itself. At some point, the parse will complete * or fail, and we will ungetc() back to the last commit point. * To ensure that the file offset gets updated properly, it is * always necessary to read at least one character that doesn't * match; thus, we can't short-circuit "infinity" or "nan(...)". 
*/ commit = buf - 1; for (p = buf; p < end; ) { c = *fp->_p; reswitch: switch (state) { case S_START: state = S_GOTSIGN; if (c == '-' || c == '+') break; else goto reswitch; case S_GOTSIGN: switch (c) { case '0': state = S_MAYBEHEX; commit = p; break; case 'I': case 'i': state = S_INF; break; case 'N': case 'n': state = S_NAN; break; default: state = S_DIGITS; goto reswitch; } break; case S_INF: if (infnanpos > 6 || (c != "nfinity"[infnanpos] && c != "NFINITY"[infnanpos])) goto parsedone; if (infnanpos == 1 || infnanpos == 6) commit = p; /* inf or infinity */ infnanpos++; break; case S_NAN: switch (infnanpos) { case 0: if (c != 'A' && c != 'a') goto parsedone; break; case 1: if (c != 'N' && c != 'n') goto parsedone; else commit = p; break; case 2: if (c != '(') goto parsedone; break; default: if (c == ')') { commit = p; state = S_DONE; } else if (!isalnum(c) && c != '_') goto parsedone; break; } infnanpos++; break; case S_DONE: goto parsedone; case S_MAYBEHEX: state = S_DIGITS; if (c == 'X' || c == 'x') { ishex = 1; break; } else { /* we saw a '0', but no 'x' */ gotmantdig = 1; goto reswitch; } case S_DIGITS: if ((ishex && isxdigit(c)) || isdigit(c)) { gotmantdig = 1; commit = p; break; } else { state = S_DECPT; goto reswitch; } case S_DECPT: if (c == decpt[decptpos]) { if (decpt[++decptpos] == '\0') { /* We read the complete decpt seq. */ state = S_FRAC; if (gotmantdig) commit = p; } break; } else if (!decptpos) { /* We didn't read any decpt characters. */ state = S_FRAC; goto reswitch; } else { /* * We read part of a multibyte decimal point, * but the rest is invalid, so bail. 
*/ goto parsedone; } case S_FRAC: if (((c == 'E' || c == 'e') && !ishex) || ((c == 'P' || c == 'p') && ishex)) { if (!gotmantdig) goto parsedone; else state = S_EXP; } else if ((ishex && isxdigit(c)) || isdigit(c)) { commit = p; gotmantdig = 1; } else goto parsedone; break; case S_EXP: state = S_EXPDIGITS; if (c == '-' || c == '+') break; else goto reswitch; case S_EXPDIGITS: if (isdigit(c)) commit = p; else goto parsedone; break; default: abort(); } *p++ = c; if (--fp->_r > 0) fp->_p++; else if (__srefill(fp)) break; /* EOF */ } parsedone: while (commit < --p) __ungetc(*(u_char *)p, fp); *++commit = '\0'; return (commit - buf); } #endif diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c index 259a86467ea7..d298ed03f521 100644 --- a/lib/libc/stdio/vfwprintf.c +++ b/lib/libc/stdio/vfwprintf.c @@ -1,1122 +1,1166 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Chris Torek. * * Copyright (c) 2011 The FreeBSD Foundation * * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)vfprintf.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #endif #include /* * Actual wprintf innards. * * Avoid making gratuitous changes to this source file; it should be kept * as close as possible to vfprintf.c for ease of maintenance. 
*/ #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "local.h" #include "fvwrite.h" #include "printflocal.h" #include "xlocale_private.h" static int __sprint(FILE *, struct __suio *, locale_t); static int __sbprintf(FILE *, locale_t, const wchar_t *, va_list) __noinline; static wint_t __xfputwc(wchar_t, FILE *, locale_t); static wchar_t *__mbsconv(char *, int); #define CHAR wchar_t #include "printfcommon.h" struct grouping_state { wchar_t thousands_sep; /* locale-specific thousands separator */ const char *grouping; /* locale-specific numeric grouping rules */ int lead; /* sig figs before decimal or group sep */ int nseps; /* number of group separators with ' */ int nrepeats; /* number of repeats of the last group */ }; static const mbstate_t initial_mbs; static inline wchar_t get_decpt(locale_t locale) { mbstate_t mbs; wchar_t decpt; int nconv; mbs = initial_mbs; nconv = mbrtowc(&decpt, localeconv_l(locale)->decimal_point, MB_CUR_MAX, &mbs); if (nconv == (size_t)-1 || nconv == (size_t)-2) decpt = '.'; /* failsafe */ return (decpt); } static inline wchar_t get_thousep(locale_t locale) { mbstate_t mbs; wchar_t thousep; int nconv; mbs = initial_mbs; nconv = mbrtowc(&thousep, localeconv_l(locale)->thousands_sep, MB_CUR_MAX, &mbs); if (nconv == (size_t)-1 || nconv == (size_t)-2) thousep = '\0'; /* failsafe */ return (thousep); } /* * Initialize the thousands' grouping state in preparation to print a * number with ndigits digits. This routine returns the total number * of wide characters that will be printed. 
*/ static int grouping_init(struct grouping_state *gs, int ndigits, locale_t locale) { gs->grouping = localeconv_l(locale)->grouping; gs->thousands_sep = get_thousep(locale); gs->nseps = gs->nrepeats = 0; gs->lead = ndigits; while (*gs->grouping != CHAR_MAX) { if (gs->lead <= *gs->grouping) break; gs->lead -= *gs->grouping; if (*(gs->grouping+1)) { gs->nseps++; gs->grouping++; } else gs->nrepeats++; } return (gs->nseps + gs->nrepeats); } /* * Print a number with thousands' separators. */ static int grouping_print(struct grouping_state *gs, struct io_state *iop, const CHAR *cp, const CHAR *ep, locale_t locale) { const CHAR *cp0 = cp; if (io_printandpad(iop, cp, ep, gs->lead, zeroes, locale)) return (-1); cp += gs->lead; while (gs->nseps > 0 || gs->nrepeats > 0) { if (gs->nrepeats > 0) gs->nrepeats--; else { gs->grouping--; gs->nseps--; } if (io_print(iop, &gs->thousands_sep, 1, locale)) return (-1); if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes, locale)) return (-1); cp += *gs->grouping; } if (cp > ep) cp = ep; return (cp - cp0); } /* * Flush out all the vectors defined by the given uio, * then reset it so that it can be reused. * * XXX The fact that we do this a character at a time and convert to a * multibyte character sequence even if the destination is a wide * string eclipses the benefits of buffering. */ static int __sprint(FILE *fp, struct __suio *uio, locale_t locale) { struct __siov *iov; wchar_t *p; int i, len; iov = uio->uio_iov; for (; uio->uio_resid != 0; uio->uio_resid -= len, iov++) { p = (wchar_t *)iov->iov_base; len = iov->iov_len; for (i = 0; i < len; i++) { if (__xfputwc(p[i], fp, locale) == WEOF) return (-1); } } uio->uio_iovcnt = 0; return (0); } /* * Helper function for `fprintf to unbuffered unix file': creates a * temporary buffer. We only work on write-only files; this avoids * worries about ungetc buffers and so forth. 
*/ static int __sbprintf(FILE *fp, locale_t locale, const wchar_t *fmt, va_list ap) { int ret; FILE fake; unsigned char buf[BUFSIZ]; /* XXX This is probably not needed. */ if (prepwrite(fp) != 0) return (EOF); /* copy the important variables */ fake._flags = fp->_flags & ~__SNBF; fake._file = fp->_file; fake._cookie = fp->_cookie; fake._write = fp->_write; fake._orientation = fp->_orientation; fake._mbstate = fp->_mbstate; /* set up the buffer */ fake._bf._base = fake._p = buf; fake._bf._size = fake._w = sizeof(buf); fake._lbfsize = 0; /* not actually used, but Just In Case */ /* do the work, then copy any error status */ ret = __vfwprintf(&fake, locale, fmt, ap); if (ret >= 0 && __fflush(&fake)) ret = WEOF; if (fake._flags & __SERR) fp->_flags |= __SERR; return (ret); } /* * Like __fputwc, but handles fake string (__SSTR) files properly. * File must already be locked. */ static wint_t __xfputwc(wchar_t wc, FILE *fp, locale_t locale) { mbstate_t mbs; char buf[MB_LEN_MAX]; struct __suio uio; struct __siov iov; size_t len; if ((fp->_flags & __SSTR) == 0) return (__fputwc(wc, fp, locale)); mbs = initial_mbs; if ((len = wcrtomb(buf, wc, &mbs)) == (size_t)-1) { fp->_flags |= __SERR; return (WEOF); } uio.uio_iov = &iov; uio.uio_resid = len; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = len; return (__sfvwrite(fp, &uio) != EOF ? (wint_t)wc : WEOF); } /* * Convert a multibyte character string argument for the %s format to a wide * string representation. ``prec'' specifies the maximum number of bytes * to output. If ``prec'' is greater than or equal to zero, we can't assume * that the multibyte char. string ends in a null character. */ static wchar_t * __mbsconv(char *mbsarg, int prec) { mbstate_t mbs; wchar_t *convbuf, *wcp; const char *p; size_t insize, nchars, nconv; if (mbsarg == NULL) return (NULL); /* * Supplied argument is a multibyte string; convert it to wide * characters first. */ if (prec >= 0) { /* * String is not guaranteed to be NUL-terminated. 
Find the * number of characters to print. */ p = mbsarg; insize = nchars = nconv = 0; mbs = initial_mbs; while (nchars != (size_t)prec) { nconv = mbrlen(p, MB_CUR_MAX, &mbs); if (nconv == 0 || nconv == (size_t)-1 || nconv == (size_t)-2) break; p += nconv; nchars++; insize += nconv; } if (nconv == (size_t)-1 || nconv == (size_t)-2) return (NULL); } else { insize = strlen(mbsarg); nconv = 0; } /* * Allocate buffer for the result and perform the conversion, * converting at most `size' bytes of the input multibyte string to * wide characters for printing. */ convbuf = malloc((insize + 1) * sizeof(*convbuf)); if (convbuf == NULL) return (NULL); wcp = convbuf; p = mbsarg; mbs = initial_mbs; while (insize != 0) { nconv = mbrtowc(wcp, p, insize, &mbs); if (nconv == 0 || nconv == (size_t)-1 || nconv == (size_t)-2) break; wcp++; p += nconv; insize -= nconv; } if (nconv == (size_t)-1 || nconv == (size_t)-2) { free(convbuf); return (NULL); } *wcp = L'\0'; return (convbuf); } /* * MT-safe version */ int vfwprintf_l(FILE * __restrict fp, locale_t locale, const wchar_t * __restrict fmt0, va_list ap) { int ret; FIX_LOCALE(locale); FLOCKFILE_CANCELSAFE(fp); /* optimise fprintf(stderr) (and other unbuffered Unix files) */ if ((fp->_flags & (__SNBF|__SWR|__SRW)) == (__SNBF|__SWR) && fp->_file >= 0) ret = __sbprintf(fp, locale, fmt0, ap); else ret = __vfwprintf(fp, locale, fmt0, ap); FUNLOCKFILE_CANCELSAFE(); return (ret); } int vfwprintf(FILE * __restrict fp, const wchar_t * __restrict fmt0, va_list ap) { return vfwprintf_l(fp, __get_locale(), fmt0, ap); } /* * The size of the buffer we use as scratch space for integer * conversions, among other things. We need enough space to * write a uintmax_t in octal (plus one byte). 
*/ #if UINTMAX_MAX <= UINT64_MAX #define BUF 32 #else #error "BUF must be large enough to format a uintmax_t" #endif /* * Non-MT-safe version */ int __vfwprintf(FILE *fp, locale_t locale, const wchar_t *fmt0, va_list ap) { wchar_t *fmt; /* format string */ wchar_t ch; /* character from fmt */ int n, n2; /* handy integer (short term usage) */ wchar_t *cp; /* handy char pointer (short term usage) */ int flags; /* flags as above */ int ret; /* return value accumulator */ int width; /* width from format (%8d), or 0 */ int prec; /* precision from format; <0 for N/A */ wchar_t sign; /* sign prefix (' ', '+', '-', or \0) */ struct grouping_state gs; /* thousands' grouping info */ #ifndef NO_FLOATING_POINT /* * We can decompose the printed representation of floating * point numbers into several parts, some of which may be empty: * * [+|-| ] [0x|0X] MMM . NNN [e|E|p|P] [+|-] ZZ * A B ---C--- D E F * * A: 'sign' holds this value if present; '\0' otherwise * B: ox[1] holds the 'x' or 'X'; '\0' if not hexadecimal * C: cp points to the string MMMNNN. Leading and trailing * zeros are not in the string and must be added. * D: expchar holds this character; '\0' if no exponent, e.g. 
%f * F: at least two digits for decimal, at least one digit for hex */ wchar_t decimal_point; /* locale specific decimal point */ int signflag; /* true if float is negative */ union { /* floating point arguments %[aAeEfFgG] */ double dbl; long double ldbl; } fparg; int expt; /* integer value of exponent */ char expchar; /* exponent character: [eEpP\0] */ char *dtoaend; /* pointer to end of converted digits */ int expsize; /* character count for expstr */ int ndig; /* actual number of digits returned by dtoa */ wchar_t expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */ char *dtoaresult; /* buffer allocated by dtoa */ #endif u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ int base; /* base for [diouxX] conversion */ int dprec; /* a copy of prec if [diouxX], 0 otherwise */ int realsz; /* field size expanded by dprec, sign, etc */ int size; /* size of converted field or string */ int prsize; /* max size of printed field */ const char *xdigs; /* digits for [xX] conversion */ struct io_state io; /* I/O buffering state */ wchar_t buf[BUF]; /* buffer with space for digits of uintmax_t */ wchar_t ox[2]; /* space for 0x hex-prefix */ union arg *argtable; /* args, built due to positional arg */ union arg statargtable [STATIC_ARG_TBL_SIZE]; int nextarg; /* 1-based argument index */ va_list orgap; /* original argument pointer */ wchar_t *convbuf; /* multibyte to wide conversion result */ int savserr; static const char xdigs_lower[16] = "0123456789abcdef"; static const char xdigs_upper[16] = "0123456789ABCDEF"; /* BEWARE, these `goto error' on error. 
*/ #define PRINT(ptr, len) do { \ if (io_print(&io, (ptr), (len), locale)) \ goto error; \ } while (0) #define PAD(howmany, with) { \ if (io_pad(&io, (howmany), (with), locale)) \ goto error; \ } #define PRINTANDPAD(p, ep, len, with) { \ if (io_printandpad(&io, (p), (ep), (len), (with), locale)) \ goto error; \ } #define FLUSH() { \ if (io_flush(&io, locale)) \ goto error; \ } /* * Get the argument indexed by nextarg. If the argument table is * built, use it to get the argument. If its not, get the next * argument (and arguments must be gotten sequentially). */ #define GETARG(type) \ ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ (nextarg++, va_arg(ap, type))) /* * To extend shorts properly, we need both signed and unsigned * argument extraction methods. */ #define SARG() \ (flags&LONGINT ? GETARG(long) : \ flags&SHORTINT ? (long)(short)GETARG(int) : \ flags&CHARINT ? (long)(signed char)GETARG(int) : \ (long)GETARG(int)) #define UARG() \ (flags&LONGINT ? GETARG(u_long) : \ flags&SHORTINT ? (u_long)(u_short)GETARG(int) : \ flags&CHARINT ? (u_long)(u_char)GETARG(int) : \ (u_long)GETARG(u_int)) #define INTMAX_SIZE (INTMAXT|SIZET|PTRDIFFT|LLONGINT) #define SJARG() \ (flags&INTMAXT ? GETARG(intmax_t) : \ flags&SIZET ? (intmax_t)GETARG(ssize_t) : \ flags&PTRDIFFT ? (intmax_t)GETARG(ptrdiff_t) : \ (intmax_t)GETARG(long long)) #define UJARG() \ (flags&INTMAXT ? GETARG(uintmax_t) : \ flags&SIZET ? (uintmax_t)GETARG(size_t) : \ flags&PTRDIFFT ? (uintmax_t)GETARG(ptrdiff_t) : \ (uintmax_t)GETARG(unsigned long long)) /* * Get * arguments, including the form *nn$. Preserve the nextarg * that the argument can be gotten once the type is determined. 
*/ #define GETASTER(val) \ n2 = 0; \ cp = fmt; \ while (is_digit(*cp)) { \ n2 = 10 * n2 + to_digit(*cp); \ cp++; \ } \ if (*cp == '$') { \ int hold = nextarg; \ if (argtable == NULL) { \ argtable = statargtable; \ if (__find_warguments (fmt0, orgap, &argtable)) { \ ret = EOF; \ goto error; \ } \ } \ nextarg = n2; \ val = GETARG (int); \ nextarg = hold; \ fmt = ++cp; \ } else { \ val = GETARG (int); \ } /* sorry, fwprintf(read_only_file, L"") returns WEOF, not 0 */ if (prepwrite(fp) != 0) { errno = EBADF; return (EOF); } savserr = fp->_flags & __SERR; fp->_flags &= ~__SERR; convbuf = NULL; fmt = (wchar_t *)fmt0; argtable = NULL; nextarg = 1; va_copy(orgap, ap); io_init(&io, fp); ret = 0; #ifndef NO_FLOATING_POINT decimal_point = get_decpt(locale); #endif /* * Scan the format for conversions (`%' character). */ for (;;) { for (cp = fmt; (ch = *fmt) != '\0' && ch != '%'; fmt++) /* void */; if ((n = fmt - cp) != 0) { if ((unsigned)ret + n > INT_MAX) { ret = EOF; errno = EOVERFLOW; goto error; } PRINT(cp, n); ret += n; } if (ch == '\0') goto done; fmt++; /* skip over '%' */ flags = 0; dprec = 0; width = 0; prec = -1; gs.grouping = NULL; sign = '\0'; ox[1] = '\0'; rflag: ch = *fmt++; reswitch: switch (ch) { case ' ': /*- * ``If the space and + flags both appear, the space * flag will be ignored.'' * -- ANSI X3J11 */ if (!sign) sign = ' '; goto rflag; case '#': flags |= ALT; goto rflag; case '*': /*- * ``A negative field width argument is taken as a * - flag followed by a positive field width.'' * -- ANSI X3J11 * They don't exclude field widths read from args. 
*/ GETASTER (width); if (width >= 0) goto rflag; width = -width; /* FALLTHROUGH */ case '-': flags |= LADJUST; goto rflag; case '+': sign = '+'; goto rflag; case '\'': flags |= GROUPING; goto rflag; case '.': if ((ch = *fmt++) == '*') { GETASTER (prec); goto rflag; } prec = 0; while (is_digit(ch)) { prec = 10 * prec + to_digit(ch); ch = *fmt++; } goto reswitch; case '0': /*- * ``Note that 0 is taken as a flag, not as the * beginning of a field width.'' * -- ANSI X3J11 */ flags |= ZEROPAD; goto rflag; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = 0; do { n = 10 * n + to_digit(ch); ch = *fmt++; } while (is_digit(ch)); if (ch == '$') { nextarg = n; if (argtable == NULL) { argtable = statargtable; if (__find_warguments (fmt0, orgap, &argtable)) { ret = EOF; goto error; } } goto rflag; } width = n; goto reswitch; #ifndef NO_FLOATING_POINT case 'L': flags |= LONGDBL; goto rflag; #endif case 'h': if (flags & SHORTINT) { flags &= ~SHORTINT; flags |= CHARINT; } else flags |= SHORTINT; goto rflag; case 'j': flags |= INTMAXT; goto rflag; case 'l': if (flags & LONGINT) { flags &= ~LONGINT; flags |= LLONGINT; } else flags |= LONGINT; goto rflag; case 'q': flags |= LLONGINT; /* not necessarily */ goto rflag; case 't': flags |= PTRDIFFT; goto rflag; + case 'w': + /* + * Fixed-width integer types. On all platforms we + * support, int8_t is equivalent to char, int16_t + * is equivalent to short, int32_t is equivalent + * to int, int64_t is equivalent to long long int. + * Furthermore, int_fast8_t, int_fast16_t and + * int_fast32_t are equivalent to int, and + * int_fast64_t is equivalent to long long int. 
+ */ + flags &= ~(CHARINT|SHORTINT|LONGINT|LLONGINT|INTMAXT); + if (fmt[0] == 'f') { + flags |= FASTINT; + fmt++; + } else { + flags &= ~FASTINT; + } + if (fmt[0] == '8') { + if (!(flags & FASTINT)) + flags |= CHARINT; + else + /* no flag set = 32 */ ; + fmt += 1; + } else if (fmt[0] == '1' && fmt[1] == '6') { + if (!(flags & FASTINT)) + flags |= SHORTINT; + else + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '3' && fmt[1] == '2') { + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '6' && fmt[1] == '4') { + flags |= LLONGINT; + fmt += 2; + } else { + if (flags & FASTINT) { + flags &= ~FASTINT; + fmt--; + } + goto invalid; + } + goto rflag; case 'z': flags |= SIZET; goto rflag; case 'B': case 'b': if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 2; /* leading 0b/B only if non-zero */ if (flags & ALT && (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0)) ox[1] = ch; goto nosign; break; case 'C': flags |= LONGINT; /*FALLTHROUGH*/ case 'c': if (flags & LONGINT) *(cp = buf) = (wchar_t)GETARG(wint_t); else *(cp = buf) = (wchar_t)btowc(GETARG(int)); size = 1; sign = '\0'; break; case 'D': flags |= LONGINT; /*FALLTHROUGH*/ case 'd': case 'i': if (flags & INTMAX_SIZE) { ujval = SJARG(); if ((intmax_t)ujval < 0) { ujval = -ujval; sign = '-'; } } else { ulval = SARG(); if ((long)ulval < 0) { ulval = -ulval; sign = '-'; } } base = 10; goto number; #ifndef NO_FLOATING_POINT case 'a': case 'A': if (ch == 'a') { ox[1] = 'x'; xdigs = xdigs_lower; expchar = 'p'; } else { ox[1] = 'X'; xdigs = xdigs_upper; expchar = 'P'; } if (prec >= 0) prec++; if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = __hldtoa(fparg.ldbl, xdigs, prec, &expt, &signflag, &dtoaend); } else { fparg.dbl = GETARG(double); dtoaresult = __hdtoa(fparg.dbl, xdigs, prec, &expt, &signflag, &dtoaend); } if (prec < 0) prec = dtoaend - dtoaresult; if (expt == INT_MAX) ox[1] = '\0'; if (convbuf != NULL) free(convbuf); ndig = dtoaend - dtoaresult; cp = convbuf 
= __mbsconv(dtoaresult, -1); freedtoa(dtoaresult); goto fp_common; case 'e': case 'E': expchar = ch; if (prec < 0) /* account for digit before decpt */ prec = DEFPREC + 1; else prec++; goto fp_begin; case 'f': case 'F': expchar = '\0'; goto fp_begin; case 'g': case 'G': expchar = ch - ('g' - 'e'); if (prec == 0) prec = 1; fp_begin: if (prec < 0) prec = DEFPREC; if (convbuf != NULL) free(convbuf); if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = __ldtoa(&fparg.ldbl, expchar ? 2 : 3, prec, &expt, &signflag, &dtoaend); } else { fparg.dbl = GETARG(double); dtoaresult = dtoa(fparg.dbl, expchar ? 2 : 3, prec, &expt, &signflag, &dtoaend); if (expt == 9999) expt = INT_MAX; } ndig = dtoaend - dtoaresult; cp = convbuf = __mbsconv(dtoaresult, -1); freedtoa(dtoaresult); fp_common: if (signflag) sign = '-'; if (expt == INT_MAX) { /* inf or nan */ if (*cp == 'N') { cp = (ch >= 'a') ? L"nan" : L"NAN"; sign = '\0'; } else cp = (ch >= 'a') ? L"inf" : L"INF"; size = 3; flags &= ~ZEROPAD; break; } flags |= FPT; if (ch == 'g' || ch == 'G') { if (expt > -4 && expt <= prec) { /* Make %[gG] smell like %[fF] */ expchar = '\0'; if (flags & ALT) prec -= expt; else prec = ndig - expt; if (prec < 0) prec = 0; } else { /* * Make %[gG] smell like %[eE], but * trim trailing zeroes if no # flag. */ if (!(flags & ALT)) prec = ndig; } } if (expchar) { expsize = exponent(expstr, expt - 1, expchar); size = expsize + prec; if (prec > 1 || flags & ALT) ++size; } else { /* space for digits before decimal point */ if (expt > 0) size = expt; else /* "0" */ size = 1; /* space for decimal pt and following digits */ if (prec || flags & ALT) size += prec + 1; if ((flags & GROUPING) && expt > 0) size += grouping_init(&gs, expt, locale); } break; #endif /* !NO_FLOATING_POINT */ case 'n': /* * Assignment-like behavior is specified if the * value overflows or is otherwise unrepresentable. * C99 says to use `signed char' for %hhn conversions. 
*/ if (flags & LLONGINT) *GETARG(long long *) = ret; else if (flags & SIZET) *GETARG(ssize_t *) = (ssize_t)ret; else if (flags & PTRDIFFT) *GETARG(ptrdiff_t *) = ret; else if (flags & INTMAXT) *GETARG(intmax_t *) = ret; else if (flags & LONGINT) *GETARG(long *) = ret; else if (flags & SHORTINT) *GETARG(short *) = ret; else if (flags & CHARINT) *GETARG(signed char *) = ret; else *GETARG(int *) = ret; continue; /* no output */ case 'O': flags |= LONGINT; /*FALLTHROUGH*/ case 'o': if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 8; goto nosign; case 'p': /*- * ``The argument shall be a pointer to void. The * value of the pointer is converted to a sequence * of printable characters, in an implementation- * defined manner.'' * -- ANSI X3J11 */ ujval = (uintmax_t)(uintptr_t)GETARG(void *); base = 16; xdigs = xdigs_lower; flags = flags | INTMAXT; ox[1] = 'x'; goto nosign; case 'S': flags |= LONGINT; /*FALLTHROUGH*/ case 's': if (flags & LONGINT) { if ((cp = GETARG(wchar_t *)) == NULL) cp = L"(null)"; } else { char *mbp; if (convbuf != NULL) free(convbuf); if ((mbp = GETARG(char *)) == NULL) cp = L"(null)"; else { convbuf = __mbsconv(mbp, prec); if (convbuf == NULL) { fp->_flags |= __SERR; goto error; } cp = convbuf; } } size = (prec >= 0) ? wcsnlen(cp, prec) : wcslen(cp); sign = '\0'; break; case 'U': flags |= LONGINT; /*FALLTHROUGH*/ case 'u': if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 10; goto nosign; case 'X': xdigs = xdigs_upper; goto hex; case 'x': xdigs = xdigs_lower; hex: if (flags & INTMAX_SIZE) ujval = UJARG(); else ulval = UARG(); base = 16; /* leading 0x/X only if non-zero */ if (flags & ALT && (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0)) ox[1] = ch; flags &= ~GROUPING; /* unsigned conversions */ nosign: sign = '\0'; /*- * ``... diouXx conversions ... 
if a precision is * specified, the 0 flag will be ignored.'' * -- ANSI X3J11 */ number: if ((dprec = prec) >= 0) flags &= ~ZEROPAD; /*- * ``The result of converting a zero value with an * explicit precision of zero is no characters.'' * -- ANSI X3J11 * * ``The C Standard is clear enough as is. The call * printf("%#.0o", 0) should print 0.'' * -- Defect Report #151 */ cp = buf + BUF; if (flags & INTMAX_SIZE) { if (ujval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ujtoa(ujval, cp, base, flags & ALT, xdigs); } else { if (ulval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ultoa(ulval, cp, base, flags & ALT, xdigs); } size = buf + BUF - cp; if (size > BUF) /* should never happen */ abort(); if ((flags & GROUPING) && size != 0) size += grouping_init(&gs, size, locale); break; default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') goto done; +invalid: /* pretend it was %c with argument ch */ cp = buf; *cp = ch; size = 1; sign = '\0'; break; } /* * All reasonable formats wind up here. At this point, `cp' * points to a string which (if not flags&LADJUST) should be * padded out to `width' places. If flags&ZEROPAD, it should * first be prefixed by any sign or other prefix; otherwise, * it should be blank padded before the prefix is emitted. * After any left-hand padding and prefixing, emit zeroes * required by a decimal [diouxX] precision, then print the * string proper, then emit zeroes required by any leftover * floating precision; finally, if LADJUST, pad with blanks. * * Compute actual size, so we know how much to pad. * size excludes decimal prec; realsz includes it. */ realsz = dprec > size ? dprec : size; if (sign) realsz++; if (ox[1]) realsz += 2; prsize = width > realsz ? 
width : realsz; if ((unsigned)ret + prsize > INT_MAX) { ret = EOF; errno = EOVERFLOW; goto error; } /* right-adjusting blank padding */ if ((flags & (LADJUST|ZEROPAD)) == 0) PAD(width - realsz, blanks); /* prefix */ if (sign) PRINT(&sign, 1); if (ox[1]) { /* ox[1] is either x, X, or \0 */ ox[0] = '0'; PRINT(ox, 2); } /* right-adjusting zero padding */ if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD) PAD(width - realsz, zeroes); /* the string or number proper */ #ifndef NO_FLOATING_POINT if ((flags & FPT) == 0) { #endif /* leading zeroes from decimal precision */ PAD(dprec - size, zeroes); if (gs.grouping) { if (grouping_print(&gs, &io, cp, buf+BUF, locale) < 0) goto error; } else { PRINT(cp, size); } #ifndef NO_FLOATING_POINT } else { /* glue together f_p fragments */ if (!expchar) { /* %[fF] or sufficiently short %[gG] */ if (expt <= 0) { PRINT(zeroes, 1); if (prec || flags & ALT) PRINT(&decimal_point, 1); PAD(-expt, zeroes); /* already handled initial 0's */ prec += expt; } else { if (gs.grouping) { n = grouping_print(&gs, &io, cp, convbuf + ndig, locale); if (n < 0) goto error; cp += n; } else { PRINTANDPAD(cp, convbuf + ndig, expt, zeroes); cp += expt; } if (prec || flags & ALT) PRINT(&decimal_point, 1); } PRINTANDPAD(cp, convbuf + ndig, prec, zeroes); } else { /* %[eE] or sufficiently long %[gG] */ if (prec > 1 || flags & ALT) { buf[0] = *cp++; buf[1] = decimal_point; PRINT(buf, 2); PRINT(cp, ndig-1); PAD(prec - ndig, zeroes); } else /* XeYYY */ PRINT(cp, 1); PRINT(expstr, expsize); } } #endif /* left-adjusting padding (always blank) */ if (flags & LADJUST) PAD(width - realsz, blanks); /* finally, adjust ret */ ret += prsize; FLUSH(); /* copy out the I/O vectors */ } done: FLUSH(); error: va_end(orgap); if (convbuf != NULL) free(convbuf); if (__sferror(fp)) ret = EOF; else fp->_flags |= savserr; if ((argtable != NULL) && (argtable != statargtable)) free (argtable); return (ret); /* NOTREACHED */ } diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c 
index b03c9dba0699..e2c730b5e7a9 100644 --- a/lib/libc/stdio/vfwscanf.c +++ b/lib/libc/stdio/vfwscanf.c @@ -1,977 +1,1017 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Chris Torek. * * Copyright (c) 2011 The FreeBSD Foundation * * Copyright (c) 2023 Dag-Erling Smørgrav * * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if 0 #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #endif #include #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "local.h" #include "xlocale_private.h" #define BUF 513 /* Maximum length of numeric string. */ /* * Flags used during conversion. */ #define LONG 0x01 /* l: long or double */ #define LONGDBL 0x02 /* L: long double */ #define SHORT 0x04 /* h: short */ #define SUPPRESS 0x08 /* *: suppress assignment */ #define POINTER 0x10 /* p: void * (as hex) */ #define NOSKIP 0x20 /* [ or c: do not skip blanks */ +#define FASTINT 0x200 /* wfN: int_fastN_t */ #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ #define INTMAXT 0x800 /* j: intmax_t */ #define PTRDIFFT 0x1000 /* t: ptrdiff_t */ #define SIZET 0x2000 /* z: size_t */ #define SHORTSHORT 0x4000 /* hh: char */ #define UNSIGNED 0x8000 /* %[oupxX] conversions */ /* * Conversion types. */ #define CT_CHAR 0 /* %c conversion */ #define CT_CCL 1 /* %[...] conversion */ #define CT_STRING 2 /* %s conversion */ #define CT_INT 3 /* %[dioupxX] conversion */ #define CT_FLOAT 4 /* %[efgEFG] conversion */ #ifndef NO_FLOATING_POINT static int parsefloat(FILE *, wchar_t *, wchar_t *, locale_t); #endif struct ccl { const wchar_t *start; /* character class start */ const wchar_t *end; /* character class end */ int compl; /* ccl is complemented? */ }; static __inline int inccl(const struct ccl *ccl, wint_t wi) { if (ccl->compl) { return (wmemchr(ccl->start, wi, ccl->end - ccl->start) == NULL); } else { return (wmemchr(ccl->start, wi, ccl->end - ccl->start) != NULL); } } /* * Conversion functions are passed a pointer to this object instead of * a real parameter to indicate that the assignment-suppression (*) * flag was specified. 
We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/* Zero-initialized conversion state, copied to start each conversion. */
static const mbstate_t initial_mbs;

/*
 * The following conversion functions return the number of characters consumed,
 * or -1 on input failure.  Character class conversion returns 0 on match
 * failure.
 */

/*
 * %c: read up to `width' wide characters and store them, converted to
 * multibyte form, at mbp (unless mbp is SUPPRESS_PTR).  No terminator is
 * written.  Returns the number of wide characters read, or -1 if none
 * could be read or a character could not be converted.
 */
static __inline int
convert_char(FILE *fp, char * mbp, int width, locale_t locale)
{
	mbstate_t mbs;
	size_t nconv;
	wint_t wi;
	int n;

	n = 0;
	mbs = initial_mbs;
	/* width is tested first, so no character is read once it hits 0 */
	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
		if (mbp != SUPPRESS_PTR) {
			nconv = wcrtomb(mbp, wi, &mbs);
			if (nconv == (size_t)-1)
				return (-1);
			mbp += nconv;
		}
		n++;
	}
	if (n == 0)
		return (-1);
	return (n);
}

/*
 * %lc: like convert_char(), but stores the wide characters unconverted
 * at wcp.
 */
static __inline int
convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
{
	wint_t wi;
	int n;

	n = 0;
	while (width-- != 0 && (wi = __fgetwc(fp, locale)) != WEOF) {
		if (wcp != SUPPRESS_PTR)
			*wcp++ = (wchar_t)wi;
		n++;
	}
	if (n == 0)
		return (-1);
	return (n);
}

/*
 * %[...]: read wide characters while they are accepted by inccl() and
 * the width allows, storing them in multibyte form at mbp followed by a
 * NUL byte (unless suppressed).  Returns the number of characters
 * accepted, or -1 on a conversion error.
 */
static __inline int
convert_ccl(FILE *fp, char * mbp, int width, const struct ccl *ccl,
    locale_t locale)
{
	mbstate_t mbs;
	size_t nconv;
	wint_t wi;
	int n;

	n = 0;
	mbs = initial_mbs;
	/*
	 * The character is read before width and class are tested, so the
	 * one that ends the loop has already been consumed and is pushed
	 * back below.
	 */
	while ((wi = __fgetwc(fp, locale)) != WEOF &&
	    width-- != 0 && inccl(ccl, wi)) {
		if (mbp != SUPPRESS_PTR) {
			nconv = wcrtomb(mbp, wi, &mbs);
			if (nconv == (size_t)-1)
				return (-1);
			mbp += nconv;
		}
		n++;
	}
	if (wi != WEOF)
		__ungetwc(wi, fp, locale);
	if (mbp != SUPPRESS_PTR)
		*mbp = 0;
	return (n);
}

/*
 * %l[...]: wide-character counterpart of convert_ccl().  When storing,
 * an empty match returns 0 before any terminator is written.
 */
static __inline int
convert_wccl(FILE *fp, wchar_t *wcp, int width, const struct ccl *ccl,
    locale_t locale)
{
	wchar_t *wcp0;
	wint_t wi;
	int n;

	if (wcp == SUPPRESS_PTR) {
		n = 0;
		while ((wi = __fgetwc(fp, locale)) != WEOF &&
		    width-- != 0 && inccl(ccl, wi))
			n++;
		if (wi != WEOF)
			__ungetwc(wi, fp, locale);
	} else {
		wcp0 = wcp;
		while ((wi = __fgetwc(fp, locale)) != WEOF &&
		    width-- != 0 && inccl(ccl, wi))
			*wcp++ = (wchar_t)wi;
		if (wi != WEOF)
			__ungetwc(wi, fp, locale);
		/* the count is derived from how far the pointer advanced */
		n = wcp - wcp0;
		if (n == 0)
			return (0);
		*wcp = 0;
	}
	return (n);
}

/*
 * %s: read wide characters up to the next iswspace() character or until
 * the width is used up, storing them in multibyte form at mbp followed
 * by a NUL byte (unless suppressed).  Returns the number of characters
 * read (possibly 0), or -1 on a conversion error.
 */
static __inline int
convert_string(FILE *fp, char * mbp, int width, locale_t locale)
{
	mbstate_t mbs;
	size_t nconv;
	wint_t wi;
	int nread;

	mbs = initial_mbs;
	nread = 0;
	while ((wi = __fgetwc(fp, locale)) != WEOF && width-- != 0 &&
	    !iswspace(wi)) {
		if (mbp != SUPPRESS_PTR) {
			nconv = wcrtomb(mbp, wi, &mbs);
			if (nconv == (size_t)-1)
				return (-1);
			mbp += nconv;
		}
		nread++;
	}
	if (wi != WEOF)
		__ungetwc(wi, fp, locale);
	if (mbp != SUPPRESS_PTR)
		*mbp = 0;
	return (nread);
}

/*
 * %ls: wide-character counterpart of convert_string(); the stored string
 * is terminated with a wide NUL.
 */
static __inline int
convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
{
	wint_t wi;
	int nread;

	nread = 0;
	if (wcp == SUPPRESS_PTR) {
		while ((wi = __fgetwc(fp, locale)) != WEOF &&
		    width-- != 0 && !iswspace(wi))
			nread++;
		if (wi != WEOF)
			__ungetwc(wi, fp, locale);
	} else {
		while ((wi = __fgetwc(fp, locale)) != WEOF &&
		    width-- != 0 && !iswspace(wi)) {
			*wcp++ = (wchar_t)wi;
			nread++;
		}
		if (wi != WEOF)
			__ungetwc(wi, fp, locale);
		*wcp = '\0';
	}
	return (nread);
}

/* States of the integer recognizer implemented by parseint_fsm(). */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' only */
	havezero,	/* accepted a leading '0' (after an optional sign) */
	haveprefix,	/* accepted "0b"/"0B" or "0x"/"0X" */
	any,		/* accepted at least one digit beyond the above */
};

/*
 * Feed one character to the integer recognizer.  Returns 1 if `c' is
 * accepted as the next character of the number and 0 otherwise; *state
 * is advanced, and *base is filled in when it is 0 and the input
 * determines it (8 after a leading 0, 10 for a leading nonzero digit,
 * 2 or 16 after a prefix).
 */
static __inline int
parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
{
	switch (c) {
	case '+':
	case '-':
		/* a sign is only valid as the very first character */
		if (*state == begin) {
			*state = havesign;
			return 1;
		}
		break;
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			*state = any;
		}
		return 1;
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* a leading zero followed by an octal digit selects base 8 */
		if (*state == havezero && *base == 0) {
			*base = 8;
		}
		/* FALL THROUGH */
	case '8':
	case '9':
		if (*state == begin || *state == havesign) {
			if (*base == 0) {
				*base = 10;
			}
		}
		if (*state == begin || *state == havesign ||
		    *state == havezero || *state == haveprefix ||
		    *state == any) {
			/* the digit must be valid in the current base */
			if (*base > c - '0') {
				*state = any;
				return 1;
			}
		}
		break;
	case 'b':
		/* "0b" is a binary prefix unless another base is forced */
		if (*state == havezero) {
			if (*base == 0 || *base == 2) {
				*state = haveprefix;
				*base = 2;
				return 1;
			}
		}
		/* FALL THROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		if (*state == begin || *state == havesign ||
		    *state == havezero || *state == haveprefix ||
		    *state == any) {
			if (*base > c - 'a' + 10) {
				*state = any;
				return 1;
			}
		}
		break;
	case 'B':
		if (*state == havezero) {
			if (*base == 0 || *base == 2) {
				*state = haveprefix;
				*base = 2;
				return 1;
			}
		}
		/* FALL THROUGH */
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		if (*state == begin || *state == havesign ||
		    *state == havezero || *state == haveprefix ||
		    *state == any) {
			if (*base > c - 'A' + 10) {
				*state = any;
				return 1;
			}
		}
		break;
	case 'x':
	case 'X':
		/* "0x" is a hex prefix unless another base is forced */
		if (*state == havezero) {
			if (*base == 0 || *base == 16) {
				*state = haveprefix;
				*base = 16;
				return 1;
			}
		}
		break;
	}
	return 0;
}

/*
 * Read an integer, storing it in buf.
 *
 * At most `width' characters are read; buf is not terminated here.
 *
 * Return 0 on a match failure, and the number of characters read
 * otherwise.
 */
static __inline int
parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
    locale_t locale)
{
	enum parseint_state state = begin;
	wchar_t *wcp;
	int c;

	for (wcp = buf; width; width--) {
		c = __fgetwc(fp, locale);
		if (c == WEOF)
			break;
		if (!parseint_fsm(c, &state, &base))
			break;
		*wcp++ = (wchar_t)c;
	}
	/*
	 * If we only had a sign, push it back.  If we only had a 0b or 0x
	 * prefix (possibly preceded by a sign), we view it as "0" and
	 * push back the letter.  In all other cases, if we stopped
	 * because we read a non-number character, push it back.
	 */
	if (state == havesign) {
		wcp--;
		__ungetwc(*wcp, fp, locale);
	} else if (state == haveprefix) {
		/*
		 * The prefix letter is dropped from buf, but what is pushed
		 * back is c, the last character read, which is the prefix
		 * letter only when the loop ended because width ran out.
		 */
		wcp--;
		__ungetwc(c, fp, locale);
	} else if (width && c != WEOF) {
		__ungetwc(c, fp, locale);
	}
	return (wcp - buf);
}

/*
 * MT-safe version.
 *
 * Locks the stream and sets its orientation via ORIENT(fp, 1) before
 * handing off to __vfwscanf().
 */
int
vfwscanf_l(FILE * __restrict fp, locale_t locale,
    const wchar_t * __restrict fmt, va_list ap)
{
	int ret;
	FIX_LOCALE(locale);

	FLOCKFILE_CANCELSAFE(fp);
	ORIENT(fp, 1);
	ret = __vfwscanf(fp, locale, fmt, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (ret);
}

/* Same as vfwscanf_l(), using the locale returned by __get_locale(). */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{
	return vfwscanf_l(fp, __get_locale(), fmt, ap);
}

/*
 * Non-MT-safe version.
*/ int __vfwscanf(FILE * __restrict fp, locale_t locale, const wchar_t * __restrict fmt, va_list ap) { #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type)) wint_t c; /* character from format, or conversion */ size_t width; /* field width, or 0 */ int flags; /* flags as defined above */ int nassigned; /* number of fields assigned */ int nconversions; /* number of conversions */ int nr; /* characters read by the current conversion */ int nread; /* number of characters consumed from fp */ int base; /* base argument to conversion function */ struct ccl ccl; /* character class info */ wchar_t buf[BUF]; /* buffer for numeric conversions */ wint_t wi; /* handy wint_t */ nassigned = 0; nconversions = 0; nread = 0; ccl.start = ccl.end = NULL; for (;;) { c = *fmt++; if (c == 0) return (nassigned); if (iswspace(c)) { while ((c = __fgetwc(fp, locale)) != WEOF && iswspace_l(c, locale)) nread++; if (c != WEOF) __ungetwc(c, fp, locale); continue; } if (c != '%') goto literal; width = 0; flags = 0; /* * switch on the format. continue if done; * break once format type is derived. */ again: c = *fmt++; switch (c) { case '%': literal: if ((wi = __fgetwc(fp, locale)) == WEOF) goto input_failure; if (wi != c) { __ungetwc(wi, fp, locale); goto match_failure; } nread++; continue; case '*': flags |= SUPPRESS; goto again; case 'j': flags |= INTMAXT; goto again; case 'l': if (flags & LONG) { flags &= ~LONG; flags |= LONGLONG; } else flags |= LONG; goto again; case 'q': flags |= LONGLONG; /* not quite */ goto again; case 't': flags |= PTRDIFFT; goto again; + case 'w': + /* + * Fixed-width integer types. On all platforms we + * support, int8_t is equivalent to char, int16_t + * is equivalent to short, int32_t is equivalent + * to int, int64_t is equivalent to long long int. + * Furthermore, int_fast8_t, int_fast16_t and + * int_fast32_t are equivalent to int, and + * int_fast64_t is equivalent to long long int. 
+ */ + flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT); + if (fmt[0] == 'f') { + flags |= FASTINT; + fmt++; + } else { + flags &= ~FASTINT; + } + if (fmt[0] == '8') { + if (!(flags & FASTINT)) + flags |= SHORTSHORT; + else + /* no flag set = 32 */ ; + fmt += 1; + } else if (fmt[0] == '1' && fmt[1] == '6') { + if (!(flags & FASTINT)) + flags |= SHORT; + else + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '3' && fmt[1] == '2') { + /* no flag set = 32 */ ; + fmt += 2; + } else if (fmt[0] == '6' && fmt[1] == '4') { + flags |= LONGLONG; + fmt += 2; + } else { + goto match_failure; + } + goto again; case 'z': flags |= SIZET; goto again; case 'L': flags |= LONGDBL; goto again; case 'h': if (flags & SHORT) { flags &= ~SHORT; flags |= SHORTSHORT; } else flags |= SHORT; goto again; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': width = width * 10 + c - '0'; goto again; /* * Conversions. */ case 'B': case 'b': c = CT_INT; flags |= UNSIGNED; base = 2; break; case 'd': c = CT_INT; base = 10; break; case 'i': c = CT_INT; base = 0; break; case 'o': c = CT_INT; flags |= UNSIGNED; base = 8; break; case 'u': c = CT_INT; flags |= UNSIGNED; base = 10; break; case 'X': case 'x': c = CT_INT; flags |= UNSIGNED; base = 16; break; #ifndef NO_FLOATING_POINT case 'A': case 'E': case 'F': case 'G': case 'a': case 'e': case 'f': case 'g': c = CT_FLOAT; break; #endif case 'S': flags |= LONG; /* FALLTHROUGH */ case 's': c = CT_STRING; break; case '[': ccl.start = fmt; if (*fmt == '^') { ccl.compl = 1; fmt++; } else ccl.compl = 0; if (*fmt == ']') fmt++; while (*fmt != '\0' && *fmt != ']') fmt++; ccl.end = fmt; fmt++; flags |= NOSKIP; c = CT_CCL; break; case 'C': flags |= LONG; /* FALLTHROUGH */ case 'c': flags |= NOSKIP; c = CT_CHAR; break; case 'p': /* pointer format is like hex */ flags |= POINTER; c = CT_INT; /* assumes sizeof(uintmax_t) */ flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ base = 16; break; case 
'n': if (flags & SUPPRESS) /* ??? */ continue; if (flags & SHORTSHORT) *va_arg(ap, char *) = nread; else if (flags & SHORT) *va_arg(ap, short *) = nread; else if (flags & LONG) *va_arg(ap, long *) = nread; else if (flags & LONGLONG) *va_arg(ap, long long *) = nread; else if (flags & INTMAXT) *va_arg(ap, intmax_t *) = nread; else if (flags & SIZET) *va_arg(ap, size_t *) = nread; else if (flags & PTRDIFFT) *va_arg(ap, ptrdiff_t *) = nread; else *va_arg(ap, int *) = nread; continue; default: goto match_failure; /* * Disgusting backwards compatibility hack. XXX */ case '\0': /* compat */ return (EOF); } /* * Consume leading white space, except for formats * that suppress this. */ if ((flags & NOSKIP) == 0) { while ((wi = __fgetwc(fp, locale)) != WEOF && iswspace(wi)) nread++; if (wi == WEOF) goto input_failure; __ungetwc(wi, fp, locale); } /* * Do the conversion. */ switch (c) { case CT_CHAR: /* scan arbitrary characters (sets NOSKIP) */ if (width == 0) width = 1; if (flags & LONG) { nr = convert_wchar(fp, GETARG(wchar_t *), width, locale); } else { nr = convert_char(fp, GETARG(char *), width, locale); } if (nr < 0) goto input_failure; break; case CT_CCL: /* scan a (nonempty) character class (sets NOSKIP) */ if (width == 0) width = (size_t)~0; /* `infinity' */ /* take only those things in the class */ if (flags & LONG) { nr = convert_wccl(fp, GETARG(wchar_t *), width, &ccl, locale); } else { nr = convert_ccl(fp, GETARG(char *), width, &ccl, locale); } if (nr <= 0) { if (nr < 0) goto input_failure; else /* nr == 0 */ goto match_failure; } break; case CT_STRING: /* like CCL, but zero-length string OK, & no NOSKIP */ if (width == 0) width = (size_t)~0; if (flags & LONG) { nr = convert_wstring(fp, GETARG(wchar_t *), width, locale); } else { nr = convert_string(fp, GETARG(char *), width, locale); } if (nr < 0) goto input_failure; break; case CT_INT: /* scan an integer as if by the conversion function */ if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1) width = 
sizeof(buf) / sizeof(*buf) - 1; nr = parseint(fp, buf, width, base, locale); if (nr == 0) goto match_failure; if ((flags & SUPPRESS) == 0) { uintmax_t res; buf[nr] = L'\0'; if ((flags & UNSIGNED) == 0) res = wcstoimax(buf, NULL, base); else res = wcstoumax(buf, NULL, base); if (flags & POINTER) *va_arg(ap, void **) = (void *)(uintptr_t)res; else if (flags & SHORTSHORT) *va_arg(ap, char *) = res; else if (flags & SHORT) *va_arg(ap, short *) = res; else if (flags & LONG) *va_arg(ap, long *) = res; else if (flags & LONGLONG) *va_arg(ap, long long *) = res; else if (flags & INTMAXT) *va_arg(ap, intmax_t *) = res; else if (flags & PTRDIFFT) *va_arg(ap, ptrdiff_t *) = res; else if (flags & SIZET) *va_arg(ap, size_t *) = res; else *va_arg(ap, int *) = res; } break; #ifndef NO_FLOATING_POINT case CT_FLOAT: /* scan a floating point number as if by strtod */ if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1) width = sizeof(buf) / sizeof(*buf) - 1; nr = parsefloat(fp, buf, buf + width, locale); if (nr == 0) goto match_failure; if ((flags & SUPPRESS) == 0) { if (flags & LONGDBL) { long double res = wcstold(buf, NULL); *va_arg(ap, long double *) = res; } else if (flags & LONG) { double res = wcstod(buf, NULL); *va_arg(ap, double *) = res; } else { float res = wcstof(buf, NULL); *va_arg(ap, float *) = res; } } break; #endif /* !NO_FLOATING_POINT */ } if (!(flags & SUPPRESS)) nassigned++; nread += nr; nconversions++; } input_failure: return (nconversions != 0 ? 
nassigned : EOF);
match_failure:
	return (nassigned);
}

#ifndef NO_FLOATING_POINT
/*
 * Read from fp the longest prefix that is a valid textual floating point
 * number (decimal, hexadecimal with a 0x/0X prefix, "inf"/"infinity", or
 * "nan"/"nan(...)", each with an optional sign) and store it in buf.
 *
 * At most (end - buf) characters are stored, followed by a terminating
 * null wide character; the terminator may be written at *end itself, so
 * the caller must leave room for it.  Characters that were read but turn
 * out not to belong to the number are pushed back onto fp.
 *
 * Returns the number of wide characters stored, not counting the
 * terminator; 0 means that no valid number was found.
 */
static int
parsefloat(FILE *fp, wchar_t *buf, wchar_t *end, locale_t locale)
{
	mbstate_t mbs;
	size_t nconv;
	wchar_t *commit, *p;
	int infnanpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	wint_t c;	/* wint_t, not wchar_t: it must be able to hold WEOF */
	wchar_t decpt;
	_Bool gotmantdig = 0, ishex = 0;

	/*
	 * Convert the decimal point to a wide character.
	 * NOTE(review): this consults localeconv() rather than the `locale'
	 * argument; confirm whether the per-call locale should be used here.
	 */
	mbs = initial_mbs;
	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
	if (nconv == (size_t)-1 || nconv == (size_t)-2)
		decpt = '.';	/* failsafe */

	/*
	 * `commit' points one past the last character of the longest prefix
	 * read so far that constitutes a valid representation of a floating
	 * point number by itself; it starts at buf (nothing committed) so
	 * that no pointer before the start of the buffer is ever formed.
	 * Since the current character is stored at *p only after the switch,
	 * accepting it as the end of a valid number is `commit = p + 1'.
	 * At some point the parse will complete or fail, and we will unget
	 * back to the last commit point.  To ensure that the file offset
	 * gets updated properly, it is always necessary to read at least one
	 * character that doesn't match; thus, we can't short-circuit
	 * "infinity" or "nan(...)".
	 */
	commit = buf;
	c = WEOF;
	for (p = buf; p < end; ) {
		if ((c = __fgetwc(fp, locale)) == WEOF)
			break;
reswitch:
		switch (state) {
		case S_START:
			/* Optional sign. */
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p + 1;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* infnanpos indexes the letters following the 'i'. */
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p + 1;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			/* infnanpos counts the characters following the 'n'. */
			switch (infnanpos) {
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p + 1;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				if (c == ')') {
					commit = p + 1;
					state = S_DONE;
				} else if (!iswalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_DONE:
			goto parsedone;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && iswxdigit(c)) || iswdigit(c))
				gotmantdig = 1;
			else {
				state = S_FRAC;
				if (c != decpt)
					goto reswitch;
			}
			if (gotmantdig)
				commit = p + 1;
			break;
		case S_FRAC:
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
				commit = p + 1;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			/* Optional exponent sign. */
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (iswdigit(c))
				commit = p + 1;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		*p++ = c;
		c = WEOF;
	}

parsedone:
	/* Push back the character that ended the parse, if there was one... */
	if (c != WEOF)
		__ungetwc(c, fp, locale);
	/* ...then everything stored beyond the commit point, last first. */
	while (p > commit)
		__ungetwc(*--p, fp, locale);
	*commit = '\0';
	return (commit - buf);
}
#endif