diff --git a/lib/libc/iconv/_strtol.h b/lib/libc/iconv/_strtol.h --- a/lib/libc/iconv/_strtol.h +++ b/lib/libc/iconv/_strtol.h @@ -91,6 +91,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = (c == '0' ? 8 : 10); diff --git a/lib/libc/iconv/_strtoul.h b/lib/libc/iconv/_strtoul.h --- a/lib/libc/iconv/_strtoul.h +++ b/lib/libc/iconv/_strtoul.h @@ -87,6 +87,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = (c == '0' ? 8 : 10); diff --git a/lib/libc/locale/wcstoimax.c b/lib/libc/locale/wcstoimax.c --- a/lib/libc/locale/wcstoimax.c +++ b/lib/libc/locale/wcstoimax.c @@ -86,6 +86,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == L'0' && (*s == L'b' || *s == L'B') && + (s[1] >= L'0' && s[1] <= L'1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == L'0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/locale/wcstol.c b/lib/libc/locale/wcstol.c --- a/lib/libc/locale/wcstol.c +++ b/lib/libc/locale/wcstol.c @@ -80,6 +80,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == L'0' && (*s == L'b' || *s == L'B') && + (s[1] >= L'0' && s[1] <= L'1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == L'0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/locale/wcstoll.c b/lib/libc/locale/wcstoll.c --- a/lib/libc/locale/wcstoll.c +++ b/lib/libc/locale/wcstoll.c @@ -86,6 +86,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == L'0' && (*s == L'b' || *s == L'B') && + (s[1] >= L'0' && s[1] <= L'1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == L'0' ? 
8 : 10; acc = any = 0; diff --git a/lib/libc/locale/wcstoul.c b/lib/libc/locale/wcstoul.c --- a/lib/libc/locale/wcstoul.c +++ b/lib/libc/locale/wcstoul.c @@ -80,6 +80,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == L'0' && (*s == L'b' || *s == L'B') && + (s[1] >= L'0' && s[1] <= L'1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == L'0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/locale/wcstoull.c b/lib/libc/locale/wcstoull.c --- a/lib/libc/locale/wcstoull.c +++ b/lib/libc/locale/wcstoull.c @@ -86,6 +86,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == L'0' && (*s == L'b' || *s == L'B') && + (s[1] >= L'0' && s[1] <= L'1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == L'0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/locale/wcstoumax.c b/lib/libc/locale/wcstoumax.c --- a/lib/libc/locale/wcstoumax.c +++ b/lib/libc/locale/wcstoumax.c @@ -86,6 +86,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == L'0' && (*s == L'b' || *s == L'B') && + (s[1] >= L'0' && s[1] <= L'1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == L'0' ? 
8 : 10; acc = any = 0; diff --git a/lib/libc/stdio/printfcommon.h b/lib/libc/stdio/printfcommon.h --- a/lib/libc/stdio/printfcommon.h +++ b/lib/libc/stdio/printfcommon.h @@ -194,6 +194,13 @@ } while (sval != 0); break; + case 2: + do { + *--cp = to_char(val & 1); + val >>= 1; + } while (val); + break; + case 8: do { *--cp = to_char(val & 7); @@ -244,6 +251,13 @@ } while (sval != 0); break; + case 2: + do { + *--cp = to_char(val & 1); + val >>= 1; + } while (val); + break; + case 8: do { *--cp = to_char(val & 7); diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c --- a/lib/libc/stdio/vfprintf.c +++ b/lib/libc/stdio/vfprintf.c @@ -613,6 +613,19 @@ case 'z': flags |= SIZET; goto rflag; + case 'B': + case 'b': + if (flags & INTMAX_SIZE) + ujval = UJARG(); + else + ulval = UARG(); + base = 2; + /* leading 0b/B only if non-zero */ + if (flags & ALT && + (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0)) + ox[1] = ch; + goto nosign; + break; case 'C': flags |= LONGINT; /*FALLTHROUGH*/ diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c --- a/lib/libc/stdio/vfscanf.c +++ b/lib/libc/stdio/vfscanf.c @@ -80,16 +80,6 @@ #define SHORTSHORT 0x4000 /* hh: char */ #define UNSIGNED 0x8000 /* %[oupxX] conversions */ -/* - * The following are used in integral conversions only: - * SIGNOK, NDIGITS, PFXOK, and NZDIGITS - */ -#define SIGNOK 0x40 /* +/- is (still) legal */ -#define NDIGITS 0x80 /* no digits detected */ -#define PFXOK 0x100 /* 0x prefix is (still) legal */ -#define NZDIGITS 0x200 /* no zero digits detected */ -#define HAVESIGN 0x10000 /* sign detected */ - /* * Conversion types. 
/*
 * Progress of the integer scanner through the optional sign, the
 * optional leading zero / base prefix, and the digits.
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a sign and nothing else */
	havezero,	/* accepted [sign] '0' */
	haveprefix,	/* accepted [sign] '0' plus a b/B/x/X prefix letter */
	any,		/* accepted at least one further digit */
};

/*
 * Feed one input character to the integer scanner.
 *
 * `state' is the scanner state and `base' the conversion base (0 means
 * "not yet known", as for %i); both are updated in place.  A base of 0
 * is resolved to 2, 8, 10 or 16 from the prefix or the first digits.
 *
 * Returns 1 if `c' is accepted as the next character of the number and
 * 0 if it is not.  A rejected character never changes `*state', so the
 * caller can still tell that only a sign (havesign) or only a prefix
 * (haveprefix) has been consumed.
 */
static __inline int
parseint_fsm(int c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only legal as the very first character. */
		if (*state == begin) {
			*state = havesign;
			return 1;
		}
		return 0;
	case '0':
		/* Zero is a valid digit in every base. */
		if (*state == begin || *state == havesign) {
			*state = havezero;
			return 1;
		}
		/*
		 * A second leading zero with the base still undecided can
		 * only start an octal number; commit to base 8 so that the
		 * digits that follow are not rejected against base 0.
		 */
		if (*state == havezero && *base == 0)
			*base = 8;
		*state = any;
		return 1;
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* [sign] '0' followed by 1-7 with no base given: octal. */
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A nonzero first digit with no base given: decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
	case 'B':
		/* Binary prefix, legal only right after the leading zero. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return 1;
		}
		/* Otherwise an ordinary digit of value 11. */
		/* FALLTHROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = (c >= 'a' ? c - 'a' : c - 'A') + 10;
		break;
	case 'x':
	case 'X':
		/* Hex prefix, legal only right after the leading zero. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return 1;
		}
		return 0;
	default:
		return 0;
	}

	/*
	 * Validate the digit before touching the state: advancing first
	 * would turn "sign + out-of-range digit" into state `any' and hide
	 * the fact that only a sign was consumed.
	 */
	if (digit >= *base)
		return 0;
	*state = any;
	return 1;
}
+ * Read an integer, storing it in buf. * * Return 0 on a match failure, and the number of characters read * otherwise. */ static __inline int -parseint(FILE *fp, char * __restrict buf, int width, int base, int flags) +parseint(FILE *fp, char * __restrict buf, int width, int base) { - /* `basefix' is used to avoid `if' tests */ - static const short basefix[17] = - { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + enum parseint_state state = begin; char *p; int c; - flags |= SIGNOK | NDIGITS | NZDIGITS; for (p = buf; width; width--) { c = *fp->_p; - /* - * Switch on the character; `goto ok' if we accept it - * as a part of number. - */ - switch (c) { - - /* - * The digit 0 is always legal, but is special. For - * %i conversions, if no digits (zero or nonzero) have - * been scanned (only signs), we will have base==0. - * In that case, we should set it to 8 and enable 0x - * prefixing. Also, if we have not scanned zero - * digits before this, do not turn off prefixing - * (someone else will turn it off if we have scanned - * any nonzero digits). 
- */ - case '0': - if (base == 0) { - base = 8; - flags |= PFXOK; - } - if (flags & NZDIGITS) - flags &= ~(SIGNOK|NZDIGITS|NDIGITS); - else - flags &= ~(SIGNOK|PFXOK|NDIGITS); - goto ok; - - /* 1 through 7 always legal */ - case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - base = basefix[base]; - flags &= ~(SIGNOK | PFXOK | NDIGITS); - goto ok; - - /* digits 8 and 9 ok iff decimal or hex */ - case '8': case '9': - base = basefix[base]; - if (base <= 8) - break; /* not legal here */ - flags &= ~(SIGNOK | PFXOK | NDIGITS); - goto ok; - - /* letters ok iff hex */ - case 'A': case 'B': case 'C': - case 'D': case 'E': case 'F': - case 'a': case 'b': case 'c': - case 'd': case 'e': case 'f': - /* no need to fix base here */ - if (base <= 10) - break; /* not legal here */ - flags &= ~(SIGNOK | PFXOK | NDIGITS); - goto ok; - - /* sign ok only as first character */ - case '+': case '-': - if (flags & SIGNOK) { - flags &= ~SIGNOK; - flags |= HAVESIGN; - goto ok; - } - break; - - /* - * x ok iff flag still set & 2nd char (or 3rd char if - * we have a sign). - */ - case 'x': case 'X': - if (flags & PFXOK && p == - buf + 1 + !!(flags & HAVESIGN)) { - base = 16; /* if %i */ - flags &= ~PFXOK; - goto ok; - } + if (!parseint_fsm(c, &state, &base)) break; - } - - /* - * If we got here, c is not a legal character for a - * number. Stop accumulating digits. - */ - break; - ok: - /* - * c is legal: store it and look at the next. - */ *p++ = c; if (--fp->_r > 0) fp->_p++; @@ -418,19 +440,12 @@ break; /* EOF */ } /* - * If we had only a sign, it is no good; push back the sign. - * If the number ends in `x', it was [sign] '0' 'x', so push - * back the x and treat it as [sign] '0'. + * If we only had a sign, push it back. If we only had a 0b or 0x + * prefix (possibly preceded by a sign), we view it as "0" and + * push back the letter. 
*/ - if (flags & NDIGITS) { - if (p > buf) - (void) __ungetc(*(u_char *)--p, fp); - return (0); - } - c = ((u_char *)p)[-1]; - if (c == 'x' || c == 'X') { - --p; - (void) __ungetc(c, fp); + if (state == havesign || state == haveprefix) { + (void) __ungetc(*(u_char *)--p, fp); } return (p - buf); } @@ -554,6 +569,13 @@ /* * Conversions. */ + case 'B': + case 'b': + c = CT_INT; + flags |= UNSIGNED; + base = 2; + break; + case 'd': c = CT_INT; base = 10; @@ -578,7 +600,6 @@ case 'X': case 'x': - flags |= PFXOK; /* enable 0x prefixing */ c = CT_INT; flags |= UNSIGNED; base = 16; @@ -613,7 +634,7 @@ break; case 'p': /* pointer format is like hex */ - flags |= POINTER | PFXOK; + flags |= POINTER; c = CT_INT; /* assumes sizeof(uintmax_t) */ flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ base = 16; @@ -738,7 +759,7 @@ width = sizeof(buf) - 2; width++; #endif - nr = parseint(fp, buf, width, base, flags); + nr = parseint(fp, buf, width, base); if (nr == 0) goto match_failure; if ((flags & SUPPRESS) == 0) { diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c --- a/lib/libc/stdio/vfwprintf.c +++ b/lib/libc/stdio/vfwprintf.c @@ -684,6 +684,19 @@ case 'z': flags |= SIZET; goto rflag; + case 'B': + case 'b': + if (flags & INTMAX_SIZE) + ujval = UJARG(); + else + ulval = UARG(); + base = 2; + /* leading 0b/B only if non-zero */ + if (flags & ALT && + (flags & INTMAX_SIZE ? 
ujval != 0 : ulval != 0)) + ox[1] = ch; + goto nosign; + break; case 'C': flags |= LONGINT; /*FALLTHROUGH*/ diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c --- a/lib/libc/stdio/vfwscanf.c +++ b/lib/libc/stdio/vfwscanf.c @@ -336,13 +336,21 @@ flags &= ~(SIGNOK|PFXOK|NDIGITS); goto ok; - /* 1 through 7 always legal */ - case '1': case '2': case '3': - case '4': case '5': case '6': case '7': + /* 1 always legal */ + case '1': base = basefix[base]; flags &= ~(SIGNOK | PFXOK | NDIGITS); goto ok; + /* 2 through 7 ok iff octal, decimal, or hex */ + case '2': case '3': case '4': + case '5': case '6': case '7': + base = basefix[base]; + if (base <= 2) + break; /* not legal here */ + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + /* digits 8 and 9 ok iff decimal or hex */ case '8': case '9': base = basefix[base]; @@ -351,10 +359,23 @@ flags &= ~(SIGNOK | PFXOK | NDIGITS); goto ok; + /* + * b ok iff flag still set & 2nd char (or 3rd char if + * we have a sign). + */ + case 'b': + if (flags & PFXOK && wcp == + buf + 1 + !!(flags & HAVESIGN)) { + base = 2; /* if %i */ + flags &= ~PFXOK; + goto ok; + } + /* FALLTHROUGH */ + /* letters ok iff hex */ - case 'A': case 'B': case 'C': + case 'A': case 'C': case 'D': case 'E': case 'F': - case 'a': case 'b': case 'c': + case 'a': case 'c': case 'd': case 'e': case 'f': /* no need to fix base here */ if (base <= 10) @@ -401,7 +422,7 @@ /* * If we had only a sign, it is no good; push back the sign. * If the number ends in `x', it was [sign] '0' 'x', so push - * back the x and treat it as [sign] '0'. + * back the x and treat it as [sign] '0'. Same for `b'. */ if (flags & NDIGITS) { if (wcp > buf) @@ -409,7 +430,7 @@ return (0); } c = wcp[-1]; - if (c == 'x' || c == 'X') { + if (c == 'x' || c == 'X' || c == 'b' || c == 'B') { --wcp; __ungetwc(c, fp, locale); } @@ -536,6 +557,14 @@ /* * Conversions. 
*/ + case 'B': + case 'b': + flags |= PFXOK; /* enable 0b prefixing */ + c = CT_INT; + flags |= UNSIGNED; + base = 2; + break; + case 'd': c = CT_INT; base = 10; diff --git a/lib/libc/stdlib/strtoimax.c b/lib/libc/stdlib/strtoimax.c --- a/lib/libc/stdlib/strtoimax.c +++ b/lib/libc/stdlib/strtoimax.c @@ -87,6 +87,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == '0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/stdlib/strtol.c b/lib/libc/stdlib/strtol.c --- a/lib/libc/stdlib/strtol.c +++ b/lib/libc/stdlib/strtol.c @@ -87,6 +87,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == '0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/stdlib/strtoll.c b/lib/libc/stdlib/strtoll.c --- a/lib/libc/stdlib/strtoll.c +++ b/lib/libc/stdlib/strtoll.c @@ -63,8 +63,9 @@ /* * Skip white space and pick up leading +/- sign if any. - * If base is 0, allow 0x for hex and 0 for octal, else - * assume decimal; if base is already 16, allow 0x. + * If base is 0, allow 0b for binary, 0x for hex, and 0 for + * octal, else assume decimal; if base is already 2, allow + * 0b; if base is already 16, allow 0x. */ s = nptr; do { @@ -87,6 +88,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == '0' ? 
8 : 10; acc = any = 0; diff --git a/lib/libc/stdlib/strtoul.c b/lib/libc/stdlib/strtoul.c --- a/lib/libc/stdlib/strtoul.c +++ b/lib/libc/stdlib/strtoul.c @@ -84,6 +84,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == '0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/stdlib/strtoull.c b/lib/libc/stdlib/strtoull.c --- a/lib/libc/stdlib/strtoull.c +++ b/lib/libc/stdlib/strtoull.c @@ -85,6 +85,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == '0' ? 8 : 10; acc = any = 0; diff --git a/lib/libc/stdlib/strtoumax.c b/lib/libc/stdlib/strtoumax.c --- a/lib/libc/stdlib/strtoumax.c +++ b/lib/libc/stdlib/strtoumax.c @@ -85,6 +85,13 @@ s += 2; base = 16; } + if ((base == 0 || base == 2) && + c == '0' && (*s == 'b' || *s == 'B') && + (s[1] >= '0' && s[1] <= '1')) { + c = s[1]; + s += 2; + base = 2; + } if (base == 0) base = c == '0' ? 8 : 10; acc = any = 0;