diff --git a/lib/msun/ld128/s_cospil.c b/lib/msun/ld128/s_cospil.c index 71acc4485f7b..b21f879c3e84 100644 --- a/lib/msun/ld128/s_cospil.c +++ b/lib/msun/ld128/s_cospil.c @@ -1,112 +1,108 @@ /*- - * Copyright (c) 2017-2021 Steven G. Kargl + * Copyright (c) 2017-2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_cospi.c for implementation details. */ +#include "fpmath.h" #include "math.h" #include "math_private.h" /* * pi_hi contains the leading 56 bits of a 169 bit approximation for pi. */ static const long double pi_hi = 3.14159265358979322702026593105983920e+00L, pi_lo = 1.14423774522196636802434264184180742e-17L; #include "k_cospil.h" #include "k_sinpil.h" volatile static const double vzero = 0; long double cospil(long double x) { - long double ax, c, xf; - uint32_t ix; + long double ai, ar, ax, c; ax = fabsl(x); if (ax <= 1) { if (ax < 0.25) { if (ax < 0x1p-60) { if ((int)x == 0) return (1); } return (__kernel_cospil(ax)); } if (ax < 0.5) c = __kernel_sinpil(0.5 - ax); else if (ax < 0.75) { if (ax == 0.5) return (0); c = -__kernel_sinpil(ax - 0.5); } else c = -__kernel_cospil(1 - ax); return (c); } if (ax < 0x1p112) { - /* Split x = n + r with 0 <= r < 1. */ - xf = (ax + 0x1p112L) - 0x1p112L; /* Integer part */ - ax -= xf; /* Remainder */ - if (ax < 0) { - ax += 1; - xf -= 1; - } + /* Split ax = ai + ar with 0 <= ar < 1. */ + FFLOORL128(ax, ai, ar); - if (ax < 0.5) { - if (ax < 0.25) - c = ax == 0 ? 1 : __kernel_cospil(ax); + if (ar < 0.5) { + if (ar < 0.25) + c = ar == 0 ? 1 : __kernel_cospil(ar); else - c = __kernel_sinpil(0.5 - ax); + c = __kernel_sinpil(0.5 - ar); } else { - if (ax < 0.75) { - if (ax == 0.5) + if (ar < 0.75) { + if (ar == 0.5) return (0); - c = -__kernel_sinpil(ax - 0.5); + c = -__kernel_sinpil(ar - 0.5); } else - c = -__kernel_cospil(1 - ax); + c = -__kernel_cospil(1 - ar); } - - if (xf > 0x1p64) - xf -= 0x1p64; - if (xf > 0x1p32) - xf -= 0x1p32; - ix = (uint32_t)xf; - return (ix & 1 ? -c : c); + return (fmodl(ai, 2.L) == 0 ? c : -c); } if (isinf(x) || isnan(x)) return (vzero / vzero); /* - * |x| >= 0x1p112 is always an even integer, so return 1. + * For |x| >= 0x1p113, it is always an even integer, so return 1. */ - return (1); + if (ax >= 0x1p113) + return (1); + /* + * For 0x1p112 <= |x| < 0x1p113 need to determine if x is an even + * or odd integer to return 1 or -1. + */ + + return (fmodl(ax, 2.L) == 0 ? 1 : -1); } diff --git a/lib/msun/ld128/s_sinpil.c b/lib/msun/ld128/s_sinpil.c index cdfa2bcac3ef..c8c205449557 100644 --- a/lib/msun/ld128/s_sinpil.c +++ b/lib/msun/ld128/s_sinpil.c @@ -1,121 +1,111 @@ /*- - * Copyright (c) 2017-2021 Steven G. Kargl + * Copyright (c) 2017-2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_sinpi.c for implementation details. */ +#include "fpmath.h" #include "math.h" #include "math_private.h" /* * pi_hi contains the leading 56 bits of a 169 bit approximation for pi. */ static const long double pi_hi = 3.14159265358979322702026593105983920e+00L, pi_lo = 1.14423774522196636802434264184180742e-17L; #include "k_cospil.h" #include "k_sinpil.h" volatile static const double vzero = 0; long double sinpil(long double x) { - long double ax, hi, lo, s, xf, xhi, xlo; - uint32_t ix; + long double ai, ar, ax, hi, lo, s, xhi, xlo; ax = fabsl(x); if (ax < 1) { if (ax < 0.25) { if (ax < 0x1p-60) { if (x == 0) return (x); hi = (double)x; hi *= 0x1p113L; lo = x * 0x1p113L - hi; s = (pi_lo + pi_hi) * lo + pi_lo * lo + pi_hi * hi; return (s * 0x1p-113L); } s = __kernel_sinpil(ax); return (x < 0 ? -s : s); } if (ax < 0.5) s = __kernel_cospil(0.5 - ax); else if (ax < 0.75) s = __kernel_cospil(ax - 0.5); else s = __kernel_sinpil(1 - ax); return (x < 0 ? -s : s); } if (ax < 0x1p112) { - /* Split x = n + r with 0 <= r < 1. */ - xf = (ax + 0x1p112L) - 0x1p112L; /* Integer part */ - ax -= xf; /* Remainder */ - if (ax < 0) { - ax += 1; - xf -= 1; - } + /* Split ax = ai + ar with 0 <= ar < 1. */ + FFLOORL128(ax, ai, ar); - if (ax == 0) { + if (ar == 0) { s = 0; } else { - if (ax < 0.5) { - if (ax <= 0.25) - s = __kernel_sinpil(ax); + if (ar < 0.5) { + if (ar <= 0.25) + s = __kernel_sinpil(ar); else - s = __kernel_cospil(0.5 - ax); + s = __kernel_cospil(0.5 - ar); } else { - if (ax < 0.75) - s = __kernel_cospil(ax - 0.5); + if (ar < 0.75) + s = __kernel_cospil(ar - 0.5); else - s = __kernel_sinpil(1 - ax); + s = __kernel_sinpil(1 - ar); } - if (xf > 0x1p64) - xf -= 0x1p64; - if (xf > 0x1p32) - xf -= 0x1p32; - ix = (uint32_t)xf; - if (ix & 1) s = -s; + s = fmodl(ai, 2.L) == 0 ? s : -s; } return (x < 0 ? -s : s); } if (isinf(x) || isnan(x)) return (vzero / vzero); /* * |x| >= 0x1p112 is always an integer, so return +-0. */ return (copysignl(0, x)); } diff --git a/lib/msun/ld128/s_tanpil.c b/lib/msun/ld128/s_tanpil.c index 90f4aea5c629..2d253bb9f478 100644 --- a/lib/msun/ld128/s_tanpil.c +++ b/lib/msun/ld128/s_tanpil.c @@ -1,123 +1,122 @@ /*- - * Copyright (c) 2017-2021 Steven G. Kargl + * Copyright (c) 2017-2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_tanpi.c for implementation details. */ +#include "fpmath.h" #include "math.h" #include "math_private.h" /* * pi_hi contains the leading 56 bits of a 169 bit approximation for pi. */ static const long double pi_hi = 3.14159265358979322702026593105983920e+00L, pi_lo = 1.14423774522196636802434264184180742e-17L; static inline long double __kernel_tanpil(long double x) { long double hi, lo, t; if (x < 0.25) { hi = (double)x; lo = x - hi; lo = lo * (pi_lo + pi_hi) + hi * pi_lo; hi *= pi_hi; _2sumF(hi, lo); t = __kernel_tanl(hi, lo, -1); } else if (x > 0.25) { x = 0.5 - x; hi = (double)x; lo = x - hi; lo = lo * (pi_lo + pi_hi) + hi * pi_lo; hi *= pi_hi; _2sumF(hi, lo); t = - __kernel_tanl(hi, lo, 1); } else t = 1; return (t); } volatile static const double vzero = 0; long double tanpil(long double x) { - long double ax, hi, lo, xf, t; - uint32_t ix; + long double ai, ar, ax, hi, lo, t; + double odd; ax = fabsl(x); if (ax < 1) { if (ax < 0.5) { if (ax < 0x1p-60) { if (x == 0) return (x); hi = (double)x; hi *= 0x1p113L; lo = x * 0x1p113L - hi; t = (pi_lo + pi_hi) * lo + pi_lo * lo + pi_hi * hi; return (t * 0x1p-113L); } t = __kernel_tanpil(ax); } else if (ax == 0.5) - return ((ax - ax) / (ax - ax)); + t = 1 / vzero; else t = -__kernel_tanpil(1 - ax); return (x < 0 ? -t : t); } - if (ix < 0x1p112) { - /* Split x = n + r with 0 <= r < 1. */ - xf = (ax + 0x1p112L) - 0x1p112L; /* Integer part */ - ax -= xf; /* Remainder */ - if (ax < 0) { - ax += 1; - xf -= 1; - } - - if (ax < 0.5) - t = ax == 0 ? 0 : __kernel_tanpil(ax); - else if (ax == 0.5) - return ((ax - ax) / (ax - ax)); + if (ax < 0x1p112) { + /* Split ax = ai + ar with 0 <= ar < 1. */ + FFLOORL128(ax, ai, ar); + odd = fmodl(ai, 2.L) == 0 ? 1 : -1; + if (ar < 0.5) + t = ar == 0 ? copysign(0., odd) : __kernel_tanpil(ar); + else if (ar == 0.5) + t = odd / vzero; else - t = -__kernel_tanpil(1 - ax); + t = -__kernel_tanpil(1 - ar); return (x < 0 ? -t : t); } /* x = +-inf or nan. */ if (isinf(x) || isnan(x)) return (vzero / vzero); /* - * |x| >= 0x1p112 is always an integer, so return +-0. + * For 0x1p112 <= |x| < 0x1p113 need to determine if x is an even + * or odd integer to set t = +0 or -0. + * For |x| >= 0x1p113, it is always an even integer, so t = 0. */ - return (copysignl(0, x)); + t = fmodl(ax,2.L) == 0 ? 0 : copysign(0., -1.); + return (copysignl(t, x)); } diff --git a/lib/msun/ld80/s_cospil.c b/lib/msun/ld80/s_cospil.c index 199479e9eaf9..69620d2f2f33 100644 --- a/lib/msun/ld80/s_cospil.c +++ b/lib/msun/ld80/s_cospil.c @@ -1,129 +1,121 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017, 2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_cospi.c for implementation details. */ #ifdef __i386__ #include #endif #include #include "fpmath.h" #include "math.h" #include "math_private.h" static const double pi_hi = 3.1415926814079285e+00, /* 0x400921fb 0x58000000 */ pi_lo =-2.7818135228334233e-08; /* 0xbe5dde97 0x3dcb3b3a */ #include "k_cospil.h" #include "k_sinpil.h" volatile static const double vzero = 0; long double cospil(long double x) { long double ax, c; uint64_t lx, m; uint32_t j0; uint16_t hx, ix; EXTRACT_LDBL80_WORDS(hx, lx, x); ix = hx & 0x7fff; INSERT_LDBL80_WORDS(ax, ix, lx); ENTERI(); if (ix < 0x3fff) { /* |x| < 1 */ if (ix < 0x3ffd) { /* |x| < 0.25 */ if (ix < 0x3fdd) { /* |x| < 0x1p-34 */ if ((int)x == 0) RETURNI(1); } RETURNI(__kernel_cospil(ax)); } if (ix < 0x3ffe) /* |x| < 0.5 */ c = __kernel_sinpil(0.5 - ax); else if (lx < 0xc000000000000000ull) { /* |x| < 0.75 */ if (ax == 0.5) RETURNI(0); c = -__kernel_sinpil(ax - 0.5); } else c = -__kernel_cospil(1 - ax); RETURNI(c); } - if (ix < 0x403e) { /* 1 <= |x| < 0x1p63 */ - /* Determine integer part of ax. */ - j0 = ix - 0x3fff + 1; - if (j0 < 32) { - lx = (lx >> 32) << 32; - lx &= ~(((lx << 32)-1) >> j0); - } else { - m = (uint64_t)-1 >> (j0 + 1); - if (lx & m) lx &= ~m; - } - INSERT_LDBL80_WORDS(x, ix, lx); - + if (ix < 0x403e) { /* 1 <= |x| < 0x1p63 */ + FFLOORL80(x, j0, ix, lx); /* Integer part of ax. */ ax -= x; EXTRACT_LDBL80_WORDS(ix, lx, ax); if (ix < 0x3ffe) { /* |x| < 0.5 */ if (ix < 0x3ffd) /* |x| < 0.25 */ c = ix == 0 ? 1 : __kernel_cospil(ax); else c = __kernel_sinpil(0.5 - ax); } else { if (lx < 0xc000000000000000ull) { /* |x| < 0.75 */ if (ax == 0.5) RETURNI(0); c = -__kernel_sinpil(ax - 0.5); } else c = -__kernel_cospil(1 - ax); } if (j0 > 40) x -= 0x1p40; if (j0 > 30) x -= 0x1p30; j0 = (uint32_t)x; RETURNI(j0 & 1 ? -c : c); } if (ix >= 0x7fff) RETURNI(vzero / vzero); /* - * |x| >= 0x1p63 is always an even integer, so return 1. + * For 0x1p63 <= |x| < 0x1p64 need to determine if x is an even + * or odd integer to return t = +1 or -1. + * For |x| >= 0x1p64, it is always an even integer, so t = 1. */ - RETURNI(1); + RETURNI(ix >= 0x403f ? 1 : ((lx & 1) ? -1 : 1)); } diff --git a/lib/msun/ld80/s_sinpil.c b/lib/msun/ld80/s_sinpil.c index 4cefa92352e1..7d9008f9e18f 100644 --- a/lib/msun/ld80/s_sinpil.c +++ b/lib/msun/ld80/s_sinpil.c @@ -1,140 +1,130 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017, 2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_sinpi.c for implementation details. */ #ifdef __i386__ #include #endif #include #include "fpmath.h" #include "math.h" #include "math_private.h" static const union IEEEl2bits pi_hi_u = LD80C(0xc90fdaa200000000, 1, 3.14159265346825122833e+00L), pi_lo_u = LD80C(0x85a308d313198a2e, -33, 1.21542010130123852029e-10L); #define pi_hi (pi_hi_u.e) #define pi_lo (pi_lo_u.e) #include "k_cospil.h" #include "k_sinpil.h" volatile static const double vzero = 0; long double sinpil(long double x) { long double ax, hi, lo, s; uint64_t lx, m; uint32_t j0; uint16_t hx, ix; EXTRACT_LDBL80_WORDS(hx, lx, x); ix = hx & 0x7fff; INSERT_LDBL80_WORDS(ax, ix, lx); ENTERI(); if (ix < 0x3fff) { /* |x| < 1 */ if (ix < 0x3ffd) { /* |x| < 0.25 */ if (ix < 0x3fdd) { /* |x| < 0x1p-34 */ if (x == 0) RETURNI(x); INSERT_LDBL80_WORDS(hi, hx, lx & 0xffffffff00000000ull); hi *= 0x1p63L; lo = x * 0x1p63L - hi; s = (pi_lo + pi_hi) * lo + pi_lo * hi + pi_hi * hi; RETURNI(s * 0x1p-63L); } s = __kernel_sinpil(ax); RETURNI((hx & 0x8000) ? -s : s); } if (ix < 0x3ffe) /* |x| < 0.5 */ s = __kernel_cospil(0.5 - ax); else if (lx < 0xc000000000000000ull) /* |x| < 0.75 */ s = __kernel_cospil(ax - 0.5); else s = __kernel_sinpil(1 - ax); RETURNI((hx & 0x8000) ? -s : s); } - if (ix < 0x403e) { /* 1 <= |x| < 0x1p63 */ - /* Determine integer part of ax. */ - j0 = ix - 0x3fff + 1; - if (j0 < 32) { - lx = (lx >> 32) << 32; - lx &= ~(((lx << 32)-1) >> j0); - } else { - m = (uint64_t)-1 >> (j0 + 1); - if (lx & m) lx &= ~m; - } - INSERT_LDBL80_WORDS(x, ix, lx); - + if (ix < 0x403e) { /* 1 <= |x| < 0x1p63 */ + FFLOORL80(x, j0, ix, lx); /* Integer part of ax. */ ax -= x; EXTRACT_LDBL80_WORDS(ix, lx, ax); if (ix == 0) { s = 0; } else { if (ix < 0x3ffe) { /* |x| < 0.5 */ if (ix < 0x3ffd) /* |x| < 0.25 */ s = __kernel_sinpil(ax); else s = __kernel_cospil(0.5 - ax); } else { /* |x| < 0.75 */ if (lx < 0xc000000000000000ull) s = __kernel_cospil(ax - 0.5); else s = __kernel_sinpil(1 - ax); } if (j0 > 40) x -= 0x1p40; if (j0 > 30) x -= 0x1p30; j0 = (uint32_t)x; if (j0 & 1) s = -s; } RETURNI((hx & 0x8000) ? -s : s); } /* x = +-inf or nan. */ if (ix >= 0x7fff) RETURNI(vzero / vzero); /* * |x| >= 0x1p63 is always an integer, so return +-0. */ RETURNI(copysignl(0, x)); } diff --git a/lib/msun/ld80/s_tanpil.c b/lib/msun/ld80/s_tanpil.c index 02451e562025..2d640413af6c 100644 --- a/lib/msun/ld80/s_tanpil.c +++ b/lib/msun/ld80/s_tanpil.c @@ -1,139 +1,133 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017, 2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_tanpi.c for implementation details. */ #ifdef __i386__ #include #endif #include #include "fpmath.h" #include "math.h" #include "math_private.h" static const double pi_hi = 3.1415926814079285e+00, /* 0x400921fb 0x58000000 */ pi_lo = -2.7818135228334233e-08; /* 0xbe5dde97 0x3dcb3b3a */ static inline long double __kernel_tanpil(long double x) { long double hi, lo, t; if (x < 0.25) { hi = (float)x; lo = x - hi; lo = lo * (pi_lo + pi_hi) + hi * pi_lo; hi *= pi_hi; _2sumF(hi, lo); t = __kernel_tanl(hi, lo, -1); } else if (x > 0.25) { x = 0.5 - x; hi = (float)x; lo = x - hi; lo = lo * (pi_lo + pi_hi) + hi * pi_lo; hi *= pi_hi; _2sumF(hi, lo); t = - __kernel_tanl(hi, lo, 1); } else t = 1; return (t); } volatile static const double vzero = 0; long double tanpil(long double x) { - long double ax, hi, lo, t; + long double ax, hi, lo, odd, t; uint64_t lx, m; uint32_t j0; uint16_t hx, ix; EXTRACT_LDBL80_WORDS(hx, lx, x); ix = hx & 0x7fff; INSERT_LDBL80_WORDS(ax, ix, lx); ENTERI(); if (ix < 0x3fff) { /* |x| < 1 */ if (ix < 0x3ffe) { /* |x| < 0.5 */ if (ix < 0x3fdd) { /* |x| < 0x1p-34 */ if (x == 0) RETURNI(x); INSERT_LDBL80_WORDS(hi, hx, lx & 0xffffffff00000000ull); hi *= 0x1p63L; lo = x * 0x1p63L - hi; t = (pi_lo + pi_hi) * lo + pi_lo * hi + pi_hi * hi; RETURNI(t * 0x1p-63L); } t = __kernel_tanpil(ax); } else if (ax == 0.5) - RETURNI((ax - ax) / (ax - ax)); + t = 1 / vzero; else t = -__kernel_tanpil(1 - ax); RETURNI((hx & 0x8000) ? -t : t); } - if (ix < 0x403e) { /* 1 <= |x| < 0x1p63 */ - /* Determine integer part of ax. */ - j0 = ix - 0x3fff + 1; - if (j0 < 32) { - lx = (lx >> 32) << 32; - lx &= ~(((lx << 32)-1) >> j0); - } else { - m = (uint64_t)-1 >> (j0 + 1); - if (lx & m) lx &= ~m; - } - INSERT_LDBL80_WORDS(x, ix, lx); - + if (ix < 0x403e) { /* 1 <= |x| < 0x1p63 */ + FFLOORL80(x, j0, ix, lx); /* Integer part of ax. */ + odd = (uint64_t)x & 1 ? -1 : 1; ax -= x; EXTRACT_LDBL80_WORDS(ix, lx, ax); if (ix < 0x3ffe) /* |x| < 0.5 */ - t = ax == 0 ? 0 : __kernel_tanpil(ax); - else if (ax == 0.5) - RETURNI((ax - ax) / (ax - ax)); + t = ix == 0 ? copysignl(0, odd) : __kernel_tanpil(ax); + else if (ax == 0.5L) + t = odd / vzero; else t = -__kernel_tanpil(1 - ax); RETURNI((hx & 0x8000) ? -t : t); } /* x = +-inf or nan. */ if (ix >= 0x7fff) RETURNI(vzero / vzero); /* - * |x| >= 0x1p63 is always an integer, so return +-0. + * For 0x1p63 <= |x| < 0x1p64 need to determine if x is an even + * or odd integer to set t = +0 or -0. + * For |x| >= 0x1p64, it is always an even integer, so t = 0. */ - RETURNI(copysignl(0, x)); + t = ix >= 0x403f ? 0 : (copysignl(0, (lx & 1) ? -1 : 1)); + RETURNI((hx & 0x8000) ? -t : t); } diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index a853ad4f9b4c..63aa3f603a5e 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -1,819 +1,872 @@ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* * from: @(#)fdlibm.h 5.1 93/09/24 * $FreeBSD$ */ #ifndef _MATH_PRIVATE_H_ #define _MATH_PRIVATE_H_ #include #include /* * The original fdlibm code used statements like: * n0 = ((*(int*)&one)>>29)^1; * index of high word * * ix0 = *(n0+(int*)&x); * high word of x * * ix1 = *((1-n0)+(int*)&x); * low word of x * * to dig two 32 bit words out of the 64 bit IEEE floating point * value. That is non-ANSI, and, moreover, the gcc instruction * scheduler gets it wrong. We instead use the following macros. * Unlike the original code, we determine the endianness at compile * time, not at run time; I don't see much benefit to selecting * endianness at run time. */ /* * A union which permits us to convert between a double and two 32 bit * ints. */ #ifdef __arm__ #if defined(__VFP_FP__) || defined(__ARM_EABI__) #define IEEE_WORD_ORDER BYTE_ORDER #else #define IEEE_WORD_ORDER BIG_ENDIAN #endif #else /* __arm__ */ #define IEEE_WORD_ORDER BYTE_ORDER #endif /* A union which permits us to convert between a long double and four 32 bit ints. */ #if IEEE_WORD_ORDER == BIG_ENDIAN typedef union { long double value; struct { u_int32_t mswhi; u_int32_t mswlo; u_int32_t lswhi; u_int32_t lswlo; } parts32; struct { u_int64_t msw; u_int64_t lsw; } parts64; } ieee_quad_shape_type; #endif #if IEEE_WORD_ORDER == LITTLE_ENDIAN typedef union { long double value; struct { u_int32_t lswlo; u_int32_t lswhi; u_int32_t mswlo; u_int32_t mswhi; } parts32; struct { u_int64_t lsw; u_int64_t msw; } parts64; } ieee_quad_shape_type; #endif #if IEEE_WORD_ORDER == BIG_ENDIAN typedef union { double value; struct { u_int32_t msw; u_int32_t lsw; } parts; struct { u_int64_t w; } xparts; } ieee_double_shape_type; #endif #if IEEE_WORD_ORDER == LITTLE_ENDIAN typedef union { double value; struct { u_int32_t lsw; u_int32_t msw; } parts; struct { u_int64_t w; } xparts; } ieee_double_shape_type; #endif /* Get two 32 bit ints from a double. */ #define EXTRACT_WORDS(ix0,ix1,d) \ do { \ ieee_double_shape_type ew_u; \ ew_u.value = (d); \ (ix0) = ew_u.parts.msw; \ (ix1) = ew_u.parts.lsw; \ } while (0) /* Get a 64-bit int from a double. */ #define EXTRACT_WORD64(ix,d) \ do { \ ieee_double_shape_type ew_u; \ ew_u.value = (d); \ (ix) = ew_u.xparts.w; \ } while (0) /* Get the more significant 32 bit int from a double. */ #define GET_HIGH_WORD(i,d) \ do { \ ieee_double_shape_type gh_u; \ gh_u.value = (d); \ (i) = gh_u.parts.msw; \ } while (0) /* Get the less significant 32 bit int from a double. */ #define GET_LOW_WORD(i,d) \ do { \ ieee_double_shape_type gl_u; \ gl_u.value = (d); \ (i) = gl_u.parts.lsw; \ } while (0) /* Set a double from two 32 bit ints. */ #define INSERT_WORDS(d,ix0,ix1) \ do { \ ieee_double_shape_type iw_u; \ iw_u.parts.msw = (ix0); \ iw_u.parts.lsw = (ix1); \ (d) = iw_u.value; \ } while (0) /* Set a double from a 64-bit int. */ #define INSERT_WORD64(d,ix) \ do { \ ieee_double_shape_type iw_u; \ iw_u.xparts.w = (ix); \ (d) = iw_u.value; \ } while (0) /* Set the more significant 32 bits of a double from an int. */ #define SET_HIGH_WORD(d,v) \ do { \ ieee_double_shape_type sh_u; \ sh_u.value = (d); \ sh_u.parts.msw = (v); \ (d) = sh_u.value; \ } while (0) /* Set the less significant 32 bits of a double from an int. */ #define SET_LOW_WORD(d,v) \ do { \ ieee_double_shape_type sl_u; \ sl_u.value = (d); \ sl_u.parts.lsw = (v); \ (d) = sl_u.value; \ } while (0) /* * A union which permits us to convert between a float and a 32 bit * int. */ typedef union { float value; /* FIXME: Assumes 32 bit int. */ unsigned int word; } ieee_float_shape_type; /* Get a 32 bit int from a float. */ #define GET_FLOAT_WORD(i,d) \ do { \ ieee_float_shape_type gf_u; \ gf_u.value = (d); \ (i) = gf_u.word; \ } while (0) /* Set a float from a 32 bit int. */ #define SET_FLOAT_WORD(d,i) \ do { \ ieee_float_shape_type sf_u; \ sf_u.word = (i); \ (d) = sf_u.value; \ } while (0) /* * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long * double. */ #define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \ do { \ union IEEEl2bits ew_u; \ ew_u.e = (d); \ (ix0) = ew_u.xbits.expsign; \ (ix1) = ew_u.xbits.man; \ } while (0) /* * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit * long double. */ #define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \ do { \ union IEEEl2bits ew_u; \ ew_u.e = (d); \ (ix0) = ew_u.xbits.expsign; \ (ix1) = ew_u.xbits.manh; \ (ix2) = ew_u.xbits.manl; \ } while (0) /* Get expsign as a 16 bit int from a long double. */ #define GET_LDBL_EXPSIGN(i,d) \ do { \ union IEEEl2bits ge_u; \ ge_u.e = (d); \ (i) = ge_u.xbits.expsign; \ } while (0) /* * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int * mantissa. */ #define INSERT_LDBL80_WORDS(d,ix0,ix1) \ do { \ union IEEEl2bits iw_u; \ iw_u.xbits.expsign = (ix0); \ iw_u.xbits.man = (ix1); \ (d) = iw_u.e; \ } while (0) /* * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints * comprising the mantissa. */ #define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \ do { \ union IEEEl2bits iw_u; \ iw_u.xbits.expsign = (ix0); \ iw_u.xbits.manh = (ix1); \ iw_u.xbits.manl = (ix2); \ (d) = iw_u.e; \ } while (0) /* Set expsign of a long double from a 16 bit int. */ #define SET_LDBL_EXPSIGN(d,v) \ do { \ union IEEEl2bits se_u; \ se_u.e = (d); \ se_u.xbits.expsign = (v); \ (d) = se_u.e; \ } while (0) #ifdef __i386__ /* Long double constants are broken on i386. */ #define LD80C(m, ex, v) { \ .xbits.man = __CONCAT(m, ULL), \ .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \ } #else /* The above works on non-i386 too, but we use this to check v. */ #define LD80C(m, ex, v) { .e = (v), } #endif #ifdef FLT_EVAL_METHOD /* * Attempt to get strict C99 semantics for assignment with non-C99 compilers. */ #if FLT_EVAL_METHOD == 0 || __GNUC__ == 0 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) #else #define STRICT_ASSIGN(type, lval, rval) do { \ volatile type __lval; \ \ if (sizeof(type) >= sizeof(long double)) \ (lval) = (rval); \ else { \ __lval = (rval); \ (lval) = __lval; \ } \ } while (0) #endif #endif /* FLT_EVAL_METHOD */ /* Support switching the mode to FP_PE if necessary. */ #if defined(__i386__) && !defined(NO_FPSETPREC) #define ENTERI() ENTERIT(long double) #define ENTERIT(returntype) \ returntype __retval; \ fp_prec_t __oprec; \ \ if ((__oprec = fpgetprec()) != FP_PE) \ fpsetprec(FP_PE) #define RETURNI(x) do { \ __retval = (x); \ if (__oprec != FP_PE) \ fpsetprec(__oprec); \ RETURNF(__retval); \ } while (0) #define ENTERV() \ fp_prec_t __oprec; \ \ if ((__oprec = fpgetprec()) != FP_PE) \ fpsetprec(FP_PE) #define RETURNV() do { \ if (__oprec != FP_PE) \ fpsetprec(__oprec); \ return; \ } while (0) #else #define ENTERI() #define ENTERIT(x) #define RETURNI(x) RETURNF(x) #define ENTERV() #define RETURNV() return #endif /* Default return statement if hack*_t() is not used. */ #define RETURNF(v) return (v) /* * 2sum gives the same result as 2sumF without requiring |a| >= |b| or * a == 0, but is slower. */ #define _2sum(a, b) do { \ __typeof(a) __s, __w; \ \ __w = (a) + (b); \ __s = __w - (a); \ (b) = ((a) - (__w - __s)) + ((b) - __s); \ (a) = __w; \ } while (0) /* * 2sumF algorithm. * * "Normalize" the terms in the infinite-precision expression a + b for * the sum of 2 floating point values so that b is as small as possible * relative to 'a'. (The resulting 'a' is the value of the expression in * the same precision as 'a' and the resulting b is the rounding error.) * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and * exponent overflow or underflow must not occur. This uses a Theorem of * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum" * is apparently due to Skewchuk (1997). * * For this to always work, assignment of a + b to 'a' must not retain any * extra precision in a + b. This is required by C standards but broken * in many compilers. The brokenness cannot be worked around using * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this * algorithm would be destroyed by non-null strict assignments. (The * compilers are correct to be broken -- the efficiency of all floating * point code calculations would be destroyed similarly if they forced the * conversions.) * * Fortunately, a case that works well can usually be arranged by building * any extra precision into the type of 'a' -- 'a' should have type float_t, * double_t or long double. b's type should be no larger than 'a's type. * Callers should use these types with scopes as large as possible, to * reduce their own extra-precision and efficiciency problems. In * particular, they shouldn't convert back and forth just to call here. */ #ifdef DEBUG #define _2sumF(a, b) do { \ __typeof(a) __w; \ volatile __typeof(a) __ia, __ib, __r, __vw; \ \ __ia = (a); \ __ib = (b); \ assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \ \ __w = (a) + (b); \ (b) = ((a) - __w) + (b); \ (a) = __w; \ \ /* The next 2 assertions are weak if (a) is already long double. */ \ assert((long double)__ia + __ib == (long double)(a) + (b)); \ __vw = __ia + __ib; \ __r = __ia - __vw; \ __r += __ib; \ assert(__vw == (a) && __r == (b)); \ } while (0) #else /* !DEBUG */ #define _2sumF(a, b) do { \ __typeof(a) __w; \ \ __w = (a) + (b); \ (b) = ((a) - __w) + (b); \ (a) = __w; \ } while (0) #endif /* DEBUG */ /* * Set x += c, where x is represented in extra precision as a + b. * x must be sufficiently normalized and sufficiently larger than c, * and the result is then sufficiently normalized. * * The details of ordering are that |a| must be >= |c| (so that (a, c) * can be normalized without extra work to swap 'a' with c). The details of * the normalization are that b must be small relative to the normalized 'a'. * Normalization of (a, c) makes the normalized c tiny relative to the * normalized a, so b remains small relative to 'a' in the result. However, * b need not ever be tiny relative to 'a'. For example, b might be about * 2**20 times smaller than 'a' to give about 20 extra bits of precision. * That is usually enough, and adding c (which by normalization is about * 2**53 times smaller than a) cannot change b significantly. However, * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' * significantly relative to b. The caller must ensure that significant * cancellation doesn't occur, either by having c of the same sign as 'a', * or by having |c| a few percent smaller than |a|. Pre-normalization of * (a, b) may help. * * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 * exercise 19). We gain considerable efficiency by requiring the terms to * be sufficiently normalized and sufficiently increasing. */ #define _3sumF(a, b, c) do { \ __typeof(a) __tmp; \ \ __tmp = (c); \ _2sumF(__tmp, (a)); \ (b) += (a); \ (a) = __tmp; \ } while (0) /* * Common routine to process the arguments to nan(), nanf(), and nanl(). */ void _scan_nan(uint32_t *__words, int __num_words, const char *__s); /* * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns * signaling NaNs into quiet NaNs by setting a quiet bit. We do this * because we want to never return a signaling NaN, and also because we * don't want the quiet bit to affect the result. Then mix the converted * args using the specified operation. * * When one arg is NaN, the result is typically that arg quieted. When both * args are NaNs, the result is typically the quietening of the arg whose * mantissa is largest after quietening. When neither arg is NaN, the * result may be NaN because it is indeterminate, or finite for subsequent * construction of a NaN as the indeterminate 0.0L/0.0L. * * Technical complications: the result in bits after rounding to the final * precision might depend on the runtime precision and/or on compiler * optimizations, especially when different register sets are used for * different precisions. Try to make the result not depend on at least the * runtime precision by always doing the main mixing step in long double * precision. Try to reduce dependencies on optimizations by adding the * the 0's in different precisions (unless everything is in long double * precision). */ #define nan_mix(x, y) (nan_mix_op((x), (y), +)) #define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0)) #ifdef _COMPLEX_H /* * C99 specifies that complex numbers have the same representation as * an array of two elements, where the first element is the real part * and the second element is the imaginary part. */ typedef union { float complex f; float a[2]; } float_complex; typedef union { double complex f; double a[2]; } double_complex; typedef union { long double complex f; long double a[2]; } long_double_complex; #define REALPART(z) ((z).a[0]) #define IMAGPART(z) ((z).a[1]) /* * Inline functions that can be used to construct complex values. * * The C99 standard intends x+I*y to be used for this, but x+I*y is * currently unusable in general since gcc introduces many overflow, * underflow, sign and efficiency bugs by rewriting I*y as * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted * to -0.0+I*0.0. * * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() * to construct complex values. Compilers that conform to the C99 * standard require the following functions to avoid the above issues. */ #ifndef CMPLXF static __inline float complex CMPLXF(float x, float y) { float_complex z; REALPART(z) = x; IMAGPART(z) = y; return (z.f); } #endif #ifndef CMPLX static __inline double complex CMPLX(double x, double y) { double_complex z; REALPART(z) = x; IMAGPART(z) = y; return (z.f); } #endif #ifndef CMPLXL static __inline long double complex CMPLXL(long double x, long double y) { long_double_complex z; REALPART(z) = x; IMAGPART(z) = y; return (z.f); } #endif #endif /* _COMPLEX_H */ /* * The rnint() family rounds to the nearest integer for a restricted range * range of args (up to about 2**MANT_DIG). We assume that the current * rounding mode is FE_TONEAREST so that this can be done efficiently. * Extra precision causes more problems in practice, and we only centralize * this here to reduce those problems, and have not solved the efficiency * problems. The exp2() family uses a more delicate version of this that * requires extracting bits from the intermediate value, so it is not * centralized here and should copy any solution of the efficiency problems. */ static inline double rnint(__double_t x) { /* * This casts to double to kill any extra precision. This depends * on the cast being applied to a double_t to avoid compiler bugs * (this is a cleaner version of STRICT_ASSIGN()). This is * inefficient if there actually is extra precision, but is hard * to improve on. We use double_t in the API to minimise conversions * for just calling here. Note that we cannot easily change the * magic number to the one that works directly with double_t, since * the rounding precision is variable at runtime on x86 so the * magic number would need to be variable. Assuming that the * rounding precision is always the default is too fragile. This * and many other complications will move when the default is * changed to FP_PE. */ return ((double)(x + 0x1.8p52) - 0x1.8p52); } static inline float rnintf(__float_t x) { /* * As for rnint(), except we could just call that to handle the * extra precision case, usually without losing efficiency. */ return ((float)(x + 0x1.8p23F) - 0x1.8p23F); } #ifdef LDBL_MANT_DIG /* * The complications for extra precision are smaller for rnintl() since it * can safely assume that the rounding precision has been increased from * its default to FP_PE on x86. We don't exploit that here to get small * optimizations from limiting the range to double. We just need it for * the magic number to work with long doubles. ld128 callers should use * rnint() instead of this if possible. ld80 callers should prefer * rnintl() since for amd64 this avoids swapping the register set, while * for i386 it makes no difference (assuming FP_PE), and for other arches * it makes little difference. */ static inline long double rnintl(long double x) { return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 - __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2); } #endif /* LDBL_MANT_DIG */ /* * irint() and i64rint() give the same result as casting to their integer * return type provided their arg is a floating point integer. They can * sometimes be more efficient because no rounding is required. */ #if defined(amd64) || defined(__i386__) #define irint(x) \ (sizeof(x) == sizeof(float) && \ sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ sizeof(x) == sizeof(double) && \ sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) #else #define irint(x) ((int)(x)) #endif #define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ #if defined(__i386__) static __inline int irintf(float x) { int n; __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } static __inline int irintd(double x) { int n; __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } #endif #if defined(__amd64__) || defined(__i386__) static __inline int irintl(long double x) { int n; __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } #endif +/* + * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where + * N is the precision of the type of x. These macros are used in the + * half-cycle trignometric functions (e.g., sinpi(x)). + */ +#define FFLOORF(x, j0, ix) do { \ + (j0) = (((ix) >> 23) & 0xff) - 0x7f; \ + (ix) &= ~(0x007fffff >> (j0)); \ + SET_FLOAT_WORD((x), (ix)); \ +} while (0) + +#define FFLOOR(x, j0, ix, lx) do { \ + (j0) = (((ix) >> 20) & 0x7ff) - 0x3ff; \ + if ((j0) < 20) { \ + (ix) &= ~(0x000fffff >> (j0)); \ + (lx) = 0; \ + } else { \ + (lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20)); \ + } \ + INSERT_WORDS((x), (ix), (lx)); \ +} while (0) + +#define FFLOORL80(x, j0, ix, lx) do { \ + j0 = ix - 0x3fff + 1; \ + if ((j0) < 32) { \ + (lx) = ((lx) >> 32) << 32; \ + (lx) &= ~((((lx) << 32)-1) >> (j0)); \ + } else { \ + uint64_t _m; \ + _m = (uint64_t)-1 >> (j0); \ + if ((lx) & _m) (lx) &= ~_m; \ + } \ + INSERT_LDBL80_WORDS((x), (ix), (lx)); \ +} while (0) + +#define FFLOORL128(x, ai, ar) do { \ + union IEEEl2bits u; \ + uint64_t m; \ + int e; \ + u.e = (x); \ + e = u.bits.exp - 16383; \ + if (e < 48) { \ + m = ((1llu << 49) - 1) >> (e + 1); \ + u.bits.manh &= ~m; \ + u.bits.manl = 0; \ + } else { \ + m = (uint64_t)-1 >> (e - 48); \ + u.bits.manl &= ~m; \ + } \ + (ai) = u.e; \ + (ar) = (x) - (ai); \ +} while (0) + #ifdef DEBUG #if defined(__amd64__) || defined(__i386__) #define breakpoint() asm("int $3") #else #include #define breakpoint() raise(SIGTRAP) #endif #endif #ifdef STRUCT_RETURN #define RETURNSP(rp) do { \ if (!(rp)->lo_set) \ RETURNF((rp)->hi); \ RETURNF((rp)->hi + (rp)->lo); \ } while (0) #define RETURNSPI(rp) do { \ if (!(rp)->lo_set) \ RETURNI((rp)->hi); \ RETURNI((rp)->hi + (rp)->lo); \ } while (0) #endif #define SUM2P(x, y) ({ \ const __typeof (x) __x = (x); \ const __typeof (y) __y = (y); \ __x + __y; \ }) /* * ieee style elementary functions * * We rename functions here to improve other sources' diffability * against fdlibm. */ #define __ieee754_sqrt sqrt #define __ieee754_acos acos #define __ieee754_acosh acosh #define __ieee754_log log #define __ieee754_log2 log2 #define __ieee754_atanh atanh #define __ieee754_asin asin #define __ieee754_atan2 atan2 #define __ieee754_exp exp #define __ieee754_cosh cosh #define __ieee754_fmod fmod #define __ieee754_pow pow #define __ieee754_lgamma lgamma #define __ieee754_gamma gamma #define __ieee754_lgamma_r lgamma_r #define __ieee754_gamma_r gamma_r #define __ieee754_log10 log10 #define __ieee754_sinh sinh #define __ieee754_hypot hypot #define __ieee754_j0 j0 #define __ieee754_j1 j1 #define __ieee754_y0 y0 #define __ieee754_y1 y1 #define __ieee754_jn jn #define __ieee754_yn yn #define __ieee754_remainder remainder #define __ieee754_scalb scalb #define __ieee754_sqrtf sqrtf #define __ieee754_acosf acosf #define __ieee754_acoshf acoshf #define __ieee754_logf logf #define __ieee754_atanhf atanhf #define __ieee754_asinf asinf #define __ieee754_atan2f atan2f #define __ieee754_expf expf #define __ieee754_coshf coshf #define __ieee754_fmodf fmodf #define __ieee754_powf powf #define __ieee754_lgammaf lgammaf #define __ieee754_gammaf gammaf #define __ieee754_lgammaf_r lgammaf_r #define __ieee754_gammaf_r gammaf_r #define __ieee754_log10f log10f #define __ieee754_log2f log2f #define __ieee754_sinhf sinhf #define __ieee754_hypotf hypotf #define __ieee754_j0f j0f #define __ieee754_j1f j1f #define __ieee754_y0f y0f #define __ieee754_y1f y1f #define __ieee754_jnf jnf #define __ieee754_ynf ynf #define __ieee754_remainderf remainderf #define __ieee754_scalbf scalbf /* fdlibm kernel function */ int __kernel_rem_pio2(double*,double*,int,int,int); /* double precision kernel functions */ #ifndef INLINE_REM_PIO2 int __ieee754_rem_pio2(double,double*); #endif double __kernel_sin(double,double,int); double __kernel_cos(double,double); double __kernel_tan(double,double,int); double __ldexp_exp(double,int); #ifdef _COMPLEX_H double complex __ldexp_cexp(double complex,int); #endif /* float precision kernel functions */ #ifndef INLINE_REM_PIO2F int __ieee754_rem_pio2f(float,double*); #endif #ifndef INLINE_KERNEL_SINDF float __kernel_sindf(double); #endif #ifndef INLINE_KERNEL_COSDF float __kernel_cosdf(double); #endif #ifndef INLINE_KERNEL_TANDF float __kernel_tandf(double,int); #endif float __ldexp_expf(float,int); #ifdef _COMPLEX_H float complex __ldexp_cexpf(float complex,int); #endif /* long double precision kernel functions */ long double __kernel_sinl(long double, long double, int); long double __kernel_cosl(long double, long double); long double __kernel_tanl(long double, long double, int); #endif /* !_MATH_PRIVATE_H_ */ diff --git a/lib/msun/src/s_cospi.c b/lib/msun/src/s_cospi.c index 2e2f92733a86..f97570dc8792 100644 --- a/lib/msun/src/s_cospi.c +++ b/lib/msun/src/s_cospi.c @@ -1,153 +1,145 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017, 2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * cospi(x) computes cos(pi*x) without multiplication by pi (almost). First, * note that cospi(-x) = cospi(x), so the algorithm considers only |x|. The * method used depends on the magnitude of x. * * 1. For small |x|, cospi(x) = 1 with FE_INEXACT raised where a sloppy * threshold is used. The threshold is |x| < 0x1pN with N = -(P/2+M). * P is the precision of the floating-point type and M = 2 to 4. * * 2. For |x| < 1, argument reduction is not required and sinpi(x) is * computed by calling a kernel that leverages the kernels for sin(x) * ans cos(x). See k_sinpi.c and k_cospi.c for details. * * 3. For 1 <= |x| < 0x1p(P-1), argument reduction is required where * |x| = j0 + r with j0 an integer and the remainder r satisfies * 0 <= r < 1. With the given domain, a simplified inline floor(x) * is used. Also, note the following identity * * cospi(x) = cos(pi*(j0+r)) * = cos(pi*j0) * cos(pi*r) - sin(pi*j0) * sin(pi*r) * = cos(pi*j0) * cos(pi*r) * = +-cospi(r) * * If j0 is even, then cos(pi*j0) = 1. If j0 is odd, then cos(pi*j0) = -1. * cospi(r) is then computed via an appropriate kernel. * * 4. For |x| >= 0x1p(P-1), |x| is integral and cospi(x) = 1. * * 5. Special cases: * * cospi(+-0) = 1. * cospi(n.5) = 0 for n an integer. * cospi(+-inf) = nan. Raises the "invalid" floating-point exception. * cospi(nan) = nan. Raises the "invalid" floating-point exception. */ #include #include "math.h" #include "math_private.h" static const double pi_hi = 3.1415926814079285e+00, /* 0x400921fb 0x58000000 */ pi_lo =-2.7818135228334233e-08; /* 0xbe5dde97 0x3dcb3b3a */ #include "k_cospi.h" #include "k_sinpi.h" volatile static const double vzero = 0; double cospi(double x) { double ax, c; uint32_t hx, ix, j0, lx; EXTRACT_WORDS(hx, lx, x); ix = hx & 0x7fffffff; INSERT_WORDS(ax, ix, lx); if (ix < 0x3ff00000) { /* |x| < 1 */ if (ix < 0x3fd00000) { /* |x| < 0.25 */ if (ix < 0x3e200000) { /* |x| < 0x1p-29 */ if ((int)ax == 0) return (1); } return (__kernel_cospi(ax)); } if (ix < 0x3fe00000) /* |x| < 0.5 */ c = __kernel_sinpi(0.5 - ax); else if (ix < 0x3fe80000){ /* |x| < 0.75 */ if (ax == 0.5) return (0); c = -__kernel_sinpi(ax - 0.5); } else c = -__kernel_cospi(1 - ax); return (c); } if (ix < 0x43300000) { /* 1 <= |x| < 0x1p52 */ - /* Determine integer part of ax. */ - j0 = ((ix >> 20) & 0x7ff) - 0x3ff; - if (j0 < 20) { - ix &= ~(0x000fffff >> j0); - lx = 0; - } else { - lx &= ~((uint32_t)0xffffffff >> (j0 - 20)); - } - INSERT_WORDS(x, ix, lx); - + FFLOOR(x, j0, ix, lx); /* Integer part of ax. */ ax -= x; EXTRACT_WORDS(ix, lx, ax); - if (ix < 0x3fe00000) { /* |x| < 0.5 */ if (ix < 0x3fd00000) /* |x| < 0.25 */ c = ix == 0 ? 1 : __kernel_cospi(ax); else c = __kernel_sinpi(0.5 - ax); } else { if (ix < 0x3fe80000) { /* |x| < 0.75 */ if (ax == 0.5) return (0); c = -__kernel_sinpi(ax - 0.5); } else c = -__kernel_cospi(1 - ax); } if (j0 > 30) x -= 0x1p30; j0 = (uint32_t)x; return (j0 & 1 ? -c : c); } /* x = +-inf or nan. */ if (ix >= 0x7ff00000) return (vzero / vzero); /* - * |x| >= 0x1p52 is always an even integer, so return 1. + * For 0x1p52 <= |x| < 0x1p53 need to determine if x is an even + * or odd integer to return +1 or -1. + * For |x| >= 0x1p53, it is always an even integer, so return 1. */ - return (1); + return (ix < 0x43400000 ? ((lx & 1) ? -1 : 1) : 1); } #if LDBL_MANT_DIG == 53 __weak_reference(cospi, cospil); #endif diff --git a/lib/msun/src/s_cospif.c b/lib/msun/src/s_cospif.c index 4dd881395baf..44d19f165025 100644 --- a/lib/msun/src/s_cospif.c +++ b/lib/msun/src/s_cospif.c @@ -1,109 +1,107 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017,2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_cospi.c for implementation details. */ #define INLINE_KERNEL_SINDF #define INLINE_KERNEL_COSDF #include "math.h" #include "math_private.h" #include "k_cosf.c" #include "k_sinf.c" #define __kernel_cospif(x) (__kernel_cosdf(M_PI * (x))) #define __kernel_sinpif(x) (__kernel_sindf(M_PI * (x))) volatile static const float vzero = 0; float cospif(float x) { float ax, c; uint32_t ix, j0; GET_FLOAT_WORD(ix, x); ix = ix & 0x7fffffff; SET_FLOAT_WORD(ax, ix); if (ix < 0x3f800000) { /* |x| < 1 */ if (ix < 0x3e800000) { /* |x| < 0.25 */ if (ix < 0x38800000) { /* |x| < 0x1p-14 */ /* Raise inexact iff != 0. */ if ((int)ax == 0) return (1); } return (__kernel_cospif(ax)); } if (ix < 0x3f000000) /* |x| < 0.5 */ c = __kernel_sinpif(0.5F - ax); else if (ix < 0x3f400000) { /* |x| < 0.75 */ if (ix == 0x3f000000) return (0); c = -__kernel_sinpif(ax - 0.5F); } else c = -__kernel_cospif(1 - ax); return (c); } - if (ix < 0x4b000000) { /* 1 <= |x| < 0x1p23 */ - /* Determine integer part of ax. */ - j0 = ((ix >> 23) & 0xff) - 0x7f; - ix &= ~(0x007fffff >> j0); - SET_FLOAT_WORD(x, ix); - + if (ix < 0x4b000000) { /* 1 <= |x| < 0x1p23 */ + FFLOORF(x, j0, ix); /* Integer part of ax. */ ax -= x; GET_FLOAT_WORD(ix, ax); if (ix < 0x3f000000) { /* |x| < 0.5 */ if (ix < 0x3e800000) /* |x| < 0.25 */ c = ix == 0 ? 1 : __kernel_cospif(ax); else c = __kernel_sinpif(0.5F - ax); } else { if (ix < 0x3f400000) { /* |x| < 0.75 */ if (ix == 0x3f000000) return (0); c = -__kernel_sinpif(ax - 0.5F); } else c = -__kernel_cospif(1 - ax); } j0 = (uint32_t)x; return (j0 & 1 ? -c : c); } /* x = +-inf or nan. */ if (ix >= 0x7f800000) return (vzero / vzero); /* - * |x| >= 0x1p23 is always an even integer, so return 1. + * For 0x1p23 <= |x| < 0x1p24 need to determine if x is an even + * or odd integer to return +1 or -1. + * For |x| >= 0x1p24, it is always an even integer, so return 1. */ - return (1); + return (ix < 0x4b800000 ? ((ix & 1) ? -1 : 1) : 1); } diff --git a/lib/msun/src/s_sinpi.c b/lib/msun/src/s_sinpi.c index bc3759e567a3..8b388de863c3 100644 --- a/lib/msun/src/s_sinpi.c +++ b/lib/msun/src/s_sinpi.c @@ -1,170 +1,161 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017, 2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * sinpi(x) computes sin(pi*x) without multiplication by pi (almost). First, * note that sinpi(-x) = -sinpi(x), so the algorithm considers only |x| and * includes reflection symmetry by considering the sign of x on output. The * method used depends on the magnitude of x. * * 1. For small |x|, sinpi(x) = pi * x where a sloppy threshold is used. The * threshold is |x| < 0x1pN with N = -(P/2+M). P is the precision of the * floating-point type and M = 2 to 4. To achieve high accuracy, pi is * decomposed into high and low parts with the high part containing a * number of trailing zero bits. x is also split into high and low parts. * * 2. For |x| < 1, argument reduction is not required and sinpi(x) is * computed by calling a kernel that leverages the kernels for sin(x) * ans cos(x). See k_sinpi.c and k_cospi.c for details. * * 3. For 1 <= |x| < 0x1p(P-1), argument reduction is required where * |x| = j0 + r with j0 an integer and the remainder r satisfies * 0 <= r < 1. With the given domain, a simplified inline floor(x) * is used. Also, note the following identity * * sinpi(x) = sin(pi*(j0+r)) * = sin(pi*j0) * cos(pi*r) + cos(pi*j0) * sin(pi*r) * = cos(pi*j0) * sin(pi*r) * = +-sinpi(r) * * If j0 is even, then cos(pi*j0) = 1. If j0 is odd, then cos(pi*j0) = -1. * sinpi(r) is then computed via an appropriate kernel. * * 4. For |x| >= 0x1p(P-1), |x| is integral and sinpi(x) = copysign(0,x). * * 5. Special cases: * * sinpi(+-0) = +-0 * sinpi(+-n) = +-0, for positive integers n. * sinpi(+-inf) = nan. Raises the "invalid" floating-point exception. * sinpi(nan) = nan. Raises the "invalid" floating-point exception. */ #include #include "math.h" #include "math_private.h" static const double pi_hi = 3.1415926814079285e+00, /* 0x400921fb 0x58000000 */ pi_lo =-2.7818135228334233e-08; /* 0xbe5dde97 0x3dcb3b3a */ #include "k_cospi.h" #include "k_sinpi.h" volatile static const double vzero = 0; double sinpi(double x) { double ax, hi, lo, s; uint32_t hx, ix, j0, lx; EXTRACT_WORDS(hx, lx, x); ix = hx & 0x7fffffff; INSERT_WORDS(ax, ix, lx); if (ix < 0x3ff00000) { /* |x| < 1 */ if (ix < 0x3fd00000) { /* |x| < 0.25 */ if (ix < 0x3e200000) { /* |x| < 0x1p-29 */ if (x == 0) return (x); /* * To avoid issues with subnormal values, * scale the computation and rescale on * return. */ INSERT_WORDS(hi, hx, 0); hi *= 0x1p53; lo = x * 0x1p53 - hi; s = (pi_lo + pi_hi) * lo + pi_lo * hi + pi_hi * hi; return (s * 0x1p-53); } s = __kernel_sinpi(ax); return ((hx & 0x80000000) ? -s : s); } if (ix < 0x3fe00000) /* |x| < 0.5 */ s = __kernel_cospi(0.5 - ax); else if (ix < 0x3fe80000) /* |x| < 0.75 */ s = __kernel_cospi(ax - 0.5); else s = __kernel_sinpi(1 - ax); return ((hx & 0x80000000) ? -s : s); } if (ix < 0x43300000) { /* 1 <= |x| < 0x1p52 */ - /* Determine integer part of ax. */ - j0 = ((ix >> 20) & 0x7ff) - 0x3ff; - if (j0 < 20) { - ix &= ~(0x000fffff >> j0); - lx = 0; - } else { - lx &= ~((uint32_t)0xffffffff >> (j0 - 20)); - } - INSERT_WORDS(x, ix, lx); - + FFLOOR(x, j0, ix, lx); /* Integer part of ax. */ ax -= x; EXTRACT_WORDS(ix, lx, ax); if (ix == 0) s = 0; else { if (ix < 0x3fe00000) { /* |x| < 0.5 */ if (ix < 0x3fd00000) /* |x| < 0.25 */ s = __kernel_sinpi(ax); else s = __kernel_cospi(0.5 - ax); } else { if (ix < 0x3fe80000) /* |x| < 0.75 */ s = __kernel_cospi(ax - 0.5); else s = __kernel_sinpi(1 - ax); } if (j0 > 30) x -= 0x1p30; j0 = (uint32_t)x; if (j0 & 1) s = -s; } return ((hx & 0x80000000) ? -s : s); } /* x = +-inf or nan. */ if (ix >= 0x7ff00000) return (vzero / vzero); /* * |x| >= 0x1p52 is always an integer, so return +-0. */ return (copysign(0, x)); } #if LDBL_MANT_DIG == 53 __weak_reference(sinpi, sinpil); #endif diff --git a/lib/msun/src/s_sinpif.c b/lib/msun/src/s_sinpif.c index c9f76f8a2358..21082dee7d9c 100644 --- a/lib/msun/src/s_sinpif.c +++ b/lib/msun/src/s_sinpif.c @@ -1,122 +1,118 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017,2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_sinpi.c for implementation details. */ #define INLINE_KERNEL_SINDF #define INLINE_KERNEL_COSDF #include "math.h" #include "math_private.h" #include "k_cosf.c" #include "k_sinf.c" #define __kernel_cospif(x) (__kernel_cosdf(M_PI * (x))) #define __kernel_sinpif(x) (__kernel_sindf(M_PI * (x))) static const float pi_hi = 3.14160156e+00F, /* 0x40491000 */ pi_lo = -8.90890988e-06F; /* 0xb715777a */ volatile static const float vzero = 0; float sinpif(float x) { float ax, hi, lo, s; uint32_t hx, ix, j0; GET_FLOAT_WORD(hx, x); ix = hx & 0x7fffffff; SET_FLOAT_WORD(ax, ix); if (ix < 0x3f800000) { /* |x| < 1 */ if (ix < 0x3e800000) { /* |x| < 0.25 */ if (ix < 0x38800000) { /* |x| < 0x1p-14 */ if (x == 0) return (x); SET_FLOAT_WORD(hi, hx & 0xffff0000); hi *= 0x1p23F; lo = x * 0x1p23F - hi; s = (pi_lo + pi_hi) * lo + pi_lo * hi + pi_hi * hi; return (s * 0x1p-23F); } s = __kernel_sinpif(ax); return ((hx & 0x80000000) ? -s : s); } if (ix < 0x3f000000) /* |x| < 0.5 */ s = __kernel_cospif(0.5F - ax); else if (ix < 0x3f400000) /* |x| < 0.75 */ s = __kernel_cospif(ax - 0.5F); else s = __kernel_sinpif(1 - ax); return ((hx & 0x80000000) ? -s : s); } - if (ix < 0x4b000000) { /* 1 <= |x| < 0x1p23 */ - /* Determine integer part of ax. */ - j0 = ((ix >> 23) & 0xff) - 0x7f; - ix &= ~(0x007fffff >> j0); - SET_FLOAT_WORD(x, ix); - + if (ix < 0x4b000000) { /* 1 <= |x| < 0x1p23 */ + FFLOORF(x, j0, ix); /* Integer part of ax. */ ax -= x; GET_FLOAT_WORD(ix, ax); if (ix == 0) s = 0; else { if (ix < 0x3f000000) { /* |x| < 0.5 */ if (ix < 0x3e800000) /* |x| < 0.25 */ s = __kernel_sinpif(ax); else s = __kernel_cospif(0.5F - ax); } else { if (ix < 0x3f400000) /* |x| < 0.75 */ s = __kernel_cospif(ax - 0.5F); else s = __kernel_sinpif(1 - ax); } j0 = (uint32_t)x; s = (j0 & 1) ? -s : s; } return ((hx & 0x80000000) ? -s : s); } /* x = +-inf or nan. */ if (ix >= 0x7f800000) return (vzero / vzero); /* * |x| >= 0x1p23 is always an integer, so return +-0. */ return (copysignf(0, x)); } diff --git a/lib/msun/src/s_tanpi.c b/lib/msun/src/s_tanpi.c index f911d56156b3..cd00adbcb86e 100644 --- a/lib/msun/src/s_tanpi.c +++ b/lib/msun/src/s_tanpi.c @@ -1,177 +1,176 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017, 2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * tanpi(x) computes tan(pi*x) without multiplication by pi (almost). First, * note that tanpi(-x) = -tanpi(x), so the algorithm considers only |x| and * includes reflection symmetry by considering the sign of x on output. The * method used depends on the magnitude of x. * * 1. For small |x|, tanpi(x) = pi * x where a sloppy threshold is used. The * threshold is |x| < 0x1pN with N = -(P/2+M). P is the precision of the * floating-point type and M = 2 to 4. To achieve high accuracy, pi is * decomposed into high and low parts with the high part containing a * number of trailing zero bits. x is also split into high and low parts. * * 2. For |x| < 1, argument reduction is not required and tanpi(x) is * computed by a direct call to a kernel, which uses the kernel for * tan(x). See below. * * 3. For 1 <= |x| < 0x1p(P-1), argument reduction is required where * |x| = j0 + r with j0 an integer and the remainder r satisfies * 0 <= r < 1. With the given domain, a simplified inline floor(x) * is used. Also, note the following identity * * tan(pi*j0) + tan(pi*r) * tanpi(x) = tan(pi*(j0+r)) = ---------------------------- = tanpi(r) * 1 - tan(pi*j0) * tan(pi*r) * * So, after argument reduction, the kernel is again invoked. * * 4. For |x| >= 0x1p(P-1), |x| is integral and tanpi(x) = copysign(0,x). * * 5. Special cases: * * tanpi(+-0) = +-0 - * tanpi(+-n) = +-0, for positive integers n. + * tanpi(n) = +0 for positive even and negative odd integer n. + * tanpi(n) = -0 for positive odd and negative even integer n. * tanpi(+-n+1/4) = +-1, for positive integers n. - * tanpi(+-n+1/2) = NaN, for positive integers n. - * tanpi(+-inf) = NaN. Raises the "invalid" floating-point exception. - * tanpi(nan) = NaN. Raises the "invalid" floating-point exception. + * tanpi(n+1/2) = +inf and raises the FE_DIVBYZERO exception for + * even integers n. + * tanpi(n+1/2) = -inf and raises the FE_DIVBYZERO exception for + * odd integers n. + * tanpi(+-inf) = NaN and raises the FE_INVALID exception. + * tanpi(nan) = NaN and raises the FE_INVALID exception. */ #include #include "math.h" #include "math_private.h" static const double pi_hi = 3.1415926814079285e+00, /* 0x400921fb 0x58000000 */ pi_lo = -2.7818135228334233e-08; /* 0xbe5dde97 0x3dcb3b3a */ /* * The kernel for tanpi(x) multiplies x by an 80-bit approximation of * pi, where the hi and lo parts are used with with kernel for tan(x). */ static inline double __kernel_tanpi(double x) { double_t hi, lo, t; if (x < 0.25) { hi = (float)x; lo = x - hi; lo = lo * (pi_lo + pi_hi) + hi * pi_lo; hi *= pi_hi; _2sumF(hi, lo); t = __kernel_tan(hi, lo, 1); } else if (x > 0.25) { x = 0.5 - x; hi = (float)x; lo = x - hi; lo = lo * (pi_lo + pi_hi) + hi * pi_lo; hi *= pi_hi; _2sumF(hi, lo); t = - __kernel_tan(hi, lo, -1); } else t = 1; return (t); } volatile static const double vzero = 0; double tanpi(double x) { - double ax, hi, lo, t; + double ax, hi, lo, odd, t; uint32_t hx, ix, j0, lx; EXTRACT_WORDS(hx, lx, x); ix = hx & 0x7fffffff; INSERT_WORDS(ax, ix, lx); if (ix < 0x3ff00000) { /* |x| < 1 */ if (ix < 0x3fe00000) { /* |x| < 0.5 */ if (ix < 0x3e200000) { /* |x| < 0x1p-29 */ if (x == 0) return (x); /* * To avoid issues with subnormal values, * scale the computation and rescale on * return. */ INSERT_WORDS(hi, hx, 0); hi *= 0x1p53; lo = x * 0x1p53 - hi; t = (pi_lo + pi_hi) * lo + pi_lo * hi + pi_hi * hi; return (t * 0x1p-53); } t = __kernel_tanpi(ax); } else if (ax == 0.5) - return ((ax - ax) / (ax - ax)); + t = 1 / vzero; else t = - __kernel_tanpi(1 - ax); return ((hx & 0x80000000) ? -t : t); } if (ix < 0x43300000) { /* 1 <= |x| < 0x1p52 */ - /* Determine integer part of ax. */ - j0 = ((ix >> 20) & 0x7ff) - 0x3ff; - if (j0 < 20) { - ix &= ~(0x000fffff >> j0); - lx = 0; - } else { - lx &= ~(((uint32_t)(0xffffffff)) >> (j0 - 20)); - } - INSERT_WORDS(x,ix,lx); - + FFLOOR(x, j0, ix, lx); /* Integer part of ax. */ + odd = (uint64_t)x & 1 ? -1 : 1; ax -= x; EXTRACT_WORDS(ix, lx, ax); if (ix < 0x3fe00000) /* |x| < 0.5 */ - t = ax == 0 ? 0 : __kernel_tanpi(ax); + t = ix == 0 ? copysign(0, odd) : __kernel_tanpi(ax); else if (ax == 0.5) - return ((ax - ax) / (ax - ax)); + t = odd / vzero; else t = - __kernel_tanpi(1 - ax); return ((hx & 0x80000000) ? -t : t); } /* x = +-inf or nan. */ if (ix >= 0x7ff00000) return (vzero / vzero); /* - * |x| >= 0x1p52 is always an integer, so return +-0. + * For 0x1p52 <= |x| < 0x1p53 need to determine if x is an even + * or odd integer to set t = +0 or -0. + * For |x| >= 0x1p54, it is always an even integer, so t = 0. */ - return (copysign(0, x)); + t = ix >= 0x43400000 ? 0 : (copysign(0, (lx & 1) ? -1 : 1)); + return ((hx & 0x80000000) ? -t : t); } #if LDBL_MANT_DIG == 53 __weak_reference(tanpi, tanpil); #endif diff --git a/lib/msun/src/s_tanpif.c b/lib/msun/src/s_tanpif.c index 6d4b627d1cf9..12dd8f838976 100644 --- a/lib/msun/src/s_tanpif.c +++ b/lib/msun/src/s_tanpif.c @@ -1,114 +1,114 @@ /*- - * Copyright (c) 2017 Steven G. Kargl + * Copyright (c) 2017,2023 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * See ../src/s_tanpi.c for implementation details. */ #define INLINE_KERNEL_TANDF #include "math.h" #include "math_private.h" #include "k_tanf.c" static const float pi_hi = 3.14160156e+00F, /* 0x40491000 */ pi_lo = -8.90890988e-06F; /* 0xb715777a */ static inline float __kernel_tanpif(float x) { float t; if (x < 0.25F) t = __kernel_tandf(M_PI * x, 1); else if (x > 0.25F) t = -__kernel_tandf(M_PI * (0.5 - x), -1); else t = 1; return (t); } volatile static const float vzero = 0; float tanpif(float x) { - float ax, hi, lo, t; + float ax, hi, lo, odd, t; uint32_t hx, ix, j0; GET_FLOAT_WORD(hx, x); ix = hx & 0x7fffffff; SET_FLOAT_WORD(ax, ix); if (ix < 0x3f800000) { /* |x| < 1 */ if (ix < 0x3f000000) { /* |x| < 0.5 */ if (ix < 0x38800000) { /* |x| < 0x1p-14 */ if (ix == 0) return (x); SET_FLOAT_WORD(hi, hx & 0xffff0000); hi *= 0x1p23F; lo = x * 0x1p23F - hi; t = (pi_lo + pi_hi) * lo + pi_lo * hi + pi_hi * hi; return (t * 0x1p-23F); } t = __kernel_tanpif(ax); } else if (ix == 0x3f000000) - return ((ax - ax) / (ax - ax)); + t = 1 / vzero; else t = - __kernel_tanpif(1 - ax); return ((hx & 0x80000000) ? -t : t); } if (ix < 0x4b000000) { /* 1 <= |x| < 0x1p23 */ - /* Determine integer part of ax. */ - j0 = ((ix >> 23) & 0xff) - 0x7f; - ix &= ~(0x007fffff >> j0); - SET_FLOAT_WORD(x, ix); - + FFLOORF(x, j0, ix); /* Integer part of ax. */ + odd = (uint32_t)x & 1 ? -1 : 1; ax -= x; GET_FLOAT_WORD(ix, ax); if (ix < 0x3f000000) /* |x| < 0.5 */ - t = ix == 0 ? 0 : __kernel_tanpif(ax); + t = ix == 0 ? copysignf(0, odd) : __kernel_tanpif(ax); else if (ix == 0x3f000000) - return ((ax - ax) / (ax - ax)); + t = odd / vzero; else t = - __kernel_tanpif(1 - ax); return ((hx & 0x80000000) ? -t : t); } /* x = +-inf or nan. */ if (ix >= 0x7f800000) return (vzero / vzero); /* - * |x| >= 0x1p23 is always an integer, so return +-0. + * For 0x1p23 <= |x| < 0x1p24 need to determine if x is an even + * or odd integer to set t = +0 or -0. + * For |x| >= 0x1p24, it is always an even integer, so t = 0. */ - return (copysignf(0, x)); + t = ix >= 0x4b800000 ? 0 : (copysignf(0, (ix & 1) ? -1 : 1)); + return ((hx & 0x80000000) ? -t : t); }