diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index 20ce7bd88464..df526e71e545 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -1,924 +1,924 @@ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* * from: @(#)fdlibm.h 5.1 93/09/24 * $FreeBSD$ */ #ifndef _MATH_PRIVATE_H_ #define _MATH_PRIVATE_H_ #include #include /* * The original fdlibm code used statements like: * n0 = ((*(int*)&one)>>29)^1; * index of high word * * ix0 = *(n0+(int*)&x); * high word of x * * ix1 = *((1-n0)+(int*)&x); * low word of x * * to dig two 32 bit words out of the 64 bit IEEE floating point * value. That is non-ANSI, and, moreover, the gcc instruction * scheduler gets it wrong. We instead use the following macros. * Unlike the original code, we determine the endianness at compile * time, not at run time; I don't see much benefit to selecting * endianness at run time. */ /* * A union which permits us to convert between a double and two 32 bit * ints. */ #ifdef __arm__ #if defined(__VFP_FP__) || defined(__ARM_EABI__) #define IEEE_WORD_ORDER BYTE_ORDER #else #define IEEE_WORD_ORDER BIG_ENDIAN #endif #else /* __arm__ */ #define IEEE_WORD_ORDER BYTE_ORDER #endif /* A union which permits us to convert between a long double and four 32 bit ints. */ #if IEEE_WORD_ORDER == BIG_ENDIAN typedef union { long double value; struct { u_int32_t mswhi; u_int32_t mswlo; u_int32_t lswhi; u_int32_t lswlo; } parts32; struct { u_int64_t msw; u_int64_t lsw; } parts64; } ieee_quad_shape_type; #endif #if IEEE_WORD_ORDER == LITTLE_ENDIAN typedef union { long double value; struct { u_int32_t lswlo; u_int32_t lswhi; u_int32_t mswlo; u_int32_t mswhi; } parts32; struct { u_int64_t lsw; u_int64_t msw; } parts64; } ieee_quad_shape_type; #endif #if IEEE_WORD_ORDER == BIG_ENDIAN typedef union { double value; struct { u_int32_t msw; u_int32_t lsw; } parts; struct { u_int64_t w; } xparts; } ieee_double_shape_type; #endif #if IEEE_WORD_ORDER == LITTLE_ENDIAN typedef union { double value; struct { u_int32_t lsw; u_int32_t msw; } parts; struct { u_int64_t w; } xparts; } ieee_double_shape_type; #endif /* Get two 32 bit ints from a double. */ #define EXTRACT_WORDS(ix0,ix1,d) \ do { \ ieee_double_shape_type ew_u; \ ew_u.value = (d); \ (ix0) = ew_u.parts.msw; \ (ix1) = ew_u.parts.lsw; \ } while (0) /* Get a 64-bit int from a double. */ #define EXTRACT_WORD64(ix,d) \ do { \ ieee_double_shape_type ew_u; \ ew_u.value = (d); \ (ix) = ew_u.xparts.w; \ } while (0) /* Get the more significant 32 bit int from a double. */ #define GET_HIGH_WORD(i,d) \ do { \ ieee_double_shape_type gh_u; \ gh_u.value = (d); \ (i) = gh_u.parts.msw; \ } while (0) /* Get the less significant 32 bit int from a double. */ #define GET_LOW_WORD(i,d) \ do { \ ieee_double_shape_type gl_u; \ gl_u.value = (d); \ (i) = gl_u.parts.lsw; \ } while (0) /* Set a double from two 32 bit ints. */ #define INSERT_WORDS(d,ix0,ix1) \ do { \ ieee_double_shape_type iw_u; \ iw_u.parts.msw = (ix0); \ iw_u.parts.lsw = (ix1); \ (d) = iw_u.value; \ } while (0) /* Set a double from a 64-bit int. 
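/*
 * Illustrative sketch (hypothetical helper, not taken from this file):
 * typical use of the word-access macros above, here reading the biased
 * exponent field from the high word of an IEEE double.
 */
static __inline int
__example_dbl_biased_exp(double d)
{
	u_int32_t hi;

	GET_HIGH_WORD(hi, d);
	return ((hi >> 20) & 0x7ff);	/* exponent occupies bits 62..52 */
}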
*/ #define INSERT_WORD64(d,ix) \ do { \ ieee_double_shape_type iw_u; \ iw_u.xparts.w = (ix); \ (d) = iw_u.value; \ } while (0) /* Set the more significant 32 bits of a double from an int. */ #define SET_HIGH_WORD(d,v) \ do { \ ieee_double_shape_type sh_u; \ sh_u.value = (d); \ sh_u.parts.msw = (v); \ (d) = sh_u.value; \ } while (0) /* Set the less significant 32 bits of a double from an int. */ #define SET_LOW_WORD(d,v) \ do { \ ieee_double_shape_type sl_u; \ sl_u.value = (d); \ sl_u.parts.lsw = (v); \ (d) = sl_u.value; \ } while (0) /* * A union which permits us to convert between a float and a 32 bit * int. */ typedef union { float value; /* FIXME: Assumes 32 bit int. */ unsigned int word; } ieee_float_shape_type; /* Get a 32 bit int from a float. */ #define GET_FLOAT_WORD(i,d) \ do { \ ieee_float_shape_type gf_u; \ gf_u.value = (d); \ (i) = gf_u.word; \ } while (0) /* Set a float from a 32 bit int. */ #define SET_FLOAT_WORD(d,i) \ do { \ ieee_float_shape_type sf_u; \ sf_u.word = (i); \ (d) = sf_u.value; \ } while (0) /* * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long * double. */ #define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \ do { \ union IEEEl2bits ew_u; \ ew_u.e = (d); \ (ix0) = ew_u.xbits.expsign; \ (ix1) = ew_u.xbits.man; \ } while (0) /* * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit * long double. */ #define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \ do { \ union IEEEl2bits ew_u; \ ew_u.e = (d); \ (ix0) = ew_u.xbits.expsign; \ (ix1) = ew_u.xbits.manh; \ (ix2) = ew_u.xbits.manl; \ } while (0) /* Get expsign as a 16 bit int from a long double. */ #define GET_LDBL_EXPSIGN(i,d) \ do { \ union IEEEl2bits ge_u; \ ge_u.e = (d); \ (i) = ge_u.xbits.expsign; \ } while (0) /* * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int * mantissa. */ #define INSERT_LDBL80_WORDS(d,ix0,ix1) \ do { \ union IEEEl2bits iw_u; \ iw_u.xbits.expsign = (ix0); \ iw_u.xbits.man = (ix1); \ (d) = iw_u.e; \ } while (0) /* * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints * comprising the mantissa. */ #define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \ do { \ union IEEEl2bits iw_u; \ iw_u.xbits.expsign = (ix0); \ iw_u.xbits.manh = (ix1); \ iw_u.xbits.manl = (ix2); \ (d) = iw_u.e; \ } while (0) /* Set expsign of a long double from a 16 bit int. */ #define SET_LDBL_EXPSIGN(d,v) \ do { \ union IEEEl2bits se_u; \ se_u.e = (d); \ se_u.xbits.expsign = (v); \ (d) = se_u.e; \ } while (0) #ifdef __i386__ /* Long double constants are broken on i386. */ #define LD80C(m, ex, v) { \ .xbits.man = __CONCAT(m, ULL), \ .xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \ } #else /* The above works on non-i386 too, but we use this to check v. */ #define LD80C(m, ex, v) { .e = (v), } #endif #ifdef FLT_EVAL_METHOD /* * Attempt to get strict C99 semantics for assignment with non-C99 compilers. */ #if FLT_EVAL_METHOD == 0 || __GNUC__ == 0 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) #else #define STRICT_ASSIGN(type, lval, rval) do { \ volatile type __lval; \ \ if (sizeof(type) >= sizeof(long double)) \ (lval) = (rval); \ else { \ __lval = (rval); \ (lval) = __lval; \ } \ } while (0) #endif #endif /* FLT_EVAL_METHOD */ /* Support switching the mode to FP_PE if necessary. 
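/*
 * Illustrative sketch (hypothetical helpers): typical use of the float
 * word-access macros above, e.g. building +Inf directly from its bit
 * pattern and reading the sign bit without a floating-point compare.
 */
static __inline float
__example_float_inf(void)
{
	float f;

	SET_FLOAT_WORD(f, 0x7f800000);	/* exponent all ones, mantissa zero */
	return (f);
}

static __inline int
__example_float_signbit(float f)
{
	u_int32_t w;

	GET_FLOAT_WORD(w, f);
	return ((w >> 31) != 0);
}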
*/ #if defined(__i386__) && !defined(NO_FPSETPREC) #define ENTERI() ENTERIT(long double) #define ENTERIT(returntype) \ returntype __retval; \ fp_prec_t __oprec; \ \ if ((__oprec = fpgetprec()) != FP_PE) \ fpsetprec(FP_PE) #define RETURNI(x) do { \ __retval = (x); \ if (__oprec != FP_PE) \ fpsetprec(__oprec); \ RETURNF(__retval); \ } while (0) #define ENTERV() \ fp_prec_t __oprec; \ \ if ((__oprec = fpgetprec()) != FP_PE) \ fpsetprec(FP_PE) #define RETURNV() do { \ if (__oprec != FP_PE) \ fpsetprec(__oprec); \ return; \ } while (0) #else #define ENTERI() #define ENTERIT(x) #define RETURNI(x) RETURNF(x) #define ENTERV() #define RETURNV() return #endif /* Default return statement if hack*_t() is not used. */ #define RETURNF(v) return (v) /* * 2sum gives the same result as 2sumF without requiring |a| >= |b| or * a == 0, but is slower. */ #define _2sum(a, b) do { \ __typeof(a) __s, __w; \ \ __w = (a) + (b); \ __s = __w - (a); \ (b) = ((a) - (__w - __s)) + ((b) - __s); \ (a) = __w; \ } while (0) /* * 2sumF algorithm. * * "Normalize" the terms in the infinite-precision expression a + b for * the sum of 2 floating point values so that b is as small as possible * relative to 'a'. (The resulting 'a' is the value of the expression in * the same precision as 'a' and the resulting b is the rounding error.) * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and * exponent overflow or underflow must not occur. This uses a Theorem of * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum" * is apparently due to Skewchuk (1997). * * For this to always work, assignment of a + b to 'a' must not retain any * extra precision in a + b. This is required by C standards but broken * in many compilers. The brokenness cannot be worked around using * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this * algorithm would be destroyed by non-null strict assignments. (The * compilers are correct to be broken -- the efficiency of all floating * point code calculations would be destroyed similarly if they forced the * conversions.) * * Fortunately, a case that works well can usually be arranged by building * any extra precision into the type of 'a' -- 'a' should have type float_t, * double_t or long double. b's type should be no larger than 'a's type. * Callers should use these types with scopes as large as possible, to * reduce their own extra-precision and efficiciency problems. In * particular, they shouldn't convert back and forth just to call here. */ #ifdef DEBUG #define _2sumF(a, b) do { \ __typeof(a) __w; \ volatile __typeof(a) __ia, __ib, __r, __vw; \ \ __ia = (a); \ __ib = (b); \ assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \ \ __w = (a) + (b); \ (b) = ((a) - __w) + (b); \ (a) = __w; \ \ /* The next 2 assertions are weak if (a) is already long double. */ \ assert((long double)__ia + __ib == (long double)(a) + (b)); \ __vw = __ia + __ib; \ __r = __ia - __vw; \ __r += __ib; \ assert(__vw == (a) && __r == (b)); \ } while (0) #else /* !DEBUG */ #define _2sumF(a, b) do { \ __typeof(a) __w; \ \ __w = (a) + (b); \ (b) = ((a) - __w) + (b); \ (a) = __w; \ } while (0) #endif /* DEBUG */ /* * Set x += c, where x is represented in extra precision as a + b. * x must be sufficiently normalized and sufficiently larger than c, * and the result is then sufficiently normalized. * * The details of ordering are that |a| must be >= |c| (so that (a, c) * can be normalized without extra work to swap 'a' with c). 
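/*
 * Illustrative sketch (hypothetical helper): use of _2sumF() as described
 * above.  On entry |a| >= |b| (or a == 0); on return 'a' holds the rounded
 * sum and 'b' the exact rounding error, so a + b is still the exact sum.
 * double_t is used as recommended above; assumes <math.h> is in scope.
 */
static __inline void
__example_split_sum(double_t a, double_t b, double_t *hi, double_t *lo)
{
	_2sumF(a, b);
	*hi = a;	/* a + b rounded to double_t */
	*lo = b;	/* exact error of that rounding */
}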
The details of * the normalization are that b must be small relative to the normalized 'a'. * Normalization of (a, c) makes the normalized c tiny relative to the * normalized a, so b remains small relative to 'a' in the result. However, * b need not ever be tiny relative to 'a'. For example, b might be about * 2**20 times smaller than 'a' to give about 20 extra bits of precision. * That is usually enough, and adding c (which by normalization is about * 2**53 times smaller than a) cannot change b significantly. However, * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' * significantly relative to b. The caller must ensure that significant * cancellation doesn't occur, either by having c of the same sign as 'a', * or by having |c| a few percent smaller than |a|. Pre-normalization of * (a, b) may help. * * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 * exercise 19). We gain considerable efficiency by requiring the terms to * be sufficiently normalized and sufficiently increasing. */ #define _3sumF(a, b, c) do { \ __typeof(a) __tmp; \ \ __tmp = (c); \ _2sumF(__tmp, (a)); \ (b) += (a); \ (a) = __tmp; \ } while (0) /* * Common routine to process the arguments to nan(), nanf(), and nanl(). */ void _scan_nan(uint32_t *__words, int __num_words, const char *__s); /* * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns * signaling NaNs into quiet NaNs by setting a quiet bit. We do this * because we want to never return a signaling NaN, and also because we * don't want the quiet bit to affect the result. Then mix the converted * args using the specified operation. * * When one arg is NaN, the result is typically that arg quieted. When both * args are NaNs, the result is typically the quietening of the arg whose * mantissa is largest after quietening. When neither arg is NaN, the * result may be NaN because it is indeterminate, or finite for subsequent * construction of a NaN as the indeterminate 0.0L/0.0L. * * Technical complications: the result in bits after rounding to the final * precision might depend on the runtime precision and/or on compiler * optimizations, especially when different register sets are used for * different precisions. Try to make the result not depend on at least the * runtime precision by always doing the main mixing step in long double * precision. Try to reduce dependencies on optimizations by adding the * the 0's in different precisions (unless everything is in long double * precision). */ #define nan_mix(x, y) (nan_mix_op((x), (y), +)) #define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0)) #ifdef _COMPLEX_H /* * C99 specifies that complex numbers have the same representation as * an array of two elements, where the first element is the real part * and the second element is the imaginary part. */ typedef union { float complex f; float a[2]; } float_complex; typedef union { double complex f; double a[2]; } double_complex; typedef union { long double complex f; long double a[2]; } long_double_complex; #define REALPART(z) ((z).a[0]) #define IMAGPART(z) ((z).a[1]) /* * Inline functions that can be used to construct complex values. * * The C99 standard intends x+I*y to be used for this, but x+I*y is * currently unusable in general since gcc introduces many overflow, * underflow, sign and efficiency bugs by rewriting I*y as * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted * to -0.0+I*0.0. 
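/*
 * Illustrative sketch (hypothetical helper): how a two-argument libm
 * routine might use nan_mix() above to report a NaN argument, returning
 * a quiet NaN derived from the argument(s) rather than a fresh one.
 * Assumes isnan() from <math.h>.
 */
static __inline double
__example_nan_result(double x, double y)
{
	if (isnan(x) || isnan(y))
		return (nan_mix(x, y));
	return (x + y);		/* placeholder for the real computation */
}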
* * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() * to construct complex values. Compilers that conform to the C99 * standard require the following functions to avoid the above issues. */ #ifndef CMPLXF static __inline float complex CMPLXF(float x, float y) { float_complex z; REALPART(z) = x; IMAGPART(z) = y; return (z.f); } #endif #ifndef CMPLX static __inline double complex CMPLX(double x, double y) { double_complex z; REALPART(z) = x; IMAGPART(z) = y; return (z.f); } #endif #ifndef CMPLXL static __inline long double complex CMPLXL(long double x, long double y) { long_double_complex z; REALPART(z) = x; IMAGPART(z) = y; return (z.f); } #endif #endif /* _COMPLEX_H */ /* * The rnint() family rounds to the nearest integer for a restricted range * range of args (up to about 2**MANT_DIG). We assume that the current * rounding mode is FE_TONEAREST so that this can be done efficiently. * Extra precision causes more problems in practice, and we only centralize * this here to reduce those problems, and have not solved the efficiency * problems. The exp2() family uses a more delicate version of this that * requires extracting bits from the intermediate value, so it is not * centralized here and should copy any solution of the efficiency problems. */ static inline double rnint(__double_t x) { /* * This casts to double to kill any extra precision. This depends * on the cast being applied to a double_t to avoid compiler bugs * (this is a cleaner version of STRICT_ASSIGN()). This is * inefficient if there actually is extra precision, but is hard * to improve on. We use double_t in the API to minimise conversions * for just calling here. Note that we cannot easily change the * magic number to the one that works directly with double_t, since * the rounding precision is variable at runtime on x86 so the * magic number would need to be variable. Assuming that the * rounding precision is always the default is too fragile. This * and many other complications will move when the default is * changed to FP_PE. */ return ((double)(x + 0x1.8p52) - 0x1.8p52); } static inline float rnintf(__float_t x) { /* * As for rnint(), except we could just call that to handle the * extra precision case, usually without losing efficiency. */ return ((float)(x + 0x1.8p23F) - 0x1.8p23F); } #ifdef LDBL_MANT_DIG /* * The complications for extra precision are smaller for rnintl() since it * can safely assume that the rounding precision has been increased from * its default to FP_PE on x86. We don't exploit that here to get small * optimizations from limiting the rangle to double. We just need it for * the magic number to work with long doubles. ld128 callers should use * rnint() instead of this if possible. ld80 callers should prefer * rnintl() since for amd64 this avoids swapping the register set, while * for i386 it makes no difference (assuming FP_PE), and for other arches * it makes little difference. */ static inline long double rnintl(long double x) { return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 - __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2); } #endif /* LDBL_MANT_DIG */ /* * irint() and i64rint() give the same result as casting to their integer * return type provided their arg is a floating point integer. They can * sometimes be more efficient because no rounding is required. */ -#if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM) +#if defined(amd64) || defined(__i386__) #define irint(x) \ (sizeof(x) == sizeof(float) && \ sizeof(__float_t) == sizeof(long double) ? 
irintf(x) : \ sizeof(x) == sizeof(double) && \ sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) #else #define irint(x) ((int)(x)) #endif #define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ -#if defined(__i386__) && defined(__GNUCLIKE_ASM) +#if defined(__i386__) static __inline int irintf(float x) { int n; __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } static __inline int irintd(double x) { int n; __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } #endif -#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM) +#if defined(__amd64__) || defined(__i386__) static __inline int irintl(long double x) { int n; __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } #endif #ifdef DEBUG #if defined(__amd64__) || defined(__i386__) #define breakpoint() asm("int $3") #else #include #define breakpoint() raise(SIGTRAP) #endif #endif /* Write a pari script to test things externally. */ #ifdef DOPRINT #include #ifndef DOPRINT_SWIZZLE #define DOPRINT_SWIZZLE 0 #endif #ifdef DOPRINT_LD80 #define DOPRINT_START(xp) do { \ uint64_t __lx; \ uint16_t __hx; \ \ /* Hack to give more-problematic args. */ \ EXTRACT_LDBL80_WORDS(__hx, __lx, *xp); \ __lx ^= DOPRINT_SWIZZLE; \ INSERT_LDBL80_WORDS(*xp, __hx, __lx); \ printf("x = %.21Lg; ", (long double)*xp); \ } while (0) #define DOPRINT_END1(v) \ printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) #define DOPRINT_END2(hi, lo) \ printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ (long double)(hi), (long double)(lo)) #elif defined(DOPRINT_D64) #define DOPRINT_START(xp) do { \ uint32_t __hx, __lx; \ \ EXTRACT_WORDS(__hx, __lx, *xp); \ __lx ^= DOPRINT_SWIZZLE; \ INSERT_WORDS(*xp, __hx, __lx); \ printf("x = %.21Lg; ", (long double)*xp); \ } while (0) #define DOPRINT_END1(v) \ printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) #define DOPRINT_END2(hi, lo) \ printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ (long double)(hi), (long double)(lo)) #elif defined(DOPRINT_F32) #define DOPRINT_START(xp) do { \ uint32_t __hx; \ \ GET_FLOAT_WORD(__hx, *xp); \ __hx ^= DOPRINT_SWIZZLE; \ SET_FLOAT_WORD(*xp, __hx); \ printf("x = %.21Lg; ", (long double)*xp); \ } while (0) #define DOPRINT_END1(v) \ printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) #define DOPRINT_END2(hi, lo) \ printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ (long double)(hi), (long double)(lo)) #else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */ #ifndef DOPRINT_SWIZZLE_HIGH #define DOPRINT_SWIZZLE_HIGH 0 #endif #define DOPRINT_START(xp) do { \ uint64_t __lx, __llx; \ uint16_t __hx; \ \ EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp); \ __llx ^= DOPRINT_SWIZZLE; \ __lx ^= DOPRINT_SWIZZLE_HIGH; \ INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx); \ printf("x = %.36Lg; ", (long double)*xp); \ } while (0) #define DOPRINT_END1(v) \ printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v)) #define DOPRINT_END2(hi, lo) \ printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n", \ (long double)(hi), (long double)(lo)) #endif /* DOPRINT_LD80 */ #else /* !DOPRINT */ #define DOPRINT_START(xp) #define DOPRINT_END1(v) #define DOPRINT_END2(hi, lo) #endif /* DOPRINT */ #define RETURNP(x) do { \ DOPRINT_END1(x); \ RETURNF(x); \ } while (0) #define RETURNPI(x) do { \ DOPRINT_END1(x); \ RETURNI(x); \ } while (0) #define RETURN2P(x, y) do { \ DOPRINT_END2((x), (y)); \ RETURNF((x) + (y)); \ } while (0) #define RETURN2PI(x, y) do { \ DOPRINT_END2((x), (y)); \ RETURNI((x) + (y)); \ } 
while (0) #ifdef STRUCT_RETURN #define RETURNSP(rp) do { \ if (!(rp)->lo_set) \ RETURNP((rp)->hi); \ RETURN2P((rp)->hi, (rp)->lo); \ } while (0) #define RETURNSPI(rp) do { \ if (!(rp)->lo_set) \ RETURNPI((rp)->hi); \ RETURN2PI((rp)->hi, (rp)->lo); \ } while (0) #endif #define SUM2P(x, y) ({ \ const __typeof (x) __x = (x); \ const __typeof (y) __y = (y); \ \ DOPRINT_END2(__x, __y); \ __x + __y; \ }) /* * ieee style elementary functions * * We rename functions here to improve other sources' diffability * against fdlibm. */ #define __ieee754_sqrt sqrt #define __ieee754_acos acos #define __ieee754_acosh acosh #define __ieee754_log log #define __ieee754_log2 log2 #define __ieee754_atanh atanh #define __ieee754_asin asin #define __ieee754_atan2 atan2 #define __ieee754_exp exp #define __ieee754_cosh cosh #define __ieee754_fmod fmod #define __ieee754_pow pow #define __ieee754_lgamma lgamma #define __ieee754_gamma gamma #define __ieee754_lgamma_r lgamma_r #define __ieee754_gamma_r gamma_r #define __ieee754_log10 log10 #define __ieee754_sinh sinh #define __ieee754_hypot hypot #define __ieee754_j0 j0 #define __ieee754_j1 j1 #define __ieee754_y0 y0 #define __ieee754_y1 y1 #define __ieee754_jn jn #define __ieee754_yn yn #define __ieee754_remainder remainder #define __ieee754_scalb scalb #define __ieee754_sqrtf sqrtf #define __ieee754_acosf acosf #define __ieee754_acoshf acoshf #define __ieee754_logf logf #define __ieee754_atanhf atanhf #define __ieee754_asinf asinf #define __ieee754_atan2f atan2f #define __ieee754_expf expf #define __ieee754_coshf coshf #define __ieee754_fmodf fmodf #define __ieee754_powf powf #define __ieee754_lgammaf lgammaf #define __ieee754_gammaf gammaf #define __ieee754_lgammaf_r lgammaf_r #define __ieee754_gammaf_r gammaf_r #define __ieee754_log10f log10f #define __ieee754_log2f log2f #define __ieee754_sinhf sinhf #define __ieee754_hypotf hypotf #define __ieee754_j0f j0f #define __ieee754_j1f j1f #define __ieee754_y0f y0f #define __ieee754_y1f y1f #define __ieee754_jnf jnf #define __ieee754_ynf ynf #define __ieee754_remainderf remainderf #define __ieee754_scalbf scalbf /* fdlibm kernel function */ int __kernel_rem_pio2(double*,double*,int,int,int); /* double precision kernel functions */ #ifndef INLINE_REM_PIO2 int __ieee754_rem_pio2(double,double*); #endif double __kernel_sin(double,double,int); double __kernel_cos(double,double); double __kernel_tan(double,double,int); double __ldexp_exp(double,int); #ifdef _COMPLEX_H double complex __ldexp_cexp(double complex,int); #endif /* float precision kernel functions */ #ifndef INLINE_REM_PIO2F int __ieee754_rem_pio2f(float,double*); #endif #ifndef INLINE_KERNEL_SINDF float __kernel_sindf(double); #endif #ifndef INLINE_KERNEL_COSDF float __kernel_cosdf(double); #endif #ifndef INLINE_KERNEL_TANDF float __kernel_tandf(double,int); #endif float __ldexp_expf(float,int); #ifdef _COMPLEX_H float complex __ldexp_cexpf(float complex,int); #endif /* long double precision kernel functions */ long double __kernel_sinl(long double, long double, int); long double __kernel_cosl(long double, long double); long double __kernel_tanl(long double, long double, int); #endif /* !_MATH_PRIVATE_H_ */ diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 9b23cc5773a3..0bbf1cf047c0 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -1,1296 +1,1274 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Floating point support. 
*/ -#if defined(__GNUCLIKE_ASM) && !defined(lint) - #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr))) static __inline void xrstor32(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xrstor64(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor64 %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave32(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsave64(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave64 %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt32(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt64(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt64 %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } -#else /* !(__GNUCLIKE_ASM && !lint) */ - -void fldcw(u_short cw); -void fnclex(void); -void fninit(void); -void fnstcw(caddr_t addr); -void fnstsw(caddr_t addr); -void fxsave(caddr_t addr); -void fxrstor(caddr_t addr); -void ldmxcsr(u_int csr); -void stmxcsr(u_int *csr); -void xrstor32(char *addr, uint64_t mask); -void xrstor64(char *addr, uint64_t mask); -void xsave32(char *addr, uint64_t mask); -void xsave64(char *addr, uint64_t mask); -void xsaveopt32(char *addr, uint64_t mask); -void xsaveopt64(char *addr, uint64_t mask); - -#endif /* __GNUCLIKE_ASM && !lint */ - #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() CTASSERT(sizeof(struct savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. 
*/ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu)); static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; static struct savefpu *fpu_initialstate; static struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static void fpusave_xsaveopt64(void *addr) { xsaveopt64((char *)addr, xsave_mask); } static void fpusave_xsaveopt3264(void *addr) { if (SV_CURPROC_FLAG(SV_ILP32)) xsaveopt32((char *)addr, xsave_mask); else xsaveopt64((char *)addr, xsave_mask); } static void fpusave_xsave64(void *addr) { xsave64((char *)addr, xsave_mask); } static void fpusave_xsave3264(void *addr) { if (SV_CURPROC_FLAG(SV_ILP32)) xsave32((char *)addr, xsave_mask); else xsave64((char *)addr, xsave_mask); } static void fpurestore_xrstor64(void *addr) { xrstor64((char *)addr, xsave_mask); } static void fpurestore_xrstor3264(void *addr) { if (SV_CURPROC_FLAG(SV_ILP32)) xrstor32((char *)addr, xsave_mask); else xrstor64((char *)addr, xsave_mask); } static void fpusave_fxsave(void *addr) { fxsave((char *)addr); } static void fpurestore_fxrstor(void *addr) { fxrstor((char *)addr); } static void init_xsave(void) { if (use_xsave) return; if ((cpu_feature2 & CPUID2_XSAVE) == 0) return; use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } DEFINE_IFUNC(, void, fpusave, (void *)) { init_xsave(); if (!use_xsave) return (fpusave_fxsave); if ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0) { return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? fpusave_xsaveopt64 : fpusave_xsaveopt3264); } return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? fpusave_xsave64 : fpusave_xsave3264); } DEFINE_IFUNC(, void, fpurestore, (void *)) { init_xsave(); if (!use_xsave) return (fpurestore_fxrstor); return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? fpurestore_xrstor64 : fpurestore_xrstor3264); } void fpususpend(void *addr) { u_long cr0; cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void fpuresume(void *addr) { u_long cr0; cr0 = rcr0(); stop_emulating(); fninit(); if (use_xsave) load_xcr(XCR0, xsave_mask); fpurestore(addr); load_cr0(cr0); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void fpuinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; bool old_wp; if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { /* * Patch the XSAVE instruction in the cpu_switch code * to XSAVEOPT. We assume that XSAVE encoding used * REX byte, and set the bit 4 of the r/m byte. * * It seems that some BIOSes give control to the OS * with CR0.WP already set, making the kernel text * read-only before cpu_startup(). 
*/ old_wp = disable_wp(); ctx_switch_xsave32[3] |= 0x10; ctx_switch_xsave[3] |= 0x10; restore_wp(old_wp); } } /* * Calculate the fpu save area size. */ static void fpuinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(struct savefpu); } /* * Initialize the floating point unit. */ void fpuinit(void) { register_t saveintr; u_int mxcsr; u_short control; if (IS_BSP()) fpuinit_bsp1(); if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (IS_BSP()) fpuinit_bsp2(); /* * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); fninit(); control = __INITIAL_FPUCW__; fldcw(control); mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void fpuinitstate(void *arg __unused) { uint64_t *xstate_bv; register_t saveintr; int cp[4], i, max_ext_n; /* Do potentially blocking operations before disabling interrupts. */ fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO); if (use_xsave) { max_ext_n = flsl(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); } cpu_thread_alloc(&thread0); saveintr = intr_disable(); stop_emulating(); fpusave_fxsave(fpu_initialstate); if (fpu_initialstate->sv_env.en_mxcsr_mask) cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM registers or x87 * registers (MM/ST). The fpusave call dumped the garbage * contained in the registers after reset to the initial state * saved. Clear XMM and x87 registers file image to make the * startup program state and signal handler XMM/x87 register * content predictable. */ bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp)); bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm)); /* * Create a table describing the layout of the CPU Extended * Save Area. See Intel SDM rev. 075 Vol. 1 13.4.1 "Legacy * Region of an XSAVE Area" for the source of offsets/sizes. */ if (use_xsave) { xstate_bv = (uint64_t *)((char *)(fpu_initialstate + 1) + offsetof(struct xstate_hdr, xstate_bv)); *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 416 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } start_emulating(); intr_restore(saveintr); } /* EFIRT needs this to be initialized before we can enter our EFI environment */ SYSINIT(fpuinitstate, SI_SUB_CPU, SI_ORDER_ANY, fpuinitstate, NULL); /* * Free coprocessor (if we have it). 
*/ void fpuexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); } int fpuformat(void) { return (_MC_FPFMT_XMM); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. 
* 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ 
| UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int fputrap_x87(void) { struct savefpu *pcb_save; u_short control, status; critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
*/ if (PCPU_GET(fpcurthread) != curthread) { pcb_save = curpcb->pcb_save; control = pcb_save->sv_env.en_cw; status = pcb_save->sv_env.en_sw; } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int fputrap_sse(void) { u_int mxcsr; critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curpcb->pcb_save->sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } static void restore_fpu_curthread(struct thread *td) { struct pcb *pcb; /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, td); stop_emulating(); fpu_clean_state(); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * fpu_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size); fpurestore(pcb->pcb_save); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); if (PCB_USER_FPU(pcb)) set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(pcb, PCB_FPUINITDONE); } else fpurestore(pcb->pcb_save); } /* * Device Not Available (DNA, #NM) exception handler. * * It would be better to switch FP context here (if curthread != * fpcurthread) and not necessarily for every context switch, but it * is too hard to access foreign pcb's. */ void fpudna(void) { struct thread *td; td = curthread; /* * This handler is entered with interrupts enabled, so context * switches may occur before critical_enter() is executed. If * a context switch occurs, then when we regain control, our * state will have been completely restored. The CPU may * change underneath us, but the only part of our context that * lives in the CPU is CR0.TS and that will be "restored" by * setting it on the new CPU. */ critical_enter(); KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); if (__predict_false(PCPU_GET(fpcurthread) == td)) { /* * Some virtual machines seems to set %cr0.TS at * arbitrary moments. Silently clear the TS bit * regardless of the eager/lazy FPU context switch * mode. */ stop_emulating(); } else { if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { panic( "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, td, td->td_tid); } restore_fpu_curthread(td); } critical_exit(); } void fpu_activate_sw(struct thread *td); /* Called from the context switch */ void fpu_activate_sw(struct thread *td) { if ((td->td_pflags & TDP_KTHREAD) != 0 || !PCB_USER_FPU(td->td_pcb)) { PCPU_SET(fpcurthread, NULL); start_emulating(); } else if (PCPU_GET(fpcurthread) != td) { restore_fpu_curthread(td); } } void fpudrop(void) { struct thread *td; td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. 
*/ int fpugetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i, owned; pcb = td->td_pcb; critical_enter(); if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); get_pcb_user_save_pcb(pcb)->sv_env.en_cw = pcb->pcb_initial_fpucw; fpuuserinited(td); critical_exit(); return (_MC_FPOWNED_PCB); } if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { fpusave(get_pcb_user_save_pcb(pcb)); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + offsetof(struct xstate_hdr, xstate_bv)); max_ext_n = flsl(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)fpu_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } critical_exit(); return (owned); } void fpuuserinited(struct thread *td) { struct pcb *pcb; CRITICAL_ASSERT(td); pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(pcb, PCB_FPUINITDONE); } int fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(struct savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } /* * Set the state of the FPU. */ int fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; error = 0; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = fpusetxstate(td, xfpustate, xfpustate_size); if (error == 0) { bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurestore(get_pcb_user_save_td(td)); set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); } } else { error = fpusetxstate(td, xfpustate, xfpustate_size); if (error == 0) { bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpuuserinited(td); } } critical_exit(); return (error); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. 
This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } /* * This really sucks. We want the acpi version only, but it requires * the isa_if.h file in order to get the definitions. */ #include "opt_isa.h" #ifdef DEV_ISA #include /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id fpupnp_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int fpupnp_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); if (result <= 0) device_quiet(dev); return (result); } static int fpupnp_attach(device_t dev) { return (0); } static device_method_t fpupnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fpupnp_probe), DEVMETHOD(device_attach, fpupnp_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t fpupnp_driver = { "fpupnp", fpupnp_methods, 1, /* no softc */ }; static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); ISA_PNP_INFO(fpupnp_ids); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_FPUINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 /* avoided save for the kern thread */ #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { struct savefpu *prev; uint32_t flags; char hwstate1[]; }; static inline size_t __pure2 fpu_kern_alloc_sz(u_int max_est) { return (sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + max_est); } static inline int __pure2 fpu_kern_malloc_flags(u_int fpflags) { return (((fpflags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); } struct fpu_kern_ctx * fpu_kern_alloc_ctx_domain(int domain, u_int flags) { return (malloc_domainset(fpu_kern_alloc_sz(cpu_max_ext_state_size), M_FPUKERN_CTX, DOMAINSET_PREF(domain), fpu_kern_malloc_flags(flags))); } struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { return (malloc(fpu_kern_alloc_sz(cpu_max_ext_state_size), M_FPUKERN_CTX, fpu_kern_malloc_flags(flags))); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? */ free(ctx, M_FPUKERN_CTX); } static struct savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((struct savefpu *)p); } void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, ("ctx is required when !FPU_KERN_NOCTX")); KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state")); if ((flags & FPU_KERN_NOCTX) != 0) { critical_enter(); stop_emulating(); if (curthread == PCPU_GET(fpcurthread)) { fpusave(curpcb->pcb_save); PCPU_SET(fpcurthread, NULL); } else { KASSERT(PCPU_GET(fpcurthread) == NULL, ("invalid fpcurthread")); } /* * This breaks XSAVEOPT tracker, but * PCB_FPUNOSAVE state is supposed to never need to * save FPU context at all. 
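/*
 * Illustrative sketch (hypothetical caller): how a driver might bracket a
 * burst of FPU/SIMD work with the fpu_kern_enter()/fpu_kern_leave() pair
 * defined in this file, using a context allocated with fpu_kern_alloc_ctx().
 */
static void
__example_fpu_kern_use(void)
{
	struct fpu_kern_ctx *ctx;

	ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL);
	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
	/* ... FPU/SIMD instructions may be used here ... */
	fpu_kern_leave(curthread, ctx);
	fpu_kern_free_ctx(ctx);
}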
*/ fpurestore(fpu_initialstate); set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE | PCB_FPUINITDONE); return; } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; return; } critical_enter(); KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_FPUINITDONE; fpuexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); set_pcb_flags(pcb, PCB_KERNFPU); clear_pcb_flags(pcb, PCB_FPUINITDONE); critical_exit(); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) { KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); KASSERT(PCPU_GET(fpcurthread) == NULL, ("non-NULL fpcurthread for PCB_FPUNOSAVE")); CRITICAL_ASSERT(td); clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE); start_emulating(); } else { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); critical_enter(); if (curthread == PCPU_GET(fpcurthread)) fpudrop(); pcb->pcb_save = ctx->prev; } if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0) clear_pcb_flags(pcb, PCB_KERNFPU); } else if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0) clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); } else { if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) set_pcb_flags(pcb, PCB_FPUINITDONE); else clear_pcb_flags(pcb, PCB_FPUINITDONE); KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } critical_exit(); return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); set_pcb_flags(curpcb, PCB_KERNFPU | PCB_KERNFPU_THR); return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNFPU_THR) != 0); } /* * FPU save area alloc/free/init utility routines */ struct savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, M_WAITOK)); } void fpu_save_area_free(struct savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(struct savefpu *fsa) { bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); } diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index d61fb359e261..8a7bab8fee2d 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -1,641 +1,598 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif /* * To express interprocessor (as opposed to processor and device) memory * ordering constraints, use the atomic_*() functions with acquire and release * semantics rather than the *mb() functions. An architecture's memory * ordering (or memory consistency) model governs the order in which a * program's accesses to different locations may be performed by an * implementation of that architecture. In general, for memory regions * defined as writeback cacheable, the memory ordering implemented by amd64 * processors preserves the program ordering of a load followed by a load, a * load followed by a store, and a store followed by a store. Only a store * followed by a load to a different memory location may be reordered. * Therefore, except for special cases, like non-temporal memory accesses or * memory regions defined as write combining, the memory ordering effects * provided by the sfence instruction in the wmb() function and the lfence * instruction in the rmb() function are redundant. In contrast, the * atomic_*() functions with acquire and release semantics do not perform * redundant instructions for ordinary cases of interprocessor memory * ordering on any architecture. */ #define mb() __asm __volatile("mfence;" : : : "memory") #define wmb() __asm __volatile("sfence;" : : : "memory") #define rmb() __asm __volatile("lfence;" : : : "memory") #ifdef _KERNEL /* * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf). * * The open-coded number is used instead of the symbolic expression to * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. * An assertion in amd64/vm_machdep.c ensures that the value is correct. */ #define OFFSETOF_MONITORBUF 0x100 #endif #if defined(SAN_NEEDS_INTERCEPTORS) && !defined(SAN_RUNTIME) #include #else #include /* * Various simple operations on memory, each of which is atomic in the * presence of interrupts and multiple processors. 
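 *
 * The _acq and _rel variants defined further down add acquire/release
 * ordering on top of these.  A typical producer/consumer pairing
 * (illustration only, not part of this change):
 *
 *	producer:	data = compute();
 *			atomic_store_rel_int(&ready, 1);
 *	consumer:	while (atomic_load_acq_int(&ready) == 0)
 *				cpu_spinwait();
 *			consume(data);
 *
 * ("data", "ready", "compute" and "consume" are hypothetical.)
 *
 * The plain operations are: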
* * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) * atomic_add_char(P, V) (*(u_char *)(P) += (V)) * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) * * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) * atomic_add_short(P, V) (*(u_short *)(P) += (V)) * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) * * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) * atomic_add_int(P, V) (*(u_int *)(P) += (V)) * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) * atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);) * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) * * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) * atomic_add_long(P, V) (*(u_long *)(P) += (V)) * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) * atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);) * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) */ -#if !defined(__GNUCLIKE_ASM) -#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ -void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ -void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) - -int atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src); -int atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src); -int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); -int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src); -int atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src); -int atomic_fcmpset_short(volatile u_short *dst, u_short *expect, - u_short src); -int atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src); -int atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src); -u_int atomic_fetchadd_int(volatile u_int *p, u_int v); -u_long atomic_fetchadd_long(volatile u_long *p, u_long v); -int atomic_testandset_int(volatile u_int *p, u_int v); -int atomic_testandset_long(volatile u_long *p, u_int v); -int atomic_testandclear_int(volatile u_int *p, u_int v); -int atomic_testandclear_long(volatile u_long *p, u_int v); -void atomic_thread_fence_acq(void); -void atomic_thread_fence_acq_rel(void); -void atomic_thread_fence_rel(void); -void atomic_thread_fence_seq_cst(void); - -#define ATOMIC_LOAD(TYPE) \ -u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) -#define ATOMIC_STORE(TYPE) \ -void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) - -#else /* !__GNUCLIKE_ASM */ - /* * Always use lock prefixes. The result is slighly less optimal for * UP systems, but it matters less now, and sometimes UP is emulated * over SMP. * * The assembly is volatilized to avoid code chunk removal by the compiler. * GCC aggressively reorders operations and memory clobbering is necessary * in order to avoid that for memory barriers. */ #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "cc"); \ } \ \ static __inline void \ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "memory", "cc"); \ } \ struct __hack /* * Atomic compare and set, used by the mutex functions. 
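 *
 * As an illustration (not part of this change), a lock-free increment
 * built on the fcmpset variant retries until the update wins the race:
 *
 *	u_int old, new;
 *
 *	old = atomic_load_acq_int(&counter);
 *	do {
 *		new = old + 1;
 *	} while (atomic_fcmpset_int(&counter, &old, new) == 0);
 *
 * ("counter" is a hypothetical volatile u_int.)  The semantics are: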
* * cmpset: * if (*dst == expect) * *dst = src * * fcmpset: * if (*dst == *expect) * *dst = src * else * *expect = *dst * * Returns 0 on failure, non-zero on success. */ #define ATOMIC_CMPSET(TYPE) \ static __inline int \ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \ { \ u_char res; \ \ __asm __volatile( \ " lock; cmpxchg %3,%1 ; " \ "# atomic_cmpset_" #TYPE " " \ : "=@cce" (res), /* 0 */ \ "+m" (*dst), /* 1 */ \ "+a" (expect) /* 2 */ \ : "r" (src) /* 3 */ \ : "memory", "cc"); \ return (res); \ } \ \ static __inline int \ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \ { \ u_char res; \ \ __asm __volatile( \ " lock; cmpxchg %3,%1 ; " \ "# atomic_fcmpset_" #TYPE " " \ : "=@cce" (res), /* 0 */ \ "+m" (*dst), /* 1 */ \ "+a" (*expect) /* 2 */ \ : "r" (src) /* 3 */ \ : "memory", "cc"); \ return (res); \ } ATOMIC_CMPSET(char); ATOMIC_CMPSET(short); ATOMIC_CMPSET(int); ATOMIC_CMPSET(long); /* * Atomically add the value of v to the integer pointed to by p and return * the previous value of *p. */ static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int v) { __asm __volatile( " lock; xaddl %0,%1 ; " "# atomic_fetchadd_int" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ : : "cc"); return (v); } /* * Atomically add the value of v to the long integer pointed to by p and return * the previous value of *p. */ static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { __asm __volatile( " lock; xaddq %0,%1 ; " "# atomic_fetchadd_long" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ : : "cc"); return (v); } static __inline int atomic_testandset_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " lock; btsl %2,%1 ; " "# atomic_testandset_int" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } static __inline int atomic_testandset_long(volatile u_long *p, u_int v) { u_char res; __asm __volatile( " lock; btsq %2,%1 ; " "# atomic_testandset_long" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ : "Jr" ((u_long)(v & 0x3f)) /* 2 */ : "cc"); return (res); } static __inline int atomic_testandclear_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " lock; btrl %2,%1 ; " "# atomic_testandclear_int" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } static __inline int atomic_testandclear_long(volatile u_long *p, u_int v) { u_char res; __asm __volatile( " lock; btrq %2,%1 ; " "# atomic_testandclear_long" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ : "Jr" ((u_long)(v & 0x3f)) /* 2 */ : "cc"); return (res); } /* * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. * * However, a load may pass a store if they are performed on distinct * addresses, so we need a Store/Load barrier for sequentially * consistent fences in SMP kernels. We use "lock addl $0,mem" for a * Store/Load barrier, as recommended by the AMD Software Optimization * Guide, and not mfence. To avoid false data dependencies, we use a * special address for "mem". In the kernel, we use a private per-cpu * cache line. In user space, we use a word in the stack's red zone * (-8(%rsp)). 
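 *
 * As an illustration of why a Store/Load barrier matters (not part of
 * this change), take two hypothetical u_int variables x and y, both 0:
 *
 *	CPU 0:	atomic_store_rel_int(&x, 1);
 *		atomic_thread_fence_seq_cst();
 *		r0 = atomic_load_acq_int(&y);
 *	CPU 1:	atomic_store_rel_int(&y, 1);
 *		atomic_thread_fence_seq_cst();
 *		r1 = atomic_load_acq_int(&x);
 *
 * Without the fences both r0 and r1 may observe 0, because each CPU's
 * store can still sit in its store buffer when the other CPU loads.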
*/ static __inline void __storeload_barrier(void) { #if defined(_KERNEL) __asm __volatile("lock; addl $0,%%gs:%0" : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc"); #else /* !_KERNEL */ __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc"); #endif /* _KERNEL*/ } #define ATOMIC_LOAD(TYPE) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ u_##TYPE res; \ \ res = *p; \ __compiler_membar(); \ return (res); \ } \ struct __hack #define ATOMIC_STORE(TYPE) \ static __inline void \ atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ { \ \ __compiler_membar(); \ *p = v; \ } \ struct __hack static __inline void atomic_thread_fence_acq(void) { __compiler_membar(); } static __inline void atomic_thread_fence_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_acq_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_seq_cst(void) { __storeload_barrier(); } -#endif /* !__GNUCLIKE_ASM */ - ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); ATOMIC_ASM(set, long, "orq %1,%0", "er", v); ATOMIC_ASM(clear, long, "andq %1,%0", "er", ~v); ATOMIC_ASM(add, long, "addq %1,%0", "er", v); ATOMIC_ASM(subtract, long, "subq %1,%0", "er", v); #define ATOMIC_LOADSTORE(TYPE) \ ATOMIC_LOAD(TYPE); \ ATOMIC_STORE(TYPE) ATOMIC_LOADSTORE(char); ATOMIC_LOADSTORE(short); ATOMIC_LOADSTORE(int); ATOMIC_LOADSTORE(long); #undef ATOMIC_ASM #undef ATOMIC_LOAD #undef ATOMIC_STORE #undef ATOMIC_LOADSTORE #ifndef WANT_FUNCTIONS /* Read the current value and store a new value in the destination. 
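 *
 * For example (illustration only), old = atomic_swap_int(&pending, 0)
 * drains a word of pending work in a single atomic step; this is also
 * how atomic_readandclear_int() is expressed below.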
*/ -#ifdef __GNUCLIKE_ASM - static __inline u_int atomic_swap_int(volatile u_int *p, u_int v) { __asm __volatile( " xchgl %1,%0 ; " "# atomic_swap_int" : "+r" (v), /* 0 */ "+m" (*p)); /* 1 */ return (v); } static __inline u_long atomic_swap_long(volatile u_long *p, u_long v) { __asm __volatile( " xchgq %1,%0 ; " "# atomic_swap_long" : "+r" (v), /* 0 */ "+m" (*p)); /* 1 */ return (v); } -#else /* !__GNUCLIKE_ASM */ - -u_int atomic_swap_int(volatile u_int *p, u_int v); -u_long atomic_swap_long(volatile u_long *p, u_long v); - -#endif /* __GNUCLIKE_ASM */ - #define atomic_set_acq_char atomic_set_barr_char #define atomic_set_rel_char atomic_set_barr_char #define atomic_clear_acq_char atomic_clear_barr_char #define atomic_clear_rel_char atomic_clear_barr_char #define atomic_add_acq_char atomic_add_barr_char #define atomic_add_rel_char atomic_add_barr_char #define atomic_subtract_acq_char atomic_subtract_barr_char #define atomic_subtract_rel_char atomic_subtract_barr_char #define atomic_cmpset_acq_char atomic_cmpset_char #define atomic_cmpset_rel_char atomic_cmpset_char #define atomic_fcmpset_acq_char atomic_fcmpset_char #define atomic_fcmpset_rel_char atomic_fcmpset_char #define atomic_set_acq_short atomic_set_barr_short #define atomic_set_rel_short atomic_set_barr_short #define atomic_clear_acq_short atomic_clear_barr_short #define atomic_clear_rel_short atomic_clear_barr_short #define atomic_add_acq_short atomic_add_barr_short #define atomic_add_rel_short atomic_add_barr_short #define atomic_subtract_acq_short atomic_subtract_barr_short #define atomic_subtract_rel_short atomic_subtract_barr_short #define atomic_cmpset_acq_short atomic_cmpset_short #define atomic_cmpset_rel_short atomic_cmpset_short #define atomic_fcmpset_acq_short atomic_fcmpset_short #define atomic_fcmpset_rel_short atomic_fcmpset_short #define atomic_set_acq_int atomic_set_barr_int #define atomic_set_rel_int atomic_set_barr_int #define atomic_clear_acq_int atomic_clear_barr_int #define atomic_clear_rel_int atomic_clear_barr_int #define atomic_add_acq_int atomic_add_barr_int #define atomic_add_rel_int atomic_add_barr_int #define atomic_subtract_acq_int atomic_subtract_barr_int #define atomic_subtract_rel_int atomic_subtract_barr_int #define atomic_cmpset_acq_int atomic_cmpset_int #define atomic_cmpset_rel_int atomic_cmpset_int #define atomic_fcmpset_acq_int atomic_fcmpset_int #define atomic_fcmpset_rel_int atomic_fcmpset_int #define atomic_set_acq_long atomic_set_barr_long #define atomic_set_rel_long atomic_set_barr_long #define atomic_clear_acq_long atomic_clear_barr_long #define atomic_clear_rel_long atomic_clear_barr_long #define atomic_add_acq_long atomic_add_barr_long #define atomic_add_rel_long atomic_add_barr_long #define atomic_subtract_acq_long atomic_subtract_barr_long #define atomic_subtract_rel_long atomic_subtract_barr_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_fcmpset_acq_long atomic_fcmpset_long #define atomic_fcmpset_rel_long atomic_fcmpset_long #define atomic_readandclear_int(p) atomic_swap_int(p, 0) #define atomic_readandclear_long(p) atomic_swap_long(p, 0) #define atomic_testandset_acq_long atomic_testandset_long /* Operations on 8-bit bytes. 
*/ #define atomic_set_8 atomic_set_char #define atomic_set_acq_8 atomic_set_acq_char #define atomic_set_rel_8 atomic_set_rel_char #define atomic_clear_8 atomic_clear_char #define atomic_clear_acq_8 atomic_clear_acq_char #define atomic_clear_rel_8 atomic_clear_rel_char #define atomic_add_8 atomic_add_char #define atomic_add_acq_8 atomic_add_acq_char #define atomic_add_rel_8 atomic_add_rel_char #define atomic_subtract_8 atomic_subtract_char #define atomic_subtract_acq_8 atomic_subtract_acq_char #define atomic_subtract_rel_8 atomic_subtract_rel_char #define atomic_load_acq_8 atomic_load_acq_char #define atomic_store_rel_8 atomic_store_rel_char #define atomic_cmpset_8 atomic_cmpset_char #define atomic_cmpset_acq_8 atomic_cmpset_acq_char #define atomic_cmpset_rel_8 atomic_cmpset_rel_char #define atomic_fcmpset_8 atomic_fcmpset_char #define atomic_fcmpset_acq_8 atomic_fcmpset_acq_char #define atomic_fcmpset_rel_8 atomic_fcmpset_rel_char /* Operations on 16-bit words. */ #define atomic_set_16 atomic_set_short #define atomic_set_acq_16 atomic_set_acq_short #define atomic_set_rel_16 atomic_set_rel_short #define atomic_clear_16 atomic_clear_short #define atomic_clear_acq_16 atomic_clear_acq_short #define atomic_clear_rel_16 atomic_clear_rel_short #define atomic_add_16 atomic_add_short #define atomic_add_acq_16 atomic_add_acq_short #define atomic_add_rel_16 atomic_add_rel_short #define atomic_subtract_16 atomic_subtract_short #define atomic_subtract_acq_16 atomic_subtract_acq_short #define atomic_subtract_rel_16 atomic_subtract_rel_short #define atomic_load_acq_16 atomic_load_acq_short #define atomic_store_rel_16 atomic_store_rel_short #define atomic_cmpset_16 atomic_cmpset_short #define atomic_cmpset_acq_16 atomic_cmpset_acq_short #define atomic_cmpset_rel_16 atomic_cmpset_rel_short #define atomic_fcmpset_16 atomic_fcmpset_short #define atomic_fcmpset_acq_16 atomic_fcmpset_acq_short #define atomic_fcmpset_rel_16 atomic_fcmpset_rel_short /* Operations on 32-bit double words. */ #define atomic_set_32 atomic_set_int #define atomic_set_acq_32 atomic_set_acq_int #define atomic_set_rel_32 atomic_set_rel_int #define atomic_clear_32 atomic_clear_int #define atomic_clear_acq_32 atomic_clear_acq_int #define atomic_clear_rel_32 atomic_clear_rel_int #define atomic_add_32 atomic_add_int #define atomic_add_acq_32 atomic_add_acq_int #define atomic_add_rel_32 atomic_add_rel_int #define atomic_subtract_32 atomic_subtract_int #define atomic_subtract_acq_32 atomic_subtract_acq_int #define atomic_subtract_rel_32 atomic_subtract_rel_int #define atomic_load_acq_32 atomic_load_acq_int #define atomic_store_rel_32 atomic_store_rel_int #define atomic_cmpset_32 atomic_cmpset_int #define atomic_cmpset_acq_32 atomic_cmpset_acq_int #define atomic_cmpset_rel_32 atomic_cmpset_rel_int #define atomic_fcmpset_32 atomic_fcmpset_int #define atomic_fcmpset_acq_32 atomic_fcmpset_acq_int #define atomic_fcmpset_rel_32 atomic_fcmpset_rel_int #define atomic_swap_32 atomic_swap_int #define atomic_readandclear_32 atomic_readandclear_int #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_testandset_32 atomic_testandset_int #define atomic_testandclear_32 atomic_testandclear_int /* Operations on 64-bit quad words. 
*/ #define atomic_set_64 atomic_set_long #define atomic_set_acq_64 atomic_set_acq_long #define atomic_set_rel_64 atomic_set_rel_long #define atomic_clear_64 atomic_clear_long #define atomic_clear_acq_64 atomic_clear_acq_long #define atomic_clear_rel_64 atomic_clear_rel_long #define atomic_add_64 atomic_add_long #define atomic_add_acq_64 atomic_add_acq_long #define atomic_add_rel_64 atomic_add_rel_long #define atomic_subtract_64 atomic_subtract_long #define atomic_subtract_acq_64 atomic_subtract_acq_long #define atomic_subtract_rel_64 atomic_subtract_rel_long #define atomic_load_acq_64 atomic_load_acq_long #define atomic_store_rel_64 atomic_store_rel_long #define atomic_cmpset_64 atomic_cmpset_long #define atomic_cmpset_acq_64 atomic_cmpset_acq_long #define atomic_cmpset_rel_64 atomic_cmpset_rel_long #define atomic_fcmpset_64 atomic_fcmpset_long #define atomic_fcmpset_acq_64 atomic_fcmpset_acq_long #define atomic_fcmpset_rel_64 atomic_fcmpset_rel_long #define atomic_swap_64 atomic_swap_long #define atomic_readandclear_64 atomic_readandclear_long #define atomic_fetchadd_64 atomic_fetchadd_long #define atomic_testandset_64 atomic_testandset_long #define atomic_testandclear_64 atomic_testandclear_long /* Operations on pointers. */ #define atomic_set_ptr atomic_set_long #define atomic_set_acq_ptr atomic_set_acq_long #define atomic_set_rel_ptr atomic_set_rel_long #define atomic_clear_ptr atomic_clear_long #define atomic_clear_acq_ptr atomic_clear_acq_long #define atomic_clear_rel_ptr atomic_clear_rel_long #define atomic_add_ptr atomic_add_long #define atomic_add_acq_ptr atomic_add_acq_long #define atomic_add_rel_ptr atomic_add_rel_long #define atomic_subtract_ptr atomic_subtract_long #define atomic_subtract_acq_ptr atomic_subtract_acq_long #define atomic_subtract_rel_ptr atomic_subtract_rel_long #define atomic_load_acq_ptr atomic_load_acq_long #define atomic_store_rel_ptr atomic_store_rel_long #define atomic_cmpset_ptr atomic_cmpset_long #define atomic_cmpset_acq_ptr atomic_cmpset_acq_long #define atomic_cmpset_rel_ptr atomic_cmpset_rel_long #define atomic_fcmpset_ptr atomic_fcmpset_long #define atomic_fcmpset_acq_ptr atomic_fcmpset_acq_long #define atomic_fcmpset_rel_ptr atomic_fcmpset_rel_long #define atomic_swap_ptr atomic_swap_long #define atomic_readandclear_ptr atomic_readandclear_long #endif /* !WANT_FUNCTIONS */ #endif /* !SAN_NEEDS_INTERCEPTORS || SAN_RUNTIME */ #endif /* !_MACHINE_ATOMIC_H_ */ diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index bca74d8ead67..f639c9c7119e 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -1,1041 +1,1041 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Functions to provide access to special i386 instructions. * This in included in sys/systm.h, and that file should be * used in preference to this. */ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif struct region_descriptor; #define readb(va) (*(volatile uint8_t *) (va)) #define readw(va) (*(volatile uint16_t *) (va)) #define readl(va) (*(volatile uint32_t *) (va)) #define readq(va) (*(volatile uint64_t *) (va)) #define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) #define writew(va, d) (*(volatile uint16_t *) (va) = (d)) #define writel(va, d) (*(volatile uint32_t *) (va) = (d)) #define writeq(va, d) (*(volatile uint64_t *) (va) = (d)) -#if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE) +#if defined(__CC_SUPPORTS___INLINE) static __inline void breakpoint(void) { __asm __volatile("int $3"); } #define bsfl(mask) __builtin_ctz(mask) #define bsfq(mask) __builtin_ctzl(mask) #define bsrl(mask) (__builtin_clz(mask) ^ 0x1f) #define bsrq(mask) (__builtin_clzl(mask) ^ 0x3f) static __inline void clflush(u_long addr) { __asm __volatile("clflush %0" : : "m" (*(char *)addr)); } static __inline void clflushopt(u_long addr) { __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr)); } static __inline void clwb(u_long addr) { __asm __volatile("clwb %0" : : "m" (*(char *)addr)); } static __inline void clts(void) { __asm __volatile("clts"); } static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); } static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } static __inline void enable_intr(void) { __asm __volatile("sti"); } #ifdef _KERNEL #define HAVE_INLINE_FFS #define ffs(x) __builtin_ffs(x) #define HAVE_INLINE_FFSL #define ffsl(x) __builtin_ffsl(x) #define HAVE_INLINE_FFSLL #define ffsll(x) __builtin_ffsll(x) #define HAVE_INLINE_FLS static __inline __pure2 int fls(int mask) { return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1); } #define HAVE_INLINE_FLSL static __inline __pure2 int flsl(long mask) { return (mask == 0 ? 
mask : (int)bsrq((u_long)mask) + 1); } #define HAVE_INLINE_FLSLL static __inline __pure2 int flsll(long long mask) { return (flsl((long)mask)); } #endif /* _KERNEL */ static __inline void halt(void) { __asm __volatile("hlt"); } static __inline u_char inb(u_int port) { u_char data; __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline u_int inl(u_int port) { u_int data; __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void insb(u_int port, void *addr, size_t count) { __asm __volatile("rep; insb" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insw(u_int port, void *addr, size_t count) { __asm __volatile("rep; insw" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insl(u_int port, void *addr, size_t count) { __asm __volatile("rep; insl" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void invd(void) { __asm __volatile("invd"); } static __inline u_short inw(u_int port) { u_short data; __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void outb(u_int port, u_char data) { __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outl(u_int port, u_int data) { __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outsb(u_int port, const void *addr, size_t count) { __asm __volatile("rep; outsb" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsw(u_int port, const void *addr, size_t count) { __asm __volatile("rep; outsw" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsl(u_int port, const void *addr, size_t count) { __asm __volatile("rep; outsl" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outw(u_int port, u_short data) { __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); } static __inline u_long popcntq(u_long mask) { u_long result; __asm __volatile("popcntq %1,%0" : "=r" (result) : "rm" (mask)); return (result); } static __inline void lfence(void) { __asm __volatile("lfence" : : : "memory"); } static __inline void mfence(void) { __asm __volatile("mfence" : : : "memory"); } static __inline void sfence(void) { __asm __volatile("sfence" : : : "memory"); } static __inline void ia32_pause(void) { __asm __volatile("pause"); } static __inline u_long read_rflags(void) { u_long rf; __asm __volatile("pushfq; popq %0" : "=r" (rf)); return (rf); } static __inline uint64_t rdmsr(u_int msr) { uint32_t low, high; __asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr)); return (low | ((uint64_t)high << 32)); } static __inline uint32_t rdmsr32(u_int msr) { uint32_t low; __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "rdx"); return (low); } static __inline uint64_t rdpmc(u_int pmc) { uint32_t low, high; __asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc)); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtsc(void) { uint32_t low, high; __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtsc_ordered_lfence(void) { lfence(); return (rdtsc()); } static __inline uint64_t rdtsc_ordered_mfence(void) { mfence(); return (rdtsc()); } static __inline uint64_t rdtscp(void) { uint32_t low, high; __asm __volatile("rdtscp" : "=a" (low), "=d" (high) : : "ecx"); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtscp_aux(uint32_t *aux) { uint32_t low, high; __asm 
__volatile("rdtscp" : "=a" (low), "=d" (high), "=c" (*aux)); return (low | ((uint64_t)high << 32)); } static __inline uint32_t rdtsc32(void) { uint32_t rv; __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); return (rv); } static __inline uint32_t rdtscp32(void) { uint32_t rv; __asm __volatile("rdtscp" : "=a" (rv) : : "ecx", "edx"); return (rv); } static __inline void wbinvd(void) { __asm __volatile("wbinvd"); } static __inline void write_rflags(u_long rf) { __asm __volatile("pushq %0; popfq" : : "r" (rf)); } static __inline void wrmsr(u_int msr, uint64_t newval) { uint32_t low, high; low = newval; high = newval >> 32; __asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr)); } static __inline void load_cr0(u_long data) { __asm __volatile("movq %0,%%cr0" : : "r" (data)); } static __inline u_long rcr0(void) { u_long data; __asm __volatile("movq %%cr0,%0" : "=r" (data)); return (data); } static __inline u_long rcr2(void) { u_long data; __asm __volatile("movq %%cr2,%0" : "=r" (data)); return (data); } static __inline void load_cr3(u_long data) { __asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory"); } static __inline u_long rcr3(void) { u_long data; __asm __volatile("movq %%cr3,%0" : "=r" (data)); return (data); } static __inline void load_cr4(u_long data) { __asm __volatile("movq %0,%%cr4" : : "r" (data)); } static __inline u_long rcr4(void) { u_long data; __asm __volatile("movq %%cr4,%0" : "=r" (data)); return (data); } static __inline u_long rxcr(u_int reg) { u_int low, high; __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); return (low | ((uint64_t)high << 32)); } static __inline void load_xcr(u_int reg, u_long val) { u_int low, high; low = val; high = val >> 32; __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); } /* * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void invltlb(void) { load_cr3(rcr3()); } #ifndef CR4_PGE #define CR4_PGE 0x00000080 /* Page global enable */ #endif /* * Perform the guaranteed invalidation of all TLB entries. This * includes the global entries, and entries in all PCIDs, not only the * current context. The function works both on non-PCID CPUs and CPUs * with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1 * Operations that Invalidate TLBs and Paging-Structure Caches. */ static __inline void invltlb_glob(void) { uint64_t cr4; cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* * Although preemption at this point could be detrimental to * performance, it would not lead to an error. PG_G is simply * ignored if CR4.PGE is clear. Moreover, in case this block * is re-entered, the load_cr4() either above or below will * modify CR4.PGE flushing the TLB. */ load_cr4(cr4 | CR4_PGE); } /* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). 
*/ static __inline void invlpg(u_long addr) { __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } #define INVPCID_ADDR 0 #define INVPCID_CTX 1 #define INVPCID_CTXGLOB 2 #define INVPCID_ALLCTX 3 struct invpcid_descr { uint64_t pcid:12 __packed; uint64_t pad:52 __packed; uint64_t addr; } __packed; static __inline void invpcid(struct invpcid_descr *d, int type) { __asm __volatile("invpcid (%0),%1" : : "r" (d), "r" ((u_long)type) : "memory"); } static __inline u_short rfs(void) { u_short sel; __asm __volatile("movw %%fs,%0" : "=rm" (sel)); return (sel); } static __inline u_short rgs(void) { u_short sel; __asm __volatile("movw %%gs,%0" : "=rm" (sel)); return (sel); } static __inline u_short rss(void) { u_short sel; __asm __volatile("movw %%ss,%0" : "=rm" (sel)); return (sel); } static __inline void load_ds(u_short sel) { __asm __volatile("movw %0,%%ds" : : "rm" (sel)); } static __inline void load_es(u_short sel) { __asm __volatile("movw %0,%%es" : : "rm" (sel)); } static __inline void cpu_monitor(const void *addr, u_long extensions, u_int hints) { __asm __volatile("monitor" : : "a" (addr), "c" (extensions), "d" (hints)); } static __inline void cpu_mwait(u_long extensions, u_int hints) { __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); } static __inline uint32_t rdpkru(void) { uint32_t res; __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx"); return (res); } static __inline void wrpkru(uint32_t mask) { __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0)); } #ifdef _KERNEL /* This is defined in but is too painful to get to */ #ifndef MSR_FSBASE #define MSR_FSBASE 0xc0000100 #endif static __inline void load_fs(u_short sel) { /* Preserve the fsbase value across the selector load */ __asm __volatile("rdmsr; movw %0,%%fs; wrmsr" : : "rm" (sel), "c" (MSR_FSBASE) : "eax", "edx"); } #ifndef MSR_GSBASE #define MSR_GSBASE 0xc0000101 #endif static __inline void load_gs(u_short sel) { /* * Preserve the gsbase value across the selector load. * Note that we have to disable interrupts because the gsbase * being trashed happens to be the kernel gsbase at the time. 
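 *
 * Loading %gs overwrites MSR_GSBASE with the base taken from the
 * descriptor, so the sequence below saves the MSR with rdmsr, reloads
 * the selector, and restores the MSR with wrmsr, all with interrupts
 * disabled; an interrupt taken inside that window would otherwise run
 * with a stale per-CPU base.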
*/ __asm __volatile("pushfq; cli; rdmsr; movw %0,%%gs; wrmsr; popfq" : : "rm" (sel), "c" (MSR_GSBASE) : "eax", "edx"); } #else /* Usable by userland */ static __inline void load_fs(u_short sel) { __asm __volatile("movw %0,%%fs" : : "rm" (sel)); } static __inline void load_gs(u_short sel) { __asm __volatile("movw %0,%%gs" : : "rm" (sel)); } #endif static __inline uint64_t rdfsbase(void) { uint64_t x; __asm __volatile("rdfsbase %0" : "=r" (x)); return (x); } static __inline void wrfsbase(uint64_t x) { __asm __volatile("wrfsbase %0" : : "r" (x)); } static __inline uint64_t rdgsbase(void) { uint64_t x; __asm __volatile("rdgsbase %0" : "=r" (x)); return (x); } static __inline void wrgsbase(uint64_t x) { __asm __volatile("wrgsbase %0" : : "r" (x)); } static __inline void bare_lgdt(struct region_descriptor *addr) { __asm __volatile("lgdt (%0)" : : "r" (addr)); } static __inline void sgdt(struct region_descriptor *addr) { char *loc; loc = (char *)addr; __asm __volatile("sgdt %0" : "=m" (*loc) : : "memory"); } static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void sidt(struct region_descriptor *addr) { char *loc; loc = (char *)addr; __asm __volatile("sidt %0" : "=m" (*loc) : : "memory"); } static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); } static __inline u_short sldt(void) { u_short sel; __asm __volatile("sldt %0" : "=r" (sel)); return (sel); } static __inline void ltr(u_short sel) { __asm __volatile("ltr %0" : : "r" (sel)); } static __inline uint32_t read_tr(void) { u_short sel; __asm __volatile("str %0" : "=r" (sel)); return (sel); } static __inline uint64_t rdr0(void) { uint64_t data; __asm __volatile("movq %%dr0,%0" : "=r" (data)); return (data); } static __inline void load_dr0(uint64_t dr0) { __asm __volatile("movq %0,%%dr0" : : "r" (dr0)); } static __inline uint64_t rdr1(void) { uint64_t data; __asm __volatile("movq %%dr1,%0" : "=r" (data)); return (data); } static __inline void load_dr1(uint64_t dr1) { __asm __volatile("movq %0,%%dr1" : : "r" (dr1)); } static __inline uint64_t rdr2(void) { uint64_t data; __asm __volatile("movq %%dr2,%0" : "=r" (data)); return (data); } static __inline void load_dr2(uint64_t dr2) { __asm __volatile("movq %0,%%dr2" : : "r" (dr2)); } static __inline uint64_t rdr3(void) { uint64_t data; __asm __volatile("movq %%dr3,%0" : "=r" (data)); return (data); } static __inline void load_dr3(uint64_t dr3) { __asm __volatile("movq %0,%%dr3" : : "r" (dr3)); } static __inline uint64_t rdr6(void) { uint64_t data; __asm __volatile("movq %%dr6,%0" : "=r" (data)); return (data); } static __inline void load_dr6(uint64_t dr6) { __asm __volatile("movq %0,%%dr6" : : "r" (dr6)); } static __inline uint64_t rdr7(void) { uint64_t data; __asm __volatile("movq %%dr7,%0" : "=r" (data)); return (data); } static __inline void load_dr7(uint64_t dr7) { __asm __volatile("movq %0,%%dr7" : : "r" (dr7)); } static __inline register_t intr_disable(void) { register_t rflags; rflags = read_rflags(); disable_intr(); return (rflags); } static __inline void intr_restore(register_t rflags) { write_rflags(rflags); } static __inline void stac(void) { __asm __volatile("stac" : : : "cc"); } static __inline void clac(void) { __asm __volatile("clac" : : : "cc"); } enum { SGX_ECREATE = 0x0, SGX_EADD = 0x1, SGX_EINIT = 0x2, SGX_EREMOVE = 0x3, SGX_EDGBRD = 0x4, SGX_EDGBWR = 0x5, SGX_EEXTEND = 0x6, SGX_ELDU = 0x8, SGX_EBLOCK = 0x9, SGX_EPA = 0xA, SGX_EWB = 0xB, SGX_ETRACK = 0xC, }; enum { SGX_PT_SECS = 0x00, 
SGX_PT_TCS = 0x01, SGX_PT_REG = 0x02, SGX_PT_VA = 0x03, SGX_PT_TRIM = 0x04, }; int sgx_encls(uint32_t eax, uint64_t rbx, uint64_t rcx, uint64_t rdx); static __inline int sgx_ecreate(void *pginfo, void *secs) { return (sgx_encls(SGX_ECREATE, (uint64_t)pginfo, (uint64_t)secs, 0)); } static __inline int sgx_eadd(void *pginfo, void *epc) { return (sgx_encls(SGX_EADD, (uint64_t)pginfo, (uint64_t)epc, 0)); } static __inline int sgx_einit(void *sigstruct, void *secs, void *einittoken) { return (sgx_encls(SGX_EINIT, (uint64_t)sigstruct, (uint64_t)secs, (uint64_t)einittoken)); } static __inline int sgx_eextend(void *secs, void *epc) { return (sgx_encls(SGX_EEXTEND, (uint64_t)secs, (uint64_t)epc, 0)); } static __inline int sgx_epa(void *epc) { return (sgx_encls(SGX_EPA, SGX_PT_VA, (uint64_t)epc, 0)); } static __inline int sgx_eldu(uint64_t rbx, uint64_t rcx, uint64_t rdx) { return (sgx_encls(SGX_ELDU, rbx, rcx, rdx)); } static __inline int sgx_eremove(void *epc) { return (sgx_encls(SGX_EREMOVE, 0, (uint64_t)epc, 0)); } -#else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */ +#else /* !__CC_SUPPORTS___INLINE */ int breakpoint(void); u_int bsfl(u_int mask); u_int bsrl(u_int mask); void clflush(u_long addr); void clts(void); void cpuid_count(u_int ax, u_int cx, u_int *p); void disable_intr(void); void do_cpuid(u_int ax, u_int *p); void enable_intr(void); void halt(void); void ia32_pause(void); u_char inb(u_int port); u_int inl(u_int port); void insb(u_int port, void *addr, size_t count); void insl(u_int port, void *addr, size_t count); void insw(u_int port, void *addr, size_t count); register_t intr_disable(void); void intr_restore(register_t rf); void invd(void); void invlpg(u_int addr); void invltlb(void); u_short inw(u_int port); void lidt(struct region_descriptor *addr); void lldt(u_short sel); void load_cr0(u_long cr0); void load_cr3(u_long cr3); void load_cr4(u_long cr4); void load_dr0(uint64_t dr0); void load_dr1(uint64_t dr1); void load_dr2(uint64_t dr2); void load_dr3(uint64_t dr3); void load_dr6(uint64_t dr6); void load_dr7(uint64_t dr7); void load_fs(u_short sel); void load_gs(u_short sel); void ltr(u_short sel); void outb(u_int port, u_char data); void outl(u_int port, u_int data); void outsb(u_int port, const void *addr, size_t count); void outsl(u_int port, const void *addr, size_t count); void outsw(u_int port, const void *addr, size_t count); void outw(u_int port, u_short data); u_long rcr0(void); u_long rcr2(void); u_long rcr3(void); u_long rcr4(void); uint64_t rdmsr(u_int msr); uint32_t rdmsr32(u_int msr); uint64_t rdpmc(u_int pmc); uint64_t rdr0(void); uint64_t rdr1(void); uint64_t rdr2(void); uint64_t rdr3(void); uint64_t rdr6(void); uint64_t rdr7(void); uint64_t rdtsc(void); u_long read_rflags(void); u_int rfs(void); u_int rgs(void); void wbinvd(void); void write_rflags(u_int rf); void wrmsr(u_int msr, uint64_t newval); -#endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */ +#endif /* __CC_SUPPORTS___INLINE */ void reset_dbregs(void); #ifdef _KERNEL int rdmsr_safe(u_int msr, uint64_t *val); int wrmsr_safe(u_int msr, uint64_t newval); #endif #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/amd64/include/ieeefp.h b/sys/amd64/include/ieeefp.h index 96ee7e9040d8..48d879f0b80b 100644 --- a/sys/amd64/include/ieeefp.h +++ b/sys/amd64/include/ieeefp.h @@ -1,215 +1,211 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 Andrew Moore, Talke Studio * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#) ieeefp.h 1.0 (Berkeley) 9/23/93 * $FreeBSD$ */ #ifndef _MACHINE_IEEEFP_H_ #define _MACHINE_IEEEFP_H_ /* Deprecated historical FPU control interface */ #include /* * IEEE floating point type, constant and function definitions. * XXX: {FP,SSE}*FLD and {FP,SSE}*OFF are undocumented pollution. */ /* * SSE mxcsr register bit-field masks. */ #define SSE_STKY_FLD 0x3f /* exception flags */ #define SSE_DAZ_FLD 0x40 /* Denormals are zero */ #define SSE_MSKS_FLD 0x1f80 /* exception masks field */ #define SSE_RND_FLD 0x6000 /* rounding control */ #define SSE_FZ_FLD 0x8000 /* flush to zero on underflow */ /* * SSE mxcsr register bit-field offsets (shift counts). */ #define SSE_STKY_OFF 0 /* exception flags offset */ #define SSE_DAZ_OFF 6 /* DAZ exception mask offset */ #define SSE_MSKS_OFF 7 /* other exception masks offset */ #define SSE_RND_OFF 13 /* rounding control offset */ #define SSE_FZ_OFF 15 /* flush to zero offset */ -#ifdef __GNUCLIKE_ASM - /* * General notes about conflicting SSE vs FP status bits. * This code assumes that software will not fiddle with the control * bits of the SSE and x87 in such a way to get them out of sync and * still expect this to work. Break this at your peril. * Because I based this on the i386 port, the x87 state is used for * the fpget*() functions, and is shadowed into the SSE state for * the fpset*() functions. For dual source fpget*() functions, I * merge the two together. I think. 
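 *
 * Illustrative use of the wrappers below (not part of this change),
 * assuming the usual FP_RZ/FP_RN rounding constants of the ieeefp API:
 *
 *	fp_rnd_t ornd;
 *
 *	ornd = fpsetround(FP_RZ);
 *	... code that needs round-toward-zero ...
 *	fpsetround(ornd);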
*/ static __inline fp_rnd_t __fpgetround(void) { unsigned short _cw; __fnstcw(&_cw); return ((fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF)); } static __inline fp_rnd_t __fpsetround(fp_rnd_t _m) { fp_rnd_t _p; unsigned _mxcsr; unsigned short _cw, _newcw; __fnstcw(&_cw); _p = (fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF); _newcw = _cw & ~FP_RND_FLD; _newcw |= (_m << FP_RND_OFF) & FP_RND_FLD; __fnldcw(_cw, _newcw); __stmxcsr(&_mxcsr); _mxcsr &= ~SSE_RND_FLD; _mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD; __ldmxcsr(&_mxcsr); return (_p); } /* * Get or set the rounding precision for x87 arithmetic operations. * There is no equivalent SSE mode or control. */ static __inline fp_prec_t __fpgetprec(void) { unsigned short _cw; __fnstcw(&_cw); return ((fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF)); } static __inline fp_prec_t __fpsetprec(fp_prec_t _m) { fp_prec_t _p; unsigned short _cw, _newcw; __fnstcw(&_cw); _p = (fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF); _newcw = _cw & ~FP_PRC_FLD; _newcw |= (_m << FP_PRC_OFF) & FP_PRC_FLD; __fnldcw(_cw, _newcw); return (_p); } /* * Get or set the exception mask. * Note that the x87 mask bits are inverted by the API -- a mask bit of 1 * means disable for x87 and SSE, but for fp*mask() it means enable. */ static __inline fp_except_t __fpgetmask(void) { unsigned short _cw; __fnstcw(&_cw); return ((~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF); } static __inline fp_except_t __fpsetmask(fp_except_t _m) { fp_except_t _p; unsigned _mxcsr; unsigned short _cw, _newcw; __fnstcw(&_cw); _p = (~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF; _newcw = _cw & ~FP_MSKS_FLD; _newcw |= (~_m << FP_MSKS_OFF) & FP_MSKS_FLD; __fnldcw(_cw, _newcw); __stmxcsr(&_mxcsr); /* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */ _mxcsr &= ~SSE_MSKS_FLD; _mxcsr |= (~_m << SSE_MSKS_OFF) & SSE_MSKS_FLD; __ldmxcsr(&_mxcsr); return (_p); } static __inline fp_except_t __fpgetsticky(void) { unsigned _ex, _mxcsr; unsigned short _sw; __fnstsw(&_sw); _ex = (_sw & FP_STKY_FLD) >> FP_STKY_OFF; __stmxcsr(&_mxcsr); _ex |= (_mxcsr & SSE_STKY_FLD) >> SSE_STKY_OFF; return ((fp_except_t)_ex); } -#endif /* __GNUCLIKE_ASM */ - -#if !defined(__IEEEFP_NOINLINES__) && defined(__GNUCLIKE_ASM) +#if !defined(__IEEEFP_NOINLINES__) #define fpgetmask() __fpgetmask() #define fpgetprec() __fpgetprec() #define fpgetround() __fpgetround() #define fpgetsticky() __fpgetsticky() #define fpsetmask(m) __fpsetmask(m) #define fpsetprec(m) __fpsetprec(m) #define fpsetround(m) __fpsetround(m) -#else /* !(!__IEEEFP_NOINLINES__ && __GNUCLIKE_ASM) */ +#else /* __IEEEFP_NOINLINES__ */ /* Augment the userland declarations. */ __BEGIN_DECLS extern fp_rnd_t fpgetround(void); extern fp_rnd_t fpsetround(fp_rnd_t); extern fp_except_t fpgetmask(void); extern fp_except_t fpsetmask(fp_except_t); extern fp_except_t fpgetsticky(void); extern fp_except_t fpsetsticky(fp_except_t); fp_prec_t fpgetprec(void); fp_prec_t fpsetprec(fp_prec_t); __END_DECLS -#endif /* !__IEEEFP_NOINLINES__ && __GNUCLIKE_ASM */ +#endif /* !__IEEEFP_NOINLINES__ */ #endif /* !_MACHINE_IEEEFP_H_ */ diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index dc99d4249bd2..ad2b6216ed47 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -1,288 +1,288 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifndef _SYS_CDEFS_H_ #error "sys/cdefs.h is a prerequisite for this file" #endif #include #include #define PC_PTI_STACK_SZ 16 struct monitorbuf { int idle_state; /* Used by cpu_idle_mwait. */ int stop_state; /* Used by cpustop_handler. */ char padding[128 - (2 * sizeof(int))]; }; _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line"); /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. * The reason for doing it via a struct is so that an array of pointers * to each CPU's data can be set up for things like "check curproc on all * other processors" */ #define PCPU_MD_FIELDS \ struct monitorbuf pc_monitorbuf __aligned(128); /* cache line */\ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \ void *pc_pad0; \ uint64_t pc_kcr3; \ uint64_t pc_ucr3; \ uint64_t pc_saved_ucr3; \ register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ register_t pc_scratch_rax; \ u_int pc_apic_id; \ u_int pc_acpi_id; /* ACPI CPU id */ \ /* Pointer to the CPU %fs descriptor */ \ struct user_segment_descriptor *pc_fs32p; \ /* Pointer to the CPU %gs descriptor */ \ struct user_segment_descriptor *pc_gs32p; \ /* Pointer to the CPU LDT descriptor */ \ struct system_segment_descriptor *pc_ldt; \ /* Pointer to the CPU TSS descriptor */ \ struct system_segment_descriptor *pc_tss; \ u_int pc_cmci_mask; /* MCx banks for CMCI */ \ uint64_t pc_dbreg[16]; /* ddb debugging regs */ \ uint64_t pc_pti_stack[PC_PTI_STACK_SZ]; \ register_t pc_pti_rsp0; \ int pc_dbreg_cmd; /* ddb debugging reg cmd */ \ u_int pc_vcpu_id; /* Xen vCPU ID */ \ uint32_t pc_pcid_next; \ uint32_t pc_pcid_gen; \ uint32_t pc_unused; \ uint32_t pc_ibpb_set; \ void *pc_mds_buf; \ void *pc_mds_buf64; \ uint32_t pc_pad[4]; \ uint8_t pc_mds_tmp[64]; \ u_int pc_ipi_bitmap; \ struct amd64tss pc_common_tss; \ struct user_segment_descriptor pc_gdt[NGDT]; \ void *pc_smp_tlb_pmap; \ uint64_t pc_smp_tlb_addr1; \ uint64_t pc_smp_tlb_addr2; \ uint32_t pc_smp_tlb_gen; \ u_int pc_smp_tlb_op; \ uint64_t pc_ucr3_load_mask; \ char __pad[2916] /* pad to UMA_PCPU_ALLOC_SIZE */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 #ifdef _KERNEL #define MONITOR_STOPSTATE_RUNNING 0 #define MONITOR_STOPSTATE_STOPPED 1 -#if 
defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) +#if defined(__GNUCLIKE___TYPEOF) /* * Evaluates to the byte offset of the per-cpu variable name. */ #define __pcpu_offset(name) \ __offsetof(struct pcpu, name) /* * Evaluates to the type of the per-cpu variable name. */ #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) /* * Evaluates to the address of the per-cpu variable name. */ #define __PCPU_PTR(name) __extension__ ({ \ __pcpu_type(name) *__p; \ \ __asm __volatile("movq %%gs:%1,%0; addq %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ \ __p; \ }) /* * Evaluates to the value of the per-cpu variable name. */ #define __PCPU_GET(name) __extension__ ({ \ __pcpu_type(name) __res; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ sizeof(__res) == 4 || sizeof(__res) == 8) { \ __asm __volatile("mov %%gs:%1,%0" \ : "=r" (__s) \ : "m" (*(struct __s *)(__pcpu_offset(name)))); \ *(struct __s *)(void *)&__res = __s; \ } else { \ __res = *__PCPU_PTR(name); \ } \ __res; \ }) /* * Adds the value to the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_ADD(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4 || sizeof(__val) == 8) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("add %1,%%gs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else \ *__PCPU_PTR(name) += __val; \ } while (0) /* * Sets the value of the per-cpu variable name to value val. */ #define __PCPU_SET(name, val) { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4 || sizeof(__val) == 8) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("mov %1,%%gs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ } #define get_pcpu() __extension__ ({ \ struct pcpu *__pc; \ \ __asm __volatile("movq %%gs:%1,%0" \ : "=r" (__pc) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace)))); \ __pc; \ }) #define PCPU_GET(member) __PCPU_GET(pc_ ## member) #define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) #define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) #define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) #define IS_BSP() (PCPU_GET(cpuid) == 0) #define zpcpu_offset_cpu(cpu) ((uintptr_t)&__pcpu[0] + UMA_PCPU_ALLOC_SIZE * cpu) #define zpcpu_base_to_offset(base) (void *)((uintptr_t)(base) - (uintptr_t)&__pcpu[0]) #define zpcpu_offset_to_base(base) (void *)((uintptr_t)(base) + (uintptr_t)&__pcpu[0]) #define zpcpu_sub_protected(base, n) do { \ ZPCPU_ASSERT_PROTECTED(); \ zpcpu_sub(base, n); \ } while (0) #define zpcpu_set_protected(base, n) do { \ __typeof(*base) __n = (n); \ ZPCPU_ASSERT_PROTECTED(); \ switch (sizeof(*base)) { \ case 4: \ __asm __volatile("movl\t%1,%%gs:(%0)" \ : : "r" (base), "ri" (__n) : "memory", "cc"); \ break; \ case 8: \ __asm __volatile("movq\t%1,%%gs:(%0)" \ : : "r" (base), "ri" (__n) : "memory", "cc"); \ break; \ default: \ *zpcpu_get(base) = __n; \ } \ } while (0); #define zpcpu_add(base, n) do { \ __typeof(*base) __n = (n); \ CTASSERT(sizeof(*base) == 4 || sizeof(*base) == 8); \ switch (sizeof(*base)) { \ case 4: \ 
__asm __volatile("addl\t%1,%%gs:(%0)" \ : : "r" (base), "ri" (__n) : "memory", "cc"); \ break; \ case 8: \ __asm __volatile("addq\t%1,%%gs:(%0)" \ : : "r" (base), "ri" (__n) : "memory", "cc"); \ break; \ } \ } while (0) #define zpcpu_add_protected(base, n) do { \ ZPCPU_ASSERT_PROTECTED(); \ zpcpu_add(base, n); \ } while (0) #define zpcpu_sub(base, n) do { \ __typeof(*base) __n = (n); \ CTASSERT(sizeof(*base) == 4 || sizeof(*base) == 8); \ switch (sizeof(*base)) { \ case 4: \ __asm __volatile("subl\t%1,%%gs:(%0)" \ : : "r" (base), "ri" (__n) : "memory", "cc"); \ break; \ case 8: \ __asm __volatile("subq\t%1,%%gs:(%0)" \ : : "r" (base), "ri" (__n) : "memory", "cc"); \ break; \ } \ } while (0); -#else /* !__GNUCLIKE_ASM || !__GNUCLIKE___TYPEOF */ +#else /* !__GNUCLIKE___TYPEOF */ #error "this file needs to be ported to your compiler" -#endif /* __GNUCLIKE_ASM && __GNUCLIKE___TYPEOF */ +#endif /* __GNUCLIKE___TYPEOF */ #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h index b0fb469f5354..e86fd582b407 100644 --- a/sys/amd64/include/profile.h +++ b/sys/amd64/include/profile.h @@ -1,124 +1,118 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)profile.h 8.1 (Berkeley) 6/11/93 * $FreeBSD$ */ #ifndef _MACHINE_PROFILE_H_ #define _MACHINE_PROFILE_H_ #ifndef _KERNEL #include #define FUNCTION_ALIGNMENT 4 #define _MCOUNT_DECL \ static void _mcount(uintfptr_t frompc, uintfptr_t selfpc) __used; \ static void _mcount -#ifdef __GNUCLIKE_ASM #define MCOUNT __asm(" \n\ .text \n\ .p2align 4,0x90 \n\ .globl .mcount \n\ .type .mcount,@function \n\ .mcount: \n\ pushq %rdi \n\ pushq %rsi \n\ pushq %rdx \n\ pushq %rcx \n\ pushq %r8 \n\ pushq %r9 \n\ pushq %rax \n\ movq 8(%rbp),%rdi \n\ movq 7*8(%rsp),%rsi \n\ call _mcount \n\ popq %rax \n\ popq %r9 \n\ popq %r8 \n\ popq %rcx \n\ popq %rdx \n\ popq %rsi \n\ popq %rdi \n\ ret \n\ .size .mcount, . 
- .mcount"); #if 0 /* * We could use this, except it doesn't preserve the registers that were * being passed with arguments to the function that we were inserted * into. I've left it here as documentation of what the code above is * supposed to do. */ #define MCOUNT \ void \ mcount() \ { \ uintfptr_t selfpc, frompc; \ /* \ * Find the return address for mcount, \ * and the return address for mcount's caller. \ * \ * selfpc = pc pushed by call to mcount \ */ \ __asm("movq 8(%%rbp),%0" : "=r" (selfpc)); \ /* \ * frompc = pc pushed by call to mcount's caller. \ * The caller's stack frame has already been built, so %rbp is \ * the caller's frame pointer. The caller's raddr is in the \ * caller's frame following the caller's caller's frame pointer.\ */ \ __asm("movq (%%rbp),%0" : "=r" (frompc)); \ frompc = ((uintfptr_t *)frompc)[1]; \ _mcount(frompc, selfpc); \ } #endif -#else /* !__GNUCLIKE_ASM */ -#define MCOUNT -#endif /* __GNUCLIKE_ASM */ typedef u_long uintfptr_t; /* * An unsigned integral type that can hold non-negative difference between * function pointers. */ typedef u_long fptrdiff_t; __BEGIN_DECLS -#ifdef __GNUCLIKE_ASM void mcount(void) __asm(".mcount"); -#endif __END_DECLS #endif /* !_KERNEL */ #endif /* !_MACHINE_PROFILE_H_ */ diff --git a/sys/arm64/include/profile.h b/sys/arm64/include/profile.h index f5f5caa1faa3..f898e2707d65 100644 --- a/sys/arm64/include/profile.h +++ b/sys/arm64/include/profile.h @@ -1,95 +1,91 @@ /*- * SPDX-License-Identifier: MIT-CMU * * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. * All rights reserved. * * Author: Chris G. Demetriou * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * from: NetBSD: profile.h,v 1.9 1997/04/06 08:47:37 cgd Exp * from: FreeBSD: src/sys/alpha/include/profile.h,v 1.4 1999/12/29 * $FreeBSD$ */ #ifndef _MACHINE_PROFILE_H_ #define _MACHINE_PROFILE_H_ #define FUNCTION_ALIGNMENT 32 typedef u_long fptrdiff_t; #ifndef _KERNEL #include typedef __uintfptr_t uintfptr_t; #define _MCOUNT_DECL \ static void _mcount(uintfptr_t frompc, uintfptr_t selfpc) __used; \ static void _mcount -#ifdef __GNUCLIKE_ASM /* * Call into _mcount. On arm64 the .mcount is a function so callers will * handle caller saved registers. As we don't directly touch any callee * saved registers we can just load the two arguments and use a tail call * into the MI _mcount function. * * When building with gcc frompc will be in x0, however this is not the * case on clang. As such we need to load it from the stack. As long as * the caller follows the ABI this will load the correct value. 
*/ #define MCOUNT __asm( \ " .text \n" \ " .align 6 \n" \ " .type .mcount,#function \n" \ " .globl .mcount \n" \ " .mcount: \n" \ " .cfi_startproc \n" \ /* Load the caller return address as frompc */ \ " ldr x0, [x29, #8] \n" \ /* Use our return address as selfpc */ \ " mov x1, lr \n" \ " b _mcount \n" \ " .cfi_endproc \n" \ " .size .mcount, . - .mcount \n" \ ); #if 0 /* * If clang passed frompc correctly we could implement it like this, however * all clang versions we care about would need to be fixed before we could * make this change. */ void mcount(uintfptr_t frompc) { _mcount(frompc, __builtin_return_address(0)); } #endif -#else -#define MCOUNT -#endif #endif /* !_KERNEL */ #endif /* !_MACHINE_PROFILE_H_ */ diff --git a/sys/crypto/via/padlock_cipher.c b/sys/crypto/via/padlock_cipher.c index 0bc831f07c40..c5cd5161f4b2 100644 --- a/sys/crypto/via/padlock_cipher.c +++ b/sys/crypto/via/padlock_cipher.c @@ -1,240 +1,238 @@ /*- * Copyright (c) 2005-2006 Pawel Jakub Dawidek * Copyright (c) 2004 Mark R V Murray * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $OpenBSD: via.c,v 1.3 2004/06/15 23:36:55 deraadt Exp $ */ /*- * Copyright (c) 2003 Jason Wright * Copyright (c) 2003, 2004 Theo de Raadt * All rights reserved. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define PADLOCK_ROUND_COUNT_AES128 10 #define PADLOCK_ROUND_COUNT_AES192 12 #define PADLOCK_ROUND_COUNT_AES256 14 #define PADLOCK_ALGORITHM_TYPE_AES 0 #define PADLOCK_KEY_GENERATION_HW 0 #define PADLOCK_KEY_GENERATION_SW 1 #define PADLOCK_DIRECTION_ENCRYPT 0 #define PADLOCK_DIRECTION_DECRYPT 1 #define PADLOCK_KEY_SIZE_128 0 #define PADLOCK_KEY_SIZE_192 1 #define PADLOCK_KEY_SIZE_256 2 MALLOC_DECLARE(M_PADLOCK); static __inline void padlock_cbc(void *in, void *out, size_t count, void *key, union padlock_cw *cw, void *iv) { -#ifdef __GNUCLIKE_ASM /* The .byte line is really VIA C3 "xcrypt-cbc" instruction */ __asm __volatile( "pushf \n\t" "popf \n\t" "rep \n\t" ".byte 0x0f, 0xa7, 0xd0" : "+a" (iv), "+c" (count), "+D" (out), "+S" (in) : "b" (key), "d" (cw) : "cc", "memory" ); -#endif } static void padlock_cipher_key_setup(struct padlock_session *ses, const void *key, int klen) { union padlock_cw *cw; int i; cw = &ses->ses_cw; if (cw->cw_key_generation == PADLOCK_KEY_GENERATION_SW) { /* Build expanded keys for both directions */ rijndaelKeySetupEnc(ses->ses_ekey, key, klen * 8); rijndaelKeySetupDec(ses->ses_dkey, key, klen * 8); for (i = 0; i < 4 * (RIJNDAEL_MAXNR + 1); i++) { ses->ses_ekey[i] = ntohl(ses->ses_ekey[i]); ses->ses_dkey[i] = ntohl(ses->ses_dkey[i]); } } else { bcopy(key, ses->ses_ekey, klen); bcopy(key, ses->ses_dkey, klen); } } int padlock_cipher_setup(struct padlock_session *ses, const struct crypto_session_params *csp) { union padlock_cw *cw; if (csp->csp_cipher_klen != 16 && csp->csp_cipher_klen != 24 && csp->csp_cipher_klen != 32) { return (EINVAL); } cw = &ses->ses_cw; bzero(cw, sizeof(*cw)); cw->cw_algorithm_type = PADLOCK_ALGORITHM_TYPE_AES; cw->cw_key_generation = PADLOCK_KEY_GENERATION_SW; cw->cw_intermediate = 0; switch (csp->csp_cipher_klen * 8) { case 128: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES128; cw->cw_key_size = PADLOCK_KEY_SIZE_128; #ifdef HW_KEY_GENERATION /* This doesn't buy us much, that's why it is commented out. */ cw->cw_key_generation = PADLOCK_KEY_GENERATION_HW; #endif break; case 192: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES192; cw->cw_key_size = PADLOCK_KEY_SIZE_192; break; case 256: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES256; cw->cw_key_size = PADLOCK_KEY_SIZE_256; break; } if (csp->csp_cipher_key != NULL) { padlock_cipher_key_setup(ses, csp->csp_cipher_key, csp->csp_cipher_klen); } return (0); } /* * Function checks if the given buffer is already 16 bytes aligned. * If it is there is no need to allocate new buffer. * If it isn't, new buffer is allocated. */ static u_char * padlock_cipher_alloc(struct cryptop *crp, int *allocated) { u_char *addr; addr = crypto_contiguous_subsegment(crp, crp->crp_payload_start, crp->crp_payload_length); if (((uintptr_t)addr & 0xf) == 0) { /* 16 bytes aligned? */ *allocated = 0; return (addr); } *allocated = 1; addr = malloc(crp->crp_payload_length + 16, M_PADLOCK, M_NOWAIT); return (addr); } int padlock_cipher_process(struct padlock_session *ses, struct cryptop *crp, const struct crypto_session_params *csp) { union padlock_cw *cw; struct thread *td; u_char *buf, *abuf; uint32_t *key; uint8_t iv[AES_BLOCK_LEN] __aligned(16); int allocated; buf = padlock_cipher_alloc(crp, &allocated); if (buf == NULL) return (ENOMEM); /* Buffer has to be 16 bytes aligned. 
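 * PADLOCK_ALIGN() is expected to round the pointer up to the next
 * 16 byte boundary; a sketch of the usual idiom (the real macro lives
 * elsewhere in the padlock headers, so this is illustrative only):
 *
 *	abuf = (u_char *)(((uintptr_t)buf + 0xf) & ~(uintptr_t)0xf);
 *
 * This is also why padlock_cipher_alloc() over-allocates by 16 bytes
 * whenever the payload is not already aligned.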
*/ abuf = PADLOCK_ALIGN(buf); if (crp->crp_cipher_key != NULL) { padlock_cipher_key_setup(ses, crp->crp_cipher_key, csp->csp_cipher_klen); } cw = &ses->ses_cw; cw->cw_filler0 = 0; cw->cw_filler1 = 0; cw->cw_filler2 = 0; cw->cw_filler3 = 0; crypto_read_iv(crp, iv); if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { cw->cw_direction = PADLOCK_DIRECTION_ENCRYPT; key = ses->ses_ekey; } else { cw->cw_direction = PADLOCK_DIRECTION_DECRYPT; key = ses->ses_dkey; } if (allocated) { crypto_copydata(crp, crp->crp_payload_start, crp->crp_payload_length, abuf); } td = curthread; fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); padlock_cbc(abuf, abuf, crp->crp_payload_length / AES_BLOCK_LEN, key, cw, iv); fpu_kern_leave(td, ses->ses_fpu_ctx); if (allocated) { crypto_copyback(crp, crp->crp_payload_start, crp->crp_payload_length, abuf); zfree(buf, M_PADLOCK); } return (0); } diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c index f09024af4ed5..f834e677fc98 100644 --- a/sys/crypto/via/padlock_hash.c +++ b/sys/crypto/via/padlock_hash.c @@ -1,425 +1,421 @@ /*- * Copyright (c) 2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include #include #include #include #endif #include #include #include #include /* * Implementation notes. * * Some VIA CPUs provides SHA1 and SHA256 acceleration. * We implement all HMAC algorithms provided by crypto(9) framework, but we do * the crypto work in software unless this is HMAC/SHA1 or HMAC/SHA256 and * our CPU can accelerate it. * * Additional CPU instructions, which preform SHA1 and SHA256 are one-shot * functions - we have only one chance to give the data, CPU itself will add * the padding and calculate hash automatically. * This means, it is not possible to implement common init(), update(), final() * methods. * The way I've choosen is to keep adding data to the buffer on update() * (reallocating the buffer if necessary) and call XSHA{1,256} instruction on * final(). 
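 *
 * In crypto(9) terms the resulting per-request flow is roughly (a
 * sketch using only the functions defined below, not additional API):
 *
 *	padlock_sha_init(&ctx);
 *	padlock_sha_update(&ctx, chunk, len);	(append to psc_buf,
 *						 realloc as needed)
 *	...more updates...
 *	padlock_sha1_final(hash, &ctx);		(one REP XSHA1 pass over
 *						 the whole buffer)
 *
 * so all buffering is done in software and the hardware sees the
 * message exactly once.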
*/ struct padlock_sha_ctx { uint8_t *psc_buf; int psc_offset; int psc_size; }; CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx)); static void padlock_sha_init(void *vctx); static int padlock_sha_update(void *vctx, const void *buf, u_int bufsize); static void padlock_sha1_final(uint8_t *hash, void *vctx); static void padlock_sha256_final(uint8_t *hash, void *vctx); static const struct auth_hash padlock_hmac_sha1 = { .type = CRYPTO_SHA1_HMAC, .name = "HMAC-SHA1", .keysize = SHA1_BLOCK_LEN, .hashsize = SHA1_HASH_LEN, .ctxsize = sizeof(struct padlock_sha_ctx), .blocksize = SHA1_BLOCK_LEN, .Init = padlock_sha_init, .Update = padlock_sha_update, .Final = padlock_sha1_final, }; static const struct auth_hash padlock_hmac_sha256 = { .type = CRYPTO_SHA2_256_HMAC, .name = "HMAC-SHA2-256", .keysize = SHA2_256_BLOCK_LEN, .hashsize = SHA2_256_HASH_LEN, .ctxsize = sizeof(struct padlock_sha_ctx), .blocksize = SHA2_256_BLOCK_LEN, .Init = padlock_sha_init, .Update = padlock_sha_update, .Final = padlock_sha256_final, }; MALLOC_DECLARE(M_PADLOCK); static __inline void padlock_output_block(uint32_t *src, uint32_t *dst, size_t count) { while (count-- > 0) *dst++ = bswap32(*src++); } static void padlock_do_sha1(const u_char *in, u_char *out, int count) { u_char buf[128+16]; /* PadLock needs at least 128 bytes buffer. */ u_char *result = PADLOCK_ALIGN(buf); ((uint32_t *)result)[0] = 0x67452301; ((uint32_t *)result)[1] = 0xEFCDAB89; ((uint32_t *)result)[2] = 0x98BADCFE; ((uint32_t *)result)[3] = 0x10325476; ((uint32_t *)result)[4] = 0xC3D2E1F0; -#ifdef __GNUCLIKE_ASM __asm __volatile( ".byte 0xf3, 0x0f, 0xa6, 0xc8" /* rep xsha1 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0) ); -#endif padlock_output_block((uint32_t *)result, (uint32_t *)out, SHA1_HASH_LEN / sizeof(uint32_t)); } static void padlock_do_sha256(const char *in, char *out, int count) { char buf[128+16]; /* PadLock needs at least 128 bytes buffer. 
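 * The extra 16 bytes are presumably there so that PADLOCK_ALIGN() can
 * round the pointer up to a 16 byte boundary (worst case buf + 15)
 * without running past the end of the 128 byte scratch area the
 * instruction requires.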
*/ char *result = PADLOCK_ALIGN(buf); ((uint32_t *)result)[0] = 0x6A09E667; ((uint32_t *)result)[1] = 0xBB67AE85; ((uint32_t *)result)[2] = 0x3C6EF372; ((uint32_t *)result)[3] = 0xA54FF53A; ((uint32_t *)result)[4] = 0x510E527F; ((uint32_t *)result)[5] = 0x9B05688C; ((uint32_t *)result)[6] = 0x1F83D9AB; ((uint32_t *)result)[7] = 0x5BE0CD19; -#ifdef __GNUCLIKE_ASM __asm __volatile( ".byte 0xf3, 0x0f, 0xa6, 0xd0" /* rep xsha256 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0) ); -#endif padlock_output_block((uint32_t *)result, (uint32_t *)out, SHA2_256_HASH_LEN / sizeof(uint32_t)); } static void padlock_sha_init(void *vctx) { struct padlock_sha_ctx *ctx; ctx = vctx; ctx->psc_buf = NULL; ctx->psc_offset = 0; ctx->psc_size = 0; } static int padlock_sha_update(void *vctx, const void *buf, u_int bufsize) { struct padlock_sha_ctx *ctx; ctx = vctx; if (ctx->psc_size - ctx->psc_offset < bufsize) { ctx->psc_size = MAX(ctx->psc_size * 2, ctx->psc_size + bufsize); ctx->psc_buf = realloc(ctx->psc_buf, ctx->psc_size, M_PADLOCK, M_NOWAIT); if(ctx->psc_buf == NULL) return (ENOMEM); } bcopy(buf, ctx->psc_buf + ctx->psc_offset, bufsize); ctx->psc_offset += bufsize; return (0); } static void padlock_sha_free(void *vctx) { struct padlock_sha_ctx *ctx; ctx = vctx; if (ctx->psc_buf != NULL) { zfree(ctx->psc_buf, M_PADLOCK); ctx->psc_buf = NULL; ctx->psc_offset = 0; ctx->psc_size = 0; } } static void padlock_sha1_final(uint8_t *hash, void *vctx) { struct padlock_sha_ctx *ctx; ctx = vctx; padlock_do_sha1(ctx->psc_buf, hash, ctx->psc_offset); padlock_sha_free(ctx); } static void padlock_sha256_final(uint8_t *hash, void *vctx) { struct padlock_sha_ctx *ctx; ctx = vctx; padlock_do_sha256(ctx->psc_buf, hash, ctx->psc_offset); padlock_sha_free(ctx); } static void padlock_copy_ctx(const struct auth_hash *axf, void *sctx, void *dctx) { if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 && (axf->type == CRYPTO_SHA1_HMAC || axf->type == CRYPTO_SHA2_256_HMAC)) { struct padlock_sha_ctx *spctx = sctx, *dpctx = dctx; dpctx->psc_offset = spctx->psc_offset; dpctx->psc_size = spctx->psc_size; dpctx->psc_buf = malloc(dpctx->psc_size, M_PADLOCK, M_WAITOK); bcopy(spctx->psc_buf, dpctx->psc_buf, dpctx->psc_size); } else { bcopy(sctx, dctx, axf->ctxsize); } } static void padlock_free_ctx(const struct auth_hash *axf, void *ctx) { if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 && (axf->type == CRYPTO_SHA1_HMAC || axf->type == CRYPTO_SHA2_256_HMAC)) { padlock_sha_free(ctx); } } static void padlock_hash_key_setup(struct padlock_session *ses, const uint8_t *key, int klen) { const struct auth_hash *axf; axf = ses->ses_axf; /* * Try to free contexts before using them, because * padlock_hash_key_setup() can be called twice - once from * padlock_newsession() and again from padlock_process(). */ padlock_free_ctx(axf, ses->ses_ictx); padlock_free_ctx(axf, ses->ses_octx); hmac_init_ipad(axf, key, klen, ses->ses_ictx); hmac_init_opad(axf, key, klen, ses->ses_octx); } /* * Compute keyed-hash authenticator. 
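 *
 * This is the standard HMAC construction,
 *
 *	HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m))
 *
 * where the two partially keyed digest states are prepared once by
 * hmac_init_ipad() / hmac_init_opad() in padlock_hash_key_setup() and
 * kept in ses_ictx / ses_octx, so per request only the inner hash over
 * the data and the short outer hash over the inner digest remain.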
*/ static int padlock_authcompute(struct padlock_session *ses, struct cryptop *crp) { u_char hash[HASH_MAX_LEN], hash2[HASH_MAX_LEN]; const struct auth_hash *axf; union authctx ctx; int error; axf = ses->ses_axf; padlock_copy_ctx(axf, ses->ses_ictx, &ctx); error = crypto_apply(crp, crp->crp_aad_start, crp->crp_aad_length, axf->Update, &ctx); if (error != 0) { padlock_free_ctx(axf, &ctx); return (error); } error = crypto_apply(crp, crp->crp_payload_start, crp->crp_payload_length, axf->Update, &ctx); if (error != 0) { padlock_free_ctx(axf, &ctx); return (error); } axf->Final(hash, &ctx); padlock_copy_ctx(axf, ses->ses_octx, &ctx); axf->Update(&ctx, hash, axf->hashsize); axf->Final(hash, &ctx); if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { crypto_copydata(crp, crp->crp_digest_start, ses->ses_mlen, hash2); if (timingsafe_bcmp(hash, hash2, ses->ses_mlen) != 0) return (EBADMSG); } else crypto_copyback(crp, crp->crp_digest_start, ses->ses_mlen, hash); return (0); } /* Find software structure which describes HMAC algorithm. */ static const struct auth_hash * padlock_hash_lookup(int alg) { const struct auth_hash *axf; switch (alg) { case CRYPTO_NULL_HMAC: axf = &auth_hash_null; break; case CRYPTO_SHA1_HMAC: if ((via_feature_xcrypt & VIA_HAS_SHA) != 0) axf = &padlock_hmac_sha1; else axf = &auth_hash_hmac_sha1; break; case CRYPTO_RIPEMD160_HMAC: axf = &auth_hash_hmac_ripemd_160; break; case CRYPTO_SHA2_256_HMAC: if ((via_feature_xcrypt & VIA_HAS_SHA) != 0) axf = &padlock_hmac_sha256; else axf = &auth_hash_hmac_sha2_256; break; case CRYPTO_SHA2_384_HMAC: axf = &auth_hash_hmac_sha2_384; break; case CRYPTO_SHA2_512_HMAC: axf = &auth_hash_hmac_sha2_512; break; default: axf = NULL; break; } return (axf); } bool padlock_hash_check(const struct crypto_session_params *csp) { return (padlock_hash_lookup(csp->csp_auth_alg) != NULL); } int padlock_hash_setup(struct padlock_session *ses, const struct crypto_session_params *csp) { ses->ses_axf = padlock_hash_lookup(csp->csp_auth_alg); if (csp->csp_auth_mlen == 0) ses->ses_mlen = ses->ses_axf->hashsize; else ses->ses_mlen = csp->csp_auth_mlen; /* Allocate memory for HMAC inner and outer contexts. */ ses->ses_ictx = malloc(ses->ses_axf->ctxsize, M_PADLOCK, M_ZERO | M_NOWAIT); ses->ses_octx = malloc(ses->ses_axf->ctxsize, M_PADLOCK, M_ZERO | M_NOWAIT); if (ses->ses_ictx == NULL || ses->ses_octx == NULL) return (ENOMEM); /* Setup key if given. */ if (csp->csp_auth_key != NULL) { padlock_hash_key_setup(ses, csp->csp_auth_key, csp->csp_auth_klen); } return (0); } int padlock_hash_process(struct padlock_session *ses, struct cryptop *crp, const struct crypto_session_params *csp) { struct thread *td; int error; td = curthread; fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); if (crp->crp_auth_key != NULL) padlock_hash_key_setup(ses, crp->crp_auth_key, csp->csp_auth_klen); error = padlock_authcompute(ses, crp); fpu_kern_leave(td, ses->ses_fpu_ctx); return (error); } void padlock_hash_free(struct padlock_session *ses) { if (ses->ses_ictx != NULL) { padlock_free_ctx(ses->ses_axf, ses->ses_ictx); zfree(ses->ses_ictx, M_PADLOCK); ses->ses_ictx = NULL; } if (ses->ses_octx != NULL) { padlock_free_ctx(ses->ses_axf, ses->ses_octx); zfree(ses->ses_octx, M_PADLOCK); ses->ses_octx = NULL; } } diff --git a/sys/dev/random/nehemiah.c b/sys/dev/random/nehemiah.c index 3ad18005c935..6381b9357176 100644 --- a/sys/dev/random/nehemiah.c +++ b/sys/dev/random/nehemiah.c @@ -1,157 +1,155 @@ /*- * Copyright (c) 2013-2015 Mark R V Murray * Copyright (c) 2013 David E. 
O'Brien * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include static void random_nehemiah_init(void); static void random_nehemiah_deinit(void); static u_int random_nehemiah_read(void *, u_int); static struct random_source random_nehemiah = { .rs_ident = "VIA Nehemiah Padlock RNG", .rs_source = RANDOM_PURE_NEHEMIAH, .rs_read = random_nehemiah_read }; static struct fpu_kern_ctx *fpu_ctx_save; /* This H/W source never stores more than 8 bytes in one go */ /* ARGSUSED */ static __inline size_t VIA_RNG_store(void *buf) { uint32_t retval = 0; uint32_t rate = 0; -#ifdef __GNUCLIKE_ASM __asm __volatile( "movl $0,%%edx\n\t" ".byte 0x0f, 0xa7, 0xc0" : "=a" (retval), "+d" (rate), "+D" (buf) : : "memory" ); -#endif if (rate == 0) return (retval&0x1f); return (0); } static void random_nehemiah_init(void) { fpu_ctx_save = fpu_kern_alloc_ctx(FPU_KERN_NORMAL); } static void random_nehemiah_deinit(void) { fpu_kern_free_ctx(fpu_ctx_save); } /* It is specifically allowed that buf is a multiple of sizeof(long) */ static u_int random_nehemiah_read(void *buf, u_int c) { uint8_t *b; size_t count, ret; uint64_t tmp; fpu_kern_enter(curthread, fpu_ctx_save, FPU_KERN_NORMAL); b = buf; for (count = c; count > 0; count -= ret) { ret = MIN(VIA_RNG_store(&tmp), count); memcpy(b, &tmp, ret); b += ret; } fpu_kern_leave(curthread, fpu_ctx_save); return (c); } static int nehemiah_modevent(module_t mod, int type, void *unused) { int error = 0; switch (type) { case MOD_LOAD: if (via_feature_rng & VIA_HAS_RNG) { random_source_register(&random_nehemiah); printf("random: fast provider: \"%s\"\n", random_nehemiah.rs_ident); random_nehemiah_init(); } break; case MOD_UNLOAD: if (via_feature_rng & VIA_HAS_RNG) { random_nehemiah_deinit(); random_source_deregister(&random_nehemiah); } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t nehemiah_mod = { "nehemiah", nehemiah_modevent, 0 }; DECLARE_MODULE(nehemiah, nehemiah_mod, SI_SUB_RANDOM, SI_ORDER_FOURTH); MODULE_VERSION(nehemiah, 1); MODULE_DEPEND(nehemiah, random_harvestq, 1, 1, 1); diff --git a/sys/dev/sound/pcm/feeder_rate.c b/sys/dev/sound/pcm/feeder_rate.c index 
f6bb1836089b..b4b7dc7401e0 100644 --- a/sys/dev/sound/pcm/feeder_rate.c +++ b/sys/dev/sound/pcm/feeder_rate.c @@ -1,1741 +1,1741 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2009 Ariff Abdullah * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * feeder_rate: (Codename: Z Resampler), which means any effort to create * future replacement for this resampler are simply absurd unless * the world decide to add new alphabet after Z. * * FreeBSD bandlimited sinc interpolator, technically based on * "Digital Audio Resampling" by Julius O. Smith III * - http://ccrma.stanford.edu/~jos/resample/ * * The Good: * + all out fixed point integer operations, no soft-float or anything like * that. * + classic polyphase converters with high quality coefficient's polynomial * interpolators. * + fast, faster, or the fastest of its kind. * + compile time configurable. * + etc etc.. * * The Bad: * - The z, z_, and Z_ . Due to mental block (or maybe just 0x7a69), I * couldn't think of anything simpler than that (feeder_rate_xxx is just * too long). Expect possible clashes with other zitizens (any?). 
*/ #ifdef _KERNEL #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_snd.h" #endif #include #include #include "feeder_if.h" #define SND_USE_FXDIV #include "snd_fxdiv_gen.h" SND_DECLARE_FILE("$FreeBSD$"); #endif #include "feeder_rate_gen.h" #if !defined(_KERNEL) && defined(SND_DIAGNOSTIC) #undef Z_DIAGNOSTIC #define Z_DIAGNOSTIC 1 #elif defined(_KERNEL) #undef Z_DIAGNOSTIC #endif #ifndef Z_QUALITY_DEFAULT #define Z_QUALITY_DEFAULT Z_QUALITY_LINEAR #endif #define Z_RESERVOIR 2048 #define Z_RESERVOIR_MAX 131072 #define Z_SINC_MAX 0x3fffff #define Z_SINC_DOWNMAX 48 /* 384000 / 8000 */ #ifdef _KERNEL #define Z_POLYPHASE_MAX 183040 /* 286 taps, 640 phases */ #else #define Z_POLYPHASE_MAX 1464320 /* 286 taps, 5120 phases */ #endif #define Z_RATE_DEFAULT 48000 #define Z_RATE_MIN FEEDRATE_RATEMIN #define Z_RATE_MAX FEEDRATE_RATEMAX #define Z_ROUNDHZ FEEDRATE_ROUNDHZ #define Z_ROUNDHZ_MIN FEEDRATE_ROUNDHZ_MIN #define Z_ROUNDHZ_MAX FEEDRATE_ROUNDHZ_MAX #define Z_RATE_SRC FEEDRATE_SRC #define Z_RATE_DST FEEDRATE_DST #define Z_RATE_QUALITY FEEDRATE_QUALITY #define Z_RATE_CHANNELS FEEDRATE_CHANNELS #define Z_PARANOID 1 #define Z_MULTIFORMAT 1 #ifdef _KERNEL #undef Z_USE_ALPHADRIFT #define Z_USE_ALPHADRIFT 1 #endif #define Z_FACTOR_MIN 1 #define Z_FACTOR_MAX Z_MASK #define Z_FACTOR_SAFE(v) (!((v) < Z_FACTOR_MIN || (v) > Z_FACTOR_MAX)) struct z_info; typedef void (*z_resampler_t)(struct z_info *, uint8_t *); struct z_info { int32_t rsrc, rdst; /* original source / destination rates */ int32_t src, dst; /* rounded source / destination rates */ int32_t channels; /* total channels */ int32_t bps; /* bytes-per-sample */ int32_t quality; /* resampling quality */ int32_t z_gx, z_gy; /* interpolation / decimation ratio */ int32_t z_alpha; /* output sample time phase / drift */ uint8_t *z_delay; /* FIR delay line / linear buffer */ int32_t *z_coeff; /* FIR coefficients */ int32_t *z_dcoeff; /* FIR coefficients differences */ int32_t *z_pcoeff; /* FIR polyphase coefficients */ int32_t z_scale; /* output scaling */ int32_t z_dx; /* input sample drift increment */ int32_t z_dy; /* output sample drift increment */ #ifdef Z_USE_ALPHADRIFT int32_t z_alphadrift; /* alpha drift rate */ int32_t z_startdrift; /* buffer start position drift rate */ #endif int32_t z_mask; /* delay line full length mask */ int32_t z_size; /* half width of FIR taps */ int32_t z_full; /* full size of delay line */ int32_t z_alloc; /* largest allocated full size of delay line */ int32_t z_start; /* buffer processing start position */ int32_t z_pos; /* current position for the next feed */ #ifdef Z_DIAGNOSTIC uint32_t z_cycle; /* output cycle, purely for statistical */ #endif int32_t z_maxfeed; /* maximum feed to avoid 32bit overflow */ z_resampler_t z_resample; }; int feeder_rate_min = Z_RATE_MIN; int feeder_rate_max = Z_RATE_MAX; int feeder_rate_round = Z_ROUNDHZ; int feeder_rate_quality = Z_QUALITY_DEFAULT; static int feeder_rate_polyphase_max = Z_POLYPHASE_MAX; #ifdef _KERNEL static char feeder_rate_presets[] = FEEDER_RATE_PRESETS; SYSCTL_STRING(_hw_snd, OID_AUTO, feeder_rate_presets, CTLFLAG_RD, &feeder_rate_presets, 0, "compile-time rate presets"); SYSCTL_INT(_hw_snd, OID_AUTO, feeder_rate_polyphase_max, CTLFLAG_RWTUN, &feeder_rate_polyphase_max, 0, "maximum allowable polyphase entries"); static int sysctl_hw_snd_feeder_rate_min(SYSCTL_HANDLER_ARGS) { int err, val; val = feeder_rate_min; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL || val == feeder_rate_min) return (err); if (!(Z_FACTOR_SAFE(val) && val < 
feeder_rate_max)) return (EINVAL); feeder_rate_min = val; return (0); } SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_min, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, sizeof(int), sysctl_hw_snd_feeder_rate_min, "I", "minimum allowable rate"); static int sysctl_hw_snd_feeder_rate_max(SYSCTL_HANDLER_ARGS) { int err, val; val = feeder_rate_max; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL || val == feeder_rate_max) return (err); if (!(Z_FACTOR_SAFE(val) && val > feeder_rate_min)) return (EINVAL); feeder_rate_max = val; return (0); } SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_max, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, sizeof(int), sysctl_hw_snd_feeder_rate_max, "I", "maximum allowable rate"); static int sysctl_hw_snd_feeder_rate_round(SYSCTL_HANDLER_ARGS) { int err, val; val = feeder_rate_round; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL || val == feeder_rate_round) return (err); if (val < Z_ROUNDHZ_MIN || val > Z_ROUNDHZ_MAX) return (EINVAL); feeder_rate_round = val - (val % Z_ROUNDHZ); return (0); } SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_round, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, sizeof(int), sysctl_hw_snd_feeder_rate_round, "I", "sample rate converter rounding threshold"); static int sysctl_hw_snd_feeder_rate_quality(SYSCTL_HANDLER_ARGS) { struct snddev_info *d; struct pcm_channel *c; struct pcm_feeder *f; int i, err, val; val = feeder_rate_quality; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL || val == feeder_rate_quality) return (err); if (val < Z_QUALITY_MIN || val > Z_QUALITY_MAX) return (EINVAL); feeder_rate_quality = val; /* * Traverse all available channels on each device and try to * set resampler quality if and only if it is exist as * part of feeder chains and the channel is idle. */ for (i = 0; pcm_devclass != NULL && i < devclass_get_maxunit(pcm_devclass); i++) { d = devclass_get_softc(pcm_devclass, i); if (!PCM_REGISTERED(d)) continue; PCM_LOCK(d); PCM_WAIT(d); PCM_ACQUIRE(d); CHN_FOREACH(c, d, channels.pcm) { CHN_LOCK(c); f = chn_findfeeder(c, FEEDER_RATE); if (f == NULL || f->data == NULL || CHN_STARTED(c)) { CHN_UNLOCK(c); continue; } (void)FEEDER_SET(f, FEEDRATE_QUALITY, val); CHN_UNLOCK(c); } PCM_RELEASE(d); PCM_UNLOCK(d); } return (0); } SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_quality, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), sysctl_hw_snd_feeder_rate_quality, "I", "sample rate converter quality ("__XSTRING(Z_QUALITY_MIN)"=low .. " __XSTRING(Z_QUALITY_MAX)"=high)"); #endif /* _KERNEL */ /* * Resampler type. */ #define Z_IS_ZOH(i) ((i)->quality == Z_QUALITY_ZOH) #define Z_IS_LINEAR(i) ((i)->quality == Z_QUALITY_LINEAR) #define Z_IS_SINC(i) ((i)->quality > Z_QUALITY_LINEAR) /* * Macroses for accurate sample time drift calculations. * * gy2gx : given the amount of output, return the _exact_ required amount of * input. * gx2gy : given the amount of input, return the _maximum_ amount of output * that will be generated. * drift : given the amount of input and output, return the elapsed * sample-time. */ #define _Z_GCAST(x) ((uint64_t)(x)) -#if defined(__GNUCLIKE_ASM) && defined(__i386__) +#if defined(__i386__) /* * This is where i386 being beaten to a pulp. Fortunately this function is * rarely being called and if it is, it will decide the best (hopefully) * fastest way to do the division. 
If we can ensure that everything is dword * aligned, letting the compiler to call udivdi3 to do the division can be * faster compared to this. * * amd64 is the clear winner here, no question about it. */ static __inline uint32_t Z_DIV(uint64_t v, uint32_t d) { uint32_t hi, lo, quo, rem; hi = v >> 32; lo = v & 0xffffffff; /* * As much as we can, try to avoid long division like a plague. */ if (hi == 0) quo = lo / d; else __asm("divl %2" : "=a" (quo), "=d" (rem) : "r" (d), "0" (lo), "1" (hi)); return (quo); } #else #define Z_DIV(x, y) ((x) / (y)) #endif #define _Z_GY2GX(i, a, v) \ Z_DIV(((_Z_GCAST((i)->z_gx) * (v)) + ((i)->z_gy - (a) - 1)), \ (i)->z_gy) #define _Z_GX2GY(i, a, v) \ Z_DIV(((_Z_GCAST((i)->z_gy) * (v)) + (a)), (i)->z_gx) #define _Z_DRIFT(i, x, y) \ ((_Z_GCAST((i)->z_gy) * (x)) - (_Z_GCAST((i)->z_gx) * (y))) #define z_gy2gx(i, v) _Z_GY2GX(i, (i)->z_alpha, v) #define z_gx2gy(i, v) _Z_GX2GY(i, (i)->z_alpha, v) #define z_drift(i, x, y) _Z_DRIFT(i, x, y) /* * Macroses for SINC coefficients table manipulations.. whatever. */ #define Z_SINC_COEFF_IDX(i) ((i)->quality - Z_QUALITY_LINEAR - 1) #define Z_SINC_LEN(i) \ ((int32_t)(((uint64_t)z_coeff_tab[Z_SINC_COEFF_IDX(i)].len << \ Z_SHIFT) / (i)->z_dy)) #define Z_SINC_BASE_LEN(i) \ ((z_coeff_tab[Z_SINC_COEFF_IDX(i)].len - 1) >> (Z_DRIFT_SHIFT - 1)) /* * Macroses for linear delay buffer operations. Alignment is not * really necessary since we're not using true circular buffer, but it * will help us guard against possible trespasser. To be honest, * the linear block operations does not need guarding at all due to * accurate drifting! */ #define z_align(i, v) ((v) & (i)->z_mask) #define z_next(i, o, v) z_align(i, (o) + (v)) #define z_prev(i, o, v) z_align(i, (o) - (v)) #define z_fetched(i) (z_align(i, (i)->z_pos - (i)->z_start) - 1) #define z_free(i) ((i)->z_full - (i)->z_pos) /* * Macroses for Bla Bla .. :) */ #define z_copy(src, dst, sz) (void)memcpy(dst, src, sz) #define z_feed(...) FEEDER_FEED(__VA_ARGS__) static __inline uint32_t z_min(uint32_t x, uint32_t y) { return ((x < y) ? x : y); } static int32_t z_gcd(int32_t x, int32_t y) { int32_t w; while (y != 0) { w = x % y; x = y; y = w; } return (x); } static int32_t z_roundpow2(int32_t v) { int32_t i; i = 1; /* * Let it overflow at will.. */ while (i > 0 && i < v) i <<= 1; return (i); } /* * Zero Order Hold, the worst of the worst, an insult against quality, * but super fast. */ static void z_feed_zoh(struct z_info *info, uint8_t *dst) { #if 0 z_copy(info->z_delay + (info->z_start * info->channels * info->bps), dst, info->channels * info->bps); #else uint32_t cnt; uint8_t *src; cnt = info->channels * info->bps; src = info->z_delay + (info->z_start * cnt); /* * This is a bit faster than doing bcopy() since we're dealing * with possible unaligned samples. */ do { *dst++ = *src++; } while (--cnt != 0); #endif } /* * Linear Interpolation. This at least sounds better (perceptually) and fast, * but without any proper filtering which means aliasing still exist and * could become worst with a right sample. Interpolation centered within * Z_LINEAR_ONE between the present and previous sample and everything is * done with simple 32bit scaling arithmetic. 
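 *
 * As a sketch (the exact fixed point plumbing is hidden behind the
 * Z_LINEAR_INTERPOLATE_* macros, so take this as illustrative only):
 *
 *	frac = ((uint32_t)z_alpha * z_dx) >> Z_LINEAR_UNSHIFT;
 *	out  = a + (((b - a) * frac) / Z_LINEAR_ONE);
 *
 * i.e. a weighted average of the two neighbouring samples a and b,
 * with frac in [0, Z_LINEAR_ONE).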
*/ #define Z_DECLARE_LINEAR(SIGN, BIT, ENDIAN) \ static void \ z_feed_linear_##SIGN##BIT##ENDIAN(struct z_info *info, uint8_t *dst) \ { \ int32_t z; \ intpcm_t x, y; \ uint32_t ch; \ uint8_t *sx, *sy; \ \ z = ((uint32_t)info->z_alpha * info->z_dx) >> Z_LINEAR_UNSHIFT; \ \ sx = info->z_delay + (info->z_start * info->channels * \ PCM_##BIT##_BPS); \ sy = sx - (info->channels * PCM_##BIT##_BPS); \ \ ch = info->channels; \ \ do { \ x = _PCM_READ_##SIGN##BIT##_##ENDIAN(sx); \ y = _PCM_READ_##SIGN##BIT##_##ENDIAN(sy); \ x = Z_LINEAR_INTERPOLATE_##BIT(z, x, y); \ _PCM_WRITE_##SIGN##BIT##_##ENDIAN(dst, x); \ sx += PCM_##BIT##_BPS; \ sy += PCM_##BIT##_BPS; \ dst += PCM_##BIT##_BPS; \ } while (--ch != 0); \ } /* * Userland clipping diagnostic check, not enabled in kernel compilation. * While doing sinc interpolation, unrealistic samples like full scale sine * wav will clip, but for other things this will not make any noise at all. * Everybody should learn how to normalized perceived loudness of their own * music/sounds/samples (hint: ReplayGain). */ #ifdef Z_DIAGNOSTIC #define Z_CLIP_CHECK(v, BIT) do { \ if ((v) > PCM_S##BIT##_MAX) { \ fprintf(stderr, "Overflow: v=%jd, max=%jd\n", \ (intmax_t)(v), (intmax_t)PCM_S##BIT##_MAX); \ } else if ((v) < PCM_S##BIT##_MIN) { \ fprintf(stderr, "Underflow: v=%jd, min=%jd\n", \ (intmax_t)(v), (intmax_t)PCM_S##BIT##_MIN); \ } \ } while (0) #else #define Z_CLIP_CHECK(...) #endif #define Z_CLAMP(v, BIT) \ (((v) > PCM_S##BIT##_MAX) ? PCM_S##BIT##_MAX : \ (((v) < PCM_S##BIT##_MIN) ? PCM_S##BIT##_MIN : (v))) /* * Sine Cardinal (SINC) Interpolation. Scaling is done in 64 bit, so * there's no point to hold the plate any longer. All samples will be * shifted to a full 32 bit, scaled and restored during write for * maximum dynamic range (only for downsampling). */ #define _Z_SINC_ACCUMULATE(SIGN, BIT, ENDIAN, adv) \ c += z >> Z_SHIFT; \ z &= Z_MASK; \ coeff = Z_COEFF_INTERPOLATE(z, z_coeff[c], z_dcoeff[c]); \ x = _PCM_READ_##SIGN##BIT##_##ENDIAN(p); \ v += Z_NORM_##BIT((intpcm64_t)x * coeff); \ z += info->z_dy; \ p adv##= info->channels * PCM_##BIT##_BPS /* * XXX GCC4 optimization is such a !@#$%, need manual unrolling. */ #if defined(__GNUC__) && __GNUC__ >= 4 #define Z_SINC_ACCUMULATE(...) do { \ _Z_SINC_ACCUMULATE(__VA_ARGS__); \ _Z_SINC_ACCUMULATE(__VA_ARGS__); \ } while (0) #define Z_SINC_ACCUMULATE_DECR 2 #else #define Z_SINC_ACCUMULATE(...) 
do { \ _Z_SINC_ACCUMULATE(__VA_ARGS__); \ } while (0) #define Z_SINC_ACCUMULATE_DECR 1 #endif #define Z_DECLARE_SINC(SIGN, BIT, ENDIAN) \ static void \ z_feed_sinc_##SIGN##BIT##ENDIAN(struct z_info *info, uint8_t *dst) \ { \ intpcm64_t v; \ intpcm_t x; \ uint8_t *p; \ int32_t coeff, z, *z_coeff, *z_dcoeff; \ uint32_t c, center, ch, i; \ \ z_coeff = info->z_coeff; \ z_dcoeff = info->z_dcoeff; \ center = z_prev(info, info->z_start, info->z_size); \ ch = info->channels * PCM_##BIT##_BPS; \ dst += ch; \ \ do { \ dst -= PCM_##BIT##_BPS; \ ch -= PCM_##BIT##_BPS; \ v = 0; \ z = info->z_alpha * info->z_dx; \ c = 0; \ p = info->z_delay + (z_next(info, center, 1) * \ info->channels * PCM_##BIT##_BPS) + ch; \ for (i = info->z_size; i != 0; i -= Z_SINC_ACCUMULATE_DECR) \ Z_SINC_ACCUMULATE(SIGN, BIT, ENDIAN, +); \ z = info->z_dy - (info->z_alpha * info->z_dx); \ c = 0; \ p = info->z_delay + (center * info->channels * \ PCM_##BIT##_BPS) + ch; \ for (i = info->z_size; i != 0; i -= Z_SINC_ACCUMULATE_DECR) \ Z_SINC_ACCUMULATE(SIGN, BIT, ENDIAN, -); \ if (info->z_scale != Z_ONE) \ v = Z_SCALE_##BIT(v, info->z_scale); \ else \ v >>= Z_COEFF_SHIFT - Z_GUARD_BIT_##BIT; \ Z_CLIP_CHECK(v, BIT); \ _PCM_WRITE_##SIGN##BIT##_##ENDIAN(dst, Z_CLAMP(v, BIT)); \ } while (ch != 0); \ } #define Z_DECLARE_SINC_POLYPHASE(SIGN, BIT, ENDIAN) \ static void \ z_feed_sinc_polyphase_##SIGN##BIT##ENDIAN(struct z_info *info, uint8_t *dst) \ { \ intpcm64_t v; \ intpcm_t x; \ uint8_t *p; \ int32_t ch, i, start, *z_pcoeff; \ \ ch = info->channels * PCM_##BIT##_BPS; \ dst += ch; \ start = z_prev(info, info->z_start, (info->z_size << 1) - 1) * ch; \ \ do { \ dst -= PCM_##BIT##_BPS; \ ch -= PCM_##BIT##_BPS; \ v = 0; \ p = info->z_delay + start + ch; \ z_pcoeff = info->z_pcoeff + \ ((info->z_alpha * info->z_size) << 1); \ for (i = info->z_size; i != 0; i--) { \ x = _PCM_READ_##SIGN##BIT##_##ENDIAN(p); \ v += Z_NORM_##BIT((intpcm64_t)x * *z_pcoeff); \ z_pcoeff++; \ p += info->channels * PCM_##BIT##_BPS; \ x = _PCM_READ_##SIGN##BIT##_##ENDIAN(p); \ v += Z_NORM_##BIT((intpcm64_t)x * *z_pcoeff); \ z_pcoeff++; \ p += info->channels * PCM_##BIT##_BPS; \ } \ if (info->z_scale != Z_ONE) \ v = Z_SCALE_##BIT(v, info->z_scale); \ else \ v >>= Z_COEFF_SHIFT - Z_GUARD_BIT_##BIT; \ Z_CLIP_CHECK(v, BIT); \ _PCM_WRITE_##SIGN##BIT##_##ENDIAN(dst, Z_CLAMP(v, BIT)); \ } while (ch != 0); \ } #define Z_DECLARE(SIGN, BIT, ENDIAN) \ Z_DECLARE_LINEAR(SIGN, BIT, ENDIAN) \ Z_DECLARE_SINC(SIGN, BIT, ENDIAN) \ Z_DECLARE_SINC_POLYPHASE(SIGN, BIT, ENDIAN) #if BYTE_ORDER == LITTLE_ENDIAN || defined(SND_FEEDER_MULTIFORMAT) Z_DECLARE(S, 16, LE) Z_DECLARE(S, 32, LE) #endif #if BYTE_ORDER == BIG_ENDIAN || defined(SND_FEEDER_MULTIFORMAT) Z_DECLARE(S, 16, BE) Z_DECLARE(S, 32, BE) #endif #ifdef SND_FEEDER_MULTIFORMAT Z_DECLARE(S, 8, NE) Z_DECLARE(S, 24, LE) Z_DECLARE(S, 24, BE) Z_DECLARE(U, 8, NE) Z_DECLARE(U, 16, LE) Z_DECLARE(U, 24, LE) Z_DECLARE(U, 32, LE) Z_DECLARE(U, 16, BE) Z_DECLARE(U, 24, BE) Z_DECLARE(U, 32, BE) #endif enum { Z_RESAMPLER_ZOH, Z_RESAMPLER_LINEAR, Z_RESAMPLER_SINC, Z_RESAMPLER_SINC_POLYPHASE, Z_RESAMPLER_LAST }; #define Z_RESAMPLER_IDX(i) \ (Z_IS_SINC(i) ? 
Z_RESAMPLER_SINC : (i)->quality) #define Z_RESAMPLER_ENTRY(SIGN, BIT, ENDIAN) \ { \ AFMT_##SIGN##BIT##_##ENDIAN, \ { \ [Z_RESAMPLER_ZOH] = z_feed_zoh, \ [Z_RESAMPLER_LINEAR] = z_feed_linear_##SIGN##BIT##ENDIAN, \ [Z_RESAMPLER_SINC] = z_feed_sinc_##SIGN##BIT##ENDIAN, \ [Z_RESAMPLER_SINC_POLYPHASE] = \ z_feed_sinc_polyphase_##SIGN##BIT##ENDIAN \ } \ } static const struct { uint32_t format; z_resampler_t resampler[Z_RESAMPLER_LAST]; } z_resampler_tab[] = { #if BYTE_ORDER == LITTLE_ENDIAN || defined(SND_FEEDER_MULTIFORMAT) Z_RESAMPLER_ENTRY(S, 16, LE), Z_RESAMPLER_ENTRY(S, 32, LE), #endif #if BYTE_ORDER == BIG_ENDIAN || defined(SND_FEEDER_MULTIFORMAT) Z_RESAMPLER_ENTRY(S, 16, BE), Z_RESAMPLER_ENTRY(S, 32, BE), #endif #ifdef SND_FEEDER_MULTIFORMAT Z_RESAMPLER_ENTRY(S, 8, NE), Z_RESAMPLER_ENTRY(S, 24, LE), Z_RESAMPLER_ENTRY(S, 24, BE), Z_RESAMPLER_ENTRY(U, 8, NE), Z_RESAMPLER_ENTRY(U, 16, LE), Z_RESAMPLER_ENTRY(U, 24, LE), Z_RESAMPLER_ENTRY(U, 32, LE), Z_RESAMPLER_ENTRY(U, 16, BE), Z_RESAMPLER_ENTRY(U, 24, BE), Z_RESAMPLER_ENTRY(U, 32, BE), #endif }; #define Z_RESAMPLER_TAB_SIZE \ ((int32_t)(sizeof(z_resampler_tab) / sizeof(z_resampler_tab[0]))) static void z_resampler_reset(struct z_info *info) { info->src = info->rsrc - (info->rsrc % ((feeder_rate_round > 0 && info->rsrc > feeder_rate_round) ? feeder_rate_round : 1)); info->dst = info->rdst - (info->rdst % ((feeder_rate_round > 0 && info->rdst > feeder_rate_round) ? feeder_rate_round : 1)); info->z_gx = 1; info->z_gy = 1; info->z_alpha = 0; info->z_resample = NULL; info->z_size = 1; info->z_coeff = NULL; info->z_dcoeff = NULL; if (info->z_pcoeff != NULL) { free(info->z_pcoeff, M_DEVBUF); info->z_pcoeff = NULL; } info->z_scale = Z_ONE; info->z_dx = Z_FULL_ONE; info->z_dy = Z_FULL_ONE; #ifdef Z_DIAGNOSTIC info->z_cycle = 0; #endif if (info->quality < Z_QUALITY_MIN) info->quality = Z_QUALITY_MIN; else if (info->quality > Z_QUALITY_MAX) info->quality = Z_QUALITY_MAX; } #ifdef Z_PARANOID static int32_t z_resampler_sinc_len(struct z_info *info) { int32_t c, z, len, lmax; if (!Z_IS_SINC(info)) return (1); /* * A rather careful (or useless) way to calculate filter length. * Z_SINC_LEN() itself is accurate enough to do its job. Extra * sanity checking is not going to hurt though.. */ c = 0; z = info->z_dy; len = 0; lmax = z_coeff_tab[Z_SINC_COEFF_IDX(info)].len; do { c += z >> Z_SHIFT; z &= Z_MASK; z += info->z_dy; } while (c < lmax && ++len > 0); if (len != Z_SINC_LEN(info)) { #ifdef _KERNEL printf("%s(): sinc l=%d != Z_SINC_LEN=%d\n", __func__, len, Z_SINC_LEN(info)); #else fprintf(stderr, "%s(): sinc l=%d != Z_SINC_LEN=%d\n", __func__, len, Z_SINC_LEN(info)); return (-1); #endif } return (len); } #else #define z_resampler_sinc_len(i) (Z_IS_SINC(i) ? Z_SINC_LEN(i) : 1) #endif #define Z_POLYPHASE_COEFF_SHIFT 0 /* * Pick suitable polynomial interpolators based on filter oversampled ratio * (2 ^ Z_DRIFT_SHIFT). 
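 * The intuition: the more oversampled the prototype filter is, the
 * closer adjacent coefficient table entries are to each other, so a
 * cheaper polynomial is enough to reconstruct the in-between values;
 * a coarser table (small Z_DRIFT_SHIFT) needs one of the higher order
 * "optimal" interpolators below to keep the interpolation error down.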
*/ #if !(defined(Z_COEFF_INTERP_ZOH) || defined(Z_COEFF_INTERP_LINEAR) || \ defined(Z_COEFF_INTERP_QUADRATIC) || defined(Z_COEFF_INTERP_HERMITE) || \ defined(Z_COEFF_INTER_BSPLINE) || defined(Z_COEFF_INTERP_OPT32X) || \ defined(Z_COEFF_INTERP_OPT16X) || defined(Z_COEFF_INTERP_OPT8X) || \ defined(Z_COEFF_INTERP_OPT4X) || defined(Z_COEFF_INTERP_OPT2X)) #if Z_DRIFT_SHIFT >= 6 #define Z_COEFF_INTERP_BSPLINE 1 #elif Z_DRIFT_SHIFT >= 5 #define Z_COEFF_INTERP_OPT32X 1 #elif Z_DRIFT_SHIFT == 4 #define Z_COEFF_INTERP_OPT16X 1 #elif Z_DRIFT_SHIFT == 3 #define Z_COEFF_INTERP_OPT8X 1 #elif Z_DRIFT_SHIFT == 2 #define Z_COEFF_INTERP_OPT4X 1 #elif Z_DRIFT_SHIFT == 1 #define Z_COEFF_INTERP_OPT2X 1 #else #error "Z_DRIFT_SHIFT screwed!" #endif #endif /* * In classic polyphase mode, the actual coefficients for each phases need to * be calculated based on default prototype filters. For highly oversampled * filter, linear or quadradatic interpolator should be enough. Anything less * than that require 'special' interpolators to reduce interpolation errors. * * "Polynomial Interpolators for High-Quality Resampling of Oversampled Audio" * by Olli Niemitalo * - http://www.student.oulu.fi/~oniemita/dsp/deip.pdf * */ static int32_t z_coeff_interpolate(int32_t z, int32_t *z_coeff) { int32_t coeff; #if defined(Z_COEFF_INTERP_ZOH) /* 1-point, 0th-order (Zero Order Hold) */ z = z; coeff = z_coeff[0]; #elif defined(Z_COEFF_INTERP_LINEAR) int32_t zl0, zl1; /* 2-point, 1st-order Linear */ zl0 = z_coeff[0]; zl1 = z_coeff[1] - z_coeff[0]; coeff = Z_RSHIFT((int64_t)zl1 * z, Z_SHIFT) + zl0; #elif defined(Z_COEFF_INTERP_QUADRATIC) int32_t zq0, zq1, zq2; /* 3-point, 2nd-order Quadratic */ zq0 = z_coeff[0]; zq1 = z_coeff[1] - z_coeff[-1]; zq2 = z_coeff[1] + z_coeff[-1] - (z_coeff[0] << 1); coeff = Z_RSHIFT((Z_RSHIFT((int64_t)zq2 * z, Z_SHIFT) + zq1) * z, Z_SHIFT + 1) + zq0; #elif defined(Z_COEFF_INTERP_HERMITE) int32_t zh0, zh1, zh2, zh3; /* 4-point, 3rd-order Hermite */ zh0 = z_coeff[0]; zh1 = z_coeff[1] - z_coeff[-1]; zh2 = (z_coeff[-1] << 1) - (z_coeff[0] * 5) + (z_coeff[1] << 2) - z_coeff[2]; zh3 = z_coeff[2] - z_coeff[-1] + ((z_coeff[0] - z_coeff[1]) * 3); coeff = Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((int64_t)zh3 * z, Z_SHIFT) + zh2) * z, Z_SHIFT) + zh1) * z, Z_SHIFT + 1) + zh0; #elif defined(Z_COEFF_INTERP_BSPLINE) int32_t zb0, zb1, zb2, zb3; /* 4-point, 3rd-order B-Spline */ zb0 = Z_RSHIFT(0x15555555LL * (((int64_t)z_coeff[0] << 2) + z_coeff[-1] + z_coeff[1]), 30); zb1 = z_coeff[1] - z_coeff[-1]; zb2 = z_coeff[-1] + z_coeff[1] - (z_coeff[0] << 1); zb3 = Z_RSHIFT(0x15555555LL * (((z_coeff[0] - z_coeff[1]) * 3) + z_coeff[2] - z_coeff[-1]), 30); coeff = (Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((int64_t)zb3 * z, Z_SHIFT) + zb2) * z, Z_SHIFT) + zb1) * z, Z_SHIFT) + zb0 + 1) >> 1; #elif defined(Z_COEFF_INTERP_OPT32X) int32_t zoz, zoe1, zoe2, zoe3, zoo1, zoo2, zoo3; int32_t zoc0, zoc1, zoc2, zoc3, zoc4, zoc5; /* 6-point, 5th-order Optimal 32x */ zoz = z - (Z_ONE >> 1); zoe1 = z_coeff[1] + z_coeff[0]; zoe2 = z_coeff[2] + z_coeff[-1]; zoe3 = z_coeff[3] + z_coeff[-2]; zoo1 = z_coeff[1] - z_coeff[0]; zoo2 = z_coeff[2] - z_coeff[-1]; zoo3 = z_coeff[3] - z_coeff[-2]; zoc0 = Z_RSHIFT((0x1ac2260dLL * zoe1) + (0x0526cdcaLL * zoe2) + (0x00170c29LL * zoe3), 30); zoc1 = Z_RSHIFT((0x14f8a49aLL * zoo1) + (0x0d6d1109LL * zoo2) + (0x008cd4dcLL * zoo3), 30); zoc2 = Z_RSHIFT((-0x0d3e94a4LL * zoe1) + (0x0bddded4LL * zoe2) + (0x0160b5d0LL * zoe3), 30); zoc3 = Z_RSHIFT((-0x0de10cc4LL * zoo1) + (0x019b2a7dLL * zoo2) + (0x01cfe914LL * zoo3), 30); zoc4 = 
Z_RSHIFT((0x02aa12d7LL * zoe1) + (-0x03ff1bb3LL * zoe2) + (0x015508ddLL * zoe3), 30); zoc5 = Z_RSHIFT((0x051d29e5LL * zoo1) + (-0x028e7647LL * zoo2) + (0x0082d81aLL * zoo3), 30); coeff = Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT( (int64_t)zoc5 * zoz, Z_SHIFT) + zoc4) * zoz, Z_SHIFT) + zoc3) * zoz, Z_SHIFT) + zoc2) * zoz, Z_SHIFT) + zoc1) * zoz, Z_SHIFT) + zoc0; #elif defined(Z_COEFF_INTERP_OPT16X) int32_t zoz, zoe1, zoe2, zoe3, zoo1, zoo2, zoo3; int32_t zoc0, zoc1, zoc2, zoc3, zoc4, zoc5; /* 6-point, 5th-order Optimal 16x */ zoz = z - (Z_ONE >> 1); zoe1 = z_coeff[1] + z_coeff[0]; zoe2 = z_coeff[2] + z_coeff[-1]; zoe3 = z_coeff[3] + z_coeff[-2]; zoo1 = z_coeff[1] - z_coeff[0]; zoo2 = z_coeff[2] - z_coeff[-1]; zoo3 = z_coeff[3] - z_coeff[-2]; zoc0 = Z_RSHIFT((0x1ac2260dLL * zoe1) + (0x0526cdcaLL * zoe2) + (0x00170c29LL * zoe3), 30); zoc1 = Z_RSHIFT((0x14f8a49aLL * zoo1) + (0x0d6d1109LL * zoo2) + (0x008cd4dcLL * zoo3), 30); zoc2 = Z_RSHIFT((-0x0d3e94a4LL * zoe1) + (0x0bddded4LL * zoe2) + (0x0160b5d0LL * zoe3), 30); zoc3 = Z_RSHIFT((-0x0de10cc4LL * zoo1) + (0x019b2a7dLL * zoo2) + (0x01cfe914LL * zoo3), 30); zoc4 = Z_RSHIFT((0x02aa12d7LL * zoe1) + (-0x03ff1bb3LL * zoe2) + (0x015508ddLL * zoe3), 30); zoc5 = Z_RSHIFT((0x051d29e5LL * zoo1) + (-0x028e7647LL * zoo2) + (0x0082d81aLL * zoo3), 30); coeff = Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT( (int64_t)zoc5 * zoz, Z_SHIFT) + zoc4) * zoz, Z_SHIFT) + zoc3) * zoz, Z_SHIFT) + zoc2) * zoz, Z_SHIFT) + zoc1) * zoz, Z_SHIFT) + zoc0; #elif defined(Z_COEFF_INTERP_OPT8X) int32_t zoz, zoe1, zoe2, zoe3, zoo1, zoo2, zoo3; int32_t zoc0, zoc1, zoc2, zoc3, zoc4, zoc5; /* 6-point, 5th-order Optimal 8x */ zoz = z - (Z_ONE >> 1); zoe1 = z_coeff[1] + z_coeff[0]; zoe2 = z_coeff[2] + z_coeff[-1]; zoe3 = z_coeff[3] + z_coeff[-2]; zoo1 = z_coeff[1] - z_coeff[0]; zoo2 = z_coeff[2] - z_coeff[-1]; zoo3 = z_coeff[3] - z_coeff[-2]; zoc0 = Z_RSHIFT((0x1aa9b47dLL * zoe1) + (0x053d9944LL * zoe2) + (0x0018b23fLL * zoe3), 30); zoc1 = Z_RSHIFT((0x14a104d1LL * zoo1) + (0x0d7d2504LL * zoo2) + (0x0094b599LL * zoo3), 30); zoc2 = Z_RSHIFT((-0x0d22530bLL * zoe1) + (0x0bb37a2cLL * zoe2) + (0x016ed8e0LL * zoe3), 30); zoc3 = Z_RSHIFT((-0x0d744b1cLL * zoo1) + (0x01649591LL * zoo2) + (0x01dae93aLL * zoo3), 30); zoc4 = Z_RSHIFT((0x02a7ee1bLL * zoe1) + (-0x03fbdb24LL * zoe2) + (0x0153ed07LL * zoe3), 30); zoc5 = Z_RSHIFT((0x04cf9b6cLL * zoo1) + (-0x0266b378LL * zoo2) + (0x007a7c26LL * zoo3), 30); coeff = Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT( (int64_t)zoc5 * zoz, Z_SHIFT) + zoc4) * zoz, Z_SHIFT) + zoc3) * zoz, Z_SHIFT) + zoc2) * zoz, Z_SHIFT) + zoc1) * zoz, Z_SHIFT) + zoc0; #elif defined(Z_COEFF_INTERP_OPT4X) int32_t zoz, zoe1, zoe2, zoe3, zoo1, zoo2, zoo3; int32_t zoc0, zoc1, zoc2, zoc3, zoc4, zoc5; /* 6-point, 5th-order Optimal 4x */ zoz = z - (Z_ONE >> 1); zoe1 = z_coeff[1] + z_coeff[0]; zoe2 = z_coeff[2] + z_coeff[-1]; zoe3 = z_coeff[3] + z_coeff[-2]; zoo1 = z_coeff[1] - z_coeff[0]; zoo2 = z_coeff[2] - z_coeff[-1]; zoo3 = z_coeff[3] - z_coeff[-2]; zoc0 = Z_RSHIFT((0x1a8eda43LL * zoe1) + (0x0556ee38LL * zoe2) + (0x001a3784LL * zoe3), 30); zoc1 = Z_RSHIFT((0x143d863eLL * zoo1) + (0x0d910e36LL * zoo2) + (0x009ca889LL * zoo3), 30); zoc2 = Z_RSHIFT((-0x0d026821LL * zoe1) + (0x0b837773LL * zoe2) + (0x017ef0c6LL * zoe3), 30); zoc3 = Z_RSHIFT((-0x0cef1502LL * zoo1) + (0x01207a8eLL * zoo2) + (0x01e936dbLL * zoo3), 30); zoc4 = Z_RSHIFT((0x029fe643LL * zoe1) + (-0x03ef3fc8LL * zoe2) + (0x014f5923LL * zoe3), 30); zoc5 = Z_RSHIFT((0x043a9d08LL * zoo1) + (-0x02154febLL * zoo2) + 
(0x00670dbdLL * zoo3), 30); coeff = Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT( (int64_t)zoc5 * zoz, Z_SHIFT) + zoc4) * zoz, Z_SHIFT) + zoc3) * zoz, Z_SHIFT) + zoc2) * zoz, Z_SHIFT) + zoc1) * zoz, Z_SHIFT) + zoc0; #elif defined(Z_COEFF_INTERP_OPT2X) int32_t zoz, zoe1, zoe2, zoe3, zoo1, zoo2, zoo3; int32_t zoc0, zoc1, zoc2, zoc3, zoc4, zoc5; /* 6-point, 5th-order Optimal 2x */ zoz = z - (Z_ONE >> 1); zoe1 = z_coeff[1] + z_coeff[0]; zoe2 = z_coeff[2] + z_coeff[-1]; zoe3 = z_coeff[3] + z_coeff[-2]; zoo1 = z_coeff[1] - z_coeff[0]; zoo2 = z_coeff[2] - z_coeff[-1]; zoo3 = z_coeff[3] - z_coeff[-2]; zoc0 = Z_RSHIFT((0x19edb6fdLL * zoe1) + (0x05ebd062LL * zoe2) + (0x00267881LL * zoe3), 30); zoc1 = Z_RSHIFT((0x1223af76LL * zoo1) + (0x0de3dd6bLL * zoo2) + (0x00d683cdLL * zoo3), 30); zoc2 = Z_RSHIFT((-0x0c3ee068LL * zoe1) + (0x0a5c3769LL * zoe2) + (0x01e2aceaLL * zoe3), 30); zoc3 = Z_RSHIFT((-0x0a8ab614LL * zoo1) + (-0x0019522eLL * zoo2) + (0x022cefc7LL * zoo3), 30); zoc4 = Z_RSHIFT((0x0276187dLL * zoe1) + (-0x03a801e8LL * zoe2) + (0x0131d935LL * zoe3), 30); zoc5 = Z_RSHIFT((0x02c373f5LL * zoo1) + (-0x01275f83LL * zoo2) + (0x0018ee79LL * zoo3), 30); coeff = Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT((Z_RSHIFT( (int64_t)zoc5 * zoz, Z_SHIFT) + zoc4) * zoz, Z_SHIFT) + zoc3) * zoz, Z_SHIFT) + zoc2) * zoz, Z_SHIFT) + zoc1) * zoz, Z_SHIFT) + zoc0; #else #error "Interpolation type screwed!" #endif #if Z_POLYPHASE_COEFF_SHIFT > 0 coeff = Z_RSHIFT(coeff, Z_POLYPHASE_COEFF_SHIFT); #endif return (coeff); } static int z_resampler_build_polyphase(struct z_info *info) { int32_t alpha, c, i, z, idx; /* Let this be here first. */ if (info->z_pcoeff != NULL) { free(info->z_pcoeff, M_DEVBUF); info->z_pcoeff = NULL; } if (feeder_rate_polyphase_max < 1) return (ENOTSUP); if (((int64_t)info->z_size * info->z_gy * 2) > feeder_rate_polyphase_max) { #ifndef _KERNEL fprintf(stderr, "Polyphase entries exceed: [%d/%d] %jd > %d\n", info->z_gx, info->z_gy, (intmax_t)info->z_size * info->z_gy * 2, feeder_rate_polyphase_max); #endif return (E2BIG); } info->z_pcoeff = malloc(sizeof(int32_t) * info->z_size * info->z_gy * 2, M_DEVBUF, M_NOWAIT | M_ZERO); if (info->z_pcoeff == NULL) return (ENOMEM); for (alpha = 0; alpha < info->z_gy; alpha++) { z = alpha * info->z_dx; c = 0; for (i = info->z_size; i != 0; i--) { c += z >> Z_SHIFT; z &= Z_MASK; idx = (alpha * info->z_size * 2) + (info->z_size * 2) - i; info->z_pcoeff[idx] = z_coeff_interpolate(z, info->z_coeff + c); z += info->z_dy; } z = info->z_dy - (alpha * info->z_dx); c = 0; for (i = info->z_size; i != 0; i--) { c += z >> Z_SHIFT; z &= Z_MASK; idx = (alpha * info->z_size * 2) + i - 1; info->z_pcoeff[idx] = z_coeff_interpolate(z, info->z_coeff + c); z += info->z_dy; } } #ifndef _KERNEL fprintf(stderr, "Polyphase: [%d/%d] %d entries\n", info->z_gx, info->z_gy, info->z_size * info->z_gy * 2); #endif return (0); } static int z_resampler_setup(struct pcm_feeder *f) { struct z_info *info; int64_t gy2gx_max, gx2gy_max; uint32_t format; int32_t align, i, z_scale; int adaptive; info = f->data; z_resampler_reset(info); if (info->src == info->dst) return (0); /* Shrink by greatest common divisor. */ i = z_gcd(info->src, info->dst); info->z_gx = info->src / i; info->z_gy = info->dst / i; /* Too big, or too small. Bail out. */ if (!(Z_FACTOR_SAFE(info->z_gx) && Z_FACTOR_SAFE(info->z_gy))) return (EINVAL); format = f->desc->in; adaptive = 0; z_scale = 0; /* * Setup everything: filter length, conversion factor, etc. */ if (Z_IS_SINC(info)) { /* * Downsampling, or upsampling scaling factor. 
As long as the * factor can be represented by a fraction of 1 << Z_SHIFT, * we're pretty much in business. Scaling is not needed for * upsampling, so we just slap Z_ONE there. */ if (info->z_gx > info->z_gy) /* * If the downsampling ratio is beyond sanity, * enable semi-adaptive mode. Although handling * extreme ratio is possible, the result of the * conversion is just pointless, unworthy, * nonsensical noises, etc. */ if ((info->z_gx / info->z_gy) > Z_SINC_DOWNMAX) z_scale = Z_ONE / Z_SINC_DOWNMAX; else z_scale = ((uint64_t)info->z_gy << Z_SHIFT) / info->z_gx; else z_scale = Z_ONE; /* * This is actually impossible, unless anything above * overflow. */ if (z_scale < 1) return (E2BIG); /* * Calculate sample time/coefficients index drift. It is * a constant for upsampling, but downsampling require * heavy duty filtering with possible too long filters. * If anything goes wrong, revisit again and enable * adaptive mode. */ z_setup_adaptive_sinc: if (info->z_pcoeff != NULL) { free(info->z_pcoeff, M_DEVBUF); info->z_pcoeff = NULL; } if (adaptive == 0) { info->z_dy = z_scale << Z_DRIFT_SHIFT; if (info->z_dy < 1) return (E2BIG); info->z_scale = z_scale; } else { info->z_dy = Z_FULL_ONE; info->z_scale = Z_ONE; } #if 0 #define Z_SCALE_DIV 10000 #define Z_SCALE_LIMIT(s, v) \ ((((uint64_t)(s) * (v)) + (Z_SCALE_DIV >> 1)) / Z_SCALE_DIV) info->z_scale = Z_SCALE_LIMIT(info->z_scale, 9780); #endif /* Smallest drift increment. */ info->z_dx = info->z_dy / info->z_gy; /* * Overflow or underflow. Try adaptive, let it continue and * retry. */ if (info->z_dx < 1) { if (adaptive == 0) { adaptive = 1; goto z_setup_adaptive_sinc; } return (E2BIG); } /* * Round back output drift. */ info->z_dy = info->z_dx * info->z_gy; for (i = 0; i < Z_COEFF_TAB_SIZE; i++) { if (Z_SINC_COEFF_IDX(info) != i) continue; /* * Calculate required filter length and guard * against possible abusive result. Note that * this represents only 1/2 of the entire filter * length. */ info->z_size = z_resampler_sinc_len(info); /* * Multiple of 2 rounding, for better accumulator * performance. */ info->z_size &= ~1; if (info->z_size < 2 || info->z_size > Z_SINC_MAX) { if (adaptive == 0) { adaptive = 1; goto z_setup_adaptive_sinc; } return (E2BIG); } info->z_coeff = z_coeff_tab[i].coeff + Z_COEFF_OFFSET; info->z_dcoeff = z_coeff_tab[i].dcoeff; break; } if (info->z_coeff == NULL || info->z_dcoeff == NULL) return (EINVAL); } else if (Z_IS_LINEAR(info)) { /* * Don't put much effort if we're doing linear interpolation. * Just center the interpolation distance within Z_LINEAR_ONE, * and be happy about it. */ info->z_dx = Z_LINEAR_FULL_ONE / info->z_gy; } /* * We're safe for now, lets continue.. Look for our resampler * depending on configured format and quality. */ for (i = 0; i < Z_RESAMPLER_TAB_SIZE; i++) { int ridx; if (AFMT_ENCODING(format) != z_resampler_tab[i].format) continue; if (Z_IS_SINC(info) && adaptive == 0 && z_resampler_build_polyphase(info) == 0) ridx = Z_RESAMPLER_SINC_POLYPHASE; else ridx = Z_RESAMPLER_IDX(info); info->z_resample = z_resampler_tab[i].resampler[ridx]; break; } if (info->z_resample == NULL) return (EINVAL); info->bps = AFMT_BPS(format); align = info->channels * info->bps; /* * Calculate largest value that can be fed into z_gy2gx() and * z_gx2gy() without causing (signed) 32bit overflow. 
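To make the bookkeeping above concrete: the setup first reduces src/dst by their greatest common divisor into z_gx/z_gy, then derives the smallest per-output drift increment z_dx and rounds z_dy back to z_dx * z_gy. A small self-contained sketch of that reduction and rounding, with 44.1 kHz to 48 kHz used purely as an assumed example and an arbitrary drift resolution standing in for the driver's Z_SHIFT/Z_DRIFT_SHIFT scaling:

#include <stdint.h>
#include <stdio.h>

static uint32_t
gcd_u32(uint32_t a, uint32_t b)
{
	uint32_t t;

	while (b != 0) {
		t = a % b;
		a = b;
		b = t;
	}
	return (a);
}

int
main(void)
{
	uint32_t src = 44100, dst = 48000;	/* assumed example rates */
	uint32_t g, gx, gy, dx, dy;

	g = gcd_u32(src, dst);			/* 300 */
	gx = src / g;				/* 147 */
	gy = dst / g;				/* 160 */
	dy = 1U << 24;				/* stand-in drift resolution */
	dx = dy / gy;				/* smallest drift increment */
	dy = dx * gy;				/* round back, as the setup does */
	printf("ratio %u/%u, dx=%u, dy=%u\n", gx, gy, dx, dy);
	return (0);
}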
z_gy2gx() will * be called early during feeding process to determine how much input * samples that is required to generate requested output, while * z_gx2gy() will be called just before samples filtering / * accumulation process based on available samples that has been * calculated using z_gx2gy(). * * Now that is damn confusing, I guess ;-) . */ gy2gx_max = (((uint64_t)info->z_gy * INT32_MAX) - info->z_gy + 1) / info->z_gx; if ((gy2gx_max * align) > SND_FXDIV_MAX) gy2gx_max = SND_FXDIV_MAX / align; if (gy2gx_max < 1) return (E2BIG); gx2gy_max = (((uint64_t)info->z_gx * INT32_MAX) - info->z_gy) / info->z_gy; if (gx2gy_max > INT32_MAX) gx2gy_max = INT32_MAX; if (gx2gy_max < 1) return (E2BIG); /* * Ensure that z_gy2gx() at its largest possible calculated value * (alpha = 0) will not cause overflow further late during z_gx2gy() * stage. */ if (z_gy2gx(info, gy2gx_max) > _Z_GCAST(gx2gy_max)) return (E2BIG); info->z_maxfeed = gy2gx_max * align; #ifdef Z_USE_ALPHADRIFT info->z_startdrift = z_gy2gx(info, 1); info->z_alphadrift = z_drift(info, info->z_startdrift, 1); #endif i = z_gy2gx(info, 1); info->z_full = z_roundpow2((info->z_size << 1) + i); /* * Too big to be true, and overflowing left and right like mad .. */ if ((info->z_full * align) < 1) { if (adaptive == 0 && Z_IS_SINC(info)) { adaptive = 1; goto z_setup_adaptive_sinc; } return (E2BIG); } /* * Increase full buffer size if its too small to reduce cyclic * buffer shifting in main conversion/feeder loop. */ while (info->z_full < Z_RESERVOIR_MAX && (info->z_full - (info->z_size << 1)) < Z_RESERVOIR) info->z_full <<= 1; /* Initialize buffer position. */ info->z_mask = info->z_full - 1; info->z_start = z_prev(info, info->z_size << 1, 1); info->z_pos = z_next(info, info->z_start, 1); /* * Allocate or reuse delay line buffer, whichever makes sense. */ i = info->z_full * align; if (i < 1) return (E2BIG); if (info->z_delay == NULL || info->z_alloc < i || i <= (info->z_alloc >> 1)) { if (info->z_delay != NULL) free(info->z_delay, M_DEVBUF); info->z_delay = malloc(i, M_DEVBUF, M_NOWAIT | M_ZERO); if (info->z_delay == NULL) return (ENOMEM); info->z_alloc = i; } /* * Zero out head of buffer to avoid pops and clicks. */ memset(info->z_delay, sndbuf_zerodata(f->desc->out), info->z_pos * align); #ifdef Z_DIAGNOSTIC /* * XXX Debuging mess !@#$%^ */ #define dumpz(x) fprintf(stderr, "\t%12s = %10u : %-11d\n", \ "z_"__STRING(x), (uint32_t)info->z_##x, \ (int32_t)info->z_##x) fprintf(stderr, "\n%s():\n", __func__); fprintf(stderr, "\tchannels=%d, bps=%d, format=0x%08x, quality=%d\n", info->channels, info->bps, format, info->quality); fprintf(stderr, "\t%d (%d) -> %d (%d), ", info->src, info->rsrc, info->dst, info->rdst); fprintf(stderr, "[%d/%d]\n", info->z_gx, info->z_gy); fprintf(stderr, "\tminreq=%d, ", z_gy2gx(info, 1)); if (adaptive != 0) z_scale = Z_ONE; fprintf(stderr, "factor=0x%08x/0x%08x (%f)\n", z_scale, Z_ONE, (double)z_scale / Z_ONE); fprintf(stderr, "\tbase_length=%d, ", Z_SINC_BASE_LEN(info)); fprintf(stderr, "adaptive=%s\n", (adaptive != 0) ? 
"YES" : "NO"); dumpz(size); dumpz(alloc); if (info->z_alloc < 1024) fprintf(stderr, "\t%15s%10d Bytes\n", "", info->z_alloc); else if (info->z_alloc < (1024 << 10)) fprintf(stderr, "\t%15s%10d KBytes\n", "", info->z_alloc >> 10); else if (info->z_alloc < (1024 << 20)) fprintf(stderr, "\t%15s%10d MBytes\n", "", info->z_alloc >> 20); else fprintf(stderr, "\t%15s%10d GBytes\n", "", info->z_alloc >> 30); fprintf(stderr, "\t%12s %10d (min output samples)\n", "", (int32_t)z_gx2gy(info, info->z_full - (info->z_size << 1))); fprintf(stderr, "\t%12s %10d (min allocated output samples)\n", "", (int32_t)z_gx2gy(info, (info->z_alloc / align) - (info->z_size << 1))); fprintf(stderr, "\t%12s = %10d\n", "z_gy2gx()", (int32_t)z_gy2gx(info, 1)); fprintf(stderr, "\t%12s = %10d -> z_gy2gx() -> %d\n", "Max", (int32_t)gy2gx_max, (int32_t)z_gy2gx(info, gy2gx_max)); fprintf(stderr, "\t%12s = %10d\n", "z_gx2gy()", (int32_t)z_gx2gy(info, 1)); fprintf(stderr, "\t%12s = %10d -> z_gx2gy() -> %d\n", "Max", (int32_t)gx2gy_max, (int32_t)z_gx2gy(info, gx2gy_max)); dumpz(maxfeed); dumpz(full); dumpz(start); dumpz(pos); dumpz(scale); fprintf(stderr, "\t%12s %10f\n", "", (double)info->z_scale / Z_ONE); dumpz(dx); fprintf(stderr, "\t%12s %10f\n", "", (double)info->z_dx / info->z_dy); dumpz(dy); fprintf(stderr, "\t%12s %10d (drift step)\n", "", info->z_dy >> Z_SHIFT); fprintf(stderr, "\t%12s %10d (scaling differences)\n", "", (z_scale << Z_DRIFT_SHIFT) - info->z_dy); fprintf(stderr, "\t%12s = %u bytes\n", "intpcm32_t", sizeof(intpcm32_t)); fprintf(stderr, "\t%12s = 0x%08x, smallest=%.16lf\n", "Z_ONE", Z_ONE, (double)1.0 / (double)Z_ONE); #endif return (0); } static int z_resampler_set(struct pcm_feeder *f, int what, int32_t value) { struct z_info *info; int32_t oquality; info = f->data; switch (what) { case Z_RATE_SRC: if (value < feeder_rate_min || value > feeder_rate_max) return (E2BIG); if (value == info->rsrc) return (0); info->rsrc = value; break; case Z_RATE_DST: if (value < feeder_rate_min || value > feeder_rate_max) return (E2BIG); if (value == info->rdst) return (0); info->rdst = value; break; case Z_RATE_QUALITY: if (value < Z_QUALITY_MIN || value > Z_QUALITY_MAX) return (EINVAL); if (value == info->quality) return (0); /* * If we failed to set the requested quality, restore * the old one. We cannot afford leaving it broken since * passive feeder chains like vchans never reinitialize * itself. 
*/ oquality = info->quality; info->quality = value; if (z_resampler_setup(f) == 0) return (0); info->quality = oquality; break; case Z_RATE_CHANNELS: if (value < SND_CHN_MIN || value > SND_CHN_MAX) return (EINVAL); if (value == info->channels) return (0); info->channels = value; break; default: return (EINVAL); break; } return (z_resampler_setup(f)); } static int z_resampler_get(struct pcm_feeder *f, int what) { struct z_info *info; info = f->data; switch (what) { case Z_RATE_SRC: return (info->rsrc); break; case Z_RATE_DST: return (info->rdst); break; case Z_RATE_QUALITY: return (info->quality); break; case Z_RATE_CHANNELS: return (info->channels); break; default: break; } return (-1); } static int z_resampler_init(struct pcm_feeder *f) { struct z_info *info; int ret; if (f->desc->in != f->desc->out) return (EINVAL); info = malloc(sizeof(*info), M_DEVBUF, M_NOWAIT | M_ZERO); if (info == NULL) return (ENOMEM); info->rsrc = Z_RATE_DEFAULT; info->rdst = Z_RATE_DEFAULT; info->quality = feeder_rate_quality; info->channels = AFMT_CHANNEL(f->desc->in); f->data = info; ret = z_resampler_setup(f); if (ret != 0) { if (info->z_pcoeff != NULL) free(info->z_pcoeff, M_DEVBUF); if (info->z_delay != NULL) free(info->z_delay, M_DEVBUF); free(info, M_DEVBUF); f->data = NULL; } return (ret); } static int z_resampler_free(struct pcm_feeder *f) { struct z_info *info; info = f->data; if (info != NULL) { if (info->z_pcoeff != NULL) free(info->z_pcoeff, M_DEVBUF); if (info->z_delay != NULL) free(info->z_delay, M_DEVBUF); free(info, M_DEVBUF); } f->data = NULL; return (0); } static uint32_t z_resampler_feed_internal(struct pcm_feeder *f, struct pcm_channel *c, uint8_t *b, uint32_t count, void *source) { struct z_info *info; int32_t alphadrift, startdrift, reqout, ocount, reqin, align; int32_t fetch, fetched, start, cp; uint8_t *dst; info = f->data; if (info->z_resample == NULL) return (z_feed(f->source, c, b, count, source)); /* * Calculate sample size alignment and amount of sample output. * We will do everything in sample domain, but at the end we * will jump back to byte domain. */ align = info->channels * info->bps; ocount = SND_FXDIV(count, align); if (ocount == 0) return (0); /* * Calculate amount of input samples that is needed to generate * exact amount of output. */ reqin = z_gy2gx(info, ocount) - z_fetched(info); #ifdef Z_USE_ALPHADRIFT startdrift = info->z_startdrift; alphadrift = info->z_alphadrift; #else startdrift = _Z_GY2GX(info, 0, 1); alphadrift = z_drift(info, startdrift, 1); #endif dst = b; do { if (reqin != 0) { fetch = z_min(z_free(info), reqin); if (fetch == 0) { /* * No more free spaces, so wind enough * samples back to the head of delay line * in byte domain. */ fetched = z_fetched(info); start = z_prev(info, info->z_start, (info->z_size << 1) - 1); cp = (info->z_size << 1) + fetched; z_copy(info->z_delay + (start * align), info->z_delay, cp * align); info->z_start = z_prev(info, info->z_size << 1, 1); info->z_pos = z_next(info, info->z_start, fetched + 1); fetch = z_min(z_free(info), reqin); #ifdef Z_DIAGNOSTIC if (1) { static uint32_t kk = 0; fprintf(stderr, "Buffer Move: " "start=%d fetched=%d cp=%d " "cycle=%u [%u]\r", start, fetched, cp, info->z_cycle, ++kk); } info->z_cycle = 0; #endif } if (fetch != 0) { /* * Fetch in byte domain and jump back * to sample domain. */ fetched = SND_FXDIV(z_feed(f->source, c, info->z_delay + (info->z_pos * align), fetch * align, source), align); /* * Prepare to convert fetched buffer, * or mark us done if we cannot fulfill * the request. 
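The conversion loop a little further down advances the delay-line start position by a precomputed step (startdrift) and keeps a fractional error accumulator (z_alpha, in units of 1/z_gy) so that the long-run average step equals z_gx/z_gy exactly, with no per-sample division. A generic standalone sketch of that error-accumulator idea, not the driver's exact variables:

#include <stdint.h>

/*
 * Illustrative sketch: advance an input position so that, on average, it
 * moves by gx/gy input samples per output sample, using only additions.
 */
struct drift {
	uint32_t gy;		/* denominator of the reduced ratio */
	uint32_t whole;		/* gx / gy, precomputed */
	uint32_t frac;		/* gx % gy, precomputed */
	uint32_t pos;		/* integer input position */
	uint32_t alpha;		/* error accumulator, 0 <= alpha < gy */
};

static void
drift_step(struct drift *d)
{

	d->pos += d->whole;		/* whole-sample part of the step */
	d->alpha += d->frac;		/* fractional part, in units of 1/gy */
	if (d->alpha >= d->gy) {	/* error wrapped: take one extra step */
		d->alpha -= d->gy;
		d->pos++;
	}
}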
*/ reqin -= fetched; info->z_pos += fetched; if (fetched != fetch) reqin = 0; } } reqout = z_min(z_gx2gy(info, z_fetched(info)), ocount); if (reqout != 0) { ocount -= reqout; /* * Drift.. drift.. drift.. * * Notice that there are 2 methods of doing the drift * operations: The former is much cleaner (in a sense * of mathematical readings of my eyes), but slower * due to integer division in z_gy2gx(). Nevertheless, * both should give the same exact accurate drifting * results, so the later is favourable. */ do { info->z_resample(info, dst); #if 0 startdrift = z_gy2gx(info, 1); alphadrift = z_drift(info, startdrift, 1); info->z_start += startdrift; info->z_alpha += alphadrift; #else info->z_alpha += alphadrift; if (info->z_alpha < info->z_gy) info->z_start += startdrift; else { info->z_start += startdrift - 1; info->z_alpha -= info->z_gy; } #endif dst += align; #ifdef Z_DIAGNOSTIC info->z_cycle++; #endif } while (--reqout != 0); } } while (reqin != 0 && ocount != 0); /* * Back to byte domain.. */ return (dst - b); } static int z_resampler_feed(struct pcm_feeder *f, struct pcm_channel *c, uint8_t *b, uint32_t count, void *source) { uint32_t feed, maxfeed, left; /* * Split count to smaller chunks to avoid possible 32bit overflow. */ maxfeed = ((struct z_info *)(f->data))->z_maxfeed; left = count; do { feed = z_resampler_feed_internal(f, c, b, z_min(maxfeed, left), source); b += feed; left -= feed; } while (left != 0 && feed != 0); return (count - left); } static struct pcm_feederdesc feeder_rate_desc[] = { { FEEDER_RATE, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, }; static kobj_method_t feeder_rate_methods[] = { KOBJMETHOD(feeder_init, z_resampler_init), KOBJMETHOD(feeder_free, z_resampler_free), KOBJMETHOD(feeder_set, z_resampler_set), KOBJMETHOD(feeder_get, z_resampler_get), KOBJMETHOD(feeder_feed, z_resampler_feed), KOBJMETHOD_END }; FEEDER_DECLARE(feeder_rate, NULL); diff --git a/sys/i386/i386/in_cksum_machdep.c b/sys/i386/i386/in_cksum_machdep.c index ba7cd170286b..49255a3c99ec 100644 --- a/sys/i386/i386/in_cksum_machdep.c +++ b/sys/i386/i386/in_cksum_machdep.c @@ -1,493 +1,301 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from tahoe: in_cksum.c 1.2 86/01/05 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include /* * Checksum routine for Internet Protocol family headers. * * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. * * This implementation is 386 version. */ #undef ADDCARRY #define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff -/* - * icc needs to be special cased here, as the asm code below results - * in broken code if compiled with icc. - */ -#if !defined(__GNUCLIKE_ASM) -/* non gcc parts stolen from sys/alpha/alpha/in_cksum.c */ -#define REDUCE32 \ - { \ - q_util.q = sum; \ - sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - } -#define REDUCE16 \ - { \ - q_util.q = sum; \ - l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - sum = l_util.s[0] + l_util.s[1]; \ - ADDCARRY(sum); \ - } -#endif #define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} -#if !defined(__GNUCLIKE_ASM) -static const u_int32_t in_masks[] = { - /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/ - 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */ - 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */ - 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */ - 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */ -}; - -union l_util { - u_int16_t s[2]; - u_int32_t l; -}; -union q_util { - u_int16_t s[4]; - u_int32_t l[2]; - u_int64_t q; -}; - -static u_int64_t -in_cksumdata(const u_int32_t *lw, int len) -{ - u_int64_t sum = 0; - u_int64_t prefilled; - int offset; - union q_util q_util; - - if ((3 & (long) lw) == 0 && len == 20) { - sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4]; - REDUCE32; - return sum; - } - - if ((offset = 3 & (long) lw) != 0) { - const u_int32_t *masks = in_masks + (offset << 2); - lw = (u_int32_t *) (((long) lw) - offset); - sum = *lw++ & masks[len >= 3 ? 3 : len]; - len -= 4 - offset; - if (len <= 0) { - REDUCE32; - return sum; - } - } -#if 0 - /* - * Force to cache line boundary. - */ - offset = 32 - (0x1f & (long) lw); - if (offset < 32 && len > offset) { - len -= offset; - if (4 & offset) { - sum += (u_int64_t) lw[0]; - lw += 1; - } - if (8 & offset) { - sum += (u_int64_t) lw[0] + lw[1]; - lw += 2; - } - if (16 & offset) { - sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; - lw += 4; - } - } -#endif - /* - * access prefilling to start load of next cache line. - * then add current cache line - * save result of prefilling for loop iteration. 
- */ - prefilled = lw[0]; - while ((len -= 32) >= 4) { - u_int64_t prefilling = lw[8]; - sum += prefilled + lw[1] + lw[2] + lw[3] - + lw[4] + lw[5] + lw[6] + lw[7]; - lw += 8; - prefilled = prefilling; - } - if (len >= 0) { - sum += prefilled + lw[1] + lw[2] + lw[3] - + lw[4] + lw[5] + lw[6] + lw[7]; - lw += 8; - } else { - len += 32; - } - while ((len -= 16) >= 0) { - sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; - lw += 4; - } - len += 16; - while ((len -= 4) >= 0) { - sum += (u_int64_t) *lw++; - } - len += 4; - if (len > 0) - sum += (u_int64_t) (in_masks[len] & *lw); - REDUCE32; - return sum; -} - -u_short -in_addword(u_short a, u_short b) -{ - u_int64_t sum = a + b; - - ADDCARRY(sum); - return (sum); -} - -u_short -in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c) -{ - u_int64_t sum; - union q_util q_util; - union l_util l_util; - - sum = (u_int64_t) a + b + c; - REDUCE16; - return (sum); -} - -u_short -in_cksum_skip(struct mbuf *m, int len, int skip) -{ - u_int64_t sum = 0; - int mlen = 0; - int clen = 0; - caddr_t addr; - union q_util q_util; - union l_util l_util; - - len -= skip; - for (; skip && m; m = m->m_next) { - if (m->m_len > skip) { - mlen = m->m_len - skip; - addr = mtod(m, caddr_t) + skip; - goto skip_start; - } else { - skip -= m->m_len; - } - } - - for (; m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - mlen = m->m_len; - addr = mtod(m, caddr_t); -skip_start: - if (len < mlen) - mlen = len; - if ((clen ^ (long) addr) & 1) - sum += in_cksumdata((const u_int32_t *)addr, mlen) << 8; - else - sum += in_cksumdata((const u_int32_t *)addr, mlen); - - clen += mlen; - len -= mlen; - } - REDUCE16; - return (~sum & 0xffff); -} - -u_int in_cksum_hdr(const struct ip *ip) -{ - u_int64_t sum = in_cksumdata((const u_int32_t *)ip, sizeof(struct ip)); - union q_util q_util; - union l_util l_util; - - REDUCE16; - return (~sum & 0xffff); -} -#else - /* * These asm statements require __volatile because they pass information * via the condition codes. GCC does not currently provide a way to specify * the condition codes as an input or output operand. * * The LOAD macro below is effectively a prefetch into cache. GCC will * load the value into a register but will not use it. Since modern CPUs * reorder operations, this will generally take place in parallel with * other calculations. */ u_short in_cksum_skip(m, len, skip) struct mbuf *m; int len; int skip; { u_short *w; unsigned sum = 0; int mlen = 0; int byte_swapped = 0; union { char c[2]; u_short s; } su; len -= skip; for (; skip && m; m = m->m_next) { if (m->m_len > skip) { mlen = m->m_len - skip; w = (u_short *)(mtod(m, u_char *) + skip); goto skip_start; } else { skip -= m->m_len; } } for (;m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, u_short *); if (mlen == -1) { /* * The first byte of this mbuf is the continuation * of a word spanning between this mbuf and the * last mbuf. */ /* su.c[0] is already saved when scanning previous * mbuf. sum was REDUCEd when we found mlen == -1 */ su.c[1] = *(u_char *)w; sum += su.s; w = (u_short *)((char *)w + 1); mlen = m->m_len - 1; len--; } else mlen = m->m_len; skip_start: if (len < mlen) mlen = len; len -= mlen; /* * Force to long boundary so we do longword aligned * memory operations */ if (3 & (int) w) { REDUCE; if ((1 & (int) w) && (mlen > 0)) { sum <<= 8; su.c[0] = *(char *)w; w = (u_short *)((char *)w + 1); mlen--; byte_swapped = 1; } if ((2 & (int) w) && (mlen >= 2)) { sum += *w++; mlen -= 2; } } /* * Advance to a 486 cache line boundary. 
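The inline-asm blocks below add the data 32 bits at a time (and, in the unrolled loops, whole cache lines at a time) with add-with-carry, and ADDCARRY/REDUCE fold the carries back into 16 bits. For reference, a portable sketch of the same ones'-complement accumulate-and-fold, assuming aligned data with an even byte count:

#include <stdint.h>
#include <stddef.h>

/*
 * Portable sketch: sum 16-bit words into a 32-bit accumulator, then fold
 * the carries back (end-around carry) exactly as the REDUCE macro does.
 * The real routine additionally handles odd offsets, odd lengths, and
 * mbuf boundaries.
 */
static uint16_t
cksum_sketch(const uint16_t *data, size_t len)
{
	uint32_t sum;

	for (sum = 0; len >= 2; len -= 2)
		sum += *data++;
	sum = (sum & 0xffff) + (sum >> 16);	/* fold once */
	sum = (sum & 0xffff) + (sum >> 16);	/* fold any remaining carry */
	return ((uint16_t)~sum);
}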
*/ if (4 & (int) w && mlen >= 4) { __asm __volatile ( "addl %1, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]) ); w += 2; mlen -= 4; } if (8 & (int) w && mlen >= 8) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]) ); w += 4; mlen -= 8; } /* * Do as much of the checksum as possible 32 bits at at time. * In fact, this loop is unrolled to make overhead from * branches &c small. */ mlen -= 1; while ((mlen -= 32) >= 0) { /* * Add with carry 16 words and fold in the last * carry by adding a 0 with carry. * * The early ADD(16) and the LOAD(32) are to load * the next 2 cache lines in advance on 486's. The * 486 has a penalty of 2 clock cycles for loading * a cache line, plus whatever time the external * memory takes to load the first word(s) addressed. * These penalties are unavoidable. Subsequent * accesses to a cache line being loaded (and to * other external memory?) are delayed until the * whole load finishes. These penalties are mostly * avoided by not accessing external memory for * 8 cycles after the ADD(16) and 12 cycles after * the LOAD(32). The loop terminates when mlen * is initially 33 (not 32) to guaranteed that * the LOAD(32) is within bounds. */ __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl %3, %0\n" "adcl %4, %0\n" "adcl %5, %0\n" "mov %6, %%eax\n" "adcl %7, %0\n" "adcl %8, %0\n" "adcl %9, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[4]), "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]), "g" (((const u_int32_t *)w)[2]), "g" (((const u_int32_t *)w)[3]), "g" (((const u_int32_t *)w)[8]), "g" (((const u_int32_t *)w)[5]), "g" (((const u_int32_t *)w)[6]), "g" (((const u_int32_t *)w)[7]) : "eax" ); w += 16; } mlen += 32 + 1; if (mlen >= 32) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl %3, %0\n" "adcl %4, %0\n" "adcl %5, %0\n" "adcl %6, %0\n" "adcl %7, %0\n" "adcl %8, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[4]), "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]), "g" (((const u_int32_t *)w)[2]), "g" (((const u_int32_t *)w)[3]), "g" (((const u_int32_t *)w)[5]), "g" (((const u_int32_t *)w)[6]), "g" (((const u_int32_t *)w)[7]) ); w += 16; mlen -= 32; } if (mlen >= 16) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl %3, %0\n" "adcl %4, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]), "g" (((const u_int32_t *)w)[2]), "g" (((const u_int32_t *)w)[3]) ); w += 8; mlen -= 16; } if (mlen >= 8) { __asm __volatile ( "addl %1, %0\n" "adcl %2, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)w)[0]), "g" (((const u_int32_t *)w)[1]) ); w += 4; mlen -= 8; } if (mlen == 0 && byte_swapped == 0) continue; /* worth 1% maybe ?? */ REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { sum <<= 8; byte_swapped = 0; if (mlen == -1) { su.c[1] = *(char *)w; sum += su.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) /* * This mbuf has odd number of bytes. * There could be a word split betwen * this mbuf and the next mbuf. * Save the last byte (to prepend to next mbuf). */ su.c[0] = *(char *)w; } if (len) printf("%s: out of data by %d\n", __func__, len); if (mlen == -1) { /* The last mbuf has odd # of bytes. 
Follow the standard (the odd byte is shifted left by 8 bits) */ su.c[1] = 0; sum += su.s; } REDUCE; return (~sum & 0xffff); } -#endif diff --git a/sys/i386/i386/npx.c b/sys/i386/i386/npx.c index dc04d84b0eb5..307099925f23 100644 --- a/sys/i386/i386/npx.c +++ b/sys/i386/i386/npx.c @@ -1,1569 +1,1548 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 
*/ -#if defined(__GNUCLIKE_ASM) && !defined(lint) - #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } -#else /* !(__GNUCLIKE_ASM && !lint) */ - -void fldcw(u_short cw); -void fnclex(void); -void fninit(void); -void fnsave(caddr_t addr); -void fnstcw(caddr_t addr); -void fnstsw(caddr_t addr); -void fp_divide_by_0(void); -void frstor(caddr_t addr); -void fxsave(caddr_t addr); -void fxrstor(caddr_t addr); -void ldmxcsr(u_int csr); -void stmxcsr(u_int *csr); -void xrstor(char *addr, uint64_t mask); -void xsave(char *addr, uint64_t mask); -void xsaveopt(char *addr, uint64_t mask); - -#endif /* __GNUCLIKE_ASM && !lint */ #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) CTASSERT(sizeof(union savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. 
*/ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm)); static void fpu_clean_state(void); static void fpurstor(union savefpu *); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); int lazy_fpu_switch = 0; SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &lazy_fpu_switch, 0, "Lazily load FPU context after context switch"); int use_xsave; uint64_t xsave_mask; static uma_zone_t fpu_save_area_zone; static union savefpu *npx_initialstate; static struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static volatile u_int npx_traps_while_probing; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Determine if an FPU is present and how to use it. */ static int npx_probe(void) { struct gate_descriptor save_idt_npxtrap; u_short control, status; /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; return (1); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. */ hw_float = 1; goto cleanup; } printf( "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. 
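As a hypothetical restatement of the acceptance test above: after fninit, a working FPU reports an essentially clear status word and the documented reset control word 0x037f, which is what the 0xb8ff and 0x1f3f/0x033f masks check before the divide-by-zero exception-16 test. A sketch of that check pulled out as a plain function (not part of the driver):

/*
 * Hypothetical helper: the probe treats the FPU as present and sane when
 * the post-fninit status word is clear under the 0xb8ff mask and the
 * control word still holds fninit's reset value 0x037f (0x033f once
 * masked with 0x1f3f).
 */
static int
npx_words_sane(unsigned short status, unsigned short control)
{

	return ((status & 0xb8ff) == 0 && (control & 0x1f3f) == 0x033f);
}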
*/ printf("WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float); } static void fpusave_xsaveopt(union savefpu *addr) { xsaveopt((char *)addr, xsave_mask); } static void fpusave_xsave(union savefpu *addr) { xsave((char *)addr, xsave_mask); } static void fpusave_fxsave(union savefpu *addr) { fxsave((char *)addr); } static void fpusave_fnsave(union savefpu *addr) { fnsave((char *)addr); } static void init_xsave(void) { if (use_xsave) return; if (!cpu_fxsr || (cpu_feature2 & CPUID2_XSAVE) == 0) return; use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } DEFINE_IFUNC(, void, fpusave, (union savefpu *)) { init_xsave(); if (use_xsave) return ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0 ? fpusave_xsaveopt : fpusave_xsave); if (cpu_fxsr) return (fpusave_fxsave); return (fpusave_fnsave); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void npxinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; } /* * Calculate the fpu save area size. */ static void npxinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(union savefpu); } /* * Initialize floating point unit. */ void npxinit(bool bsp) { static union savefpu dummy; register_t saveintr; u_int mxcsr; u_short control; if (bsp) { if (!npx_probe()) return; npxinit_bsp1(); } if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (bsp) npxinit_bsp2(); /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. fpusave() initializes * the fpu. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); if (cpu_fxsr) fninit(); else fnsave(&dummy); control = __INITIAL_NPXCW__; fldcw(control); if (cpu_fxsr) { mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); } start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void npxinitstate(void *arg __unused) { uint64_t *xstate_bv; register_t saveintr; int cp[4], i, max_ext_n; if (!hw_float) return; /* Do potentially blocking operations before disabling interrupts. 
*/ fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); npx_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO); if (use_xsave) { if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); } saveintr = intr_disable(); stop_emulating(); if (cpu_fxsr) fpusave_fxsave(npx_initialstate); else fpusave_fnsave(npx_initialstate); if (cpu_fxsr) { if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM * registers or x87 registers (MM/ST). The fpusave * call dumped the garbage contained in the registers * after reset to the initial state saved. Clear XMM * and x87 registers file image to make the startup * program state and signal handler XMM/x87 register * content predictable. */ bzero(npx_initialstate->sv_xmm.sv_fp, sizeof(npx_initialstate->sv_xmm.sv_fp)); bzero(npx_initialstate->sv_xmm.sv_xmm, sizeof(npx_initialstate->sv_xmm.sv_xmm)); } else bzero(npx_initialstate->sv_87.sv_ac, sizeof(npx_initialstate->sv_87.sv_ac)); /* * Create a table describing the layout of the CPU Extended * Save Area. See Intel SDM rev. 075 Vol. 1 13.4.1 "Legacy * Region of an XSAVE Area" for the source of offsets/sizes. * Note that 32bit XSAVE does not use %xmm8-%xmm15, see * 10.5.1.2 and 13.5.2 "SSE State". */ if (use_xsave) { xstate_bv = (uint64_t *)((char *)(npx_initialstate + 1) + offsetof(struct xstate_hdr, xstate_bv)); *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } start_emulating(); intr_restore(saveintr); } SYSINIT(npxinitstate, SI_SUB_CPU, SI_ORDER_ANY, npxinitstate, NULL); /* * Free coprocessor (if we have it). */ void npxexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat(void) { if (!hw_float) return (_MC_FPFMT_NODEV); if (cpu_fxsr) return (_MC_FPFMT_XMM); return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. 
Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 
3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. 
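A compact illustration of the lookup described above, matching the expression npxtrap_x87() uses below: exception bits masked in the control word are dropped, the stack-fault bit (0x40) is always kept, and the result indexes fpetable[]. The helper name and the worked values are illustrative only:

/*
 * Illustrative helper (not in the driver); fpetable[] is the table defined
 * above. Example: status 0x0024 (divide-by-zero and inexact pending) with
 * a control word whose low six bits are 0x20 (only inexact masked) gives
 * index 0x04, i.e. FPE_FLTDIV.
 */
static int
x87_si_code(u_short status, u_short control)
{

	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}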
The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } static void restore_npx_curthread(struct thread *td, struct pcb *pcb) { /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, td); stop_emulating(); if (cpu_fxsr) fpu_clean_state(); if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(npx_initialstate, pcb->pcb_save, cpu_max_ext_state_size); fpurstor(pcb->pcb_save); if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(pcb->pcb_initial_npxcw); pcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { fpurstor(pcb->pcb_save); } } /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ int npxdna(void) { struct thread *td; if (!hw_float) return (0); td = curthread; critical_enter(); KASSERT((curpcb->pcb_flags & PCB_NPXNOSAVE) == 0, ("npxdna while in fpu_kern_enter(FPU_KERN_NOCTX)")); if (__predict_false(PCPU_GET(fpcurthread) == td)) { /* * Some virtual machines seems to set %cr0.TS at * arbitrary moments. Silently clear the TS bit * regardless of the eager/lazy FPU context switch * mode. */ stop_emulating(); } else { if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { printf( "npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, td, td->td_proc->p_pid); panic("npxdna"); } restore_npx_curthread(td, td->td_pcb); } critical_exit(); return (1); } /* * Wrapper for fpusave() called from context switch routines. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. 
We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. */ void npxsave(union savefpu *addr) { stop_emulating(); fpusave(addr); } void npxswitch(struct thread *td, struct pcb *pcb); void npxswitch(struct thread *td, struct pcb *pcb) { if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || !PCB_USER_FPU(pcb)) { start_emulating(); PCPU_SET(fpcurthread, NULL); } else if (PCPU_GET(fpcurthread) != td) { restore_npx_curthread(td, pcb); } } /* * Unconditionally save the current co-processor state across suspend and * resume. */ void npxsuspend(union savefpu *addr) { register_t cr0; if (!hw_float) return; if (PCPU_GET(fpcurthread) == NULL) { bcopy(npx_initialstate, addr, cpu_max_ext_state_size); return; } cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void npxresume(union savefpu *addr) { register_t cr0; if (!hw_float) return; cr0 = rcr0(); npxinit(false); stop_emulating(); fpurstor(addr); load_cr0(cr0); } void npxdrop(void) { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. */ if (!cpu_fxsr) fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i; int owned; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; critical_enter(); if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw); npxuserinited(td); critical_exit(); return (_MC_FPOWNED_PCB); } if (td == PCPU_GET(fpcurthread)) { fpusave(get_pcb_user_save_pcb(pcb)); if (!cpu_fxsr) /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) + offsetof(struct xstate_hdr, xstate_bv)); if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)npx_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } critical_exit(); return (owned); } void npxuserinited(struct thread *td) { struct pcb *pcb; CRITICAL_ASSERT(td); pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } int npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? 
*/ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(union savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } int npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; if (!hw_float) return (ENXIO); if (cpu_fxsr) addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; error = 0; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = npxsetxstate(td, xfpustate, xfpustate_size); if (error == 0) { if (!cpu_fxsr) fnclex(); /* As in npxdrop(). */ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurstor(get_pcb_user_save_td(td)); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } } else { error = npxsetxstate(td, xfpustate, xfpustate_size); if (error == 0) { bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); npxuserinited(td); } } critical_exit(); return (error); } static void npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87) { struct env87 *penv_87; struct envxmm *penv_xmm; struct fpacc87 *fx_reg; int i, st; uint64_t mantissa; uint16_t tw, exp; uint8_t ab_tw; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* * FPU registers and tags. * For ST(i), i = fpu_reg - top; we start with fpu_reg=7. */ st = 7 - ((penv_xmm->en_sw >> 11) & 7); ab_tw = penv_xmm->en_tw; tw = 0; for (i = 0x80; i != 0; i >>= 1) { sv_87->sv_ac[st] = sv_xmm->sv_fp[st].fp_acc; tw <<= 2; if (ab_tw & i) { /* Non-empty - we need to check ST(i) */ fx_reg = &sv_xmm->sv_fp[st].fp_acc; /* The first 64 bits contain the mantissa. */ mantissa = *((uint64_t *)fx_reg->fp_bytes); /* * The final 16 bits contain the sign bit and the exponent. * Mask the sign bit since it is of no consequence to these * tests. */ exp = *((uint16_t *)&fx_reg->fp_bytes[8]) & 0x7fff; if (exp == 0) { if (mantissa == 0) tw |= 1; /* Zero */ else tw |= 2; /* Denormal */ } else if (exp == 0x7fff) tw |= 2; /* Infinity or NaN */ } else tw |= 3; /* Empty */ st = (st - 1) & 7; } penv_87->en_tw = tw; } void npx_fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) { bzero(sv_87, sizeof(*sv_87)); npx_fill_fpregs_xmm1(sv_xmm, sv_87); } void npx_set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* * FPU registers and tags. * Abridged / Full translation (values in binary), see FXSAVE spec. 
* 0 11 * 1 00, 01, 10 */ penv_xmm->en_tw = 0; for (i = 0; i < 8; ++i) { sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; if ((penv_87->en_tw & (3 << i * 2)) != (3 << i * 2)) penv_xmm->en_tw |= 1 << i; } } void npx_get_fsave(void *addr) { struct thread *td; union savefpu *sv; td = curthread; npxgetregs(td); sv = get_pcb_user_save_td(td); if (cpu_fxsr) npx_fill_fpregs_xmm1(&sv->sv_xmm, addr); else bcopy(sv, addr, sizeof(struct env87) + sizeof(struct fpacc87[8])); } int npx_set_fsave(void *addr) { union savefpu sv; int error; bzero(&sv, sizeof(sv)); if (cpu_fxsr) npx_set_fpregs_xmm(addr, &sv.sv_xmm); else bcopy(addr, &sv, sizeof(struct env87) + sizeof(struct fpacc87[8])); error = npxsetregs(curthread, &sv, NULL, 0); return (error); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } static void fpurstor(union savefpu *addr) { if (use_xsave) xrstor((char *)addr, xsave_mask); else if (cpu_fxsr) fxrstor(addr); else frstor(addr); } #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); ISA_PNP_INFO(npxisa_ids); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_NPXINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? 
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, ("ctx is required when !FPU_KERN_NOCTX")); KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); KASSERT((pcb->pcb_flags & PCB_NPXNOSAVE) == 0, ("recursive fpu_kern_enter while in PCB_NPXNOSAVE state")); if ((flags & FPU_KERN_NOCTX) != 0) { critical_enter(); stop_emulating(); if (curthread == PCPU_GET(fpcurthread)) { fpusave(curpcb->pcb_save); PCPU_SET(fpcurthread, NULL); } else { KASSERT(PCPU_GET(fpcurthread) == NULL, ("invalid fpcurthread")); } /* * This breaks XSAVEOPT tracker, but * PCB_NPXNOSAVE state is supposed to never need to * save FPU context at all. */ fpurstor(npx_initialstate); pcb->pcb_flags |= PCB_KERNNPX | PCB_NPXNOSAVE | PCB_NPXINITDONE; return; } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; return; } pcb = td->td_pcb; critical_enter(); KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; critical_exit(); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXNOSAVE) != 0) { KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); KASSERT(PCPU_GET(fpcurthread) == NULL, ("non-NULL fpcurthread for PCB_NPXNOSAVE")); CRITICAL_ASSERT(td); pcb->pcb_flags &= ~(PCB_NPXNOSAVE | PCB_NPXINITDONE); start_emulating(); } else { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); pcb->pcb_save = ctx->prev; } if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) { pcb->pcb_flags |= PCB_NPXINITDONE; if ((pcb->pcb_flags & PCB_KERNNPX_THR) == 0) pcb->pcb_flags &= ~PCB_KERNNPX; } else if ((pcb->pcb_flags & PCB_KERNNPX_THR) == 0) pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_KERNNPX); } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } critical_exit(); return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX | PCB_KERNNPX_THR; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX_THR) != 0); } /* * FPU save area alloc/free/init utility routines */ union savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 
M_WAITOK)); } void fpu_save_area_free(union savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(union savefpu *fsa) { bcopy(npx_initialstate, fsa, cpu_max_ext_state_size); } diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h index 154144a470c6..af6d323e0396 100644 --- a/sys/i386/include/atomic.h +++ b/sys/i386/include/atomic.h @@ -1,934 +1,887 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif #include #ifdef _KERNEL #include #include #endif #ifndef __OFFSETOF_MONITORBUF /* * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf). * * The open-coded number is used instead of the symbolic expression to * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. * An assertion in i386/vm_machdep.c ensures that the value is correct. */ #define __OFFSETOF_MONITORBUF 0x80 static __inline void __mbk(void) { __asm __volatile("lock; addl $0,%%fs:%0" : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc"); } static __inline void __mbu(void) { __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc"); } #endif /* * Various simple operations on memory, each of which is atomic in the * presence of interrupts and multiple processors. 
* * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) * atomic_add_char(P, V) (*(u_char *)(P) += (V)) * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) * * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) * atomic_add_short(P, V) (*(u_short *)(P) += (V)) * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) * * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) * atomic_add_int(P, V) (*(u_int *)(P) += (V)) * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) * atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);) * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) * * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) * atomic_add_long(P, V) (*(u_long *)(P) += (V)) * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) * atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);) * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) */ -#if !defined(__GNUCLIKE_ASM) -#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ -void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ -void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) - -int atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src); -int atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src); -int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); -int atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src); -int atomic_fcmpset_short(volatile u_short *dst, u_short *expect, - u_short src); -int atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src); -u_int atomic_fetchadd_int(volatile u_int *p, u_int v); -int atomic_testandset_int(volatile u_int *p, u_int v); -int atomic_testandclear_int(volatile u_int *p, u_int v); -void atomic_thread_fence_acq(void); -void atomic_thread_fence_acq_rel(void); -void atomic_thread_fence_rel(void); -void atomic_thread_fence_seq_cst(void); - -#define ATOMIC_LOAD(TYPE) \ -u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) -#define ATOMIC_STORE(TYPE) \ -void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) - -int atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t); -int atomic_fcmpset_64(volatile uint64_t *, uint64_t *, uint64_t); -uint64_t atomic_load_acq_64(volatile uint64_t *); -void atomic_store_rel_64(volatile uint64_t *, uint64_t); -uint64_t atomic_swap_64(volatile uint64_t *, uint64_t); -uint64_t atomic_fetchadd_64(volatile uint64_t *, uint64_t); -void atomic_add_64(volatile uint64_t *, uint64_t); -void atomic_subtract_64(volatile uint64_t *, uint64_t); - -#else /* !__GNUCLIKE_ASM */ - /* * Always use lock prefixes. The result is slighly less optimal for * UP systems, but it matters less now, and sometimes UP is emulated * over SMP. * * The assembly is volatilized to avoid code chunk removal by the compiler. * GCC aggressively reorders operations and memory clobbering is necessary * in order to avoid that for memory barriers. 
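 *
 * For example, the ATOMIC_ASM(add, int, "addl %1,%0", "ir", v) instance
 * further down expands to atomic_add_int() and atomic_add_barr_int(), each
 * emitting a single "lock; addl"; the _barr variant additionally clobbers
 * "memory" so the compiler cannot move other accesses across it.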
*/ #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "cc"); \ } \ \ static __inline void \ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "memory", "cc"); \ } \ struct __hack /* * Atomic compare and set, used by the mutex functions. * * cmpset: * if (*dst == expect) * *dst = src * * fcmpset: * if (*dst == *expect) * *dst = src * else * *expect = *dst * * Returns 0 on failure, non-zero on success. */ #define ATOMIC_CMPSET(TYPE, CONS) \ static __inline int \ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \ { \ u_char res; \ \ __asm __volatile( \ " lock; cmpxchg %3,%1 ; " \ " sete %0 ; " \ "# atomic_cmpset_" #TYPE " " \ : "=q" (res), /* 0 */ \ "+m" (*dst), /* 1 */ \ "+a" (expect) /* 2 */ \ : CONS (src) /* 3 */ \ : "memory", "cc"); \ return (res); \ } \ \ static __inline int \ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \ { \ u_char res; \ \ __asm __volatile( \ " lock; cmpxchg %3,%1 ; " \ " sete %0 ; " \ "# atomic_fcmpset_" #TYPE " " \ : "=q" (res), /* 0 */ \ "+m" (*dst), /* 1 */ \ "+a" (*expect) /* 2 */ \ : CONS (src) /* 3 */ \ : "memory", "cc"); \ return (res); \ } ATOMIC_CMPSET(char, "q"); ATOMIC_CMPSET(short, "r"); ATOMIC_CMPSET(int, "r"); /* * Atomically add the value of v to the integer pointed to by p and return * the previous value of *p. */ static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int v) { __asm __volatile( " lock; xaddl %0,%1 ; " "# atomic_fetchadd_int" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ : : "cc"); return (v); } static __inline int atomic_testandset_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " lock; btsl %2,%1 ; " " setc %0 ; " "# atomic_testandset_int" : "=q" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } static __inline int atomic_testandclear_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " lock; btrl %2,%1 ; " " setc %0 ; " "# atomic_testandclear_int" : "=q" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } /* * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. * * However, a load may pass a store if they are performed on distinct * addresses, so we need Store/Load barrier for sequentially * consistent fences in SMP kernels. We use "lock addl $0,mem" for a * Store/Load barrier, as recommended by the AMD Software Optimization * Guide, and not mfence. In the kernel, we use a private per-cpu * cache line for "mem", to avoid introducing false data * dependencies. In user space, we use the word at the top of the * stack. * * For UP kernels, however, the memory of the single processor is * always consistent, so we only need to stop the compiler from * reordering accesses in a way that violates the semantics of acquire * and release. 
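 *
 * To illustrate the Store/Load case above: with two variables x and y both
 * initially zero, CPU0 executing "x = 1; r0 = y" and CPU1 executing
 * "y = 1; r1 = x" may observe r0 == r1 == 0, since each load can be
 * satisfied before the other CPU's store becomes globally visible.  The
 * "lock addl $0,mem" between the store and the load on each side excludes
 * that outcome, which is what atomic_thread_fence_seq_cst() relies on.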
*/ #if defined(_KERNEL) #define __storeload_barrier() __mbk() #else /* !_KERNEL */ #define __storeload_barrier() __mbu() #endif /* _KERNEL*/ #define ATOMIC_LOAD(TYPE) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ u_##TYPE res; \ \ res = *p; \ __compiler_membar(); \ return (res); \ } \ struct __hack #define ATOMIC_STORE(TYPE) \ static __inline void \ atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ { \ \ __compiler_membar(); \ *p = v; \ } \ struct __hack static __inline void atomic_thread_fence_acq(void) { __compiler_membar(); } static __inline void atomic_thread_fence_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_acq_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_seq_cst(void) { __storeload_barrier(); } #ifdef _KERNEL #ifdef WANT_FUNCTIONS int atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t); int atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t); uint64_t atomic_load_acq_64_i386(volatile uint64_t *); uint64_t atomic_load_acq_64_i586(volatile uint64_t *); void atomic_store_rel_64_i386(volatile uint64_t *, uint64_t); void atomic_store_rel_64_i586(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64_i386(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64_i586(volatile uint64_t *, uint64_t); #endif /* I486 does not support SMP or CMPXCHG8B. */ static __inline int atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src) { volatile uint32_t *p; u_char res; p = (volatile uint32_t *)dst; __asm __volatile( " pushfl ; " " cli ; " " xorl %1,%%eax ; " " xorl %2,%%edx ; " " orl %%edx,%%eax ; " " jne 1f ; " " movl %4,%1 ; " " movl %5,%2 ; " "1: " " sete %3 ; " " popfl" : "+A" (expect), /* 0 */ "+m" (*p), /* 1 */ "+m" (*(p + 1)), /* 2 */ "=q" (res) /* 3 */ : "r" ((uint32_t)src), /* 4 */ "r" ((uint32_t)(src >> 32)) /* 5 */ : "memory", "cc"); return (res); } static __inline int atomic_fcmpset_64_i386(volatile uint64_t *dst, uint64_t *expect, uint64_t src) { if (atomic_cmpset_64_i386(dst, *expect, src)) { return (1); } else { *expect = *dst; return (0); } } static __inline uint64_t atomic_load_acq_64_i386(volatile uint64_t *p) { volatile uint32_t *q; uint64_t res; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %1,%%eax ; " " movl %2,%%edx ; " " popfl" : "=&A" (res) /* 0 */ : "m" (*q), /* 1 */ "m" (*(q + 1)) /* 2 */ : "memory"); return (res); } static __inline void atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v) { volatile uint32_t *q; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %%eax,%0 ; " " movl %%edx,%1 ; " " popfl" : "=m" (*q), /* 0 */ "=m" (*(q + 1)) /* 1 */ : "A" (v) /* 2 */ : "memory"); } static __inline uint64_t atomic_swap_64_i386(volatile uint64_t *p, uint64_t v) { volatile uint32_t *q; uint64_t res; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %1,%%eax ; " " movl %2,%%edx ; " " movl %4,%2 ; " " movl %3,%1 ; " " popfl" : "=&A" (res), /* 0 */ "+m" (*q), /* 1 */ "+m" (*(q + 1)) /* 2 */ : "r" ((uint32_t)v), /* 3 */ "r" ((uint32_t)(v >> 32))); /* 4 */ return (res); } static __inline int atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src) { u_char res; __asm __volatile( " lock; cmpxchg8b %1 ; " " sete %0" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+A" (expect) /* 2 */ : "b" ((uint32_t)src), /* 3 */ "c" ((uint32_t)(src >> 32)) /* 4 */ : "memory", "cc"); return (res); } static __inline int atomic_fcmpset_64_i586(volatile 
uint64_t *dst, uint64_t *expect, uint64_t src) { u_char res; __asm __volatile( " lock; cmpxchg8b %1 ; " " sete %0" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+A" (*expect) /* 2 */ : "b" ((uint32_t)src), /* 3 */ "c" ((uint32_t)(src >> 32)) /* 4 */ : "memory", "cc"); return (res); } static __inline uint64_t atomic_load_acq_64_i586(volatile uint64_t *p) { uint64_t res; __asm __volatile( " movl %%ebx,%%eax ; " " movl %%ecx,%%edx ; " " lock; cmpxchg8b %1" : "=&A" (res), /* 0 */ "+m" (*p) /* 1 */ : : "memory", "cc"); return (res); } static __inline void atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v) { __asm __volatile( " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " " lock; cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */ : : "ebx", "ecx", "memory", "cc"); } static __inline uint64_t atomic_swap_64_i586(volatile uint64_t *p, uint64_t v) { __asm __volatile( " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " " lock; cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */ : : "ebx", "ecx", "memory", "cc"); return (v); } static __inline int atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_cmpset_64_i386(dst, expect, src)); else return (atomic_cmpset_64_i586(dst, expect, src)); } static __inline int atomic_fcmpset_64(volatile uint64_t *dst, uint64_t *expect, uint64_t src) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_fcmpset_64_i386(dst, expect, src)); else return (atomic_fcmpset_64_i586(dst, expect, src)); } static __inline uint64_t atomic_load_acq_64(volatile uint64_t *p) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_load_acq_64_i386(p)); else return (atomic_load_acq_64_i586(p)); } static __inline void atomic_store_rel_64(volatile uint64_t *p, uint64_t v) { if ((cpu_feature & CPUID_CX8) == 0) atomic_store_rel_64_i386(p, v); else atomic_store_rel_64_i586(p, v); } static __inline uint64_t atomic_swap_64(volatile uint64_t *p, uint64_t v) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_swap_64_i386(p, v)); else return (atomic_swap_64_i586(p, v)); } static __inline uint64_t atomic_fetchadd_64(volatile uint64_t *p, uint64_t v) { for (;;) { uint64_t t = *p; if (atomic_cmpset_64(p, t, t + v)) return (t); } } static __inline void atomic_add_64(volatile uint64_t *p, uint64_t v) { uint64_t t; for (;;) { t = *p; if (atomic_cmpset_64(p, t, t + v)) break; } } static __inline void atomic_subtract_64(volatile uint64_t *p, uint64_t v) { uint64_t t; for (;;) { t = *p; if (atomic_cmpset_64(p, t, t - v)) break; } } #endif /* _KERNEL */ -#endif /* !__GNUCLIKE_ASM */ - ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); ATOMIC_ASM(set, long, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, long, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, long, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v); #define ATOMIC_LOADSTORE(TYPE) \ ATOMIC_LOAD(TYPE); \ ATOMIC_STORE(TYPE) ATOMIC_LOADSTORE(char); ATOMIC_LOADSTORE(short); ATOMIC_LOADSTORE(int); 
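/*
 * Illustrative sketch only: the acquire load and release store instantiated
 * above are typically paired to publish data.  The helper names below are
 * hypothetical and not part of this interface; the fragment is kept under
 * "#if 0" purely as an example.
 */
#if 0
static __inline void
__example_publish(volatile u_int *flag, u_int *payload, u_int v)
{

	*payload = v;			/* plain store of the data */
	atomic_store_rel_int(flag, 1);	/* release: data ordered before flag */
}

static __inline u_int
__example_consume(volatile u_int *flag, u_int *payload)
{

	while (atomic_load_acq_int(flag) == 0)
		;			/* spin until the writer sets the flag */
	return (*payload);		/* acquire: flag ordered before data */
}
#endif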
ATOMIC_LOADSTORE(long); #undef ATOMIC_ASM #undef ATOMIC_LOAD #undef ATOMIC_STORE #undef ATOMIC_LOADSTORE #ifndef WANT_FUNCTIONS static __inline int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src) { return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect, (u_int)src)); } static __inline int atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src) { return (atomic_fcmpset_int((volatile u_int *)dst, (u_int *)expect, (u_int)src)); } static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v)); } static __inline int atomic_testandset_long(volatile u_long *p, u_int v) { return (atomic_testandset_int((volatile u_int *)p, v)); } static __inline int atomic_testandclear_long(volatile u_long *p, u_int v) { return (atomic_testandclear_int((volatile u_int *)p, v)); } /* Read the current value and store a new value in the destination. */ -#ifdef __GNUCLIKE_ASM - static __inline u_int atomic_swap_int(volatile u_int *p, u_int v) { __asm __volatile( " xchgl %1,%0 ; " "# atomic_swap_int" : "+r" (v), /* 0 */ "+m" (*p)); /* 1 */ return (v); } static __inline u_long atomic_swap_long(volatile u_long *p, u_long v) { return (atomic_swap_int((volatile u_int *)p, (u_int)v)); } -#else /* !__GNUCLIKE_ASM */ - -u_int atomic_swap_int(volatile u_int *p, u_int v); -u_long atomic_swap_long(volatile u_long *p, u_long v); - -#endif /* __GNUCLIKE_ASM */ - #define atomic_set_acq_char atomic_set_barr_char #define atomic_set_rel_char atomic_set_barr_char #define atomic_clear_acq_char atomic_clear_barr_char #define atomic_clear_rel_char atomic_clear_barr_char #define atomic_add_acq_char atomic_add_barr_char #define atomic_add_rel_char atomic_add_barr_char #define atomic_subtract_acq_char atomic_subtract_barr_char #define atomic_subtract_rel_char atomic_subtract_barr_char #define atomic_cmpset_acq_char atomic_cmpset_char #define atomic_cmpset_rel_char atomic_cmpset_char #define atomic_fcmpset_acq_char atomic_fcmpset_char #define atomic_fcmpset_rel_char atomic_fcmpset_char #define atomic_set_acq_short atomic_set_barr_short #define atomic_set_rel_short atomic_set_barr_short #define atomic_clear_acq_short atomic_clear_barr_short #define atomic_clear_rel_short atomic_clear_barr_short #define atomic_add_acq_short atomic_add_barr_short #define atomic_add_rel_short atomic_add_barr_short #define atomic_subtract_acq_short atomic_subtract_barr_short #define atomic_subtract_rel_short atomic_subtract_barr_short #define atomic_cmpset_acq_short atomic_cmpset_short #define atomic_cmpset_rel_short atomic_cmpset_short #define atomic_fcmpset_acq_short atomic_fcmpset_short #define atomic_fcmpset_rel_short atomic_fcmpset_short #define atomic_set_acq_int atomic_set_barr_int #define atomic_set_rel_int atomic_set_barr_int #define atomic_clear_acq_int atomic_clear_barr_int #define atomic_clear_rel_int atomic_clear_barr_int #define atomic_add_acq_int atomic_add_barr_int #define atomic_add_rel_int atomic_add_barr_int #define atomic_subtract_acq_int atomic_subtract_barr_int #define atomic_subtract_rel_int atomic_subtract_barr_int #define atomic_cmpset_acq_int atomic_cmpset_int #define atomic_cmpset_rel_int atomic_cmpset_int #define atomic_fcmpset_acq_int atomic_fcmpset_int #define atomic_fcmpset_rel_int atomic_fcmpset_int #define atomic_set_acq_long atomic_set_barr_long #define atomic_set_rel_long atomic_set_barr_long #define atomic_clear_acq_long atomic_clear_barr_long #define atomic_clear_rel_long atomic_clear_barr_long #define 
atomic_add_acq_long atomic_add_barr_long #define atomic_add_rel_long atomic_add_barr_long #define atomic_subtract_acq_long atomic_subtract_barr_long #define atomic_subtract_rel_long atomic_subtract_barr_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_fcmpset_acq_long atomic_fcmpset_long #define atomic_fcmpset_rel_long atomic_fcmpset_long #define atomic_readandclear_int(p) atomic_swap_int(p, 0) #define atomic_readandclear_long(p) atomic_swap_long(p, 0) #define atomic_testandset_acq_long atomic_testandset_long /* Operations on 8-bit bytes. */ #define atomic_set_8 atomic_set_char #define atomic_set_acq_8 atomic_set_acq_char #define atomic_set_rel_8 atomic_set_rel_char #define atomic_clear_8 atomic_clear_char #define atomic_clear_acq_8 atomic_clear_acq_char #define atomic_clear_rel_8 atomic_clear_rel_char #define atomic_add_8 atomic_add_char #define atomic_add_acq_8 atomic_add_acq_char #define atomic_add_rel_8 atomic_add_rel_char #define atomic_subtract_8 atomic_subtract_char #define atomic_subtract_acq_8 atomic_subtract_acq_char #define atomic_subtract_rel_8 atomic_subtract_rel_char #define atomic_load_acq_8 atomic_load_acq_char #define atomic_store_rel_8 atomic_store_rel_char #define atomic_cmpset_8 atomic_cmpset_char #define atomic_cmpset_acq_8 atomic_cmpset_acq_char #define atomic_cmpset_rel_8 atomic_cmpset_rel_char #define atomic_fcmpset_8 atomic_fcmpset_char #define atomic_fcmpset_acq_8 atomic_fcmpset_acq_char #define atomic_fcmpset_rel_8 atomic_fcmpset_rel_char /* Operations on 16-bit words. */ #define atomic_set_16 atomic_set_short #define atomic_set_acq_16 atomic_set_acq_short #define atomic_set_rel_16 atomic_set_rel_short #define atomic_clear_16 atomic_clear_short #define atomic_clear_acq_16 atomic_clear_acq_short #define atomic_clear_rel_16 atomic_clear_rel_short #define atomic_add_16 atomic_add_short #define atomic_add_acq_16 atomic_add_acq_short #define atomic_add_rel_16 atomic_add_rel_short #define atomic_subtract_16 atomic_subtract_short #define atomic_subtract_acq_16 atomic_subtract_acq_short #define atomic_subtract_rel_16 atomic_subtract_rel_short #define atomic_load_acq_16 atomic_load_acq_short #define atomic_store_rel_16 atomic_store_rel_short #define atomic_cmpset_16 atomic_cmpset_short #define atomic_cmpset_acq_16 atomic_cmpset_acq_short #define atomic_cmpset_rel_16 atomic_cmpset_rel_short #define atomic_fcmpset_16 atomic_fcmpset_short #define atomic_fcmpset_acq_16 atomic_fcmpset_acq_short #define atomic_fcmpset_rel_16 atomic_fcmpset_rel_short /* Operations on 32-bit double words. 
*/ #define atomic_set_32 atomic_set_int #define atomic_set_acq_32 atomic_set_acq_int #define atomic_set_rel_32 atomic_set_rel_int #define atomic_clear_32 atomic_clear_int #define atomic_clear_acq_32 atomic_clear_acq_int #define atomic_clear_rel_32 atomic_clear_rel_int #define atomic_add_32 atomic_add_int #define atomic_add_acq_32 atomic_add_acq_int #define atomic_add_rel_32 atomic_add_rel_int #define atomic_subtract_32 atomic_subtract_int #define atomic_subtract_acq_32 atomic_subtract_acq_int #define atomic_subtract_rel_32 atomic_subtract_rel_int #define atomic_load_acq_32 atomic_load_acq_int #define atomic_store_rel_32 atomic_store_rel_int #define atomic_cmpset_32 atomic_cmpset_int #define atomic_cmpset_acq_32 atomic_cmpset_acq_int #define atomic_cmpset_rel_32 atomic_cmpset_rel_int #define atomic_fcmpset_32 atomic_fcmpset_int #define atomic_fcmpset_acq_32 atomic_fcmpset_acq_int #define atomic_fcmpset_rel_32 atomic_fcmpset_rel_int #define atomic_swap_32 atomic_swap_int #define atomic_readandclear_32 atomic_readandclear_int #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_testandset_32 atomic_testandset_int #define atomic_testandclear_32 atomic_testandclear_int #ifdef _KERNEL /* Operations on 64-bit quad words. */ #define atomic_cmpset_acq_64 atomic_cmpset_64 #define atomic_cmpset_rel_64 atomic_cmpset_64 #define atomic_fcmpset_acq_64 atomic_fcmpset_64 #define atomic_fcmpset_rel_64 atomic_fcmpset_64 #define atomic_fetchadd_acq_64 atomic_fetchadd_64 #define atomic_fetchadd_rel_64 atomic_fetchadd_64 #define atomic_add_acq_64 atomic_add_64 #define atomic_add_rel_64 atomic_add_64 #define atomic_subtract_acq_64 atomic_subtract_64 #define atomic_subtract_rel_64 atomic_subtract_64 #define atomic_load_64 atomic_load_acq_64 #define atomic_store_64 atomic_store_rel_64 #endif /* Operations on pointers. 
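 * On i386 all pointers are 32 bits wide, so each of these is a thin wrapper
 * that casts its arguments to the corresponding _int operation.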
*/ #define atomic_set_ptr(p, v) \ atomic_set_int((volatile u_int *)(p), (u_int)(v)) #define atomic_set_acq_ptr(p, v) \ atomic_set_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_set_rel_ptr(p, v) \ atomic_set_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_ptr(p, v) \ atomic_clear_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_acq_ptr(p, v) \ atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_rel_ptr(p, v) \ atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_ptr(p, v) \ atomic_add_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_acq_ptr(p, v) \ atomic_add_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_rel_ptr(p, v) \ atomic_add_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_ptr(p, v) \ atomic_subtract_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_acq_ptr(p, v) \ atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_rel_ptr(p, v) \ atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_load_acq_ptr(p) \ atomic_load_acq_int((volatile u_int *)(p)) #define atomic_store_rel_ptr(p, v) \ atomic_store_rel_int((volatile u_int *)(p), (v)) #define atomic_cmpset_ptr(dst, old, new) \ atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new)) #define atomic_cmpset_acq_ptr(dst, old, new) \ atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \ (u_int)(new)) #define atomic_cmpset_rel_ptr(dst, old, new) \ atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \ (u_int)(new)) #define atomic_fcmpset_ptr(dst, old, new) \ atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new)) #define atomic_fcmpset_acq_ptr(dst, old, new) \ atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \ (u_int)(new)) #define atomic_fcmpset_rel_ptr(dst, old, new) \ atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \ (u_int)(new)) #define atomic_swap_ptr(p, v) \ atomic_swap_int((volatile u_int *)(p), (u_int)(v)) #define atomic_readandclear_ptr(p) \ atomic_readandclear_int((volatile u_int *)(p)) #endif /* !WANT_FUNCTIONS */ #if defined(_KERNEL) #define mb() __mbk() #define wmb() __mbk() #define rmb() __mbk() #else #define mb() __mbu() #define wmb() __mbu() #define rmb() __mbu() #endif #endif /* !_MACHINE_ATOMIC_H_ */ diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 79cdd3004b77..59ee9331cf9f 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -1,856 +1,856 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Functions to provide access to special i386 instructions. * This in included in sys/systm.h, and that file should be * used in preference to this. */ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif struct region_descriptor; #define readb(va) (*(volatile uint8_t *) (va)) #define readw(va) (*(volatile uint16_t *) (va)) #define readl(va) (*(volatile uint32_t *) (va)) #define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) #define writew(va, d) (*(volatile uint16_t *) (va) = (d)) #define writel(va, d) (*(volatile uint32_t *) (va) = (d)) -#if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE) +#if defined(__CC_SUPPORTS___INLINE) static __inline void breakpoint(void) { __asm __volatile("int $3"); } static __inline __pure2 u_int bsfl(u_int mask) { u_int result; __asm("bsfl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); return (result); } static __inline __pure2 u_int bsrl(u_int mask) { u_int result; __asm("bsrl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); return (result); } static __inline void clflush(u_long addr) { __asm __volatile("clflush %0" : : "m" (*(char *)addr)); } static __inline void clflushopt(u_long addr) { __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr)); } static __inline void clts(void) { __asm __volatile("clts"); } static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); } #ifdef _KERNEL static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } #else static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile( "pushl\t%%ebx\n\t" "cpuid\n\t" "movl\t%%ebx,%1\n\t" "popl\t%%ebx" : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile( "pushl\t%%ebx\n\t" "cpuid\n\t" "movl\t%%ebx,%1\n\t" "popl\t%%ebx" : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } #endif static __inline void enable_intr(void) { __asm __volatile("sti"); } static __inline void cpu_monitor(const void *addr, u_long extensions, u_int hints) { __asm __volatile("monitor" : : "a" (addr), "c" (extensions), "d" (hints)); } static __inline void cpu_mwait(u_long extensions, u_int hints) { __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); } static __inline void lfence(void) { __asm __volatile("lfence" : : : "memory"); } static __inline void mfence(void) { __asm __volatile("mfence" : : : "memory"); } static __inline void 
sfence(void) { __asm __volatile("sfence" : : : "memory"); } #ifdef _KERNEL #define HAVE_INLINE_FFS static __inline __pure2 int ffs(int mask) { /* * Note that gcc-2's builtin ffs would be used if we didn't declare * this inline or turn off the builtin. The builtin is faster but * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later * versions. */ return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1); } #define HAVE_INLINE_FFSL static __inline __pure2 int ffsl(long mask) { return (ffs((int)mask)); } #define HAVE_INLINE_FLS static __inline __pure2 int fls(int mask) { return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1); } #define HAVE_INLINE_FLSL static __inline __pure2 int flsl(long mask) { return (fls((int)mask)); } #endif /* _KERNEL */ static __inline void halt(void) { __asm __volatile("hlt"); } static __inline u_char inb(u_int port) { u_char data; __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline u_int inl(u_int port) { u_int data; __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void insb(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insb" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insw(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insw" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insl(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insl" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void invd(void) { __asm __volatile("invd"); } static __inline u_short inw(u_int port) { u_short data; __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void outb(u_int port, u_char data) { __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outl(u_int port, u_int data) { __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outsb(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsb" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsw(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsw" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsl(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsl" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outw(u_int port, u_short data) { __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void ia32_pause(void) { __asm __volatile("pause"); } static __inline u_int read_eflags(void) { u_int ef; __asm __volatile("pushfl; popl %0" : "=r" (ef)); return (ef); } static __inline uint64_t rdmsr(u_int msr) { uint64_t rv; __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr)); return (rv); } static __inline uint32_t rdmsr32(u_int msr) { uint32_t low; __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "edx"); return (low); } static __inline uint64_t rdpmc(u_int pmc) { uint64_t rv; __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc)); return (rv); } static __inline uint64_t rdtsc(void) { uint64_t rv; __asm __volatile("rdtsc" : "=A" (rv)); return (rv); } static __inline uint64_t rdtsc_ordered_lfence(void) { lfence(); return (rdtsc()); } static __inline uint64_t rdtsc_ordered_mfence(void) { mfence(); return (rdtsc()); } static __inline uint64_t rdtscp(void) { uint64_t rv; __asm __volatile("rdtscp" : "=A" (rv) : : "ecx"); return (rv); } static __inline uint64_t 
rdtscp_aux(uint32_t *aux) { uint64_t rv; __asm __volatile("rdtscp" : "=A" (rv), "=c" (*aux)); return (rv); } static __inline uint32_t rdtsc32(void) { uint32_t rv; __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); return (rv); } static __inline uint32_t rdtscp32(void) { uint32_t rv; __asm __volatile("rdtscp" : "=a" (rv) : : "ecx", "edx"); return (rv); } static __inline void wbinvd(void) { __asm __volatile("wbinvd"); } static __inline void write_eflags(u_int ef) { __asm __volatile("pushl %0; popfl" : : "r" (ef)); } static __inline void wrmsr(u_int msr, uint64_t newval) { __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } static __inline void load_cr0(u_int data) { __asm __volatile("movl %0,%%cr0" : : "r" (data)); } static __inline u_int rcr0(void) { u_int data; __asm __volatile("movl %%cr0,%0" : "=r" (data)); return (data); } static __inline u_int rcr2(void) { u_int data; __asm __volatile("movl %%cr2,%0" : "=r" (data)); return (data); } static __inline void load_cr3(u_int data) { __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); } static __inline u_int rcr3(void) { u_int data; __asm __volatile("movl %%cr3,%0" : "=r" (data)); return (data); } static __inline void load_cr4(u_int data) { __asm __volatile("movl %0,%%cr4" : : "r" (data)); } static __inline u_int rcr4(void) { u_int data; __asm __volatile("movl %%cr4,%0" : "=r" (data)); return (data); } static __inline uint64_t rxcr(u_int reg) { u_int low, high; __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); return (low | ((uint64_t)high << 32)); } static __inline void load_xcr(u_int reg, uint64_t val) { u_int low, high; low = val; high = val >> 32; __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); } /* * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void invltlb(void) { load_cr3(rcr3()); } /* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). 
*/ static __inline void invlpg(u_int addr) { __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } static __inline u_short rfs(void) { u_short sel; __asm __volatile("movw %%fs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t rgdt(void) { uint64_t gdtr; __asm __volatile("sgdt %0" : "=m" (gdtr)); return (gdtr); } static __inline u_short rgs(void) { u_short sel; __asm __volatile("movw %%gs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t ridt(void) { uint64_t idtr; __asm __volatile("sidt %0" : "=m" (idtr)); return (idtr); } static __inline u_short rldt(void) { u_short ldtr; __asm __volatile("sldt %0" : "=g" (ldtr)); return (ldtr); } static __inline u_short rss(void) { u_short sel; __asm __volatile("movw %%ss,%0" : "=rm" (sel)); return (sel); } static __inline u_short rtr(void) { u_short tr; __asm __volatile("str %0" : "=g" (tr)); return (tr); } static __inline void load_fs(u_short sel) { __asm __volatile("movw %0,%%fs" : : "rm" (sel)); } static __inline void load_gs(u_short sel) { __asm __volatile("movw %0,%%gs" : : "rm" (sel)); } static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); } static __inline void ltr(u_short sel) { __asm __volatile("ltr %0" : : "r" (sel)); } static __inline u_int rdr0(void) { u_int data; __asm __volatile("movl %%dr0,%0" : "=r" (data)); return (data); } static __inline void load_dr0(u_int dr0) { __asm __volatile("movl %0,%%dr0" : : "r" (dr0)); } static __inline u_int rdr1(void) { u_int data; __asm __volatile("movl %%dr1,%0" : "=r" (data)); return (data); } static __inline void load_dr1(u_int dr1) { __asm __volatile("movl %0,%%dr1" : : "r" (dr1)); } static __inline u_int rdr2(void) { u_int data; __asm __volatile("movl %%dr2,%0" : "=r" (data)); return (data); } static __inline void load_dr2(u_int dr2) { __asm __volatile("movl %0,%%dr2" : : "r" (dr2)); } static __inline u_int rdr3(void) { u_int data; __asm __volatile("movl %%dr3,%0" : "=r" (data)); return (data); } static __inline void load_dr3(u_int dr3) { __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); } static __inline u_int rdr6(void) { u_int data; __asm __volatile("movl %%dr6,%0" : "=r" (data)); return (data); } static __inline void load_dr6(u_int dr6) { __asm __volatile("movl %0,%%dr6" : : "r" (dr6)); } static __inline u_int rdr7(void) { u_int data; __asm __volatile("movl %%dr7,%0" : "=r" (data)); return (data); } static __inline void load_dr7(u_int dr7) { __asm __volatile("movl %0,%%dr7" : : "r" (dr7)); } static __inline u_char read_cyrix_reg(u_char reg) { outb(0x22, reg); return inb(0x23); } static __inline void write_cyrix_reg(u_char reg, u_char data) { outb(0x22, reg); outb(0x23, data); } static __inline register_t intr_disable(void) { register_t eflags; eflags = read_eflags(); disable_intr(); return (eflags); } static __inline void intr_restore(register_t eflags) { write_eflags(eflags); } static __inline uint32_t rdpkru(void) { uint32_t res; __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx"); return (res); } static __inline void wrpkru(uint32_t mask) { __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0)); } -#else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */ +#else /* !__CC_SUPPORTS___INLINE */ int breakpoint(void); u_int bsfl(u_int mask); u_int bsrl(u_int mask); void clflush(u_long addr); void clts(void); void cpuid_count(u_int ax, u_int cx, u_int *p); void disable_intr(void); void do_cpuid(u_int ax, u_int *p); void 
enable_intr(void); void halt(void); void ia32_pause(void); u_char inb(u_int port); u_int inl(u_int port); void insb(u_int port, void *addr, size_t count); void insl(u_int port, void *addr, size_t count); void insw(u_int port, void *addr, size_t count); register_t intr_disable(void); void intr_restore(register_t ef); void invd(void); void invlpg(u_int addr); void invltlb(void); u_short inw(u_int port); void lidt(struct region_descriptor *addr); void lldt(u_short sel); void load_cr0(u_int cr0); void load_cr3(u_int cr3); void load_cr4(u_int cr4); void load_dr0(u_int dr0); void load_dr1(u_int dr1); void load_dr2(u_int dr2); void load_dr3(u_int dr3); void load_dr6(u_int dr6); void load_dr7(u_int dr7); void load_fs(u_short sel); void load_gs(u_short sel); void ltr(u_short sel); void outb(u_int port, u_char data); void outl(u_int port, u_int data); void outsb(u_int port, const void *addr, size_t count); void outsl(u_int port, const void *addr, size_t count); void outsw(u_int port, const void *addr, size_t count); void outw(u_int port, u_short data); u_int rcr0(void); u_int rcr2(void); u_int rcr3(void); u_int rcr4(void); uint64_t rdmsr(u_int msr); uint64_t rdpmc(u_int pmc); u_int rdr0(void); u_int rdr1(void); u_int rdr2(void); u_int rdr3(void); u_int rdr6(void); u_int rdr7(void); uint64_t rdtsc(void); u_char read_cyrix_reg(u_char reg); u_int read_eflags(void); u_int rfs(void); uint64_t rgdt(void); u_int rgs(void); uint64_t ridt(void); u_short rldt(void); u_short rtr(void); void wbinvd(void); void write_cyrix_reg(u_char reg, u_char data); void write_eflags(u_int ef); void wrmsr(u_int msr, uint64_t newval); -#endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */ +#endif /* __CC_SUPPORTS___INLINE */ void reset_dbregs(void); #ifdef _KERNEL int rdmsr_safe(u_int msr, uint64_t *val); int wrmsr_safe(u_int msr, uint64_t newval); #endif #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/i386/include/ieeefp.h b/sys/i386/include/ieeefp.h index 1d92014e9209..133c40cb271e 100644 --- a/sys/i386/include/ieeefp.h +++ b/sys/i386/include/ieeefp.h @@ -1,161 +1,157 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 Andrew Moore, Talke Studio * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#) ieeefp.h 1.0 (Berkeley) 9/23/93 * $FreeBSD$ */ #ifndef _MACHINE_IEEEFP_H_ #define _MACHINE_IEEEFP_H_ /* Deprecated historical FPU control interface */ #include -#ifdef __GNUCLIKE_ASM - static __inline fp_rnd_t fpgetround(void) { unsigned short _cw; __fnstcw(&_cw); return ((fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF)); } static __inline fp_rnd_t fpsetround(fp_rnd_t _m) { fp_rnd_t _p; unsigned short _cw, _newcw; __fnstcw(&_cw); _p = (fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF); _newcw = _cw & ~FP_RND_FLD; _newcw |= (_m << FP_RND_OFF) & FP_RND_FLD; __fnldcw(_cw, _newcw); return (_p); } static __inline fp_prec_t fpgetprec(void) { unsigned short _cw; __fnstcw(&_cw); return ((fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF)); } static __inline fp_prec_t fpsetprec(fp_prec_t _m) { fp_prec_t _p; unsigned short _cw, _newcw; __fnstcw(&_cw); _p = (fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF); _newcw = _cw & ~FP_PRC_FLD; _newcw |= (_m << FP_PRC_OFF) & FP_PRC_FLD; __fnldcw(_cw, _newcw); return (_p); } /* * Get or set the exception mask. * Note that the x87 mask bits are inverted by the API -- a mask bit of 1 * means disable for x87 and SSE, but for fp*mask() it means enable. */ static __inline fp_except_t fpgetmask(void) { unsigned short _cw; __fnstcw(&_cw); return ((~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF); } static __inline fp_except_t fpsetmask(fp_except_t _m) { fp_except_t _p; unsigned short _cw, _newcw; __fnstcw(&_cw); _p = (~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF; _newcw = _cw & ~FP_MSKS_FLD; _newcw |= (~_m << FP_MSKS_OFF) & FP_MSKS_FLD; __fnldcw(_cw, _newcw); return (_p); } static __inline fp_except_t fpgetsticky(void) { unsigned _ex; unsigned short _sw; __fnstsw(&_sw); _ex = (_sw & FP_STKY_FLD) >> FP_STKY_OFF; return ((fp_except_t)_ex); } static __inline fp_except_t fpresetsticky(fp_except_t _m) { struct { unsigned _cw; unsigned _sw; unsigned _other[5]; } _env; fp_except_t _p; _m &= FP_STKY_FLD >> FP_STKY_OFF; _p = fpgetsticky(); if ((_p & ~_m) == _p) return (_p); if ((_p & ~_m) == 0) { __fnclex(); return (_p); } __fnstenv(&_env); _env._sw &= ~_m; __fldenv(&_env); return (_p); } -#endif /* __GNUCLIKE_ASM */ - #endif /* !_MACHINE_IEEEFP_H_ */ diff --git a/sys/i386/include/in_cksum.h b/sys/i386/include/in_cksum.h index 84e369cf3c81..ac6049719364 100644 --- a/sys/i386/include/in_cksum.h +++ b/sys/i386/include/in_cksum.h @@ -1,126 +1,117 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from tahoe: in_cksum.c 1.2 86/01/05 * from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91 * from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp * $FreeBSD$ */ #ifndef _MACHINE_IN_CKSUM_H_ #define _MACHINE_IN_CKSUM_H_ 1 #include #define in_cksum(m, len) in_cksum_skip(m, len, 0) /* * It it useful to have an Internet checksum routine which is inlineable * and optimized specifically for the task of computing IP header checksums * in the normal case (where there are no options and the header length is * therefore always exactly five 32-bit words. */ -#if defined(__GNUCLIKE_ASM) #if defined(IPVERSION) && (IPVERSION == 4) static __inline u_int in_cksum_hdr(const struct ip *ip) { u_int sum = 0; __asm( "addl %1, %0\n" "adcl %2, %0\n" "adcl %3, %0\n" "adcl %4, %0\n" "adcl %5, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (((const u_int32_t *)ip)[0]), "g" (((const u_int32_t *)ip)[1]), "g" (((const u_int32_t *)ip)[2]), "g" (((const u_int32_t *)ip)[3]), "g" (((const u_int32_t *)ip)[4]) : "cc" ); sum = (sum & 0xffff) + (sum >> 16); if (sum > 0xffff) sum -= 0xffff; return ~sum & 0xffff; } #endif static __inline u_short in_addword(u_short sum, u_short b) { __asm( "addw %1, %0\n" "adcw $0, %0" : "+r" (sum) : "g" (b) : "cc" ); return (sum); } static __inline u_short in_pseudo(u_int sum, u_int b, u_int c) { __asm( "addl %1, %0\n" "adcl %2, %0\n" "adcl $0, %0" : "+r" (sum) : "g" (b), "g" (c) : "cc" ); sum = (sum & 0xffff) + (sum >> 16); if (sum > 0xffff) sum -= 0xffff; return (sum); } -#endif #ifdef _KERNEL #define HAVE_MD_IN_CKSUM -#if !defined(__GNUCLIKE_ASM) -#if defined(IPVERSION) && (IPVERSION == 4) -u_int in_cksum_hdr(const struct ip *ip); -#endif -u_short in_addword(u_short sum, u_short b); -u_short in_pseudo(u_int sum, u_int b, u_int c); -#endif u_short in_cksum_skip(struct mbuf *m, int len, int skip); #endif /* _KERNEL */ #endif /* _MACHINE_IN_CKSUM_H_ */ diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index a4c7968ea85f..c4099f04ded1 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -1,217 +1,217 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifndef _SYS_CDEFS_H_ #error "sys/cdefs.h is a prerequisite for this file" #endif #include #include #include #include struct monitorbuf { int idle_state; /* Used by cpu_idle_mwait. */ int stop_state; /* Used by cpustop_handler. */ char padding[128 - (2 * sizeof(int))]; }; _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line"); /* * The SMP parts are setup in pmap.c and machdep.c for the BSP, and * pmap.c and mp_machdep.c sets up the data for the AP's to "see" when * they awake. The reason for doing it via a struct is so that an * array of pointers to each CPU's data can be set up for things like * "check curproc on all other processors" */ #define PCPU_MD_FIELDS \ struct monitorbuf pc_monitorbuf __aligned(128); /* cache line */\ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct segment_descriptor pc_common_tssd; \ struct segment_descriptor *pc_tss_gdt; \ struct segment_descriptor *pc_fsgs_gdt; \ struct i386tss *pc_common_tssp; \ u_int pc_kesp0; \ u_int pc_trampstk; \ int pc_currentldt; \ u_int pc_acpi_id; /* ACPI CPU id */ \ u_int pc_apic_id; \ int pc_private_tss; /* Flag indicating private tss*/\ u_int pc_cmci_mask; /* MCx banks for CMCI */ \ u_int pc_vcpu_id; /* Xen vCPU ID */ \ struct mtx pc_cmap_lock; \ void *pc_cmap_pte1; \ void *pc_cmap_pte2; \ caddr_t pc_cmap_addr1; \ caddr_t pc_cmap_addr2; \ vm_offset_t pc_qmap_addr; /* KVA for temporary mappings */\ vm_offset_t pc_copyout_maddr; \ vm_offset_t pc_copyout_saddr; \ struct mtx pc_copyout_mlock; \ struct sx pc_copyout_slock; \ char *pc_copyout_buf; \ vm_offset_t pc_pmap_eh_va; \ caddr_t pc_pmap_eh_ptep; \ uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \ uint32_t pc_ibpb_set; \ void *pc_mds_buf; \ void *pc_mds_buf64; \ uint32_t pc_pad[4]; \ uint8_t pc_mds_tmp[64]; \ u_int pc_ipi_bitmap; \ char __pad[3518] #ifdef _KERNEL #define MONITOR_STOPSTATE_RUNNING 0 #define MONITOR_STOPSTATE_STOPPED 1 -#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) +#if defined(__GNUCLIKE___TYPEOF) /* * Evaluates to the byte offset of the per-cpu variable name. */ #define __pcpu_offset(name) \ __offsetof(struct pcpu, name) /* * Evaluates to the type of the per-cpu variable name. */ #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) /* * Evaluates to the address of the per-cpu variable name. 
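 *
 * These helpers are normally consumed through the PCPU_GET()/PCPU_ADD()/
 * PCPU_PTR()/PCPU_SET() wrappers defined at the end of this block.  A
 * rough usage sketch (illustrative only; the members touched here are
 * arbitrary examples, not a recommendation):
 *
 *	u_int id;
 *
 *	critical_enter();		// keep the thread on this CPU
 *	id = PCPU_GET(cpuid);		// one %fs-relative load
 *	PCPU_SET(apic_id, id);		// one %fs-relative store
 *	critical_exit();
 *
 * For members of 1, 2 or 4 bytes the asm below turns such an access into
 * a single instruction; larger members fall back to dereferencing the
 * pointer produced by __PCPU_PTR().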
*/ #define __PCPU_PTR(name) __extension__ ({ \ __pcpu_type(name) *__p; \ \ __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ \ __p; \ }) /* * Evaluates to the value of the per-cpu variable name. */ #define __PCPU_GET(name) __extension__ ({ \ __pcpu_type(name) __res; \ struct __s { \ u_char __b[MIN(sizeof(__res), 4)]; \ } __s; \ \ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ sizeof(__res) == 4) { \ __asm __volatile("mov %%fs:%1,%0" \ : "=r" (__s) \ : "m" (*(struct __s *)(__pcpu_offset(name)))); \ *(struct __s *)(void *)&__res = __s; \ } else { \ __res = *__PCPU_PTR(name); \ } \ __res; \ }) /* * Adds a value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_ADD(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("add %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else \ *__PCPU_PTR(name) += __val; \ } while (0) /* * Sets the value of the per-cpu variable name to value val. */ #define __PCPU_SET(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("mov %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ } while (0) #define get_pcpu() __extension__ ({ \ struct pcpu *__pc; \ \ __asm __volatile("movl %%fs:%1,%0" \ : "=r" (__pc) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace)))); \ __pc; \ }) #define PCPU_GET(member) __PCPU_GET(pc_ ## member) #define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) #define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) #define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) #define IS_BSP() (PCPU_GET(cpuid) == 0) -#else /* defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ +#else /* defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" -#endif /* __GNUCLIKE_ASM etc. */ +#endif /* __GNUCLIKE___TYPEOF */ #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h index 8bb3c7af748c..e834abe33c37 100644 --- a/sys/i386/include/profile.h +++ b/sys/i386/include/profile.h @@ -1,98 +1,92 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)profile.h 8.1 (Berkeley) 6/11/93 * $FreeBSD$ */ #ifndef _MACHINE_PROFILE_H_ #define _MACHINE_PROFILE_H_ #ifndef _KERNEL #include #define FUNCTION_ALIGNMENT 4 #define _MCOUNT_DECL static __inline void _mcount -#ifdef __GNUCLIKE_ASM #define MCOUNT \ void \ mcount() \ { \ uintfptr_t selfpc, frompc, ecx; \ /* \ * In gcc 4.2, ecx might be used in the caller as the arg \ * pointer if the stack realignment option is set (-mstackrealign) \ * or if the caller has the force_align_arg_pointer attribute \ * (stack realignment is ALWAYS on for main). Preserve ecx \ * here. \ */ \ __asm("" : "=c" (ecx)); \ /* \ * Find the return address for mcount, \ * and the return address for mcount's caller. \ * \ * selfpc = pc pushed by call to mcount \ */ \ __asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \ /* \ * frompc = pc pushed by call to mcount's caller. \ * The caller's stack frame has already been built, so %ebp is \ * the caller's frame pointer. The caller's raddr is in the \ * caller's frame following the caller's caller's frame pointer.\ */ \ __asm("movl (%%ebp),%0" : "=r" (frompc)); \ frompc = ((uintfptr_t *)frompc)[1]; \ _mcount(frompc, selfpc); \ __asm("" : : "c" (ecx)); \ } -#else /* !__GNUCLIKE_ASM */ -#define MCOUNT -#endif /* __GNUCLIKE_ASM */ typedef u_int uintfptr_t; /* * An unsigned integral type that can hold non-negative difference between * function pointers. */ typedef u_int fptrdiff_t; __BEGIN_DECLS -#ifdef __GNUCLIKE_ASM void mcount(void) __asm(".mcount"); -#endif __END_DECLS #endif /* !_KERNEL */ #endif /* !_MACHINE_PROFILE_H_ */ diff --git a/sys/x86/include/bus.h b/sys/x86/include/bus.h index 9522e5db7c78..ccd4ba26e387 100644 --- a/sys/x86/include/bus.h +++ b/sys/x86/include/bus.h @@ -1,1126 +1,1084 @@ /*- * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause-NetBSDE * * Copyright (c) KATO Takenori, 1999. * * All rights reserved. Unpublished rights reserved under the copyright * laws of Japan. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _MACHINE_BUS_H_ #define _MACHINE_BUS_H_ #include #include #include -#ifndef __GNUCLIKE_ASM -#error "no assembler code for your compiler" -#endif - /* * Values for the x86 bus space tag, not to be used directly by MI code. */ #define X86_BUS_SPACE_IO 0 /* space is i/o space */ #define X86_BUS_SPACE_MEM 1 /* space is mem space */ #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #if defined(__amd64__) #define BUS_SPACE_MAXSIZE 0xFFFFFFFFFFFFFFFFULL #else #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #endif #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF #if defined(__amd64__) || defined(PAE) #define BUS_SPACE_MAXADDR_48BIT 0xFFFFFFFFFFFFULL #define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL #else #define BUS_SPACE_MAXADDR 0xFFFFFFFF #endif #define BUS_SPACE_INVALID_DATA (~0) #define BUS_SPACE_UNRESTRICTED (~0) #define BUS_SPACE_BARRIER_READ 0x01 /* force read barrier */ #define BUS_SPACE_BARRIER_WRITE 0x02 /* force write barrier */ #if defined(SAN_NEEDS_INTERCEPTORS) && !defined(SAN_RUNTIME) #include #else /* * Map a region of device bus space into CPU virtual address space. */ int bus_space_map(bus_space_tag_t tag, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ void bus_space_unmap(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t size); /* * Get a new handle for a subregion of an already-mapped area of bus space. */ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. */ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. 
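 *
 * A minimal usage sketch (the physical address, size and register offset
 * below are made up for illustration, and error handling is omitted):
 *
 *	bus_space_tag_t t = X86_BUS_SPACE_MEM;
 *	bus_space_handle_t h;
 *	uint32_t status;
 *
 *	if (bus_space_map(t, 0xfebf0000, 0x1000, 0, &h) == 0) {
 *		status = bus_space_read_4(t, h, 0x04);	// 32-bit register read
 *		bus_space_unmap(t, h, 0x1000);
 *	}
 *
 * With X86_BUS_SPACE_IO as the tag the same calls perform port I/O; the
 * inlines below merely select between in*()/out*() and a volatile memory
 * dereference according to the tag.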
*/ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); #ifdef __amd64__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); #endif static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) return (inb(handle + offset)); return (*(volatile u_int8_t *)(handle + offset)); } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) return (inw(handle + offset)); return (*(volatile u_int16_t *)(handle + offset)); } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) return (inl(handle + offset)); return (*(volatile u_int32_t *)(handle + offset)); } #ifdef __amd64__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { if (tag == X86_BUS_SPACE_IO) /* No 8 byte IO space access on x86 */ return (BUS_SPACE_INVALID_DATA); return (*(volatile uint64_t *)(handle + offset)); } #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. */ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) insb(bsh + offset, addr, count); else { -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); -#endif } } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) insw(bsh + offset, addr, count); else { -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); -#endif } } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) insl(bsh + offset, addr, count); else { -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); -#endif } } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
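 *
 * Contrast with the *_multi_* routines above.  A sketch, reusing the tag
 * and handle from the previous example with a purely illustrative offset:
 *
 *	uint8_t buf[16];
 *
 *	// read the same location 16 times, e.g. draining a FIFO that is
 *	// exposed at a single register offset
 *	bus_space_read_multi_1(t, h, 0x00, buf, 16);
 *
 *	// read 16 consecutive bytes starting at that offset
 *	bus_space_read_region_1(t, h, 0x00, buf, 16);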
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); -#endif } else { bus_space_handle_t _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); -#endif } } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); -#endif } else { bus_space_handle_t _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); -#endif } } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); -#endif } else { bus_space_handle_t _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); -#endif } } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); #ifdef __amd64__ static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value); #endif static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { if (tag == X86_BUS_SPACE_IO) outb(bsh + offset, value); else *(volatile u_int8_t *)(bsh + offset) = value; } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { if (tag == X86_BUS_SPACE_IO) outw(bsh + offset, value); else *(volatile u_int16_t *)(bsh + offset) = value; } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { if (tag == X86_BUS_SPACE_IO) outl(bsh + offset, value); else *(volatile u_int32_t *)(bsh + offset) = value; } #ifdef __amd64__ static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value) { if (tag == X86_BUS_SPACE_IO) /* No 8 byte IO space access on x86 */ return; else *(volatile uint64_t *)(bsh + offset) = value; } #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) outsb(bsh + offset, addr, count); else { -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); -#endif } } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) outsw(bsh + offset, addr, count); else { -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); -#endif } } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) outsl(bsh + offset, addr, count); else { -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); -#endif } } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! 
#endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); -#endif } else { bus_space_handle_t _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); -#endif } } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); -#endif } else { bus_space_handle_t _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); -#endif } } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { if (tag == X86_BUS_SPACE_IO) { int _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); -#endif } else { bus_space_handle_t _port_ = bsh + offset; -#ifdef __GNUCLIKE_ASM __asm __volatile(" \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); -#endif } } #if 0 /* Cause a link error for bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. 
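 *
 * As with the reads and writes above, "multi" means the same
 * tag/handle/offset location is written `count' times (a FIFO-style
 * target), while the "region" variants further down advance the address
 * on every iteration.  A sketch with purely illustrative offsets:
 *
 *	bus_space_set_multi_1(t, h, 0x00, 0xff, 64);	// 64 writes, one offset
 *	bus_space_set_region_1(t, h, 0x10, 0x00, 64);	// zero a 64-byte window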
*/ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) while (count--) outb(addr, value); else while (count--) *(volatile u_int8_t *)(addr) = value; } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) while (count--) outw(addr, value); else while (count--) *(volatile u_int16_t *)(addr) = value; } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) while (count--) outl(addr, value); else while (count--) *(volatile u_int32_t *)(addr) = value; } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. */ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) for (; count != 0; count--, addr++) outb(addr, value); else for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) for (; count != 0; count--, addr += 2) outw(addr, value); else for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; if (tag == X86_BUS_SPACE_IO) for (; count != 0; count--, addr += 4) outl(addr, value); else for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
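 *
 * The implementations below choose the copy direction from the relative
 * addresses, so overlapping ranges within one mapping behave like
 * memmove(3) rather than memcpy(3).  A sketch with illustrative offsets:
 *
 *	// shift 32 16-bit words up by one slot within the same handle
 *	bus_space_copy_region_2(t, h, 0x00, h, 0x02, 32);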
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; if (tag == X86_BUS_SPACE_IO) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; if (tag == X86_BUS_SPACE_IO) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; if (tag == X86_BUS_SPACE_IO) { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } else { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } } } #if 0 /* Cause a link error for bus_space_copy_8 */ #define bus_space_copy_region_8 !!! bus_space_copy_region_8 unimplemented !!! 
#endif /* * Bus read/write barrier methods. * * void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh, * bus_size_t offset, bus_size_t len, int flags); * * * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than * prevent reordering by the compiler; all Intel x86 processors currently * retire operations outside the CPU in program order. */ static __inline void bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, bus_size_t offset __unused, bus_size_t len __unused, int flags) { -#ifdef __GNUCLIKE_ASM if (flags & BUS_SPACE_BARRIER_READ) #ifdef __amd64__ __asm __volatile("lock; addl $0,0(%%rsp)" : : : "memory"); #else __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); #endif else __compiler_membar(); -#endif } #ifdef BUS_SPACE_NO_LEGACY #undef inb #undef outb #define inb(a) compiler_error #define inw(a) compiler_error #define inl(a) compiler_error #define outb(a, b) compiler_error #define outw(a, b) compiler_error #define outl(a, b) compiler_error #endif /* * Stream accesses are the same as normal accesses on x86; there are no * supported bus systems with an endianess different from the host one. */ #define bus_space_read_stream_1(t, h, o) bus_space_read_1((t), (h), (o)) #define bus_space_read_stream_2(t, h, o) bus_space_read_2((t), (h), (o)) #define bus_space_read_stream_4(t, h, o) bus_space_read_4((t), (h), (o)) #define bus_space_read_multi_stream_1(t, h, o, a, c) \ bus_space_read_multi_1((t), (h), (o), (a), (c)) #define bus_space_read_multi_stream_2(t, h, o, a, c) \ bus_space_read_multi_2((t), (h), (o), (a), (c)) #define bus_space_read_multi_stream_4(t, h, o, a, c) \ bus_space_read_multi_4((t), (h), (o), (a), (c)) #define bus_space_write_stream_1(t, h, o, v) \ bus_space_write_1((t), (h), (o), (v)) #define bus_space_write_stream_2(t, h, o, v) \ bus_space_write_2((t), (h), (o), (v)) #define bus_space_write_stream_4(t, h, o, v) \ bus_space_write_4((t), (h), (o), (v)) #define bus_space_write_multi_stream_1(t, h, o, a, c) \ bus_space_write_multi_1((t), (h), (o), (a), (c)) #define bus_space_write_multi_stream_2(t, h, o, a, c) \ bus_space_write_multi_2((t), (h), (o), (a), (c)) #define bus_space_write_multi_stream_4(t, h, o, a, c) \ bus_space_write_multi_4((t), (h), (o), (a), (c)) #define bus_space_set_multi_stream_1(t, h, o, v, c) \ bus_space_set_multi_1((t), (h), (o), (v), (c)) #define bus_space_set_multi_stream_2(t, h, o, v, c) \ bus_space_set_multi_2((t), (h), (o), (v), (c)) #define bus_space_set_multi_stream_4(t, h, o, v, c) \ bus_space_set_multi_4((t), (h), (o), (v), (c)) #define bus_space_read_region_stream_1(t, h, o, a, c) \ bus_space_read_region_1((t), (h), (o), (a), (c)) #define bus_space_read_region_stream_2(t, h, o, a, c) \ bus_space_read_region_2((t), (h), (o), (a), (c)) #define bus_space_read_region_stream_4(t, h, o, a, c) \ bus_space_read_region_4((t), (h), (o), (a), (c)) #define bus_space_write_region_stream_1(t, h, o, a, c) \ bus_space_write_region_1((t), (h), (o), (a), (c)) #define bus_space_write_region_stream_2(t, h, o, a, c) \ bus_space_write_region_2((t), (h), (o), (a), (c)) #define bus_space_write_region_stream_4(t, h, o, a, c) \ bus_space_write_region_4((t), (h), (o), (a), (c)) #define bus_space_set_region_stream_1(t, h, o, v, c) \ bus_space_set_region_1((t), (h), (o), (v), (c)) #define bus_space_set_region_stream_2(t, h, o, v, c) \ bus_space_set_region_2((t), (h), (o), (v), (c)) #define bus_space_set_region_stream_4(t, h, o, v, c) \ bus_space_set_region_4((t), (h), (o), (v), (c)) #define 
bus_space_copy_region_stream_1(t, h1, o1, h2, o2, c) \ bus_space_copy_region_1((t), (h1), (o1), (h2), (o2), (c)) #define bus_space_copy_region_stream_2(t, h1, o1, h2, o2, c) \ bus_space_copy_region_2((t), (h1), (o1), (h2), (o2), (c)) #define bus_space_copy_region_stream_4(t, h1, o1, h2, o2, c) \ bus_space_copy_region_4((t), (h1), (o1), (h2), (o2), (c)) #define BUS_PEEK_FUNC(width, type) \ static inline int \ bus_space_peek_##width(bus_space_tag_t tag, \ bus_space_handle_t hnd, bus_size_t offset, type *value) \ { \ type tmp; \ tmp = bus_space_read_##width(tag, hnd, offset); \ *value = (type)tmp; \ return (0); \ } BUS_PEEK_FUNC(1, uint8_t) BUS_PEEK_FUNC(2, uint16_t) BUS_PEEK_FUNC(4, uint32_t) #ifdef __amd64__ BUS_PEEK_FUNC(8, uint64_t) #endif #define BUS_POKE_FUNC(width, type) \ static inline int \ bus_space_poke_##width(bus_space_tag_t tag, \ bus_space_handle_t hnd, bus_size_t offset, type value) \ { \ bus_space_write_##width(tag, hnd, offset, value); \ return (0); \ } BUS_POKE_FUNC(1, uint8_t) BUS_POKE_FUNC(2, uint16_t) BUS_POKE_FUNC(4, uint32_t) #ifdef __amd64__ BUS_POKE_FUNC(8, uint64_t) #endif #endif /* !SAN_NEEDS_INTERCEPTORS && SAN_RUNTIME */ #endif /* !_MACHINE_BUS_H_ */ diff --git a/sys/x86/include/x86_ieeefp.h b/sys/x86/include/x86_ieeefp.h index 6412a7dfac1d..1bbcd7f01a96 100644 --- a/sys/x86/include/x86_ieeefp.h +++ b/sys/x86/include/x86_ieeefp.h @@ -1,152 +1,148 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 Andrew Moore, Talke Studio * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#) ieeefp.h 1.0 (Berkeley) 9/23/93 * $FreeBSD$ */ #ifndef _X86_X86_IEEEFP_H_ #define _X86_X86_IEEEFP_H_ /* Deprecated historical FPU control interface */ /* * IEEE floating point type, constant and function definitions. * XXX: {FP,SSE}*FLD and {FP,SSE}*OFF are undocumented pollution. 
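 *
 * A sketch of how this interface has historically been used from
 * userland (illustrative only; new code should use fenv(3) instead):
 *
 *	#include <ieeefp.h>
 *
 *	fp_rnd_t ornd;
 *
 *	ornd = fpsetround(FP_RZ);	// truncate results from here on
 *	// ... work that relies on round-toward-zero ...
 *	fpsetround(ornd);		// restore the previous rounding mode
 *
 * fpsetround() and friends are small inlines around fnstcw/fldcw (see the
 * machine-dependent ieeefp.h), which is why the control and status word
 * bit-field masks and shift counts are defined below.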
*/ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif /* * Rounding modes. */ typedef enum { FP_RN=0, /* round to nearest */ FP_RM, /* round down towards minus infinity */ FP_RP, /* round up towards plus infinity */ FP_RZ /* truncate */ } fp_rnd_t; /* * Precision (i.e., rounding precision) modes. */ typedef enum { FP_PS=0, /* 24 bit (single-precision) */ FP_PRS, /* reserved */ FP_PD, /* 53 bit (double-precision) */ FP_PE /* 64 bit (extended-precision) */ } fp_prec_t; #define fp_except_t int /* * Exception bit masks. */ #define FP_X_INV 0x01 /* invalid operation */ #define FP_X_DNML 0x02 /* denormal */ #define FP_X_DZ 0x04 /* zero divide */ #define FP_X_OFL 0x08 /* overflow */ #define FP_X_UFL 0x10 /* underflow */ #define FP_X_IMP 0x20 /* (im)precision */ #define FP_X_STK 0x40 /* stack fault */ /* * FPU control word bit-field masks. */ #define FP_MSKS_FLD 0x3f /* exception masks field */ #define FP_PRC_FLD 0x300 /* precision control field */ #define FP_RND_FLD 0xc00 /* rounding control field */ /* * FPU status word bit-field masks. */ #define FP_STKY_FLD 0x3f /* sticky flags field */ /* * FPU control word bit-field offsets (shift counts). */ #define FP_MSKS_OFF 0 /* exception masks offset */ #define FP_PRC_OFF 8 /* precision control offset */ #define FP_RND_OFF 10 /* rounding control offset */ /* * FPU status word bit-field offsets (shift counts). */ #define FP_STKY_OFF 0 /* sticky flags offset */ -#ifdef __GNUCLIKE_ASM - #define __fldcw(addr) __asm __volatile("fldcw %0" : : "m" (*(addr))) #define __fldenv(addr) __asm __volatile("fldenv %0" : : "m" (*(addr))) #define __fnclex() __asm __volatile("fnclex") #define __fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define __fnstenv(addr) __asm __volatile("fnstenv %0" : "=m" (*(addr))) #define __fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) #define __ldmxcsr(addr) __asm __volatile("ldmxcsr %0" : : "m" (*(addr))) #define __stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr))) /* * Load the control word. Be careful not to trap if there is a currently * unmasked exception (ones that will become freshly unmasked are not a * problem). This case must be handled by a save/restore of the * environment or even of the full x87 state. Accessing the environment * is very inefficient, so only do it when necessary. */ static __inline void __fnldcw(unsigned short _cw, unsigned short _newcw) { struct { unsigned _cw; unsigned _other[6]; } _env; unsigned short _sw; if ((_cw & FP_MSKS_FLD) != FP_MSKS_FLD) { __fnstsw(&_sw); if (((_sw & ~_cw) & FP_STKY_FLD) != 0) { __fnstenv(&_env); _env._cw = _newcw; __fldenv(&_env); return; } } __fldcw(&_newcw); } -#endif /* __GNUCLIKE_ASM */ - #endif/* _X86_X86_IEEEFP_H_ */ diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index cf75d00683f1..4ccbd3da311f 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -1,2674 +1,2670 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * Copyright (c) 1997 KATO Takenori. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define IDENTBLUE_CYRIX486 0 #define IDENTBLUE_IBMCPU 1 #define IDENTBLUE_CYRIXM2 2 static void identifycyrix(void); static void print_transmeta_info(void); #endif static u_int find_cpu_vendor_id(void); static void print_AMD_info(void); static void print_INTEL_info(void); static void print_INTEL_TLB(u_int data); static void print_hypervisor_info(void); static void print_svm_info(void); static void print_via_padlock_info(void); static void print_vmx_info(void); #ifdef __i386__ int cpu; /* Are we 386, 386sx, 486, etc? 
*/ int cpu_class; #endif u_int cpu_feature; /* Feature flags */ u_int cpu_feature2; /* Feature flags */ u_int amd_feature; /* AMD feature flags */ u_int amd_feature2; /* AMD feature flags */ u_int amd_rascap; /* AMD RAS capabilities */ u_int amd_pminfo; /* AMD advanced power management info */ u_int amd_extended_feature_extensions; u_int via_feature_rng; /* VIA RNG features */ u_int via_feature_xcrypt; /* VIA ACE features */ u_int cpu_high; /* Highest arg to CPUID */ u_int cpu_exthigh; /* Highest arg to extended CPUID */ u_int cpu_id; /* Stepping ID */ u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */ u_int cpu_procinfo2; /* Multicore info */ char cpu_vendor[20]; /* CPU Origin code */ u_int cpu_vendor_id; /* CPU vendor ID */ u_int cpu_fxsr; /* SSE enabled */ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ u_int cpu_clflush_line_size = 32; u_int cpu_stdext_feature; /* %ebx */ u_int cpu_stdext_feature2; /* %ecx */ u_int cpu_stdext_feature3; /* %edx */ uint64_t cpu_ia32_arch_caps; u_int cpu_max_ext_state_size; u_int cpu_mon_mwait_flags; /* MONITOR/MWAIT flags (CPUID.05H.ECX) */ u_int cpu_mon_min_size; /* MONITOR minimum range size, bytes */ u_int cpu_mon_max_size; /* MONITOR minimum range size, bytes */ u_int cpu_maxphyaddr; /* Max phys addr width in bits */ u_int cpu_power_eax; /* 06H: Power management leaf, %eax */ u_int cpu_power_ebx; /* 06H: Power management leaf, %ebx */ u_int cpu_power_ecx; /* 06H: Power management leaf, %ecx */ u_int cpu_power_edx; /* 06H: Power management leaf, %edx */ char machine[] = MACHINE; SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, &via_feature_rng, 0, "VIA RNG feature available in CPU"); SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD, &via_feature_xcrypt, 0, "VIA xcrypt feature available in CPU"); #ifdef __amd64__ #ifdef SCTL_MASK32 extern int adaptive_machine_arch; #endif static int sysctl_hw_machine(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 static const char machine32[] = "i386"; #endif int error; #ifdef SCTL_MASK32 if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) error = SYSCTL_OUT(req, machine32, sizeof(machine32)); else #endif error = SYSCTL_OUT(req, machine, sizeof(machine)); return (error); } SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class"); #else SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); #endif static char cpu_model[128]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE, cpu_model, 0, "Machine model"); static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); u_int hv_base; u_int hv_high; char hv_vendor[16]; SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD | CTLFLAG_MPSAFE, hv_vendor, 0, "Hypervisor vendor"); static eventhandler_tag tsc_post_tag; static char cpu_brand[48]; #ifdef __i386__ #define MAX_BRAND_INDEX 8 static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = { NULL, /* No brand */ "Intel Celeron", "Intel Pentium III", "Intel Pentium III Xeon", NULL, NULL, NULL, NULL, "Intel Pentium 4" }; static struct { char *cpu_name; int cpu_class; } cpus[] = { { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ { "i386DX", CPUCLASS_386 }, /* CPU_386 */ { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ { "i486DX", CPUCLASS_486 }, /* CPU_486 */ { "Pentium", CPUCLASS_586 }, /* CPU_586 */ { "Cyrix 486", CPUCLASS_486 }, /* CPU_486DLC */ { "Pentium Pro", CPUCLASS_686 }, 
/* CPU_686 */ { "Cyrix 5x86", CPUCLASS_486 }, /* CPU_M1SC */ { "Cyrix 6x86", CPUCLASS_486 }, /* CPU_M1 */ { "Blue Lightning", CPUCLASS_486 }, /* CPU_BLUE */ { "Cyrix 6x86MX", CPUCLASS_686 }, /* CPU_M2 */ { "NexGen 586", CPUCLASS_386 }, /* CPU_NX586 (XXX) */ { "Cyrix 486S/DX", CPUCLASS_486 }, /* CPU_CY486DX */ { "Pentium II", CPUCLASS_686 }, /* CPU_PII */ { "Pentium III", CPUCLASS_686 }, /* CPU_PIII */ { "Pentium 4", CPUCLASS_686 }, /* CPU_P4 */ }; #endif static struct { char *vendor; u_int vendor_id; } cpu_vendors[] = { { INTEL_VENDOR_ID, CPU_VENDOR_INTEL }, /* GenuineIntel */ { AMD_VENDOR_ID, CPU_VENDOR_AMD }, /* AuthenticAMD */ { HYGON_VENDOR_ID, CPU_VENDOR_HYGON }, /* HygonGenuine */ { CENTAUR_VENDOR_ID, CPU_VENDOR_CENTAUR }, /* CentaurHauls */ #ifdef __i386__ { NSC_VENDOR_ID, CPU_VENDOR_NSC }, /* Geode by NSC */ { CYRIX_VENDOR_ID, CPU_VENDOR_CYRIX }, /* CyrixInstead */ { TRANSMETA_VENDOR_ID, CPU_VENDOR_TRANSMETA }, /* GenuineTMx86 */ { SIS_VENDOR_ID, CPU_VENDOR_SIS }, /* SiS SiS SiS */ { UMC_VENDOR_ID, CPU_VENDOR_UMC }, /* UMC UMC UMC */ { NEXGEN_VENDOR_ID, CPU_VENDOR_NEXGEN }, /* NexGenDriven */ { RISE_VENDOR_ID, CPU_VENDOR_RISE }, /* RiseRiseRise */ #if 0 /* XXX CPUID 8000_0000h and 8086_0000h, not 0000_0000h */ { "TransmetaCPU", CPU_VENDOR_TRANSMETA }, #endif #endif }; void printcpuinfo(void) { u_int regs[4], i; char *brand; printf("CPU: "); #ifdef __i386__ cpu_class = cpus[cpu].cpu_class; strncpy(cpu_model, cpus[cpu].cpu_name, sizeof (cpu_model)); #else strncpy(cpu_model, "Hammer", sizeof (cpu_model)); #endif /* Check for extended CPUID information and a processor name. */ if (cpu_exthigh >= 0x80000004) { brand = cpu_brand; for (i = 0x80000002; i < 0x80000005; i++) { do_cpuid(i, regs); memcpy(brand, regs, sizeof(regs)); brand += sizeof(regs); } } switch (cpu_vendor_id) { case CPU_VENDOR_INTEL: #ifdef __i386__ if ((cpu_id & 0xf00) > 0x300) { u_int brand_index; cpu_model[0] = '\0'; switch (cpu_id & 0x3000) { case 0x1000: strcpy(cpu_model, "Overdrive "); break; case 0x2000: strcpy(cpu_model, "Dual "); break; } switch (cpu_id & 0xf00) { case 0x400: strcat(cpu_model, "i486 "); /* Check the particular flavor of 486 */ switch (cpu_id & 0xf0) { case 0x00: case 0x10: strcat(cpu_model, "DX"); break; case 0x20: strcat(cpu_model, "SX"); break; case 0x30: strcat(cpu_model, "DX2"); break; case 0x40: strcat(cpu_model, "SL"); break; case 0x50: strcat(cpu_model, "SX2"); break; case 0x70: strcat(cpu_model, "DX2 Write-Back Enhanced"); break; case 0x80: strcat(cpu_model, "DX4"); break; } break; case 0x500: /* Check the particular flavor of 586 */ strcat(cpu_model, "Pentium"); switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, " A-step"); break; case 0x10: strcat(cpu_model, "/P5"); break; case 0x20: strcat(cpu_model, "/P54C"); break; case 0x30: strcat(cpu_model, "/P24T"); break; case 0x40: strcat(cpu_model, "/P55C"); break; case 0x70: strcat(cpu_model, "/P54C"); break; case 0x80: strcat(cpu_model, "/P55C (quarter-micron)"); break; default: /* nothing */ break; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) /* * XXX - If/when Intel fixes the bug, this * should also check the version of the * CPU, not just that it's a Pentium. 
*/ has_f00f_bug = 1; #endif break; case 0x600: /* Check the particular flavor of 686 */ switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, "Pentium Pro A-step"); break; case 0x10: strcat(cpu_model, "Pentium Pro"); break; case 0x30: case 0x50: case 0x60: strcat(cpu_model, "Pentium II/Pentium II Xeon/Celeron"); cpu = CPU_PII; break; case 0x70: case 0x80: case 0xa0: case 0xb0: strcat(cpu_model, "Pentium III/Pentium III Xeon/Celeron"); cpu = CPU_PIII; break; default: strcat(cpu_model, "Unknown 80686"); break; } break; case 0xf00: strcat(cpu_model, "Pentium 4"); cpu = CPU_P4; break; default: strcat(cpu_model, "unknown"); break; } /* * If we didn't get a brand name from the extended * CPUID, try to look it up in the brand table. */ if (cpu_high > 0 && *cpu_brand == '\0') { brand_index = cpu_procinfo & CPUID_BRAND_INDEX; if (brand_index <= MAX_BRAND_INDEX && cpu_brandtable[brand_index] != NULL) strcpy(cpu_brand, cpu_brandtable[brand_index]); } } #else /* Please make up your mind folks! */ strcat(cpu_model, "EM64T"); #endif break; case CPU_VENDOR_AMD: /* * Values taken from AMD Processor Recognition * http://www.amd.com/K6/k6docs/pdf/20734g.pdf * (also describes ``Features'' encodings. */ strcpy(cpu_model, "AMD "); #ifdef __i386__ switch (cpu_id & 0xFF0) { case 0x410: strcat(cpu_model, "Standard Am486DX"); break; case 0x430: strcat(cpu_model, "Enhanced Am486DX2 Write-Through"); break; case 0x470: strcat(cpu_model, "Enhanced Am486DX2 Write-Back"); break; case 0x480: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through"); break; case 0x490: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back"); break; case 0x4E0: strcat(cpu_model, "Am5x86 Write-Through"); break; case 0x4F0: strcat(cpu_model, "Am5x86 Write-Back"); break; case 0x500: strcat(cpu_model, "K5 model 0"); break; case 0x510: strcat(cpu_model, "K5 model 1"); break; case 0x520: strcat(cpu_model, "K5 PR166 (model 2)"); break; case 0x530: strcat(cpu_model, "K5 PR200 (model 3)"); break; case 0x560: strcat(cpu_model, "K6"); break; case 0x570: strcat(cpu_model, "K6 266 (model 1)"); break; case 0x580: strcat(cpu_model, "K6-2"); break; case 0x590: strcat(cpu_model, "K6-III"); break; case 0x5a0: strcat(cpu_model, "Geode LX"); break; default: strcat(cpu_model, "Unknown"); break; } #else if ((cpu_id & 0xf00) == 0xf00) strcat(cpu_model, "AMD64 Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_CYRIX: strcpy(cpu_model, "Cyrix "); switch (cpu_id & 0xff0) { case 0x440: strcat(cpu_model, "MediaGX"); break; case 0x520: strcat(cpu_model, "6x86"); break; case 0x540: cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); break; case 0x600: strcat(cpu_model, "6x86MX"); break; default: /* * Even though CPU supports the cpuid * instruction, it can be disabled. * Therefore, this routine supports all Cyrix * CPUs. 
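 *
 * A note on the masks used throughout this switch: bits 0-3 of cpu_id
 * hold the stepping, bits 4-7 the model and bits 8-11 the family, so
 * "cpu_id & 0xff0" selects family and model together while
 * "cpu_id & 0xf00" selects the family alone.  Spelled out (basic fields
 * only; CPUID_TO_FAMILY()/CPUID_TO_MODEL() also fold in the extended
 * family/model bits used by newer CPUs):
 *
 *	stepping = cpu_id & 0x00f;
 *	model    = (cpu_id & 0x0f0) >> 4;
 *	family   = (cpu_id & 0xf00) >> 8;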
*/ switch (cyrix_did & 0xf0) { case 0x00: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486SLC"); break; case 0x01: strcat(cpu_model, "486DLC"); break; case 0x02: strcat(cpu_model, "486SLC2"); break; case 0x03: strcat(cpu_model, "486DLC2"); break; case 0x04: strcat(cpu_model, "486SRx"); break; case 0x05: strcat(cpu_model, "486DRx"); break; case 0x06: strcat(cpu_model, "486SRx2"); break; case 0x07: strcat(cpu_model, "486DRx2"); break; case 0x08: strcat(cpu_model, "486SRu"); break; case 0x09: strcat(cpu_model, "486DRu"); break; case 0x0a: strcat(cpu_model, "486SRu2"); break; case 0x0b: strcat(cpu_model, "486DRu2"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x10: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486S"); break; case 0x01: strcat(cpu_model, "486S2"); break; case 0x02: strcat(cpu_model, "486Se"); break; case 0x03: strcat(cpu_model, "486S2e"); break; case 0x0a: strcat(cpu_model, "486DX"); break; case 0x0b: strcat(cpu_model, "486DX2"); break; case 0x0f: strcat(cpu_model, "486DX4"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x20: if ((cyrix_did & 0x0f) < 8) strcat(cpu_model, "6x86"); /* Where did you get it? */ else strcat(cpu_model, "5x86"); break; case 0x30: strcat(cpu_model, "6x86"); break; case 0x40: if ((cyrix_did & 0xf000) == 0x3000) { cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); } else strcat(cpu_model, "MediaGX"); break; case 0x50: strcat(cpu_model, "6x86MX"); break; case 0xf0: switch (cyrix_did & 0x0f) { case 0x0d: strcat(cpu_model, "Overdrive CPU"); break; case 0x0e: strcpy(cpu_model, "Texas Instruments 486SXL"); break; case 0x0f: strcat(cpu_model, "486SLC/DLC"); break; default: strcat(cpu_model, "Unknown"); break; } break; default: strcat(cpu_model, "Unknown"); break; } break; } break; case CPU_VENDOR_RISE: strcpy(cpu_model, "Rise "); switch (cpu_id & 0xff0) { case 0x500: /* 6401 and 6441 (Kirin) */ case 0x520: /* 6510 (Lynx) */ strcat(cpu_model, "mP6"); break; default: strcat(cpu_model, "Unknown"); } break; #endif case CPU_VENDOR_CENTAUR: #ifdef __i386__ switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "IDT WinChip C6"); break; case 0x580: strcpy(cpu_model, "IDT WinChip 2"); break; case 0x590: strcpy(cpu_model, "IDT WinChip 3"); break; case 0x660: strcpy(cpu_model, "VIA C3 Samuel"); break; case 0x670: if (cpu_id & 0x8) strcpy(cpu_model, "VIA C3 Ezra"); else strcpy(cpu_model, "VIA C3 Samuel 2"); break; case 0x680: strcpy(cpu_model, "VIA C3 Ezra-T"); break; case 0x690: strcpy(cpu_model, "VIA C3 Nehemiah"); break; case 0x6a0: case 0x6d0: strcpy(cpu_model, "VIA C7 Esther"); break; case 0x6f0: strcpy(cpu_model, "VIA Nano"); break; default: strcpy(cpu_model, "VIA/IDT Unknown"); } #else strcpy(cpu_model, "VIA "); if ((cpu_id & 0xff0) == 0x6f0) strcat(cpu_model, "Nano Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_IBM: strcpy(cpu_model, "Blue Lightning CPU"); break; case CPU_VENDOR_NSC: switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "Geode SC1100"); cpu = CPU_GEODE1100; break; default: strcpy(cpu_model, "Geode/NSC unknown"); break; } break; #endif case CPU_VENDOR_HYGON: strcpy(cpu_model, "Hygon "); #ifdef __i386__ strcat(cpu_model, "Unknown"); #else if ((cpu_id & 0xf00) == 0xf00) strcat(cpu_model, "AMD64 Processor"); else strcat(cpu_model, "Unknown"); #endif break; default: strcat(cpu_model, "Unknown"); break; } /* * Replace cpu_model with cpu_brand minus leading spaces if * we have one. 
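 * (The 48-byte CPUID brand string is often right-justified with
 * leading spaces, hence the trim below.)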
*/ brand = cpu_brand; while (*brand == ' ') ++brand; if (*brand != '\0') strcpy(cpu_model, brand); printf("%s (", cpu_model); if (tsc_freq != 0) { hw_clockrate = (tsc_freq + 5000) / 1000000; printf("%jd.%02d-MHz ", (intmax_t)(tsc_freq + 4999) / 1000000, (u_int)((tsc_freq + 4999) / 10000) % 100); } #ifdef __i386__ switch(cpu_class) { case CPUCLASS_286: printf("286"); break; case CPUCLASS_386: printf("386"); break; #if defined(I486_CPU) case CPUCLASS_486: printf("486"); break; #endif #if defined(I586_CPU) case CPUCLASS_586: printf("586"); break; #endif #if defined(I686_CPU) case CPUCLASS_686: printf("686"); break; #endif default: printf("Unknown"); /* will panic below... */ } #else printf("K8"); #endif printf("-class CPU)\n"); if (*cpu_vendor) printf(" Origin=\"%s\"", cpu_vendor); if (cpu_id) printf(" Id=0x%x", cpu_id); if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_HYGON || cpu_vendor_id == CPU_VENDOR_CENTAUR || #ifdef __i386__ cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_RISE || cpu_vendor_id == CPU_VENDOR_NSC || (cpu_vendor_id == CPU_VENDOR_CYRIX && ((cpu_id & 0xf00) > 0x500)) || #endif 0) { printf(" Family=0x%x", CPUID_TO_FAMILY(cpu_id)); printf(" Model=0x%x", CPUID_TO_MODEL(cpu_id)); printf(" Stepping=%u", cpu_id & CPUID_STEPPING); #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) printf("\n DIR=0x%04x", cyrix_did); #endif /* * AMD CPUID Specification * http://support.amd.com/us/Embedded_TechDocs/25481.pdf * * Intel Processor Identification and CPUID Instruction * http://www.intel.com/assets/pdf/appnote/241618.pdf */ if (cpu_high > 0) { /* * Here we should probably set up flags indicating * whether or not various features are available. * The interesting ones are probably VME, PSE, PAE, * and PGE. The code already assumes without bothering * to check that all CPUs >= Pentium have a TSC and * MSRs. 
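 * Note on the %b conversions below: this is the kernel printf
 * bit-field decoder.  The first byte of the format ("\020" = 16)
 * selects the numeric base, and each following "\<bit>NAME" pair
 * names a 1-based bit, so set bits print as <FPU,VME,...>.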
*/ printf("\n Features=0x%b", cpu_feature, "\020" "\001FPU" /* Integral FPU */ "\002VME" /* Extended VM86 mode support */ "\003DE" /* Debugging Extensions (CR4.DE) */ "\004PSE" /* 4MByte page tables */ "\005TSC" /* Timestamp counter */ "\006MSR" /* Machine specific registers */ "\007PAE" /* Physical address extension */ "\010MCE" /* Machine Check support */ "\011CX8" /* CMPEXCH8 instruction */ "\012APIC" /* SMP local APIC */ "\013oldMTRR" /* Previous implementation of MTRR */ "\014SEP" /* Fast System Call */ "\015MTRR" /* Memory Type Range Registers */ "\016PGE" /* PG_G (global bit) support */ "\017MCA" /* Machine Check Architecture */ "\020CMOV" /* CMOV instruction */ "\021PAT" /* Page attributes table */ "\022PSE36" /* 36 bit address space support */ "\023PN" /* Processor Serial number */ "\024CLFLUSH" /* Has the CLFLUSH instruction */ "\025" "\026DTS" /* Debug Trace Store */ "\027ACPI" /* ACPI support */ "\030MMX" /* MMX instructions */ "\031FXSR" /* FXSAVE/FXRSTOR */ "\032SSE" /* Streaming SIMD Extensions */ "\033SSE2" /* Streaming SIMD Extensions #2 */ "\034SS" /* Self snoop */ "\035HTT" /* Hyperthreading (see EBX bit 16-23) */ "\036TM" /* Thermal Monitor clock slowdown */ "\037IA64" /* CPU can execute IA64 instructions */ "\040PBE" /* Pending Break Enable */ ); if (cpu_feature2 != 0) { printf("\n Features2=0x%b", cpu_feature2, "\020" "\001SSE3" /* SSE3 */ "\002PCLMULQDQ" /* Carry-Less Mul Quadword */ "\003DTES64" /* 64-bit Debug Trace */ "\004MON" /* MONITOR/MWAIT Instructions */ "\005DS_CPL" /* CPL Qualified Debug Store */ "\006VMX" /* Virtual Machine Extensions */ "\007SMX" /* Safer Mode Extensions */ "\010EST" /* Enhanced SpeedStep */ "\011TM2" /* Thermal Monitor 2 */ "\012SSSE3" /* SSSE3 */ "\013CNXT-ID" /* L1 context ID available */ "\014SDBG" /* IA32 silicon debug */ "\015FMA" /* Fused Multiply Add */ "\016CX16" /* CMPXCHG16B Instruction */ "\017xTPR" /* Send Task Priority Messages*/ "\020PDCM" /* Perf/Debug Capability MSR */ "\021" "\022PCID" /* Process-context Identifiers*/ "\023DCA" /* Direct Cache Access */ "\024SSE4.1" /* SSE 4.1 */ "\025SSE4.2" /* SSE 4.2 */ "\026x2APIC" /* xAPIC Extensions */ "\027MOVBE" /* MOVBE Instruction */ "\030POPCNT" /* POPCNT Instruction */ "\031TSCDLT" /* TSC-Deadline Timer */ "\032AESNI" /* AES Crypto */ "\033XSAVE" /* XSAVE/XRSTOR States */ "\034OSXSAVE" /* OS-Enabled State Management*/ "\035AVX" /* Advanced Vector Extensions */ "\036F16C" /* Half-precision conversions */ "\037RDRAND" /* RDRAND Instruction */ "\040HV" /* Hypervisor */ ); } if (amd_feature != 0) { printf("\n AMD Features=0x%b", amd_feature, "\020" /* in hex */ "\001" /* Same */ "\002" /* Same */ "\003" /* Same */ "\004" /* Same */ "\005" /* Same */ "\006" /* Same */ "\007" /* Same */ "\010" /* Same */ "\011" /* Same */ "\012" /* Same */ "\013" /* Undefined */ "\014SYSCALL" /* Have SYSCALL/SYSRET */ "\015" /* Same */ "\016" /* Same */ "\017" /* Same */ "\020" /* Same */ "\021" /* Same */ "\022" /* Same */ "\023" /* Reserved, unknown */ "\024MP" /* Multiprocessor Capable */ "\025NX" /* Has EFER.NXE, NX */ "\026" /* Undefined */ "\027MMX+" /* AMD MMX Extensions */ "\030" /* Same */ "\031" /* Same */ "\032FFXSR" /* Fast FXSAVE/FXRSTOR */ "\033Page1GB" /* 1-GB large page support */ "\034RDTSCP" /* RDTSCP */ "\035" /* Undefined */ "\036LM" /* 64 bit long mode */ "\0373DNow!+" /* AMD 3DNow! Extensions */ "\0403DNow!" /* AMD 3DNow! 
*/ ); } if (amd_feature2 != 0) { printf("\n AMD Features2=0x%b", amd_feature2, "\020" "\001LAHF" /* LAHF/SAHF in long mode */ "\002CMP" /* CMP legacy */ "\003SVM" /* Secure Virtual Mode */ "\004ExtAPIC" /* Extended APIC register */ "\005CR8" /* CR8 in legacy mode */ "\006ABM" /* LZCNT instruction */ "\007SSE4A" /* SSE4A */ "\010MAS" /* Misaligned SSE mode */ "\011Prefetch" /* 3DNow! Prefetch/PrefetchW */ "\012OSVW" /* OS visible workaround */ "\013IBS" /* Instruction based sampling */ "\014XOP" /* XOP extended instructions */ "\015SKINIT" /* SKINIT/STGI */ "\016WDT" /* Watchdog timer */ "\017" "\020LWP" /* Lightweight Profiling */ "\021FMA4" /* 4-operand FMA instructions */ "\022TCE" /* Translation Cache Extension */ "\023" "\024NodeId" /* NodeId MSR support */ "\025" "\026TBM" /* Trailing Bit Manipulation */ "\027Topology" /* Topology Extensions */ "\030PCXC" /* Core perf count */ "\031PNXC" /* NB perf count */ "\032" "\033DBE" /* Data Breakpoint extension */ "\034PTSC" /* Performance TSC */ "\035PL2I" /* L2I perf count */ "\036MWAITX" /* MONITORX/MWAITX instructions */ "\037ADMSKX" /* Address mask extension */ "\040" ); } if (cpu_stdext_feature != 0) { printf("\n Structured Extended Features=0x%b", cpu_stdext_feature, "\020" /* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ "\001FSGSBASE" "\002TSCADJ" "\003SGX" /* Bit Manipulation Instructions */ "\004BMI1" /* Hardware Lock Elision */ "\005HLE" /* Advanced Vector Instructions 2 */ "\006AVX2" /* FDP_EXCPTN_ONLY */ "\007FDPEXC" /* Supervisor Mode Execution Prot. */ "\010SMEP" /* Bit Manipulation Instructions */ "\011BMI2" "\012ERMS" /* Invalidate Processor Context ID */ "\013INVPCID" /* Restricted Transactional Memory */ "\014RTM" "\015PQM" "\016NFPUSG" /* Intel Memory Protection Extensions */ "\017MPX" "\020PQE" /* AVX512 Foundation */ "\021AVX512F" "\022AVX512DQ" /* Enhanced NRBG */ "\023RDSEED" /* ADCX + ADOX */ "\024ADX" /* Supervisor Mode Access Prevention */ "\025SMAP" "\026AVX512IFMA" /* Formerly PCOMMIT */ "\027" "\030CLFLUSHOPT" "\031CLWB" "\032PROCTRACE" "\033AVX512PF" "\034AVX512ER" "\035AVX512CD" "\036SHA" "\037AVX512BW" "\040AVX512VL" ); } if (cpu_stdext_feature2 != 0) { printf("\n Structured Extended Features2=0x%b", cpu_stdext_feature2, "\020" "\001PREFETCHWT1" "\002AVX512VBMI" "\003UMIP" "\004PKU" "\005OSPKE" "\006WAITPKG" "\007AVX512VBMI2" "\011GFNI" "\012VAES" "\013VPCLMULQDQ" "\014AVX512VNNI" "\015AVX512BITALG" "\016TME" "\017AVX512VPOPCNTDQ" "\021LA57" "\027RDPID" "\032CLDEMOTE" "\034MOVDIRI" "\035MOVDIR64B" "\036ENQCMD" "\037SGXLC" ); } if (cpu_stdext_feature3 != 0) { printf("\n Structured Extended Features3=0x%b", cpu_stdext_feature3, "\020" "\003AVX512_4VNNIW" "\004AVX512_4FMAPS" "\005FSRM" "\011AVX512VP2INTERSECT" "\012MCUOPT" "\013MD_CLEAR" "\016TSXFA" "\023PCONFIG" "\025IBT" "\033IBPB" "\034STIBP" "\035L1DFL" "\036ARCH_CAP" "\037CORE_CAP" "\040SSBD" ); } if ((cpu_feature2 & CPUID2_XSAVE) != 0) { cpuid_count(0xd, 0x1, regs); if (regs[0] != 0) { printf("\n XSAVE Features=0x%b", regs[0], "\020" "\001XSAVEOPT" "\002XSAVEC" "\003XINUSE" "\004XSAVES"); } } if (cpu_ia32_arch_caps != 0) { printf("\n IA32_ARCH_CAPS=0x%b", (u_int)cpu_ia32_arch_caps, "\020" "\001RDCL_NO" "\002IBRS_ALL" "\003RSBA" "\004SKIP_L1DFL_VME" "\005SSB_NO" "\006MDS_NO" "\010TSX_CTRL" "\011TAA_NO" ); } if (amd_extended_feature_extensions != 0) { u_int amd_fe_masked; amd_fe_masked = amd_extended_feature_extensions; if ((amd_fe_masked & AMDFEID_IBRS) == 0) amd_fe_masked &= ~(AMDFEID_IBRS_ALWAYSON | AMDFEID_PREFER_IBRS); if ((amd_fe_masked & AMDFEID_STIBP) == 0) 
amd_fe_masked &= ~AMDFEID_STIBP_ALWAYSON; printf("\n " "AMD Extended Feature Extensions ID EBX=" "0x%b", amd_fe_masked, "\020" "\001CLZERO" "\002IRPerf" "\003XSaveErPtr" "\005RDPRU" "\011MCOMMIT" "\012WBNOINVD" "\015IBPB" "\017IBRS" "\020STIBP" "\021IBRS_ALWAYSON" "\022STIBP_ALWAYSON" "\023PREFER_IBRS" "\030PPIN" "\031SSBD" "\032VIRT_SSBD" "\033SSB_NO" ); } if (via_feature_rng != 0 || via_feature_xcrypt != 0) print_via_padlock_info(); if (cpu_feature2 & CPUID2_VMX) print_vmx_info(); if (amd_feature2 & AMDID2_SVM) print_svm_info(); if ((cpu_feature & CPUID_HTT) && (cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_HYGON)) cpu_feature &= ~CPUID_HTT; /* * If this CPU supports P-state invariant TSC then * mention the capability. */ if (tsc_is_invariant) { printf("\n TSC: P-state invariant"); if (tsc_perf_stat) printf(", performance statistics"); } } #ifdef __i386__ } else if (cpu_vendor_id == CPU_VENDOR_CYRIX) { printf(" DIR=0x%04x", cyrix_did); printf(" Stepping=%u", (cyrix_did & 0xf000) >> 12); printf(" Revision=%u", (cyrix_did & 0x0f00) >> 8); #ifndef CYRIX_CACHE_REALLY_WORKS if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700) printf("\n CPU cache: write-through mode"); #endif #endif } /* Avoid ugly blank lines: only print newline when we have to. */ if (*cpu_vendor || cpu_id) printf("\n"); if (bootverbose) { if (cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_HYGON) print_AMD_info(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) print_INTEL_info(); #ifdef __i386__ else if (cpu_vendor_id == CPU_VENDOR_TRANSMETA) print_transmeta_info(); #endif } print_hypervisor_info(); } #ifdef __i386__ void panicifcpuunsupported(void) { #if !defined(lint) #if !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) #error This kernel is not configured for one of the supported CPUs #endif #else /* lint */ #endif /* lint */ /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { case CPUCLASS_286: /* a 286 should not make it this far, anyway */ case CPUCLASS_386: #if !defined(I486_CPU) case CPUCLASS_486: #endif #if !defined(I586_CPU) case CPUCLASS_586: #endif #if !defined(I686_CPU) case CPUCLASS_686: #endif panic("CPU class not configured"); default: break; } } static volatile u_int trap_by_rdmsr; /* * Special exception 6 handler. * The rdmsr instruction generates invalid opcodes fault on 486-class * Cyrix CPU. Stacked eip register points the rdmsr instruction in the * function identblue() when this handler is called. Stacked eip should * be advanced. */ inthand_t bluetrap6; -#ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap6)) ",@function \n\ " __XSTRING(CNAME(bluetrap6)) ": \n\ ss \n\ movl $0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); -#endif /* * Special exception 13 handler. * Accessing non-existent MSR generates general protection fault. */ inthand_t bluetrap13; -#ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap13)) ",@function \n\ " __XSTRING(CNAME(bluetrap13)) ": \n\ ss \n\ movl $0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ popl %eax /* discard error code */ \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); -#endif /* * Distinguish IBM Blue Lightning CPU from Cyrix CPUs that does not * support cpuid instruction. This function should be called after * loading interrupt descriptor table register. 
* * I don't like this method that handles fault, but I couldn't get * information for any other methods. Does blue giant know? */ static int identblue(void) { trap_by_rdmsr = 0; /* * Cyrix 486-class CPU does not support rdmsr instruction. * The rdmsr instruction generates invalid opcode fault, and exception * will be trapped by bluetrap6() on Cyrix 486-class CPU. The * bluetrap6() set the magic number to trap_by_rdmsr. */ setidt(IDT_UD, bluetrap6, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Certain BIOS disables cpuid instruction of Cyrix 6x86MX CPU. * In this case, rdmsr generates general protection fault, and * exception will be trapped by bluetrap13(). */ setidt(IDT_GP, bluetrap13, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); rdmsr(0x1002); /* Cyrix CPU generates fault. */ if (trap_by_rdmsr == 0xa8c1d) return IDENTBLUE_CYRIX486; else if (trap_by_rdmsr == 0xa89c4) return IDENTBLUE_CYRIXM2; return IDENTBLUE_IBMCPU; } /* * identifycyrix() set lower 16 bits of cyrix_did as follows: * * F E D C B A 9 8 7 6 5 4 3 2 1 0 * +-------+-------+---------------+ * | SID | RID | Device ID | * | (DIR 1) | (DIR 0) | * +-------+-------+---------------+ */ static void identifycyrix(void) { register_t saveintr; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; saveintr = intr_disable(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); read_cyrix_reg(CCR2); if (read_cyrix_reg(CCR2) != ccr2) ccr2_test = 1; write_cyrix_reg(CCR2, ccr2); ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3); read_cyrix_reg(CCR3); if (read_cyrix_reg(CCR3) != ccr3) dir_test = 1; /* CPU supports DIRs. */ write_cyrix_reg(CCR3, ccr3); if (dir_test) { /* Device ID registers are available. */ cyrix_did = read_cyrix_reg(DIR1) << 8; cyrix_did += read_cyrix_reg(DIR0); } else if (ccr2_test) cyrix_did = 0x0010; /* 486S A-step */ else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ intr_restore(saveintr); } #endif /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; /* Total setting for this level gives the new frequency in MHz. 
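 * (hw_clockrate, also exported as the hw.clockrate sysctl, is
 * first derived from tsc_freq in printcpuinfo(); this cpufreq
 * hook only updates it when the TSC is not P-state invariant,
 * see hook_tsc_freq() below.)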
*/ hw_clockrate = level->total_set.freq; } static void hook_tsc_freq(void *arg __unused) { if (tsc_is_invariant) return; tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); } SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL); static const struct { const char * vm_bname; int vm_guest; } vm_bnames[] = { { "QEMU", VM_GUEST_VM }, /* QEMU */ { "Plex86", VM_GUEST_VM }, /* Plex86 */ { "Bochs", VM_GUEST_VM }, /* Bochs */ { "Xen", VM_GUEST_XEN }, /* Xen */ { "BHYVE", VM_GUEST_BHYVE }, /* bhyve */ { "Seabios", VM_GUEST_KVM }, /* KVM */ }; static const struct { const char * vm_pname; int vm_guest; } vm_pnames[] = { { "VMware Virtual Platform", VM_GUEST_VMWARE }, { "Virtual Machine", VM_GUEST_VM }, /* Microsoft VirtualPC */ { "VirtualBox", VM_GUEST_VBOX }, { "Parallels Virtual Platform", VM_GUEST_PARALLELS }, { "KVM", VM_GUEST_KVM }, }; static struct { const char *vm_cpuid; int vm_guest; } vm_cpuids[] = { { "XenVMMXenVMM", VM_GUEST_XEN }, /* XEN */ { "Microsoft Hv", VM_GUEST_HV }, /* Microsoft Hyper-V */ { "VMwareVMware", VM_GUEST_VMWARE }, /* VMware VM */ { "KVMKVMKVM", VM_GUEST_KVM }, /* KVM */ { "bhyve bhyve ", VM_GUEST_BHYVE }, /* bhyve */ { "VBoxVBoxVBox", VM_GUEST_VBOX }, /* VirtualBox */ }; static void identify_hypervisor_cpuid_base(void) { u_int leaf, regs[4]; int i; /* * [RFC] CPUID usage for interaction between Hypervisors and Linux. * http://lkml.org/lkml/2008/10/1/246 * * KB1009458: Mechanisms to determine if software is running in * a VMware virtual machine * http://kb.vmware.com/kb/1009458 * * Search for a hypervisor that we recognize. If we cannot find * a specific hypervisor, return the first information about the * hypervisor that we found, as others may be able to use. */ for (leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { do_cpuid(leaf, regs); /* * KVM from Linux kernels prior to commit * 57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190 set %eax * to 0 rather than a valid hv_high value. Check for * the KVM signature bytes and fixup %eax to the * highest supported leaf in that case. */ if (regs[0] == 0 && regs[1] == 0x4b4d564b && regs[2] == 0x564b4d56 && regs[3] == 0x0000004d) regs[0] = leaf + 1; if (regs[0] >= leaf) { for (i = 0; i < nitems(vm_cpuids); i++) if (strncmp((const char *)®s[1], vm_cpuids[i].vm_cpuid, 12) == 0) { vm_guest = vm_cpuids[i].vm_guest; break; } /* * If this is the first entry or we found a * specific hypervisor, record the base, high value, * and vendor identifier. */ if (vm_guest != VM_GUEST_VM || leaf == 0x40000000) { hv_base = leaf; hv_high = regs[0]; ((u_int *)&hv_vendor)[0] = regs[1]; ((u_int *)&hv_vendor)[1] = regs[2]; ((u_int *)&hv_vendor)[2] = regs[3]; hv_vendor[12] = '\0'; /* * If we found a specific hypervisor, then * we are finished. */ if (vm_guest != VM_GUEST_VM) return; } } } } void identify_hypervisor(void) { u_int regs[4]; char *p; int i; /* * If CPUID2_HV is set, we are running in a hypervisor environment. */ if (cpu_feature2 & CPUID2_HV) { vm_guest = VM_GUEST_VM; identify_hypervisor_cpuid_base(); /* If we have a definitive vendor, we can return now. */ if (*hv_vendor != '\0') return; } /* * Examine SMBIOS strings for older hypervisors. 
*/ p = kern_getenv("smbios.system.serial"); if (p != NULL) { if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) { vmware_hvcall(VMW_HVCMD_GETVERSION, regs); if (regs[1] == VMW_HVMAGIC) { vm_guest = VM_GUEST_VMWARE; freeenv(p); return; } } freeenv(p); } /* * XXX: Some of these entries may not be needed since they were * added to FreeBSD before the checks above. */ p = kern_getenv("smbios.bios.vendor"); if (p != NULL) { for (i = 0; i < nitems(vm_bnames); i++) if (strcmp(p, vm_bnames[i].vm_bname) == 0) { vm_guest = vm_bnames[i].vm_guest; /* If we have a specific match, return */ if (vm_guest != VM_GUEST_VM) { freeenv(p); return; } /* * We are done with bnames, but there might be * a more specific match in the pnames */ break; } freeenv(p); } p = kern_getenv("smbios.system.product"); if (p != NULL) { for (i = 0; i < nitems(vm_pnames); i++) if (strcmp(p, vm_pnames[i].vm_pname) == 0) { vm_guest = vm_pnames[i].vm_guest; freeenv(p); return; } freeenv(p); } } bool fix_cpuid(void) { uint64_t msr; /* * Clear "Limit CPUID Maxval" bit and return true if the caller should * get the largest standard CPUID function number again if it is set * from BIOS. It is necessary for probing correct CPU topology later * and for the correct operation of the AVX-aware userspace. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && ((CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3) || (CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xe))) { msr = rdmsr(MSR_IA32_MISC_ENABLE); if ((msr & IA32_MISC_EN_LIMCPUID) != 0) { msr &= ~IA32_MISC_EN_LIMCPUID; wrmsr(MSR_IA32_MISC_ENABLE, msr); return (true); } } /* * Re-enable AMD Topology Extension that could be disabled by BIOS * on some notebook processors. Without the extension it's really * hard to determine the correct CPU cache topology. * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h * Models 60h-6Fh Processors, Publication # 50742. */ if (vm_guest == VM_GUEST_NO && cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x15) { msr = rdmsr(MSR_EXTFEATURES); if ((msr & ((uint64_t)1 << 54)) == 0) { msr |= (uint64_t)1 << 54; wrmsr(MSR_EXTFEATURES, msr); return (true); } } return (false); } void identify_cpu1(void) { u_int regs[4]; do_cpuid(0, regs); cpu_high = regs[0]; ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; do_cpuid(1, regs); cpu_id = regs[0]; cpu_procinfo = regs[1]; cpu_feature = regs[3]; cpu_feature2 = regs[2]; } void identify_cpu2(void) { u_int regs[4], cpu_stdext_disable; if (cpu_high >= 6) { cpuid_count(6, 0, regs); cpu_power_eax = regs[0]; cpu_power_ebx = regs[1]; cpu_power_ecx = regs[2]; cpu_power_edx = regs[3]; } if (cpu_high >= 7) { cpuid_count(7, 0, regs); cpu_stdext_feature = regs[1]; /* * Some hypervisors failed to filter out unsupported * extended features. Allow to disable the * extensions, activation of which requires setting a * bit in CR4, and which VM monitors do not support. */ cpu_stdext_disable = 0; TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable); cpu_stdext_feature &= ~cpu_stdext_disable; cpu_stdext_feature2 = regs[2]; cpu_stdext_feature3 = regs[3]; if ((cpu_stdext_feature3 & CPUID_STDEXT3_ARCH_CAP) != 0) cpu_ia32_arch_caps = rdmsr(MSR_IA32_ARCH_CAP); } } void identify_cpu_fixup_bsp(void) { u_int regs[4]; cpu_vendor_id = find_cpu_vendor_id(); if (fix_cpuid()) { do_cpuid(0, regs); cpu_high = regs[0]; } } /* * Final stage of CPU identification. 
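 * finishidentcpu() collects the MONITOR/MWAIT parameters,
 * structured extended features, AMD extended leaves and
 * cpu_maxphyaddr, and on i386 sorts out Cyrix/IBM parts that
 * lack, or have disabled, the cpuid instruction.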
*/ void finishidentcpu(void) { u_int regs[4]; #ifdef __i386__ u_char ccr3; #endif identify_cpu_fixup_bsp(); if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) { do_cpuid(5, regs); cpu_mon_mwait_flags = regs[2]; cpu_mon_min_size = regs[0] & CPUID5_MON_MIN_SIZE; cpu_mon_max_size = regs[1] & CPUID5_MON_MAX_SIZE; } identify_cpu2(); #ifdef __i386__ if (cpu_high > 0 && (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_HYGON || cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_CENTAUR || cpu_vendor_id == CPU_VENDOR_NSC)) { do_cpuid(0x80000000, regs); if (regs[0] >= 0x80000000) cpu_exthigh = regs[0]; } #else if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_HYGON || cpu_vendor_id == CPU_VENDOR_CENTAUR) { do_cpuid(0x80000000, regs); cpu_exthigh = regs[0]; } #endif if (cpu_exthigh >= 0x80000001) { do_cpuid(0x80000001, regs); amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff); amd_feature2 = regs[2]; } if (cpu_exthigh >= 0x80000007) { do_cpuid(0x80000007, regs); amd_rascap = regs[1]; amd_pminfo = regs[3]; } if (cpu_exthigh >= 0x80000008) { do_cpuid(0x80000008, regs); cpu_maxphyaddr = regs[0] & 0xff; amd_extended_feature_extensions = regs[1]; cpu_procinfo2 = regs[2]; } else { cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32; } #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) { if (cpu == CPU_486) { /* * These conditions are equivalent to: * - CPU does not support cpuid instruction. * - Cyrix/IBM CPU is detected. */ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } switch (cpu_id & 0xf00) { case 0x600: /* * Cyrix's datasheet does not describe DIRs. * Therefor, I assume it does not have them * and use the result of the cpuid instruction. * XXX they seem to have it for now at least. -Peter */ identifycyrix(); cpu = CPU_M2; break; default: identifycyrix(); /* * This routine contains a trick. * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now. */ switch (cyrix_did & 0x00f0) { case 0x00: case 0xf0: cpu = CPU_486DLC; break; case 0x10: cpu = CPU_CY486DX; break; case 0x20: if ((cyrix_did & 0x000f) < 8) cpu = CPU_M1; else cpu = CPU_M1SC; break; case 0x30: cpu = CPU_M1; break; case 0x40: /* MediaGX CPU */ cpu = CPU_M1SC; break; default: /* M2 and later CPUs are treated as M2. */ cpu = CPU_M2; /* * enable cpuid instruction. */ ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID); write_cyrix_reg(CCR3, ccr3); do_cpuid(0, regs); cpu_high = regs[0]; /* eax */ do_cpuid(1, regs); cpu_id = regs[0]; /* eax */ cpu_feature = regs[3]; /* edx */ break; } } } else if (cpu == CPU_486 && *cpu_vendor == '\0') { /* * There are BlueLightning CPUs that do not change * undefined flags by dividing 5 by 2. In this case, * the CPU identification routine in locore.s leaves * cpu_vendor null string and puts CPU_486 into the * cpu. 
*/ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } #endif } int pti_get_default(void) { if (strcmp(cpu_vendor, AMD_VENDOR_ID) == 0 || strcmp(cpu_vendor, HYGON_VENDOR_ID) == 0) return (0); if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) != 0) return (0); return (1); } static u_int find_cpu_vendor_id(void) { int i; for (i = 0; i < nitems(cpu_vendors); i++) if (strcmp(cpu_vendor, cpu_vendors[i].vendor) == 0) return (cpu_vendors[i].vendor_id); return (0); } static void print_AMD_assoc(int i) { if (i == 255) printf(", fully associative\n"); else printf(", %d-way associative\n", i); } static void print_AMD_l2_assoc(int i) { switch (i & 0x0f) { case 0: printf(", disabled/not present\n"); break; case 1: printf(", direct mapped\n"); break; case 2: printf(", 2-way associative\n"); break; case 4: printf(", 4-way associative\n"); break; case 6: printf(", 8-way associative\n"); break; case 8: printf(", 16-way associative\n"); break; case 15: printf(", fully associative\n"); break; default: printf(", reserved configuration\n"); break; } } static void print_AMD_info(void) { #ifdef __i386__ uint64_t amd_whcr; #endif u_int regs[4]; if (cpu_exthigh >= 0x80000005) { do_cpuid(0x80000005, regs); printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff); print_AMD_assoc(regs[0] >> 24); printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff); print_AMD_assoc((regs[0] >> 8) & 0xff); printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff); print_AMD_assoc(regs[1] >> 24); printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff); print_AMD_assoc((regs[1] >> 8) & 0xff); printf("L1 data cache: %d kbytes", regs[2] >> 24); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0xff); print_AMD_assoc((regs[2] >> 16) & 0xff); printf("L1 instruction cache: %d kbytes", regs[3] >> 24); printf(", %d bytes/line", regs[3] & 0xff); printf(", %d lines/tag", (regs[3] >> 8) & 0xff); print_AMD_assoc((regs[3] >> 16) & 0xff); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); if ((regs[0] >> 16) != 0) { printf("L2 2MB data TLB: %d entries", (regs[0] >> 16) & 0xfff); print_AMD_l2_assoc(regs[0] >> 28); printf("L2 2MB instruction TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } else { printf("L2 2MB unified TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } if ((regs[1] >> 16) != 0) { printf("L2 4KB data TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc(regs[1] >> 28); printf("L2 4KB instruction TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } else { printf("L2 4KB unified TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } printf("L2 unified cache: %d kbytes", regs[2] >> 16); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0x0f); print_AMD_l2_assoc((regs[2] >> 12) & 0x0f); } #ifdef __i386__ if (((cpu_id & 0xf00) == 0x500) && (((cpu_id & 0x0f0) > 0x80) || (((cpu_id & 0x0f0) == 0x80) && (cpu_id & 0x00f) > 0x07))) { /* K6-2(new core [Stepping 8-F]), K6-III or later */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x3ff << 22))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x3ff << 22)) >> 22) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & (1 << 16)) ? 
"Enable" : "Disable"); } } else if (((cpu_id & 0xf00) == 0x500) && ((cpu_id & 0x0f0) > 0x50)) { /* K6, K6-2(old core) */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x7f << 1))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & 0x0001) ? "Enable" : "Disable"); printf("Hardware Write Allocate Control: %s\n", (amd_whcr & 0x0100) ? "Enable" : "Disable"); } } #endif /* * Opteron Rev E shows a bug as in very rare occasions a read memory * barrier is not performed as expected if it is followed by a * non-atomic read-modify-write instruction. * As long as that bug pops up very rarely (intensive machine usage * on other operating systems generally generates one unexplainable * crash any 2 months) and as long as a model specific fix would be * impractical at this stage, print out a warning string if the broken * model and family are identified. */ if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 && CPUID_TO_MODEL(cpu_id) <= 0x3f) printf("WARNING: This architecture revision has known SMP " "hardware bugs which may cause random instability\n"); } static void print_INTEL_info(void) { u_int regs[4]; u_int rounds, regnum; u_int nwaycode, nway; if (cpu_high >= 2) { rounds = 0; do { do_cpuid(0x2, regs); if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0) break; /* we have a buggy CPU */ for (regnum = 0; regnum <= 3; ++regnum) { if (regs[regnum] & (1<<31)) continue; if (regnum != 0) print_INTEL_TLB(regs[regnum] & 0xff); print_INTEL_TLB((regs[regnum] >> 8) & 0xff); print_INTEL_TLB((regs[regnum] >> 16) & 0xff); print_INTEL_TLB((regs[regnum] >> 24) & 0xff); } } while (--rounds > 0); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); nwaycode = (regs[2] >> 12) & 0x0f; if (nwaycode >= 0x02 && nwaycode <= 0x08) nway = 1 << (nwaycode / 2); else nway = 0; printf("L2 cache: %u kbytes, %u-way associative, %u bytes/line\n", (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff); } } static void print_INTEL_TLB(u_int data) { switch (data) { case 0x0: case 0x40: default: break; case 0x1: printf("Instruction TLB: 4 KB pages, 4-way set associative, 32 entries\n"); break; case 0x2: printf("Instruction TLB: 4 MB pages, fully associative, 2 entries\n"); break; case 0x3: printf("Data TLB: 4 KB pages, 4-way set associative, 64 entries\n"); break; case 0x4: printf("Data TLB: 4 MB Pages, 4-way set associative, 8 entries\n"); break; case 0x6: printf("1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size\n"); break; case 0x8: printf("1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0x9: printf("1st-level instruction cache: 32 KB, 4-way set associative, 64 byte line size\n"); break; case 0xa: printf("1st-level data cache: 8 KB, 2-way set associative, 32 byte line size\n"); break; case 0xb: printf("Instruction TLB: 4 MByte pages, 4-way set associative, 4 entries\n"); break; case 0xc: printf("1st-level data cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0xd: printf("1st-level data cache: 16 KBytes, 4-way set associative, 64 byte line size"); break; case 0xe: printf("1st-level data cache: 24 KBytes, 6-way set associative, 64 byte line size\n"); break; case 0x1d: printf("2nd-level cache: 128 KBytes, 2-way set associative, 64 byte line size\n"); break; case 0x21: printf("2nd-level cache: 256 KBytes, 8-way set associative, 64 byte line size\n"); break; case 0x22: 
printf("3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x23: printf("3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x24: printf("2nd-level cache: 1 MBytes, 16-way set associative, 64 byte line size\n"); break; case 0x25: printf("3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x29: printf("3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x2c: printf("1st-level data cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x30: printf("1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x39: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3b: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3c: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x41: printf("2nd-level cache: 128 KB, 4-way set associative, 32 byte line size\n"); break; case 0x42: printf("2nd-level cache: 256 KB, 4-way set associative, 32 byte line size\n"); break; case 0x43: printf("2nd-level cache: 512 KB, 4-way set associative, 32 byte line size\n"); break; case 0x44: printf("2nd-level cache: 1 MB, 4-way set associative, 32 byte line size\n"); break; case 0x45: printf("2nd-level cache: 2 MB, 4-way set associative, 32 byte line size\n"); break; case 0x46: printf("3rd-level cache: 4 MB, 4-way set associative, 64 byte line size\n"); break; case 0x47: printf("3rd-level cache: 8 MB, 8-way set associative, 64 byte line size\n"); break; case 0x48: printf("2nd-level cache: 3MByte, 12-way set associative, 64 byte line size\n"); break; case 0x49: if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) == 0x6) printf("3rd-level cache: 4MB, 16-way set associative, 64-byte line size\n"); else printf("2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size"); break; case 0x4a: printf("3rd-level cache: 6MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4b: printf("3rd-level cache: 8MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4c: printf("3rd-level cache: 12MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4d: printf("3rd-level cache: 16MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4e: printf("2nd-level cache: 6MByte, 24-way set associative, 64 byte line size\n"); break; case 0x4f: printf("Instruction TLB: 4 KByte pages, 32 entries\n"); break; case 0x50: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x51: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x52: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x55: printf("Instruction TLB: 2-MByte or 4-MByte pages, fully associative, 7 entries\n"); break; case 0x56: printf("Data TLB0: 4 MByte pages, 4-way set associative, 16 entries\n"); break; case 0x57: printf("Data TLB0: 4 KByte pages, 4-way associative, 16 entries\n"); break; case 0x59: printf("Data TLB0: 4 KByte pages, fully associative, 16 entries\n"); break; case 0x5a: printf("Data TLB0: 2-MByte or 4 MByte pages, 4-way set associative, 32 entries\n"); break; case 0x5b: 
printf("Data TLB: 4 KB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x5c: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x5d: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x60: printf("1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x61: printf("Instruction TLB: 4 KByte pages, fully associative, 48 entries\n"); break; case 0x63: printf("Data TLB: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries and a separate array with 1 GByte pages, 4-way set associative, 4 entries\n"); break; case 0x64: printf("Data TLB: 4 KBytes pages, 4-way set associative, 512 entries\n"); break; case 0x66: printf("1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x67: printf("1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x68: printf("1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size\n"); break; case 0x6a: printf("uTLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0x6b: printf("DTLB: 4KByte pages, 8-way set associative, 256 entries\n"); break; case 0x6c: printf("DTLB: 2M/4M pages, 8-way set associative, 128 entries\n"); break; case 0x6d: printf("DTLB: 1 GByte pages, fully associative, 16 entries\n"); break; case 0x70: printf("Trace cache: 12K-uops, 8-way set associative\n"); break; case 0x71: printf("Trace cache: 16K-uops, 8-way set associative\n"); break; case 0x72: printf("Trace cache: 32K-uops, 8-way set associative\n"); break; case 0x76: printf("Instruction TLB: 2M/4M pages, fully associative, 8 entries\n"); break; case 0x78: printf("2nd-level cache: 1 MB, 4-way set associative, 64-byte line size\n"); break; case 0x79: printf("2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7a: printf("2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7b: printf("2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7c: printf("2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7d: printf("2nd-level cache: 2-MB, 8-way set associative, 64-byte line size\n"); break; case 0x7f: printf("2nd-level cache: 512-KB, 2-way set associative, 64-byte line size\n"); break; case 0x80: printf("2nd-level cache: 512 KByte, 8-way set associative, 64-byte line size\n"); break; case 0x82: printf("2nd-level cache: 256 KB, 8-way set associative, 32 byte line size\n"); break; case 0x83: printf("2nd-level cache: 512 KB, 8-way set associative, 32 byte line size\n"); break; case 0x84: printf("2nd-level cache: 1 MB, 8-way set associative, 32 byte line size\n"); break; case 0x85: printf("2nd-level cache: 2 MB, 8-way set associative, 32 byte line size\n"); break; case 0x86: printf("2nd-level cache: 512 KB, 4-way set associative, 64 byte line size\n"); break; case 0x87: printf("2nd-level cache: 1 MB, 8-way set associative, 64 byte line size\n"); break; case 0xa0: printf("DTLB: 4k pages, fully associative, 32 entries\n"); break; case 0xb0: printf("Instruction TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb1: printf("Instruction TLB: 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries\n"); break; case 0xb2: printf("Instruction TLB: 4KByte pages, 4-way set associative, 64 entries\n"); break; case 0xb3: 
printf("Data TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb4: printf("Data TLB1: 4 KByte pages, 4-way associative, 256 entries\n"); break; case 0xb5: printf("Instruction TLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0xb6: printf("Instruction TLB: 4KByte pages, 8-way set associative, 128 entries\n"); break; case 0xba: printf("Data TLB1: 4 KByte pages, 4-way associative, 64 entries\n"); break; case 0xc0: printf("Data TLB: 4 KByte and 4 MByte pages, 4-way associative, 8 entries\n"); break; case 0xc1: printf("Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries\n"); break; case 0xc2: printf("DTLB: 4 KByte/2 MByte pages, 4-way associative, 16 entries\n"); break; case 0xc3: printf("Shared 2nd-Level TLB: 4 KByte /2 MByte pages, 6-way associative, 1536 entries. Also 1GBbyte pages, 4-way, 16 entries\n"); break; case 0xc4: printf("DTLB: 2M/4M Byte pages, 4-way associative, 32 entries\n"); break; case 0xca: printf("Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries\n"); break; case 0xd0: printf("3rd-level cache: 512 KByte, 4-way set associative, 64 byte line size\n"); break; case 0xd1: printf("3rd-level cache: 1 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd2: printf("3rd-level cache: 2 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd6: printf("3rd-level cache: 1 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd7: printf("3rd-level cache: 2 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd8: printf("3rd-level cache: 4 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xdc: printf("3rd-level cache: 1.5 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xdd: printf("3rd-level cache: 3 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xde: printf("3rd-level cache: 6 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xe2: printf("3rd-level cache: 2 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe3: printf("3rd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe4: printf("3rd-level cache: 8 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xea: printf("3rd-level cache: 12MByte, 24-way set associative, 64 byte line size\n"); break; case 0xeb: printf("3rd-level cache: 18MByte, 24-way set associative, 64 byte line size\n"); break; case 0xec: printf("3rd-level cache: 24MByte, 24-way set associative, 64 byte line size\n"); break; case 0xf0: printf("64-Byte prefetching\n"); break; case 0xf1: printf("128-Byte prefetching\n"); break; } } static void print_svm_info(void) { u_int features, regs[4]; uint64_t msr; int comma; printf("\n SVM: "); do_cpuid(0x8000000A, regs); features = regs[3]; msr = rdmsr(MSR_VM_CR); if ((msr & VM_CR_SVMDIS) == VM_CR_SVMDIS) printf("(disabled in BIOS) "); if (!bootverbose) { comma = 0; if (features & (1 << 0)) { printf("%sNP", comma ? "," : ""); comma = 1; } if (features & (1 << 3)) { printf("%sNRIP", comma ? "," : ""); comma = 1; } if (features & (1 << 5)) { printf("%sVClean", comma ? "," : ""); comma = 1; } if (features & (1 << 6)) { printf("%sAFlush", comma ? "," : ""); comma = 1; } if (features & (1 << 7)) { printf("%sDAssist", comma ? "," : ""); comma = 1; } printf("%sNAsids=%d", comma ? 
"," : "", regs[1]); return; } printf("Features=0x%b", features, "\020" "\001NP" /* Nested paging */ "\002LbrVirt" /* LBR virtualization */ "\003SVML" /* SVM lock */ "\004NRIPS" /* NRIP save */ "\005TscRateMsr" /* MSR based TSC rate control */ "\006VmcbClean" /* VMCB clean bits */ "\007FlushByAsid" /* Flush by ASID */ "\010DecodeAssist" /* Decode assist */ "\011" "\012" "\013PauseFilter" /* PAUSE intercept filter */ "\014EncryptedMcodePatch" "\015PauseFilterThreshold" /* PAUSE filter threshold */ "\016AVIC" /* virtual interrupt controller */ "\017" "\020V_VMSAVE_VMLOAD" "\021vGIF" "\022GMET" /* Guest Mode Execute Trap */ "\023" "\024" "\025GuesSpecCtl" /* Guest Spec_ctl */ "\026" "\027" "\030" "\031" "\032" "\033" "\034" "\035" "\036" "\037" "\040" ); printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]); } #ifdef __i386__ static void print_transmeta_info(void) { u_int regs[4], nreg = 0; do_cpuid(0x80860000, regs); nreg = regs[0]; if (nreg >= 0x80860001) { do_cpuid(0x80860001, regs); printf(" Processor revision %u.%u.%u.%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff); } if (nreg >= 0x80860002) { do_cpuid(0x80860002, regs); printf(" Code Morphing Software revision %u.%u.%u-%u-%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff, regs[2]); } if (nreg >= 0x80860006) { char info[65]; do_cpuid(0x80860003, (u_int*) &info[0]); do_cpuid(0x80860004, (u_int*) &info[16]); do_cpuid(0x80860005, (u_int*) &info[32]); do_cpuid(0x80860006, (u_int*) &info[48]); info[64] = 0; printf(" %s\n", info); } } #endif static void print_via_padlock_info(void) { u_int regs[4]; do_cpuid(0xc0000001, regs); printf("\n VIA Padlock Features=0x%b", regs[3], "\020" "\003RNG" /* RNG */ "\007AES" /* ACE */ "\011AES-CTR" /* ACE2 */ "\013SHA1,SHA256" /* PHE */ "\015RSA" /* PMM */ ); } static uint32_t vmx_settable(uint64_t basic, int msr, int true_msr) { uint64_t val; if (basic & (1ULL << 55)) val = rdmsr(true_msr); else val = rdmsr(msr); /* Just report the controls that can be set to 1. */ return (val >> 32); } static void print_vmx_info(void) { uint64_t basic, msr; uint32_t entry, exit, mask, pin, proc, proc2; int comma; printf("\n VT-x: "); msr = rdmsr(MSR_IA32_FEATURE_CONTROL); if (!(msr & IA32_FEATURE_CONTROL_VMX_EN)) printf("(disabled in BIOS) "); basic = rdmsr(MSR_VMX_BASIC); pin = vmx_settable(basic, MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS); proc = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS); if (proc & PROCBASED_SECONDARY_CONTROLS) proc2 = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2); else proc2 = 0; exit = vmx_settable(basic, MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS); entry = vmx_settable(basic, MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS); if (!bootverbose) { comma = 0; if (exit & VM_EXIT_SAVE_PAT && exit & VM_EXIT_LOAD_PAT && entry & VM_ENTRY_LOAD_PAT) { printf("%sPAT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_HLT_EXITING) { printf("%sHLT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_MTF) { printf("%sMTF", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_PAUSE_EXITING) { printf("%sPAUSE", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_EPT) { printf("%sEPT", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_UNRESTRICTED_GUEST) { printf("%sUG", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_VPID) { printf("%sVPID", comma ? 
"," : ""); comma = 1; } if (proc & PROCBASED_USE_TPR_SHADOW && proc2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES && proc2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE && proc2 & PROCBASED2_APIC_REGISTER_VIRTUALIZATION && proc2 & PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY) { printf("%sVID", comma ? "," : ""); comma = 1; if (pin & PINBASED_POSTED_INTERRUPT) printf(",PostIntr"); } return; } mask = basic >> 32; printf("Basic Features=0x%b", mask, "\020" "\02132PA" /* 32-bit physical addresses */ "\022SMM" /* SMM dual-monitor */ "\027INS/OUTS" /* VM-exit info for INS and OUTS */ "\030TRUE" /* TRUE_CTLS MSRs */ ); printf("\n Pin-Based Controls=0x%b", pin, "\020" "\001ExtINT" /* External-interrupt exiting */ "\004NMI" /* NMI exiting */ "\006VNMI" /* Virtual NMIs */ "\007PreTmr" /* Activate VMX-preemption timer */ "\010PostIntr" /* Process posted interrupts */ ); printf("\n Primary Processor Controls=0x%b", proc, "\020" "\003INTWIN" /* Interrupt-window exiting */ "\004TSCOff" /* Use TSC offsetting */ "\010HLT" /* HLT exiting */ "\012INVLPG" /* INVLPG exiting */ "\013MWAIT" /* MWAIT exiting */ "\014RDPMC" /* RDPMC exiting */ "\015RDTSC" /* RDTSC exiting */ "\020CR3-LD" /* CR3-load exiting */ "\021CR3-ST" /* CR3-store exiting */ "\024CR8-LD" /* CR8-load exiting */ "\025CR8-ST" /* CR8-store exiting */ "\026TPR" /* Use TPR shadow */ "\027NMIWIN" /* NMI-window exiting */ "\030MOV-DR" /* MOV-DR exiting */ "\031IO" /* Unconditional I/O exiting */ "\032IOmap" /* Use I/O bitmaps */ "\034MTF" /* Monitor trap flag */ "\035MSRmap" /* Use MSR bitmaps */ "\036MONITOR" /* MONITOR exiting */ "\037PAUSE" /* PAUSE exiting */ ); if (proc & PROCBASED_SECONDARY_CONTROLS) printf("\n Secondary Processor Controls=0x%b", proc2, "\020" "\001APIC" /* Virtualize APIC accesses */ "\002EPT" /* Enable EPT */ "\003DT" /* Descriptor-table exiting */ "\004RDTSCP" /* Enable RDTSCP */ "\005x2APIC" /* Virtualize x2APIC mode */ "\006VPID" /* Enable VPID */ "\007WBINVD" /* WBINVD exiting */ "\010UG" /* Unrestricted guest */ "\011APIC-reg" /* APIC-register virtualization */ "\012VID" /* Virtual-interrupt delivery */ "\013PAUSE-loop" /* PAUSE-loop exiting */ "\014RDRAND" /* RDRAND exiting */ "\015INVPCID" /* Enable INVPCID */ "\016VMFUNC" /* Enable VM functions */ "\017VMCS" /* VMCS shadowing */ "\020EPT#VE" /* EPT-violation #VE */ "\021XSAVES" /* Enable XSAVES/XRSTORS */ ); printf("\n Exit Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore Host address-space size */ "\015PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\020AckInt" /* Acknowledge interrupt on exit */ "\023PAT-SV" /* Save MSR_PAT */ "\024PAT-LD" /* Load MSR_PAT */ "\025EFER-SV" /* Save MSR_EFER */ "\026EFER-LD" /* Load MSR_EFER */ "\027PTMR-SV" /* Save VMX-preemption timer value */ ); printf("\n Entry Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore IA-32e mode guest */ /* Ignore Entry to SMM */ /* Ignore Deactivate dual-monitor treatment */ "\016PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\017PAT" /* Load MSR_PAT */ "\020EFER" /* Load MSR_EFER */ ); if (proc & PROCBASED_SECONDARY_CONTROLS && (proc2 & (PROCBASED2_ENABLE_EPT | PROCBASED2_ENABLE_VPID)) != 0) { msr = rdmsr(MSR_VMX_EPT_VPID_CAP); mask = msr; printf("\n EPT Features=0x%b", mask, "\020" "\001XO" /* Execute-only translations */ "\007PW4" /* Page-walk length of 4 */ "\011UC" /* EPT paging-structure mem can be UC */ "\017WB" /* EPT paging-structure mem can be WB */ "\0212M" /* EPT PDE can map a 2-Mbyte page */ "\0221G" /* EPT PDPTE can map a 1-Gbyte page */ "\025INVEPT" /* INVEPT is supported */ 
"\026AD" /* Accessed and dirty flags for EPT */ "\032single" /* INVEPT single-context type */ "\033all" /* INVEPT all-context type */ ); mask = msr >> 32; printf("\n VPID Features=0x%b", mask, "\020" "\001INVVPID" /* INVVPID is supported */ "\011individual" /* INVVPID individual-address type */ "\012single" /* INVVPID single-context type */ "\013all" /* INVVPID all-context type */ /* INVVPID single-context-retaining-globals type */ "\014single-globals" ); } } static void print_hypervisor_info(void) { if (*hv_vendor != '\0') printf("Hypervisor: Origin = \"%s\"\n", hv_vendor); } /* * Returns the maximum physical address that can be used with the * current system. */ vm_paddr_t cpu_getmaxphyaddr(void) { #if defined(__i386__) if (!pae_mode) return (0xffffffff); #endif return ((1ULL << cpu_maxphyaddr) - 1); }