diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 1c38949f7375..58a135e827a8 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -1,1291 +1,1287 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Floating point support. 
*/ #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr))) static __inline void xrstor32(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xrstor64(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor64 %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave32(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsave64(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave64 %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt32(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt64(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt64 %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } CTASSERT(sizeof(struct savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. 
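 * Consumers of the dump read XCR0 from X86_XSTATE_XCR0_OFFSET
 * (presumably debuggers decoding the extended area) to tell which
 * extended states the saved region contains.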
*/ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu)); static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; static struct savefpu *fpu_initialstate; static struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static void fpusave_xsaveopt64(void *addr) { xsaveopt64((char *)addr, xsave_mask); } static void fpusave_xsaveopt3264(void *addr) { if (SV_CURPROC_FLAG(SV_ILP32)) xsaveopt32((char *)addr, xsave_mask); else xsaveopt64((char *)addr, xsave_mask); } static void fpusave_xsave64(void *addr) { xsave64((char *)addr, xsave_mask); } static void fpusave_xsave3264(void *addr) { if (SV_CURPROC_FLAG(SV_ILP32)) xsave32((char *)addr, xsave_mask); else xsave64((char *)addr, xsave_mask); } static void fpurestore_xrstor64(void *addr) { xrstor64((char *)addr, xsave_mask); } static void fpurestore_xrstor3264(void *addr) { if (SV_CURPROC_FLAG(SV_ILP32)) xrstor32((char *)addr, xsave_mask); else xrstor64((char *)addr, xsave_mask); } static void fpusave_fxsave(void *addr) { fxsave((char *)addr); } static void fpurestore_fxrstor(void *addr) { fxrstor((char *)addr); } DEFINE_IFUNC(, void, fpusave, (void *)) { u_int cp[4]; if (!use_xsave) return (fpusave_fxsave); cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? fpusave_xsaveopt64 : fpusave_xsaveopt3264); } return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? fpusave_xsave64 : fpusave_xsave3264); } DEFINE_IFUNC(, void, fpurestore, (void *)) { if (!use_xsave) return (fpurestore_fxrstor); return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? fpurestore_xrstor64 : fpurestore_xrstor3264); } void fpususpend(void *addr) { u_long cr0; cr0 = rcr0(); fpu_enable(); fpusave(addr); load_cr0(cr0); } void fpuresume(void *addr) { u_long cr0; cr0 = rcr0(); fpu_enable(); fninit(); if (use_xsave) load_xcr(XCR0, xsave_mask); fpurestore(addr); load_cr0(cr0); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void fpuinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; bool old_wp; if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { /* * Patch the XSAVE instruction in the cpu_switch code * to XSAVEOPT. We assume that XSAVE encoding used * REX byte, and set the bit 4 of the r/m byte. * * It seems that some BIOSes give control to the OS * with CR0.WP already set, making the kernel text * read-only before cpu_startup(). */ old_wp = disable_wp(); ctx_switch_xsave32[3] |= 0x10; ctx_switch_xsave[3] |= 0x10; restore_wp(old_wp); } } /* * Calculate the fpu save area size. 
*/ static void fpuinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(struct savefpu); } /* * Initialize the floating point unit. */ void fpuinit(void) { register_t saveintr; uint64_t cr4; u_int mxcsr; u_short control; TSENTER(); if (IS_BSP()) fpuinit_bsp1(); if (use_xsave) { cr4 = rcr4(); /* * Revert enablement of PKRU if user disabled its * saving on context switches by clearing the bit in * the xsave mask. Also redundantly clear the bit in * cpu_stdext_feature2 to prevent pmap from ever * trying to set the page table bits. */ if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0 && (xsave_mask & XFEATURE_ENABLED_PKRU) == 0) { cr4 &= ~CR4_PKE; cpu_stdext_feature2 &= ~CPUID_STDEXT2_PKU; } load_cr4(cr4 | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (IS_BSP()) fpuinit_bsp2(); /* * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); fpu_enable(); fninit(); control = __INITIAL_FPUCW__; fldcw(control); mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); fpu_disable(); intr_restore(saveintr); TSEXIT(); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void fpuinitstate(void *arg __unused) { uint64_t *xstate_bv; register_t saveintr; int cp[4], i, max_ext_n; /* Do potentially blocking operations before disabling interrupts. */ fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO); if (use_xsave) { max_ext_n = flsl(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); } cpu_thread_alloc(&thread0); saveintr = intr_disable(); fpu_enable(); fpusave_fxsave(fpu_initialstate); if (fpu_initialstate->sv_env.en_mxcsr_mask) cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM registers or x87 * registers (MM/ST). The fpusave call dumped the garbage * contained in the registers after reset to the initial state * saved. Clear XMM and x87 registers file image to make the * startup program state and signal handler XMM/x87 register * content predictable. */ bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp)); bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm)); /* * Create a table describing the layout of the CPU Extended * Save Area. See Intel SDM rev. 075 Vol. 1 13.4.1 "Legacy * Region of an XSAVE Area" for the source of offsets/sizes. */ if (use_xsave) { xstate_bv = (uint64_t *)((char *)(fpu_initialstate + 1) + offsetof(struct xstate_hdr, xstate_bv)); *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 416 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } fpu_disable(); intr_restore(saveintr); } /* EFIRT needs this to be initialized before we can enter our EFI environment */ SYSINIT(fpuinitstate, SI_SUB_CPU, SI_ORDER_ANY, fpuinitstate, NULL); /* * Free coprocessor (if we have it). 
 */
void
fpuexit(struct thread *td)
{

	critical_enter();
	if (curthread == PCPU_GET(fpcurthread)) {
		fpu_enable();
		fpusave(curpcb->pcb_save);
		fpu_disable();
		PCPU_SET(fpcurthread, NULL);
	}
	critical_exit();
}

int
fpuformat(void)
{

	return (_MC_FPFMT_XMM);
}

/*
 * The following mechanism is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if the user process modifies the control
 * word while a status word bit is already set.  While this is a sign
 * of bad coding, we have no choice but to narrow them down to one
 * bit, since we must not send a trapcode that is not exactly one of
 * the FPE_ macros.
 *
 * The mechanism has a static table with 127 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 *	1  Invalid operation (FP_X_INV)
 *	1a   Stack underflow
 *	1b   Stack overflow
 *	1c   Operand of unsupported format
 *	1d   SNaN operand.
* 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ 
| UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int fputrap_x87(void) { struct savefpu *pcb_save; u_short control, status; critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
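 * If this thread does not currently own the hardware, the values of
 * interest are in its pcb_save area; otherwise read the live control
 * and status words.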
*/ if (PCPU_GET(fpcurthread) != curthread) { pcb_save = curpcb->pcb_save; control = pcb_save->sv_env.en_cw; status = pcb_save->sv_env.en_sw; } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int fputrap_sse(void) { u_int mxcsr; critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curpcb->pcb_save->sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } static void restore_fpu_curthread(struct thread *td) { struct pcb *pcb; /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, td); fpu_enable(); fpu_clean_state(); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * fpu_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size); fpurestore(pcb->pcb_save); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); if (PCB_USER_FPU(pcb)) set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(pcb, PCB_FPUINITDONE); } else fpurestore(pcb->pcb_save); } /* * Device Not Available (DNA, #NM) exception handler. * * It would be better to switch FP context here (if curthread != * fpcurthread) and not necessarily for every context switch, but it * is too hard to access foreign pcb's. */ void fpudna(void) { struct thread *td; td = curthread; /* * This handler is entered with interrupts enabled, so context * switches may occur before critical_enter() is executed. If * a context switch occurs, then when we regain control, our * state will have been completely restored. The CPU may * change underneath us, but the only part of our context that * lives in the CPU is CR0.TS and that will be "restored" by * setting it on the new CPU. */ critical_enter(); KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); if (__predict_false(PCPU_GET(fpcurthread) == td)) { /* * Some virtual machines seems to set %cr0.TS at * arbitrary moments. Silently clear the TS bit * regardless of the eager/lazy FPU context switch * mode. */ fpu_enable(); } else { if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { panic( "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, td, td->td_tid); } restore_fpu_curthread(td); } critical_exit(); } void fpu_activate_sw(struct thread *td); /* Called from the context switch */ void fpu_activate_sw(struct thread *td) { if ((td->td_pflags & TDP_KTHREAD) != 0 || !PCB_USER_FPU(td->td_pcb)) { PCPU_SET(fpcurthread, NULL); fpu_disable(); } else if (PCPU_GET(fpcurthread) != td) { restore_fpu_curthread(td); } } void fpudrop(void) { struct thread *td; td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); fpu_disable(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. 
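 * With XSAVEOPT the save may be partial: components whose bit is
 * clear in xstate_bv were not written by the hardware, so they are
 * filled in from fpu_initialstate below before the area is used.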
*/ int fpugetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; struct savefpu *s; uint32_t mxcsr, mxcsr_mask; int max_ext_n, i, owned; bool do_mxcsr; pcb = td->td_pcb; critical_enter(); if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); get_pcb_user_save_pcb(pcb)->sv_env.en_cw = pcb->pcb_initial_fpucw; fpuuserinited(td); critical_exit(); return (_MC_FPOWNED_PCB); } if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { fpusave(get_pcb_user_save_pcb(pcb)); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + offsetof(struct xstate_hdr, xstate_bv)); max_ext_n = flsl(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; do_mxcsr = false; if (i == 0 && (*xstate_bv & (XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)) != 0) { /* * x87 area was not saved by XSAVEOPT, * but one of XMM or AVX was. Then we need * to preserve MXCSR from being overwritten * with the default value. */ s = (struct savefpu *)sa; mxcsr = s->sv_env.en_mxcsr; mxcsr_mask = s->sv_env.en_mxcsr_mask; do_mxcsr = true; } bcopy((char *)fpu_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); if (do_mxcsr) { s->sv_env.en_mxcsr = mxcsr; s->sv_env.en_mxcsr_mask = mxcsr_mask; } *xstate_bv |= bit; } } critical_exit(); return (owned); } void fpuuserinited(struct thread *td) { struct pcb *pcb; CRITICAL_ASSERT(td); pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(pcb, PCB_FPUINITDONE); } int fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(struct savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } /* * Set the state of the FPU. 
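 * The user-supplied state (and optional extended state) is copied
 * into the thread's user save area, and loaded into the hardware if
 * the thread currently owns the FPU.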
*/ int fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; error = 0; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = fpusetxstate(td, xfpustate, xfpustate_size); if (error == 0) { bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurestore(get_pcb_user_save_td(td)); set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); } } else { error = fpusetxstate(td, xfpustate, xfpustate_size); if (error == 0) { bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpuuserinited(td); } } critical_exit(); return (error); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } /* * This really sucks. We want the acpi version only, but it requires * the isa_if.h file in order to get the definitions. */ #include "opt_isa.h" #ifdef DEV_ISA #include /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id fpupnp_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int fpupnp_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); if (result <= 0) device_quiet(dev); return (result); } static int fpupnp_attach(device_t dev) { return (0); } static device_method_t fpupnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fpupnp_probe), DEVMETHOD(device_attach, fpupnp_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t fpupnp_driver = { "fpupnp", fpupnp_methods, 1, /* no softc */ }; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, 0, 0); ISA_PNP_INFO(fpupnp_ids); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_FPUINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 /* avoided save for the kern thread */ #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { struct savefpu *prev; uint32_t flags; char hwstate1[]; }; static inline size_t __pure2 fpu_kern_alloc_sz(u_int max_est) { return (sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + max_est); } static inline int __pure2 fpu_kern_malloc_flags(u_int fpflags) { return (((fpflags & FPU_KERN_NOWAIT) ? 
M_NOWAIT : M_WAITOK) | M_ZERO); } struct fpu_kern_ctx * fpu_kern_alloc_ctx_domain(int domain, u_int flags) { return (malloc_domainset(fpu_kern_alloc_sz(cpu_max_ext_state_size), M_FPUKERN_CTX, DOMAINSET_PREF(domain), fpu_kern_malloc_flags(flags))); } struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { return (malloc(fpu_kern_alloc_sz(cpu_max_ext_state_size), M_FPUKERN_CTX, fpu_kern_malloc_flags(flags))); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? */ free(ctx, M_FPUKERN_CTX); } static struct savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((struct savefpu *)p); } void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, ("ctx is required when !FPU_KERN_NOCTX")); KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state")); if ((flags & FPU_KERN_NOCTX) != 0) { critical_enter(); fpu_enable(); if (curthread == PCPU_GET(fpcurthread)) { fpusave(curpcb->pcb_save); PCPU_SET(fpcurthread, NULL); } else { KASSERT(PCPU_GET(fpcurthread) == NULL, ("invalid fpcurthread")); } /* * This breaks XSAVEOPT tracker, but * PCB_FPUNOSAVE state is supposed to never need to * save FPU context at all. */ fpurestore(fpu_initialstate); set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE | PCB_FPUINITDONE); return; } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; return; } critical_enter(); KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_FPUINITDONE; fpuexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); set_pcb_flags(pcb, PCB_KERNFPU); clear_pcb_flags(pcb, PCB_FPUINITDONE); critical_exit(); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) { KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); KASSERT(PCPU_GET(fpcurthread) == NULL, ("non-NULL fpcurthread for PCB_FPUNOSAVE")); CRITICAL_ASSERT(td); clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE); fpu_disable(); } else { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); critical_enter(); if (curthread == PCPU_GET(fpcurthread)) fpudrop(); pcb->pcb_save = ctx->prev; } if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0) clear_pcb_flags(pcb, PCB_KERNFPU); } else if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0) clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); } else { if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) set_pcb_flags(pcb, PCB_FPUINITDONE); else clear_pcb_flags(pcb, PCB_FPUINITDONE); KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } critical_exit(); return (0); } int fpu_kern_thread(u_int flags) { 
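	/*
	 * Mark the calling kernel thread as allowed to use the FPU
	 * directly; is_fpu_kern_thread() keys off the PCB_KERNFPU_THR
	 * flag set below.
	 */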
KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); set_pcb_flags(curpcb, PCB_KERNFPU | PCB_KERNFPU_THR); return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNFPU_THR) != 0); } /* * FPU save area alloc/free/init utility routines */ struct savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, M_WAITOK)); } void fpu_save_area_free(struct savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(struct savefpu *fsa) { bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); } diff --git a/sys/i386/i386/npx.c b/sys/i386/i386/npx.c index 0334aedd740b..a81413c291b3 100644 --- a/sys/i386/i386/npx.c +++ b/sys/i386/i386/npx.c @@ -1,1551 +1,1547 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 
*/ #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) CTASSERT(sizeof(union savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. */ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm)); static void fpu_clean_state(void); static void fpurstor(union savefpu *); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); int lazy_fpu_switch = 0; SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &lazy_fpu_switch, 0, "Lazily load FPU context after context switch"); u_int cpu_fxsr; /* SSE enabled */ int use_xsave; uint64_t xsave_mask; static uma_zone_t fpu_save_area_zone; static union savefpu *npx_initialstate; static struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static volatile u_int npx_traps_while_probing; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Determine if an FPU is present and how to use it. 
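 * On CPUs that do not advertise CPUID_FPU, the probe temporarily
 * takes over the #MF vector and forces a divide-by-zero to verify
 * that exception 16 error reporting actually works.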
*/ static int npx_probe(void) { struct gate_descriptor save_idt_npxtrap; u_short control, status; /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; return (1); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ fpu_enable(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. */ hw_float = 1; goto cleanup; } printf( "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ printf("WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float); } static void fpusave_xsaveopt(union savefpu *addr) { xsaveopt((char *)addr, xsave_mask); } static void fpusave_xsave(union savefpu *addr) { xsave((char *)addr, xsave_mask); } static void fpusave_fxsave(union savefpu *addr) { fxsave((char *)addr); } static void fpusave_fnsave(union savefpu *addr) { fnsave((char *)addr); } DEFINE_IFUNC(, void, fpusave, (union savefpu *)) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x1, cp); return ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0 ? fpusave_xsaveopt : fpusave_xsave); } if (cpu_fxsr) return (fpusave_fxsave); return (fpusave_fnsave); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void npxinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; } /* * Calculate the fpu save area size. */ static void npxinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. 
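 * (CPUID.1:%ecx bit OSXSAVE mirrors CR4.OSXSAVE, so the value cached
 * at early boot is stale once XSAVE has been enabled.)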
*/ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(union savefpu); } /* * Initialize floating point unit. */ void npxinit(bool bsp) { static union savefpu dummy; register_t saveintr; u_int mxcsr; u_short control; if (bsp) { if (!npx_probe()) return; npxinit_bsp1(); } if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (bsp) npxinit_bsp2(); /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. fpusave() initializes * the fpu. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); fpu_enable(); if (cpu_fxsr) fninit(); else fnsave(&dummy); control = __INITIAL_NPXCW__; fldcw(control); if (cpu_fxsr) { mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); } fpu_disable(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void npxinitstate(void *arg __unused) { uint64_t *xstate_bv; register_t saveintr; int cp[4], i, max_ext_n; if (!hw_float) return; /* Do potentially blocking operations before disabling interrupts. */ fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); npx_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO); if (use_xsave) { if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); } saveintr = intr_disable(); fpu_enable(); if (cpu_fxsr) fpusave_fxsave(npx_initialstate); else fpusave_fnsave(npx_initialstate); if (cpu_fxsr) { if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM * registers or x87 registers (MM/ST). The fpusave * call dumped the garbage contained in the registers * after reset to the initial state saved. Clear XMM * and x87 registers file image to make the startup * program state and signal handler XMM/x87 register * content predictable. */ bzero(npx_initialstate->sv_xmm.sv_fp, sizeof(npx_initialstate->sv_xmm.sv_fp)); bzero(npx_initialstate->sv_xmm.sv_xmm, sizeof(npx_initialstate->sv_xmm.sv_xmm)); } else bzero(npx_initialstate->sv_87.sv_ac, sizeof(npx_initialstate->sv_87.sv_ac)); /* * Create a table describing the layout of the CPU Extended * Save Area. See Intel SDM rev. 075 Vol. 1 13.4.1 "Legacy * Region of an XSAVE Area" for the source of offsets/sizes. * Note that 32bit XSAVE does not use %xmm8-%xmm15, see * 10.5.1.2 and 13.5.2 "SSE State". */ if (use_xsave) { xstate_bv = (uint64_t *)((char *)(npx_initialstate + 1) + offsetof(struct xstate_hdr, xstate_bv)); *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } fpu_disable(); intr_restore(saveintr); } SYSINIT(npxinitstate, SI_SUB_CPU, SI_ORDER_ANY, npxinitstate, NULL); /* * Free coprocessor (if we have it). 
 */
void
npxexit(struct thread *td)
{

	critical_enter();
	if (curthread == PCPU_GET(fpcurthread)) {
		fpu_enable();
		fpusave(curpcb->pcb_save);
		fpu_disable();
		PCPU_SET(fpcurthread, NULL);
	}
	critical_exit();
#ifdef	NPX_DEBUG
	if (hw_float) {
		u_int masked_exceptions;

		masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
		/*
		 * Log exceptions that would have trapped with the old
		 * control word (overflow, divide by 0, and invalid operand).
		 */
		if (masked_exceptions & 0x0d)
			log(LOG_ERR,
	"pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
			    td->td_proc->p_pid, td->td_proc->p_comm,
			    masked_exceptions);
	}
#endif
}

int
npxformat(void)
{

	if (!hw_float)
		return (_MC_FPFMT_NODEV);
	if (cpu_fxsr)
		return (_MC_FPFMT_XMM);
	return (_MC_FPFMT_387);
}

/*
 * The following mechanism is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if the user process modifies the control
 * word while a status word bit is already set.  While this is a sign
 * of bad coding, we have no choice but to narrow them down to one
 * bit, since we must not send a trapcode that is not exactly one of
 * the FPE_ macros.
 *
 * The mechanism has a static table with 127 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 *	1  Invalid operation (FP_X_INV)
 *	1a   Stack underflow
 *	1b   Stack overflow
 *	1c   Operand of unsupported format
 *	1d   SNaN operand.
* 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ 
| UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
*/ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } static void restore_npx_curthread(struct thread *td, struct pcb *pcb) { /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, td); fpu_enable(); if (cpu_fxsr) fpu_clean_state(); if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(npx_initialstate, pcb->pcb_save, cpu_max_ext_state_size); fpurstor(pcb->pcb_save); if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(pcb->pcb_initial_npxcw); pcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { fpurstor(pcb->pcb_save); } } /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ int npxdna(void) { struct thread *td; if (!hw_float) return (0); td = curthread; critical_enter(); KASSERT((curpcb->pcb_flags & PCB_NPXNOSAVE) == 0, ("npxdna while in fpu_kern_enter(FPU_KERN_NOCTX)")); if (__predict_false(PCPU_GET(fpcurthread) == td)) { /* * Some virtual machines seems to set %cr0.TS at * arbitrary moments. Silently clear the TS bit * regardless of the eager/lazy FPU context switch * mode. */ fpu_enable(); } else { if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { printf( "npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, td, td->td_proc->p_pid); panic("npxdna"); } restore_npx_curthread(td, td->td_pcb); } critical_exit(); return (1); } /* * Wrapper for fpusave() called from context switch routines. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. */ void npxsave(union savefpu *addr) { fpu_enable(); fpusave(addr); } void npxswitch(struct thread *td, struct pcb *pcb); void npxswitch(struct thread *td, struct pcb *pcb) { if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || !PCB_USER_FPU(pcb)) { fpu_disable(); PCPU_SET(fpcurthread, NULL); } else if (PCPU_GET(fpcurthread) != td) { restore_npx_curthread(td, pcb); } } /* * Unconditionally save the current co-processor state across suspend and * resume. 
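 * If no thread currently owns the FPU, the clean initial state is
 * copied out instead of touching the hardware.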
*/ void npxsuspend(union savefpu *addr) { register_t cr0; if (!hw_float) return; if (PCPU_GET(fpcurthread) == NULL) { bcopy(npx_initialstate, addr, cpu_max_ext_state_size); return; } cr0 = rcr0(); fpu_enable(); fpusave(addr); load_cr0(cr0); } void npxresume(union savefpu *addr) { register_t cr0; if (!hw_float) return; cr0 = rcr0(); npxinit(false); fpu_enable(); fpurstor(addr); load_cr0(cr0); } void npxdrop(void) { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. */ if (!cpu_fxsr) fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; fpu_disable(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; union savefpu *s; uint32_t mxcsr, mxcsr_mask; int max_ext_n, i; int owned; bool do_mxcsr; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; critical_enter(); if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw); npxuserinited(td); critical_exit(); return (_MC_FPOWNED_PCB); } if (td == PCPU_GET(fpcurthread)) { fpusave(get_pcb_user_save_pcb(pcb)); if (!cpu_fxsr) /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) + offsetof(struct xstate_hdr, xstate_bv)); if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; do_mxcsr = false; if (i == 0 && (*xstate_bv & (XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)) != 0) { /* * x87 area was not saved by XSAVEOPT, * but one of XMM or AVX was. Then we need * to preserve MXCSR from being overwritten * with the default value. */ s = (union savefpu *)sa; mxcsr = s->sv_xmm.sv_env.en_mxcsr; mxcsr_mask = s->sv_xmm.sv_env.en_mxcsr_mask; do_mxcsr = true; } bcopy((char *)npx_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); if (do_mxcsr) { s->sv_xmm.sv_env.en_mxcsr = mxcsr; s->sv_xmm.sv_env.en_mxcsr_mask = mxcsr_mask; } *xstate_bv |= bit; } } critical_exit(); return (owned); } void npxuserinited(struct thread *td) { struct pcb *pcb; CRITICAL_ASSERT(td); pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } int npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(union savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. 
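 * XRSTOR raises #GP if the header's xstate_bv requests a state
 * component that is not enabled in XCR0, so reject any bits outside
 * xsave_mask before they can reach the hardware.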
*/ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } int npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; if (!hw_float) return (ENXIO); if (cpu_fxsr) addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; error = 0; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = npxsetxstate(td, xfpustate, xfpustate_size); if (error == 0) { if (!cpu_fxsr) fnclex(); /* As in npxdrop(). */ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurstor(get_pcb_user_save_td(td)); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } } else { error = npxsetxstate(td, xfpustate, xfpustate_size); if (error == 0) { bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); npxuserinited(td); } } critical_exit(); return (error); } static void npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87) { struct env87 *penv_87; struct envxmm *penv_xmm; struct fpacc87 *fx_reg; int i, st; uint64_t mantissa; uint16_t tw, exp; uint8_t ab_tw; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* * FPU registers and tags. * For ST(i), i = fpu_reg - top; we start with fpu_reg=7. */ st = 7 - ((penv_xmm->en_sw >> 11) & 7); ab_tw = penv_xmm->en_tw; tw = 0; for (i = 0x80; i != 0; i >>= 1) { sv_87->sv_ac[st] = sv_xmm->sv_fp[st].fp_acc; tw <<= 2; if (ab_tw & i) { /* Non-empty - we need to check ST(i) */ fx_reg = &sv_xmm->sv_fp[st].fp_acc; /* The first 64 bits contain the mantissa. */ mantissa = *((uint64_t *)fx_reg->fp_bytes); /* * The final 16 bits contain the sign bit and the exponent. * Mask the sign bit since it is of no consequence to these * tests. */ exp = *((uint16_t *)&fx_reg->fp_bytes[8]) & 0x7fff; if (exp == 0) { if (mantissa == 0) tw |= 1; /* Zero */ else tw |= 2; /* Denormal */ } else if (exp == 0x7fff) tw |= 2; /* Infinity or NaN */ } else tw |= 3; /* Empty */ st = (st - 1) & 7; } penv_87->en_tw = tw; } void npx_fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) { bzero(sv_87, sizeof(*sv_87)); npx_fill_fpregs_xmm1(sv_xmm, sv_87); } void npx_set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* * FPU registers and tags. * Abridged / Full translation (values in binary), see FXSAVE spec. 
* 0 11 * 1 00, 01, 10 */ penv_xmm->en_tw = 0; for (i = 0; i < 8; ++i) { sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; if ((penv_87->en_tw & (3 << i * 2)) != (3 << i * 2)) penv_xmm->en_tw |= 1 << i; } } void npx_get_fsave(void *addr) { struct thread *td; union savefpu *sv; td = curthread; npxgetregs(td); sv = get_pcb_user_save_td(td); if (cpu_fxsr) npx_fill_fpregs_xmm1(&sv->sv_xmm, addr); else bcopy(sv, addr, sizeof(struct env87) + sizeof(struct fpacc87[8])); } int npx_set_fsave(void *addr) { union savefpu sv; int error; bzero(&sv, sizeof(sv)); if (cpu_fxsr) npx_set_fpregs_xmm(addr, &sv.sv_xmm); else bcopy(addr, &sv, sizeof(struct env87) + sizeof(struct fpacc87[8])); error = npxsetregs(curthread, &sv, NULL, 0); return (error); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } static void fpurstor(union savefpu *addr) { if (use_xsave) xrstor((char *)addr, xsave_mask); else if (cpu_fxsr) fxrstor(addr); else frstor(addr); } #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; DRIVER_MODULE(npxisa, isa, npxisa_driver, 0, 0); DRIVER_MODULE(npxisa, acpi, npxisa_driver, 0, 0); ISA_PNP_INFO(npxisa_ids); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_NPXINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? 
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, ("ctx is required when !FPU_KERN_NOCTX")); KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); KASSERT((pcb->pcb_flags & PCB_NPXNOSAVE) == 0, ("recursive fpu_kern_enter while in PCB_NPXNOSAVE state")); if ((flags & FPU_KERN_NOCTX) != 0) { critical_enter(); fpu_enable(); if (curthread == PCPU_GET(fpcurthread)) { fpusave(curpcb->pcb_save); PCPU_SET(fpcurthread, NULL); } else { KASSERT(PCPU_GET(fpcurthread) == NULL, ("invalid fpcurthread")); } /* * This breaks XSAVEOPT tracker, but * PCB_NPXNOSAVE state is supposed to never need to * save FPU context at all. */ fpurstor(npx_initialstate); pcb->pcb_flags |= PCB_KERNNPX | PCB_NPXNOSAVE | PCB_NPXINITDONE; return; } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; return; } pcb = td->td_pcb; critical_enter(); KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; critical_exit(); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXNOSAVE) != 0) { KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); KASSERT(PCPU_GET(fpcurthread) == NULL, ("non-NULL fpcurthread for PCB_NPXNOSAVE")); CRITICAL_ASSERT(td); pcb->pcb_flags &= ~(PCB_NPXNOSAVE | PCB_NPXINITDONE); fpu_disable(); } else { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); pcb->pcb_save = ctx->prev; } if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) { pcb->pcb_flags |= PCB_NPXINITDONE; if ((pcb->pcb_flags & PCB_KERNNPX_THR) == 0) pcb->pcb_flags &= ~PCB_KERNNPX; } else if ((pcb->pcb_flags & PCB_KERNNPX_THR) == 0) pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_KERNNPX); } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } critical_exit(); return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX | PCB_KERNNPX_THR; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX_THR) != 0); } /* * FPU save area alloc/free/init utility routines */ union savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, M_WAITOK)); } 
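/*
 * The fpu_kern interface above is typically consumed as follows.  This
 * is a minimal, hypothetical sketch, not code from this file;
 * FPU_KERN_NORMAL is assumed to be the "no special semantics" flag
 * value from the interface's public header:
 *
 *	struct fpu_kern_ctx *ctx;
 *
 *	ctx = fpu_kern_alloc_ctx(0);
 *	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
 *	... use SSE/AVX instructions in kernel mode ...
 *	fpu_kern_leave(curthread, ctx);
 *	fpu_kern_free_ctx(ctx);
 *
 * Short sections that must not sleep can instead pass FPU_KERN_NOCTX
 * with a NULL ctx; fpu_kern_enter() then keeps the thread inside a
 * critical section until the matching fpu_kern_leave().
 */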
void fpu_save_area_free(union savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(union savefpu *fsa) { bcopy(npx_initialstate, fsa, cpu_max_ext_state_size); } diff --git a/sys/x86/isa/atpic.c b/sys/x86/isa/atpic.c index 556951d1b886..4ea80cfff7f9 100644 --- a/sys/x86/isa/atpic.c +++ b/sys/x86/isa/atpic.c @@ -1,618 +1,614 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2003 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * PIC driver for the 8259A Master and Slave PICs in PC/AT machines. */ #include #include "opt_auto_eoi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #define SDT_ATPIC SDT_SYSIGT #define GSEL_ATPIC 0 #else #define SDT_ATPIC SDT_SYS386IGT #define GSEL_ATPIC GSEL(GCODE_SEL, SEL_KPL) #endif #define MASTER 0 #define SLAVE 1 #define IMEN_MASK(ai) (IRQ_MASK((ai)->at_irq)) #define NUM_ISA_IRQS 16 static void atpic_init(void *dummy); inthand_t IDTVEC(atpic_intr0), IDTVEC(atpic_intr1), IDTVEC(atpic_intr2), IDTVEC(atpic_intr3), IDTVEC(atpic_intr4), IDTVEC(atpic_intr5), IDTVEC(atpic_intr6), IDTVEC(atpic_intr7), IDTVEC(atpic_intr8), IDTVEC(atpic_intr9), IDTVEC(atpic_intr10), IDTVEC(atpic_intr11), IDTVEC(atpic_intr12), IDTVEC(atpic_intr13), IDTVEC(atpic_intr14), IDTVEC(atpic_intr15); /* XXXKIB i386 uses stubs until pti comes */ inthand_t IDTVEC(atpic_intr0_pti), IDTVEC(atpic_intr1_pti), IDTVEC(atpic_intr2_pti), IDTVEC(atpic_intr3_pti), IDTVEC(atpic_intr4_pti), IDTVEC(atpic_intr5_pti), IDTVEC(atpic_intr6_pti), IDTVEC(atpic_intr7_pti), IDTVEC(atpic_intr8_pti), IDTVEC(atpic_intr9_pti), IDTVEC(atpic_intr10_pti), IDTVEC(atpic_intr11_pti), IDTVEC(atpic_intr12_pti), IDTVEC(atpic_intr13_pti), IDTVEC(atpic_intr14_pti), IDTVEC(atpic_intr15_pti); #define IRQ(ap, ai) ((ap)->at_irqbase + (ai)->at_irq) #define ATPIC(io, base, eoi) { \ .at_pic = { \ .pic_register_sources = atpic_register_sources, \ .pic_enable_source = atpic_enable_source, \ .pic_disable_source = atpic_disable_source, \ .pic_eoi_source = (eoi), \ .pic_enable_intr = atpic_enable_intr, \ .pic_disable_intr = atpic_disable_intr, \ .pic_vector = atpic_vector, \ .pic_source_pending = atpic_source_pending, \ .pic_resume = 
atpic_resume, \ .pic_config_intr = atpic_config_intr, \ .pic_assign_cpu = atpic_assign_cpu \ }, \ .at_ioaddr = (io), \ .at_irqbase = (base), \ .at_intbase = IDT_IO_INTS + (base), \ .at_imen = 0xff, \ } #define INTSRC(irq) \ { { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ), \ IDTVEC(atpic_intr ## irq ## _pti), (irq) % 8 } struct atpic { struct pic at_pic; int at_ioaddr; int at_irqbase; uint8_t at_intbase; uint8_t at_imen; }; struct atpic_intsrc { struct intsrc at_intsrc; inthand_t *at_intr, *at_intr_pti; int at_irq; /* Relative to PIC base. */ enum intr_trigger at_trigger; u_long at_count; u_long at_straycount; }; static void atpic_register_sources(struct pic *pic); static void atpic_enable_source(struct intsrc *isrc); static void atpic_disable_source(struct intsrc *isrc, int eoi); static void atpic_eoi_master(struct intsrc *isrc); static void atpic_eoi_slave(struct intsrc *isrc); static void atpic_enable_intr(struct intsrc *isrc); static void atpic_disable_intr(struct intsrc *isrc); static int atpic_vector(struct intsrc *isrc); static void atpic_resume(struct pic *pic, bool suspend_cancelled); static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void i8259_init(struct atpic *pic, int slave); static struct atpic atpics[] = { ATPIC(IO_ICU1, 0, atpic_eoi_master), ATPIC(IO_ICU2, 8, atpic_eoi_slave) }; static struct atpic_intsrc atintrs[] = { INTSRC(0), INTSRC(1), INTSRC(2), INTSRC(3), INTSRC(4), INTSRC(5), INTSRC(6), INTSRC(7), INTSRC(8), INTSRC(9), INTSRC(10), INTSRC(11), INTSRC(12), INTSRC(13), INTSRC(14), INTSRC(15), }; CTASSERT(nitems(atintrs) == NUM_ISA_IRQS); static __inline void _atpic_eoi_master(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[MASTER].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif } /* * The data sheet says no auto-EOI on slave, but it sometimes works. * So, if AUTO_EOI_2 is enabled, we use it. */ static __inline void _atpic_eoi_slave(struct intsrc *isrc) { KASSERT(isrc->is_pic == &atpics[SLAVE].at_pic, ("%s: mismatched pic", __func__)); #ifndef AUTO_EOI_2 outb(atpics[SLAVE].at_ioaddr, OCW2_EOI); #ifndef AUTO_EOI_1 outb(atpics[MASTER].at_ioaddr, OCW2_EOI); #endif #endif } static void atpic_register_sources(struct pic *pic) { struct atpic *ap = (struct atpic *)pic; struct atpic_intsrc *ai; int i; /* * If any of the ISA IRQs have an interrupt source already, then * assume that the I/O APICs are being used and don't register any * of our interrupt sources. This makes sure we don't accidentally * use mixed mode. The "accidental" use could otherwise occur on * machines that route the ACPI SCI interrupt to a different ISA * IRQ (at least one machine routes it to IRQ 13) thus disabling * that APIC ISA routing and allowing the ATPIC source for that IRQ * to leak through. We used to depend on this feature for routing * IRQ0 via mixed mode, but now we don't use mixed mode at all. * * To avoid the slave not register sources after the master * registers its sources, register all IRQs when this function is * called on the master. */ if (ap != &atpics[MASTER]) return; for (i = 0; i < NUM_ISA_IRQS; i++) if (intr_lookup_source(i) != NULL) return; /* Loop through all interrupt sources and add them. 
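 * IRQ 2 (ICU_SLAVEID) is skipped below because it is the cascade input
 * from the slave PIC and never delivers an interrupt of its own.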
*/ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; intr_register_source(&ai->at_intsrc); } } static void atpic_enable_source(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (ap->at_imen & IMEN_MASK(ai)) { ap->at_imen &= ~IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, ap->at_imen); } spinlock_exit(); } static void atpic_disable_source(struct intsrc *isrc, int eoi) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; spinlock_enter(); if (ai->at_trigger != INTR_TRIGGER_EDGE) { ap->at_imen |= IMEN_MASK(ai); outb(ap->at_ioaddr + ICU_IMR_OFFSET, ap->at_imen); } /* * Take care to call these functions directly instead of through * a function pointer. All of the referenced variables should * still be hot in the cache. */ if (eoi == PIC_EOI) { if (isrc->is_pic == &atpics[MASTER].at_pic) _atpic_eoi_master(isrc); else _atpic_eoi_slave(isrc); } spinlock_exit(); } static void atpic_eoi_master(struct intsrc *isrc) { #ifndef AUTO_EOI_1 spinlock_enter(); _atpic_eoi_master(isrc); spinlock_exit(); #endif } static void atpic_eoi_slave(struct intsrc *isrc) { #ifndef AUTO_EOI_2 spinlock_enter(); _atpic_eoi_slave(isrc); spinlock_exit(); #endif } static void atpic_enable_intr(struct intsrc *isrc) { } static void atpic_disable_intr(struct intsrc *isrc) { } static int atpic_vector(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (IRQ(ap, ai)); } static int atpic_source_pending(struct intsrc *isrc) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; struct atpic *ap = (struct atpic *)isrc->is_pic; return (inb(ap->at_ioaddr) & IMEN_MASK(ai)); } static void atpic_resume(struct pic *pic, bool suspend_cancelled) { struct atpic *ap = (struct atpic *)pic; i8259_init(ap, ap == &atpics[SLAVE]); if (ap == &atpics[SLAVE] && elcr_found) elcr_resume(); } static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { struct atpic_intsrc *ai = (struct atpic_intsrc *)isrc; u_int vector; /* Map conforming values to edge/hi and sanity check the values. */ if (trig == INTR_TRIGGER_CONFORM) trig = INTR_TRIGGER_EDGE; if (pol == INTR_POLARITY_CONFORM) pol = INTR_POLARITY_HIGH; vector = atpic_vector(isrc); if ((trig == INTR_TRIGGER_EDGE && pol == INTR_POLARITY_LOW) || (trig == INTR_TRIGGER_LEVEL && pol == INTR_POLARITY_HIGH)) { printf( "atpic: Mismatched config for IRQ%u: trigger %s, polarity %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge" : "level", pol == INTR_POLARITY_HIGH ? "high" : "low"); return (EINVAL); } /* If there is no change, just return. */ if (ai->at_trigger == trig) return (0); /* * Certain IRQs can never be level/lo, so don't try to set them * that way if asked. At least some ELCR registers ignore setting * these bits as well. */ if ((vector == 0 || vector == 1 || vector == 2 || vector == 13) && trig == INTR_TRIGGER_LEVEL) { if (bootverbose) printf( "atpic: Ignoring invalid level/low configuration for IRQ%u\n", vector); return (EINVAL); } if (!elcr_found) { if (bootverbose) printf("atpic: No ELCR to configure IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? "edge/high" : "level/low"); return (ENXIO); } if (bootverbose) printf("atpic: Programming IRQ%u as %s\n", vector, trig == INTR_TRIGGER_EDGE ? 
"edge/high" : "level/low"); spinlock_enter(); elcr_write_trigger(atpic_vector(isrc), trig); ai->at_trigger = trig; spinlock_exit(); return (0); } static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id) { /* * 8259A's are only used in UP in which case all interrupts always * go to the sole CPU and this function shouldn't even be called. */ panic("%s: bad cookie", __func__); } static void i8259_init(struct atpic *pic, int slave) { int imr_addr; /* Reset the PIC and program with next four bytes. */ spinlock_enter(); outb(pic->at_ioaddr, ICW1_RESET | ICW1_IC4); imr_addr = pic->at_ioaddr + ICU_IMR_OFFSET; /* Start vector. */ outb(imr_addr, pic->at_intbase); /* * Setup slave links. For the master pic, indicate what line * the slave is configured on. For the slave indicate * which line on the master we are connected to. */ if (slave) outb(imr_addr, ICU_SLAVEID); else outb(imr_addr, IRQ_MASK(ICU_SLAVEID)); /* Set mode. */ if (slave) outb(imr_addr, SLAVE_MODE); else outb(imr_addr, MASTER_MODE); /* Set interrupt enable mask. */ outb(imr_addr, pic->at_imen); /* Reset is finished, default to IRR on read. */ outb(pic->at_ioaddr, OCW3_SEL | OCW3_RR); /* OCW2_L1 sets priority order to 3-7, 0-2 (com2 first). */ if (!slave) outb(pic->at_ioaddr, OCW2_R | OCW2_SL | OCW2_L1); spinlock_exit(); } void atpic_startup(void) { struct atpic_intsrc *ai; int i; /* Start off with all interrupts disabled. */ i8259_init(&atpics[MASTER], 0); i8259_init(&atpics[SLAVE], 1); atpic_enable_source((struct intsrc *)&atintrs[ICU_SLAVEID]); /* Install low-level interrupt handlers for all of our IRQs. */ for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { if (i == ICU_SLAVEID) continue; ai->at_intsrc.is_count = &ai->at_count; ai->at_intsrc.is_straycount = &ai->at_straycount; setidt(((struct atpic *)ai->at_intsrc.is_pic)->at_intbase + ai->at_irq, pti ? ai->at_intr_pti : ai->at_intr, SDT_ATPIC, SEL_KPL, GSEL_ATPIC); } /* * Look for an ELCR. If we find one, update the trigger modes. * If we don't find one, assume that IRQs 0, 1, 2, and 13 are * edge triggered and that everything else is level triggered. * We only use the trigger information to reprogram the ELCR if * we have one and as an optimization to avoid masking edge * triggered interrupts. For the case that we don't have an ELCR, * it doesn't hurt to mask an edge triggered interrupt, so we * assume level trigger for any interrupt that we aren't sure is * edge triggered. */ if (elcr_found) { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) ai->at_trigger = elcr_read_trigger(i); } else { for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) switch (i) { case 0: case 1: case 2: case 8: case 13: ai->at_trigger = INTR_TRIGGER_EDGE; break; default: ai->at_trigger = INTR_TRIGGER_LEVEL; break; } } } static void atpic_init(void *dummy __unused) { /* * Register our PICs, even if we aren't going to use any of their * pins so that they are suspended and resumed. 
*/ if (intr_register_pic(&atpics[0].at_pic) != 0 || intr_register_pic(&atpics[1].at_pic) != 0) panic("Unable to register ATPICs"); if (num_io_irqs == 0) num_io_irqs = NUM_ISA_IRQS; } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_FOURTH, atpic_init, NULL); void atpic_handle_intr(u_int vector, struct trapframe *frame) { struct intsrc *isrc; kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0); kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); trap_check_kstack(); KASSERT(vector < NUM_ISA_IRQS, ("unknown int %u\n", vector)); isrc = &atintrs[vector].at_intsrc; /* * If we don't have an event, see if this is a spurious * interrupt. */ if (isrc->is_event == NULL && (vector == 7 || vector == 15)) { int port, isr; /* * Read the ISR register to see if IRQ 7/15 is really * pending. Reset read register back to IRR when done. */ port = ((struct atpic *)isrc->is_pic)->at_ioaddr; spinlock_enter(); outb(port, OCW3_SEL | OCW3_RR | OCW3_RIS); isr = inb(port); outb(port, OCW3_SEL | OCW3_RR); spinlock_exit(); if ((isr & IRQ_MASK(7)) == 0) return; } intr_execute_handlers(isrc, frame); } #ifdef DEV_ISA /* * Bus attachment for the ISA PIC. */ static struct isa_pnp_id atpic_ids[] = { { 0x0000d041 /* PNP0000 */, "AT interrupt controller" }, { 0 } }; static int atpic_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, atpic_ids); if (result <= 0) device_quiet(dev); return (result); } /* * We might be granted IRQ 2, as this is typically consumed by chaining * between the two PIC components. If we're using the APIC, however, * this may not be the case, and as such we should free the resource. * (XXX untested) * * The generic ISA attachment code will handle allocating any other resources * that we don't explicitly claim here. */ static int atpic_attach(device_t dev) { struct resource *res; int rid; /* Try to allocate our IRQ and then free it. */ rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, 0); if (res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, res); return (0); } static device_method_t atpic_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atpic_probe), DEVMETHOD(device_attach, atpic_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t atpic_driver = { "atpic", atpic_methods, 1, /* no softc */ }; DRIVER_MODULE(atpic, isa, atpic_driver, 0, 0); DRIVER_MODULE(atpic, acpi, atpic_driver, 0, 0); ISA_PNP_INFO(atpic_ids); #endif /* DEV_ISA */ diff --git a/sys/x86/isa/atrtc.c b/sys/x86/isa/atrtc.c index df0acdd499be..63f41d866679 100644 --- a/sys/x86/isa/atrtc.c +++ b/sys/x86/isa/atrtc.c @@ -1,686 +1,683 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2008 Poul-Henning Kamp * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_acpi.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #endif #include #include "clock_if.h" #ifdef DEV_ACPI #include #include #include #include #endif /* tunable to detect a power loss of the rtc */ static bool atrtc_power_lost = false; SYSCTL_BOOL(_machdep, OID_AUTO, atrtc_power_lost, CTLFLAG_RD, &atrtc_power_lost, false, "RTC lost power on last power cycle (probably caused by an empty cmos battery)"); /* * atrtc_lock protects low-level access to individual hardware registers. * atrtc_time_lock protects the entire sequence of accessing multiple registers * to read or write the date and time. */ static struct mtx atrtc_lock; MTX_SYSINIT(atrtc_lock_init, &atrtc_lock, "atrtc", MTX_SPIN); /* Force RTC enabled/disabled. */ static int atrtc_enabled = -1; TUNABLE_INT("hw.atrtc.enabled", &atrtc_enabled); struct mtx atrtc_time_lock; MTX_SYSINIT(atrtc_time_lock_init, &atrtc_time_lock, "atrtc_time", MTX_DEF); int atrtcclock_disable = 0; static int rtc_century = 0; static int rtc_reg = -1; static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; static u_char rtc_statusb = RTCSB_24HR; #ifdef DEV_ACPI #define _COMPONENT ACPI_TIMER ACPI_MODULE_NAME("ATRTC") #endif /* * RTC support routines */ static inline u_char rtcin_locked(int reg) { if (rtc_reg != reg) { inb(0x84); outb(IO_RTC, reg); rtc_reg = reg; inb(0x84); } return (inb(IO_RTC + 1)); } static inline void rtcout_locked(int reg, u_char val) { if (rtc_reg != reg) { inb(0x84); outb(IO_RTC, reg); rtc_reg = reg; inb(0x84); } outb(IO_RTC + 1, val); inb(0x84); } int rtcin(int reg) { u_char val; mtx_lock_spin(&atrtc_lock); val = rtcin_locked(reg); mtx_unlock_spin(&atrtc_lock); return (val); } void writertc(int reg, u_char val) { mtx_lock_spin(&atrtc_lock); rtcout_locked(reg, val); mtx_unlock_spin(&atrtc_lock); } static void atrtc_start(void) { mtx_lock_spin(&atrtc_lock); rtcout_locked(RTC_STATUSA, rtc_statusa); rtcout_locked(RTC_STATUSB, RTCSB_24HR); mtx_unlock_spin(&atrtc_lock); } static void atrtc_rate(unsigned rate) { rtc_statusa = RTCSA_DIVIDER | rate; writertc(RTC_STATUSA, rtc_statusa); } static void atrtc_enable_intr(void) { rtc_statusb |= RTCSB_PINTR; mtx_lock_spin(&atrtc_lock); rtcout_locked(RTC_STATUSB, rtc_statusb); rtcin_locked(RTC_INTR); mtx_unlock_spin(&atrtc_lock); } static void atrtc_disable_intr(void) { rtc_statusb &= ~RTCSB_PINTR; mtx_lock_spin(&atrtc_lock); rtcout_locked(RTC_STATUSB, rtc_statusb); rtcin_locked(RTC_INTR); mtx_unlock_spin(&atrtc_lock); } void atrtc_restore(void) { /* Restore all of the RTC's "status" (actually, control) registers. 
*/ mtx_lock_spin(&atrtc_lock); rtcin_locked(RTC_STATUSA); /* dummy to get rtc_reg set */ rtcout_locked(RTC_STATUSB, RTCSB_24HR); rtcout_locked(RTC_STATUSA, rtc_statusa); rtcout_locked(RTC_STATUSB, rtc_statusb); rtcin_locked(RTC_INTR); mtx_unlock_spin(&atrtc_lock); } /********************************************************************** * RTC driver for subr_rtc */ struct atrtc_softc { int port_rid, intr_rid; struct resource *port_res; struct resource *intr_res; void *intr_handler; struct eventtimer et; #ifdef DEV_ACPI ACPI_HANDLE acpi_handle; #endif }; static int rtc_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { atrtc_rate(max(fls(period + (period >> 1)) - 17, 1)); atrtc_enable_intr(); return (0); } static int rtc_stop(struct eventtimer *et) { atrtc_disable_intr(); return (0); } /* * This routine receives statistical clock interrupts from the RTC. * As explained above, these occur at 128 interrupts per second. * When profiling, we receive interrupts at a rate of 1024 Hz. * * This does not actually add as much overhead as it sounds, because * when the statistical clock is active, the hardclock driver no longer * needs to keep (inaccurate) statistics on its own. This decouples * statistics gathering from scheduling interrupts. * * The RTC chip requires that we read status register C (RTC_INTR) * to acknowledge an interrupt, before it will generate the next one. * Under high interrupt load, rtcintr() can be indefinitely delayed and * the clock can tick immediately after the read from RTC_INTR. In this * case, the mc146818A interrupt signal will not drop for long enough * to register with the 8259 PIC. If an interrupt is missed, the stat * clock will halt, considerably degrading system performance. This is * why we use 'while' rather than a more straightforward 'if' below. * Stat clock ticks can still be lost, causing minor loss of accuracy * in the statistics, but the stat clock will no longer stop. */ static int rtc_intr(void *arg) { struct atrtc_softc *sc = (struct atrtc_softc *)arg; int flag = 0; while (rtcin(RTC_INTR) & RTCIR_PERIOD) { flag = 1; if (sc->et.et_active) sc->et.et_event_cb(&sc->et, sc->et.et_arg); } return(flag ? FILTER_HANDLED : FILTER_STRAY); } #ifdef DEV_ACPI /* * ACPI RTC CMOS address space handler */ #define ATRTC_LAST_REG 0x40 static void rtcin_region(int reg, void *buf, int len) { u_char *ptr = buf; /* Drop lock after each IO as intr and settime have greater priority */ while (len-- > 0) *ptr++ = rtcin(reg++) & 0xff; } static void rtcout_region(int reg, const void *buf, int len) { const u_char *ptr = buf; while (len-- > 0) writertc(reg++, *ptr++); } static bool atrtc_check_cmos_access(bool is_read, ACPI_PHYSICAL_ADDRESS addr, UINT32 len) { /* Block address space wrapping on out-of-bound access */ if (addr >= ATRTC_LAST_REG || addr + len > ATRTC_LAST_REG) return (false); if (is_read) { /* Reading 0x0C will muck with interrupts */ if (addr <= RTC_INTR && addr + len > RTC_INTR) return (false); } else { /* * Allow single-byte writes to alarm registers and * multi-byte writes to addr >= 0x30, else deny. 
*/ if (!((len == 1 && (addr == RTC_SECALRM || addr == RTC_MINALRM || addr == RTC_HRSALRM)) || addr >= 0x30)) return (false); } return (true); } static ACPI_STATUS atrtc_acpi_cmos_handler(UINT32 func, ACPI_PHYSICAL_ADDRESS addr, UINT32 bitwidth, UINT64 *value, void *context, void *region_context) { device_t dev = context; UINT32 bytewidth = howmany(bitwidth, 8); bool is_read = func == ACPI_READ; /* ACPICA is very verbose on CMOS handler failures, so we, too */ #define CMOS_HANDLER_ERR(fmt, ...) \ device_printf(dev, "ACPI [SystemCMOS] handler: " fmt, ##__VA_ARGS__) ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); if (value == NULL) { CMOS_HANDLER_ERR("NULL parameter\n"); return (AE_BAD_PARAMETER); } if (bitwidth == 0 || (bitwidth & 0x07) != 0) { CMOS_HANDLER_ERR("Invalid bitwidth: %u\n", bitwidth); return (AE_BAD_PARAMETER); } if (!atrtc_check_cmos_access(is_read, addr, bytewidth)) { CMOS_HANDLER_ERR("%s access rejected: addr=%#04jx, len=%u\n", is_read ? "Read" : "Write", (uintmax_t)addr, bytewidth); return (AE_BAD_PARAMETER); } switch (func) { case ACPI_READ: rtcin_region(addr, value, bytewidth); break; case ACPI_WRITE: rtcout_region(addr, value, bytewidth); break; default: CMOS_HANDLER_ERR("Invalid function: %u\n", func); return (AE_BAD_PARAMETER); } ACPI_VPRINT(dev, acpi_device_get_parent_softc(dev), "ACPI RTC CMOS %s access: addr=%#04x, len=%u, val=%*D\n", is_read ? "read" : "write", (unsigned)addr, bytewidth, bytewidth, value, " "); return (AE_OK); } static int atrtc_reg_acpi_cmos_handler(device_t dev) { struct atrtc_softc *sc = device_get_softc(dev); ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); /* Don't handle address space events if driver is disabled. */ if (acpi_disabled("atrtc")) return (ENXIO); if (ACPI_FAILURE(AcpiGetHandle(ACPI_ROOT_OBJECT, "\\_SB_", &sc->acpi_handle))) { return (ENXIO); } if (sc->acpi_handle == NULL || ACPI_FAILURE(AcpiInstallAddressSpaceHandler(sc->acpi_handle, ACPI_ADR_SPACE_CMOS, atrtc_acpi_cmos_handler, NULL, dev))) { sc->acpi_handle = NULL; device_printf(dev, "Can't register ACPI CMOS address space handler\n"); return (ENXIO); } return (0); } static int atrtc_unreg_acpi_cmos_handler(device_t dev) { struct atrtc_softc *sc = device_get_softc(dev); ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); if (sc->acpi_handle != NULL) AcpiRemoveAddressSpaceHandler(sc->acpi_handle, ACPI_ADR_SPACE_CMOS, atrtc_acpi_cmos_handler); return (0); } #endif /* DEV_ACPI */ /* * Attach to the ISA PnP descriptors for the timer and realtime clock. */ static struct isa_pnp_id atrtc_ids[] = { { 0x000bd041 /* PNP0B00 */, "AT realtime clock" }, { 0 } }; static bool atrtc_acpi_disabled(void) { #ifdef DEV_ACPI uint16_t flags; if (!acpi_get_fadt_bootflags(&flags)) return (false); return ((flags & ACPI_FADT_NO_CMOS_RTC) != 0); #else return (false); #endif } static int rtc_acpi_century_get(void) { #ifdef DEV_ACPI ACPI_TABLE_FADT *fadt; vm_paddr_t physaddr; int century; physaddr = acpi_find_table(ACPI_SIG_FADT); if (physaddr == 0) return (0); fadt = acpi_map_table(physaddr, ACPI_SIG_FADT); if (fadt == NULL) return (0); century = fadt->Century; acpi_unmap_table(fadt); return (century); #else return (0); #endif } static int atrtc_probe(device_t dev) { int result; if ((atrtc_enabled == -1 && atrtc_acpi_disabled()) || (atrtc_enabled == 0)) return (ENXIO); result = ISA_PNP_PROBE(device_get_parent(dev), dev, atrtc_ids); /* ENOENT means no PnP-ID, device is hinted. 
*/ if (result == ENOENT) { device_set_desc(dev, "AT realtime clock"); return (BUS_PROBE_LOW_PRIORITY); } rtc_century = rtc_acpi_century_get(); return (result); } static int atrtc_attach(device_t dev) { struct atrtc_softc *sc; rman_res_t s; int i; sc = device_get_softc(dev); sc->port_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &sc->port_rid, IO_RTC, IO_RTC + 1, 2, RF_ACTIVE); if (sc->port_res == NULL) device_printf(dev, "Warning: Couldn't map I/O.\n"); atrtc_start(); clock_register(dev, 1000000); bzero(&sc->et, sizeof(struct eventtimer)); if (!atrtcclock_disable && (resource_int_value(device_get_name(dev), device_get_unit(dev), "clock", &i) != 0 || i != 0)) { sc->intr_rid = 0; while (bus_get_resource(dev, SYS_RES_IRQ, sc->intr_rid, &s, NULL) == 0 && s != 8) sc->intr_rid++; sc->intr_res = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->intr_rid, 8, 8, 1, RF_ACTIVE); if (sc->intr_res == NULL) { device_printf(dev, "Can't map interrupt.\n"); return (0); } else if ((bus_setup_intr(dev, sc->intr_res, INTR_TYPE_CLK, rtc_intr, NULL, sc, &sc->intr_handler))) { device_printf(dev, "Can't setup interrupt.\n"); return (0); } else { /* Bind IRQ to BSP to avoid live migration. */ bus_bind_intr(dev, sc->intr_res, 0); } sc->et.et_name = "RTC"; sc->et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_POW2DIV; sc->et.et_quality = 0; sc->et.et_frequency = 32768; sc->et.et_min_period = 0x00080000; sc->et.et_max_period = 0x80000000; sc->et.et_start = rtc_start; sc->et.et_stop = rtc_stop; sc->et.et_priv = dev; et_register(&sc->et); } return(0); } static int atrtc_isa_attach(device_t dev) { return (atrtc_attach(dev)); } #ifdef DEV_ACPI static int atrtc_acpi_attach(device_t dev) { int ret; ret = atrtc_attach(dev); if (ret) return (ret); (void)atrtc_reg_acpi_cmos_handler(dev); return (0); } static int atrtc_acpi_detach(device_t dev) { (void)atrtc_unreg_acpi_cmos_handler(dev); return (0); } #endif /* DEV_ACPI */ static int atrtc_resume(device_t dev) { atrtc_restore(); return(0); } static int atrtc_settime(device_t dev __unused, struct timespec *ts) { struct bcd_clocktime bct; clock_ts_to_bcd(ts, &bct, false); clock_dbgprint_bcd(dev, CLOCK_DBG_WRITE, &bct); mtx_lock(&atrtc_time_lock); mtx_lock_spin(&atrtc_lock); /* Disable RTC updates and interrupts. */ rtcout_locked(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Write all the time registers. */ rtcout_locked(RTC_SEC, bct.sec); rtcout_locked(RTC_MIN, bct.min); rtcout_locked(RTC_HRS, bct.hour); rtcout_locked(RTC_WDAY, bct.dow + 1); rtcout_locked(RTC_DAY, bct.day); rtcout_locked(RTC_MONTH, bct.mon); rtcout_locked(RTC_YEAR, bct.year & 0xff); if (rtc_century) rtcout_locked(rtc_century, bct.year >> 8); /* * Re-enable RTC updates and interrupts. */ rtcout_locked(RTC_STATUSB, rtc_statusb); rtcin_locked(RTC_INTR); mtx_unlock_spin(&atrtc_lock); mtx_unlock(&atrtc_time_lock); return (0); } static int atrtc_gettime(device_t dev, struct timespec *ts) { struct bcd_clocktime bct; /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) { atrtc_power_lost = true; device_printf(dev, "WARNING: Battery failure indication\n"); return (EINVAL); } /* * wait for time update to complete * If RTCSA_TUP is zero, we have at least 244us before next update. * This is fast enough on most hardware, but a refinement would be * to make sure that no more than 240us pass after we start reading, * and try again if so. 
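 * One way to get that effect (an untested sketch, using the helpers in
 * this file) is to bracket the register reads with reads of RTC_SEC and
 * retry the whole sequence when the seconds value changed underneath us:
 *
 *	do {
 *		while (rtcin(RTC_STATUSA) & RTCSA_TUP)
 *			continue;
 *		sec = rtcin(RTC_SEC);
 *		... read the remaining time registers ...
 *	} while (sec != rtcin(RTC_SEC));
 *
 * This checks a different condition than the 240us bound described
 * above, but it catches the same race.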
*/ mtx_lock(&atrtc_time_lock); while (rtcin(RTC_STATUSA) & RTCSA_TUP) continue; mtx_lock_spin(&atrtc_lock); bct.sec = rtcin_locked(RTC_SEC); bct.min = rtcin_locked(RTC_MIN); bct.hour = rtcin_locked(RTC_HRS); bct.day = rtcin_locked(RTC_DAY); bct.mon = rtcin_locked(RTC_MONTH); bct.year = rtcin_locked(RTC_YEAR); if (rtc_century) bct.year |= rtcin_locked(rtc_century) << 8; mtx_unlock_spin(&atrtc_lock); mtx_unlock(&atrtc_time_lock); /* dow is unused in timespec conversion and we have no nsec info. */ bct.dow = 0; bct.nsec = 0; clock_dbgprint_bcd(dev, CLOCK_DBG_READ, &bct); return (clock_bcd_to_ts(&bct, ts, false)); } static device_method_t atrtc_isa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atrtc_probe), DEVMETHOD(device_attach, atrtc_isa_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), /* XXX stop statclock? */ DEVMETHOD(device_resume, atrtc_resume), /* clock interface */ DEVMETHOD(clock_gettime, atrtc_gettime), DEVMETHOD(clock_settime, atrtc_settime), { 0, 0 } }; static driver_t atrtc_isa_driver = { "atrtc", atrtc_isa_methods, sizeof(struct atrtc_softc), }; #ifdef DEV_ACPI static device_method_t atrtc_acpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atrtc_probe), DEVMETHOD(device_attach, atrtc_acpi_attach), DEVMETHOD(device_detach, atrtc_acpi_detach), /* XXX stop statclock? */ DEVMETHOD(device_resume, atrtc_resume), /* clock interface */ DEVMETHOD(clock_gettime, atrtc_gettime), DEVMETHOD(clock_settime, atrtc_settime), { 0, 0 } }; static driver_t atrtc_acpi_driver = { "atrtc", atrtc_acpi_methods, sizeof(struct atrtc_softc), }; #endif /* DEV_ACPI */ DRIVER_MODULE(atrtc, isa, atrtc_isa_driver, 0, 0); #ifdef DEV_ACPI DRIVER_MODULE(atrtc, acpi, atrtc_acpi_driver, 0, 0); #endif ISA_PNP_INFO(atrtc_ids); diff --git a/sys/x86/isa/clock.c b/sys/x86/isa/clock.c index 0b5d2e614d67..68d6085d5891 100644 --- a/sys/x86/isa/clock.c +++ b/sys/x86/isa/clock.c @@ -1,655 +1,652 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * Copyright (c) 2010 Alexander Motin * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include /* * Routines to handle clock hardware. */ #ifdef __amd64__ #define DEV_APIC #else #include "opt_apic.h" #endif #include "opt_clock.h" #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #endif int clkintr_pending; #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif u_int i8254_freq = TIMER_FREQ; TUNABLE_INT("hw.i8254.freq", &i8254_freq); int i8254_max_count; static int i8254_timecounter = 1; static struct mtx clock_lock; static struct intsrc *i8254_intsrc; static uint16_t i8254_lastcount; static uint16_t i8254_offset; static int (*i8254_pending)(struct intsrc *); static int i8254_ticked; struct attimer_softc { int intr_en; int port_rid, intr_rid; struct resource *port_res; struct resource *intr_res; void *intr_handler; struct timecounter tc; struct eventtimer et; int mode; #define MODE_STOP 0 #define MODE_PERIODIC 1 #define MODE_ONESHOT 2 uint32_t period; }; static struct attimer_softc *attimer_sc = NULL; static int timer0_period = -2; static int timer0_mode = 0xffff; static int timer0_last = 0xffff; /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 static u_char timer2_state; static unsigned i8254_get_timecount(struct timecounter *tc); static void set_i8254_freq(int mode, uint32_t period); void clock_init(void) { /* Init the clock lock */ mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE); /* Init the clock in order to use DELAY */ init_ops.early_clock_source_init(); tsc_init(); } static int clkintr(void *arg) { struct attimer_softc *sc = (struct attimer_softc *)arg; if (i8254_timecounter && sc->period != 0) { mtx_lock_spin(&clock_lock); if (i8254_ticked) i8254_ticked = 0; else { i8254_offset += i8254_max_count; i8254_lastcount = 0; } clkintr_pending = 0; mtx_unlock_spin(&clock_lock); } if (sc->et.et_active && sc->mode != MODE_STOP) sc->et.et_event_cb(&sc->et, sc->et.et_arg); return (FILTER_HANDLED); } int timer_spkr_acquire(void) { int mode; mode = TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT; if (timer2_state != RELEASED) return (-1); timer2_state = ACQUIRED; /* * This access to the timer registers is as atomic as possible * because it is a single instruction. We could do better if we * knew the rate. Use of splclock() limits glitches to 10-100us, * and this is probably good enough for timer2, so we aren't as * careful with it as with timer0. 
*/ outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); ppi_spkr_on(); /* enable counter2 output to speaker */ return (0); } int timer_spkr_release(void) { if (timer2_state != ACQUIRED) return (-1); timer2_state = RELEASED; outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); ppi_spkr_off(); /* disable counter2 output to speaker */ return (0); } void timer_spkr_setfreq(int freq) { freq = i8254_freq / freq; mtx_lock_spin(&clock_lock); outb(TIMER_CNTR2, freq & 0xff); outb(TIMER_CNTR2, freq >> 8); mtx_unlock_spin(&clock_lock); } static int getit(void) { int high, low; mtx_lock_spin(&clock_lock); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); mtx_unlock_spin(&clock_lock); return ((high << 8) | low); } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (i8254_freq / hz) * Note: timer had better have been programmed before this is first used! */ void i8254_delay(int n) { int delta, prev_tick, tick, ticks_left; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). * * However, if ddb is active then use a fake counter since reading * the i8254 counter involves acquiring a lock. ddb must not do * locking for many reasons, but it calls here for at least atkbd * input. */ #ifdef KDB if (kdb_active) prev_tick = 1; else #endif prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (i8254_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)i8254_freq + 999999) / 1000000; while (ticks_left > 0) { #ifdef KDB if (kdb_active) { inb(0x84); tick = prev_tick - 1; if (tick <= 0) tick = i8254_max_count; } else #endif tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = prev_tick - tick; prev_tick = tick; if (delta < 0) { delta += i8254_max_count; /* * Guard against i8254_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_i8254_freq() is * traced. 
*/ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } static void set_i8254_freq(int mode, uint32_t period) { int new_count, new_mode; mtx_lock_spin(&clock_lock); if (mode == MODE_STOP) { if (i8254_timecounter) { mode = MODE_PERIODIC; new_count = 0x10000; } else new_count = -1; } else { new_count = min(((uint64_t)i8254_freq * period + 0x80000000LLU) >> 32, 0x10000); } if (new_count == timer0_period) goto out; i8254_max_count = ((new_count & ~0xffff) != 0) ? 0xffff : new_count; timer0_period = (mode == MODE_PERIODIC) ? new_count : -1; switch (mode) { case MODE_STOP: new_mode = TIMER_SEL0 | TIMER_INTTC | TIMER_16BIT; outb(TIMER_MODE, new_mode); outb(TIMER_CNTR0, 0); outb(TIMER_CNTR0, 0); break; case MODE_PERIODIC: new_mode = TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT; outb(TIMER_MODE, new_mode); outb(TIMER_CNTR0, new_count & 0xff); outb(TIMER_CNTR0, new_count >> 8); break; case MODE_ONESHOT: if (new_count < 256 && timer0_last < 256) { new_mode = TIMER_SEL0 | TIMER_INTTC | TIMER_LSB; if (new_mode != timer0_mode) outb(TIMER_MODE, new_mode); outb(TIMER_CNTR0, new_count & 0xff); break; } new_mode = TIMER_SEL0 | TIMER_INTTC | TIMER_16BIT; if (new_mode != timer0_mode) outb(TIMER_MODE, new_mode); outb(TIMER_CNTR0, new_count & 0xff); outb(TIMER_CNTR0, new_count >> 8); break; default: panic("set_i8254_freq: unknown operational mode"); } timer0_mode = new_mode; timer0_last = new_count; out: mtx_unlock_spin(&clock_lock); } static void i8254_restore(void) { timer0_period = -2; timer0_mode = 0xffff; timer0_last = 0xffff; if (attimer_sc != NULL) set_i8254_freq(attimer_sc->mode, attimer_sc->period); else set_i8254_freq(MODE_STOP, 0); } /* This is separate from startrtclock() so that it can be called early. */ void i8254_init(void) { set_i8254_freq(MODE_STOP, 0); } void startrtclock(void) { start_TSC(); } void cpu_initclocks(void) { struct thread *td; int i; td = curthread; tsc_calibrate(); #ifdef DEV_APIC lapic_calibrate_timer(); #endif cpu_initclocks_bsp(); CPU_FOREACH(i) { if (i == 0) continue; thread_lock(td); sched_bind(td, i); thread_unlock(td); cpu_initclocks_ap(); } thread_lock(td); if (sched_is_bound(td)) sched_unbind(td); thread_unlock(td); } static int sysctl_machdep_i8254_freq(SYSCTL_HANDLER_ARGS) { int error; u_int freq; /* * Use `i8254' instead of `timer' in external names because `timer' * is too generic. Should use it everywhere. */ freq = i8254_freq; error = sysctl_handle_int(oidp, &freq, 0, req); if (error == 0 && req->newptr != NULL) { i8254_freq = freq; if (attimer_sc != NULL) { set_i8254_freq(attimer_sc->mode, attimer_sc->period); attimer_sc->tc.tc_frequency = freq; } else { set_i8254_freq(MODE_STOP, 0); } } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(u_int), sysctl_machdep_i8254_freq, "IU", "i8254 timer frequency"); static unsigned i8254_get_timecount(struct timecounter *tc) { device_t dev = (device_t)tc->tc_priv; struct attimer_softc *sc = device_get_softc(dev); register_t flags; uint16_t count; u_int high, low; if (sc->period == 0) return (i8254_max_count - getit()); #ifdef __amd64__ flags = read_rflags(); #else flags = read_eflags(); #endif mtx_lock_spin(&clock_lock); /* Select timer0 and latch counter value. 
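 * The latch command freezes the selected counter so that the two 8-bit
 * reads below observe a consistent 16-bit value.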
*/ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); high = inb(TIMER_CNTR0); count = i8254_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || ((count < 20 || (!(flags & PSL_I) && count < i8254_max_count / 2u)) && i8254_pending != NULL && i8254_pending(i8254_intsrc))))) { i8254_ticked = 1; i8254_offset += i8254_max_count; } i8254_lastcount = count; count += i8254_offset; mtx_unlock_spin(&clock_lock); return (count); } static int attimer_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { device_t dev = (device_t)et->et_priv; struct attimer_softc *sc = device_get_softc(dev); if (period != 0) { sc->mode = MODE_PERIODIC; sc->period = period; } else { sc->mode = MODE_ONESHOT; sc->period = first; } if (!sc->intr_en) { i8254_intsrc->is_pic->pic_enable_source(i8254_intsrc); sc->intr_en = 1; } set_i8254_freq(sc->mode, sc->period); return (0); } static int attimer_stop(struct eventtimer *et) { device_t dev = (device_t)et->et_priv; struct attimer_softc *sc = device_get_softc(dev); sc->mode = MODE_STOP; sc->period = 0; set_i8254_freq(sc->mode, sc->period); return (0); } #ifdef DEV_ISA /* * Attach to the ISA PnP descriptors for the timer */ static struct isa_pnp_id attimer_ids[] = { { 0x0001d041 /* PNP0100 */, "AT timer" }, { 0 } }; static int attimer_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, attimer_ids); /* ENOENT means no PnP-ID, device is hinted. */ if (result == ENOENT) { device_set_desc(dev, "AT timer"); return (BUS_PROBE_LOW_PRIORITY); } return (result); } static int attimer_attach(device_t dev) { struct attimer_softc *sc; rman_res_t s; int i; attimer_sc = sc = device_get_softc(dev); bzero(sc, sizeof(struct attimer_softc)); if (!(sc->port_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &sc->port_rid, IO_TIMER1, IO_TIMER1 + 3, 4, RF_ACTIVE))) device_printf(dev,"Warning: Couldn't map I/O.\n"); i8254_intsrc = intr_lookup_source(0); if (i8254_intsrc != NULL) i8254_pending = i8254_intsrc->is_pic->pic_source_pending; resource_int_value(device_get_name(dev), device_get_unit(dev), "timecounter", &i8254_timecounter); set_i8254_freq(MODE_STOP, 0); if (i8254_timecounter) { sc->tc.tc_get_timecount = i8254_get_timecount; sc->tc.tc_counter_mask = 0xffff; sc->tc.tc_frequency = i8254_freq; sc->tc.tc_name = "i8254"; sc->tc.tc_quality = 0; sc->tc.tc_priv = dev; tc_init(&sc->tc); } if (resource_int_value(device_get_name(dev), device_get_unit(dev), "clock", &i) != 0 || i != 0) { sc->intr_rid = 0; while (bus_get_resource(dev, SYS_RES_IRQ, sc->intr_rid, &s, NULL) == 0 && s != 0) sc->intr_rid++; if (!(sc->intr_res = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->intr_rid, 0, 0, 1, RF_ACTIVE))) { device_printf(dev,"Can't map interrupt.\n"); return (0); } /* Dirty hack, to make bus_setup_intr to not enable source. 
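 * Pre-incrementing is_handlers makes the interrupt code think this is
 * not the first handler registered for the source, so the source stays
 * masked during bus_setup_intr(); the count is dropped again right
 * after, and the interrupt is enabled explicitly below.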
*/ i8254_intsrc->is_handlers++; if ((bus_setup_intr(dev, sc->intr_res, INTR_MPSAFE | INTR_TYPE_CLK, (driver_filter_t *)clkintr, NULL, sc, &sc->intr_handler))) { device_printf(dev, "Can't setup interrupt.\n"); i8254_intsrc->is_handlers--; return (0); } i8254_intsrc->is_handlers--; i8254_intsrc->is_pic->pic_enable_intr(i8254_intsrc); sc->et.et_name = "i8254"; sc->et.et_flags = ET_FLAGS_PERIODIC; if (!i8254_timecounter) sc->et.et_flags |= ET_FLAGS_ONESHOT; sc->et.et_quality = 100; sc->et.et_frequency = i8254_freq; sc->et.et_min_period = (0x0002LLU << 32) / i8254_freq; sc->et.et_max_period = (0xfffeLLU << 32) / i8254_freq; sc->et.et_start = attimer_start; sc->et.et_stop = attimer_stop; sc->et.et_priv = dev; et_register(&sc->et); } return(0); } static int attimer_resume(device_t dev) { i8254_restore(); return (0); } static device_method_t attimer_methods[] = { /* Device interface */ DEVMETHOD(device_probe, attimer_probe), DEVMETHOD(device_attach, attimer_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, attimer_resume), { 0, 0 } }; static driver_t attimer_driver = { "attimer", attimer_methods, sizeof(struct attimer_softc), }; DRIVER_MODULE(attimer, isa, attimer_driver, 0, 0); DRIVER_MODULE(attimer, acpi, attimer_driver, 0, 0); ISA_PNP_INFO(attimer_ids); #endif /* DEV_ISA */ diff --git a/sys/x86/isa/isa_dma.c b/sys/x86/isa/isa_dma.c index cf2f2ea417e6..6196d5a6a8e6 100644 --- a/sys/x86/isa/isa_dma.c +++ b/sys/x86/isa/isa_dma.c @@ -1,599 +1,595 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include /* * code to manage AT bus * * 92/08/18 Frank P. MacLachlan (fpm@crash.cts.com): * Fixed uninitialized variable problem and added code to deal * with DMA page boundaries in isa_dmarangecheck(). 
Fixed word * mode DMA count compution and reorganized DMA setup code in * isa_dmastart() */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ISARAM_END 0x1000000 static int isa_dmarangecheck(caddr_t va, u_int length, int chan); static caddr_t dma_bouncebuf[8]; static u_int dma_bouncebufsize[8]; static u_int8_t dma_bounced = 0; static u_int8_t dma_busy = 0; /* Used in isa_dmastart() */ static u_int8_t dma_inuse = 0; /* User for acquire/release */ static u_int8_t dma_auto_mode = 0; static struct mtx isa_dma_lock; MTX_SYSINIT(isa_dma_lock, &isa_dma_lock, "isa DMA lock", MTX_DEF); #define VALID_DMA_MASK (7) /* high byte of address is stored in this port for i-th dma channel */ static int dmapageport[8] = { 0x87, 0x83, 0x81, 0x82, 0x8f, 0x8b, 0x89, 0x8a }; /* * Setup a DMA channel's bounce buffer. */ int isa_dma_init(int chan, u_int bouncebufsize, int flag) { void *buf; #ifdef DIAGNOSTIC if (chan & ~VALID_DMA_MASK) panic("isa_dma_init: channel out of range"); #endif /* Try malloc() first. It works better if it works. */ buf = malloc(bouncebufsize, M_DEVBUF, flag); if (buf != NULL) { if (isa_dmarangecheck(buf, bouncebufsize, chan) != 0) { free(buf, M_DEVBUF); buf = NULL; } } if (buf == NULL) { buf = contigmalloc(bouncebufsize, M_DEVBUF, flag, 0ul, 0xfffffful, 1ul, chan & 4 ? 0x20000ul : 0x10000ul); } if (buf == NULL) return (ENOMEM); mtx_lock(&isa_dma_lock); /* * If a DMA channel is shared, both drivers have to call isa_dma_init * since they don't know that the other driver will do it. * Just return if we're already set up good. * XXX: this only works if they agree on the bouncebuf size. This * XXX: is typically the case since they are multiple instances of * XXX: the same driver. */ if (dma_bouncebuf[chan] != NULL) { free(buf, M_DEVBUF); mtx_unlock(&isa_dma_lock); return (0); } dma_bouncebufsize[chan] = bouncebufsize; dma_bouncebuf[chan] = buf; mtx_unlock(&isa_dma_lock); return (0); } /* * Register a DMA channel's usage. Usually called from a device driver * in open() or during its initialization. */ int isa_dma_acquire(int chan) { #ifdef DIAGNOSTIC if (chan & ~VALID_DMA_MASK) panic("isa_dma_acquire: channel out of range"); #endif mtx_lock(&isa_dma_lock); if (dma_inuse & (1 << chan)) { printf("isa_dma_acquire: channel %d already in use\n", chan); mtx_unlock(&isa_dma_lock); return (EBUSY); } dma_inuse |= (1 << chan); dma_auto_mode &= ~(1 << chan); mtx_unlock(&isa_dma_lock); return (0); } /* * Unregister a DMA channel's usage. Usually called from a device driver * during close() or during its shutdown. */ void isa_dma_release(int chan) { #ifdef DIAGNOSTIC if (chan & ~VALID_DMA_MASK) panic("isa_dma_release: channel out of range"); mtx_lock(&isa_dma_lock); if ((dma_inuse & (1 << chan)) == 0) printf("isa_dma_release: channel %d not in use\n", chan); #else mtx_lock(&isa_dma_lock); #endif if (dma_busy & (1 << chan)) { dma_busy &= ~(1 << chan); /* * XXX We should also do "dma_bounced &= (1 << chan);" * because we are acting on behalf of isa_dmadone() which * was not called to end the last DMA operation. This does * not matter now, but it may in the future. */ } dma_inuse &= ~(1 << chan); dma_auto_mode &= ~(1 << chan); mtx_unlock(&isa_dma_lock); } /* * isa_dmacascade(): program 8237 DMA controller channel to accept * external dma control by a board. 
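* In cascade mode the controller only arbitrates the channel's DREQ/DACK handshake for the board; the board generates its own addresses and counts, so only the channel mode is set and the channel unmasked here.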
*/ void isa_dmacascade(int chan) { #ifdef DIAGNOSTIC if (chan & ~VALID_DMA_MASK) panic("isa_dmacascade: channel out of range"); #endif mtx_lock(&isa_dma_lock); /* set dma channel mode, and set dma channel mode */ if ((chan & 4) == 0) { outb(DMA1_MODE, DMA37MD_CASCADE | chan); outb(DMA1_SMSK, chan); } else { outb(DMA2_MODE, DMA37MD_CASCADE | (chan & 3)); outb(DMA2_SMSK, chan & 3); } mtx_unlock(&isa_dma_lock); } /* * isa_dmastart(): program 8237 DMA controller channel, avoid page alignment * problems by using a bounce buffer. */ void isa_dmastart(int flags, caddr_t addr, u_int nbytes, int chan) { vm_paddr_t phys; int waport; caddr_t newaddr; int dma_range_checked; dma_range_checked = isa_dmarangecheck(addr, nbytes, chan); #ifdef DIAGNOSTIC if (chan & ~VALID_DMA_MASK) panic("isa_dmastart: channel out of range"); if ((chan < 4 && nbytes > (1<<16)) || (chan >= 4 && (nbytes > (1<<17) || (uintptr_t)addr & 1))) panic("isa_dmastart: impossible request"); mtx_lock(&isa_dma_lock); if ((dma_inuse & (1 << chan)) == 0) printf("isa_dmastart: channel %d not acquired\n", chan); #else mtx_lock(&isa_dma_lock); #endif #if 0 /* * XXX This should be checked, but drivers like ad1848 only call * isa_dmastart() once because they use Auto DMA mode. If we * leave this in, drivers that do this will print this continuously. */ if (dma_busy & (1 << chan)) printf("isa_dmastart: channel %d busy\n", chan); #endif dma_busy |= (1 << chan); if (dma_range_checked) { if (dma_bouncebuf[chan] == NULL || dma_bouncebufsize[chan] < nbytes) panic("isa_dmastart: bad bounce buffer"); dma_bounced |= (1 << chan); newaddr = dma_bouncebuf[chan]; /* copy bounce buffer on write */ if (!(flags & ISADMA_READ)) bcopy(addr, newaddr, nbytes); addr = newaddr; } /* translate to physical */ phys = pmap_extract(kernel_pmap, (vm_offset_t)addr); if (flags & ISADMA_RAW) { dma_auto_mode |= (1 << chan); } else { dma_auto_mode &= ~(1 << chan); } if ((chan & 4) == 0) { /* * Program one of DMA channels 0..3. These are * byte mode channels. */ /* set dma channel mode, and reset address ff */ /* If ISADMA_RAW flag is set, then use autoinitialise mode */ if (flags & ISADMA_RAW) { if (flags & ISADMA_READ) outb(DMA1_MODE, DMA37MD_AUTO|DMA37MD_WRITE|chan); else outb(DMA1_MODE, DMA37MD_AUTO|DMA37MD_READ|chan); } else if (flags & ISADMA_READ) outb(DMA1_MODE, DMA37MD_SINGLE|DMA37MD_WRITE|chan); else outb(DMA1_MODE, DMA37MD_SINGLE|DMA37MD_READ|chan); outb(DMA1_FFC, 0); /* send start address */ waport = DMA1_CHN(chan); outb(waport, phys); outb(waport, phys>>8); outb(dmapageport[chan], phys>>16); /* send count */ outb(waport + 1, --nbytes); outb(waport + 1, nbytes>>8); /* unmask channel */ outb(DMA1_SMSK, chan); } else { /* * Program one of DMA channels 4..7. These are * word mode channels. 
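* Word channels transfer 16 bits at a time: the start address is programmed as the physical address shifted right by one, and the count is converted from bytes to words before being written.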
*/ /* set dma channel mode, and reset address ff */ /* If ISADMA_RAW flag is set, then use autoinitialise mode */ if (flags & ISADMA_RAW) { if (flags & ISADMA_READ) outb(DMA2_MODE, DMA37MD_AUTO|DMA37MD_WRITE|(chan&3)); else outb(DMA2_MODE, DMA37MD_AUTO|DMA37MD_READ|(chan&3)); } else if (flags & ISADMA_READ) outb(DMA2_MODE, DMA37MD_SINGLE|DMA37MD_WRITE|(chan&3)); else outb(DMA2_MODE, DMA37MD_SINGLE|DMA37MD_READ|(chan&3)); outb(DMA2_FFC, 0); /* send start address */ waport = DMA2_CHN(chan - 4); outb(waport, phys>>1); outb(waport, phys>>9); outb(dmapageport[chan], phys>>16); /* send count */ nbytes >>= 1; outb(waport + 2, --nbytes); outb(waport + 2, nbytes>>8); /* unmask channel */ outb(DMA2_SMSK, chan & 3); } mtx_unlock(&isa_dma_lock); } void isa_dmadone(int flags, caddr_t addr, int nbytes, int chan) { #ifdef DIAGNOSTIC if (chan & ~VALID_DMA_MASK) panic("isa_dmadone: channel out of range"); if ((dma_inuse & (1 << chan)) == 0) printf("isa_dmadone: channel %d not acquired\n", chan); #endif mtx_lock(&isa_dma_lock); if (((dma_busy & (1 << chan)) == 0) && (dma_auto_mode & (1 << chan)) == 0 ) printf("isa_dmadone: channel %d not busy\n", chan); if ((dma_auto_mode & (1 << chan)) == 0) outb(chan & 4 ? DMA2_SMSK : DMA1_SMSK, (chan & 3) | 4); if (dma_bounced & (1 << chan)) { /* copy bounce buffer on read */ if (flags & ISADMA_READ) bcopy(dma_bouncebuf[chan], addr, nbytes); dma_bounced &= ~(1 << chan); } dma_busy &= ~(1 << chan); mtx_unlock(&isa_dma_lock); } /* * Check for problems with the address range of a DMA transfer * (non-contiguous physical pages, outside of bus address space, * crossing DMA page boundaries). * Return true if special handling needed. */ static int isa_dmarangecheck(caddr_t va, u_int length, int chan) { vm_paddr_t phys, priorpage = 0; vm_offset_t endva; u_int dma_pgmsk = (chan & 4) ? ~(128*1024-1) : ~(64*1024-1); endva = (vm_offset_t)round_page((vm_offset_t)va + length); for (; va < (caddr_t) endva ; va += PAGE_SIZE) { phys = trunc_page(pmap_extract(kernel_pmap, (vm_offset_t)va)); if (phys == 0) panic("isa_dmacheck: no physical page present"); if (phys >= ISARAM_END) return (1); if (priorpage) { if (priorpage + PAGE_SIZE != phys) return (1); /* check if crossing a DMA page boundary */ if (((u_int)priorpage ^ (u_int)phys) & dma_pgmsk) return (1); } priorpage = phys; } return (0); } /* * Query the progress of a transfer on a DMA channel. * * To avoid having to interrupt a transfer in progress, we sample * each of the high and low databytes twice, and apply the following * logic to determine the correct count. * * Reads are performed with interrupts disabled, thus it is to be * expected that the time between reads is very small. At most * one rollover in the low count byte can be expected within the * four reads that are performed. * * There are three gaps in which a rollover can occur : * * - read low1 * gap1 * - read high1 * gap2 * - read low2 * gap3 * - read high2 * * If a rollover occurs in gap1 or gap2, the low2 value will be * greater than the low1 value. In this case, low2 and high2 are a * corresponding pair. * * In any other case, low1 and high1 can be considered to be correct. * * The function returns the number of bytes remaining in the transfer, * or -1 if the channel requested is not active. * */ static int isa_dmastatus_locked(int chan) { u_long cnt = 0; int ffport, waport; u_long low1, high1, low2, high2; mtx_assert(&isa_dma_lock, MA_OWNED); /* channel active? 
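* i.e. has it been acquired with isa_dma_acquire()?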
*/ if ((dma_inuse & (1 << chan)) == 0) { printf("isa_dmastatus: channel %d not active\n", chan); return(-1); } /* channel busy? */ if (((dma_busy & (1 << chan)) == 0) && (dma_auto_mode & (1 << chan)) == 0 ) { printf("chan %d not busy\n", chan); return -2 ; } if (chan < 4) { /* low DMA controller */ ffport = DMA1_FFC; waport = DMA1_CHN(chan) + 1; } else { /* high DMA controller */ ffport = DMA2_FFC; waport = DMA2_CHN(chan - 4) + 2; } disable_intr(); /* no interrupts Mr Jones! */ outb(ffport, 0); /* clear register LSB flipflop */ low1 = inb(waport); high1 = inb(waport); outb(ffport, 0); /* clear again */ low2 = inb(waport); high2 = inb(waport); enable_intr(); /* enable interrupts again */ /* * Now decide if a wrap has tried to skew our results. * Note that after TC, the count will read 0xffff, while we want * to return zero, so we add and then mask to compensate. */ if (low1 >= low2) { cnt = (low1 + (high1 << 8) + 1) & 0xffff; } else { cnt = (low2 + (high2 << 8) + 1) & 0xffff; } if (chan >= 4) /* high channels move words */ cnt *= 2; return(cnt); } int isa_dmastatus(int chan) { int status; mtx_lock(&isa_dma_lock); status = isa_dmastatus_locked(chan); mtx_unlock(&isa_dma_lock); return (status); } /* * Reached terminal count yet ? */ int isa_dmatc(int chan) { if (chan < 4) return(inb(DMA1_STATUS) & (1 << chan)); else return(inb(DMA2_STATUS) & (1 << (chan & 3))); } /* * Stop a DMA transfer currently in progress. */ int isa_dmastop(int chan) { int status; mtx_lock(&isa_dma_lock); if ((dma_inuse & (1 << chan)) == 0) printf("isa_dmastop: channel %d not acquired\n", chan); if (((dma_busy & (1 << chan)) == 0) && ((dma_auto_mode & (1 << chan)) == 0)) { printf("chan %d not busy\n", chan); mtx_unlock(&isa_dma_lock); return -2 ; } if ((chan & 4) == 0) { outb(DMA1_SMSK, (chan & 3) | 4 /* disable mask */); } else { outb(DMA2_SMSK, (chan & 3) | 4 /* disable mask */); } status = isa_dmastatus_locked(chan); mtx_unlock(&isa_dma_lock); return (status); } /* * Attach to the ISA PnP descriptor for the AT DMA controller */ static struct isa_pnp_id atdma_ids[] = { { 0x0002d041 /* PNP0200 */, "AT DMA controller" }, { 0 } }; static int atdma_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, atdma_ids)) <= 0) device_quiet(dev); return(result); } static int atdma_attach(device_t dev) { return(0); } static device_method_t atdma_methods[] = { /* Device interface */ DEVMETHOD(device_probe, atdma_probe), DEVMETHOD(device_attach, atdma_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t atdma_driver = { "atdma", atdma_methods, 1, /* no softc */ }; DRIVER_MODULE(atdma, isa, atdma_driver, 0, 0); DRIVER_MODULE(atdma, acpi, atdma_driver, 0, 0); ISA_PNP_INFO(atdma_ids); diff --git a/sys/x86/pci/pci_bus.c b/sys/x86/pci/pci_bus.c index 8aafbfb9d664..ca4773ecff9e 100644 --- a/sys/x86/pci/pci_bus.c +++ b/sys/x86/pci/pci_bus.c @@ -1,763 +1,759 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1997, Stefan Esser * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CPU_ELAN #include #endif #include #include #include #include "pcib_if.h" int legacy_pcib_maxslots(device_t dev) { return 31; } /* read configuration space register */ uint32_t legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { return(pci_cfgregread(0, bus, slot, func, reg, bytes)); } /* write configuration space register */ void legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t data, int bytes) { pci_cfgregwrite(0, bus, slot, func, reg, data, bytes); } /* route interrupt */ static int legacy_pcib_route_interrupt(device_t pcib, device_t dev, int pin) { #ifdef __HAVE_PIR return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), pin)); #else /* No routing possible */ return (PCI_INVALID_IRQ); #endif } /* Pass MSI requests up to the nexus. */ int legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { device_t bus; bus = device_get_parent(pcib); return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, irqs)); } int legacy_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { device_t bus; bus = device_get_parent(pcib); return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq)); } int legacy_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus, hostb; int error, func, slot; bus = device_get_parent(pcib); error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data); if (error) return (error); slot = legacy_get_pcislot(pcib); func = legacy_get_pcifunc(pcib); if (slot == -1 || func == -1) return (0); hostb = pci_find_bsf(0, slot, func); KASSERT(hostb != NULL, ("%s: missing hostb for 0:%d:%d", __func__, slot, func)); pci_ht_map_msi(hostb, *addr); return (0); } static const char * legacy_pcib_is_host_bridge(int bus, int slot, int func, uint32_t id, uint8_t class, uint8_t subclass, uint8_t *busnum) { #ifdef __i386__ const char *s = NULL; static uint8_t pxb[4]; /* hack for 450nx */ *busnum = 0; switch (id) { case 0x12258086: s = "Intel 824?? 
host to PCI bridge"; /* XXX This is a guess */ /* *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x41, 1); */ *busnum = bus; break; case 0x71208086: s = "Intel 82810 (i810 GMCH) Host To Hub bridge"; break; case 0x71228086: s = "Intel 82810-DC100 (i810-DC100 GMCH) Host To Hub bridge"; break; case 0x71248086: s = "Intel 82810E (i810E GMCH) Host To Hub bridge"; break; case 0x11308086: s = "Intel 82815 (i815 GMCH) Host To Hub bridge"; break; case 0x71808086: s = "Intel 82443LX (440 LX) host to PCI bridge"; break; case 0x71908086: s = "Intel 82443BX (440 BX) host to PCI bridge"; break; case 0x71928086: s = "Intel 82443BX host to PCI bridge (AGP disabled)"; break; case 0x71948086: s = "Intel 82443MX host to PCI bridge"; break; case 0x71a08086: s = "Intel 82443GX host to PCI bridge"; break; case 0x71a18086: s = "Intel 82443GX host to AGP bridge"; break; case 0x71a28086: s = "Intel 82443GX host to PCI bridge (AGP disabled)"; break; case 0x84c48086: s = "Intel 82454KX/GX (Orion) host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x4a, 1); break; case 0x84ca8086: /* * For the 450nx chipset, there is a whole bundle of * things pretending to be host bridges. The MIOC will * be seen first and isn't really a pci bridge (the * actual buses are attached to the PXB's). We need to * read the registers of the MIOC to figure out the * bus numbers for the PXB channels. * * Since the MIOC doesn't have a pci bus attached, we * pretend it wasn't there. */ pxb[0] = legacy_pcib_read_config(0, bus, slot, func, 0xd0, 1); /* BUSNO[0] */ pxb[1] = legacy_pcib_read_config(0, bus, slot, func, 0xd1, 1) + 1; /* SUBA[0]+1 */ pxb[2] = legacy_pcib_read_config(0, bus, slot, func, 0xd3, 1); /* BUSNO[1] */ pxb[3] = legacy_pcib_read_config(0, bus, slot, func, 0xd4, 1) + 1; /* SUBA[1]+1 */ return NULL; case 0x84cb8086: switch (slot) { case 0x12: s = "Intel 82454NX PXB#0, Bus#A"; *busnum = pxb[0]; break; case 0x13: s = "Intel 82454NX PXB#0, Bus#B"; *busnum = pxb[1]; break; case 0x14: s = "Intel 82454NX PXB#1, Bus#A"; *busnum = pxb[2]; break; case 0x15: s = "Intel 82454NX PXB#1, Bus#B"; *busnum = pxb[3]; break; } break; case 0x1A308086: s = "Intel 82845 Host to PCI bridge"; break; /* AMD -- vendor 0x1022 */ case 0x30001022: s = "AMD Elan SC520 host to PCI bridge"; #ifdef CPU_ELAN init_AMD_Elan_sc520(); #else printf( "*** WARNING: missing CPU_ELAN -- timekeeping may be wrong\n"); #endif break; case 0x70061022: s = "AMD-751 host to PCI bridge"; break; case 0x700e1022: s = "AMD-761 host to PCI bridge"; break; /* SiS -- vendor 0x1039 */ case 0x04961039: s = "SiS 85c496"; break; case 0x04061039: s = "SiS 85c501"; break; case 0x06011039: s = "SiS 85c601"; break; case 0x55911039: s = "SiS 5591 host to PCI bridge"; break; case 0x00011039: s = "SiS 5591 host to AGP bridge"; break; /* VLSI -- vendor 0x1004 */ case 0x00051004: s = "VLSI 82C592 Host to PCI bridge"; break; /* XXX Here is MVP3, I got the datasheet but NO M/B to test it */ /* totally. Please let me know if anything wrong. -F */ /* XXX need info on the MVP3 -- any takers? */ case 0x05981106: s = "VIA 82C598MVP (Apollo MVP3) host bridge"; break; /* AcerLabs -- vendor 0x10b9 */ /* Funny : The datasheet told me vendor id is "10b8",sub-vendor */ /* id is '10b9" but the register always shows "10b9". 
-Foxfair */ case 0x154110b9: s = "AcerLabs M1541 (Aladdin-V) PCI host bridge"; break; /* OPTi -- vendor 0x1045 */ case 0xc7011045: s = "OPTi 82C700 host to PCI bridge"; break; case 0xc8221045: s = "OPTi 82C822 host to PCI Bridge"; break; /* ServerWorks -- vendor 0x1166 */ case 0x00051166: s = "ServerWorks NB6536 2.0HE host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00061166: /* FALLTHROUGH */ case 0x00081166: /* FALLTHROUGH */ case 0x02011166: /* FALLTHROUGH */ case 0x010f1014: /* IBM re-badged ServerWorks chipset */ s = "ServerWorks host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00091166: s = "ServerWorks NB6635 3.0LE host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00101166: s = "ServerWorks CIOB30 host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00111166: /* FALLTHROUGH */ case 0x03021014: /* IBM re-badged ServerWorks chipset */ s = "ServerWorks CMIC-HE host to PCI-X bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; /* XXX unknown chipset, but working */ case 0x00171166: /* FALLTHROUGH */ case 0x01011166: case 0x01101166: case 0x02251166: s = "ServerWorks host to PCI bridge(unknown chipset)"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; /* Compaq/HP -- vendor 0x0e11 */ case 0x60100e11: s = "Compaq/HP Model 6010 HotPlug PCI Bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0xc8, 1); break; /* Integrated Micro Solutions -- vendor 0x10e0 */ case 0x884910e0: s = "Integrated Micro Solutions VL Bridge"; break; default: if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST) s = "Host to PCI bridge"; break; } return s; #else const char *s = NULL; *busnum = 0; if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST) s = "Host to PCI bridge"; return s; #endif } /* * Scan the first pci bus for host-pci bridges and add pcib instances * to the nexus for each bridge. */ static void legacy_pcib_identify(driver_t *driver, device_t parent) { int bus, slot, func; uint8_t hdrtype; int found = 0; int pcifunchigh; int found824xx = 0; int found_orion = 0; device_t child; devclass_t pci_devclass; if (pci_cfgregopen() == 0) return; /* * Check to see if we haven't already had a PCI bus added * via some other means. If we have, bail since otherwise * we're going to end up duplicating it. */ if ((pci_devclass = devclass_find("pci")) && devclass_get_device(pci_devclass, 0)) return; bus = 0; retry: for (slot = 0; slot <= PCI_SLOTMAX; slot++) { func = 0; hdrtype = legacy_pcib_read_config(0, bus, slot, func, PCIR_HDRTYPE, 1); /* * When enumerating bus devices, the standard says that * one should check the header type and ignore the slots whose * header types that the software doesn't know about. We use * this to filter out devices. */ if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if ((hdrtype & PCIM_MFDEV) && (!found_orion || hdrtype != 0xff)) pcifunchigh = PCI_FUNCMAX; else pcifunchigh = 0; for (func = 0; func <= pcifunchigh; func++) { /* * Read the IDs and class from the device. 
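* An ID that reads back as all ones means no device responded at this bus/slot/function, so the function is skipped.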
*/ uint32_t id; uint8_t class, subclass, busnum; const char *s; device_t *devs; int ndevs, i; id = legacy_pcib_read_config(0, bus, slot, func, PCIR_DEVVENDOR, 4); if (id == -1) continue; class = legacy_pcib_read_config(0, bus, slot, func, PCIR_CLASS, 1); subclass = legacy_pcib_read_config(0, bus, slot, func, PCIR_SUBCLASS, 1); s = legacy_pcib_is_host_bridge(bus, slot, func, id, class, subclass, &busnum); if (s == NULL) continue; /* * Check to see if the physical bus has already * been seen. Eg: hybrid 32 and 64 bit host * bridges to the same logical bus. */ if (device_get_children(parent, &devs, &ndevs) == 0) { for (i = 0; s != NULL && i < ndevs; i++) { if (strcmp(device_get_name(devs[i]), "pcib") != 0) continue; if (legacy_get_pcibus(devs[i]) == busnum) s = NULL; } free(devs, M_TEMP); } if (s == NULL) continue; /* * Add at priority 100 to make sure we * go after any motherboard resources */ child = BUS_ADD_CHILD(parent, 100, "pcib", busnum); device_set_desc(child, s); legacy_set_pcibus(child, busnum); legacy_set_pcislot(child, slot); legacy_set_pcifunc(child, func); found = 1; if (id == 0x12258086) found824xx = 1; if (id == 0x84c48086) found_orion = 1; } } if (found824xx && bus == 0) { bus++; goto retry; } /* * Make sure we add at least one bridge since some old * hardware doesn't actually have a host-pci bridge device. * Note that pci_cfgregopen() thinks we have PCI devices.. */ if (!found) { #ifndef NO_LEGACY_PCIB if (bootverbose) printf( "legacy_pcib_identify: no bridge found, adding pcib0 anyway\n"); child = BUS_ADD_CHILD(parent, 100, "pcib", 0); legacy_set_pcibus(child, 0); #endif } } static int legacy_pcib_probe(device_t dev) { if (pci_cfgregopen() == 0) return ENXIO; return -100; } static int legacy_pcib_attach(device_t dev) { #ifdef __HAVE_PIR device_t pir; int bus; bus = pcib_get_bus(dev); /* * Look for a PCI BIOS interrupt routing table as that will be * our method of routing interrupts if we have one. */ if (pci_pir_probe(bus, 0)) { pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0); if (pir != NULL) device_probe_and_attach(pir); } #endif device_add_child(dev, "pci", DEVICE_UNIT_ANY); return bus_generic_attach(dev); } int legacy_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { switch (which) { case PCIB_IVAR_DOMAIN: *result = 0; return 0; case PCIB_IVAR_BUS: *result = legacy_get_pcibus(dev); return 0; } return ENOENT; } int legacy_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { switch (which) { case PCIB_IVAR_DOMAIN: return EINVAL; case PCIB_IVAR_BUS: legacy_set_pcibus(dev, value); return 0; } return ENOENT; } /* * Helper routine for x86 Host-PCI bridge driver resource allocation. * This is used to adjust the start address of wildcard allocation * requests to avoid low addresses that are known to be problematic. * * If no memory preference is given, use upper 32MB slot most BIOSes * use for their memory window. This is typically only used on older * laptops that don't have PCI buses behind a PCI bridge, so assuming * > 32MB is likely OK. * * However, this can cause problems for other chipsets, so we make * this tunable by hw.pci.host_mem_start. 
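* Only wildcard requests are adjusted: an exact request (start + count - 1 == end) is left alone, while wildcard memory allocations are moved up to host_mem_start and wildcard I/O port allocations up to 0x1000.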
*/ SYSCTL_DECL(_hw_pci); static unsigned long host_mem_start = 0x80000000; SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &host_mem_start, 0, "Limit the host bridge memory to being above this address."); rman_res_t hostb_alloc_start(int type, rman_res_t start, rman_res_t end, rman_res_t count) { if (start + count - 1 != end) { if (type == SYS_RES_MEMORY && start < host_mem_start) start = host_mem_start; if (type == SYS_RES_IOPORT && start < 0x1000) start = 0x1000; } return (start); } struct resource * legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { if (type == PCI_RES_BUS) return (pci_domain_alloc_bus(0, child, rid, start, end, count, flags)); start = hostb_alloc_start(type, start, end, count); return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } int legacy_pcib_adjust_resource(device_t dev, device_t child, struct resource *r, rman_res_t start, rman_res_t end) { if (rman_get_type(r) == PCI_RES_BUS) return (pci_domain_adjust_bus(0, child, r, start, end)); return (bus_generic_adjust_resource(dev, child, r, start, end)); } int legacy_pcib_release_resource(device_t dev, device_t child, struct resource *r) { if (rman_get_type(r) == PCI_RES_BUS) return (pci_domain_release_bus(0, child, r)); return (bus_generic_release_resource(dev, child, r)); } int legacy_pcib_activate_resource(device_t dev, device_t child, struct resource *r) { if (rman_get_type(r) == PCI_RES_BUS) return (pci_domain_activate_bus(0, child, r)); return (bus_generic_activate_resource(dev, child, r)); } int legacy_pcib_deactivate_resource(device_t dev, device_t child, struct resource *r) { if (rman_get_type(r) == PCI_RES_BUS) return (pci_domain_deactivate_bus(0, child, r)); return (bus_generic_deactivate_resource(dev, child, r)); } static device_method_t legacy_pcib_methods[] = { /* Device interface */ DEVMETHOD(device_identify, legacy_pcib_identify), DEVMETHOD(device_probe, legacy_pcib_probe), DEVMETHOD(device_attach, legacy_pcib_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar), DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar), DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource), DEVMETHOD(bus_adjust_resource, legacy_pcib_adjust_resource), DEVMETHOD(bus_release_resource, legacy_pcib_release_resource), DEVMETHOD(bus_activate_resource, legacy_pcib_activate_resource), DEVMETHOD(bus_deactivate_resource, legacy_pcib_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots), DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi), DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), DEVMETHOD_END }; DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1); DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, 0, 0); /* * Install placeholder to claim the resources owned by the * PCI bus interface. 
This could be used to extract the * config space registers in the extreme case where the PnP * ID is available and the PCI BIOS isn't, but for now we just * eat the PnP ID and do nothing else. * * we silence this probe, as it will generally confuse people. */ static struct isa_pnp_id pcibus_pnp_ids[] = { { 0x030ad041 /* PNP0A03 */, "PCI Bus" }, { 0x080ad041 /* PNP0A08 */, "PCIe Bus" }, { 0 } }; static int pcibus_pnp_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, pcibus_pnp_ids)) <= 0) device_quiet(dev); return(result); } static int pcibus_pnp_attach(device_t dev) { return(0); } static device_method_t pcibus_pnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcibus_pnp_probe), DEVMETHOD(device_attach, pcibus_pnp_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1); DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, 0, 0); #ifdef __HAVE_PIR /* * Provide a PCI-PCI bridge driver for PCI buses behind PCI-PCI bridges * that appear in the PCIBIOS Interrupt Routing Table to use the routing * table for interrupt routing when possible. */ static int pcibios_pcib_probe(device_t bus); static device_method_t pcibios_pcib_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcibios_pcib_probe), /* pcib interface */ DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt), {0, 0} }; DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods, sizeof(struct pcib_softc), pcib_driver); DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, 0, 0); ISA_PNP_INFO(pcibus_pnp_ids); static int pcibios_pcib_probe(device_t dev) { int bus; if ((pci_get_class(dev) != PCIC_BRIDGE) || (pci_get_subclass(dev) != PCIS_BRIDGE_PCI)) return (ENXIO); bus = pci_read_config(dev, PCIR_SECBUS_1, 1); if (bus == 0) return (ENXIO); if (!pci_pir_probe(bus, 1)) return (ENXIO); device_set_desc(dev, "PCIBIOS PCI-PCI bridge"); return (-2000); } #endif diff --git a/sys/x86/x86/nexus.c b/sys/x86/x86/nexus.c index aa62c920bcd8..d7bb0292157e 100644 --- a/sys/x86/x86/nexus.c +++ b/sys/x86/x86/nexus.c @@ -1,780 +1,776 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include /* * This code implements a `root nexus' for Intel Architecture * machines. The function of the root nexus is to serve as an * attachment point for both processors and buses, and to manage * resources which are common to all of them. In particular, * this code implements the core resource managers for interrupt * requests, DMA requests (which rightfully should be a part of the * ISA code but it's easier to do it here for now), I/O port addresses, * and I/O memory address space. */ #ifdef __amd64__ #define DEV_APIC #else #include "opt_apic.h" #endif #include "opt_isa.h" #include "opt_pci.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include "pcib_if.h" #endif #ifdef DEV_ISA #include #include #endif #define ELF_KERN_STR ("elf"__XSTRING(__ELF_WORD_SIZE)" kernel") static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); #define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) struct rman irq_rman, drq_rman, port_rman, mem_rman; static int nexus_print_all_resources(device_t dev); static device_probe_t nexus_probe; static device_attach_t nexus_attach; static bus_add_child_t nexus_add_child; static bus_print_child_t nexus_print_child; static bus_alloc_resource_t nexus_alloc_resource; static bus_get_resource_list_t nexus_get_reslist; static bus_get_rman_t nexus_get_rman; static bus_map_resource_t nexus_map_resource; static bus_unmap_resource_t nexus_unmap_resource; #ifdef SMP static bus_bind_intr_t nexus_bind_intr; #endif static bus_config_intr_t nexus_config_intr; static bus_describe_intr_t nexus_describe_intr; static bus_resume_intr_t nexus_resume_intr; static bus_setup_intr_t nexus_setup_intr; static bus_suspend_intr_t nexus_suspend_intr; static bus_teardown_intr_t nexus_teardown_intr; static bus_get_cpus_t nexus_get_cpus; #if defined(DEV_APIC) && defined(DEV_PCI) static pcib_alloc_msi_t nexus_alloc_msi; static pcib_release_msi_t nexus_release_msi; static pcib_alloc_msix_t nexus_alloc_msix; static pcib_release_msix_t nexus_release_msix; static pcib_map_msi_t nexus_map_msi; #endif static device_method_t nexus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_probe), DEVMETHOD(device_attach, nexus_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_print_child, nexus_print_child), DEVMETHOD(bus_add_child, nexus_add_child), DEVMETHOD(bus_activate_resource, bus_generic_rman_activate_resource), DEVMETHOD(bus_adjust_resource, bus_generic_rman_adjust_resource), DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_rman_deactivate_resource), DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource), DEVMETHOD(bus_get_resource_list, nexus_get_reslist), DEVMETHOD(bus_get_rman, nexus_get_rman), 
DEVMETHOD(bus_delete_resource, bus_generic_rl_delete_resource), DEVMETHOD(bus_map_resource, nexus_map_resource), DEVMETHOD(bus_release_resource, bus_generic_rman_release_resource), DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource), DEVMETHOD(bus_unmap_resource, nexus_unmap_resource), #ifdef SMP DEVMETHOD(bus_bind_intr, nexus_bind_intr), #endif DEVMETHOD(bus_config_intr, nexus_config_intr), DEVMETHOD(bus_describe_intr, nexus_describe_intr), DEVMETHOD(bus_resume_intr, nexus_resume_intr), DEVMETHOD(bus_setup_intr, nexus_setup_intr), DEVMETHOD(bus_suspend_intr, nexus_suspend_intr), DEVMETHOD(bus_teardown_intr, nexus_teardown_intr), DEVMETHOD(bus_get_cpus, nexus_get_cpus), /* pcib interface */ #if defined(DEV_APIC) && defined(DEV_PCI) DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi), DEVMETHOD(pcib_release_msi, nexus_release_msi), DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix), DEVMETHOD(pcib_release_msix, nexus_release_msix), DEVMETHOD(pcib_map_msi, nexus_map_msi), #endif DEVMETHOD_END }; DEFINE_CLASS_0(nexus, nexus_driver, nexus_methods, 1); DRIVER_MODULE(nexus, root, nexus_driver, 0, 0); static int nexus_probe(device_t dev) { device_quiet(dev); /* suppress attach message for neatness */ return (BUS_PROBE_GENERIC); } void nexus_init_resources(void) { int irq; /* * XXX working notes: * * - IRQ resource creation should be moved to the PIC/APIC driver. * - DRQ resource creation should be moved to the DMAC driver. * - The above should be sorted to probe earlier than any child buses. * * - Leave I/O and memory creation here, as child probes may need them. * (especially eg. ACPI) */ /* * IRQ's are on the mainboard on old systems, but on the ISA part * of PCI->ISA bridges. There would be multiple sets of IRQs on * multi-ISA-bus systems. PCI interrupts are routed to the ISA * component, so in a way, PCI can be a partial child of an ISA bus(!). * APIC interrupts are global though. */ irq_rman.rm_start = 0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupt request lines"; irq_rman.rm_end = num_io_irqs - 1; if (rman_init(&irq_rman)) panic("nexus_init_resources irq_rman"); /* * We search for regions of existing IRQs and add those to the IRQ * resource manager. */ for (irq = 0; irq < num_io_irqs; irq++) if (intr_lookup_source(irq) != NULL) if (rman_manage_region(&irq_rman, irq, irq) != 0) panic("nexus_init_resources irq_rman add"); /* * ISA DMA on PCI systems is implemented in the ISA part of each * PCI->ISA bridge and the channels can be duplicated if there are * multiple bridges. (eg: laptops with docking stations) */ drq_rman.rm_start = 0; drq_rman.rm_end = 7; drq_rman.rm_type = RMAN_ARRAY; drq_rman.rm_descr = "DMA request lines"; /* XXX drq 0 not available on some machines */ if (rman_init(&drq_rman) || rman_manage_region(&drq_rman, drq_rman.rm_start, drq_rman.rm_end)) panic("nexus_init_resources drq_rman"); /* * However, IO ports and Memory truely are global at this level, * as are APIC interrupts (however many IO APICS there turn out * to be on large systems..) 
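* The port rman below covers the whole 16-bit x86 I/O space, and the memory rman runs from 0 up to the CPU's maximum physical address.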
*/ port_rman.rm_start = 0; port_rman.rm_end = 0xffff; port_rman.rm_type = RMAN_ARRAY; port_rman.rm_descr = "I/O ports"; if (rman_init(&port_rman) || rman_manage_region(&port_rman, 0, 0xffff)) panic("nexus_init_resources port_rman"); mem_rman.rm_start = 0; mem_rman.rm_end = cpu_getmaxphyaddr(); mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, mem_rman.rm_end)) panic("nexus_init_resources mem_rman"); } static int nexus_attach(device_t dev) { nexus_init_resources(); bus_generic_probe(dev); /* * Explicitly add the legacy0 device here. Other platform * types (such as ACPI), use their own nexus(4) subclass * driver to override this routine and add their own root bus. */ if (BUS_ADD_CHILD(dev, 10, "legacy", 0) == NULL) panic("legacy: could not attach"); bus_generic_attach(dev); return (0); } static int nexus_print_all_resources(device_t dev) { struct nexus_device *ndev = DEVTONX(dev); struct resource_list *rl = &ndev->nx_resources; int retval = 0; if (STAILQ_FIRST(rl)) retval += printf(" at"); retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx"); retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#jx"); retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd"); return (retval); } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += nexus_print_all_resources(child); if (device_get_flags(child)) retval += printf(" flags %#x", device_get_flags(child)); retval += printf("\n"); return (retval); } static device_t nexus_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if (!ndev) return (0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return (child); } static struct rman * nexus_get_rman(device_t bus, int type, u_int flags) { switch (type) { case SYS_RES_IRQ: return (&irq_rman); case SYS_RES_DRQ: return (&drq_rman); case SYS_RES_IOPORT: return (&port_rman); case SYS_RES_MEMORY: return (&mem_rman); default: return (NULL); } } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct nexus_device *ndev = DEVTONX(child); struct resource_list_entry *rle; /* * If this is an allocation of the "default" range for a given * RID, and we know what the resources for this device are * (ie. they aren't maintained by a child bus), then work out * the start/end values. */ if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) { if (device_get_parent(child) != bus || ndev == NULL) return (NULL); rle = resource_list_find(&ndev->nx_resources, type, *rid); if (rle == NULL) return (NULL); start = rle->start; end = rle->end; count = rle->count; } return (bus_generic_rman_alloc_resource(bus, child, type, rid, start, end, count, flags)); } static int nexus_map_resource(device_t bus, device_t child, struct resource *r, struct resource_map_request *argsp, struct resource_map *map) { struct resource_map_request args; rman_res_t length, start; int error, type; /* Resources must be active to be mapped. 
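* Only SYS_RES_IOPORT and SYS_RES_MEMORY resources can be mapped, and only once they are active (RF_ACTIVE); both conditions are checked below.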
*/ if (!(rman_get_flags(r) & RF_ACTIVE)) return (ENXIO); /* Mappings are only supported on I/O and memory resources. */ type = rman_get_type(r); switch (type) { case SYS_RES_IOPORT: case SYS_RES_MEMORY: break; default: return (EINVAL); } resource_init_map_request(&args); error = resource_validate_map_request(r, argsp, &args, &start, &length); if (error) return (error); /* * If this is a memory resource, map it into the kernel. */ switch (type) { case SYS_RES_IOPORT: map->r_bushandle = start; map->r_bustag = X86_BUS_SPACE_IO; map->r_size = length; map->r_vaddr = NULL; break; case SYS_RES_MEMORY: map->r_vaddr = pmap_mapdev_attr(start, length, args.memattr); map->r_bustag = X86_BUS_SPACE_MEM; map->r_size = length; /* * The handle is the virtual address. */ map->r_bushandle = (bus_space_handle_t)map->r_vaddr; break; } return (0); } static int nexus_unmap_resource(device_t bus, device_t child, struct resource *r, struct resource_map *map) { /* * If this is a memory resource, unmap it. */ switch (rman_get_type(r)) { case SYS_RES_MEMORY: pmap_unmapdev(map->r_vaddr, map->r_size); /* FALLTHROUGH */ case SYS_RES_IOPORT: break; default: return (EINVAL); } return (0); } /* * Currently this uses the really grody interface from kern/kern_intr.c * (which really doesn't belong in kern/anything.c). Eventually, all of * the code in kern_intr.c and machdep_intr.c should get moved here, since * this is going to be the official interface. */ static int nexus_setup_intr(device_t bus, device_t child, struct resource *irq, int flags, driver_filter_t filter, void (*ihand)(void *), void *arg, void **cookiep) { int error, domain; struct intsrc *isrc; /* somebody tried to setup an irq that failed to allocate! */ if (irq == NULL) panic("nexus_setup_intr: NULL irq resource!"); *cookiep = NULL; if ((rman_get_flags(irq) & RF_SHAREABLE) == 0) flags |= INTR_EXCL; /* * We depend here on rman_activate_resource() being idempotent. 
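* (presumably because the caller may already have activated the IRQ through bus_activate_resource(), in which case the call below must be a harmless no-op)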
*/ error = rman_activate_resource(irq); if (error != 0) return (error); if (bus_get_domain(child, &domain) != 0) domain = 0; isrc = intr_lookup_source(rman_get_start(irq)); if (isrc == NULL) return (EINVAL); error = intr_add_handler(isrc, device_get_nameunit(child), filter, ihand, arg, flags, cookiep, domain); if (error == 0) rman_set_irq_cookie(irq, *cookiep); return (error); } static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { int error; error = intr_remove_handler(ih); if (error == 0) rman_set_irq_cookie(r, NULL); return (error); } static int nexus_suspend_intr(device_t dev, device_t child, struct resource *irq) { return (intr_event_suspend_handler(rman_get_irq_cookie(irq))); } static int nexus_resume_intr(device_t dev, device_t child, struct resource *irq) { return (intr_event_resume_handler(rman_get_irq_cookie(irq))); } #ifdef SMP static int nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu) { struct intsrc *isrc; isrc = intr_lookup_source(rman_get_start(irq)); if (isrc == NULL) return (EINVAL); return (intr_event_bind(isrc->is_event, cpu)); } #endif static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { struct intsrc *isrc; isrc = intr_lookup_source(irq); if (isrc == NULL) return (EINVAL); return (intr_config_intr(isrc, trig, pol)); } static int nexus_describe_intr(device_t dev, device_t child, struct resource *irq, void *cookie, const char *descr) { struct intsrc *isrc; isrc = intr_lookup_source(rman_get_start(irq)); if (isrc == NULL) return (EINVAL); return (intr_describe(isrc, cookie, descr)); } static struct resource_list * nexus_get_reslist(device_t dev, device_t child) { struct nexus_device *ndev = DEVTONX(child); return (&ndev->nx_resources); } static int nexus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize, cpuset_t *cpuset) { switch (op) { #ifdef SMP case INTR_CPUS: if (setsize != sizeof(cpuset_t)) return (EINVAL); *cpuset = intr_cpus; return (0); #endif default: return (bus_generic_get_cpus(dev, child, op, setsize, cpuset)); } } /* Called from the MSI code to add new IRQs to the IRQ rman. */ void nexus_add_irq(u_long irq) { if (rman_manage_region(&irq_rman, irq, irq) != 0) panic("%s: failed", __func__); } #if defined(DEV_APIC) && defined(DEV_PCI) static int nexus_alloc_msix(device_t pcib, device_t dev, int *irq) { return (msix_alloc(dev, irq)); } static int nexus_release_msix(device_t pcib, device_t dev, int irq) { return (msix_release(irq)); } static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { return (msi_alloc(dev, count, maxcount, irqs)); } static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs) { return (msi_release(irqs, count)); } static int nexus_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { return (msi_map(irq, addr, data)); } #endif /* DEV_APIC && DEV_PCI */ /* Placeholder for system RAM. */ static void ram_identify(driver_t *driver, device_t parent) { if (resource_disabled("ram", 0)) return; if (BUS_ADD_CHILD(parent, 0, "ram", 0) == NULL) panic("ram_identify"); } static int ram_probe(device_t dev) { device_quiet(dev); device_set_desc(dev, "System RAM"); return (0); } static int ram_attach(device_t dev) { struct bios_smap *smapbase, *smap, *smapend; struct resource *res; rman_res_t length; vm_paddr_t *p; caddr_t kmdp; uint32_t smapsize; int error, rid; /* Retrieve the system memory map from the loader. 
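* The SMAP metadata attached by the boot loader is used when present; otherwise the code falls back to the dump_avail[] ranges further down.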
*/ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type(ELF_KERN_STR); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); rid = 0; for (smap = smapbase; smap < smapend; smap++) { if (smap->type != SMAP_TYPE_MEMORY || smap->length == 0) continue; if (smap->base > mem_rman.rm_end) continue; length = smap->base + smap->length > mem_rman.rm_end ? mem_rman.rm_end - smap->base : smap->length; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, smap->base, length); if (error) panic( "ram_attach: resource %d failed set with %d", rid, error); res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 0); if (res == NULL) panic("ram_attach: resource %d failed to attach", rid); rid++; } return (0); } /* * If the system map is not available, fall back to using * dump_avail[]. We use the dump_avail[] array rather than * phys_avail[] for the memory map as phys_avail[] contains * holes for kernel memory, page 0, the message buffer, and * the dcons buffer. We test the end address in the loop * instead of the start since the start address for the first * segment is 0. */ for (rid = 0, p = dump_avail; p[1] != 0; rid++, p += 2) { if (p[0] > mem_rman.rm_end) break; length = (p[1] > mem_rman.rm_end ? mem_rman.rm_end : p[1]) - p[0]; error = bus_set_resource(dev, SYS_RES_MEMORY, rid, p[0], length); if (error) panic("ram_attach: resource %d failed set with %d", rid, error); res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 0); if (res == NULL) panic("ram_attach: resource %d failed to attach", rid); } return (0); } static device_method_t ram_methods[] = { /* Device interface */ DEVMETHOD(device_identify, ram_identify), DEVMETHOD(device_probe, ram_probe), DEVMETHOD(device_attach, ram_attach), DEVMETHOD_END }; static driver_t ram_driver = { "ram", ram_methods, 1, /* no softc */ }; DRIVER_MODULE(ram, nexus, ram_driver, 0, 0); #ifdef DEV_ISA /* * Placeholder which claims PnP 'devices' which describe system * resources. */ static struct isa_pnp_id sysresource_ids[] = { { 0x010cd041 /* PNP0c01 */, "System Memory" }, { 0x020cd041 /* PNP0c02 */, "System Resource" }, { 0 } }; static int sysresource_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, sysresource_ids)) <= 0) { device_quiet(dev); } return (result); } static int sysresource_attach(device_t dev) { return (0); } static device_method_t sysresource_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sysresource_probe), DEVMETHOD(device_attach, sysresource_attach), - DEVMETHOD(device_detach, bus_generic_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD_END }; static driver_t sysresource_driver = { "sysresource", sysresource_methods, 1, /* no softc */ }; DRIVER_MODULE(sysresource, isa, sysresource_driver, 0, 0); ISA_PNP_INFO(sysresource_ids); #endif /* DEV_ISA */