Index: head/sys/amd64/amd64/fpu.c =================================================================== --- head/sys/amd64/amd64/fpu.c (revision 329877) +++ head/sys/amd64/amd64/fpu.c (revision 329878) @@ -1,1113 +1,1113 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Floating point support. 
*/ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); #endif /* __GNUCLIKE_ASM && !lint */ #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() CTASSERT(sizeof(struct savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. */ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu)); static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; static struct savefpu *fpu_initialstate; struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; void fpusave(void *addr) { if (use_xsave) xsave((char *)addr, xsave_mask); else fxsave((char *)addr); } void fpurestore(void *addr) { if (use_xsave) xrstor((char *)addr, xsave_mask); else fxrstor((char *)addr); } void fpususpend(void *addr) { u_long cr0; cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void fpuresume(void *addr) { u_long cr0; cr0 = rcr0(); stop_emulating(); fninit(); if (use_xsave) load_xcr(XCR0, xsave_mask); fpurestore(addr); load_cr0(cr0); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. 
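Purely as an illustration (not part of this change): once fpuinit_bsp1() below has settled on xsave_mask and fpuinit() has loaded it into XCR0, the result can be read back from userland with xgetbv. A minimal standalone sketch, assuming a reasonably recent gcc/clang on x86:

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	unsigned eax, ebx, ecx, edx;
	uint32_t low, high;

	/* xgetbv faults unless the OS set CR4.OSXSAVE (CPUID.1:ECX[27]). */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || (ecx & (1u << 27)) == 0) {
		puts("XSAVE not enabled by the OS");
		return (1);
	}
	/* xgetbv with %ecx = 0 reads XCR0, i.e. the enabled-feature mask. */
	__asm__ __volatile__("xgetbv" : "=a" (low), "=d" (high) : "c" (0));
	printf("XCR0 = 0x%jx\n", (uintmax_t)(((uint64_t)high << 32) | low));
	return (0);
}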
*/ static void fpuinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; if ((cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { /* * Patch the XSAVE instruction in the cpu_switch code * to XSAVEOPT. We assume that XSAVE encoding used * REX byte, and set the bit 4 of the r/m byte. */ ctx_switch_xsave[3] |= 0x10; } } /* * Calculate the fpu save area size. */ static void fpuinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(struct savefpu); } /* * Initialize the floating point unit. */ void fpuinit(void) { register_t saveintr; u_int mxcsr; u_short control; if (IS_BSP()) fpuinit_bsp1(); if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (IS_BSP()) fpuinit_bsp2(); /* * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); fninit(); control = __INITIAL_FPUCW__; fldcw(control); mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void fpuinitstate(void *arg __unused) { register_t saveintr; int cp[4], i, max_ext_n; fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); saveintr = intr_disable(); stop_emulating(); fpusave(fpu_initialstate); if (fpu_initialstate->sv_env.en_mxcsr_mask) cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM registers or x87 * registers (MM/ST). The fpusave call dumped the garbage * contained in the registers after reset to the initial state * saved. Clear XMM and x87 registers file image to make the * startup program state and signal handler XMM/x87 register * content predictable. */ bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp)); bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm)); /* * Create a table describing the layout of the CPU Extended * Save Area. 
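As an aside (illustration only, not part of this change), the same CPUID leaf 0xD sub-leaves that fpuinit_bsp1()/fpuinit_bsp2() consult above, and that the layout table just below records, can be dumped from userland; a minimal standalone sketch for gcc/clang on x86:

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	unsigned eax, ebx, ecx, edx;
	uint64_t mask;
	int i;

	/* Sub-leaf 0: EDX:EAX is the supported user-state mask, EBX the save
	 * area size for the components currently enabled in XCR0. */
	if (!__get_cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx))
		return (1);
	mask = ((uint64_t)edx << 32) | eax;
	printf("supported mask 0x%jx, enabled save area size %u\n",
	    (uintmax_t)mask, ebx);

	/* Sub-leaves >= 2: EAX/EBX are the size and offset of component i,
	 * the same pairs recorded in xsave_area_desc[] below. */
	for (i = 2; i < 63; i++) {
		if ((mask & (1ULL << i)) == 0)
			continue;
		__get_cpuid_count(0xd, i, &eax, &ebx, &ecx, &edx);
		printf("component %d: size %u offset %u\n", i, eax, ebx);
	}
	return (0);
}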
*/ if (use_xsave) { max_ext_n = flsl(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); start_emulating(); intr_restore(saveintr); } SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL); /* * Free coprocessor (if we have it). */ void fpuexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); } int fpuformat(void) { return (_MC_FPFMT_XMM); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. 
* 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ 
| UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int fputrap_x87(void) { struct savefpu *pcb_save; u_short control, status; critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
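The two trap handlers just below reduce the hardware exception state to an index into fpetable[]. A standalone sketch of both index computations (illustration only; the constants are sample values):

#include <stdio.h>

/* x87: status bits 0-5 are exception flags, control bits 0-5 the matching
 * mask bits; status bit 6 (0x40) is the unmaskable stack-fault flag. */
static int
x87_index(unsigned short status, unsigned short control)
{
	return (status & ((~control & 0x3f) | 0x40));
}

/* SSE: MXCSR keeps the exception flags in bits 0-5 and the mask bits in
 * bits 7-12, so shifting the inverted register right by 7 lines the
 * "unmasked" bits up with the flag bits. */
static int
sse_index(unsigned int mxcsr)
{
	return ((mxcsr & (~mxcsr >> 7)) & 0x3f);
}

int
main(void)
{
	/* Invalid-op and precision raised; only invalid-op unmasked. */
	printf("x87 index 0x%02x\n", x87_index(0x0021, 0x003e)); /* 0x01, FPE_FLTINV */
	/* Default MXCSR with divide-by-zero unmasked and raised. */
	printf("sse index 0x%02x\n", sse_index(0x1d84)); /* 0x04, FPE_FLTDIV */
	return (0);
}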
*/ if (PCPU_GET(fpcurthread) != curthread) { pcb_save = curpcb->pcb_save; control = pcb_save->sv_env.en_cw; status = pcb_save->sv_env.en_sw; } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int fputrap_sse(void) { u_int mxcsr; critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curpcb->pcb_save->sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } /* * Device Not Available (DNA, #NM) exception handler. * * It would be better to switch FP context here (if curthread != * fpcurthread) and not necessarily for every context switch, but it * is too hard to access foreign pcb's. */ void fpudna(void) { /* * This handler is entered with interrupts enabled, so context * switches may occur before critical_enter() is executed. If * a context switch occurs, then when we regain control, our * state will have been completely restored. The CPU may * change underneath us, but the only part of our context that * lives in the CPU is CR0.TS and that will be "restored" by * setting it on the new CPU. */ critical_enter(); KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); if (PCPU_GET(fpcurthread) == curthread) { printf("fpudna: fpcurthread == curthread\n"); stop_emulating(); critical_exit(); return; } if (PCPU_GET(fpcurthread) != NULL) { panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, curthread, curthread->td_tid); } stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); fpu_clean_state(); if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * fpu_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); fpurestore(curpcb->pcb_save); if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(curpcb->pcb_initial_fpucw); if (PCB_USER_FPU(curpcb)) set_pcb_flags(curpcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(curpcb, PCB_FPUINITDONE); } else fpurestore(curpcb->pcb_save); critical_exit(); } void fpudrop(void) { struct thread *td; td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int fpugetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i, owned; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); get_pcb_user_save_pcb(pcb)->sv_env.en_cw = pcb->pcb_initial_fpucw; fpuuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { fpusave(get_pcb_user_save_pcb(pcb)); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } critical_exit(); if (use_xsave) { /* * Handle partially saved state. 
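A standalone sketch (illustration only; the helper and descriptor names are invented) of the fill-in step performed just below: XSAVEOPT may leave components that are still in their initial configuration out of the save image, with their bits clear in xstate_bv, so anything enabled in xsave_mask but absent from the image is copied from the clean initial state before the area is handed out.

#include <stdint.h>
#include <string.h>

struct area_desc {
	unsigned offset;
	unsigned size;
};

static void
fill_missing_components(char *save_area, uint64_t *xstate_bv,
    const char *initial, const struct area_desc *desc, uint64_t xsave_mask,
    int max_ext_n)
{
	uint64_t bit;
	int i;

	for (i = 0; i < max_ext_n; i++) {
		bit = 1ULL << i;
		/* Skip disabled components and ones already in the image. */
		if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
			continue;
		memcpy(save_area + desc[i].offset, initial + desc[i].offset,
		    desc[i].size);
		*xstate_bv |= bit;
	}
}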
*/ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + offsetof(struct xstate_hdr, xstate_bv)); max_ext_n = flsl(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)fpu_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } return (owned); } void fpuuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(pcb, PCB_FPUINITDONE); } int fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(struct savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } /* * Set the state of the FPU. */ int fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = fpusetxstate(td, xfpustate, xfpustate_size); if (error != 0) { critical_exit(); return (error); } bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurestore(get_pcb_user_save_td(td)); critical_exit(); set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); } else { critical_exit(); error = fpusetxstate(td, xfpustate, xfpustate_size); if (error != 0) return (error); bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpuuserinited(td); } return (0); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } /* * This really sucks. We want the acpi version only, but it requires * the isa_if.h file in order to get the definitions. */ #include "opt_isa.h" #ifdef DEV_ISA #include /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
*/ static struct isa_pnp_id fpupnp_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int fpupnp_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); if (result <= 0) device_quiet(dev); return (result); } static int fpupnp_attach(device_t dev) { return (0); } static device_method_t fpupnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fpupnp_probe), DEVMETHOD(device_attach, fpupnp_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t fpupnp_driver = { "fpupnp", fpupnp_methods, 1, /* no softc */ }; static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); ISA_PNP_INFO(fpupnp_ids); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_FPUINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 /* avoided save for the kern thread */ #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { struct savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? */ free(ctx, M_FPUKERN_CTX); } static struct savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((struct savefpu *)p); } -int +void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, ("ctx is required when !FPU_KERN_NOCTX")); KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state")); if ((flags & FPU_KERN_NOCTX) != 0) { critical_enter(); stop_emulating(); if (curthread == PCPU_GET(fpcurthread)) { fpusave(curpcb->pcb_save); PCPU_SET(fpcurthread, NULL); } else { KASSERT(PCPU_GET(fpcurthread) == NULL, ("invalid fpcurthread")); } /* * This breaks XSAVEOPT tracker, but * PCB_FPUNOSAVE state is supposed to never need to * save FPU context at all. 
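For context, a hypothetical FPU_KERN_NOCTX consumer (illustration only, function name invented; kernel code, not standalone): no save area is supplied, fpu_kern_enter() itself opens a critical section, and with this change it no longer returns a status, so the bracket is simply:

static void
example_noctx_section(void)
{

	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
	/* Short burst of SSE/AVX work; no sleeping, no nested entry. */
	fpu_kern_leave(curthread, NULL);
}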
*/ fpurestore(fpu_initialstate); set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE | PCB_FPUINITDONE); - return (0); + return; } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; - return (0); + return; } KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_FPUINITDONE; fpuexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); set_pcb_flags(pcb, PCB_KERNFPU); clear_pcb_flags(pcb, PCB_FPUINITDONE); - return (0); + return; } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) { KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); KASSERT(PCPU_GET(fpcurthread) == NULL, ("non-NULL fpcurthread for PCB_FPUNOSAVE")); CRITICAL_ASSERT(td); clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE); start_emulating(); critical_exit(); } else { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); critical_enter(); if (curthread == PCPU_GET(fpcurthread)) fpudrop(); critical_exit(); pcb->pcb_save = ctx->prev; } if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); clear_pcb_flags(pcb, PCB_KERNFPU); } else clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); } else { if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) set_pcb_flags(pcb, PCB_FPUINITDONE); else clear_pcb_flags(pcb, PCB_FPUINITDONE); KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); set_pcb_flags(curpcb, PCB_KERNFPU); return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNFPU) != 0); } /* * FPU save area alloc/free/init utility routines */ struct savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 0)); } void fpu_save_area_free(struct savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(struct savefpu *fsa) { bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); } Index: head/sys/amd64/include/fpu.h =================================================================== --- head/sys/amd64/include/fpu.h (revision 329877) +++ head/sys/amd64/include/fpu.h (revision 329878) @@ -1,95 +1,95 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.h 5.3 (Berkeley) 1/18/91 * $FreeBSD$ */ /* * Floating Point Data Structures and Constants * W. Jolitz 1/90 */ #ifndef _MACHINE_FPU_H_ #define _MACHINE_FPU_H_ #include #ifdef _KERNEL struct fpu_kern_ctx; #define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0) #define XSAVE_AREA_ALIGN 64 void fpudna(void); void fpudrop(void); void fpuexit(struct thread *td); int fpuformat(void); int fpugetregs(struct thread *td); void fpuinit(void); void fpurestore(void *addr); void fpuresume(void *addr); void fpusave(void *addr); int fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, size_t xfpustate_size); int fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size); void fpususpend(void *addr); int fputrap_sse(void); int fputrap_x87(void); void fpuuserinited(struct thread *td); struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); -int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, +void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); int fpu_kern_thread(u_int flags); int is_fpu_kern_thread(u_int flags); struct savefpu *fpu_save_area_alloc(void); void fpu_save_area_free(struct savefpu *fsa); void fpu_save_area_reset(struct savefpu *fsa); /* * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread(). */ #define FPU_KERN_NORMAL 0x0000 #define FPU_KERN_NOWAIT 0x0001 #define FPU_KERN_KTHR 0x0002 #define FPU_KERN_NOCTX 0x0004 #endif #endif /* !_MACHINE_FPU_H_ */ Index: head/sys/arm64/arm64/vfp.c =================================================================== --- head/sys/arm64/arm64/vfp.c (revision 329877) +++ head/sys/arm64/arm64/vfp.c (revision 329878) @@ -1,373 +1,373 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifdef VFP #include #include #include #include #include #include #include #include #include /* Sanity check we can store all the VFP registers */ CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32); static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for VFP state"); struct fpu_kern_ctx { struct vfpstate *prev; #define FPU_KERN_CTX_DUMMY 0x01 /* avoided save for the kern thread */ #define FPU_KERN_CTX_INUSE 0x02 uint32_t flags; struct vfpstate state; }; static void vfp_enable(void) { uint32_t cpacr; cpacr = READ_SPECIALREG(cpacr_el1); cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE; WRITE_SPECIALREG(cpacr_el1, cpacr); isb(); } static void vfp_disable(void) { uint32_t cpacr; cpacr = READ_SPECIALREG(cpacr_el1); cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1; WRITE_SPECIALREG(cpacr_el1, cpacr); isb(); } /* * Called when the thread is dying or when discarding the kernel VFP state. * If the thread was the last to use the VFP unit mark it as unused to tell * the kernel the fp state is unowned. Ensure the VFP unit is off so we get * an exception on the next access. 
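That trap is what eventually funnels a thread into vfp_restore_state() further down; a minimal sketch (illustration only, names invented) of the decision it makes about whether the register file actually has to be reloaded:

#include <stdbool.h>

struct thread;

/* Reload only if some other thread was the last VFP user on this CPU, or
 * this thread last used the VFP on a different CPU. */
static bool
need_vfp_reload(const struct thread *fpcurthread, const struct thread *td,
    unsigned int cpuid, unsigned int pcb_vfpcpu)
{
	return (fpcurthread != td || cpuid != pcb_vfpcpu);
}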
*/ void vfp_discard(struct thread *td) { #ifdef INVARIANTS if (td != NULL) CRITICAL_ASSERT(td); #endif if (PCPU_GET(fpcurthread) == td) PCPU_SET(fpcurthread, NULL); vfp_disable(); } static void vfp_store(struct vfpstate *state) { __int128_t *vfp_state; uint64_t fpcr, fpsr; vfp_state = state->vfp_regs; __asm __volatile( "mrs %0, fpcr \n" "mrs %1, fpsr \n" "stp q0, q1, [%2, #16 * 0]\n" "stp q2, q3, [%2, #16 * 2]\n" "stp q4, q5, [%2, #16 * 4]\n" "stp q6, q7, [%2, #16 * 6]\n" "stp q8, q9, [%2, #16 * 8]\n" "stp q10, q11, [%2, #16 * 10]\n" "stp q12, q13, [%2, #16 * 12]\n" "stp q14, q15, [%2, #16 * 14]\n" "stp q16, q17, [%2, #16 * 16]\n" "stp q18, q19, [%2, #16 * 18]\n" "stp q20, q21, [%2, #16 * 20]\n" "stp q22, q23, [%2, #16 * 22]\n" "stp q24, q25, [%2, #16 * 24]\n" "stp q26, q27, [%2, #16 * 26]\n" "stp q28, q29, [%2, #16 * 28]\n" "stp q30, q31, [%2, #16 * 30]\n" : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state)); state->vfp_fpcr = fpcr; state->vfp_fpsr = fpsr; } static void vfp_restore(struct vfpstate *state) { __int128_t *vfp_state; uint64_t fpcr, fpsr; vfp_state = state->vfp_regs; fpcr = state->vfp_fpcr; fpsr = state->vfp_fpsr; __asm __volatile( "ldp q0, q1, [%2, #16 * 0]\n" "ldp q2, q3, [%2, #16 * 2]\n" "ldp q4, q5, [%2, #16 * 4]\n" "ldp q6, q7, [%2, #16 * 6]\n" "ldp q8, q9, [%2, #16 * 8]\n" "ldp q10, q11, [%2, #16 * 10]\n" "ldp q12, q13, [%2, #16 * 12]\n" "ldp q14, q15, [%2, #16 * 14]\n" "ldp q16, q17, [%2, #16 * 16]\n" "ldp q18, q19, [%2, #16 * 18]\n" "ldp q20, q21, [%2, #16 * 20]\n" "ldp q22, q23, [%2, #16 * 22]\n" "ldp q24, q25, [%2, #16 * 24]\n" "ldp q26, q27, [%2, #16 * 26]\n" "ldp q28, q29, [%2, #16 * 28]\n" "ldp q30, q31, [%2, #16 * 30]\n" "msr fpcr, %0 \n" "msr fpsr, %1 \n" : : "r"(fpcr), "r"(fpsr), "r"(vfp_state)); } void vfp_save_state(struct thread *td, struct pcb *pcb) { uint32_t cpacr; KASSERT(pcb != NULL, ("NULL vfp pcb")); KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb")); if (td == NULL) td = curthread; critical_enter(); /* * Only store the registers if the VFP is enabled, * i.e. return if we are trapping on FP access. */ cpacr = READ_SPECIALREG(cpacr_el1); if ((cpacr & CPACR_FPEN_MASK) == CPACR_FPEN_TRAP_NONE) { KASSERT(PCPU_GET(fpcurthread) == td, ("Storing an invalid VFP state")); vfp_store(pcb->pcb_fpusaved); dsb(ish); vfp_disable(); } critical_exit(); } void vfp_restore_state(void) { struct pcb *curpcb; u_int cpu; critical_enter(); cpu = PCPU_GET(cpuid); curpcb = curthread->td_pcb; curpcb->pcb_fpflags |= PCB_FP_STARTED; vfp_enable(); /* * If the previous thread on this cpu to use the VFP was not the * current thread, or the current thread last used it on a different * cpu we need to restore the old state. */ if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) { vfp_restore(curthread->td_pcb->pcb_fpusaved); PCPU_SET(fpcurthread, curthread); curpcb->pcb_vfpcpu = cpu; } critical_exit(); } void vfp_init(void) { uint64_t pfr; /* Check if there is a vfp unit present */ pfr = READ_SPECIALREG(id_aa64pfr0_el1); if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE) return; /* Disable to be enabled when it's used */ vfp_disable(); } SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL); struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx); res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? 
M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXAndrew clear the memory ? */ free(ctx, M_FPUKERN_CTX); } -int +void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, ("ctx is required when !FPU_KERN_NOCTX")); KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0, ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state")); if ((flags & FPU_KERN_NOCTX) != 0) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { vfp_save_state(curthread, pcb); } PCPU_SET(fpcurthread, NULL); vfp_enable(); pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE | PCB_FP_STARTED; - return (0); + return; } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; - return (0); + return; } /* * Check either we are already using the VFP in the kernel, or * the the saved state points to the default user space. */ KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 || pcb->pcb_fpusaved == &pcb->pcb_fpustate, ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_fpusaved, &pcb->pcb_fpustate)); ctx->flags = FPU_KERN_CTX_INUSE; vfp_save_state(curthread, pcb); ctx->prev = pcb->pcb_fpusaved; pcb->pcb_fpusaved = &ctx->state; pcb->pcb_fpflags |= PCB_FP_KERN; pcb->pcb_fpflags &= ~PCB_FP_STARTED; - return (0); + return; } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) { KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); KASSERT(PCPU_GET(fpcurthread) == NULL, ("non-NULL fpcurthread for PCB_FP_NOSAVE")); CRITICAL_ASSERT(td); vfp_disable(); pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED); critical_exit(); } else { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("FPU context not inuse")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); critical_enter(); vfp_discard(td); critical_exit(); pcb->pcb_fpflags &= ~PCB_FP_STARTED; pcb->pcb_fpusaved = ctx->prev; } if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) { pcb->pcb_fpflags &= ~PCB_FP_KERN; } else { KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0, ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { struct pcb *pcb = curthread->td_pcb; KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, ("Mangled pcb_fpusaved")); KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0, ("Thread already setup for the VFP")); pcb->pcb_fpflags |= PCB_FP_KERN; return (0); } int is_fpu_kern_thread(u_int flags) { struct pcb *curpcb; if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); curpcb = curthread->td_pcb; return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0); } #endif Index: head/sys/arm64/include/vfp.h =================================================================== --- head/sys/arm64/include/vfp.h (revision 329877) +++ head/sys/arm64/include/vfp.h (revision 329878) @@ -1,72 +1,72 @@ /*- * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_VFP_H_ #define _MACHINE_VFP_H_ #ifndef LOCORE struct vfpstate { __uint128_t vfp_regs[32]; uint32_t vfp_fpcr; uint32_t vfp_fpsr; }; #ifdef _KERNEL struct pcb; void vfp_init(void); void vfp_discard(struct thread *); void vfp_restore_state(void); void vfp_save_state(struct thread *, struct pcb *); struct fpu_kern_ctx; /* * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread(). */ #define FPU_KERN_NORMAL 0x0000 #define FPU_KERN_NOWAIT 0x0001 #define FPU_KERN_KTHR 0x0002 #define FPU_KERN_NOCTX 0x0004 struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int); void fpu_kern_free_ctx(struct fpu_kern_ctx *); -int fpu_kern_enter(struct thread *, struct fpu_kern_ctx *, u_int); +void fpu_kern_enter(struct thread *, struct fpu_kern_ctx *, u_int); int fpu_kern_leave(struct thread *, struct fpu_kern_ctx *); int fpu_kern_thread(u_int); int is_fpu_kern_thread(u_int); #endif #endif #endif /* !_MACHINE_VFP_H_ */ Index: head/sys/crypto/aesni/aesni.c =================================================================== --- head/sys/crypto/aesni/aesni.c (revision 329877) +++ head/sys/crypto/aesni/aesni.c (revision 329878) @@ -1,959 +1,953 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek * Copyright (c) 2010 Konstantin Belousov * Copyright (c) 2014 The FreeBSD Foundation * Copyright (c) 2017 Conrad Meyer * All rights reserved. * * Portions of this software were developed by John-Mark Gurney * under sponsorship of the FreeBSD Foundation and * Rubicon Communications, LLC (Netgate). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) #include #elif defined(__amd64__) #include #endif static struct mtx_padalign *ctx_mtx; static struct fpu_kern_ctx **ctx_fpu; struct aesni_softc { int dieing; int32_t cid; uint32_t sid; bool has_aes; bool has_sha; TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions; struct rwlock lock; }; #define ACQUIRE_CTX(i, ctx) \ do { \ (i) = PCPU_GET(cpuid); \ mtx_lock(&ctx_mtx[(i)]); \ (ctx) = ctx_fpu[(i)]; \ } while (0) #define RELEASE_CTX(i, ctx) \ do { \ mtx_unlock(&ctx_mtx[(i)]); \ (i) = -1; \ (ctx) = NULL; \ } while (0) static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri); static int aesni_freesession(device_t, uint64_t tid); static void aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses); static int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini, struct cryptoini *authini); static int aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp); static int aesni_cipher_crypt(struct aesni_session *ses, struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp); static int aesni_cipher_mac(struct aesni_session *ses, struct cryptodesc *crd, struct cryptop *crp); MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data"); static void aesni_identify(driver_t *drv, device_t parent) { /* NB: order 10 is so we get attached after h/w devices */ if (device_find_child(parent, "aesni", -1) == NULL && BUS_ADD_CHILD(parent, 10, "aesni", -1) == 0) panic("aesni: could not attach"); } static void detect_cpu_features(bool *has_aes, bool *has_sha) { *has_aes = ((cpu_feature2 & CPUID2_AESNI) != 0 && (cpu_feature2 & CPUID2_SSE41) != 0); *has_sha = ((cpu_stdext_feature & CPUID_STDEXT_SHA) != 0 && (cpu_feature2 & CPUID2_SSSE3) != 0); } static int aesni_probe(device_t dev) { bool has_aes, has_sha; detect_cpu_features(&has_aes, &has_sha); if (!has_aes && !has_sha) { device_printf(dev, "No AES or SHA support.\n"); return (EINVAL); } else if (has_aes && has_sha) device_set_desc(dev, "AES-CBC,AES-XTS,AES-GCM,AES-ICM,SHA1,SHA256"); else if (has_aes) device_set_desc(dev, "AES-CBC,AES-XTS,AES-GCM,AES-ICM"); else device_set_desc(dev, "SHA1,SHA256"); return (0); } static void aesni_cleanctx(void) { int i; /* XXX - no way to return driverid */ CPU_FOREACH(i) { if (ctx_fpu[i] != NULL) { mtx_destroy(&ctx_mtx[i]); fpu_kern_free_ctx(ctx_fpu[i]); } ctx_fpu[i] = NULL; } free(ctx_mtx, M_AESNI); ctx_mtx = NULL; free(ctx_fpu, M_AESNI); ctx_fpu = NULL; } static int aesni_attach(device_t dev) { struct aesni_softc *sc; int i; sc = device_get_softc(dev); sc->dieing = 0; TAILQ_INIT(&sc->sessions); sc->sid = 1; sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SYNC); if (sc->cid < 0) { device_printf(dev, "Could not 
get crypto driver id.\n"); return (ENOMEM); } ctx_mtx = malloc(sizeof *ctx_mtx * (mp_maxid + 1), M_AESNI, M_WAITOK|M_ZERO); ctx_fpu = malloc(sizeof *ctx_fpu * (mp_maxid + 1), M_AESNI, M_WAITOK|M_ZERO); CPU_FOREACH(i) { ctx_fpu[i] = fpu_kern_alloc_ctx(0); mtx_init(&ctx_mtx[i], "anifpumtx", NULL, MTX_DEF|MTX_NEW); } rw_init(&sc->lock, "aesni_lock"); detect_cpu_features(&sc->has_aes, &sc->has_sha); if (sc->has_aes) { crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); crypto_register(sc->cid, CRYPTO_AES_ICM, 0, 0); crypto_register(sc->cid, CRYPTO_AES_NIST_GCM_16, 0, 0); crypto_register(sc->cid, CRYPTO_AES_128_NIST_GMAC, 0, 0); crypto_register(sc->cid, CRYPTO_AES_192_NIST_GMAC, 0, 0); crypto_register(sc->cid, CRYPTO_AES_256_NIST_GMAC, 0, 0); crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0); } if (sc->has_sha) { crypto_register(sc->cid, CRYPTO_SHA1, 0, 0); crypto_register(sc->cid, CRYPTO_SHA1_HMAC, 0, 0); crypto_register(sc->cid, CRYPTO_SHA2_256_HMAC, 0, 0); } return (0); } static int aesni_detach(device_t dev) { struct aesni_softc *sc; struct aesni_session *ses; sc = device_get_softc(dev); rw_wlock(&sc->lock); TAILQ_FOREACH(ses, &sc->sessions, next) { if (ses->used) { rw_wunlock(&sc->lock); device_printf(dev, "Cannot detach, sessions still active.\n"); return (EBUSY); } } sc->dieing = 1; while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { TAILQ_REMOVE(&sc->sessions, ses, next); free(ses, M_AESNI); } rw_wunlock(&sc->lock); crypto_unregister_all(sc->cid); rw_destroy(&sc->lock); aesni_cleanctx(); return (0); } static int aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) { struct aesni_softc *sc; struct aesni_session *ses; struct cryptoini *encini, *authini; bool gcm_hash, gcm; int error; if (sidp == NULL || cri == NULL) { CRYPTDEB("no sidp or cri"); return (EINVAL); } sc = device_get_softc(dev); if (sc->dieing) return (EINVAL); ses = NULL; authini = NULL; encini = NULL; gcm = false; gcm_hash = false; for (; cri != NULL; cri = cri->cri_next) { switch (cri->cri_alg) { case CRYPTO_AES_NIST_GCM_16: gcm = true; /* FALLTHROUGH */ case CRYPTO_AES_CBC: case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: if (!sc->has_aes) goto unhandled; if (encini != NULL) { CRYPTDEB("encini already set"); return (EINVAL); } encini = cri; break; case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: /* * nothing to do here, maybe in the future cache some * values for GHASH */ gcm_hash = true; break; case CRYPTO_SHA1: case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: if (!sc->has_sha) goto unhandled; if (authini != NULL) { CRYPTDEB("authini already set"); return (EINVAL); } authini = cri; break; default: unhandled: CRYPTDEB("unhandled algorithm"); return (EINVAL); } } if (encini == NULL && authini == NULL) { CRYPTDEB("no cipher"); return (EINVAL); } /* * GMAC algorithms are only supported with simultaneous GCM. Likewise * GCM is not supported without GMAC. */ if (gcm_hash != gcm) return (EINVAL); rw_wlock(&sc->lock); if (sc->dieing) { rw_wunlock(&sc->lock); return (EINVAL); } /* * Free sessions goes first, so if first session is used, we need to * allocate one. 
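A standalone sketch (illustration only, toy names, list assumed TAILQ_INIT'ed) of the recycling scheme described here and implemented just below: freed sessions are parked at the head of the list and in-use ones at the tail, so one look at the head tells whether anything can be reused.

#include <sys/queue.h>
#include <stdlib.h>

struct toy_session {
	TAILQ_ENTRY(toy_session) next;
	int used;
	int id;
};
TAILQ_HEAD(toy_head, toy_session);

static struct toy_session *
toy_session_get(struct toy_head *list, int *next_id)
{
	struct toy_session *s;

	s = TAILQ_FIRST(list);
	if (s == NULL || s->used) {
		/* Nothing free at the head: allocate a fresh one. */
		s = calloc(1, sizeof(*s));
		if (s == NULL)
			return (NULL);
		s->id = (*next_id)++;
	} else
		TAILQ_REMOVE(list, s, next);
	s->used = 1;
	TAILQ_INSERT_TAIL(list, s, next);
	return (s);
}

static void
toy_session_put(struct toy_head *list, struct toy_session *s)
{
	TAILQ_REMOVE(list, s, next);
	s->used = 0;
	TAILQ_INSERT_HEAD(list, s, next);
}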
*/ ses = TAILQ_FIRST(&sc->sessions); if (ses == NULL || ses->used) { ses = malloc(sizeof(*ses), M_AESNI, M_NOWAIT | M_ZERO); if (ses == NULL) { rw_wunlock(&sc->lock); return (ENOMEM); } ses->id = sc->sid++; } else { TAILQ_REMOVE(&sc->sessions, ses, next); } ses->used = 1; TAILQ_INSERT_TAIL(&sc->sessions, ses, next); rw_wunlock(&sc->lock); if (encini != NULL) ses->algo = encini->cri_alg; if (authini != NULL) ses->auth_algo = authini->cri_alg; error = aesni_cipher_setup(ses, encini, authini); if (error != 0) { CRYPTDEB("setup failed"); rw_wlock(&sc->lock); aesni_freesession_locked(sc, ses); rw_wunlock(&sc->lock); return (error); } *sidp = ses->id; return (0); } static void aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) { uint32_t sid; rw_assert(&sc->lock, RA_WLOCKED); sid = ses->id; TAILQ_REMOVE(&sc->sessions, ses, next); explicit_bzero(ses, sizeof(*ses)); ses->id = sid; TAILQ_INSERT_HEAD(&sc->sessions, ses, next); } static int aesni_freesession(device_t dev, uint64_t tid) { struct aesni_softc *sc; struct aesni_session *ses; uint32_t sid; sc = device_get_softc(dev); sid = ((uint32_t)tid) & 0xffffffff; rw_wlock(&sc->lock); TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) { if (ses->id == sid) break; } if (ses == NULL) { rw_wunlock(&sc->lock); return (EINVAL); } aesni_freesession_locked(sc, ses); rw_wunlock(&sc->lock); return (0); } static int aesni_process(device_t dev, struct cryptop *crp, int hint __unused) { struct aesni_softc *sc = device_get_softc(dev); struct aesni_session *ses = NULL; struct cryptodesc *crd, *enccrd, *authcrd; int error, needauth; error = 0; enccrd = NULL; authcrd = NULL; needauth = 0; /* Sanity check. */ if (crp == NULL) return (EINVAL); if (crp->crp_callback == NULL || crp->crp_desc == NULL) { error = EINVAL; goto out; } for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) { switch (crd->crd_alg) { case CRYPTO_AES_NIST_GCM_16: needauth = 1; /* FALLTHROUGH */ case CRYPTO_AES_CBC: case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: if (enccrd != NULL) { error = EINVAL; goto out; } enccrd = crd; break; case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: case CRYPTO_SHA1: case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: if (authcrd != NULL) { error = EINVAL; goto out; } authcrd = crd; break; default: error = EINVAL; goto out; } } if ((enccrd == NULL && authcrd == NULL) || (needauth && authcrd == NULL)) { error = EINVAL; goto out; } /* CBC & XTS can only handle full blocks for now */ if (enccrd != NULL && (enccrd->crd_alg == CRYPTO_AES_CBC || enccrd->crd_alg == CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) { error = EINVAL; goto out; } rw_rlock(&sc->lock); TAILQ_FOREACH_REVERSE(ses, &sc->sessions, aesni_sessions_head, next) { if (ses->id == (crp->crp_sid & 0xffffffff)) break; } rw_runlock(&sc->lock); if (ses == NULL) { error = EINVAL; goto out; } error = aesni_cipher_process(ses, enccrd, authcrd, crp); if (error != 0) goto out; out: crp->crp_etype = error; crypto_done(crp); return (error); } static uint8_t * aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, bool *allocated) { struct mbuf *m; struct uio *uio; struct iovec *iov; uint8_t *addr; if (crp->crp_flags & CRYPTO_F_IMBUF) { m = (struct mbuf *)crp->crp_buf; if (m->m_next != NULL) goto alloc; addr = mtod(m, uint8_t *); } else if (crp->crp_flags & CRYPTO_F_IOV) { uio = (struct uio *)crp->crp_buf; if (uio->uio_iovcnt != 1) goto alloc; iov = uio->uio_iov; addr = (uint8_t *)iov->iov_base; } else addr = 
(uint8_t *)crp->crp_buf; *allocated = false; addr += enccrd->crd_skip; return (addr); alloc: addr = malloc(enccrd->crd_len, M_AESNI, M_NOWAIT); if (addr != NULL) { *allocated = true; crypto_copydata(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, addr); } else *allocated = false; return (addr); } static device_method_t aesni_methods[] = { DEVMETHOD(device_identify, aesni_identify), DEVMETHOD(device_probe, aesni_probe), DEVMETHOD(device_attach, aesni_attach), DEVMETHOD(device_detach, aesni_detach), DEVMETHOD(cryptodev_newsession, aesni_newsession), DEVMETHOD(cryptodev_freesession, aesni_freesession), DEVMETHOD(cryptodev_process, aesni_process), {0, 0}, }; static driver_t aesni_driver = { "aesni", aesni_methods, sizeof(struct aesni_softc), }; static devclass_t aesni_devclass; DRIVER_MODULE(aesni, nexus, aesni_driver, aesni_devclass, 0, 0); MODULE_VERSION(aesni, 1); MODULE_DEPEND(aesni, crypto, 1, 1, 1); static int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini, struct cryptoini *authini) { struct fpu_kern_ctx *ctx; int kt, ctxidx, keylen, error; switch (ses->auth_algo) { case CRYPTO_SHA1: case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: if (authini->cri_klen % 8 != 0) return (EINVAL); keylen = authini->cri_klen / 8; if (keylen > sizeof(ses->hmac_key)) return (EINVAL); if (ses->auth_algo == CRYPTO_SHA1 && keylen > 0) return (EINVAL); memcpy(ses->hmac_key, authini->cri_key, keylen); ses->mlen = authini->cri_mlen; } kt = is_fpu_kern_thread(0) || (encini == NULL); if (!kt) { ACQUIRE_CTX(ctxidx, ctx); - error = fpu_kern_enter(curthread, ctx, + fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); - if (error != 0) - goto out; } error = 0; if (encini != NULL) error = aesni_cipher_setup_common(ses, encini->cri_key, encini->cri_klen); if (!kt) { fpu_kern_leave(curthread, ctx); -out: RELEASE_CTX(ctxidx, ctx); } return (error); } static int intel_sha1_update(void *vctx, const void *vdata, u_int datalen) { struct sha1_ctxt *ctx = vctx; const char *data = vdata; size_t gaplen; size_t gapstart; size_t off; size_t copysiz; u_int blocks; off = 0; /* Do any aligned blocks without redundant copying. */ if (datalen >= 64 && ctx->count % 64 == 0) { blocks = datalen / 64; ctx->c.b64[0] += blocks * 64 * 8; intel_sha1_step(ctx->h.b32, data + off, blocks); off += blocks * 64; } while (off < datalen) { gapstart = ctx->count % 64; gaplen = 64 - gapstart; copysiz = (gaplen < datalen - off) ? 
gaplen : datalen - off; bcopy(&data[off], &ctx->m.b8[gapstart], copysiz); ctx->count += copysiz; ctx->count %= 64; ctx->c.b64[0] += copysiz * 8; if (ctx->count % 64 == 0) intel_sha1_step(ctx->h.b32, (void *)ctx->m.b8, 1); off += copysiz; } return (0); } static void SHA1_Finalize_fn(void *digest, void *ctx) { sha1_result(ctx, digest); } static int intel_sha256_update(void *vctx, const void *vdata, u_int len) { SHA256_CTX *ctx = vctx; uint64_t bitlen; uint32_t r; u_int blocks; const unsigned char *src = vdata; /* Number of bytes left in the buffer from previous updates */ r = (ctx->count >> 3) & 0x3f; /* Convert the length into a number of bits */ bitlen = len << 3; /* Update number of bits */ ctx->count += bitlen; /* Handle the case where we don't need to perform any transforms */ if (len < 64 - r) { memcpy(&ctx->buf[r], src, len); return (0); } /* Finish the current block */ memcpy(&ctx->buf[r], src, 64 - r); intel_sha256_step(ctx->state, ctx->buf, 1); src += 64 - r; len -= 64 - r; /* Perform complete blocks */ if (len >= 64) { blocks = len / 64; intel_sha256_step(ctx->state, src, blocks); src += blocks * 64; len -= blocks * 64; } /* Copy left over data into buffer */ memcpy(ctx->buf, src, len); return (0); } static void SHA256_Finalize_fn(void *digest, void *ctx) { SHA256_Final(digest, ctx); } /* * Compute the HASH( (key ^ xorbyte) || buf ) */ static void hmac_internal(void *ctx, uint32_t *res, int (*update)(void *, const void *, u_int), void (*finalize)(void *, void *), uint8_t *key, uint8_t xorbyte, const void *buf, size_t off, size_t buflen, int crpflags) { size_t i; for (i = 0; i < 64; i++) key[i] ^= xorbyte; update(ctx, key, 64); for (i = 0; i < 64; i++) key[i] ^= xorbyte; crypto_apply(crpflags, __DECONST(void *, buf), off, buflen, __DECONST(int (*)(void *, void *, u_int), update), ctx); finalize(res, ctx); } static int aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp) { struct fpu_kern_ctx *ctx; int error, ctxidx; bool kt; if (enccrd != NULL) { if ((enccrd->crd_alg == CRYPTO_AES_ICM || enccrd->crd_alg == CRYPTO_AES_NIST_GCM_16) && (enccrd->crd_flags & CRD_F_IV_EXPLICIT) == 0) return (EINVAL); } ctx = NULL; ctxidx = 0; error = 0; kt = is_fpu_kern_thread(0); if (!kt) { ACQUIRE_CTX(ctxidx, ctx); - error = fpu_kern_enter(curthread, ctx, + fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); - if (error != 0) - goto out2; } /* Do work */ if (enccrd != NULL && authcrd != NULL) { /* Perform the first operation */ if (crp->crp_desc == enccrd) error = aesni_cipher_crypt(ses, enccrd, authcrd, crp); else error = aesni_cipher_mac(ses, authcrd, crp); if (error != 0) goto out; /* Perform the second operation */ if (crp->crp_desc == enccrd) error = aesni_cipher_mac(ses, authcrd, crp); else error = aesni_cipher_crypt(ses, enccrd, authcrd, crp); } else if (enccrd != NULL) error = aesni_cipher_crypt(ses, enccrd, authcrd, crp); else error = aesni_cipher_mac(ses, authcrd, crp); if (error != 0) goto out; out: if (!kt) { fpu_kern_leave(curthread, ctx); -out2: RELEASE_CTX(ctxidx, ctx); } return (error); } static int aesni_cipher_crypt(struct aesni_session *ses, struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp) { uint8_t iv[AES_BLOCK_LEN], tag[GMAC_DIGEST_LEN], *buf, *authbuf; int error, ivlen; bool encflag, allocated, authallocated; KASSERT(ses->algo != CRYPTO_AES_NIST_GCM_16 || authcrd != NULL, ("AES_NIST_GCM_16 must include MAC descriptor")); ivlen = 0; authbuf = NULL; buf = 
aesni_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) return (ENOMEM); authallocated = false; if (ses->algo == CRYPTO_AES_NIST_GCM_16) { authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated); if (authbuf == NULL) { error = ENOMEM; goto out; } } error = 0; encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT; if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) { error = aesni_cipher_setup_common(ses, enccrd->crd_key, enccrd->crd_klen); if (error != 0) goto out; } switch (enccrd->crd_alg) { case CRYPTO_AES_CBC: case CRYPTO_AES_ICM: ivlen = AES_BLOCK_LEN; break; case CRYPTO_AES_XTS: ivlen = 8; break; case CRYPTO_AES_NIST_GCM_16: ivlen = 12; /* should support arbitarily larger */ break; } /* Setup iv */ if (encflag) { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) bcopy(enccrd->crd_iv, iv, ivlen); else arc4rand(iv, ivlen, 0); if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_inject, ivlen, iv); } else { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) bcopy(enccrd->crd_iv, iv, ivlen); else crypto_copydata(crp->crp_flags, crp->crp_buf, enccrd->crd_inject, ivlen, iv); } switch (ses->algo) { case CRYPTO_AES_CBC: if (encflag) aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, enccrd->crd_len, buf, buf, iv); else aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, enccrd->crd_len, buf, iv); break; case CRYPTO_AES_ICM: /* encryption & decryption are the same */ aesni_encrypt_icm(ses->rounds, ses->enc_schedule, enccrd->crd_len, buf, buf, iv); break; case CRYPTO_AES_XTS: if (encflag) aesni_encrypt_xts(ses->rounds, ses->enc_schedule, ses->xts_schedule, enccrd->crd_len, buf, buf, iv); else aesni_decrypt_xts(ses->rounds, ses->dec_schedule, ses->xts_schedule, enccrd->crd_len, buf, buf, iv); break; case CRYPTO_AES_NIST_GCM_16: if (!encflag) crypto_copydata(crp->crp_flags, crp->crp_buf, authcrd->crd_inject, GMAC_DIGEST_LEN, tag); else bzero(tag, sizeof tag); if (encflag) { AES_GCM_encrypt(buf, buf, authbuf, iv, tag, enccrd->crd_len, authcrd->crd_len, ivlen, ses->enc_schedule, ses->rounds); if (authcrd != NULL) crypto_copyback(crp->crp_flags, crp->crp_buf, authcrd->crd_inject, GMAC_DIGEST_LEN, tag); } else { if (!AES_GCM_decrypt(buf, buf, authbuf, iv, tag, enccrd->crd_len, authcrd->crd_len, ivlen, ses->enc_schedule, ses->rounds)) error = EBADMSG; } break; } out: if (allocated) { explicit_bzero(buf, enccrd->crd_len); free(buf, M_AESNI); } if (authallocated) { explicit_bzero(authbuf, authcrd->crd_len); free(authbuf, M_AESNI); } return (error); } static int aesni_cipher_mac(struct aesni_session *ses, struct cryptodesc *crd, struct cryptop *crp) { union { struct SHA256Context sha2 __aligned(16); struct sha1_ctxt sha1 __aligned(16); } sctx; uint32_t res[SHA2_256_HASH_LEN / sizeof(uint32_t)]; int hashlen; if (crd->crd_flags != 0) return (EINVAL); switch (ses->auth_algo) { case CRYPTO_SHA1_HMAC: hashlen = SHA1_HASH_LEN; /* Inner hash: (K ^ IPAD) || data */ sha1_init(&sctx.sha1); hmac_internal(&sctx.sha1, res, intel_sha1_update, SHA1_Finalize_fn, ses->hmac_key, 0x36, crp->crp_buf, crd->crd_skip, crd->crd_len, crp->crp_flags); /* Outer hash: (K ^ OPAD) || inner hash */ sha1_init(&sctx.sha1); hmac_internal(&sctx.sha1, res, intel_sha1_update, SHA1_Finalize_fn, ses->hmac_key, 0x5C, res, 0, hashlen, 0); break; case CRYPTO_SHA1: hashlen = SHA1_HASH_LEN; sha1_init(&sctx.sha1); crypto_apply(crp->crp_flags, crp->crp_buf, crd->crd_skip, crd->crd_len, __DECONST(int (*)(void *, void *, u_int), intel_sha1_update), &sctx.sha1); sha1_result(&sctx.sha1, 
(void *)res); break; case CRYPTO_SHA2_256_HMAC: hashlen = SHA2_256_HASH_LEN; /* Inner hash: (K ^ IPAD) || data */ SHA256_Init(&sctx.sha2); hmac_internal(&sctx.sha2, res, intel_sha256_update, SHA256_Finalize_fn, ses->hmac_key, 0x36, crp->crp_buf, crd->crd_skip, crd->crd_len, crp->crp_flags); /* Outer hash: (K ^ OPAD) || inner hash */ SHA256_Init(&sctx.sha2); hmac_internal(&sctx.sha2, res, intel_sha256_update, SHA256_Finalize_fn, ses->hmac_key, 0x5C, res, 0, hashlen, 0); break; default: /* * AES-GMAC authentication is verified while processing the * enccrd */ return (0); } if (ses->mlen != 0 && ses->mlen < hashlen) hashlen = ses->mlen; crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject, hashlen, (void *)res); return (0); } Index: head/sys/crypto/armv8/armv8_crypto.c =================================================================== --- head/sys/crypto/armv8/armv8_crypto.c (revision 329877) +++ head/sys/crypto/armv8/armv8_crypto.c (revision 329878) @@ -1,565 +1,560 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek * Copyright (c) 2010 Konstantin Belousov * Copyright (c) 2014,2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by John-Mark Gurney * under sponsorship of the FreeBSD Foundation and * Rubicon Communications, LLC (Netgate). * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This is based on the aesni code. 
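 * Like aesni(4), the driver registers with crypto(9) as a synchronous
 * provider and does the work inline on the calling thread.  Every use of
 * the SIMD unit is bracketed with a per-CPU fpu_kern context, roughly:
 *
 *	AQUIRE_CTX(i, ctx);
 *	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR);
 *	... armv8_aes_encrypt_cbc() / armv8_aes_decrypt_cbc() ...
 *	fpu_kern_leave(curthread, ctx);
 *	RELEASE_CTX(i, ctx);
 *
 * unless the caller is already an FPU-using kernel thread
 * (is_fpu_kern_thread()), in which case the bracketing is skipped.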
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct armv8_crypto_softc { int dieing; int32_t cid; uint32_t sid; TAILQ_HEAD(armv8_crypto_sessions_head, armv8_crypto_session) sessions; struct rwlock lock; }; static struct mtx *ctx_mtx; static struct fpu_kern_ctx **ctx_vfp; #define AQUIRE_CTX(i, ctx) \ do { \ (i) = PCPU_GET(cpuid); \ mtx_lock(&ctx_mtx[(i)]); \ (ctx) = ctx_vfp[(i)]; \ } while (0) #define RELEASE_CTX(i, ctx) \ do { \ mtx_unlock(&ctx_mtx[(i)]); \ (i) = -1; \ (ctx) = NULL; \ } while (0) static void armv8_crypto_freesession_locked(struct armv8_crypto_softc *, struct armv8_crypto_session *); static int armv8_crypto_cipher_process(struct armv8_crypto_session *, struct cryptodesc *, struct cryptop *); MALLOC_DEFINE(M_ARMV8_CRYPTO, "armv8_crypto", "ARMv8 Crypto Data"); static void armv8_crypto_identify(driver_t *drv, device_t parent) { /* NB: order 10 is so we get attached after h/w devices */ if (device_find_child(parent, "armv8crypto", -1) == NULL && BUS_ADD_CHILD(parent, 10, "armv8crypto", -1) == 0) panic("ARMv8 crypto: could not attach"); } static int armv8_crypto_probe(device_t dev) { uint64_t reg; int ret = ENXIO; reg = READ_SPECIALREG(id_aa64isar0_el1); switch (ID_AA64ISAR0_AES(reg)) { case ID_AA64ISAR0_AES_BASE: case ID_AA64ISAR0_AES_PMULL: ret = 0; break; } device_set_desc_copy(dev, "AES-CBC"); /* TODO: Check more fields as we support more features */ return (ret); } static int armv8_crypto_attach(device_t dev) { struct armv8_crypto_softc *sc; int i; sc = device_get_softc(dev); TAILQ_INIT(&sc->sessions); sc->dieing = 0; sc->sid = 1; sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SYNC); if (sc->cid < 0) { device_printf(dev, "Could not get crypto driver id.\n"); return (ENOMEM); } rw_init(&sc->lock, "armv8crypto"); ctx_mtx = malloc(sizeof(*ctx_mtx) * (mp_maxid + 1), M_ARMV8_CRYPTO, M_WAITOK|M_ZERO); ctx_vfp = malloc(sizeof(*ctx_vfp) * (mp_maxid + 1), M_ARMV8_CRYPTO, M_WAITOK|M_ZERO); CPU_FOREACH(i) { ctx_vfp[i] = fpu_kern_alloc_ctx(0); mtx_init(&ctx_mtx[i], "armv8cryptoctx", NULL, MTX_DEF|MTX_NEW); } crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); return (0); } static int armv8_crypto_detach(device_t dev) { struct armv8_crypto_softc *sc; struct armv8_crypto_session *ses; int i; sc = device_get_softc(dev); rw_wlock(&sc->lock); TAILQ_FOREACH(ses, &sc->sessions, next) { if (ses->used) { rw_wunlock(&sc->lock); device_printf(dev, "Cannot detach, sessions still active.\n"); return (EBUSY); } } sc->dieing = 1; while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { TAILQ_REMOVE(&sc->sessions, ses, next); free(ses, M_ARMV8_CRYPTO); } rw_wunlock(&sc->lock); crypto_unregister_all(sc->cid); rw_destroy(&sc->lock); CPU_FOREACH(i) { if (ctx_vfp[i] != NULL) { mtx_destroy(&ctx_mtx[i]); fpu_kern_free_ctx(ctx_vfp[i]); } ctx_vfp[i] = NULL; } free(ctx_mtx, M_ARMV8_CRYPTO); ctx_mtx = NULL; free(ctx_vfp, M_ARMV8_CRYPTO); ctx_vfp = NULL; return (0); } static int armv8_crypto_cipher_setup(struct armv8_crypto_session *ses, struct cryptoini *encini) { int i; switch (ses->algo) { case CRYPTO_AES_CBC: switch (encini->cri_klen) { case 128: ses->rounds = AES128_ROUNDS; break; case 192: ses->rounds = AES192_ROUNDS; break; case 256: ses->rounds = AES256_ROUNDS; break; default: CRYPTDEB("invalid CBC/ICM/GCM key length"); return (EINVAL); } break; default: return (EINVAL); } rijndaelKeySetupEnc(ses->enc_schedule, encini->cri_key, encini->cri_klen); 
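	/*
	 * The matching decryption key schedule is expanded the same way
	 * below; the bswap32() loop that follows converts both schedules
	 * from the host-order words produced by the generic rijndael key
	 * setup into the byte order the armv8_aes_* routines presumably
	 * expect.
	 */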
rijndaelKeySetupDec(ses->dec_schedule, encini->cri_key, encini->cri_klen); for (i = 0; i < nitems(ses->enc_schedule); i++) { ses->enc_schedule[i] = bswap32(ses->enc_schedule[i]); ses->dec_schedule[i] = bswap32(ses->dec_schedule[i]); } return (0); } static int armv8_crypto_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) { struct armv8_crypto_softc *sc; struct armv8_crypto_session *ses; struct cryptoini *encini; int error; if (sidp == NULL || cri == NULL) { CRYPTDEB("no sidp or cri"); return (EINVAL); } sc = device_get_softc(dev); if (sc->dieing) return (EINVAL); ses = NULL; encini = NULL; for (; cri != NULL; cri = cri->cri_next) { switch (cri->cri_alg) { case CRYPTO_AES_CBC: if (encini != NULL) { CRYPTDEB("encini already set"); return (EINVAL); } encini = cri; break; default: CRYPTDEB("unhandled algorithm"); return (EINVAL); } } if (encini == NULL) { CRYPTDEB("no cipher"); return (EINVAL); } rw_wlock(&sc->lock); if (sc->dieing) { rw_wunlock(&sc->lock); return (EINVAL); } /* * Free sessions goes first, so if first session is used, we need to * allocate one. */ ses = TAILQ_FIRST(&sc->sessions); if (ses == NULL || ses->used) { ses = malloc(sizeof(*ses), M_ARMV8_CRYPTO, M_NOWAIT | M_ZERO); if (ses == NULL) { rw_wunlock(&sc->lock); return (ENOMEM); } ses->id = sc->sid++; } else { TAILQ_REMOVE(&sc->sessions, ses, next); } ses->used = 1; TAILQ_INSERT_TAIL(&sc->sessions, ses, next); rw_wunlock(&sc->lock); ses->algo = encini->cri_alg; error = armv8_crypto_cipher_setup(ses, encini); if (error != 0) { CRYPTDEB("setup failed"); rw_wlock(&sc->lock); armv8_crypto_freesession_locked(sc, ses); rw_wunlock(&sc->lock); return (error); } *sidp = ses->id; return (0); } static void armv8_crypto_freesession_locked(struct armv8_crypto_softc *sc, struct armv8_crypto_session *ses) { uint32_t sid; rw_assert(&sc->lock, RA_WLOCKED); sid = ses->id; TAILQ_REMOVE(&sc->sessions, ses, next); *ses = (struct armv8_crypto_session){}; ses->id = sid; TAILQ_INSERT_HEAD(&sc->sessions, ses, next); } static int armv8_crypto_freesession(device_t dev, uint64_t tid) { struct armv8_crypto_softc *sc; struct armv8_crypto_session *ses; uint32_t sid; sc = device_get_softc(dev); sid = ((uint32_t)tid) & 0xffffffff; rw_wlock(&sc->lock); TAILQ_FOREACH_REVERSE(ses, &sc->sessions, armv8_crypto_sessions_head, next) { if (ses->id == sid) break; } if (ses == NULL) { rw_wunlock(&sc->lock); return (EINVAL); } armv8_crypto_freesession_locked(sc, ses); rw_wunlock(&sc->lock); return (0); } static int armv8_crypto_process(device_t dev, struct cryptop *crp, int hint __unused) { struct armv8_crypto_softc *sc = device_get_softc(dev); struct cryptodesc *crd, *enccrd; struct armv8_crypto_session *ses; int error; error = 0; enccrd = NULL; /* Sanity check. 
*/ if (crp == NULL) return (EINVAL); if (crp->crp_callback == NULL || crp->crp_desc == NULL) { error = EINVAL; goto out; } for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) { switch (crd->crd_alg) { case CRYPTO_AES_CBC: if (enccrd != NULL) { error = EINVAL; goto out; } enccrd = crd; break; default: error = EINVAL; goto out; } } if (enccrd == NULL) { error = EINVAL; goto out; } /* We can only handle full blocks for now */ if ((enccrd->crd_len % AES_BLOCK_LEN) != 0) { error = EINVAL; goto out; } rw_rlock(&sc->lock); TAILQ_FOREACH_REVERSE(ses, &sc->sessions, armv8_crypto_sessions_head, next) { if (ses->id == (crp->crp_sid & 0xffffffff)) break; } rw_runlock(&sc->lock); if (ses == NULL) { error = EINVAL; goto out; } error = armv8_crypto_cipher_process(ses, enccrd, crp); out: crp->crp_etype = error; crypto_done(crp); return (error); } static uint8_t * armv8_crypto_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, int *allocated) { struct mbuf *m; struct uio *uio; struct iovec *iov; uint8_t *addr; if (crp->crp_flags & CRYPTO_F_IMBUF) { m = (struct mbuf *)crp->crp_buf; if (m->m_next != NULL) goto alloc; addr = mtod(m, uint8_t *); } else if (crp->crp_flags & CRYPTO_F_IOV) { uio = (struct uio *)crp->crp_buf; if (uio->uio_iovcnt != 1) goto alloc; iov = uio->uio_iov; addr = (uint8_t *)iov->iov_base; } else addr = (uint8_t *)crp->crp_buf; *allocated = 0; addr += enccrd->crd_skip; return (addr); alloc: addr = malloc(enccrd->crd_len, M_ARMV8_CRYPTO, M_NOWAIT); if (addr != NULL) { *allocated = 1; crypto_copydata(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, addr); } else *allocated = 0; return (addr); } static int armv8_crypto_cipher_process(struct armv8_crypto_session *ses, struct cryptodesc *enccrd, struct cryptop *crp) { struct fpu_kern_ctx *ctx; uint8_t *buf; uint8_t iv[AES_BLOCK_LEN]; - int allocated, error, i; + int allocated, i; int encflag, ivlen; int kt; encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT; buf = armv8_crypto_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) return (ENOMEM); - error = 0; - kt = is_fpu_kern_thread(0); if (!kt) { AQUIRE_CTX(i, ctx); - error = fpu_kern_enter(curthread, ctx, + fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); - if (error != 0) - goto out; } if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) { panic("CRD_F_KEY_EXPLICIT"); } switch (enccrd->crd_alg) { case CRYPTO_AES_CBC: ivlen = AES_BLOCK_LEN; break; } /* Setup iv */ if (encflag) { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) bcopy(enccrd->crd_iv, iv, ivlen); else arc4rand(iv, ivlen, 0); if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_inject, ivlen, iv); } else { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) bcopy(enccrd->crd_iv, iv, ivlen); else crypto_copydata(crp->crp_flags, crp->crp_buf, enccrd->crd_inject, ivlen, iv); } /* Do work */ switch (ses->algo) { case CRYPTO_AES_CBC: if (encflag) armv8_aes_encrypt_cbc(ses->rounds, ses->enc_schedule, enccrd->crd_len, buf, buf, iv); else armv8_aes_decrypt_cbc(ses->rounds, ses->dec_schedule, enccrd->crd_len, buf, iv); break; } if (allocated) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, buf); if (!kt) { fpu_kern_leave(curthread, ctx); -out: RELEASE_CTX(i, ctx); } if (allocated) { bzero(buf, enccrd->crd_len); free(buf, M_ARMV8_CRYPTO); } - return (error); + return (0); } static device_method_t armv8_crypto_methods[] = { DEVMETHOD(device_identify, armv8_crypto_identify), 
DEVMETHOD(device_probe, armv8_crypto_probe), DEVMETHOD(device_attach, armv8_crypto_attach), DEVMETHOD(device_detach, armv8_crypto_detach), DEVMETHOD(cryptodev_newsession, armv8_crypto_newsession), DEVMETHOD(cryptodev_freesession, armv8_crypto_freesession), DEVMETHOD(cryptodev_process, armv8_crypto_process), DEVMETHOD_END, }; static DEFINE_CLASS_0(armv8crypto, armv8_crypto_driver, armv8_crypto_methods, sizeof(struct armv8_crypto_softc)); static devclass_t armv8_crypto_devclass; DRIVER_MODULE(armv8crypto, nexus, armv8_crypto_driver, armv8_crypto_devclass, 0, 0); Index: head/sys/crypto/via/padlock.c =================================================================== --- head/sys/crypto/via/padlock.c (revision 329877) +++ head/sys/crypto/via/padlock.c (revision 329878) @@ -1,428 +1,426 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include #include #include #include #endif #include #include #include #include #include "cryptodev_if.h" /* * Technical documentation about the PadLock engine can be found here: * * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf */ struct padlock_softc { int32_t sc_cid; uint32_t sc_sid; TAILQ_HEAD(padlock_sessions_head, padlock_session) sc_sessions; struct rwlock sc_sessions_lock; }; static int padlock_newsession(device_t, uint32_t *sidp, struct cryptoini *cri); static int padlock_freesession(device_t, uint64_t tid); static void padlock_freesession_one(struct padlock_softc *sc, struct padlock_session *ses, int locked); static int padlock_process(device_t, struct cryptop *crp, int hint __unused); MALLOC_DEFINE(M_PADLOCK, "padlock_data", "PadLock Data"); static void padlock_identify(driver_t *drv, device_t parent) { /* NB: order 10 is so we get attached after h/w devices */ if (device_find_child(parent, "padlock", -1) == NULL && BUS_ADD_CHILD(parent, 10, "padlock", -1) == 0) panic("padlock: could not attach"); } static int padlock_probe(device_t dev) { char capp[256]; #if defined(__amd64__) || defined(__i386__) /* If there is no AES support, we has nothing to do here. 
*/ if (!(via_feature_xcrypt & VIA_HAS_AES)) { device_printf(dev, "No ACE support.\n"); return (EINVAL); } strlcpy(capp, "AES-CBC", sizeof(capp)); #if 0 strlcat(capp, ",AES-EBC", sizeof(capp)); strlcat(capp, ",AES-CFB", sizeof(capp)); strlcat(capp, ",AES-OFB", sizeof(capp)); #endif if (via_feature_xcrypt & VIA_HAS_SHA) { strlcat(capp, ",SHA1", sizeof(capp)); strlcat(capp, ",SHA256", sizeof(capp)); } #if 0 if (via_feature_xcrypt & VIA_HAS_AESCTR) strlcat(capp, ",AES-CTR", sizeof(capp)); if (via_feature_xcrypt & VIA_HAS_MM) strlcat(capp, ",RSA", sizeof(capp)); #endif device_set_desc_copy(dev, capp); return (0); #else return (EINVAL); #endif } static int padlock_attach(device_t dev) { struct padlock_softc *sc = device_get_softc(dev); TAILQ_INIT(&sc->sc_sessions); sc->sc_sid = 1; sc->sc_cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE); if (sc->sc_cid < 0) { device_printf(dev, "Could not get crypto driver id.\n"); return (ENOMEM); } rw_init(&sc->sc_sessions_lock, "padlock_lock"); crypto_register(sc->sc_cid, CRYPTO_AES_CBC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_RIPEMD160_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA2_256_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA2_384_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA2_512_HMAC, 0, 0); return (0); } static int padlock_detach(device_t dev) { struct padlock_softc *sc = device_get_softc(dev); struct padlock_session *ses; rw_wlock(&sc->sc_sessions_lock); TAILQ_FOREACH(ses, &sc->sc_sessions, ses_next) { if (ses->ses_used) { rw_wunlock(&sc->sc_sessions_lock); device_printf(dev, "Cannot detach, sessions still active.\n"); return (EBUSY); } } while ((ses = TAILQ_FIRST(&sc->sc_sessions)) != NULL) { TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); fpu_kern_free_ctx(ses->ses_fpu_ctx); free(ses, M_PADLOCK); } rw_destroy(&sc->sc_sessions_lock); crypto_unregister_all(sc->sc_cid); return (0); } static int padlock_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) { struct padlock_softc *sc = device_get_softc(dev); struct padlock_session *ses = NULL; struct cryptoini *encini, *macini; struct thread *td; int error; if (sidp == NULL || cri == NULL) return (EINVAL); encini = macini = NULL; for (; cri != NULL; cri = cri->cri_next) { switch (cri->cri_alg) { case CRYPTO_NULL_HMAC: case CRYPTO_MD5_HMAC: case CRYPTO_SHA1_HMAC: case CRYPTO_RIPEMD160_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: if (macini != NULL) return (EINVAL); macini = cri; break; case CRYPTO_AES_CBC: if (encini != NULL) return (EINVAL); encini = cri; break; default: return (EINVAL); } } /* * We only support HMAC algorithms to be able to work with * ipsec(4), so if we are asked only for authentication without * encryption, don't pretend we can accellerate it. */ if (encini == NULL) return (EINVAL); /* * Let's look for a free session structure. */ rw_wlock(&sc->sc_sessions_lock); /* * Free sessions goes first, so if first session is used, we need to * allocate one. 
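 * (padlock_freesession_one() re-inserts cleared sessions at the head of
 * the list while sessions in use are kept at the tail, so inspecting the
 * first element is sufficient.)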
*/ ses = TAILQ_FIRST(&sc->sc_sessions); if (ses == NULL || ses->ses_used) { ses = malloc(sizeof(*ses), M_PADLOCK, M_NOWAIT | M_ZERO); if (ses == NULL) { rw_wunlock(&sc->sc_sessions_lock); return (ENOMEM); } ses->ses_fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL | FPU_KERN_NOWAIT); if (ses->ses_fpu_ctx == NULL) { free(ses, M_PADLOCK); rw_wunlock(&sc->sc_sessions_lock); return (ENOMEM); } ses->ses_id = sc->sc_sid++; } else { TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); } ses->ses_used = 1; TAILQ_INSERT_TAIL(&sc->sc_sessions, ses, ses_next); rw_wunlock(&sc->sc_sessions_lock); error = padlock_cipher_setup(ses, encini); if (error != 0) { padlock_freesession_one(sc, ses, 0); return (error); } if (macini != NULL) { td = curthread; - error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | + fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); - if (error == 0) { - error = padlock_hash_setup(ses, macini); - fpu_kern_leave(td, ses->ses_fpu_ctx); - } + error = padlock_hash_setup(ses, macini); + fpu_kern_leave(td, ses->ses_fpu_ctx); if (error != 0) { padlock_freesession_one(sc, ses, 0); return (error); } } *sidp = ses->ses_id; return (0); } static void padlock_freesession_one(struct padlock_softc *sc, struct padlock_session *ses, int locked) { struct fpu_kern_ctx *ctx; uint32_t sid = ses->ses_id; if (!locked) rw_wlock(&sc->sc_sessions_lock); TAILQ_REMOVE(&sc->sc_sessions, ses, ses_next); padlock_hash_free(ses); ctx = ses->ses_fpu_ctx; bzero(ses, sizeof(*ses)); ses->ses_used = 0; ses->ses_id = sid; ses->ses_fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sc_sessions, ses, ses_next); if (!locked) rw_wunlock(&sc->sc_sessions_lock); } static int padlock_freesession(device_t dev, uint64_t tid) { struct padlock_softc *sc = device_get_softc(dev); struct padlock_session *ses; uint32_t sid = ((uint32_t)tid) & 0xffffffff; rw_wlock(&sc->sc_sessions_lock); TAILQ_FOREACH_REVERSE(ses, &sc->sc_sessions, padlock_sessions_head, ses_next) { if (ses->ses_id == sid) break; } if (ses == NULL) { rw_wunlock(&sc->sc_sessions_lock); return (EINVAL); } padlock_freesession_one(sc, ses, 1); rw_wunlock(&sc->sc_sessions_lock); return (0); } static int padlock_process(device_t dev, struct cryptop *crp, int hint __unused) { struct padlock_softc *sc = device_get_softc(dev); struct padlock_session *ses = NULL; struct cryptodesc *crd, *enccrd, *maccrd; int error = 0; enccrd = maccrd = NULL; /* Sanity check. */ if (crp == NULL) return (EINVAL); if (crp->crp_callback == NULL || crp->crp_desc == NULL) { error = EINVAL; goto out; } for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) { switch (crd->crd_alg) { case CRYPTO_NULL_HMAC: case CRYPTO_MD5_HMAC: case CRYPTO_SHA1_HMAC: case CRYPTO_RIPEMD160_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: if (maccrd != NULL) { error = EINVAL; goto out; } maccrd = crd; break; case CRYPTO_AES_CBC: if (enccrd != NULL) { error = EINVAL; goto out; } enccrd = crd; break; default: return (EINVAL); } } if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) { error = EINVAL; goto out; } rw_rlock(&sc->sc_sessions_lock); TAILQ_FOREACH_REVERSE(ses, &sc->sc_sessions, padlock_sessions_head, ses_next) { if (ses->ses_id == (crp->crp_sid & 0xffffffff)) break; } rw_runlock(&sc->sc_sessions_lock); if (ses == NULL) { error = EINVAL; goto out; } /* Perform data authentication if requested before encryption. 
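 * The order comes from the descriptor chain: a MAC descriptor that points
 * at the cipher descriptor (maccrd->crd_next == enccrd) is hashed over the
 * plaintext here, while the mirror check after padlock_cipher_process()
 * covers the encrypt-then-authenticate case.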
*/ if (maccrd != NULL && maccrd->crd_next == enccrd) { error = padlock_hash_process(ses, maccrd, crp); if (error != 0) goto out; } error = padlock_cipher_process(ses, enccrd, crp); if (error != 0) goto out; /* Perform data authentication if requested after encryption. */ if (maccrd != NULL && enccrd->crd_next == maccrd) { error = padlock_hash_process(ses, maccrd, crp); if (error != 0) goto out; } out: #if 0 /* * This code is not necessary, because contexts will be freed on next * padlock_setup_mackey() call or at padlock_freesession() call. */ if (ses != NULL && maccrd != NULL && (maccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) { padlock_free_ctx(ses->ses_axf, ses->ses_ictx); padlock_free_ctx(ses->ses_axf, ses->ses_octx); } #endif crp->crp_etype = error; crypto_done(crp); return (error); } static device_method_t padlock_methods[] = { DEVMETHOD(device_identify, padlock_identify), DEVMETHOD(device_probe, padlock_probe), DEVMETHOD(device_attach, padlock_attach), DEVMETHOD(device_detach, padlock_detach), DEVMETHOD(cryptodev_newsession, padlock_newsession), DEVMETHOD(cryptodev_freesession,padlock_freesession), DEVMETHOD(cryptodev_process, padlock_process), {0, 0}, }; static driver_t padlock_driver = { "padlock", padlock_methods, sizeof(struct padlock_softc), }; static devclass_t padlock_devclass; /* XXX where to attach */ DRIVER_MODULE(padlock, nexus, padlock_driver, padlock_devclass, 0, 0); MODULE_VERSION(padlock, 1); MODULE_DEPEND(padlock, crypto, 1, 1, 1); Index: head/sys/crypto/via/padlock_cipher.c =================================================================== --- head/sys/crypto/via/padlock_cipher.c (revision 329877) +++ head/sys/crypto/via/padlock_cipher.c (revision 329878) @@ -1,279 +1,275 @@ /*- * Copyright (c) 2005-2006 Pawel Jakub Dawidek * Copyright (c) 2004 Mark R V Murray * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $OpenBSD: via.c,v 1.3 2004/06/15 23:36:55 deraadt Exp $ */ /*- * Copyright (c) 2003 Jason Wright * Copyright (c) 2003, 2004 Theo de Raadt * All rights reserved. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. 
* * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define PADLOCK_ROUND_COUNT_AES128 10 #define PADLOCK_ROUND_COUNT_AES192 12 #define PADLOCK_ROUND_COUNT_AES256 14 #define PADLOCK_ALGORITHM_TYPE_AES 0 #define PADLOCK_KEY_GENERATION_HW 0 #define PADLOCK_KEY_GENERATION_SW 1 #define PADLOCK_DIRECTION_ENCRYPT 0 #define PADLOCK_DIRECTION_DECRYPT 1 #define PADLOCK_KEY_SIZE_128 0 #define PADLOCK_KEY_SIZE_192 1 #define PADLOCK_KEY_SIZE_256 2 MALLOC_DECLARE(M_PADLOCK); static __inline void padlock_cbc(void *in, void *out, size_t count, void *key, union padlock_cw *cw, void *iv) { #ifdef __GNUCLIKE_ASM /* The .byte line is really VIA C3 "xcrypt-cbc" instruction */ __asm __volatile( "pushf \n\t" "popf \n\t" "rep \n\t" ".byte 0x0f, 0xa7, 0xd0" : "+a" (iv), "+c" (count), "+D" (out), "+S" (in) : "b" (key), "d" (cw) : "cc", "memory" ); #endif } static void padlock_cipher_key_setup(struct padlock_session *ses, caddr_t key, int klen) { union padlock_cw *cw; int i; cw = &ses->ses_cw; if (cw->cw_key_generation == PADLOCK_KEY_GENERATION_SW) { /* Build expanded keys for both directions */ rijndaelKeySetupEnc(ses->ses_ekey, key, klen); rijndaelKeySetupDec(ses->ses_dkey, key, klen); for (i = 0; i < 4 * (RIJNDAEL_MAXNR + 1); i++) { ses->ses_ekey[i] = ntohl(ses->ses_ekey[i]); ses->ses_dkey[i] = ntohl(ses->ses_dkey[i]); } } else { bcopy(key, ses->ses_ekey, klen); bcopy(key, ses->ses_dkey, klen); } } int padlock_cipher_setup(struct padlock_session *ses, struct cryptoini *encini) { union padlock_cw *cw; if (encini->cri_klen != 128 && encini->cri_klen != 192 && encini->cri_klen != 256) { return (EINVAL); } cw = &ses->ses_cw; bzero(cw, sizeof(*cw)); cw->cw_algorithm_type = PADLOCK_ALGORITHM_TYPE_AES; cw->cw_key_generation = PADLOCK_KEY_GENERATION_SW; cw->cw_intermediate = 0; switch (encini->cri_klen) { case 128: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES128; cw->cw_key_size = PADLOCK_KEY_SIZE_128; #ifdef HW_KEY_GENERATION /* This doesn't buy us much, that's why it is commented out. */ cw->cw_key_generation = PADLOCK_KEY_GENERATION_HW; #endif break; case 192: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES192; cw->cw_key_size = PADLOCK_KEY_SIZE_192; break; case 256: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES256; cw->cw_key_size = PADLOCK_KEY_SIZE_256; break; } if (encini->cri_key != NULL) { padlock_cipher_key_setup(ses, encini->cri_key, encini->cri_klen); } arc4rand(ses->ses_iv, sizeof(ses->ses_iv), 0); return (0); } /* * Function checks if the given buffer is already 16 bytes aligned. * If it is there is no need to allocate new buffer. * If it isn't, new buffer is allocated. 
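 * The copy path over-allocates by 16 bytes so the caller can realign the
 * working pointer with PADLOCK_ALIGN(); mbuf-backed requests always take
 * the copy path.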
*/ static u_char * padlock_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, int *allocated) { u_char *addr; if (crp->crp_flags & CRYPTO_F_IMBUF) goto alloc; else { if (crp->crp_flags & CRYPTO_F_IOV) { struct uio *uio; struct iovec *iov; uio = (struct uio *)crp->crp_buf; if (uio->uio_iovcnt != 1) goto alloc; iov = uio->uio_iov; addr = (u_char *)iov->iov_base + enccrd->crd_skip; } else { addr = (u_char *)crp->crp_buf; } if (((uintptr_t)addr & 0xf) != 0) /* 16 bytes aligned? */ goto alloc; *allocated = 0; return (addr); } alloc: *allocated = 1; addr = malloc(enccrd->crd_len + 16, M_PADLOCK, M_NOWAIT); return (addr); } int padlock_cipher_process(struct padlock_session *ses, struct cryptodesc *enccrd, struct cryptop *crp) { union padlock_cw *cw; struct thread *td; u_char *buf, *abuf; uint32_t *key; - int allocated, error; + int allocated; buf = padlock_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) return (ENOMEM); /* Buffer has to be 16 bytes aligned. */ abuf = PADLOCK_ALIGN(buf); if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) { padlock_cipher_key_setup(ses, enccrd->crd_key, enccrd->crd_klen); } cw = &ses->ses_cw; cw->cw_filler0 = 0; cw->cw_filler1 = 0; cw->cw_filler2 = 0; cw->cw_filler3 = 0; if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) { cw->cw_direction = PADLOCK_DIRECTION_ENCRYPT; key = ses->ses_ekey; if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) bcopy(enccrd->crd_iv, ses->ses_iv, AES_BLOCK_LEN); if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) { crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_inject, AES_BLOCK_LEN, ses->ses_iv); } } else { cw->cw_direction = PADLOCK_DIRECTION_DECRYPT; key = ses->ses_dkey; if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) bcopy(enccrd->crd_iv, ses->ses_iv, AES_BLOCK_LEN); else { crypto_copydata(crp->crp_flags, crp->crp_buf, enccrd->crd_inject, AES_BLOCK_LEN, ses->ses_iv); } } if (allocated) { crypto_copydata(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, abuf); } td = curthread; - error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | - FPU_KERN_KTHR); - if (error != 0) - goto out; + fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); padlock_cbc(abuf, abuf, enccrd->crd_len / AES_BLOCK_LEN, key, cw, ses->ses_iv); fpu_kern_leave(td, ses->ses_fpu_ctx); if (allocated) { crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, abuf); } /* copy out last block for use as next session IV */ if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) { crypto_copydata(crp->crp_flags, crp->crp_buf, enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN, AES_BLOCK_LEN, ses->ses_iv); } - out: if (allocated) { bzero(buf, enccrd->crd_len + 16); free(buf, M_PADLOCK); } - return (error); + return (0); } Index: head/sys/crypto/via/padlock_hash.c =================================================================== --- head/sys/crypto/via/padlock_hash.c (revision 329877) +++ head/sys/crypto/via/padlock_hash.c (revision 329878) @@ -1,408 +1,405 @@ /*- * Copyright (c) 2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include #include #include #include #endif #include #include #include /* for hmac_ipad_buffer and hmac_opad_buffer */ #include #include /* * Implementation notes. * * Some VIA CPUs provides SHA1 and SHA256 acceleration. * We implement all HMAC algorithms provided by crypto(9) framework, but we do * the crypto work in software unless this is HMAC/SHA1 or HMAC/SHA256 and * our CPU can accelerate it. * * Additional CPU instructions, which preform SHA1 and SHA256 are one-shot * functions - we have only one chance to give the data, CPU itself will add * the padding and calculate hash automatically. * This means, it is not possible to implement common init(), update(), final() * methods. * The way I've choosen is to keep adding data to the buffer on update() * (reallocating the buffer if necessary) and call XSHA{1,256} instruction on * final(). 
*/ struct padlock_sha_ctx { uint8_t *psc_buf; int psc_offset; int psc_size; }; CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx)); static void padlock_sha_init(struct padlock_sha_ctx *ctx); static int padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize); static void padlock_sha1_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static struct auth_hash padlock_hmac_sha1 = { .type = CRYPTO_SHA1_HMAC, .name = "HMAC-SHA1", .keysize = SHA1_HMAC_BLOCK_LEN, .hashsize = SHA1_HASH_LEN, .ctxsize = sizeof(struct padlock_sha_ctx), .blocksize = SHA1_HMAC_BLOCK_LEN, .Init = (void (*)(void *))padlock_sha_init, .Update = (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, .Final = (void (*)(uint8_t *, void *))padlock_sha1_final, }; static struct auth_hash padlock_hmac_sha256 = { .type = CRYPTO_SHA2_256_HMAC, .name = "HMAC-SHA2-256", .keysize = SHA2_256_HMAC_BLOCK_LEN, .hashsize = SHA2_256_HASH_LEN, .ctxsize = sizeof(struct padlock_sha_ctx), .blocksize = SHA2_256_HMAC_BLOCK_LEN, .Init = (void (*)(void *))padlock_sha_init, .Update = (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, .Final = (void (*)(uint8_t *, void *))padlock_sha256_final, }; MALLOC_DECLARE(M_PADLOCK); static __inline void padlock_output_block(uint32_t *src, uint32_t *dst, size_t count) { while (count-- > 0) *dst++ = bswap32(*src++); } static void padlock_do_sha1(const u_char *in, u_char *out, int count) { u_char buf[128+16]; /* PadLock needs at least 128 bytes buffer. */ u_char *result = PADLOCK_ALIGN(buf); ((uint32_t *)result)[0] = 0x67452301; ((uint32_t *)result)[1] = 0xEFCDAB89; ((uint32_t *)result)[2] = 0x98BADCFE; ((uint32_t *)result)[3] = 0x10325476; ((uint32_t *)result)[4] = 0xC3D2E1F0; #ifdef __GNUCLIKE_ASM __asm __volatile( ".byte 0xf3, 0x0f, 0xa6, 0xc8" /* rep xsha1 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0) ); #endif padlock_output_block((uint32_t *)result, (uint32_t *)out, SHA1_HASH_LEN / sizeof(uint32_t)); } static void padlock_do_sha256(const char *in, char *out, int count) { char buf[128+16]; /* PadLock needs at least 128 bytes buffer. 
*/ char *result = PADLOCK_ALIGN(buf); ((uint32_t *)result)[0] = 0x6A09E667; ((uint32_t *)result)[1] = 0xBB67AE85; ((uint32_t *)result)[2] = 0x3C6EF372; ((uint32_t *)result)[3] = 0xA54FF53A; ((uint32_t *)result)[4] = 0x510E527F; ((uint32_t *)result)[5] = 0x9B05688C; ((uint32_t *)result)[6] = 0x1F83D9AB; ((uint32_t *)result)[7] = 0x5BE0CD19; #ifdef __GNUCLIKE_ASM __asm __volatile( ".byte 0xf3, 0x0f, 0xa6, 0xd0" /* rep xsha256 */ : "+S"(in), "+D"(result) : "c"(count), "a"(0) ); #endif padlock_output_block((uint32_t *)result, (uint32_t *)out, SHA2_256_HASH_LEN / sizeof(uint32_t)); } static void padlock_sha_init(struct padlock_sha_ctx *ctx) { ctx->psc_buf = NULL; ctx->psc_offset = 0; ctx->psc_size = 0; } static int padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize) { if (ctx->psc_size - ctx->psc_offset < bufsize) { ctx->psc_size = MAX(ctx->psc_size * 2, ctx->psc_size + bufsize); ctx->psc_buf = realloc(ctx->psc_buf, ctx->psc_size, M_PADLOCK, M_NOWAIT); if(ctx->psc_buf == NULL) return (ENOMEM); } bcopy(buf, ctx->psc_buf + ctx->psc_offset, bufsize); ctx->psc_offset += bufsize; return (0); } static void padlock_sha_free(struct padlock_sha_ctx *ctx) { if (ctx->psc_buf != NULL) { //bzero(ctx->psc_buf, ctx->psc_size); free(ctx->psc_buf, M_PADLOCK); ctx->psc_buf = NULL; ctx->psc_offset = 0; ctx->psc_size = 0; } } static void padlock_sha1_final(uint8_t *hash, struct padlock_sha_ctx *ctx) { padlock_do_sha1(ctx->psc_buf, hash, ctx->psc_offset); padlock_sha_free(ctx); } static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx) { padlock_do_sha256(ctx->psc_buf, hash, ctx->psc_offset); padlock_sha_free(ctx); } static void padlock_copy_ctx(struct auth_hash *axf, void *sctx, void *dctx) { if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 && (axf->type == CRYPTO_SHA1_HMAC || axf->type == CRYPTO_SHA2_256_HMAC)) { struct padlock_sha_ctx *spctx = sctx, *dpctx = dctx; dpctx->psc_offset = spctx->psc_offset; dpctx->psc_size = spctx->psc_size; dpctx->psc_buf = malloc(dpctx->psc_size, M_PADLOCK, M_WAITOK); bcopy(spctx->psc_buf, dpctx->psc_buf, dpctx->psc_size); } else { bcopy(sctx, dctx, axf->ctxsize); } } static void padlock_free_ctx(struct auth_hash *axf, void *ctx) { if ((via_feature_xcrypt & VIA_HAS_SHA) != 0 && (axf->type == CRYPTO_SHA1_HMAC || axf->type == CRYPTO_SHA2_256_HMAC)) { padlock_sha_free(ctx); } } static void padlock_hash_key_setup(struct padlock_session *ses, caddr_t key, int klen) { struct auth_hash *axf; int i; klen /= 8; axf = ses->ses_axf; /* * Try to free contexts before using them, because * padlock_hash_key_setup() can be called twice - once from * padlock_newsession() and again from padlock_process(). */ padlock_free_ctx(axf, ses->ses_ictx); padlock_free_ctx(axf, ses->ses_octx); for (i = 0; i < klen; i++) key[i] ^= HMAC_IPAD_VAL; axf->Init(ses->ses_ictx); axf->Update(ses->ses_ictx, key, klen); axf->Update(ses->ses_ictx, hmac_ipad_buffer, axf->blocksize - klen); for (i = 0; i < klen; i++) key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL); axf->Init(ses->ses_octx); axf->Update(ses->ses_octx, key, klen); axf->Update(ses->ses_octx, hmac_opad_buffer, axf->blocksize - klen); for (i = 0; i < klen; i++) key[i] ^= HMAC_OPAD_VAL; } /* * Compute keyed-hash authenticator. 
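 * i.e. HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)).  ses_ictx and
 * ses_octx already hold hash state keyed with the ipad/opad pads by
 * padlock_hash_key_setup(), so only the message and then the inner digest
 * need to be fed in below.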
*/ static int padlock_authcompute(struct padlock_session *ses, struct cryptodesc *crd, caddr_t buf, int flags) { u_char hash[HASH_MAX_LEN]; struct auth_hash *axf; union authctx ctx; int error; axf = ses->ses_axf; padlock_copy_ctx(axf, ses->ses_ictx, &ctx); error = crypto_apply(flags, buf, crd->crd_skip, crd->crd_len, (int (*)(void *, void *, unsigned int))axf->Update, (caddr_t)&ctx); if (error != 0) { padlock_free_ctx(axf, &ctx); return (error); } axf->Final(hash, &ctx); padlock_copy_ctx(axf, ses->ses_octx, &ctx); axf->Update(&ctx, hash, axf->hashsize); axf->Final(hash, &ctx); /* Inject the authentication data */ crypto_copyback(flags, buf, crd->crd_inject, ses->ses_mlen == 0 ? axf->hashsize : ses->ses_mlen, hash); return (0); } int padlock_hash_setup(struct padlock_session *ses, struct cryptoini *macini) { ses->ses_mlen = macini->cri_mlen; /* Find software structure which describes HMAC algorithm. */ switch (macini->cri_alg) { case CRYPTO_NULL_HMAC: ses->ses_axf = &auth_hash_null; break; case CRYPTO_MD5_HMAC: ses->ses_axf = &auth_hash_hmac_md5; break; case CRYPTO_SHA1_HMAC: if ((via_feature_xcrypt & VIA_HAS_SHA) != 0) ses->ses_axf = &padlock_hmac_sha1; else ses->ses_axf = &auth_hash_hmac_sha1; break; case CRYPTO_RIPEMD160_HMAC: ses->ses_axf = &auth_hash_hmac_ripemd_160; break; case CRYPTO_SHA2_256_HMAC: if ((via_feature_xcrypt & VIA_HAS_SHA) != 0) ses->ses_axf = &padlock_hmac_sha256; else ses->ses_axf = &auth_hash_hmac_sha2_256; break; case CRYPTO_SHA2_384_HMAC: ses->ses_axf = &auth_hash_hmac_sha2_384; break; case CRYPTO_SHA2_512_HMAC: ses->ses_axf = &auth_hash_hmac_sha2_512; break; } /* Allocate memory for HMAC inner and outer contexts. */ ses->ses_ictx = malloc(ses->ses_axf->ctxsize, M_PADLOCK, M_ZERO | M_NOWAIT); ses->ses_octx = malloc(ses->ses_axf->ctxsize, M_PADLOCK, M_ZERO | M_NOWAIT); if (ses->ses_ictx == NULL || ses->ses_octx == NULL) return (ENOMEM); /* Setup key if given. */ if (macini->cri_key != NULL) { padlock_hash_key_setup(ses, macini->cri_key, macini->cri_klen); } return (0); } int padlock_hash_process(struct padlock_session *ses, struct cryptodesc *maccrd, struct cryptop *crp) { struct thread *td; int error; td = curthread; - error = fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | - FPU_KERN_KTHR); - if (error != 0) - return (error); + fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); if ((maccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) padlock_hash_key_setup(ses, maccrd->crd_key, maccrd->crd_klen); error = padlock_authcompute(ses, maccrd, crp->crp_buf, crp->crp_flags); fpu_kern_leave(td, ses->ses_fpu_ctx); return (error); } void padlock_hash_free(struct padlock_session *ses) { if (ses->ses_ictx != NULL) { padlock_free_ctx(ses->ses_axf, ses->ses_ictx); bzero(ses->ses_ictx, ses->ses_axf->ctxsize); free(ses->ses_ictx, M_PADLOCK); ses->ses_ictx = NULL; } if (ses->ses_octx != NULL) { padlock_free_ctx(ses->ses_axf, ses->ses_octx); bzero(ses->ses_octx, ses->ses_axf->ctxsize); free(ses->ses_octx, M_PADLOCK); ses->ses_octx = NULL; } } Index: head/sys/dev/efidev/efirt.c =================================================================== --- head/sys/dev/efidev/efirt.c (revision 329877) +++ head/sys/dev/efidev/efirt.c (revision 329878) @@ -1,393 +1,387 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * Copyright (c) 2001 Doug Rabson * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct efi_systbl *efi_systbl; static struct efi_cfgtbl *efi_cfgtbl; static struct efi_rt *efi_runtime; static int efi_status2err[25] = { 0, /* EFI_SUCCESS */ ENOEXEC, /* EFI_LOAD_ERROR */ EINVAL, /* EFI_INVALID_PARAMETER */ ENOSYS, /* EFI_UNSUPPORTED */ EMSGSIZE, /* EFI_BAD_BUFFER_SIZE */ EOVERFLOW, /* EFI_BUFFER_TOO_SMALL */ EBUSY, /* EFI_NOT_READY */ EIO, /* EFI_DEVICE_ERROR */ EROFS, /* EFI_WRITE_PROTECTED */ EAGAIN, /* EFI_OUT_OF_RESOURCES */ EIO, /* EFI_VOLUME_CORRUPTED */ ENOSPC, /* EFI_VOLUME_FULL */ ENXIO, /* EFI_NO_MEDIA */ ESTALE, /* EFI_MEDIA_CHANGED */ ENOENT, /* EFI_NOT_FOUND */ EACCES, /* EFI_ACCESS_DENIED */ ETIMEDOUT, /* EFI_NO_RESPONSE */ EADDRNOTAVAIL, /* EFI_NO_MAPPING */ ETIMEDOUT, /* EFI_TIMEOUT */ EDOOFUS, /* EFI_NOT_STARTED */ EALREADY, /* EFI_ALREADY_STARTED */ ECANCELED, /* EFI_ABORTED */ EPROTO, /* EFI_ICMP_ERROR */ EPROTO, /* EFI_TFTP_ERROR */ EPROTO /* EFI_PROTOCOL_ERROR */ }; static int efi_status_to_errno(efi_status status) { u_long code; code = status & 0x3ffffffffffffffful; return (code < nitems(efi_status2err) ? efi_status2err[code] : EDOOFUS); } static struct mtx efi_lock; static int efi_init(void) { struct efi_map_header *efihdr; struct efi_md *map; caddr_t kmdp; size_t efisz; mtx_init(&efi_lock, "efi", NULL, MTX_DEF); if (efi_systbl_phys == 0) { if (bootverbose) printf("EFI systbl not available\n"); return (0); } if (!PMAP_HAS_DMAP) { if (bootverbose) printf("EFI systbl requires direct map\n"); return (0); } efi_systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys); if (efi_systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) { efi_systbl = NULL; if (bootverbose) printf("EFI systbl signature invalid\n"); return (0); } efi_cfgtbl = (efi_systbl->st_cfgtbl == 0) ? 
NULL : (struct efi_cfgtbl *)efi_systbl->st_cfgtbl; if (efi_cfgtbl == NULL) { if (bootverbose) printf("EFI config table is not present\n"); } kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr == NULL) { if (bootverbose) printf("EFI map is not present\n"); return (0); } efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return (ENOMEM); if (!efi_create_1t1_map(map, efihdr->memory_size / efihdr->descriptor_size, efihdr->descriptor_size)) { if (bootverbose) printf("EFI cannot create runtime map\n"); return (ENOMEM); } efi_runtime = (efi_systbl->st_rt == 0) ? NULL : (struct efi_rt *)efi_systbl->st_rt; if (efi_runtime == NULL) { if (bootverbose) printf("EFI runtime services table is not present\n"); efi_destroy_1t1_map(); return (ENXIO); } return (0); } static void efi_uninit(void) { efi_destroy_1t1_map(); efi_systbl = NULL; efi_cfgtbl = NULL; efi_runtime = NULL; mtx_destroy(&efi_lock); } int efi_rt_ok(void) { if (efi_runtime == NULL) return (ENXIO); return (0); } static int efi_enter(void) { struct thread *td; pmap_t curpmap; - int error; if (efi_runtime == NULL) return (ENXIO); td = curthread; curpmap = &td->td_proc->p_vmspace->vm_pmap; PMAP_LOCK(curpmap); mtx_lock(&efi_lock); - error = fpu_kern_enter(td, NULL, FPU_KERN_NOCTX); - if (error != 0) { - PMAP_UNLOCK(curpmap); - return (error); - } - + fpu_kern_enter(td, NULL, FPU_KERN_NOCTX); return (efi_arch_enter()); } static void efi_leave(void) { struct thread *td; pmap_t curpmap; efi_arch_leave(); curpmap = &curproc->p_vmspace->vm_pmap; td = curthread; fpu_kern_leave(td, NULL); mtx_unlock(&efi_lock); PMAP_UNLOCK(curpmap); } int efi_get_table(struct uuid *uuid, void **ptr) { struct efi_cfgtbl *ct; u_long count; if (efi_cfgtbl == NULL || efi_systbl == NULL) return (ENXIO); count = efi_systbl->st_entries; ct = efi_cfgtbl; while (count--) { if (!bcmp(&ct->ct_uuid, uuid, sizeof(*uuid))) { *ptr = (void *)PHYS_TO_DMAP(ct->ct_data); return (0); } ct++; } return (ENOENT); } static int efi_get_time_locked(struct efi_tm *tm) { efi_status status; int error; EFI_TIME_OWNED() error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_gettime(tm, NULL); efi_leave(); error = efi_status_to_errno(status); return (error); } int efi_get_time(struct efi_tm *tm) { int error; if (efi_runtime == NULL) return (ENXIO); EFI_TIME_LOCK() error = efi_get_time_locked(tm); EFI_TIME_UNLOCK() return (error); } int efi_reset_system(void) { int error; error = efi_enter(); if (error != 0) return (error); efi_runtime->rt_reset(EFI_RESET_WARM, 0, 0, NULL); efi_leave(); return (EIO); } static int efi_set_time_locked(struct efi_tm *tm) { efi_status status; int error; EFI_TIME_OWNED(); error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_settime(tm); efi_leave(); error = efi_status_to_errno(status); return (error); } int efi_set_time(struct efi_tm *tm) { int error; if (efi_runtime == NULL) return (ENXIO); EFI_TIME_LOCK() error = efi_set_time_locked(tm); EFI_TIME_UNLOCK() return (error); } int efi_var_get(efi_char *name, struct uuid *vendor, uint32_t *attrib, size_t *datasize, void *data) { efi_status status; int error; error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_getvar(name, vendor, attrib, datasize, data); efi_leave(); error = 
efi_status_to_errno(status); return (error); } int efi_var_nextname(size_t *namesize, efi_char *name, struct uuid *vendor) { efi_status status; int error; error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_scanvar(namesize, name, vendor); efi_leave(); error = efi_status_to_errno(status); return (error); } int efi_var_set(efi_char *name, struct uuid *vendor, uint32_t attrib, size_t datasize, void *data) { efi_status status; int error; error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_setvar(name, vendor, attrib, datasize, data); efi_leave(); error = efi_status_to_errno(status); return (error); } static int efirt_modevents(module_t m, int event, void *arg __unused) { switch (event) { case MOD_LOAD: return (efi_init()); case MOD_UNLOAD: efi_uninit(); return (0); case MOD_SHUTDOWN: return (0); default: return (EOPNOTSUPP); } } static moduledata_t efirt_moddata = { .name = "efirt", .evhand = efirt_modevents, .priv = NULL, }; DECLARE_MODULE(efirt, efirt_moddata, SI_SUB_VM_CONF, SI_ORDER_ANY); MODULE_VERSION(efirt, 1); Index: head/sys/dev/random/nehemiah.c =================================================================== --- head/sys/dev/random/nehemiah.c (revision 329877) +++ head/sys/dev/random/nehemiah.c (revision 329878) @@ -1,154 +1,151 @@ /*- * Copyright (c) 2013-2015 Mark R V Murray * Copyright (c) 2013 David E. O'Brien * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include static void random_nehemiah_init(void); static void random_nehemiah_deinit(void); static u_int random_nehemiah_read(void *, u_int); static struct random_source random_nehemiah = { .rs_ident = "VIA Nehemiah Padlock RNG", .rs_source = RANDOM_PURE_NEHEMIAH, .rs_read = random_nehemiah_read }; static struct fpu_kern_ctx *fpu_ctx_save; /* This H/W source never stores more than 8 bytes in one go */ /* ARGSUSED */ static __inline size_t VIA_RNG_store(void *buf) { uint32_t retval = 0; uint32_t rate = 0; #ifdef __GNUCLIKE_ASM __asm __volatile( "movl $0,%%edx\n\t" "xstore" : "=a" (retval), "+d" (rate), "+D" (buf) : : "memory" ); #endif if (rate == 0) return (retval&0x1f); return (0); } static void random_nehemiah_init(void) { fpu_ctx_save = fpu_kern_alloc_ctx(FPU_KERN_NORMAL); } static void random_nehemiah_deinit(void) { fpu_kern_free_ctx(fpu_ctx_save); } /* It is specifically allowed that buf is a multiple of sizeof(long) */ static u_int random_nehemiah_read(void *buf, u_int c) { uint8_t *b; size_t count, ret; uint64_t tmp; - if ((fpu_kern_enter(curthread, fpu_ctx_save, FPU_KERN_NORMAL) == 0)) { - b = buf; - for (count = c; count > 0; count -= ret) { - ret = MIN(VIA_RNG_store(&tmp), count); - memcpy(b, &tmp, ret); - b += ret; - } - fpu_kern_leave(curthread, fpu_ctx_save); + fpu_kern_enter(curthread, fpu_ctx_save, FPU_KERN_NORMAL); + b = buf; + for (count = c; count > 0; count -= ret) { + ret = MIN(VIA_RNG_store(&tmp), count); + memcpy(b, &tmp, ret); + b += ret; } - else - c = 0; + fpu_kern_leave(curthread, fpu_ctx_save); return (c); } static int nehemiah_modevent(module_t mod, int type, void *unused) { int error = 0; switch (type) { case MOD_LOAD: if (via_feature_rng & VIA_HAS_RNG) { random_source_register(&random_nehemiah); printf("random: fast provider: \"%s\"\n", random_nehemiah.rs_ident); random_nehemiah_init(); } break; case MOD_UNLOAD: if (via_feature_rng & VIA_HAS_RNG) { random_nehemiah_deinit(); random_source_deregister(&random_nehemiah); } break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } DEV_MODULE(nehemiah, nehemiah_modevent, NULL); MODULE_VERSION(nehemiah, 1); MODULE_DEPEND(nehemiah, random_device, 1, 1, 1); Index: head/sys/i386/i386/npx.c =================================================================== --- head/sys/i386/i386/npx.c (revision 329877) +++ head/sys/i386/i386/npx.c (revision 329878) @@ -1,1431 +1,1431 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fp_divide_by_0(void); void frstor(caddr_t addr); void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); void xsaveopt(char *addr, uint64_t mask); #endif /* __GNUCLIKE_ASM && !lint */ #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? 
\ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) CTASSERT(sizeof(union savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. */ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm)); static void fpu_clean_state(void); static void fpusave(union savefpu *); static void fpurstor(union savefpu *); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); int use_xsave; uint64_t xsave_mask; static uma_zone_t fpu_save_area_zone; static union savefpu *npx_initialstate; struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static int use_xsaveopt; static volatile u_int npx_traps_while_probing; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Determine if an FPU is present and how to use it. */ static int npx_probe(void) { struct gate_descriptor save_idt_npxtrap; u_short control, status; /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; return (1); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. */ hw_float = 1; goto cleanup; } printf( "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. 
*/ printf("WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void npxinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) use_xsaveopt = 1; } /* * Calculate the fpu save area size. */ static void npxinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(union savefpu); } /* * Initialize floating point unit. */ void npxinit(bool bsp) { static union savefpu dummy; register_t saveintr; u_int mxcsr; u_short control; if (bsp) { if (!npx_probe()) return; npxinit_bsp1(); } if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (bsp) npxinit_bsp2(); /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. fpusave() initializes * the fpu. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); if (cpu_fxsr) fninit(); else fnsave(&dummy); control = __INITIAL_NPXCW__; fldcw(control); if (cpu_fxsr) { mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); } start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void npxinitstate(void *arg __unused) { register_t saveintr; int cp[4], i, max_ext_n; if (!hw_float) return; npx_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); saveintr = intr_disable(); stop_emulating(); fpusave(npx_initialstate); if (cpu_fxsr) { if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM * registers or x87 registers (MM/ST). The fpusave * call dumped the garbage contained in the registers * after reset to the initial state saved. Clear XMM * and x87 registers file image to make the startup * program state and signal handler XMM/x87 register * content predictable. */ bzero(npx_initialstate->sv_xmm.sv_fp, sizeof(npx_initialstate->sv_xmm.sv_fp)); bzero(npx_initialstate->sv_xmm.sv_xmm, sizeof(npx_initialstate->sv_xmm.sv_xmm)); } else bzero(npx_initialstate->sv_87.sv_ac, sizeof(npx_initialstate->sv_87.sv_ac)); /* * Create a table describing the layout of the CPU Extended * Save Area. 
*/ if (use_xsave) { if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); start_emulating(); intr_restore(saveintr); } SYSINIT(npxinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, npxinitstate, NULL); /* * Free coprocessor (if we have it). */ void npxexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat(void) { if (!hw_float) return (_MC_FPFMT_NODEV); if (cpu_fxsr) return (_MC_FPFMT_XMM); return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. 
* 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ 
| UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
*/ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int npxdna(void) { if (!hw_float) return (0); critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); if (cpu_fxsr) fpu_clean_state(); if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); fpurstor(curpcb->pcb_save); if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(curpcb->pcb_initial_npxcw); curpcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(curpcb)) curpcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { fpurstor(curpcb->pcb_save); } critical_exit(); return (1); } /* * Wrapper for fpusave() called from context switch routines. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. */ void npxsave(addr) union savefpu *addr; { stop_emulating(); if (use_xsaveopt) xsaveopt((char *)addr, xsave_mask); else fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } /* * Unconditionally save the current co-processor state across suspend and * resume. */ void npxsuspend(union savefpu *addr) { register_t cr0; if (!hw_float) return; if (PCPU_GET(fpcurthread) == NULL) { bcopy(npx_initialstate, addr, cpu_max_ext_state_size); return; } cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void npxresume(union savefpu *addr) { register_t cr0; if (!hw_float) return; cr0 = rcr0(); npxinit(false); stop_emulating(); fpurstor(addr); load_cr0(cr0); } void npxdrop(void) { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. 
*/ if (!cpu_fxsr) fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i; int owned; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw); npxuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { fpusave(get_pcb_user_save_pcb(pcb)); if (!cpu_fxsr) /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } critical_exit(); if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) + offsetof(struct xstate_hdr, xstate_bv)); if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)npx_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } return (owned); } void npxuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } int npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(union savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } int npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; if (!hw_float) return (ENXIO); if (cpu_fxsr) addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) { critical_exit(); return (error); } if (!cpu_fxsr) fnclex(); /* As in npxdrop(). 
*/ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurstor(get_pcb_user_save_td(td)); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) return (error); bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); npxuserinited(td); } return (0); } static void fpusave(addr) union savefpu *addr; { if (use_xsave) xsave((char *)addr, xsave_mask); else if (cpu_fxsr) fxsave(addr); else fnsave(addr); } static void npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers and tags */ penv_87->en_tw = 0xffff; for (i = 0; i < 8; ++i) { sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; if ((penv_xmm->en_tw & (1 << i)) != 0) /* zero and special are set as valid */ penv_87->en_tw &= ~(3 << i * 2); } } void npx_fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) { bzero(sv_87, sizeof(*sv_87)); npx_fill_fpregs_xmm1(sv_xmm, sv_87); } void npx_set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* * FPU registers and tags. * Abridged / Full translation (values in binary), see FXSAVE spec. * 0 11 * 1 00, 01, 10 */ penv_xmm->en_tw = 0; for (i = 0; i < 8; ++i) { sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; if ((penv_87->en_tw & (3 << i * 2)) != (3 << i * 2)) penv_xmm->en_tw |= 1 << i; } } void npx_get_fsave(void *addr) { struct thread *td; union savefpu *sv; td = curthread; npxgetregs(td); sv = get_pcb_user_save_td(td); if (cpu_fxsr) npx_fill_fpregs_xmm1(&sv->sv_xmm, addr); else bcopy(sv, addr, sizeof(struct env87) + sizeof(struct fpacc87[8])); } int npx_set_fsave(void *addr) { union savefpu sv; int error; bzero(&sv, sizeof(sv)); if (cpu_fxsr) npx_set_fpregs_xmm(addr, &sv.sv_xmm); else bcopy(addr, &sv, sizeof(struct env87) + sizeof(struct fpacc87[8])); error = npxsetregs(curthread, &sv, NULL, 0); return (error); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. 
*/ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } static void fpurstor(union savefpu *addr) { if (use_xsave) xrstor((char *)addr, xsave_mask); else if (cpu_fxsr) fxrstor(addr); else frstor(addr); } #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); ISA_PNP_INFO(npxisa_ids); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_NPXINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? 
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } -int +void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; - return (0); + return; } pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; - return (0); + return; } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; pcb->pcb_flags &= ~PCB_KERNNPX; } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX) != 0); } /* * FPU save area alloc/free/init utility routines */ union savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 0)); } void fpu_save_area_free(union savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(union savefpu *fsa) { bcopy(npx_initialstate, fsa, cpu_max_ext_state_size); } Index: head/sys/i386/include/npx.h =================================================================== --- head/sys/i386/include/npx.h (revision 329877) +++ head/sys/i386/include/npx.h (revision 329878) @@ -1,98 +1,98 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.h 5.3 (Berkeley) 1/18/91 * $FreeBSD$ */ /* * 287/387 NPX Coprocessor Data Structures and Constants * W. Jolitz 1/90 */ #ifndef _MACHINE_NPX_H_ #define _MACHINE_NPX_H_ #include #ifdef _KERNEL struct fpu_kern_ctx; #define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0) #define XSAVE_AREA_ALIGN 64 int npxdna(void); void npxdrop(void); void npxexit(struct thread *td); int npxformat(void); int npxgetregs(struct thread *td); void npxinit(bool bsp); void npxresume(union savefpu *addr); void npxsave(union savefpu *addr); int npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, size_t xfpustate_size); int npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size); void npxsuspend(union savefpu *addr); int npxtrap_x87(void); int npxtrap_sse(void); void npxuserinited(struct thread *); void npx_get_fsave(void *); int npx_set_fsave(void *); void npx_fill_fpregs_xmm(struct savexmm *, struct save87 *); void npx_set_fpregs_xmm(struct save87 *, struct savexmm *); struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); -int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, +void fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); int fpu_kern_thread(u_int flags); int is_fpu_kern_thread(u_int flags); union savefpu *fpu_save_area_alloc(void); void fpu_save_area_free(union savefpu *fsa); void fpu_save_area_reset(union savefpu *fsa); /* * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread(). */ #define FPU_KERN_NORMAL 0x0000 #define FPU_KERN_NOWAIT 0x0001 #define FPU_KERN_KTHR 0x0002 #endif #endif /* !_MACHINE_NPX_H_ */
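The hunks above all make the same point: fpu_kern_enter() now returns void on both amd64 and i386, and its callers (efirt.c, nehemiah.c) drop their error paths. The sketch below is illustrative only and not part of this change; it shows how a kernel consumer brackets FPU/SIMD use with this API after r329878, modeled on the nehemiah.c pattern. The names example_ctx, example_init, example_do_simd_work, and example_fini are hypothetical; the headers are the i386 ones from this diff, and an amd64 consumer would include <machine/fpu.h> instead of <machine/npx.h>.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <machine/npx.h>

static struct fpu_kern_ctx *example_ctx;

static void
example_init(void)
{
	/*
	 * Preallocate a kernel FPU context.  FPU_KERN_NORMAL may sleep;
	 * FPU_KERN_NOWAIT would make the allocation fail-fast instead.
	 */
	example_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL);
}

static void
example_do_simd_work(void)
{
	/*
	 * Before this revision fpu_kern_enter() returned an int that the
	 * caller had to check; since it cannot fail with a preallocated
	 * context, the return value is gone and the call is unconditional.
	 */
	fpu_kern_enter(curthread, example_ctx, FPU_KERN_NORMAL);

	/* ... FPU/SSE-using code runs here with a private save area ... */

	fpu_kern_leave(curthread, example_ctx);
}

static void
example_fini(void)
{
	fpu_kern_free_ctx(example_ctx);
}

As in the efirt.c hunk, callers that previously unwound locks when fpu_kern_enter() failed can now take the enter/leave pair as a plain bracket around the FPU-using region.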