Index: head/sys/arm/arm/vfp.c =================================================================== --- head/sys/arm/arm/vfp.c (revision 290242) +++ head/sys/arm/arm/vfp.c (revision 290243) @@ -1,312 +1,312 @@ /*- * Copyright (c) 2014 Ian Lepore * Copyright (c) 2012 Mark Tinguely * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #ifdef VFP #include #include #include #include #include #include #include #include #include #include #include /* function prototypes */ static int vfp_bounce(u_int, u_int, struct trapframe *, int); static void vfp_restore(struct vfp_state *); /* Set to 1 by vfp_init() when VFPSID_HARDSOFT_IMP is clear in FPSID */ extern int vfp_exists; /* Undefined-instruction handler records that vfp_init() installs for coprocessors 10 and 11 */ static struct undefined_handler vfp10_uh, vfp11_uh; /* If true the VFP unit has 32 double registers, otherwise it has 16 */ static int is_d32; /* * About .fpu directives in this file... * * We should need simply .fpu vfpv3, but clang 3.5 has a quirk where setting * vfpv3 doesn't imply that vfp2 features are also available -- both have to be * explicitly set to get all the features of both. This is probably a bug in * clang, so it may get fixed and require changes here some day. Other changes * are probably coming in clang too, because there are emails and open PRs * indicating they want to completely disable the ability to use .fpu and * similar directives in inline asm. That would be catastrophic for us, * hopefully they come to their senses. There was also some discussion of a new * syntax such as .push fpu=vfpv3; ...; .pop fpu; and that would be ideal for * us, better than what we have now really. * * For gcc, each .fpu directive completely overrides the prior directive, unlike * with clang, but luckily on gcc saying v3 implies all the v2 features as well. 
*/ /* fmxr(reg, val) writes val to the named VFP system register with vmsr; fmrx(reg) reads the named VFP system register with vmrs and evaluates to the value read. */ #define fmxr(reg, val) \ __asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n" \ " vmsr " __STRING(reg) ", %0" :: "r"(val)); #define fmrx(reg) \ ({ u_int val = 0;\ __asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n" \ " vmrs %0, " __STRING(reg) : "=r"(val)); \ val; \ }) /* Read the coprocessor access control register (mrc p15, 0, Rt, c1, c0, 2). */ static u_int get_coprocessorACR(void) { u_int val; __asm __volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (val) : : "cc"); return val; } /* Write val to the coprocessor access control register (mcr p15, 0, Rt, c1, c0, 2), then issue isb(). */ static void set_coprocessorACR(u_int val) { __asm __volatile("mcr p15, 0, %0, c1, c0, 2\n\t" : : "r" (val) : "cc"); isb(); } /* called for each cpu: sets the COPROC10 and COPROC11 bits in the coprocessor access control register and then, when VFPSID_HARDSOFT_IMP is clear in FPSID, records the VFP identification registers in per-CPU data and installs the coprocessor 10 and 11 undefined-instruction handlers if they are not installed yet */ void vfp_init(void) { u_int fpsid, fpexc, tmp; u_int coproc, vfp_arch; coproc = get_coprocessorACR(); coproc |= COPROC10 | COPROC11; set_coprocessorACR(coproc); fpsid = fmrx(fpsid); /* read the vfp system id */ fpexc = fmrx(fpexc); /* read the vfp exception reg (the value is not used further in this function) */ if (!(fpsid & VFPSID_HARDSOFT_IMP)) { vfp_exists = 1; is_d32 = 0; PCPU_SET(vfpsid, fpsid); /* save the fpsid */ vfp_arch = (fpsid & VFPSID_SUBVERSION2_MASK) >> VFPSID_SUBVERSION_OFF; if (vfp_arch >= VFP_ARCH3) { tmp = fmrx(mvfr0); PCPU_SET(vfpmvfr0, tmp); /* a register bank field of 2 means 32 double registers */ if ((tmp & VMVFR0_RB_MASK) == 2) is_d32 = 1; tmp = fmrx(mvfr1); PCPU_SET(vfpmvfr1, tmp); /* only cpu 0 updates initial_fpscr and thread0's saved fpscr */ if (PCPU_GET(cpuid) == 0) { if ((tmp & VMVFR1_FZ_MASK) == 0x1) { /* Denormals arithmetic support */ initial_fpscr &= ~VFPSCR_FZ; thread0.td_pcb->pcb_vfpstate.fpscr = initial_fpscr; } } } /* initialize the coprocessor 10 and 11 calls * These are called to restore the registers and enable * the VFP hardware. 
*/ if (vfp10_uh.uh_handler == NULL) { vfp10_uh.uh_handler = vfp_bounce; vfp11_uh.uh_handler = vfp_bounce; install_coproc_handler_static(10, &vfp10_uh); install_coproc_handler_static(11, &vfp11_uh); } } } SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL); /* start VFP unit, restore the vfp registers from the PCB and retry * the instruction. * Panics when FAULT_USER is clear in code. * Returns 0 after enabling the VFP for the current thread or after posting SIGFPE; * returns 1 when the VFP was already enabled and neither EX nor DEX was set in FPEXC. */ static int vfp_bounce(u_int addr, u_int insn, struct trapframe *frame, int code) { u_int cpu, fpexc; struct pcb *curpcb; ksiginfo_t ksi; if ((code & FAULT_USER) == 0) panic("undefined floating point instruction in supervisor mode"); critical_enter(); /* * If the VFP is already on and we got an undefined instruction, then * something tried to execute a truly invalid instruction that maps to * the VFP. */ fpexc = fmrx(fpexc); if (fpexc & VFPEXC_EN) { /* Clear any exceptions */ - fmxr(fpexc, fpexc & ~(VFPEXC_EX | VFPEXC_FP2V)); + fmxr(fpexc, fpexc & ~(VFPEXC_EX | VFPEXC_DEX | VFPEXC_FP2V)); /* kill the process - we do not handle emulation */ critical_exit(); - if (fpexc & VFPEXC_EX) { + if (fpexc & (VFPEXC_EX | VFPEXC_DEX)) { /* We have an exception, signal a SIGFPE */ ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGFPE; /* ksi_code is only assigned here for underflow, overflow and invalid operation */ if (fpexc & VFPEXC_UFC) ksi.ksi_code = FPE_FLTUND; else if (fpexc & VFPEXC_OFC) ksi.ksi_code = FPE_FLTOVF; else if (fpexc & VFPEXC_IOC) ksi.ksi_code = FPE_FLTINV; ksi.ksi_addr = (void *)addr; trapsignal(curthread, &ksi); return 0; } return 1; } /* * If the last time this thread used the VFP it was on this core, and * the last thread to use the VFP on this core was this thread, then the * VFP state is valid, otherwise restore this thread's state to the VFP. */ fmxr(fpexc, fpexc | VFPEXC_EN); curpcb = curthread->td_pcb; cpu = PCPU_GET(cpuid); if (curpcb->pcb_vfpcpu != cpu || curthread != PCPU_GET(fpcurthread)) { vfp_restore(&curpcb->pcb_vfpstate); curpcb->pcb_vfpcpu = cpu; PCPU_SET(fpcurthread, curthread); } critical_exit(); return (0); } /* * Restore the given state to the VFP hardware. 
*/
/*
 * vfp_restore() loads, in this order: FPINST (and FPINST2 when FP2V is set)
 * if the saved FPEXC has EX set, then FPSCR, then the double registers, and
 * finally FPEXC itself.
 *
 * The register load starts at the address held in vfpsave, i.e. at the
 * beginning of *vfpsave.
 * NOTE(review): this relies on the register image being the first member of
 * struct vfp_state; the structure is not declared in this file -- confirm.
 */
static void
vfp_restore(struct vfp_state *vfpsave)
{
	uint32_t fpexc;

	/* On vfpv3 we may need to restore FPINST and FPINST2 */
	fpexc = vfpsave->fpexec;
	if (fpexc & VFPEXC_EX) {
		fmxr(fpinst, vfpsave->fpinst);
		if (fpexc & VFPEXC_FP2V)
			fmxr(fpinst2, vfpsave->fpinst2);
	}
	fmxr(fpscr, vfpsave->fpscr);

	/*
	 * The asm advances its copy of vfpsave ("+&r" with writeback), so all
	 * member accesses through the pointer are done above.  The "memory"
	 * clobber tells the compiler that the asm reads the save area.
	 */
	__asm __volatile(
	    " .fpu vfpv2\n"
	    " .fpu vfpv3\n"
	    " vldmia %0!, {d0-d15}\n"		/* d0-d15 */
	    " cmp %1, #0\n"			/* -D16 or -D32? */
	    " vldmiane %0!, {d16-d31}\n"	/* d16-d31 */
	    " addeq %0, %0, #128\n"		/* skip missing regs */
	    : "+&r" (vfpsave) : "r" (is_d32) : "cc", "memory"
	    );

	fmxr(fpexc, fpexc);
}

/*
 * If the VFP is on, save its current state and turn it off if requested to do
 * so.  If the VFP is not on, does not change the values at *vfpsave.  Caller is
 * responsible for preventing a context switch while this is running.
 *
 * vfpsave:	 destination for FPEXC, FPSCR, FPINST/FPINST2 (when valid) and
 *		 the double registers.
 * disable_vfp:	 when true, clear VFPEXC_EN in FPEXC after saving.
 */
void
vfp_store(struct vfp_state *vfpsave, boolean_t disable_vfp)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);		/* Is the vfp enabled? */
	if (fpexc & VFPEXC_EN) {
		vfpsave->fpexec = fpexc;
		vfpsave->fpscr = fmrx(fpscr);

		/* On vfpv3 we may need to save FPINST and FPINST2 */
		if (fpexc & VFPEXC_EX) {
			vfpsave->fpinst = fmrx(fpinst);
			if (fpexc & VFPEXC_FP2V)
				vfpsave->fpinst2 = fmrx(fpinst2);
			/* EX is dropped from the value written back below */
			fpexc &= ~VFPEXC_EX;
		}

		/*
		 * Every store must go through operand %0.  Naming r0 directly
		 * for the upper bank only works if the compiler happens to
		 * place vfpsave in r0; otherwise d16-d31 are written to
		 * whatever r0 points at.  The "memory" clobber tells the
		 * compiler that the asm writes the save area.
		 */
		__asm __volatile(
		    " .fpu vfpv2\n"
		    " .fpu vfpv3\n"
		    " vstmia %0!, {d0-d15}\n"		/* d0-d15 */
		    " cmp %1, #0\n"			/* -D16 or -D32? */
		    " vstmiane %0!, {d16-d31}\n"	/* d16-d31 */
		    " addeq %0, %0, #128\n"		/* skip missing regs */
		    : "+&r" (vfpsave) : "r" (is_d32) : "cc", "memory"
		    );

		if (disable_vfp)
			fmxr(fpexc, fpexc & ~VFPEXC_EN);
	}
}

/*
 * The current thread is dying.  If the state currently in the hardware belongs
 * to the current thread, set fpcurthread to NULL to indicate that the VFP
 * hardware state does not belong to any thread.  If the VFP is on, turn it off.
 * Called only from cpu_throw(), so we don't have to worry about a context
 * switch here.
*/ void vfp_discard(struct thread *td) { u_int tmp; if (PCPU_GET(fpcurthread) == td) PCPU_SET(fpcurthread, NULL); tmp = fmrx(fpexc); if (tmp & VFPEXC_EN) fmxr(fpexc, tmp & ~VFPEXC_EN); } #endif Index: head/sys/arm/include/vfp.h =================================================================== --- head/sys/arm/include/vfp.h (revision 290242) +++ head/sys/arm/include/vfp.h (revision 290243) @@ -1,140 +1,141 @@ /* * Copyright (c) 2012 Mark Tinguely * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* $FreeBSD$ */ #ifndef _MACHINE__VFP_H_ #define _MACHINE__VFP_H_ /* fpsid, fpscr, fpexc are defined in the newer gas */ #define VFPSID cr0 #define VFPSCR cr1 #define VMVFR1 cr6 #define VMVFR0 cr7 #define VFPEXC cr8 #define VFPINST cr9 /* vfp 1 and 2 except instruction */ #define VFPINST2 cr10 /* vfp 2? */ /* VFPSID */ #define VFPSID_IMPLEMENTOR_OFF 24 #define VFPSID_IMPLEMENTOR_MASK (0xff000000) #define VFPSID_HARDSOFT_IMP (0x00800000) #define VFPSID_SINGLE_PREC 20 /* version 1 and 2 */ #define VFPSID_SUBVERSION_OFF 16 #define VFPSID_SUBVERSION2_MASK (0x000f0000) /* version 1 and 2 */ #define VFPSID_SUBVERSION3_MASK (0x007f0000) /* version 3 */ #define VFP_ARCH1 0x0 #define VFP_ARCH2 0x1 #define VFP_ARCH3 0x2 #define VFPSID_PARTNUMBER_OFF 8 #define VFPSID_PARTNUMBER_MASK (0x0000ff00) #define VFPSID_VARIANT_OFF 4 #define VFPSID_VARIANT_MASK (0x000000f0) #define VFPSID_REVISION_MASK 0x0f /* VFPSCR */ #define VFPSCR_CC_N (0x80000000) /* comparison less than */ #define VFPSCR_CC_Z (0x40000000) /* comparison equal */ #define VFPSCR_CC_C (0x20000000) /* comparison = > unordered */ #define VFPSCR_CC_V (0x10000000) /* comparison unordered */ #define VFPSCR_QC (0x08000000) /* saturation cumulative */ #define VFPSCR_DN (0x02000000) /* default NaN enable */ #define VFPSCR_FZ (0x01000000) /* flush to zero enabled */ #define VFPSCR_RMODE_OFF 22 /* rounding mode offset */ #define VFPSCR_RMODE_MASK (0x00c00000) /* rounding mode mask */ #define VFPSCR_RMODE_RN (0x00000000) /* round nearest */ #define VFPSCR_RMODE_RPI (0x00400000) /* round to plus infinity */ #define VFPSCR_RMODE_RNI (0x00800000) /* round to neg infinity */ #define VFPSCR_RMODE_RM (0x00c00000) /* round to zero */ #define VFPSCR_STRIDE_OFF 20 /* vector stride -1 */ #define VFPSCR_STRIDE_MASK (0x00300000) #define VFPSCR_LEN_OFF 16 /* vector length -1 */ #define VFPSCR_LEN_MASK (0x00070000) #define VFPSCR_IDE (0x00008000) /* input subnormal exc enable */ #define VFPSCR_IXE (0x00001000) /* inexact exception enable */ 
#define VFPSCR_UFE (0x00000800) /* underflow exception enable */ #define VFPSCR_OFE (0x00000400) /* overflow exception enable */ #define VFPSCR_DNZ (0x00000200) /* div by zero exception en */ #define VFPSCR_IOE (0x00000100) /* invalid op exec enable */ #define VFPSCR_IDC (0x00000080) /* input subnormal cumul */ #define VFPSCR_IXC (0x00000010) /* Inexact cumulative flag */ #define VFPSCR_UFC (0x00000008) /* underflow cumulative flag */ #define VFPSCR_OFC (0x00000004) /* overflow cumulative flag */ #define VFPSCR_DZC (0x00000002) /* division by zero flag */ #define VFPSCR_IOC (0x00000001) /* invalid operation cumul */ /* VFPEXC */ #define VFPEXC_EX (0x80000000) /* exception v1 v2 */ #define VFPEXC_EN (0x40000000) /* vfp enable */ +#define VFPEXC_DEX (0x20000000) /* Synchronous exception */ #define VFPEXC_FP2V (0x10000000) /* FPINST2 valid */ #define VFPEXC_INV (0x00000080) /* Input exception */ #define VFPEXC_UFC (0x00000008) /* Underflow exception */ #define VFPEXC_OFC (0x00000004) /* Overflow exception */ #define VFPEXC_IOC (0x00000001) /* Invalid operation */ /* version 3 registers */ /* VMVFR0 */ #define VMVFR0_RM_OFF 28 #define VMVFR0_RM_MASK (0xf0000000) /* VFP rounding modes */ #define VMVFR0_SV_OFF 24 #define VMVFR0_SV_MASK (0x0f000000) /* VFP short vector supp */ #define VMVFR0_SR_OFF 20 #define VMVFR0_SR (0x00f00000) /* VFP hw sqrt supp */ #define VMVFR0_D_OFF 16 #define VMVFR0_D_MASK (0x000f0000) /* VFP divide supp */ #define VMVFR0_TE_OFF 12 #define VMVFR0_TE_MASK (0x0000f000) /* VFP trap exception supp */ #define VMVFR0_DP_OFF 8 #define VMVFR0_DP_MASK (0x00000f00) /* VFP double prec support */ #define VMVFR0_SP_OFF 4 #define VMVFR0_SP_MASK (0x000000f0) /* VFP single prec support */ #define VMVFR0_RB_MASK (0x0000000f) /* VFP 64 bit media support; vfp_init() treats a value of 2 as 32 double registers */ /* VMVFR1 */ #define VMVFR1_SP_OFF 16 #define VMVFR1_SP_MASK (0x000f0000) /* Neon single prec support */ #define VMVFR1_I_OFF 12 #define VMVFR1_I_MASK (0x0000f000) /* Neon integer support */ #define 
VMVFR1_LS_OFF 8 #define VMVFR1_LS_MASK (0x00000f00) /* Neon ld/st instr support */ #define VMVFR1_DN_OFF 4 #define VMVFR1_DN_MASK (0x000000f0) /* Neon prop NaN support */ #define VMVFR1_FZ_MASK (0x0000000f) /* Neon denormal arith supp */ /* Coprocessor access control register bits that vfp_init() sets for coprocessors 10 and 11 */ #define COPROC10 (0x3 << 20) #define COPROC11 (0x3 << 22) #ifndef LOCORE void vfp_init(void); void vfp_store(struct vfp_state *, boolean_t); void vfp_discard(struct thread *); #endif #endif