arm: let the kernel use the VFP and expose VFP state through struct fpreg

Adds fpu_kern_enter(), fpu_kern_leave(), fpu_kern_thread(),
is_fpu_kern_thread() and vfp_save_state() to sys/arm/arm/vfp.c, tracks the
active save area in the new pcb_vfpsaved / pcb_fpflags PCB fields, redefines
struct fpreg as 32 64-bit registers plus FPSCR, and makes fill_fpregs(),
set_fpregs() and libthread_db copy VFP state instead of zeroing it.
The context-switch paths call vfp_save_state() unconditionally; that function
itself checks FPEXC.EN before storing any registers.

diff --git a/lib/libthread_db/arch/arm/libpthread_md.c b/lib/libthread_db/arch/arm/libpthread_md.c
--- a/lib/libthread_db/arch/arm/libpthread_md.c
+++ b/lib/libthread_db/arch/arm/libpthread_md.c
@@ -87,22 +87,27 @@
 }
 
 void
-pt_fpreg_to_ucontext(const struct fpreg *r __unused, ucontext_t *uc)
+pt_fpreg_to_ucontext(const struct fpreg *r, ucontext_t *uc)
 {
-	mcontext_t *mc = &uc->uc_mcontext;
+	mcontext_vfp_t *mc_vfp;
 
-	/* XXX */
-	mc->mc_vfp_size = 0;
-	mc->mc_vfp_ptr = NULL;
-	memset(mc->mc_spare, 0, sizeof(mc->mc_spare));
+	mc_vfp = uc->uc_mcontext.mc_vfp_ptr;
+
+	if (mc_vfp != NULL)
+		memcpy(mc_vfp, r, sizeof(*r));
 }
 
 void
-pt_ucontext_to_fpreg(const ucontext_t *uc __unused, struct fpreg *r)
+pt_ucontext_to_fpreg(const ucontext_t *uc, struct fpreg *r)
 {
+	mcontext_vfp_t *mc_vfp;
 
-	/* XXX */
-	memset(r, 0, sizeof(*r));
+	mc_vfp = uc->uc_mcontext.mc_vfp_ptr;
+
+	if (mc_vfp != NULL)
+		memcpy(r, mc_vfp, sizeof(*r));
+	else
+		memset(r, 0, sizeof(*r));
 }
 
 void
diff --git a/sys/arm/arm/exec_machdep.c b/sys/arm/arm/exec_machdep.c
--- a/sys/arm/arm/exec_machdep.c
+++ b/sys/arm/arm/exec_machdep.c
@@ -100,16 +100,18 @@
 {
 	struct pcb *pcb;
 
+	MPASS(td == curthread);
+
 	pcb = td->td_pcb;
-	if (td == curthread) {
+	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		critical_enter();
 		vfp_store(&pcb->pcb_vfpstate, false);
 		critical_exit();
-	} else
-		MPASS(TD_IS_SUSPENDED(td));
-	memset(vfp, 0, sizeof(*vfp));
+	}
+	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
+	    ("Called get_vfpcontext while the kernel is using the VFP"));
 	memcpy(vfp->mcv_reg, pcb->pcb_vfpstate.reg,
-		sizeof(vfp->mcv_reg));
+	    sizeof(vfp->mcv_reg));
 	vfp->mcv_fpscr = pcb->pcb_vfpstate.fpscr;
 }
 
@@ -121,15 +123,18 @@
 {
 	struct pcb *pcb;
 
+	MPASS(td == curthread);
+
 	pcb = td->td_pcb;
-	if (td == curthread) {
+	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		critical_enter();
 		vfp_discard(td);
 		critical_exit();
-	} else
-		MPASS(TD_IS_SUSPENDED(td));
+	}
+	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
+	    ("Called set_vfpcontext while the kernel is using the VFP"));
 	memcpy(pcb->pcb_vfpstate.reg, vfp->mcv_reg,
-		sizeof(pcb->pcb_vfpstate.reg));
+	    sizeof(pcb->pcb_vfpstate.reg));
 	pcb->pcb_vfpstate.fpscr = vfp->mcv_fpscr;
 }
 #endif
@@ -166,6 +171,8 @@
 {
 	struct trapframe *tf = td->td_frame;
 	__greg_t *gr = mcp->__gregs;
+	mcontext_vfp_t mcontext_vfp;
+	int rv;
 
 	if (clear_ret & GET_MC_CLEAR_RET) {
 		gr[_REG_R0] = 0;
@@ -190,9 +197,19 @@
 	gr[_REG_LR] = tf->tf_usr_lr;
 	gr[_REG_PC] = tf->tf_pc;
 
-	mcp->mc_vfp_size = 0;
-	mcp->mc_vfp_ptr = NULL;
-	memset(&mcp->mc_spare, 0, sizeof(mcp->mc_spare));
+#ifdef VFP
+	if (mcp->mc_vfp_size != sizeof(mcontext_vfp_t))
+		return (EINVAL);
+	get_vfpcontext(td, &mcontext_vfp);
+#else
+	bzero(&mcontext_vfp, sizeof(mcontext_vfp));
+#endif
+
+	if (mcp->mc_vfp_ptr != NULL) {
+		rv = copyout(&mcontext_vfp, mcp->mc_vfp_ptr, sizeof(mcontext_vfp));
+		if (rv != 0)
+			return (rv);
+	}
 
 	return (0);
 }
@@ -306,14 +323,6 @@
 	/* Populate the siginfo frame. */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
-#ifdef VFP
-	get_vfpcontext(td, &frame.sf_vfp);
-	frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp);
-	frame.sf_uc.uc_mcontext.mc_vfp_ptr = &fp->sf_vfp;
-#else
-	frame.sf_uc.uc_mcontext.mc_vfp_size = 0;
-	frame.sf_uc.uc_mcontext.mc_vfp_ptr = NULL;
-#endif
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c
--- a/sys/arm/arm/machdep.c
+++ b/sys/arm/arm/machdep.c
@@ -377,6 +377,7 @@
 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
 	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
 	thread0.td_pcb->pcb_flags = 0;
+	thread0.td_pcb->pcb_fpflags = 0;
 	thread0.td_pcb->pcb_vfpcpu = -1;
 	thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN;
 	thread0.td_frame = &proc0_tf;
diff --git a/sys/arm/arm/machdep_kdb.c b/sys/arm/arm/machdep_kdb.c
--- a/sys/arm/arm/machdep_kdb.c
+++ b/sys/arm/arm/machdep_kdb.c
@@ -39,6 +39,7 @@
 
 #include
 #include
+#include
 
 #ifdef DDB
 #include
@@ -105,7 +106,26 @@
 int
 fill_fpregs(struct thread *td, struct fpreg *regs)
 {
-	bzero(regs, sizeof(*regs));
+#ifdef VFP
+	struct pcb *pcb;
+
+	pcb = td->td_pcb;
+	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
+		/*
+		 * If we have just been running VFP instructions we will
+		 * need to save the state to memcpy it below.
+		 */
+		if (td == curthread)
+			vfp_save_state(td, pcb);
+	}
+	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
+	    ("Called fill_fpregs while the kernel is using the VFP"));
+	memcpy(regs->fpr_r, pcb->pcb_vfpstate.reg,
+	    sizeof(regs->fpr_r));
+	regs->fpr_fpscr = pcb->pcb_vfpstate.fpscr;
+#else
+	memset(regs, 0, sizeof(*regs));
+#endif
 	return (0);
 }
 
@@ -126,6 +146,15 @@
 int
 set_fpregs(struct thread *td, struct fpreg *regs)
 {
+#ifdef VFP
+	struct pcb *pcb;
+
+	pcb = td->td_pcb;
+	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
+	    ("Called set_fpregs while the kernel is using the VFP"));
+	memcpy(pcb->pcb_vfpstate.reg, regs->fpr_r, sizeof(regs->fpr_r));
+	pcb->pcb_vfpstate.fpscr = regs->fpr_fpscr;
+#endif
 	return (0);
 }
 
diff --git a/sys/arm/arm/swtch-v6.S b/sys/arm/arm/swtch-v6.S
--- a/sys/arm/arm/swtch-v6.S
+++ b/sys/arm/arm/swtch-v6.S
@@ -323,11 +323,9 @@
 
 #ifdef VFP
 	ldr	r3, [r10, #(TD_PCB)]
-	fmrx	r0, fpexc		/* If the VFP is enabled */
-	tst	r0, #(VFPEXC_EN)	/* the current thread has */
-	movne	r1, #1			/* used it, so go save */
-	addne	r0, r3, #(PCB_VFPSTATE)	/* the state into the PCB */
-	blne	_C_LABEL(vfp_store)	/* and disable the VFP. */
+	mov	r1, r3
+	mov	r0, r10
+	bl	_C_LABEL(vfp_save_state)
 #endif
 
 	/*
diff --git a/sys/arm/arm/swtch.S b/sys/arm/arm/swtch.S
--- a/sys/arm/arm/swtch.S
+++ b/sys/arm/arm/swtch.S
@@ -99,11 +99,9 @@
 	add	r3, r0, #(PCB_R4)
 	stmia	r3, {r4-r12, sp, lr, pc}
 #ifdef VFP
-	fmrx	r2, fpexc		/* If the VFP is enabled */
-	tst	r2, #(VFPEXC_EN)	/* the current thread has */
-	movne	r1, #1			/* used it, so go save */
-	addne	r0, r0, #(PCB_VFPSTATE)	/* the state into the PCB */
-	blne	_C_LABEL(vfp_store)	/* and disable the VFP. */
+	mov	r1, r0
+	mov	r0, #0
+	bl	_C_LABEL(vfp_save_state)
 #endif
 	add	sp, sp, #4;
 	ldmfd	sp!, {pc}
diff --git a/sys/arm/arm/vfp.c b/sys/arm/arm/vfp.c
--- a/sys/arm/arm/vfp.c
+++ b/sys/arm/arm/vfp.c
@@ -55,6 +55,14 @@
 /* If true the VFP unit has 32 double registers, otherwise it has 16 */
 static int is_d32;
 
+struct fpu_kern_ctx {
+	struct vfp_state	*prev;
+#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
+#define	FPU_KERN_CTX_INUSE	0x02
+	uint32_t	 flags;
+	struct vfp_state	 state;
+};
+
 /*
  * About .fpu directives in this file...
  *
@@ -100,6 +108,26 @@
 	isb();
 }
 
+static void
+vfp_enable(void)
+{
+	uint32_t fpexc;
+
+	fpexc = fmrx(fpexc);
+	fmxr(fpexc, fpexc | VFPEXC_EN);
+	isb();
+}
+
+static void
+vfp_disable(void)
+{
+	uint32_t fpexc;
+
+	fpexc = fmrx(fpexc);
+	fmxr(fpexc, fpexc & ~VFPEXC_EN);
+	isb();
+}
+
 /* called for each cpu */
 void
 vfp_init(void)
@@ -223,7 +251,9 @@
 	curpcb = curthread->td_pcb;
 	cpu = PCPU_GET(cpuid);
 	if (curpcb->pcb_vfpcpu != cpu || curthread != PCPU_GET(fpcurthread)) {
-		vfp_restore(&curpcb->pcb_vfpstate);
+		if (curpcb->pcb_vfpsaved == NULL)
+			curpcb->pcb_vfpsaved = &curpcb->pcb_vfpstate;
+		vfp_restore(curpcb->pcb_vfpsaved);
 		curpcb->pcb_vfpcpu = cpu;
 		PCPU_SET(fpcurthread, curthread);
 	}
@@ -320,4 +350,154 @@
 	fmxr(fpexc, tmp & ~VFPEXC_EN);
 }
 
+void
+vfp_save_state(struct thread *td, struct pcb *pcb)
+{
+	uint32_t fpexc;
+
+	KASSERT(pcb != NULL, ("NULL vfp pcb"));
+	KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb"));
+
+	/*
+	 * savectx() will be called on panic with dumppcb as an argument,
+	 * dumppcb doesn't have pcb_vfpsaved set, so set it to save
+	 * the VFP registers.
+	 */
+	if (pcb->pcb_vfpsaved == NULL)
+		pcb->pcb_vfpsaved = &pcb->pcb_vfpstate;
+
+	if (td == NULL)
+		td = curthread;
+
+	critical_enter();
+	/*
+	 * Only store the registers if the VFP is enabled,
+	 * i.e. return if we are trapping on FP access.
+	 */
+	fpexc = fmrx(fpexc);
+	if (fpexc & VFPEXC_EN) {
+		KASSERT(PCPU_GET(fpcurthread) == td,
+		    ("Storing an invalid VFP state"));
+
+		vfp_store(pcb->pcb_vfpsaved, true);
+	}
+	critical_exit();
+}
+
+void
+fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
+{
+	struct pcb *pcb;
+
+	pcb = td->td_pcb;
+	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
+	    ("ctx is required when !FPU_KERN_NOCTX"));
+	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
+	    ("using inuse ctx"));
+	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
+	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));
+
+	if ((flags & FPU_KERN_NOCTX) != 0) {
+		critical_enter();
+		if (curthread == PCPU_GET(fpcurthread)) {
+			vfp_save_state(curthread, pcb);
+		}
+		PCPU_SET(fpcurthread, NULL);
+
+		vfp_enable();
+		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
+		    PCB_FP_STARTED;
+		return;
+	}
+
+	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
+		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
+		return;
+	}
+	/*
+	 * Check either we are already using the VFP in the kernel, or
+	 * the saved state points to the default user space.
+	 */
+	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
+	    pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
+	    ("Mangled pcb_vfpsaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_vfpsaved,
+	     &pcb->pcb_vfpstate));
+	ctx->flags = FPU_KERN_CTX_INUSE;
+	vfp_save_state(curthread, pcb);
+	ctx->prev = pcb->pcb_vfpsaved;
+	pcb->pcb_vfpsaved = &ctx->state;
+	pcb->pcb_fpflags |= PCB_FP_KERN;
+	pcb->pcb_fpflags &= ~PCB_FP_STARTED;
+
+	return;
+}
+
+int
+fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
+{
+	struct pcb *pcb;
+
+	pcb = td->td_pcb;
+
+	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
+		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
+		KASSERT(PCPU_GET(fpcurthread) == NULL,
+		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
+		CRITICAL_ASSERT(td);
+
+		vfp_disable();
+		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
+		critical_exit();
+	} else {
+		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
+		    ("FPU context not inuse"));
+		ctx->flags &= ~FPU_KERN_CTX_INUSE;
+
+		if (is_fpu_kern_thread(0) &&
+		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
+			return (0);
+		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
+		critical_enter();
+		vfp_discard(td);
+		critical_exit();
+		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
+		pcb->pcb_vfpsaved = ctx->prev;
+	}
+
+	if (pcb->pcb_vfpsaved == &pcb->pcb_vfpstate) {
+		pcb->pcb_fpflags &= ~PCB_FP_KERN;
+	} else {
+		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
+		    ("unpaired fpu_kern_leave"));
+	}
+
+	return (0);
+}
+
+int
+fpu_kern_thread(u_int flags __unused)
+{
+	struct pcb *pcb = curthread->td_pcb;
+
+	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
+	    ("Only kthread may use fpu_kern_thread"));
+	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
+	    ("Mangled pcb_vfpsaved"));
+	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
+	    ("Thread already setup for the VFP"));
+	pcb->pcb_fpflags |= PCB_FP_KERN;
+	return (0);
+}
+
+int
+is_fpu_kern_thread(u_int flags __unused)
+{
+	struct pcb *curpcb;
+
+	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
+		return (0);
+	curpcb = curthread->td_pcb;
+	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
+}
+
 #endif
diff --git a/sys/arm/arm/vm_machdep.c b/sys/arm/arm/vm_machdep.c
--- a/sys/arm/arm/vm_machdep.c
+++ b/sys/arm/arm/vm_machdep.c
@@ -108,9 +108,8 @@
 #ifdef VFP
 	/* Store actual state of VFP */
 	if (curthread == td1) {
-		critical_enter();
-		vfp_store(&td1->td_pcb->pcb_vfpstate, false);
-		critical_exit();
+		if ((td1->td_pcb->pcb_fpflags & PCB_FP_STARTED) != 0)
+			vfp_save_state(td1, td1->td_pcb);
 	}
 #endif
 	td2->td_pcb = pcb2;
@@ -139,6 +138,7 @@
 	pcb2->pcb_regs.sf_tpidrurw = (register_t)get_tls();
 
 	pcb2->pcb_vfpcpu = -1;
+	pcb2->pcb_vfpsaved = &pcb2->pcb_vfpstate;
 	pcb2->pcb_vfpstate.fpscr = initial_fpscr;
 
 	tf = td2->td_frame;
diff --git a/sys/arm/include/fpu.h b/sys/arm/include/fpu.h
new file mode 100644
--- /dev/null
+++ b/sys/arm/include/fpu.h
@@ -0,0 +1,7 @@
+/*-
+ * This file is in the public domain.
+ *
+ * $FreeBSD$
+ */
+#include
+#include
diff --git a/sys/arm/include/pcb.h b/sys/arm/include/pcb.h
--- a/sys/arm/include/pcb.h
+++ b/sys/arm/include/pcb.h
@@ -66,6 +66,11 @@
 
 	struct vfp_state pcb_vfpstate;          /* VP/NEON state */
 	u_int pcb_vfpcpu;		/* VP/NEON last cpu */
+#define	PCB_FP_STARTED	0x01
+#define	PCB_FP_KERN	0x02
+#define	PCB_FP_NOSAVE	0x04
+	struct vfp_state *pcb_vfpsaved;		/* VP/NEON state */
+	int pcb_fpflags;
 } __aligned(8); /*
 		 * We need the PCB to be aligned on 8 bytes, as we may
 		 * access it using ldrd/strd, and ARM ABI require it
diff --git a/sys/arm/include/reg.h b/sys/arm/include/reg.h
--- a/sys/arm/include/reg.h
+++ b/sys/arm/include/reg.h
@@ -13,17 +13,9 @@
 	unsigned int r_cpsr;
 };
 
-struct fp_extended_precision {
-	__uint32_t fp_exponent;
-	__uint32_t fp_mantissa_hi;
-	__uint32_t fp_mantissa_lo;
-};
-
-typedef struct fp_extended_precision fp_reg_t;
-
 struct fpreg {
-	unsigned int fpr_fpsr;
-	fp_reg_t fpr[8];
+	__uint64_t fpr_r[32];
+	__uint32_t fpr_fpscr;
 };
 
 struct dbreg {
diff --git a/sys/arm/include/vfp.h b/sys/arm/include/vfp.h
--- a/sys/arm/include/vfp.h
+++ b/sys/arm/include/vfp.h
@@ -139,6 +139,11 @@
 #define	COPROC10		(0x3 << 20)
 #define	COPROC11		(0x3 << 22)
 
+#define	FPU_KERN_NORMAL	0x0000
+#define	FPU_KERN_NOWAIT	0x0001
+#define	FPU_KERN_KTHR	0x0002
+#define	FPU_KERN_NOCTX	0x0004
+
 #ifndef LOCORE
 struct vfp_state {
 	uint64_t reg[32];
@@ -154,6 +159,18 @@
 void	vfp_init(void);
 void	vfp_store(struct vfp_state *, boolean_t);
 void	vfp_discard(struct thread *);
+void	vfp_restore_state(void);
+void	vfp_save_state(struct thread *, struct pcb *);
+
+struct fpu_kern_ctx;
+
+struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int);
+void	fpu_kern_free_ctx(struct fpu_kern_ctx *);
+void	fpu_kern_enter(struct thread *, struct fpu_kern_ctx *, u_int);
+int	fpu_kern_leave(struct thread *, struct fpu_kern_ctx *);
+int	fpu_kern_thread(u_int);
+int	is_fpu_kern_thread(u_int);
+
 #endif	/* _KERNEL */
 #endif	/* LOCORE */
 