Index: head/sys/powerpc/booke/booke_machdep.c =================================================================== --- head/sys/powerpc/booke/booke_machdep.c +++ head/sys/powerpc/booke/booke_machdep.c @@ -189,6 +189,10 @@ extern void *int_debug_ed; extern void *int_vec; extern void *int_vecast; +#ifdef __SPE__ +extern void *int_spe_fpdata; +extern void *int_spe_fpround; +#endif #ifdef HWPMC_HOOKS extern void *int_performance_counter; #endif @@ -258,6 +262,10 @@ case FSL_E500v1: case FSL_E500v2: SET_TRAP(SPR_IVOR32, int_vec); +#ifdef __SPE__ + SET_TRAP(SPR_IVOR33, int_spe_fpdata); + SET_TRAP(SPR_IVOR34, int_spe_fpround); +#endif break; } Index: head/sys/powerpc/booke/spe.c =================================================================== --- head/sys/powerpc/booke/spe.c +++ head/sys/powerpc/booke/spe.c @@ -40,9 +40,19 @@ #include #include +#include +#include #include #include +#include +#include +#include + +void spe_handle_fpdata(struct trapframe *); +void spe_handle_fpround(struct trapframe *); +static int spe_emu_instr(uint32_t, struct fpemu *, struct fpn **, uint32_t *); + static void save_vec_int(struct thread *td) { @@ -75,7 +85,7 @@ __asm ( "evxor 0,0,0\n" "evaddumiaaw 0,0\n" - "evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.vr[17][0])); + "evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.spare[0])); pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); /* @@ -113,6 +123,7 @@ if (!(pcb->pcb_flags & PCB_VEC)) { memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec); pcb->pcb_flags |= PCB_VEC; + pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); } /* @@ -121,12 +132,11 @@ */ msr = mfmsr(); mtmsr(msr | PSL_VEC); - isync(); /* Restore SPEFSCR and ACC. Use %r0 as the scratch for ACC. */ mtspr(SPR_SPEFSCR, pcb->pcb_vec.vscr); __asm __volatile("evldd 0, 0(%0); evmra 0,0\n" - :: "b"(&pcb->pcb_vec.vr[17][0])); + :: "b"(&pcb->pcb_vec.spare[0])); /* * The lower half of each register will be restored on trap return. 
Use
@@ -180,4 +190,506 @@
 	}
 
 	save_vec_int(td);
+}
+
+
+/*
+ * Secondary opcodes with the operand-format bits masked off (SPE_INST_MASK),
+ * so that one decode switch serves the vector, single- and double-precision
+ * form of each operation.
+ */
+#define	SPE_INST_MASK	0x31f
+#define	EADD	0x200
+#define	ESUB	0x201
+#define	EABS	0x204
+#define	ENABS	0x205
+#define	ENEG	0x206
+#define	EMUL	0x208
+#define	EDIV	0x209
+#define	ECMPGT	0x20c
+#define	ECMPLT	0x20d
+#define	ECMPEQ	0x20e
+#define	ECFUI	0x210
+#define	ECFSI	0x211
+#define	ECTUI	0x214
+#define	ECTSI	0x215
+#define	ECTUF	0x216
+#define	ECTSF	0x217
+#define	ECTUIZ	0x218
+#define	ECTSIZ	0x21a
+
+/* Operand format: bits 5..7 of the instruction (the 'src' field). */
+#define	SPE		0x4
+#define	SPFP		0x6
+#define	DPFP		0x7
+
+/* Primary opcode of the instructions handled here, and its bit position. */
+#define	SPE_OPC		4
+#define	OPC_SHIFT	26
+
+/* Full secondary opcodes: vector single-precision (evfs*). */
+#define	EVFSADD		0x280
+#define	EVFSSUB		0x281
+#define	EVFSABS		0x284
+#define	EVFSNABS	0x285
+#define	EVFSNEG		0x286
+#define	EVFSMUL		0x288
+#define	EVFSDIV		0x289
+#define	EVFSCMPGT	0x28c
+#define	EVFSCMPLT	0x28d
+#define	EVFSCMPEQ	0x28e
+#define	EVFSCFUI	0x290
+#define	EVFSCFSI	0x291
+#define	EVFSCTUI	0x294
+#define	EVFSCTSI	0x295
+#define	EVFSCTUF	0x296
+#define	EVFSCTSF	0x297
+#define	EVFSCTUIZ	0x298
+#define	EVFSCTSIZ	0x29a
+
+/* Full secondary opcodes: scalar single-precision (efs*). */
+#define	EFSADD		0x2c0
+#define	EFSSUB		0x2c1
+#define	EFSABS		0x2c4
+#define	EFSNABS		0x2c5
+#define	EFSNEG		0x2c6
+#define	EFSMUL		0x2c8
+#define	EFSDIV		0x2c9
+#define	EFSCMPGT	0x2cc
+#define	EFSCMPLT	0x2cd
+#define	EFSCMPEQ	0x2ce
+#define	EFSCFD		0x2cf
+#define	EFSCFUI		0x2d0
+#define	EFSCFSI		0x2d1
+#define	EFSCTUI		0x2d4
+#define	EFSCTSI		0x2d5
+#define	EFSCTUF		0x2d6
+#define	EFSCTSF		0x2d7
+#define	EFSCTUIZ	0x2d8
+#define	EFSCTSIZ	0x2da
+
+/* Full secondary opcodes: scalar double-precision (efd*). */
+#define	EFDADD		0x2e0
+#define	EFDSUB		0x2e1
+#define	EFDABS		0x2e4
+#define	EFDNABS		0x2e5
+#define	EFDNEG		0x2e6
+#define	EFDMUL		0x2e8
+#define	EFDDIV		0x2e9
+#define	EFDCMPGT	0x2ec
+#define	EFDCMPLT	0x2ed
+#define	EFDCMPEQ	0x2ee
+#define	EFDCFS		0x2ef
+#define	EFDCFUI		0x2f0
+#define	EFDCFSI		0x2f1
+#define	EFDCTUI		0x2f4
+#define	EFDCTSI		0x2f5
+#define	EFDCTUF		0x2f6
+#define	EFDCTSF		0x2f7
+#define	EFDCTUIZ	0x2f8
+#define	EFDCTSIZ	0x2fa
+
+/* Width of the result that still has to be written back to the registers. */
+enum {
+	NONE,
+	SINGLE,
+	DOUBLE,
+	VECTOR,
+};
+
+/*
+ * Translate the exception bits left in an emulator FPSCR image into the
+ * matching SPEFSCR status bits (low-word positions).
+ */
+static uint32_t fpscr_to_spefscr(uint32_t fpscr)
+{
+	uint32_t spefscr;
+
+	spefscr = 0;
+
+	if (fpscr & FPSCR_VX)
+		spefscr |= SPEFSCR_FINV;
+	if (fpscr & FPSCR_OX)
+		spefscr |= SPEFSCR_FOVF;
+	if (fpscr & FPSCR_UX)
+		spefscr |= SPEFSCR_FUNF;
+	if (fpscr & FPSCR_ZX)
+		spefscr |= SPEFSCR_FDBZ;
+	if (fpscr & FPSCR_XX)
+		spefscr |= SPEFSCR_FX;
+
+	return (spefscr);
+}
+
+/*
+ * Convert fpn to a 32-bit integer and store it in *val.  sign is 0 for an
+ * unsigned conversion, 1 for a signed one.
+ *
+ * fpu_ftox() produces two words; res[0] is presumably the upper and res[1]
+ * the lower one (TODO confirm).  An upper word that is neither 0 nor all-ones
+ * raises FPSCR_OX; an all-ones upper word on an unsigned conversion raises
+ * FPSCR_UX.  In both cases *val is left unmodified.  Always returns 0.
+ */
+static int
+spe_to_int(struct fpemu *fpemu, struct fpn *fpn, uint32_t *val, int sign)
+{
+	uint32_t res[2];
+
+	res[0] = fpu_ftox(fpemu, fpn, res);
+	if (res[0] != UINT_MAX && res[0] != 0)
+		fpemu->fe_cx |= FPSCR_OX;
+	else if (sign == 0 && res[0] != 0)
+		fpemu->fe_cx |= FPSCR_UX;
+	else
+		*val = res[1];
+
+	return (0);
+}
+
+/*
+ * Emulate one operation on the operands already unpacked into fpemu.  instr is
+ * matched after masking with SPE_INST_MASK, so any operand format may be
+ * passed in.
+ *
+ * Compares return 1 if the relation holds and 0 if not.  Everything else
+ * returns -1: arithmetic stores its result in *result, conversions to integer
+ * write *iresult, and abs/nabs/neg or unknown opcodes touch neither.
+ */
+static int
+spe_emu_instr(uint32_t instr, struct fpemu *fpemu,
+    struct fpn **result, uint32_t *iresult)
+{
+	switch (instr & SPE_INST_MASK) {
+	case EABS:
+	case ENABS:
+	case ENEG:
+		/* Taken care of elsewhere. */
+		break;
+	case ECTUIZ:
+		fpemu->fe_cx &= ~FPSCR_RN;
+		fpemu->fe_cx |= FP_RZ;
+		/* FALLTHROUGH */
+	case ECTUI:
+		spe_to_int(fpemu, &fpemu->fe_f2, iresult, 0);
+		return (-1);
+	case ECTSIZ:
+		fpemu->fe_cx &= ~FPSCR_RN;
+		fpemu->fe_cx |= FP_RZ;
+		/* FALLTHROUGH */
+	case ECTSI:
+		spe_to_int(fpemu, &fpemu->fe_f2, iresult, 1);
+		return (-1);
+	case EADD:
+		*result = fpu_add(fpemu);
+		break;
+	case ESUB:
+		*result = fpu_sub(fpemu);
+		break;
+	case EMUL:
+		*result = fpu_mul(fpemu);
+		break;
+	case EDIV:
+		*result = fpu_div(fpemu);
+		break;
+	case ECMPGT:
+		fpu_compare(fpemu, 0);
+		if (fpemu->fe_cx & FPSCR_FG)
+			return (1);
+		return (0);
+	case ECMPLT:
+		fpu_compare(fpemu, 0);
+		if (fpemu->fe_cx & FPSCR_FL)
+			return (1);
+		return (0);
+	case ECMPEQ:
+		fpu_compare(fpemu, 0);
+		if (fpemu->fe_cx & FPSCR_FE)
+			return (1);
+		return (0);
+	default:
+		printf("Unknown instruction %x\n", instr);
+	}
+
+	return (-1);
+}
+
+/*
+ * Unpack a raw single (hi) or double (hi:lo) operand into fp and record its
+ * class.  A signalling NaN is quieted and FPSCR_VXSNAN is raised in fe.
+ * Always returns 0; an unknown type is a programming error and panics.
+ */
+static int
+spe_explode(struct fpemu *fe, struct fpn *fp, uint32_t type,
+    uint32_t hi, uint32_t lo)
+{
+	uint32_t s;
+
+	fp->fp_sign = hi >> 31;
+	fp->fp_sticky = 0;
+	switch (type) {
+	case SINGLE:
+		s = fpu_stof(fp, hi);
+		break;
+
+	case DOUBLE:
+		s = fpu_dtof(fp, hi, lo);
+		break;
+
+	default:
+		panic("spe_explode: unknown operand type %u", type);
+	}
+
+	if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) {
+		/*
+		 * Input is a signalling NaN.  All operations that return
+		 * an input NaN operand put it through a ``NaN conversion'',
+		 * which basically just means ``turn on the quiet bit''.
+		 * We do this here so that all NaNs internally look quiet
+		 * (we can tell signalling ones by their class).
+		 */
+		fp->fp_mant[0] |= FP_QUIETBIT;
+		fe->fe_cx |= FPSCR_VXSNAN;	/* assert invalid operand */
+		s = FPC_SNAN;
+	}
+	fp->fp_class = s;
+
+	return (0);
+}
+
+/*
+ * SPE floating-point data exception handler, called from int_spe_fpdata.
+ * Fetches the faulting instruction from frame->srr0, emulates it in software
+ * and advances srr0 past it.  The low word of each 64-bit register is taken
+ * from frame->fixreg[], the high word from the thread's pcb_vec.vr[n][0].
+ */
+void
+spe_handle_fpdata(struct trapframe *frame)
+{
+	struct fpemu fpemu;
+	struct fpn *result = NULL;
+	uint32_t instr, instr_sec_op;
+	uint32_t cr_shift, ra, rb, rd, src;
+	uint32_t high, low, res = 0;	/* For vector operations. */
+	uint32_t spefscr = 0;
+	uint32_t ftod_res[2];
+	int width; /* Single, Double, Vector, Integer */
+	int err;
+
+	err = fueword32((void *)frame->srr0, &instr);
+
+	if (err != 0)
+		return;		/* Fault. */
+
+	if ((instr >> OPC_SHIFT) != SPE_OPC)
+		return;
+
+	/*
+	 * 'cr' field is the upper 3 bits of rd.  Magically, since a) rd is 5
+	 * bits, b) each 'cr' field is 4 bits, and c) Only the 'GT' bit is
+	 * modified for most compare operations, the full value of rd can be
+	 * used as a shift value.
+	 */
+	rd = (instr >> 21) & 0x1f;
+	ra = (instr >> 16) & 0x1f;
+	rb = (instr >> 11) & 0x1f;
+	src = (instr >> 5) & 0x7;
+	cr_shift = 28 - (rd & 0x1f);
+
+	instr_sec_op = (instr & 0x7ff);
+
+	memset(&fpemu, 0, sizeof(fpemu));
+
+	width = NONE;
+	switch (src) {
+	case SPE:
+		save_vec_nodrop(curthread);
+		switch (instr_sec_op) {
+		case EVFSABS:
+			curthread->td_pcb->pcb_vec.vr[rd][0] =
+			    curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31);
+			frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31);
+			break;
+		case EVFSNABS:
+			curthread->td_pcb->pcb_vec.vr[rd][0] =
+			    curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31);
+			frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31);
+			break;
+		case EVFSNEG:
+			curthread->td_pcb->pcb_vec.vr[rd][0] =
+			    curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31);
+			frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31);
+			break;
+		default:
+			/* High word */
+			spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
+			    curthread->td_pcb->pcb_vec.vr[ra][0], 0);
+			spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
+			    curthread->td_pcb->pcb_vec.vr[rb][0], 0);
+			high = spe_emu_instr(instr_sec_op, &fpemu, &result,
+			    &curthread->td_pcb->pcb_vec.vr[rd][0]);
+			/* Arithmetic leaves an unpacked result; pack and store. */
+			if (result != NULL)
+				curthread->td_pcb->pcb_vec.vr[rd][0] =
+				    fpu_ftos(&fpemu, result);
+
+			spefscr = fpscr_to_spefscr(fpemu.fe_cx) << 16;
+			/* Clear the fpemu to start over on the lower bits. */
+			memset(&fpemu, 0, sizeof(fpemu));
+			result = NULL;
+
+			/* Now low word */
+			spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
+			    frame->fixreg[ra], 0);
+			spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
+			    frame->fixreg[rb], 0);
+			low = spe_emu_instr(instr_sec_op, &fpemu, &result,
+			    &frame->fixreg[rd]);
+			if (result != NULL)
+				frame->fixreg[rd] = fpu_ftos(&fpemu, result);
+			/* Collect the flags only after the low word was emulated. */
+			spefscr |= fpscr_to_spefscr(fpemu.fe_cx);
+			if (instr_sec_op == EVFSCMPEQ ||
+			    instr_sec_op == EVFSCMPGT ||
+			    instr_sec_op == EVFSCMPLT) {
+				res = (high << 3) | (low << 2) |
+				    ((high | low) << 1) | (high & low);
+				frame->cr &= ~(0xf << cr_shift);
+				frame->cr |= (res << cr_shift);
+				width = NONE;
+			} else
+				width = VECTOR;
+			break;
+		}
+		enable_vec(curthread);
+		goto end;
+
+	case SPFP:
+		switch (instr_sec_op) {
+		case EFSABS:
+			frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31);
+			break;
+		case EFSNABS:
+			frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31);
+			break;
+		case EFSNEG:
+			frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31);
+			break;
+		case EFSCFD:
+			save_vec_nodrop(curthread);
+			spe_explode(&fpemu, &fpemu.fe_f3, DOUBLE,
+			    curthread->td_pcb->pcb_vec.vr[rb][0],
+			    frame->fixreg[rb]);
+			result = &fpemu.fe_f3;
+			width = SINGLE;
+			break;
+		default:
+			spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
+			    frame->fixreg[ra], 0);
+			spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
+			    frame->fixreg[rb], 0);
+			width = SINGLE;
+		}
+		break;
+	case DPFP:
+		save_vec_nodrop(curthread);
+		switch (instr_sec_op) {
+		case EFDABS:
+			curthread->td_pcb->pcb_vec.vr[rd][0] =
+			    curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31);
+			frame->fixreg[rd] = frame->fixreg[ra];
+			enable_vec(curthread);
+			break;
+		case EFDNABS:
+			curthread->td_pcb->pcb_vec.vr[rd][0] =
+			    curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31);
+			frame->fixreg[rd] = frame->fixreg[ra];
+			enable_vec(curthread);
+			break;
+		case EFDNEG:
+			curthread->td_pcb->pcb_vec.vr[rd][0] =
+			    curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31);
+			frame->fixreg[rd] = frame->fixreg[ra];
+			enable_vec(curthread);
+			break;
+		case EFDCFS:
+			spe_explode(&fpemu, &fpemu.fe_f3, SINGLE,
+			    frame->fixreg[rb], 0);
+			result = &fpemu.fe_f3;
+			width = DOUBLE;
+			break;
+		default:
+			spe_explode(&fpemu, &fpemu.fe_f1, DOUBLE,
+			    curthread->td_pcb->pcb_vec.vr[ra][0],
+			    frame->fixreg[ra]);
+			spe_explode(&fpemu, &fpemu.fe_f2, DOUBLE,
+			    curthread->td_pcb->pcb_vec.vr[rb][0],
+			    frame->fixreg[rb]);
+			width = DOUBLE;
+		}
+		break;
+	}
+	switch (instr_sec_op) {
+	case EFDCFS:
+	case EFSCFD:
+		/* Already handled. */
+		break;
+	default:
+		res = spe_emu_instr(instr_sec_op, &fpemu, &result,
+		    &frame->fixreg[rd]);
+		if (res != (uint32_t)-1)
+			res <<= 2;
+		break;
+	}
+
+	switch (instr_sec_op & SPE_INST_MASK) {
+	case ECMPEQ:
+	case ECMPGT:
+	case ECMPLT:
+		frame->cr &= ~(0xf << cr_shift);
+		frame->cr |= (res << cr_shift);
+		break;
+	case ECTUI:
+	case ECTUIZ:
+	case ECTSI:
+	case ECTSIZ:
+		break;
+	default:
+		if (result == NULL)
+			break;	/* Nothing was computed; nothing to store. */
+		switch (width) {
+		case NONE:
+		case VECTOR:
+			break;
+		case SINGLE:
+			frame->fixreg[rd] = fpu_ftos(&fpemu, result);
+			break;
+		case DOUBLE:
+			curthread->td_pcb->pcb_vec.vr[rd][0] =
+			    fpu_ftod(&fpemu, result, ftod_res);
+			frame->fixreg[rd] = ftod_res[1];
+			enable_vec(curthread);
+			break;
+		default:
+			panic("Unknown storage width %d", width);
+			break;
+		}
+	}
+
+end:
+	spefscr |= (mfspr(SPR_SPEFSCR) & ~SPEFSCR_FINVS);
+	mtspr(SPR_SPEFSCR, spefscr);
+	frame->srr0 += 4;
+
+	return;
+}
+
+void
+spe_handle_fpround(struct trapframe *frame)
+{
+
+	/*
+	 * Punt fpround exceptions for now.  This leaves the truncated result in
+	 * the register.  We'll deal with overflow/underflow later.
+	 */
+	return;
 }
Index: head/sys/powerpc/booke/trap_subr.S
===================================================================
--- head/sys/powerpc/booke/trap_subr.S
+++ head/sys/powerpc/booke/trap_subr.S
@@ -638,6 +638,30 @@
 	b	trap_common
 
 
+#ifdef __SPE__
+/*****************************************************************************
+ * SPE floating-point data and round interrupts.  Each stub sets up a frame
+ * with FRAME_SETUP, calls the C handler with %r3 = %r1 + CALLSIZE (presumably
+ * the address of the trap frame -- confirm) and returns with rfi.
+ ****************************************************************************/
+INTERRUPT(int_spe_fpdata)
+	STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1)
+	FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SPFPD)
+	addi	%r3, %r1, CALLSIZE
+	bl	spe_handle_fpdata
+	FRAME_LEAVE(SPR_SRR0, SPR_SRR1)
+	rfi
+
+INTERRUPT(int_spe_fpround)
+	STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1)
+	FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SPFPR)
+	addi	%r3, %r1, CALLSIZE
+	bl	spe_handle_fpround
+	FRAME_LEAVE(SPR_SRR0, SPR_SRR1)
+	rfi
+#endif
+
+
 #ifdef HWPMC_HOOKS
 /*****************************************************************************
  * PMC Interrupt
Index: head/sys/powerpc/include/spr.h
===================================================================
--- head/sys/powerpc/include/spr.h
+++ head/sys/powerpc/include/spr.h
@@ -245,6 +245,39 @@
 #define	SPR_PTCR		0x1d0	/* Partition Table Control Register */
 
 #define	SPR_SPEFSCR		0x200	/* ..8 Signal Processing Engine FSCR. */
+/*
+ * SPEFSCR bits.  Each 'H' name is the same-named low-word bit shifted left
+ * by 16.  NOTE(review): the 'S' names look like sticky status bits, the 'E'
+ * names like per-exception enables and FRMC_M like the rounding-mode mask --
+ * confirm against the core reference manual.
+ */
+#define	  SPEFSCR_SOVH		  0x80000000
+#define	  SPEFSCR_OVH		  0x40000000
+#define	  SPEFSCR_FGH		  0x20000000
+#define	  SPEFSCR_FXH		  0x10000000
+#define	  SPEFSCR_FINVH		  0x08000000
+#define	  SPEFSCR_FDBZH		  0x04000000
+#define	  SPEFSCR_FUNFH		  0x02000000
+#define	  SPEFSCR_FOVFH		  0x01000000
+#define	  SPEFSCR_FINXS		  0x00200000
+#define	  SPEFSCR_FINVS		  0x00100000
+#define	  SPEFSCR_FDBZS		  0x00080000
+#define	  SPEFSCR_FUNFS		  0x00040000
+#define	  SPEFSCR_FOVFS		  0x00020000
+#define	  SPEFSCR_SOV		  0x00008000
+#define	  SPEFSCR_OV		  0x00004000
+#define	  SPEFSCR_FG		  0x00002000
+#define	  SPEFSCR_FX		  0x00001000
+#define	  SPEFSCR_FINV		  0x00000800
+#define	  SPEFSCR_FDBZ		  0x00000400
+#define	  SPEFSCR_FUNF		  0x00000200
+#define	  SPEFSCR_FOVF		  0x00000100
+#define	  SPEFSCR_FINXE		  0x00000040
+#define	  SPEFSCR_FINVE		  0x00000020
+#define	  SPEFSCR_FDBZE		  0x00000010
+#define	  SPEFSCR_FUNFE		  0x00000008
+#define	  SPEFSCR_FOVFE		  0x00000004
+#define	  SPEFSCR_FRMC_M	  0x00000003
 #define	SPR_IBAT0U		0x210	/* .6. Instruction BAT Reg 0 Upper */
 #define	SPR_IBAT0L		0x211	/* .6. Instruction BAT Reg 0 Lower */
 #define	SPR_IBAT1U		0x212	/* .6.
Instruction BAT Reg 1 Upper */ Index: head/sys/powerpc/include/trap.h =================================================================== --- head/sys/powerpc/include/trap.h +++ head/sys/powerpc/include/trap.h @@ -100,6 +100,8 @@ #define EXC_APU 0x1300 /* Auxiliary Processing Unit */ #define EXC_DEBUG 0x2f10 /* Debug trap */ #define EXC_VECAST_E 0x2f20 /* Altivec Assist (Book-E) */ +#define EXC_SPFPD 0x2f30 /* SPE Floating-point Data */ +#define EXC_SPFPR 0x2f40 /* SPE Floating-point Round */ #define EXC_LAST 0x2f00 /* Last possible exception vector */ Index: head/sys/powerpc/powerpc/exec_machdep.c =================================================================== --- head/sys/powerpc/powerpc/exec_machdep.c +++ head/sys/powerpc/powerpc/exec_machdep.c @@ -971,6 +971,10 @@ pcb2->pcb_context[0] = pcb2->pcb_lr; #endif pcb2->pcb_cpu.aim.usr_vsid = 0; +#ifdef __SPE__ + pcb2->pcb_vec.vscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | + SPEFSCR_FUNFE | SPEFSCR_FOVFE; +#endif /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; @@ -1016,6 +1020,10 @@ } td->td_pcb->pcb_flags = 0; +#ifdef __SPE__ + td->td_pcb->pcb_vec.vscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | + SPEFSCR_FUNFE | SPEFSCR_FOVFE; +#endif td->td_retval[0] = (register_t)entry; td->td_retval[1] = 0; Index: head/sys/powerpc/powerpc/swtch32.S =================================================================== --- head/sys/powerpc/powerpc/swtch32.S +++ head/sys/powerpc/powerpc/swtch32.S @@ -212,4 +212,8 @@ trapframe to simulate FRAME_SETUP does when allocating space for a frame pointer/saved LR */ +#ifdef __SPE__ + li %r3,SPEFSCR_FINVE|SPEFSCR_FDBZE|SPEFSCR_FUNFE|SPEFSCR_FOVFE + mtspr SPR_SPEFSCR, %r3 +#endif b trapexit