Changeset View
Changeset View
Standalone View
Standalone View
head/sys/powerpc/booke/spe.c
Show All 34 Lines | |||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/limits.h> | #include <sys/limits.h> | ||||
#include <machine/altivec.h> | #include <machine/altivec.h> | ||||
#include <machine/fpu.h> | |||||
#include <machine/ieeefp.h> | |||||
#include <machine/pcb.h> | #include <machine/pcb.h> | ||||
#include <machine/psl.h> | #include <machine/psl.h> | ||||
#include <powerpc/fpu/fpu_arith.h> | |||||
#include <powerpc/fpu/fpu_emu.h> | |||||
#include <powerpc/fpu/fpu_extern.h> | |||||
/*
 * Forward declarations: the two SPE floating-point trap entry points
 * defined in this file, and the per-operation emulation helper.
 */
void	spe_handle_fpdata(struct trapframe *frame);
void	spe_handle_fpround(struct trapframe *frame);
static int	spe_emu_instr(uint32_t instr, struct fpemu *fpemu,
		    struct fpn **result, uint32_t *iresult);
static void | static void | ||||
save_vec_int(struct thread *td) | save_vec_int(struct thread *td) | ||||
{ | { | ||||
int msr; | int msr; | ||||
struct pcb *pcb; | struct pcb *pcb; | ||||
pcb = td->td_pcb; | pcb = td->td_pcb; | ||||
Show All 16 Lines | #define EVSTDW(n) __asm ("evstdw %1,0(%0)" \ | ||||
EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19); | EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19); | ||||
EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23); | EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23); | ||||
EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27); | EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27); | ||||
EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31); | EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31); | ||||
#undef EVSTDW | #undef EVSTDW | ||||
__asm ( "evxor 0,0,0\n" | __asm ( "evxor 0,0,0\n" | ||||
"evaddumiaaw 0,0\n" | "evaddumiaaw 0,0\n" | ||||
"evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.vr[17][0])); | "evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.spare[0])); | ||||
pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); | pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); | ||||
/* | /* | ||||
* Disable vector unit again | * Disable vector unit again | ||||
*/ | */ | ||||
isync(); | isync(); | ||||
mtmsr(msr); | mtmsr(msr); | ||||
Show All 21 Lines | enable_vec(struct thread *td) | ||||
* exception. If this is the first time the unit has been used by | * exception. If this is the first time the unit has been used by | ||||
* the thread, initialise the vector registers and VSCR to 0, and | * the thread, initialise the vector registers and VSCR to 0, and | ||||
* set the flag to indicate that the vector unit is in use. | * set the flag to indicate that the vector unit is in use. | ||||
*/ | */ | ||||
tf->srr1 |= PSL_VEC; | tf->srr1 |= PSL_VEC; | ||||
if (!(pcb->pcb_flags & PCB_VEC)) { | if (!(pcb->pcb_flags & PCB_VEC)) { | ||||
memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec); | memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec); | ||||
pcb->pcb_flags |= PCB_VEC; | pcb->pcb_flags |= PCB_VEC; | ||||
pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); | |||||
} | } | ||||
/* | /* | ||||
* Temporarily enable the vector unit so the registers | * Temporarily enable the vector unit so the registers | ||||
* can be restored. | * can be restored. | ||||
*/ | */ | ||||
msr = mfmsr(); | msr = mfmsr(); | ||||
mtmsr(msr | PSL_VEC); | mtmsr(msr | PSL_VEC); | ||||
isync(); | |||||
/* Restore SPEFSCR and ACC. Use %r0 as the scratch for ACC. */ | /* Restore SPEFSCR and ACC. Use %r0 as the scratch for ACC. */ | ||||
mtspr(SPR_SPEFSCR, pcb->pcb_vec.vscr); | mtspr(SPR_SPEFSCR, pcb->pcb_vec.vscr); | ||||
__asm __volatile("evldd 0, 0(%0); evmra 0,0\n" | __asm __volatile("evldd 0, 0(%0); evmra 0,0\n" | ||||
:: "b"(&pcb->pcb_vec.vr[17][0])); | :: "b"(&pcb->pcb_vec.spare[0])); | ||||
/* | /* | ||||
* The lower half of each register will be restored on trap return. Use | * The lower half of each register will be restored on trap return. Use | ||||
* %r0 as a scratch register, and restore it last. | * %r0 as a scratch register, and restore it last. | ||||
*/ | */ | ||||
#define EVLDW(n) __asm __volatile("evldw 0, 0(%0); evmergehilo "#n",0,"#n \ | #define EVLDW(n) __asm __volatile("evldw 0, 0(%0); evmergehilo "#n",0,"#n \ | ||||
:: "b"(&pcb->pcb_vec.vr[n])); | :: "b"(&pcb->pcb_vec.vr[n])); | ||||
EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4); | EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4); | ||||
Show All 37 Lines | save_vec_nodrop(struct thread *td) | ||||
struct thread *vtd; | struct thread *vtd; | ||||
vtd = PCPU_GET(vecthread); | vtd = PCPU_GET(vecthread); | ||||
if (td != vtd) { | if (td != vtd) { | ||||
return; | return; | ||||
} | } | ||||
save_vec_int(td); | save_vec_int(td); | ||||
} | |||||
/*
 * ANDing a secondary opcode with SPE_INST_MASK folds the vector (EVFS*),
 * scalar single (EFS*) and scalar double (EFD*) encodings of an operation
 * onto the generic E* code below, e.g. EVFSADD, EFSADD and EFDADD all
 * become EADD.
 */
#define	SPE_INST_MASK	0x31f

/* Generic operation codes (secondary opcode & SPE_INST_MASK). */
#define	EADD		0x200
#define	ESUB		0x201
#define	EABS		0x204
#define	ENABS		0x205
#define	ENEG		0x206
#define	EMUL		0x208
#define	EDIV		0x209
#define	ECMPGT		0x20c
#define	ECMPLT		0x20d
#define	ECMPEQ		0x20e
#define	ECFUI		0x210
#define	ECFSI		0x211
#define	ECTUI		0x214
#define	ECTSI		0x215
#define	ECTUF		0x216
#define	ECTSF		0x217
#define	ECTUIZ		0x218
#define	ECTSIZ		0x21a

/* Instruction class, taken from bits 5..7 of the instruction word. */
#define	SPE		0x4
#define	SPFP		0x6
#define	DPFP		0x7

/* Primary opcode shared by these instructions, and its bit position. */
#define	SPE_OPC		4
#define	OPC_SHIFT	26

/* Vector single-precision secondary opcodes (class SPE). */
#define	EVFSADD		0x280
#define	EVFSSUB		0x281
#define	EVFSABS		0x284
#define	EVFSNABS	0x285
#define	EVFSNEG		0x286
#define	EVFSMUL		0x288
#define	EVFSDIV		0x289
#define	EVFSCMPGT	0x28c
#define	EVFSCMPLT	0x28d
#define	EVFSCMPEQ	0x28e
#define	EVFSCFUI	0x290
#define	EVFSCFSI	0x291
#define	EVFSCTUI	0x294
#define	EVFSCTSI	0x295
#define	EVFSCTUF	0x296
#define	EVFSCTSF	0x297
#define	EVFSCTUIZ	0x298
#define	EVFSCTSIZ	0x29a

/* Scalar single-precision secondary opcodes (class SPFP). */
#define	EFSADD		0x2c0
#define	EFSSUB		0x2c1
#define	EFSABS		0x2c4
#define	EFSNABS		0x2c5
#define	EFSNEG		0x2c6
#define	EFSMUL		0x2c8
#define	EFSDIV		0x2c9
#define	EFSCMPGT	0x2cc
#define	EFSCMPLT	0x2cd
#define	EFSCMPEQ	0x2ce
#define	EFSCFD		0x2cf
#define	EFSCFUI		0x2d0
#define	EFSCFSI		0x2d1
#define	EFSCTUI		0x2d4
#define	EFSCTSI		0x2d5
#define	EFSCTUF		0x2d6
#define	EFSCTSF		0x2d7
#define	EFSCTUIZ	0x2d8
#define	EFSCTSIZ	0x2da

/* Scalar double-precision secondary opcodes (class DPFP). */
#define	EFDADD		0x2e0
#define	EFDSUB		0x2e1
#define	EFDABS		0x2e4
#define	EFDNABS		0x2e5
#define	EFDNEG		0x2e6
#define	EFDMUL		0x2e8
#define	EFDDIV		0x2e9
#define	EFDCMPGT	0x2ec
#define	EFDCMPLT	0x2ed
#define	EFDCMPEQ	0x2ee
#define	EFDCFS		0x2ef
#define	EFDCFUI		0x2f0
#define	EFDCFSI		0x2f1
#define	EFDCTUI		0x2f4
#define	EFDCTSI		0x2f5
#define	EFDCTUF		0x2f6
#define	EFDCTSF		0x2f7
#define	EFDCTUIZ	0x2f8
#define	EFDCTSIZ	0x2fa

/* How (and whether) an emulated result is written back. */
enum {
	NONE,		/* nothing left to store */
	SINGLE,		/* single-precision result */
	DOUBLE,		/* double-precision result */
	VECTOR,		/* vector operation */
};
static uint32_t fpscr_to_spefscr(uint32_t fpscr) | |||||
{ | |||||
uint32_t spefscr; | |||||
spefscr = 0; | |||||
if (fpscr & FPSCR_VX) | |||||
spefscr |= SPEFSCR_FINV; | |||||
if (fpscr & FPSCR_OX) | |||||
spefscr |= SPEFSCR_FOVF; | |||||
if (fpscr & FPSCR_UX) | |||||
spefscr |= SPEFSCR_FUNF; | |||||
if (fpscr & FPSCR_ZX) | |||||
spefscr |= SPEFSCR_FDBZ; | |||||
if (fpscr & FPSCR_XX) | |||||
spefscr |= SPEFSCR_FX; | |||||
return (spefscr); | |||||
} | |||||
/* Sign is 0 for unsigned, 1 for signed. */ | |||||
static int | |||||
spe_to_int(struct fpemu *fpemu, struct fpn *fpn, uint32_t *val, int sign) | |||||
{ | |||||
uint32_t res[2]; | |||||
res[0] = fpu_ftox(fpemu, fpn, res); | |||||
if (res[0] != UINT_MAX && res[0] != 0) | |||||
fpemu->fe_cx |= FPSCR_OX; | |||||
else if (sign == 0 && res[0] != 0) | |||||
fpemu->fe_cx |= FPSCR_UX; | |||||
else | |||||
*val = res[1]; | |||||
return (0); | |||||
} | |||||
/* Masked instruction */ | |||||
/* | |||||
* For compare instructions, returns 1 if success, 0 if not. For all others, | |||||
* returns -1, or -2 if no result needs recorded. | |||||
*/ | |||||
static int | |||||
spe_emu_instr(uint32_t instr, struct fpemu *fpemu, | |||||
struct fpn **result, uint32_t *iresult) | |||||
{ | |||||
switch (instr & SPE_INST_MASK) { | |||||
case EABS: | |||||
case ENABS: | |||||
case ENEG: | |||||
/* Taken care of elsewhere. */ | |||||
break; | |||||
case ECTUIZ: | |||||
fpemu->fe_cx &= ~FPSCR_RN; | |||||
fpemu->fe_cx |= FP_RZ; | |||||
case ECTUI: | |||||
spe_to_int(fpemu, &fpemu->fe_f2, iresult, 0); | |||||
return (-1); | |||||
case ECTSIZ: | |||||
fpemu->fe_cx &= ~FPSCR_RN; | |||||
fpemu->fe_cx |= FP_RZ; | |||||
case ECTSI: | |||||
spe_to_int(fpemu, &fpemu->fe_f2, iresult, 1); | |||||
return (-1); | |||||
case EADD: | |||||
*result = fpu_add(fpemu); | |||||
break; | |||||
case ESUB: | |||||
*result = fpu_sub(fpemu); | |||||
break; | |||||
case EMUL: | |||||
*result = fpu_mul(fpemu); | |||||
break; | |||||
case EDIV: | |||||
*result = fpu_div(fpemu); | |||||
break; | |||||
case ECMPGT: | |||||
fpu_compare(fpemu, 0); | |||||
if (fpemu->fe_cx & FPSCR_FG) | |||||
return (1); | |||||
return (0); | |||||
case ECMPLT: | |||||
fpu_compare(fpemu, 0); | |||||
if (fpemu->fe_cx & FPSCR_FL) | |||||
return (1); | |||||
return (0); | |||||
case ECMPEQ: | |||||
fpu_compare(fpemu, 0); | |||||
if (fpemu->fe_cx & FPSCR_FE) | |||||
return (1); | |||||
return (0); | |||||
default: | |||||
printf("Unknown instruction %x\n", instr); | |||||
} | |||||
return (-1); | |||||
} | |||||
static int | |||||
spe_explode(struct fpemu *fe, struct fpn *fp, uint32_t type, | |||||
uint32_t hi, uint32_t lo) | |||||
{ | |||||
uint32_t s; | |||||
fp->fp_sign = hi >> 31; | |||||
fp->fp_sticky = 0; | |||||
switch (type) { | |||||
case SINGLE: | |||||
s = fpu_stof(fp, hi); | |||||
break; | |||||
case DOUBLE: | |||||
s = fpu_dtof(fp, hi, lo); | |||||
break; | |||||
} | |||||
if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) { | |||||
/* | |||||
* Input is a signalling NaN. All operations that return | |||||
* an input NaN operand put it through a ``NaN conversion'', | |||||
* which basically just means ``turn on the quiet bit''. | |||||
* We do this here so that all NaNs internally look quiet | |||||
* (we can tell signalling ones by their class). | |||||
*/ | |||||
fp->fp_mant[0] |= FP_QUIETBIT; | |||||
fe->fe_cx = FPSCR_VXSNAN; /* assert invalid operand */ | |||||
s = FPC_SNAN; | |||||
} | |||||
fp->fp_class = s; | |||||
return (0); | |||||
} | |||||
void | |||||
spe_handle_fpdata(struct trapframe *frame) | |||||
{ | |||||
struct fpemu fpemu; | |||||
struct fpn *result; | |||||
uint32_t instr, instr_sec_op; | |||||
uint32_t cr_shift, ra, rb, rd, src; | |||||
uint32_t high, low, res; /* For vector operations. */ | |||||
uint32_t spefscr = 0; | |||||
uint32_t ftod_res[2]; | |||||
int width; /* Single, Double, Vector, Integer */ | |||||
int err; | |||||
err = fueword32((void *)frame->srr0, &instr); | |||||
if (err != 0) | |||||
return; | |||||
/* Fault. */; | |||||
if ((instr >> OPC_SHIFT) != SPE_OPC) | |||||
return; | |||||
/* | |||||
* 'cr' field is the upper 3 bits of rd. Magically, since a) rd is 5 | |||||
* bits, b) each 'cr' field is 4 bits, and c) Only the 'GT' bit is | |||||
* modified for most compare operations, the full value of rd can be | |||||
* used as a shift value. | |||||
*/ | |||||
rd = (instr >> 21) & 0x1f; | |||||
ra = (instr >> 16) & 0x1f; | |||||
rb = (instr >> 11) & 0x1f; | |||||
src = (instr >> 5) & 0x7; | |||||
cr_shift = 28 - (rd & 0x1f); | |||||
instr_sec_op = (instr & 0x7ff); | |||||
memset(&fpemu, 0, sizeof(fpemu)); | |||||
width = NONE; | |||||
switch (src) { | |||||
case SPE: | |||||
save_vec_nodrop(curthread); | |||||
switch (instr_sec_op) { | |||||
case EVFSABS: | |||||
curthread->td_pcb->pcb_vec.vr[rd][0] = | |||||
curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31); | |||||
frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31); | |||||
break; | |||||
case EVFSNABS: | |||||
curthread->td_pcb->pcb_vec.vr[rd][0] = | |||||
curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31); | |||||
frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31); | |||||
break; | |||||
case EVFSNEG: | |||||
curthread->td_pcb->pcb_vec.vr[rd][0] = | |||||
curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31); | |||||
frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31); | |||||
break; | |||||
default: | |||||
/* High word */ | |||||
spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, | |||||
curthread->td_pcb->pcb_vec.vr[ra][0], 0); | |||||
spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, | |||||
curthread->td_pcb->pcb_vec.vr[rb][0], 0); | |||||
high = spe_emu_instr(instr_sec_op, &fpemu, &result, | |||||
&curthread->td_pcb->pcb_vec.vr[rd][0]); | |||||
spefscr = fpscr_to_spefscr(fpemu.fe_cx) << 16; | |||||
/* Clear the fpemu to start over on the lower bits. */ | |||||
memset(&fpemu, 0, sizeof(fpemu)); | |||||
/* Now low word */ | |||||
spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, | |||||
frame->fixreg[ra], 0); | |||||
spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, | |||||
frame->fixreg[rb], 0); | |||||
spefscr |= fpscr_to_spefscr(fpemu.fe_cx); | |||||
low = spe_emu_instr(instr_sec_op, &fpemu, &result, | |||||
&frame->fixreg[rd]); | |||||
if (instr_sec_op == EVFSCMPEQ || | |||||
instr_sec_op == EVFSCMPGT || | |||||
instr_sec_op == EVFSCMPLT) { | |||||
res = (high << 3) | (low << 2) | | |||||
((high | low) << 1) | (high & low); | |||||
width = NONE; | |||||
} else | |||||
width = VECTOR; | |||||
break; | |||||
} | |||||
enable_vec(curthread); | |||||
goto end; | |||||
case SPFP: | |||||
switch (instr_sec_op) { | |||||
case EFSABS: | |||||
frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31); | |||||
break; | |||||
case EFSNABS: | |||||
frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31); | |||||
break; | |||||
case EFSNEG: | |||||
frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31); | |||||
break; | |||||
case EFSCFD: | |||||
spe_explode(&fpemu, &fpemu.fe_f3, DOUBLE, | |||||
curthread->td_pcb->pcb_vec.vr[rb][0], | |||||
frame->fixreg[rb]); | |||||
result = &fpemu.fe_f3; | |||||
width = SINGLE; | |||||
break; | |||||
default: | |||||
spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, | |||||
frame->fixreg[ra], 0); | |||||
spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, | |||||
frame->fixreg[rb], 0); | |||||
width = SINGLE; | |||||
} | |||||
break; | |||||
case DPFP: | |||||
save_vec_nodrop(curthread); | |||||
switch (instr_sec_op) { | |||||
case EFDABS: | |||||
curthread->td_pcb->pcb_vec.vr[rd][0] = | |||||
curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31); | |||||
break; | |||||
case EFDNABS: | |||||
curthread->td_pcb->pcb_vec.vr[rd][0] = | |||||
curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31); | |||||
break; | |||||
case EFDNEG: | |||||
curthread->td_pcb->pcb_vec.vr[rd][0] = | |||||
curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31); | |||||
break; | |||||
case EFDCFS: | |||||
spe_explode(&fpemu, &fpemu.fe_f3, SINGLE, | |||||
frame->fixreg[rb], 0); | |||||
result = &fpemu.fe_f3; | |||||
width = DOUBLE; | |||||
break; | |||||
default: | |||||
spe_explode(&fpemu, &fpemu.fe_f1, DOUBLE, | |||||
curthread->td_pcb->pcb_vec.vr[ra][0], | |||||
frame->fixreg[ra]); | |||||
spe_explode(&fpemu, &fpemu.fe_f2, DOUBLE, | |||||
curthread->td_pcb->pcb_vec.vr[rb][0], | |||||
frame->fixreg[rb]); | |||||
width = DOUBLE; | |||||
} | |||||
break; | |||||
} | |||||
switch (instr_sec_op) { | |||||
case EFDCFS: | |||||
case EFSCFD: | |||||
/* Already handled. */ | |||||
break; | |||||
default: | |||||
res = spe_emu_instr(instr_sec_op, &fpemu, &result, | |||||
&frame->fixreg[rd]); | |||||
if (res != -1) | |||||
res <<= 2; | |||||
break; | |||||
} | |||||
switch (instr_sec_op & SPE_INST_MASK) { | |||||
case ECMPEQ: | |||||
case ECMPGT: | |||||
case ECMPLT: | |||||
frame->cr &= ~(0xf << cr_shift); | |||||
frame->cr |= (res << cr_shift); | |||||
break; | |||||
case ECTUI: | |||||
case ECTUIZ: | |||||
case ECTSI: | |||||
case ECTSIZ: | |||||
break; | |||||
default: | |||||
switch (width) { | |||||
case NONE: | |||||
case VECTOR: | |||||
break; | |||||
case SINGLE: | |||||
frame->fixreg[rd] = fpu_ftos(&fpemu, result); | |||||
break; | |||||
case DOUBLE: | |||||
curthread->td_pcb->pcb_vec.vr[rd][0] = | |||||
fpu_ftod(&fpemu, result, ftod_res); | |||||
frame->fixreg[rd] = ftod_res[1]; | |||||
enable_vec(curthread); | |||||
break; | |||||
default: | |||||
panic("Unknown storage width %d", width); | |||||
break; | |||||
} | |||||
} | |||||
end: | |||||
spefscr |= (mfspr(SPR_SPEFSCR) & ~SPEFSCR_FINVS); | |||||
mtspr(SPR_SPEFSCR, spefscr); | |||||
frame->srr0 += 4; | |||||
return; | |||||
} | |||||
/*
 * Handler for the SPE floating-point round exception.
 *
 * Intentionally a no-op for now: the trap frame is left untouched, which
 * keeps the truncated result in the destination register.  Overflow and
 * underflow handling is still to be done.
 */
void
spe_handle_fpround(struct trapframe *frame)
{

	(void)frame;
}