Index: head/sys/amd64/amd64/cpu_switch.S
===================================================================
--- head/sys/amd64/amd64/cpu_switch.S
+++ head/sys/amd64/amd64/cpu_switch.S
@@ -116,22 +116,25 @@
 
 	/* have we used fp, and need a save? */
 	cmpq	%rdi,PCPU(FPCURTHREAD)
-	jne	2f
-	movq	PCB_SAVEFPU(%r8),%r8
+	jne	ctx_switch_fpusave_done
+	movq	PCB_SAVEFPU(%r8),%r9
 	clts
 	cmpl	$0,use_xsave(%rip)
 	jne	1f
-	fxsave	(%r8)
-	jmp	2f
+	fxsave	(%r9)
+	jmp	ctx_switch_fpusave_done
 1:	movq	%rdx,%rcx
 	movl	xsave_mask,%eax
 	movl	xsave_mask+4,%edx
+	testl	$PCB_32BIT,PCB_FLAGS(%r8)
+	jne	ctx_switch_xsave32
 	.globl	ctx_switch_xsave
 ctx_switch_xsave:
 	/* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
-	xsave	(%r8)
+	xsave64	(%r9)
+ctx_switch_xsave_done:
 	movq	%rcx,%rdx
-2:
+ctx_switch_fpusave_done:
 	/* Save is done.  Now fire up new thread.  Leave old vmspace. */
 	movq	%rsi,%r12
 	movq	%rdi,%r13
@@ -294,6 +297,11 @@
 	movq	%rdx,8(%rax)
 	movl	$LDTSEL,%eax
 	jmp	ld_ldt
+
+	.globl	ctx_switch_xsave32
+ctx_switch_xsave32:
+	xsave	(%r9)
+	jmp	ctx_switch_xsave_done
 END(cpu_switch)
 
 /*
Index: head/sys/amd64/amd64/fpu.c
===================================================================
--- head/sys/amd64/amd64/fpu.c
+++ head/sys/amd64/amd64/fpu.c
@@ -47,6 +47,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -81,7 +82,7 @@
 #define	stmxcsr(addr)	__asm __volatile("stmxcsr %0" : : "m" (*(addr)))
 
 static __inline void
-xrstor(char *addr, uint64_t mask)
+xrstor32(char *addr, uint64_t mask)
 {
 	uint32_t low, hi;
 
@@ -91,27 +92,59 @@
 }
 
 static __inline void
-xsave(char *addr, uint64_t mask)
+xrstor64(char *addr, uint64_t mask)
 {
 	uint32_t low, hi;
 
 	low = mask;
 	hi = mask >> 32;
+	__asm __volatile("xrstor64 %0" : : "m" (*addr), "a" (low), "d" (hi));
+}
+
+static __inline void
+xsave32(char *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	low = mask;
+	hi = mask >> 32;
 	__asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) :
 	    "memory");
 }
 
 static __inline void
-xsaveopt(char *addr, uint64_t mask)
+xsave64(char *addr, uint64_t mask)
 {
 	uint32_t low, hi;
 
 	low = mask;
 	hi = mask >> 32;
+	__asm __volatile("xsave64 %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+	    "memory");
+}
+
+static __inline void
+xsaveopt32(char *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	low = mask;
+	hi = mask >> 32;
 	__asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) :
 	    "memory");
 }
 
+static __inline void
+xsaveopt64(char *addr, uint64_t mask)
+{
+	uint32_t low, hi;
+
+	low = mask;
+	hi = mask >> 32;
+	__asm __volatile("xsaveopt64 %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+	    "memory");
+}
+
 #else /* !(__GNUCLIKE_ASM && !lint) */
 
 void	fldcw(u_short cw);
@@ -123,9 +156,12 @@
 void	fxrstor(caddr_t addr);
 void	ldmxcsr(u_int csr);
 void	stmxcsr(u_int *csr);
-void	xrstor(char *addr, uint64_t mask);
-void	xsave(char *addr, uint64_t mask);
-void	xsaveopt(char *addr, uint64_t mask);
+void	xrstor32(char *addr, uint64_t mask);
+void	xrstor64(char *addr, uint64_t mask);
+void	xsave32(char *addr, uint64_t mask);
+void	xsave64(char *addr, uint64_t mask);
+void	xsaveopt32(char *addr, uint64_t mask);
+void	xsaveopt64(char *addr, uint64_t mask);
 
 #endif /* __GNUCLIKE_ASM && !lint */
 
@@ -166,24 +202,48 @@
 } *xsave_area_desc;
 
 static void
-fpusave_xsaveopt(void *addr)
+fpusave_xsaveopt64(void *addr)
 {
+	xsaveopt64((char *)addr, xsave_mask);
+}
 
-	xsaveopt((char *)addr, xsave_mask);
+static void
+fpusave_xsaveopt3264(void *addr)
+{
+	if (SV_CURPROC_FLAG(SV_ILP32))
+		xsaveopt32((char *)addr, xsave_mask);
+	else
+		xsaveopt64((char *)addr, xsave_mask);
 }
 
 static void
-fpusave_xsave(void *addr)
+fpusave_xsave64(void *addr)
 {
+	xsave64((char *)addr, xsave_mask);
+}
 
-	xsave((char *)addr, xsave_mask);
+static void
+fpusave_xsave3264(void *addr)
+{
+	if (SV_CURPROC_FLAG(SV_ILP32))
+		xsave32((char *)addr, xsave_mask);
+	else
+		xsave64((char *)addr, xsave_mask);
 }
 
 static void
-fpurestore_xrstor(void *addr)
+fpurestore_xrstor64(void *addr)
 {
+	xrstor64((char *)addr, xsave_mask);
+}
 
-	xrstor((char *)addr, xsave_mask);
+static void
+fpurestore_xrstor3264(void *addr)
+{
+	if (SV_CURPROC_FLAG(SV_ILP32))
+		xrstor32((char *)addr, xsave_mask);
+	else
+		xrstor64((char *)addr, xsave_mask);
 }
 
 static void
@@ -216,17 +276,24 @@
 {
 
 	init_xsave();
-	if (use_xsave)
-		return ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0 ?
-		    fpusave_xsaveopt : fpusave_xsave);
-	return (fpusave_fxsave);
+	if (!use_xsave)
+		return (fpusave_fxsave);
+	if ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+		return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ?
+		    fpusave_xsaveopt64 : fpusave_xsaveopt3264);
+	}
+	return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ?
+	    fpusave_xsave64 : fpusave_xsave3264);
 }
 
 DEFINE_IFUNC(, void, fpurestore, (void *))
 {
 
 	init_xsave();
-	return (use_xsave ? fpurestore_xrstor : fpurestore_fxrstor);
+	if (!use_xsave)
+		return (fpurestore_fxrstor);
+	return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ?
+	    fpurestore_xrstor64 : fpurestore_xrstor3264);
 }
 
 void
@@ -293,6 +360,7 @@
 	 * read-only before cpu_startup().
 	 */
 	old_wp = disable_wp();
+	ctx_switch_xsave32[3] |= 0x10;
 	ctx_switch_xsave[3] |= 0x10;
 	restore_wp(old_wp);
 }
Index: head/sys/amd64/include/md_var.h
===================================================================
--- head/sys/amd64/include/md_var.h
+++ head/sys/amd64/include/md_var.h
@@ -37,6 +37,7 @@
 #include
 
 extern char	ctx_switch_xsave[];
+extern char	ctx_switch_xsave32[];
 extern int	hw_lower_amd64_sharedpage;
 extern int	hw_ibrs_disable;
 extern int	hw_ssb_disable;
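
A note on the dispatch in the fpu.c hunks above (not part of the patch): fpusave and fpurestore are ifuncs, resolved once via DEFINE_IFUNC(), and the *3264 variants with their per-call SV_ILP32 branch are only selected when CPUID_STDEXT_NFPUSG is clear, i.e. on CPUs that still record the x87 FCS/FDS selectors; where the selectors are deprecated, the 64-bit-only routines are always used. The standalone C sketch below mirrors that resolver decision tree so it can be compiled and tested outside the kernel; every demo_* name is hypothetical and the CPU feature bits are stubbed as plain booleans.

/*
 * Illustrative sketch only, not part of the patch.  It mirrors the
 * shape of the DEFINE_IFUNC() resolvers in fpu.c: pick a 64-bit-only
 * save routine when the CPU does not track x87 FCS/FDS selectors
 * (the CPUID_STDEXT_NFPUSG case), otherwise use the 32/64
 * dispatching variants.  All demo_* names are made up; the feature
 * bits stand in for use_xsave and cpu_stdext_feature tests.
 */
#include <stdbool.h>
#include <stdio.h>

static bool demo_use_xsave = true;	/* stands in for use_xsave */
static bool demo_has_xsaveopt = true;	/* CPUID_EXTSTATE_XSAVEOPT */
static bool demo_no_fpu_sel = false;	/* CPUID_STDEXT_NFPUSG */

static void demo_fxsave(void *a)       { (void)a; puts("fxsave"); }
static void demo_xsave64(void *a)      { (void)a; puts("xsave64"); }
static void demo_xsave3264(void *a)    { (void)a; puts("xsave/xsave64 by ABI"); }
static void demo_xsaveopt64(void *a)   { (void)a; puts("xsaveopt64"); }
static void demo_xsaveopt3264(void *a) { (void)a; puts("xsaveopt/xsaveopt64 by ABI"); }

/*
 * Same decision tree as the fpusave resolver: the choice is made
 * once, so the common case pays no extra branch; only the 3264
 * variants test the process ABI on every save.
 */
static void (*demo_resolve_fpusave(void))(void *)
{

	if (!demo_use_xsave)
		return (demo_fxsave);
	if (demo_has_xsaveopt)
		return (demo_no_fpu_sel ? demo_xsaveopt64 : demo_xsaveopt3264);
	return (demo_no_fpu_sel ? demo_xsave64 : demo_xsave3264);
}

int
main(void)
{
	void (*save)(void *) = demo_resolve_fpusave();

	save(NULL);		/* prints which variant was selected */
	return (0);
}

With the default stub values above the sketch prints "xsaveopt/xsaveopt64 by ABI", i.e. the path a CPU with XSAVEOPT but without the NFPUSG bit would take in the patched resolver.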