Index: sys/amd64/amd64/exception.S =================================================================== --- sys/amd64/amd64/exception.S +++ sys/amd64/amd64/exception.S @@ -171,21 +171,22 @@ alltraps: movq %rdi,TF_RDI(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz alltraps_segs /* already running with kernel GS.base */ + jz 1f /* already running with kernel GS.base */ swapgs movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) -alltraps_segs: - SAVE_SEGS - testl $PSL_I,TF_RFLAGS(%rsp) - jz alltraps_pushregs_no_rdi - sti -alltraps_pushregs_no_rdi: +1: SAVE_SEGS movq %rdx,TF_RDX(%rsp) movq %rax,TF_RAX(%rsp) + movq %rcx,TF_RCX(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) + jz 2f + call handle_ibrs_entry +2: testl $PSL_I,TF_RFLAGS(%rsp) + jz alltraps_pushregs_no_rax + sti alltraps_pushregs_no_rax: movq %rsi,TF_RSI(%rsp) - movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rbx,TF_RBX(%rsp) @@ -243,13 +244,18 @@ alltraps_noen: movq %rdi,TF_RDI(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz alltraps_noen_segs /* already running with kernel GS.base */ + jz 1f /* already running with kernel GS.base */ swapgs movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) -alltraps_noen_segs: - SAVE_SEGS - jmp alltraps_pushregs_no_rdi +1: SAVE_SEGS + movq %rdx,TF_RDX(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rcx,TF_RCX(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) + jz alltraps_pushregs_no_rax + call handle_ibrs_entry + jmp alltraps_pushregs_no_rax IDTVEC(dblfault) subq $TF_ERR,%rsp @@ -301,12 +307,14 @@ movq %rdi,TF_RDI(%rsp) movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) jmp page_u IDTVEC(page) subq $TF_ERR,%rsp movq %rdi,TF_RDI(%rsp) /* free up GP registers */ movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz page_cr2 /* already running with kernel GS.base */ swapgs @@ -314,6 +322,7 @@ andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) movq PCPU(SAVED_UCR3),%rax movq %rax,PCB_SAVED_UCR3(%rdi) + call handle_ibrs_entry page_cr2: movq %cr2,%rdi /* preserve %cr2 before .. */ movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */ @@ -371,6 +380,7 @@ movq %rdi,TF_RDI(%rsp) /* free up a GP register */ movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) movw %fs,TF_FS(%rsp) movw %gs,TF_GS(%rsp) leaq doreti_iret(%rip),%rdi @@ -396,7 +406,8 @@ 3: cmpw $KUG32SEL,TF_GS(%rsp) jne 4f movq %rdx,PCB_GSBASE(%rdi) -4: orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ +4: call handle_ibrs_entry + orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) @@ -440,7 +451,9 @@ movq %r11,TF_RSP(%rsp) /* user stack pointer */ movq PCPU(SCRATCH_RAX),%rax movq %rax,TF_RAX(%rsp) /* syscall number */ + movq %rdx,TF_RDX(%rsp) /* arg 3 */ SAVE_SEGS + call handle_ibrs_entry movq PCPU(CURPCB),%r11 andl $~PCB_FULL_IRET,PCB_FLAGS(%r11) sti @@ -449,7 +462,6 @@ movq $2,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) /* arg 1 */ movq %rsi,TF_RSI(%rsp) /* arg 2 */ - movq %rdx,TF_RDX(%rsp) /* arg 3 */ movq %r10,TF_RCX(%rsp) /* arg 4 */ movq %r8,TF_R8(%rsp) /* arg 5 */ movq %r9,TF_R9(%rsp) /* arg 6 */ @@ -475,6 +487,7 @@ movq PCPU(CURTHREAD),%rax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) jne 3f + call handle_ibrs_exit /* Restore preserved registers. */ MEXITCOUNT movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ @@ -561,8 +574,8 @@ testb $SEL_RPL_MASK,TF_CS(%rsp) jnz nmi_fromuserspace /* - * We've interrupted the kernel. Preserve GS.base in %r12 - * and %cr3 in %r13. + * We've interrupted the kernel. Preserve GS.base in %r12, + * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d. */ movl $MSR_GSBASE,%ecx rdmsr @@ -577,8 +590,14 @@ movq %cr3,%r13 movq PCPU(KCR3),%rax cmpq $~0,%rax - je nmi_calltrap + je 1f movq %rax,%cr3 +1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je nmi_calltrap + movl $MSR_IA32_SPEC_CTRL,%ecx + rdmsr + movl %eax,%r14d + call handle_ibrs_entry jmp nmi_calltrap nmi_fromuserspace: incl %ebx @@ -588,7 +607,8 @@ cmpq $~0,%rax je 1f movq %rax,%cr3 -1: movq PCPU(CURPCB),%rdi +1: call handle_ibrs_entry + movq PCPU(CURPCB),%rdi testq %rdi,%rdi jz 3f orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) @@ -682,10 +702,19 @@ #endif testl %ebx,%ebx /* %ebx == 0 => return to userland */ jnz doreti_exit + /* + * Restore speculation control MSR, if preserved. + */ + testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je 1f + movl %r14d,%eax + xorl %edx,%edx + movl $MSR_IA32_SPEC_CTRL,%ecx + wrmsr /* * Put back the preserved MSR_GSBASE value. */ - movl $MSR_GSBASE,%ecx +1: movl $MSR_GSBASE,%ecx movq %r12,%rdx movl %edx,%eax shrq $32,%rdx @@ -743,8 +772,8 @@ testb $SEL_RPL_MASK,TF_CS(%rsp) jnz mchk_fromuserspace /* - * We've interrupted the kernel. Preserve GS.base in %r12 - * and %cr3 in %r13. + * We've interrupted the kernel. Preserve GS.base in %r12, + * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d. */ movl $MSR_GSBASE,%ecx rdmsr @@ -759,8 +788,14 @@ movq %cr3,%r13 movq PCPU(KCR3),%rax cmpq $~0,%rax - je mchk_calltrap + je 1f movq %rax,%cr3 +1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je mchk_calltrap + movl $MSR_IA32_SPEC_CTRL,%ecx + rdmsr + movl %eax,%r14d + call handle_ibrs_entry jmp mchk_calltrap mchk_fromuserspace: incl %ebx @@ -770,7 +805,7 @@ cmpq $~0,%rax je 1f movq %rax,%cr3 -1: +1: call handle_ibrs_entry /* Note: this label is also used by ddb and gdb: */ mchk_calltrap: FAKE_MCOUNT(TF_RIP(%rsp)) @@ -779,10 +814,19 @@ MEXITCOUNT testl %ebx,%ebx /* %ebx == 0 => return to userland */ jnz doreti_exit + /* + * Restore speculation control MSR, if preserved. + */ + testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je 1f + movl %r14d,%eax + xorl %edx,%edx + movl $MSR_IA32_SPEC_CTRL,%ecx + wrmsr /* * Put back the preserved MSR_GSBASE value. */ - movl $MSR_GSBASE,%ecx +1: movl $MSR_GSBASE,%ecx movq %r12,%rdx movl %edx,%eax shrq $32,%rdx @@ -960,6 +1004,7 @@ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 2f /* keep running with kernel GS.base */ cli + call handle_ibrs_exit_rs cmpb $0,pti je 1f pushq %rdx @@ -1011,6 +1056,10 @@ .globl doreti_iret_fault doreti_iret_fault: subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */ + movq %rax,TF_RAX(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + call handle_ibrs_entry testb $SEL_RPL_MASK,TF_CS(%rsp) jz 1f sti @@ -1019,11 +1068,8 @@ movl $TF_HASSEGS,TF_FLAGS(%rsp) movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) - movq %rdx,TF_RDX(%rsp) - movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) - movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) Index: sys/amd64/amd64/genassym.c =================================================================== --- sys/amd64/amd64/genassym.c +++ sys/amd64/amd64/genassym.c @@ -228,6 +228,7 @@ ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3)); ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack)); ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ); +ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set)); ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL); ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL); Index: sys/amd64/amd64/initcpu.c =================================================================== --- sys/amd64/amd64/initcpu.c +++ sys/amd64/amd64/initcpu.c @@ -223,6 +223,7 @@ wrmsr(MSR_EFER, msr); pg_nx = PG_NX; } + hw_ibrs_recalculate(); switch (cpu_vendor_id) { case CPU_VENDOR_AMD: init_amd(); Index: sys/amd64/amd64/machdep.c =================================================================== --- sys/amd64/amd64/machdep.c +++ sys/amd64/amd64/machdep.c @@ -1826,6 +1826,8 @@ #endif thread0.td_critnest = 0; + TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable); + TSEXIT(); /* Location of kernel stack for locore */ Index: sys/amd64/amd64/support.S =================================================================== --- sys/amd64/amd64/support.S +++ sys/amd64/amd64/support.S @@ -33,6 +33,7 @@ #include "opt_ddb.h" #include +#include #include #include "assym.s" @@ -850,3 +851,67 @@ movq %rsi,%cr3 /* back to kernel */ popfq retq + + .altmacro + .macro ibrs_seq_label l +handle_ibrs_\l: + .endm + .macro ibrs_call_label l + call handle_ibrs_\l + .endm + .macro ibrs_seq count + ll=1 + .rept \count + ibrs_call_label %(ll) + nop + ibrs_seq_label %(ll) + addq $8,%rsp + ll=ll+1 + .endr + .endm + +/* all callers already saved %rax, %rdx, and %rcx */ +ENTRY(handle_ibrs_entry) + cmpb $0,hw_ibrs_active(%rip) + je 1f + movl $MSR_IA32_SPEC_CTRL,%ecx + movl $IA32_SPEC_CTRL_IBRS,%eax + movl $IA32_SPEC_CTRL_IBRS>>32,%edx + wrmsr + movb $1,PCPU(IBPB_SET) + testl $CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip) + jne 1f + ibrs_seq 32 +1: ret +END(handle_ibrs_entry) + +ENTRY(handle_ibrs_exit) + cmpb $0,PCPU(IBPB_SET) + je 1f + movl $MSR_IA32_SPEC_CTRL,%ecx + xorl %eax,%eax + xorl %edx,%edx + wrmsr + movb $0,PCPU(IBPB_SET) +1: ret +END(handle_ibrs_exit) + +/* registers-neutral version, but needs stack */ +ENTRY(handle_ibrs_exit_rs) + cmpb $0,PCPU(IBPB_SET) + je 1f + pushq %rax + pushq %rdx + pushq %rcx + movl $MSR_IA32_SPEC_CTRL,%ecx + xorl %eax,%eax + xorl %edx,%edx + wrmsr + popq %rcx + popq %rdx + popq %rax + movb $0,PCPU(IBPB_SET) +1: ret +END(handle_ibrs_exit_rs) + + .noaltmacro Index: sys/amd64/ia32/ia32_exception.S =================================================================== --- sys/amd64/ia32/ia32_exception.S +++ sys/amd64/ia32/ia32_exception.S @@ -53,13 +53,14 @@ movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) SAVE_SEGS - sti - movq %rsi,TF_RSI(%rsp) + movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) + call handle_ibrs_entry + sti + movq %rsi,TF_RSI(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) - movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) Index: sys/amd64/include/md_var.h =================================================================== --- sys/amd64/include/md_var.h +++ sys/amd64/include/md_var.h @@ -38,6 +38,7 @@ extern uint64_t *vm_page_dump; extern int hw_lower_amd64_sharedpage; +extern int hw_ibrs_disable; /* * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its Index: sys/amd64/include/pcpu.h =================================================================== --- sys/amd64/include/pcpu.h +++ sys/amd64/include/pcpu.h @@ -74,7 +74,8 @@ uint32_t pc_pcid_next; \ uint32_t pc_pcid_gen; \ uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \ - char __pad[224] /* be divisor of PAGE_SIZE \ + uint32_t pc_ibpb_set; \ + char __pad[216] /* be divisor of PAGE_SIZE \ after cache alignment */ #define PC_DBREG_CMD_NONE 0 Index: sys/dev/cpuctl/cpuctl.c =================================================================== --- sys/dev/cpuctl/cpuctl.c +++ sys/dev/cpuctl/cpuctl.c @@ -527,6 +527,7 @@ set_cpu(cpu, td); identify_cpu1(); identify_cpu2(); + hw_ibrs_recalculate(); restore_cpu(oldcpu, is_bound, td); printcpuinfo(); return (0); Index: sys/i386/i386/support.s =================================================================== --- sys/i386/i386/support.s +++ sys/i386/i386/support.s @@ -827,3 +827,11 @@ movl $0,PCB_ONFAULT(%ecx) movl $EFAULT,%eax ret + +ENTRY(handle_ibrs_entry) + ret +END(handle_ibrs_entry) + +ENTRY(handle_ibrs_exit) + ret +END(handle_ibrs_exit) Index: sys/x86/include/specialreg.h =================================================================== --- sys/x86/include/specialreg.h +++ sys/x86/include/specialreg.h @@ -697,6 +697,10 @@ #define IA32_MISC_EN_xTPRD 0x0000000000800000ULL #define IA32_MISC_EN_XDD 0x0000000400000000ULL +/* + * IA32_SPEC_CTRL and IA32_PRED_CMD MSRs are described in the Intel' + * document 336996-001 Speculative Execution Side Channel Mitigations. + */ /* MSR IA32_SPEC_CTRL */ #define IA32_SPEC_CTRL_IBRS 0x0000000000000001ULL #define IA32_SPEC_CTRL_STIBP 0x0000000000000002ULL Index: sys/x86/include/x86_var.h =================================================================== --- sys/x86/include/x86_var.h +++ sys/x86/include/x86_var.h @@ -131,6 +131,9 @@ void fillw(int /*u_short*/ pat, void *base, size_t cnt); int is_physical_memory(vm_paddr_t addr); int isa_nmi(int cd); +void handle_ibrs_entry(void); +void handle_ibrs_exit(void); +void hw_ibrs_recalculate(void); void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame); void nmi_call_kdb_smp(u_int type, struct trapframe *frame); void nmi_handle_intr(u_int type, struct trapframe *frame); Index: sys/x86/x86/cpu_machdep.c =================================================================== --- sys/x86/x86/cpu_machdep.c +++ sys/x86/x86/cpu_machdep.c @@ -142,6 +142,12 @@ int *state; /* + * A comment in Linux patch claims that 'CPUs run faster with + * speculation protection disabled. All CPU threads in a core + * must disable speculation protection for it to be + * disabled. Disable it while we are idle so the other + * hyperthread can run fast.' + * * XXXKIB. Software coordination mode should be supported, * but all Intel CPUs provide hardware coordination. */ @@ -150,9 +156,11 @@ KASSERT(*state == STATE_SLEEPING, ("cpu_mwait_cx: wrong monitorbuf state")); *state = STATE_MWAIT; + handle_ibrs_entry(); cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) cpu_mwait(MWAIT_INTRBREAK, mwait_hint); + handle_ibrs_exit(); /* * We should exit on any event that interrupts mwait, because @@ -569,3 +577,46 @@ nmi_call_kdb(PCPU_GET(cpuid), type, frame); #endif } + +int hw_ibrs_active; +SYSCTL_INT(_hw, OID_AUTO, ibrs_active, CTLFLAG_RD, &hw_ibrs_active, 0, + "IBRS active"); + +int hw_ibrs_disable; +void +hw_ibrs_recalculate(void) +{ + uint64_t v; + + if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_IBRS_ALL) != 0) { + if (hw_ibrs_disable) { + v= rdmsr(MSR_IA32_SPEC_CTRL); + v &= ~IA32_SPEC_CTRL_IBRS; + wrmsr(MSR_IA32_SPEC_CTRL, v); + } else { + v= rdmsr(MSR_IA32_SPEC_CTRL); + v |= IA32_SPEC_CTRL_IBRS; + wrmsr(MSR_IA32_SPEC_CTRL, v); + } + return; + } + hw_ibrs_active = (cpu_stdext_feature3 & CPUID_STDEXT3_IBPB) != 0 && + !hw_ibrs_disable; +} + +static int +hw_ibrs_disable_handler(SYSCTL_HANDLER_ARGS) +{ + int error, val; + + val = hw_ibrs_disable; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + hw_ibrs_disable = val != 0; + hw_ibrs_recalculate(); + return (0); +} +SYSCTL_PROC(_hw, OID_AUTO, ibrs_disable, CTLTYPE_INT | CTLFLAG_RWTUN | + CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, hw_ibrs_disable_handler, "I", + "Disable IBRS"); Index: sys/x86/x86/identcpu.c =================================================================== --- sys/x86/x86/identcpu.c +++ sys/x86/x86/identcpu.c @@ -1614,7 +1614,8 @@ if (strcmp(cpu_vendor, AMD_VENDOR_ID) == 0) return (0); - + if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) != 0) + return (0); return (1); }