Index: sys/amd64/amd64/exception.S
===================================================================
--- sys/amd64/amd64/exception.S
+++ sys/amd64/amd64/exception.S
@@ -171,21 +171,22 @@
 alltraps:
 	movq	%rdi,TF_RDI(%rsp)
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
-	jz	alltraps_segs	/* already running with kernel GS.base */
+	jz	1f		/* already running with kernel GS.base */
 	swapgs
 	movq	PCPU(CURPCB),%rdi
 	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-alltraps_segs:
-	SAVE_SEGS
-	testl	$PSL_I,TF_RFLAGS(%rsp)
-	jz	alltraps_pushregs_no_rdi
-	sti
-alltraps_pushregs_no_rdi:
+1:	SAVE_SEGS
 	movq	%rdx,TF_RDX(%rsp)
 	movq	%rax,TF_RAX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
+	testb	$SEL_RPL_MASK,TF_CS(%rsp)
+	jz	2f
+	call	handle_ibrs_entry
+2:	testl	$PSL_I,TF_RFLAGS(%rsp)
+	jz	alltraps_pushregs_no_rax
+	sti
 alltraps_pushregs_no_rax:
 	movq	%rsi,TF_RSI(%rsp)
-	movq	%rcx,TF_RCX(%rsp)
 	movq	%r8,TF_R8(%rsp)
 	movq	%r9,TF_R9(%rsp)
 	movq	%rbx,TF_RBX(%rsp)
@@ -243,13 +244,18 @@
 alltraps_noen:
 	movq	%rdi,TF_RDI(%rsp)
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
-	jz	alltraps_noen_segs	/* already running with kernel GS.base */
+	jz	1f	/* already running with kernel GS.base */
 	swapgs
 	movq	PCPU(CURPCB),%rdi
 	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-alltraps_noen_segs:
-	SAVE_SEGS
-	jmp	alltraps_pushregs_no_rdi
+1:	SAVE_SEGS
+	movq	%rdx,TF_RDX(%rsp)
+	movq	%rax,TF_RAX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
+	testb	$SEL_RPL_MASK,TF_CS(%rsp)
+	jz	alltraps_pushregs_no_rax
+	call	handle_ibrs_entry
+	jmp	alltraps_pushregs_no_rax
 
 IDTVEC(dblfault)
 	subq	$TF_ERR,%rsp
@@ -301,12 +307,14 @@
 	movq	%rdi,TF_RDI(%rsp)
 	movq	%rax,TF_RAX(%rsp)
 	movq	%rdx,TF_RDX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
 	jmp	page_u
IDTVEC(page)
 	subq	$TF_ERR,%rsp
 	movq	%rdi,TF_RDI(%rsp)	/* free up GP registers */
 	movq	%rax,TF_RAX(%rsp)
 	movq	%rdx,TF_RDX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	page_cr2		/* already running with kernel GS.base */
 	swapgs
@@ -314,6 +322,7 @@
 	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
 	movq	PCPU(SAVED_UCR3),%rax
 	movq	%rax,PCB_SAVED_UCR3(%rdi)
+	call	handle_ibrs_entry
page_cr2:
 	movq	%cr2,%rdi		/* preserve %cr2 before .. */
 	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
@@ -371,6 +380,7 @@
 	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
 	movq	%rax,TF_RAX(%rsp)
 	movq	%rdx,TF_RDX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
 	movw	%fs,TF_FS(%rsp)
 	movw	%gs,TF_GS(%rsp)
 	leaq	doreti_iret(%rip),%rdi
@@ -396,7 +406,8 @@
 3:	cmpw	$KUG32SEL,TF_GS(%rsp)
 	jne	4f
 	movq	%rdx,PCB_GSBASE(%rdi)
-4:	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
+4:	call	handle_ibrs_entry
+	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
 	movw	%es,TF_ES(%rsp)
 	movw	%ds,TF_DS(%rsp)
 	testl	$PSL_I,TF_RFLAGS(%rsp)
@@ -440,7 +451,9 @@
 	movq	%r11,TF_RSP(%rsp)	/* user stack pointer */
 	movq	PCPU(SCRATCH_RAX),%rax
 	movq	%rax,TF_RAX(%rsp)	/* syscall number */
+	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
 	SAVE_SEGS
+	call	handle_ibrs_entry
 	movq	PCPU(CURPCB),%r11
 	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)
 	sti
@@ -449,7 +462,6 @@
 	movq	$2,TF_ERR(%rsp)
 	movq	%rdi,TF_RDI(%rsp)	/* arg 1 */
 	movq	%rsi,TF_RSI(%rsp)	/* arg 2 */
-	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
 	movq	%r10,TF_RCX(%rsp)	/* arg 4 */
 	movq	%r8,TF_R8(%rsp)		/* arg 5 */
 	movq	%r9,TF_R9(%rsp)		/* arg 6 */
@@ -475,6 +487,7 @@
 	movq	PCPU(CURTHREAD),%rax
 	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
 	jne	3f
+	call	handle_ibrs_exit
 	/* Restore preserved registers. */
 	MEXITCOUNT
 	movq	TF_RDI(%rsp),%rdi	/* bonus; preserve arg 1 */
@@ -561,8 +574,8 @@
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	nmi_fromuserspace
 	/*
-	 * We've interrupted the kernel.  Preserve GS.base in %r12
-	 * and %cr3 in %r13.
+	 * We've interrupted the kernel.  Preserve GS.base in %r12,
+	 * %cr3 in %r13, and possibly the lower half of MSR_IA32_SPEC_CTRL in %r14d.
 	 */
 	movl	$MSR_GSBASE,%ecx
 	rdmsr
@@ -577,8 +590,14 @@
 	movq	%cr3,%r13
 	movq	PCPU(KCR3),%rax
 	cmpq	$~0,%rax
-	je	nmi_calltrap
+	je	1f
 	movq	%rax,%cr3
+1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	nmi_calltrap
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	rdmsr
+	movl	%eax,%r14d
+	call	handle_ibrs_entry
 	jmp	nmi_calltrap
nmi_fromuserspace:
 	incl	%ebx
@@ -588,7 +607,8 @@
 	cmpq	$~0,%rax
 	je	1f
 	movq	%rax,%cr3
-1:	movq	PCPU(CURPCB),%rdi
+1:	call	handle_ibrs_entry
+	movq	PCPU(CURPCB),%rdi
 	testq	%rdi,%rdi
 	jz	3f
 	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
@@ -682,10 +702,19 @@
 #endif
 	testl	%ebx,%ebx	/* %ebx == 0 => return to userland */
 	jnz	doreti_exit
+	/*
+	 * Restore speculation control MSR, if preserved.
+	 */
+	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	1f
+	movl	%r14d,%eax
+	xorl	%edx,%edx
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	wrmsr
 	/*
 	 * Put back the preserved MSR_GSBASE value.
 	 */
-	movl	$MSR_GSBASE,%ecx
+1:	movl	$MSR_GSBASE,%ecx
 	movq	%r12,%rdx
 	movl	%edx,%eax
 	shrq	$32,%rdx
@@ -743,8 +772,8 @@
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jnz	mchk_fromuserspace
 	/*
-	 * We've interrupted the kernel.  Preserve GS.base in %r12
-	 * and %cr3 in %r13.
+	 * We've interrupted the kernel.  Preserve GS.base in %r12,
+	 * %cr3 in %r13, and possibly the lower half of MSR_IA32_SPEC_CTRL in %r14d.
 	 */
 	movl	$MSR_GSBASE,%ecx
 	rdmsr
@@ -759,8 +788,14 @@
 	movq	%cr3,%r13
 	movq	PCPU(KCR3),%rax
 	cmpq	$~0,%rax
-	je	mchk_calltrap
+	je	1f
 	movq	%rax,%cr3
+1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	mchk_calltrap
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	rdmsr
+	movl	%eax,%r14d
+	call	handle_ibrs_entry
 	jmp	mchk_calltrap
mchk_fromuserspace:
 	incl	%ebx
@@ -770,7 +805,7 @@
 	cmpq	$~0,%rax
 	je	1f
 	movq	%rax,%cr3
-1:
+1:	call	handle_ibrs_entry
 	/* Note: this label is also used by ddb and gdb: */
mchk_calltrap:
 	FAKE_MCOUNT(TF_RIP(%rsp))
@@ -779,10 +814,19 @@
 	MEXITCOUNT
 	testl	%ebx,%ebx	/* %ebx == 0 => return to userland */
 	jnz	doreti_exit
+	/*
+	 * Restore speculation control MSR, if preserved.
+	 */
+	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	1f
+	movl	%r14d,%eax
+	xorl	%edx,%edx
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	wrmsr
 	/*
 	 * Put back the preserved MSR_GSBASE value.
 	 */
-	movl	$MSR_GSBASE,%ecx
+1:	movl	$MSR_GSBASE,%ecx
 	movq	%r12,%rdx
 	movl	%edx,%eax
 	shrq	$32,%rdx
@@ -960,6 +1004,7 @@
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
 	jz	2f	/* keep running with kernel GS.base */
 	cli
+	call	handle_ibrs_exit_rs
 	cmpb	$0,pti
 	je	1f
 	pushq	%rdx
@@ -1011,6 +1056,10 @@
 	.globl	doreti_iret_fault
doreti_iret_fault:
 	subq	$TF_RIP,%rsp	/* space including tf_err, tf_trapno */
+	movq	%rax,TF_RAX(%rsp)
+	movq	%rdx,TF_RDX(%rsp)
+	movq	%rcx,TF_RCX(%rsp)
+	call	handle_ibrs_entry
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
 	jz	1f
 	sti
@@ -1019,11 +1068,8 @@
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
 	movq	%rdi,TF_RDI(%rsp)
 	movq	%rsi,TF_RSI(%rsp)
-	movq	%rdx,TF_RDX(%rsp)
-	movq	%rcx,TF_RCX(%rsp)
 	movq	%r8,TF_R8(%rsp)
 	movq	%r9,TF_R9(%rsp)
-	movq	%rax,TF_RAX(%rsp)
 	movq	%rbx,TF_RBX(%rsp)
 	movq	%rbp,TF_RBP(%rsp)
 	movq	%r10,TF_R10(%rsp)
Index: sys/amd64/amd64/initcpu.c
===================================================================
--- sys/amd64/amd64/initcpu.c
+++ sys/amd64/amd64/initcpu.c
@@ -62,6 +62,11 @@
  */
 static int	hw_clflush_disable = -1;
 
+int	hw_ibrs_disable;
+SYSCTL_INT(_hw, OID_AUTO, ibrs_disable, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &hw_ibrs_disable, 0,
+    "IBRS");
+
 static void
 init_amd(void)
 {
@@ -223,6 +228,8 @@
 		wrmsr(MSR_EFER, msr);
 		pg_nx = PG_NX;
 	}
+	if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_IBRS_ALL) != 0)
+		wrmsr(MSR_IA32_SPEC_CTRL, IA32_SPEC_CTRL_IBRS);
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_AMD:
 		init_amd();
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c
+++ sys/amd64/amd64/machdep.c
@@ -1826,6 +1826,8 @@
 #endif
 	thread0.td_critnest = 0;
 
+	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
+
 	TSEXIT();
 
 	/* Location of kernel stack for locore */
Index: sys/amd64/amd64/support.S
===================================================================
--- sys/amd64/amd64/support.S
+++ sys/amd64/amd64/support.S
@@ -33,6 +33,7 @@
 #include "opt_ddb.h"
 
 #include <machine/asmacros.h>
+#include <machine/specialreg.h>
 #include <machine/pmap.h>
 
 #include "assym.s"
@@ -850,3 +851,75 @@
 	movq	%rsi,%cr3	/* back to kernel */
 	popfq
 	retq
+
+	.altmacro
+	.macro	ibrs_seq_label l
+handle_ibrs_\l:
+	.endm
+	.macro	ibrs_call_label l
+	call	handle_ibrs_\l
+	.endm
+	.macro	ibrs_seq count
+	ll=1
+	.rept	\count
+	ibrs_call_label	%(ll)
+	nop
+	ibrs_seq_label	%(ll)
+	addq	$8,%rsp
+	ll=ll+1
+	.endr
+	.endm
+
+/* all callers already saved %rax, %rdx, and %rcx */
+ENTRY(handle_ibrs_entry)
+	testl	$IA32_ARCH_CAP_IBRS_ALL,cpu_ia32_arch_caps(%rip)
+	jne	2f
+	cmpb	$0,hw_ibrs_disable(%rip)
+	jne	2f
+	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	1f
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	movl	$IA32_SPEC_CTRL_IBRS,%eax
+	movl	$IA32_SPEC_CTRL_IBRS>>32,%edx
+	wrmsr
+1:	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
+	jne	2f
+	ibrs_seq	32
+2:	ret
+END(handle_ibrs_entry)
+
+ENTRY(handle_ibrs_exit)
+	testl	$IA32_ARCH_CAP_IBRS_ALL,cpu_ia32_arch_caps(%rip)
+	jne	1f
+	cmpb	$0,hw_ibrs_disable(%rip)
+	jne	1f
+	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	1f
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	xorl	%eax,%eax
+	xorl	%edx,%edx
+	wrmsr
+1:	ret
+END(handle_ibrs_exit)
+
+ENTRY(handle_ibrs_exit_rs)
+	testl	$IA32_ARCH_CAP_IBRS_ALL,cpu_ia32_arch_caps(%rip)
+	jne	1f
+	cmpb	$0,hw_ibrs_disable(%rip)
+	jne	1f
+	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+	je	1f
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+	movl	$MSR_IA32_SPEC_CTRL,%ecx
+	xorl	%eax,%eax
+	xorl	%edx,%edx
+	wrmsr
+	popq	%rcx
+	popq	%rdx
+	popq	%rax
+1:	ret
+END(handle_ibrs_exit_rs)
+
+	.noaltmacro
Index: sys/amd64/ia32/ia32_exception.S
===================================================================
--- sys/amd64/ia32/ia32_exception.S
+++ sys/amd64/ia32/ia32_exception.S
@@ -53,13 +53,14 @@
 	movq	PCPU(CURPCB),%rdi
 	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
 	SAVE_SEGS
-	sti
-	movq	%rsi,TF_RSI(%rsp)
+	movq	%rax,TF_RAX(%rsp)
 	movq	%rdx,TF_RDX(%rsp)
 	movq	%rcx,TF_RCX(%rsp)
+	call	handle_ibrs_entry
+	sti
+	movq	%rsi,TF_RSI(%rsp)
 	movq	%r8,TF_R8(%rsp)
 	movq	%r9,TF_R9(%rsp)
-	movq	%rax,TF_RAX(%rsp)
 	movq	%rbx,TF_RBX(%rsp)
 	movq	%rbp,TF_RBP(%rsp)
 	movq	%r10,TF_R10(%rsp)
Index: sys/amd64/include/md_var.h
===================================================================
--- sys/amd64/include/md_var.h
+++ sys/amd64/include/md_var.h
@@ -38,6 +38,7 @@
 extern	uint64_t *vm_page_dump;
 extern	int	hw_lower_amd64_sharedpage;
+extern	int	hw_ibrs_disable;
 
 /*
  * The file "conf/ldscript.amd64" defines the symbol "kernphys".  Its
Index: sys/x86/include/x86_var.h
===================================================================
--- sys/x86/include/x86_var.h
+++ sys/x86/include/x86_var.h
@@ -131,6 +131,8 @@
 void	fillw(int /*u_short*/ pat, void *base, size_t cnt);
 int	is_physical_memory(vm_paddr_t addr);
 int	isa_nmi(int cd);
+void	handle_ibrs_entry(void);
+void	handle_ibrs_exit(void);
 void	nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame);
 void	nmi_call_kdb_smp(u_int type, struct trapframe *frame);
 void	nmi_handle_intr(u_int type, struct trapframe *frame);
Index: sys/x86/x86/cpu_machdep.c
===================================================================
--- sys/x86/x86/cpu_machdep.c
+++ sys/x86/x86/cpu_machdep.c
@@ -150,9 +150,11 @@
 	KASSERT(*state == STATE_SLEEPING,
 	    ("cpu_mwait_cx: wrong monitorbuf state"));
 	*state = STATE_MWAIT;
+	handle_ibrs_entry();
 	cpu_monitor(state, 0, 0);
 	if (*state == STATE_MWAIT)
 		cpu_mwait(MWAIT_INTRBREAK, mwait_hint);
+	handle_ibrs_exit();
 
 	/*
 	 * We should exit on any event that interrupts mwait, because
Index: sys/x86/x86/identcpu.c
===================================================================
--- sys/x86/x86/identcpu.c
+++ sys/x86/x86/identcpu.c
@@ -1614,7 +1614,8 @@
 
 	if (strcmp(cpu_vendor, AMD_VENDOR_ID) == 0)
 		return (0);
-
+	if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) != 0)
+		return (0);
 	return (1);
 }
 
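
Note, not part of the patch: the intent of the new handle_ibrs_entry()/handle_ibrs_exit() pair is easier to follow in C than in the assembly above, so here is a minimal sketch of that decision logic. It is only an illustration: the ibrs_entry(), ibrs_exit() and wrmsr_stub() names are invented for the sketch, the boot-time feature values are assumed, and the MSR write is stubbed with a printf so the example builds and runs in user space (in the kernel this is a real wrmsr). The constant values follow Intel's speculation-control documentation (IA32_SPEC_CTRL is MSR 0x48, IBRS is bit 0, IBRS_ALL is bit 1 of IA32_ARCH_CAPABILITIES, CPUID.7.0:EDX bit 26 reports IBRS/IBPB).

/*
 * Illustrative sketch only: the decision logic of handle_ibrs_entry()
 * and handle_ibrs_exit() expressed in C.
 */
#include <stdint.h>
#include <stdio.h>

#define	MSR_IA32_SPEC_CTRL	0x48		/* speculation control MSR */
#define	IA32_SPEC_CTRL_IBRS	0x00000001	/* restrict indirect branches */
#define	IA32_ARCH_CAP_IBRS_ALL	0x00000002	/* IBRS set once, never toggled */
#define	CPUID_STDEXT3_IBPB	0x04000000	/* CPUID.7.0:EDX bit 26, IBRS/IBPB */

/* State the kernel latches at boot; the values here are assumed for the demo. */
static uint64_t cpu_ia32_arch_caps;			/* IA32_ARCH_CAPABILITIES */
static uint32_t cpu_stdext_feature3 = CPUID_STDEXT3_IBPB;
static int hw_ibrs_disable;				/* the hw.ibrs_disable tunable */

static void
wrmsr_stub(uint32_t msr, uint64_t val)
{
	/* Stand-in for wrmsr(); just show what would be written. */
	printf("wrmsr(0x%x, 0x%llx)\n", msr, (unsigned long long)val);
}

/* Entry from user mode: restrict indirect branch speculation in the kernel. */
static void
ibrs_entry(void)
{
	if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_IBRS_ALL) != 0 ||
	    hw_ibrs_disable != 0)
		return;		/* enabled once at boot, or turned off by the admin */
	if ((cpu_stdext_feature3 & CPUID_STDEXT3_IBPB) != 0)
		wrmsr_stub(MSR_IA32_SPEC_CTRL, IA32_SPEC_CTRL_IBRS);
	/* Without SMEP the assembly also stuffs the return stack (ibrs_seq 32). */
}

/* Return to user mode: lift the restriction so userland is not penalized. */
static void
ibrs_exit(void)
{
	if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_IBRS_ALL) != 0 ||
	    hw_ibrs_disable != 0)
		return;
	if ((cpu_stdext_feature3 & CPUID_STDEXT3_IBPB) != 0)
		wrmsr_stub(MSR_IA32_SPEC_CTRL, 0);
}

int
main(void)
{
	ibrs_entry();	/* syscall/trap/NMI from user mode */
	ibrs_exit();	/* just before sysret/iret back to user mode */
	return (0);
}

The split between handle_ibrs_exit() and handle_ibrs_exit_rs() in the patch is about register pressure only: the syscall return path calls the plain variant before the trapframe registers are reloaded, so %rax/%rdx/%rcx may be clobbered, while the doreti path calls the _rs variant after the registers have already been restored and therefore saves and restores them around the wrmsr.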