Index: sys/amd64/amd64/exception.S
===================================================================
--- sys/amd64/amd64/exception.S
+++ sys/amd64/amd64/exception.S
@@ -864,7 +864,10 @@
 	movl	%edx,%eax
 	shrq	$32,%rdx
 	wrmsr
-	movq	%r13,%cr3
+	cmpb	$0, nmi_flush_l1d_sw(%rip)
+	je	2f
+	call	flush_l1d_sw		/* bhyve L1TF assist */
+2:	movq	%r13,%cr3
 	RESTORE_REGS
 	addq	$TF_RIP,%rsp
 	jmp	doreti_iret
Index: sys/amd64/amd64/support.S
===================================================================
--- sys/amd64/amd64/support.S
+++ sys/amd64/amd64/support.S
@@ -1225,3 +1225,36 @@
 END(handle_ibrs_exit_rs)
 
 	.noaltmacro
+
+/*
+ * Flush L1D cache.  Load enough of the data from the kernel text
+ * to flush existing L1D content.
+ *
+ * N.B.  The function does not strictly follow the ABI: cpuid also
+ * overwrites the callee-saved %rbx.  The vmm.ko caller expects that
+ * only %rax, %rbx, %rcx, %rdx, %r9, and %rflags are clobbered.
+ */
+ENTRY(flush_l1d_sw)
+#define	L1D_FLUSH_SIZE	(64 * 1024)
+	movq	$KERNBASE, %r9
+	movq	$-L1D_FLUSH_SIZE, %rcx
+	/*
+	 * pass 1: Preload TLB.
+	 * Kernel text is mapped using superpages.  TLB preload is
+	 * done for the benefit of older CPUs which split 2M page
+	 * into 4k TLB entries.
+	 */
+1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
+	addq	$PAGE_SIZE, %rcx
+	jne	1b
+	xorl	%eax, %eax
+	cpuid
+	movq	$-L1D_FLUSH_SIZE, %rcx
+	/* pass 2: Read each cache line */
+2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
+	addq	$64, %rcx
+	jne	2b
+	lfence
+	ret
+#undef	L1D_FLUSH_SIZE
+END(flush_l1d_sw)
Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -160,6 +160,11 @@
     &uprintf_signal, 0,
     "Print debugging information on trap signal to ctty");
 
+int nmi_flush_l1d_sw;
+SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN,
+    &nmi_flush_l1d_sw, 0,
+    "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist");
+
 /*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
Index: sys/amd64/include/md_var.h
===================================================================
--- sys/amd64/include/md_var.h
+++ sys/amd64/include/md_var.h
@@ -40,6 +40,7 @@
 extern	int	hw_lower_amd64_sharedpage;
 extern	int	hw_ibrs_disable;
 extern	int	hw_ssb_disable;
+extern	int	nmi_flush_l1d_sw;
 
 /*
  * The file "conf/ldscript.amd64" defines the symbol "kernphys".  Its
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c
+++ sys/amd64/vmm/intel/vmx.c
@@ -191,8 +191,11 @@
 static int guest_l1d_flush;
 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD,
     &guest_l1d_flush, 0, NULL);
+static int guest_l1d_flush_sw;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
+    &guest_l1d_flush_sw, 0, NULL);
 
-uint64_t vmx_msr_flush_cmd;
+static struct msr_entry msr_load_list[1] __aligned(16);
 
 /*
  * The definitions of SDT probes for VMX.
@@ -579,6 +582,9 @@
 		vpid_unr = NULL;
 	}
 
+	if (nmi_flush_l1d_sw == 1)
+		nmi_flush_l1d_sw = 0;
+
 	smp_rendezvous(NULL, vmx_disable, NULL, NULL);
 
 	return (0);
@@ -808,8 +814,19 @@
 	guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0;
 	TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
 	if (guest_l1d_flush &&
-	    (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0)
-		vmx_msr_flush_cmd = IA32_FLUSH_CMD_L1D;
+	    (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) {
+		guest_l1d_flush_sw = 1;
+		TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw", &guest_l1d_flush_sw);
+	}
+	if (guest_l1d_flush) {
+		if (guest_l1d_flush_sw) {
+			if (nmi_flush_l1d_sw <= 1)
+				nmi_flush_l1d_sw = 1;
+		} else {
+			msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
+			msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
+		}
+	}
 
 	/*
 	 * Stash the cr0 and cr4 bits that must be fixed to 0 or 1
@@ -1000,6 +1017,15 @@
 		error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
 		error += vmwrite(VMCS_VPID, vpid[i]);
 
+		if (guest_l1d_flush && !guest_l1d_flush_sw) {
+			vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
+			    (vm_offset_t)&msr_load_list[0]));
+			vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT,
+			    nitems(msr_load_list));
+			vmcs_write(VMCS_EXIT_MSR_STORE, 0);
+			vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
+		}
+
 		/* exception bitmap */
 		if (vcpu_trace_exceptions(vm, i))
 			exc_bitmap = 0xffffffff;
Index: sys/amd64/vmm/intel/vmx_support.S
===================================================================
--- sys/amd64/vmm/intel/vmx_support.S
+++ sys/amd64/vmm/intel/vmx_support.S
@@ -182,38 +182,10 @@
 	 * otherwise load enough of the data from the zero_region to flush
 	 * existing L1D content.
 	 */
-#define	L1D_FLUSH_SIZE	(64 * 1024)
 	movl	%edx, %r8d
-	cmpb	$0, guest_l1d_flush(%rip)
+	cmpb	$0, guest_l1d_flush_sw(%rip)
 	je	after_l1d
-	movq	vmx_msr_flush_cmd(%rip), %rax
-	testq	%rax, %rax
-	jz	1f
-	movq	%rax, %rdx
-	shrq	$32, %rdx
-	movl	$MSR_IA32_FLUSH_CMD, %ecx
-	wrmsr
-	jmp	after_l1d
-1:	movq	$KERNBASE, %r9
-	movq	$-L1D_FLUSH_SIZE, %rcx
-	/*
-	 * pass 1: Preload TLB.
-	 * Kernel text is mapped using superpages.  TLB preload is
-	 * done for the benefit of older CPUs which split 2M page
-	 * into 4k TLB entries.
-	 */
-2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
-	addq	$PAGE_SIZE, %rcx
-	jne	2b
-	xorl	%eax, %eax
-	cpuid
-	movq	$-L1D_FLUSH_SIZE, %rcx
-	/* pass 2: Read each cache line */
-3:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
-	addq	$64, %rcx
-	jne	3b
-	lfence
-#undef	L1D_FLUSH_SIZE
+	call	flush_l1d_sw
 after_l1d:
 	cmpl	$0, %r8d
 	je	do_launch
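
Reviewer note: the software fallback added above is a two-pass walk over 64 KiB of kernel text, hung off the hw.vmm.l1d_flush, hw.vmm.l1d_flush_sw, and machdep.nmi_flush_l1d_sw knobs. Pass one touches one byte per page so the TLB is preloaded; pass two touches one byte per cache line so the L1D contents are displaced. The C fragment below is a minimal userspace sketch of that access pattern over a private heap buffer, for illustration only; the buffer, the 4k/64-byte constants, and the compiler barrier standing in for cpuid are assumptions of the sketch, not part of the patch, and walking user memory does not itself provide the mitigation.

/*
 * Illustrative only: mirrors flush_l1d_sw's two-pass access pattern
 * over a private 64 KiB buffer instead of the kernel text.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#define	FLUSH_SIZE	(64 * 1024)	/* matches L1D_FLUSH_SIZE */
#define	PAGE_SIZE_SW	4096		/* assumed 4k base pages */
#define	CACHE_LINE	64		/* assumed 64-byte lines */

static void
flush_pattern_sketch(volatile const uint8_t *buf)
{
	size_t off;
	uint8_t sink = 0;

	/* Pass 1: one read per page, so every TLB entry is preloaded. */
	for (off = 0; off < FLUSH_SIZE; off += PAGE_SIZE_SW)
		sink ^= buf[off];

	/*
	 * The assembly executes cpuid as a serializing barrier between
	 * the passes; a plain compiler barrier stands in for it here.
	 */
	__asm__ __volatile__("" ::: "memory");

	/* Pass 2: one read per cache line, cycling data through the L1D. */
	for (off = 0; off < FLUSH_SIZE; off += CACHE_LINE)
		sink ^= buf[off];

	(void)sink;
}

int
main(void)
{
	uint8_t *buf;

	if ((buf = malloc(FLUSH_SIZE)) == NULL)
		return (1);
	flush_pattern_sketch(buf);
	free(buf);
	return (0);
}

In the patch proper the software walk is used only when hw.vmm.l1d_flush is enabled and the CPU lacks CPUID_STDEXT3_L1D_FLUSH (the hw.vmm.l1d_flush_sw tunable can then veto it); otherwise IA32_FLUSH_CMD is loaded on every VM entry through the MSR-load list and the NMI-exit hook in exception.S stays disabled.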