Index: stable/11/sys/amd64/vmm/amd/svm_support.S =================================================================== --- stable/11/sys/amd64/vmm/amd/svm_support.S (revision 330703) +++ stable/11/sys/amd64/vmm/amd/svm_support.S (revision 330704) @@ -1,132 +1,161 @@ /*- * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include "svm_assym.h" /* * Be friendly to DTrace FBT's prologue/epilogue pattern matching. * * They are also responsible for saving/restoring the host %rbp across VMRUN. */ #define VENTER push %rbp ; mov %rsp,%rbp #define VLEAVE pop %rbp #define VMLOAD .byte 0x0f, 0x01, 0xda #define VMRUN .byte 0x0f, 0x01, 0xd8 #define VMSAVE .byte 0x0f, 0x01, 0xdb /* * svm_launch(uint64_t vmcb, struct svm_regctx *gctx, struct pcpu *pcpu) * %rdi: physical address of VMCB * %rsi: pointer to guest context * %rdx: pointer to the pcpu data */ ENTRY(svm_launch) VENTER /* save pointer to the pcpu data */ push %rdx /* * Host register state saved across a VMRUN. * * All "callee saved registers" except: * %rsp: because it is preserved by the processor across VMRUN. * %rbp: because it is saved/restored by the function prologue/epilogue. */ push %rbx push %r12 push %r13 push %r14 push %r15 /* Save the physical address of the VMCB in %rax */ movq %rdi, %rax push %rsi /* push guest context pointer on the stack */ /* * Restore guest state. */ movq SCTX_R8(%rsi), %r8 movq SCTX_R9(%rsi), %r9 movq SCTX_R10(%rsi), %r10 movq SCTX_R11(%rsi), %r11 movq SCTX_R12(%rsi), %r12 movq SCTX_R13(%rsi), %r13 movq SCTX_R14(%rsi), %r14 movq SCTX_R15(%rsi), %r15 movq SCTX_RBP(%rsi), %rbp movq SCTX_RBX(%rsi), %rbx movq SCTX_RCX(%rsi), %rcx movq SCTX_RDX(%rsi), %rdx movq SCTX_RDI(%rsi), %rdi movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */ VMLOAD VMRUN VMSAVE pop %rax /* pop guest context pointer from the stack */ /* * Save guest state. 
*/ movq %r8, SCTX_R8(%rax) movq %r9, SCTX_R9(%rax) movq %r10, SCTX_R10(%rax) movq %r11, SCTX_R11(%rax) movq %r12, SCTX_R12(%rax) movq %r13, SCTX_R13(%rax) movq %r14, SCTX_R14(%rax) movq %r15, SCTX_R15(%rax) movq %rbp, SCTX_RBP(%rax) movq %rbx, SCTX_RBX(%rax) movq %rcx, SCTX_RCX(%rax) movq %rdx, SCTX_RDX(%rax) movq %rdi, SCTX_RDI(%rax) movq %rsi, SCTX_RSI(%rax) + /* + * To prevent malicious branch target predictions from + * affecting the host, overwrite all entries in the RSB upon + * exiting a guest. + */ + mov $16, %ecx /* 16 iterations, two calls per loop */ + mov %rsp, %rax +0: call 2f /* create an RSB entry. */ +1: pause + call 1b /* capture rogue speculation. */ +2: call 2f /* create an RSB entry. */ +1: pause + call 1b /* capture rogue speculation. */ +2: sub $1, %ecx + jnz 0b + mov %rax, %rsp + /* Restore host state */ pop %r15 pop %r14 pop %r13 pop %r12 pop %rbx /* Restore %GS.base to point to the host's pcpu data */ pop %rdx mov %edx, %eax shr $32, %rdx - mov $MSR_GSBASE, %ecx + mov $MSR_GSBASE, %rcx wrmsr + + /* + * Clobber the remaining registers with guest contents so they + * can't be misused. + */ + xor %rbp, %rbp + xor %rdi, %rdi + xor %rsi, %rsi + xor %r8, %r8 + xor %r9, %r9 + xor %r10, %r10 + xor %r11, %r11 VLEAVE ret END(svm_launch) Index: stable/11/sys/amd64/vmm/intel/vmcs.c =================================================================== --- stable/11/sys/amd64/vmm/intel/vmcs.c (revision 330703) +++ stable/11/sys/amd64/vmm/intel/vmcs.c (revision 330704) @@ -1,503 +1,517 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
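
The svm_launch() epilogue above restores the host's %GS.base by popping the saved pcpu pointer and issuing WRMSR, which takes the MSR index in %ecx and the 64-bit value split across %edx:%eax, hence the mov/shr pair before the instruction. The switch from %ecx to %rcx as the destination of the MSR_GSBASE constant is cosmetic as far as WRMSR is concerned, since the instruction only consumes %ecx. A minimal C sketch of the same operation using the wrmsr() inline from <machine/cpufunc.h>; the helper name restore_host_gsbase is illustrative and not part of this change:

#include <sys/param.h>
#include <sys/pcpu.h>

#include <machine/cpufunc.h>
#include <machine/specialreg.h>

/*
 * Sketch only: C-level equivalent of the pop/mov/shr/wrmsr sequence in
 * svm_launch().  WRMSR expects the MSR number in %ecx and the 64-bit
 * value in %edx:%eax, which is why the assembly splits the popped pcpu
 * pointer before executing the instruction.
 */
static void
restore_host_gsbase(struct pcpu *pcpu)
{
	wrmsr(MSR_GSBASE, (uint64_t)pcpu);
}
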
* * $FreeBSD$ */ #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include #include #include "vmm_host.h" #include "vmx_cpufunc.h" #include "vmcs.h" #include "ept.h" #include "vmx.h" #ifdef DDB #include #endif +SYSCTL_DECL(_hw_vmm_vmx); + +static int no_flush_rsb; +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, + &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); + static uint64_t vmcs_fix_regval(uint32_t encoding, uint64_t val) { switch (encoding) { case VMCS_GUEST_CR0: val = vmx_fix_cr0(val); break; case VMCS_GUEST_CR4: val = vmx_fix_cr4(val); break; default: break; } return (val); } static uint32_t vmcs_field_encoding(int ident) { switch (ident) { case VM_REG_GUEST_CR0: return (VMCS_GUEST_CR0); case VM_REG_GUEST_CR3: return (VMCS_GUEST_CR3); case VM_REG_GUEST_CR4: return (VMCS_GUEST_CR4); case VM_REG_GUEST_DR7: return (VMCS_GUEST_DR7); case VM_REG_GUEST_RSP: return (VMCS_GUEST_RSP); case VM_REG_GUEST_RIP: return (VMCS_GUEST_RIP); case VM_REG_GUEST_RFLAGS: return (VMCS_GUEST_RFLAGS); case VM_REG_GUEST_ES: return (VMCS_GUEST_ES_SELECTOR); case VM_REG_GUEST_CS: return (VMCS_GUEST_CS_SELECTOR); case VM_REG_GUEST_SS: return (VMCS_GUEST_SS_SELECTOR); case VM_REG_GUEST_DS: return (VMCS_GUEST_DS_SELECTOR); case VM_REG_GUEST_FS: return (VMCS_GUEST_FS_SELECTOR); case VM_REG_GUEST_GS: return (VMCS_GUEST_GS_SELECTOR); case VM_REG_GUEST_TR: return (VMCS_GUEST_TR_SELECTOR); case VM_REG_GUEST_LDTR: return (VMCS_GUEST_LDTR_SELECTOR); case VM_REG_GUEST_EFER: return (VMCS_GUEST_IA32_EFER); case VM_REG_GUEST_PDPTE0: return (VMCS_GUEST_PDPTE0); case VM_REG_GUEST_PDPTE1: return (VMCS_GUEST_PDPTE1); case VM_REG_GUEST_PDPTE2: return (VMCS_GUEST_PDPTE2); case VM_REG_GUEST_PDPTE3: return (VMCS_GUEST_PDPTE3); default: return (-1); } } static int vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) { switch (seg) { case VM_REG_GUEST_ES: *base = VMCS_GUEST_ES_BASE; *lim = VMCS_GUEST_ES_LIMIT; *acc = VMCS_GUEST_ES_ACCESS_RIGHTS; break; case VM_REG_GUEST_CS: *base = VMCS_GUEST_CS_BASE; *lim = VMCS_GUEST_CS_LIMIT; *acc = VMCS_GUEST_CS_ACCESS_RIGHTS; break; case VM_REG_GUEST_SS: *base = VMCS_GUEST_SS_BASE; *lim = VMCS_GUEST_SS_LIMIT; *acc = VMCS_GUEST_SS_ACCESS_RIGHTS; break; case VM_REG_GUEST_DS: *base = VMCS_GUEST_DS_BASE; *lim = VMCS_GUEST_DS_LIMIT; *acc = VMCS_GUEST_DS_ACCESS_RIGHTS; break; case VM_REG_GUEST_FS: *base = VMCS_GUEST_FS_BASE; *lim = VMCS_GUEST_FS_LIMIT; *acc = VMCS_GUEST_FS_ACCESS_RIGHTS; break; case VM_REG_GUEST_GS: *base = VMCS_GUEST_GS_BASE; *lim = VMCS_GUEST_GS_LIMIT; *acc = VMCS_GUEST_GS_ACCESS_RIGHTS; break; case VM_REG_GUEST_TR: *base = VMCS_GUEST_TR_BASE; *lim = VMCS_GUEST_TR_LIMIT; *acc = VMCS_GUEST_TR_ACCESS_RIGHTS; break; case VM_REG_GUEST_LDTR: *base = VMCS_GUEST_LDTR_BASE; *lim = VMCS_GUEST_LDTR_LIMIT; *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS; break; case VM_REG_GUEST_IDTR: *base = VMCS_GUEST_IDTR_BASE; *lim = VMCS_GUEST_IDTR_LIMIT; *acc = VMCS_INVALID_ENCODING; break; case VM_REG_GUEST_GDTR: *base = VMCS_GUEST_GDTR_BASE; *lim = VMCS_GUEST_GDTR_LIMIT; *acc = VMCS_INVALID_ENCODING; break; default: return (EINVAL); } return (0); } int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) { int error; uint32_t encoding; /* * If we need to get at vmx-specific state in the VMCS we can bypass * the translation of 'ident' to 'encoding' by simply setting the * sign bit. 
As it so happens the upper 16 bits are reserved (i.e * set to 0) in the encodings for the VMCS so we are free to use the * sign bit. */ if (ident < 0) encoding = ident & 0x7fffffff; else encoding = vmcs_field_encoding(ident); if (encoding == (uint32_t)-1) return (EINVAL); if (!running) VMPTRLD(vmcs); error = vmread(encoding, retval); if (!running) VMCLEAR(vmcs); return (error); } int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) { int error; uint32_t encoding; if (ident < 0) encoding = ident & 0x7fffffff; else encoding = vmcs_field_encoding(ident); if (encoding == (uint32_t)-1) return (EINVAL); val = vmcs_fix_regval(encoding, val); if (!running) VMPTRLD(vmcs); error = vmwrite(encoding, val); if (!running) VMCLEAR(vmcs); return (error); } int vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) { int error; uint32_t base, limit, access; error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); if (error != 0) panic("vmcs_setdesc: invalid segment register %d", seg); if (!running) VMPTRLD(vmcs); if ((error = vmwrite(base, desc->base)) != 0) goto done; if ((error = vmwrite(limit, desc->limit)) != 0) goto done; if (access != VMCS_INVALID_ENCODING) { if ((error = vmwrite(access, desc->access)) != 0) goto done; } done: if (!running) VMCLEAR(vmcs); return (error); } int vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) { int error; uint32_t base, limit, access; uint64_t u64; error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); if (error != 0) panic("vmcs_getdesc: invalid segment register %d", seg); if (!running) VMPTRLD(vmcs); if ((error = vmread(base, &u64)) != 0) goto done; desc->base = u64; if ((error = vmread(limit, &u64)) != 0) goto done; desc->limit = u64; if (access != VMCS_INVALID_ENCODING) { if ((error = vmread(access, &u64)) != 0) goto done; desc->access = u64; } done: if (!running) VMCLEAR(vmcs); return (error); } int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) { int error; VMPTRLD(vmcs); /* * Guest MSRs are saved in the VM-exit MSR-store area. * Guest MSRs are loaded from the VM-entry MSR-load area. * Both areas point to the same location in memory. */ if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) goto done; if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) goto done; if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) goto done; if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) goto done; error = 0; done: VMCLEAR(vmcs); return (error); } int vmcs_init(struct vmcs *vmcs) { int error, codesel, datasel, tsssel; u_long cr0, cr4, efer; uint64_t pat, fsbase, idtrbase; codesel = vmm_get_host_codesel(); datasel = vmm_get_host_datasel(); tsssel = vmm_get_host_tsssel(); /* * Make sure we have a "current" VMCS to work with. 
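
vmcs_init() below consults the hw.vmm.vmx.no_flush_rsb sysctl declared earlier in this file when it programs the host %rip, so the knob only affects VMCSs initialized after the value is changed. A hedged userland sketch of flipping it with sysctlbyname(3); the program is purely illustrative:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>

/*
 * Illustrative only: opt out of the RSB flush for VMs created after
 * this point by setting the sysctl declared in vmcs.c.  The value is
 * read in vmcs_init(), so already-initialized VMCSs keep whatever
 * host %rip they were given.
 */
int
main(void)
{
	int val = 1;

	if (sysctlbyname("hw.vmm.vmx.no_flush_rsb", NULL, NULL, &val,
	    sizeof(val)) != 0)
		err(1, "sysctlbyname(hw.vmm.vmx.no_flush_rsb)");
	return (0);
}
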
*/ VMPTRLD(vmcs); /* Host state */ /* Initialize host IA32_PAT MSR */ pat = vmm_get_host_pat(); if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) goto done; /* Load the IA32_EFER MSR */ efer = vmm_get_host_efer(); if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) goto done; /* Load the control registers */ cr0 = vmm_get_host_cr0(); if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) goto done; cr4 = vmm_get_host_cr4() | CR4_VMXE; if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) goto done; /* Load the segment selectors */ if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) goto done; if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) goto done; if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) goto done; if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) goto done; if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) goto done; if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) goto done; if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) goto done; /* * Load the Base-Address for %fs and idtr. * * Note that we exclude %gs, tss and gdtr here because their base * address is pcpu specific. */ fsbase = vmm_get_host_fsbase(); if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) goto done; idtrbase = vmm_get_host_idtrbase(); if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) goto done; /* instruction pointer */ - if ((error = vmwrite(VMCS_HOST_RIP, (u_long)vmx_exit_guest)) != 0) - goto done; + if (no_flush_rsb) { + if ((error = vmwrite(VMCS_HOST_RIP, + (u_long)vmx_exit_guest)) != 0) + goto done; + } else { + if ((error = vmwrite(VMCS_HOST_RIP, + (u_long)vmx_exit_guest_flush_rsb)) != 0) + goto done; + } /* link pointer */ if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) goto done; done: VMCLEAR(vmcs); return (error); } #ifdef DDB extern int vmxon_enabled[]; DB_SHOW_COMMAND(vmcs, db_show_vmcs) { uint64_t cur_vmcs, val; uint32_t exit; if (!vmxon_enabled[curcpu]) { db_printf("VMX not enabled\n"); return; } if (have_addr) { db_printf("Only current VMCS supported\n"); return; } vmptrst(&cur_vmcs); if (cur_vmcs == VMCS_INITIAL) { db_printf("No current VM context\n"); return; } db_printf("VMCS: %jx\n", cur_vmcs); db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); db_printf("Activity: "); val = vmcs_read(VMCS_GUEST_ACTIVITY); switch (val) { case 0: db_printf("Active"); break; case 1: db_printf("HLT"); break; case 2: db_printf("Shutdown"); break; case 3: db_printf("Wait for SIPI"); break; default: db_printf("Unknown: %#lx", val); } db_printf("\n"); exit = vmcs_read(VMCS_EXIT_REASON); if (exit & 0x80000000) db_printf("Entry Failure Reason: %u\n", exit & 0xffff); else db_printf("Exit Reason: %u\n", exit & 0xffff); db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); db_printf("Guest Linear Address: %#lx\n", vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); switch (exit & 0x8000ffff) { case EXIT_REASON_EXCEPTION: case EXIT_REASON_EXT_INTR: val = vmcs_read(VMCS_EXIT_INTR_INFO); db_printf("Interrupt Type: "); switch (val >> 8 & 0x7) { case 0: db_printf("external"); break; case 2: db_printf("NMI"); break; case 3: db_printf("HW exception"); break; case 4: db_printf("SW exception"); break; default: db_printf("?? 
%lu", val >> 8 & 0x7); break; } db_printf(" Vector: %lu", val & 0xff); if (val & 0x800) db_printf(" Error Code: %lx", vmcs_read(VMCS_EXIT_INTR_ERRCODE)); db_printf("\n"); break; case EXIT_REASON_EPT_FAULT: case EXIT_REASON_EPT_MISCONFIG: db_printf("Guest Physical Address: %#lx\n", vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); break; } db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); } #endif Index: stable/11/sys/amd64/vmm/intel/vmx.h =================================================================== --- stable/11/sys/amd64/vmm/intel/vmx.h (revision 330703) +++ stable/11/sys/amd64/vmm/intel/vmx.h (revision 330704) @@ -1,152 +1,153 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMX_H_ #define _VMX_H_ #include "vmcs.h" struct pmap; struct vmxctx { register_t guest_rdi; /* Guest state */ register_t guest_rsi; register_t guest_rdx; register_t guest_rcx; register_t guest_r8; register_t guest_r9; register_t guest_rax; register_t guest_rbx; register_t guest_rbp; register_t guest_r10; register_t guest_r11; register_t guest_r12; register_t guest_r13; register_t guest_r14; register_t guest_r15; register_t guest_cr2; register_t guest_dr0; register_t guest_dr1; register_t guest_dr2; register_t guest_dr3; register_t guest_dr6; register_t host_r15; /* Host state */ register_t host_r14; register_t host_r13; register_t host_r12; register_t host_rbp; register_t host_rsp; register_t host_rbx; register_t host_dr0; register_t host_dr1; register_t host_dr2; register_t host_dr3; register_t host_dr6; register_t host_dr7; uint64_t host_debugctl; int host_tf; int inst_fail_status; /* * The pmap needs to be deactivated in vmx_enter_guest() * so keep a copy of the 'pmap' in each vmxctx. 
*/ struct pmap *pmap; }; struct vmxcap { int set; uint32_t proc_ctls; uint32_t proc_ctls2; }; struct vmxstate { uint64_t nextrip; /* next instruction to be executed by guest */ int lastcpu; /* host cpu that this 'vcpu' last ran on */ uint16_t vpid; }; struct apic_page { uint32_t reg[PAGE_SIZE / 4]; }; CTASSERT(sizeof(struct apic_page) == PAGE_SIZE); /* Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM) */ struct pir_desc { uint64_t pir[4]; uint64_t pending; uint64_t unused[3]; } __aligned(64); CTASSERT(sizeof(struct pir_desc) == 64); /* Index into the 'guest_msrs[]' array */ enum { IDX_MSR_LSTAR, IDX_MSR_CSTAR, IDX_MSR_STAR, IDX_MSR_SF_MASK, IDX_MSR_KGSBASE, IDX_MSR_PAT, GUEST_MSR_NUM /* must be the last enumeration */ }; /* virtual machine softc */ struct vmx { struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */ struct apic_page apic_page[VM_MAXCPU]; /* one apic page per vcpu */ char msr_bitmap[PAGE_SIZE]; struct pir_desc pir_desc[VM_MAXCPU]; uint64_t guest_msrs[VM_MAXCPU][GUEST_MSR_NUM]; struct vmxctx ctx[VM_MAXCPU]; struct vmxcap cap[VM_MAXCPU]; struct vmxstate state[VM_MAXCPU]; uint64_t eptp; struct vm *vm; long eptgen[MAXCPU]; /* cached pmap->pm_eptgen */ }; CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0); CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0); CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0); #define VMX_GUEST_VMEXIT 0 #define VMX_VMRESUME_ERROR 1 #define VMX_VMLAUNCH_ERROR 2 #define VMX_INVEPT_ERROR 3 int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched); void vmx_call_isr(uintptr_t entry); u_long vmx_fix_cr0(u_long cr0); u_long vmx_fix_cr4(u_long cr4); int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset); extern char vmx_exit_guest[]; +extern char vmx_exit_guest_flush_rsb[]; #endif Index: stable/11/sys/amd64/vmm/intel/vmx_support.S =================================================================== --- stable/11/sys/amd64/vmm/intel/vmx_support.S (revision 330703) +++ stable/11/sys/amd64/vmm/intel/vmx_support.S (revision 330704) @@ -1,262 +1,325 @@ /*- * Copyright (c) 2011 NetApp, Inc. * Copyright (c) 2013 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
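
The IDX_MSR_* enumeration above indexes the per-vcpu guest_msrs[] save area declared in struct vmx. A minimal accessor sketch; the helper name and the assertion wording are illustrative:

#include <sys/param.h>
#include <sys/systm.h>

#include "vmx.h"

/*
 * Illustrative accessor for the per-vcpu MSR save area; IDX_MSR_*
 * values must stay below GUEST_MSR_NUM, which is why GUEST_MSR_NUM is
 * required to remain the final enumerator.
 */
static uint64_t
guest_msr_get(struct vmx *vmx, int vcpuid, int idx)
{

	KASSERT(idx >= 0 && idx < GUEST_MSR_NUM,
	    ("guest_msr_get: invalid MSR index %d", idx));
	return (vmx->guest_msrs[vcpuid][idx]);
}
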
* * $FreeBSD$ */ #include #include "vmx_assym.h" #ifdef SMP #define LK lock ; #else #define LK #endif /* Be friendly to DTrace FBT's prologue/epilogue pattern matching */ #define VENTER push %rbp ; mov %rsp,%rbp #define VLEAVE pop %rbp /* + * Save the guest context. + */ +#define VMX_GUEST_SAVE \ + movq %rdi,VMXCTX_GUEST_RDI(%rsp); \ + movq %rsi,VMXCTX_GUEST_RSI(%rsp); \ + movq %rdx,VMXCTX_GUEST_RDX(%rsp); \ + movq %rcx,VMXCTX_GUEST_RCX(%rsp); \ + movq %r8,VMXCTX_GUEST_R8(%rsp); \ + movq %r9,VMXCTX_GUEST_R9(%rsp); \ + movq %rax,VMXCTX_GUEST_RAX(%rsp); \ + movq %rbx,VMXCTX_GUEST_RBX(%rsp); \ + movq %rbp,VMXCTX_GUEST_RBP(%rsp); \ + movq %r10,VMXCTX_GUEST_R10(%rsp); \ + movq %r11,VMXCTX_GUEST_R11(%rsp); \ + movq %r12,VMXCTX_GUEST_R12(%rsp); \ + movq %r13,VMXCTX_GUEST_R13(%rsp); \ + movq %r14,VMXCTX_GUEST_R14(%rsp); \ + movq %r15,VMXCTX_GUEST_R15(%rsp); \ + movq %cr2,%rdi; \ + movq %rdi,VMXCTX_GUEST_CR2(%rsp); \ + movq %rsp,%rdi; + +/* * Assumes that %rdi holds a pointer to the 'vmxctx'. * * On "return" all registers are updated to reflect guest state. The two * exceptions are %rip and %rsp. These registers are atomically switched * by hardware from the guest area of the vmcs. * * We modify %rsp to point to the 'vmxctx' so we can use it to restore * host context in case of an error with 'vmlaunch' or 'vmresume'. */ #define VMX_GUEST_RESTORE \ movq %rdi,%rsp; \ movq VMXCTX_GUEST_CR2(%rdi),%rsi; \ movq %rsi,%cr2; \ movq VMXCTX_GUEST_RSI(%rdi),%rsi; \ movq VMXCTX_GUEST_RDX(%rdi),%rdx; \ movq VMXCTX_GUEST_RCX(%rdi),%rcx; \ movq VMXCTX_GUEST_R8(%rdi),%r8; \ movq VMXCTX_GUEST_R9(%rdi),%r9; \ movq VMXCTX_GUEST_RAX(%rdi),%rax; \ movq VMXCTX_GUEST_RBX(%rdi),%rbx; \ movq VMXCTX_GUEST_RBP(%rdi),%rbp; \ movq VMXCTX_GUEST_R10(%rdi),%r10; \ movq VMXCTX_GUEST_R11(%rdi),%r11; \ movq VMXCTX_GUEST_R12(%rdi),%r12; \ movq VMXCTX_GUEST_R13(%rdi),%r13; \ movq VMXCTX_GUEST_R14(%rdi),%r14; \ movq VMXCTX_GUEST_R15(%rdi),%r15; \ movq VMXCTX_GUEST_RDI(%rdi),%rdi; /* restore rdi the last */ /* + * Clobber the remaining registers with guest contents so they can't + * be misused. + */ +#define VMX_GUEST_CLOBBER \ + xor %rax, %rax; \ + xor %rcx, %rcx; \ + xor %rdx, %rdx; \ + xor %rsi, %rsi; \ + xor %r8, %r8; \ + xor %r9, %r9; \ + xor %r10, %r10; \ + xor %r11, %r11; + +/* * Save and restore the host context. * * Assumes that %rdi holds a pointer to the 'vmxctx'. */ #define VMX_HOST_SAVE \ movq %r15, VMXCTX_HOST_R15(%rdi); \ movq %r14, VMXCTX_HOST_R14(%rdi); \ movq %r13, VMXCTX_HOST_R13(%rdi); \ movq %r12, VMXCTX_HOST_R12(%rdi); \ movq %rbp, VMXCTX_HOST_RBP(%rdi); \ movq %rsp, VMXCTX_HOST_RSP(%rdi); \ movq %rbx, VMXCTX_HOST_RBX(%rdi); \ #define VMX_HOST_RESTORE \ movq VMXCTX_HOST_R15(%rdi), %r15; \ movq VMXCTX_HOST_R14(%rdi), %r14; \ movq VMXCTX_HOST_R13(%rdi), %r13; \ movq VMXCTX_HOST_R12(%rdi), %r12; \ movq VMXCTX_HOST_RBP(%rdi), %rbp; \ movq VMXCTX_HOST_RSP(%rdi), %rsp; \ movq VMXCTX_HOST_RBX(%rdi), %rbx; \ /* * vmx_enter_guest(struct vmxctx *vmxctx, int launched) * %rdi: pointer to the 'vmxctx' * %rsi: pointer to the 'vmx' * %edx: launch state of the VMCS * Interrupts must be disabled on entry. */ ENTRY(vmx_enter_guest) VENTER /* * Save host state before doing anything else. */ VMX_HOST_SAVE /* * Activate guest pmap on this cpu. */ movq VMXCTX_PMAP(%rdi), %r11 movl PCPU(CPUID), %eax LK btsl %eax, PM_ACTIVE(%r11) /* * If 'vmx->eptgen[curcpu]' is not identical to 'pmap->pm_eptgen' * then we must invalidate all mappings associated with this EPTP. 
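
Before entering the guest, vmx_enter_guest() below may need to discard stale EPT mappings; it does so by assembling a 16-byte INVEPT operand on the stack (the EPT pointer in the low quadword, zero in the high one) and executing a single-context INVEPT, i.e. type 1 in %eax. A sketch of that operand layout; the struct name here is illustrative (bhyve's own inline wrappers live in vmx_cpufunc.h):

#include <sys/types.h>

/*
 * Illustrative layout of the descriptor built at -16(%rsp) for the
 * single-context INVEPT below: the EPT pointer followed by a reserved
 * quadword that must be zero.
 */
struct invept_operand {
	uint64_t	eptp;		/* written from VMX_EPTP(%rsi) */
	uint64_t	reserved;	/* movq $0x0, -8(%r11) */
};
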
*/ movq PM_EPTGEN(%r11), %r10 cmpq %r10, VMX_EPTGEN(%rsi, %rax, 8) je guest_restore /* Refresh 'vmx->eptgen[curcpu]' */ movq %r10, VMX_EPTGEN(%rsi, %rax, 8) /* Setup the invept descriptor on the host stack */ mov %rsp, %r11 movq VMX_EPTP(%rsi), %rax movq %rax, -16(%r11) movq $0x0, -8(%r11) mov $0x1, %eax /* Single context invalidate */ invept -16(%r11), %rax jbe invept_error /* Check invept instruction error */ guest_restore: cmpl $0, %edx je do_launch VMX_GUEST_RESTORE vmresume /* * In the common case 'vmresume' returns back to the host through * 'vmx_exit_guest' with %rsp pointing to 'vmxctx'. * * If there is an error we return VMX_VMRESUME_ERROR to the caller. */ movq %rsp, %rdi /* point %rdi back to 'vmxctx' */ movl $VMX_VMRESUME_ERROR, %eax jmp decode_inst_error do_launch: VMX_GUEST_RESTORE vmlaunch /* * In the common case 'vmlaunch' returns back to the host through * 'vmx_exit_guest' with %rsp pointing to 'vmxctx'. * * If there is an error we return VMX_VMLAUNCH_ERROR to the caller. */ movq %rsp, %rdi /* point %rdi back to 'vmxctx' */ movl $VMX_VMLAUNCH_ERROR, %eax jmp decode_inst_error invept_error: movl $VMX_INVEPT_ERROR, %eax jmp decode_inst_error decode_inst_error: movl $VM_FAIL_VALID, %r11d jz inst_error movl $VM_FAIL_INVALID, %r11d inst_error: movl %r11d, VMXCTX_INST_FAIL_STATUS(%rdi) /* * The return value is already populated in %eax so we cannot use * it as a scratch register beyond this point. */ /* * Deactivate guest pmap from this cpu. */ movq VMXCTX_PMAP(%rdi), %r11 movl PCPU(CPUID), %r10d LK btrl %r10d, PM_ACTIVE(%r11) VMX_HOST_RESTORE VLEAVE ret /* * Non-error VM-exit from the guest. Make this a label so it can * be used by C code when setting up the VMCS. * The VMCS-restored %rsp points to the struct vmxctx */ ALIGN_TEXT - .globl vmx_exit_guest -vmx_exit_guest: + .globl vmx_exit_guest_flush_rsb +vmx_exit_guest_flush_rsb: /* * Save guest state that is not automatically saved in the vmcs. */ - movq %rdi,VMXCTX_GUEST_RDI(%rsp) - movq %rsi,VMXCTX_GUEST_RSI(%rsp) - movq %rdx,VMXCTX_GUEST_RDX(%rsp) - movq %rcx,VMXCTX_GUEST_RCX(%rsp) - movq %r8,VMXCTX_GUEST_R8(%rsp) - movq %r9,VMXCTX_GUEST_R9(%rsp) - movq %rax,VMXCTX_GUEST_RAX(%rsp) - movq %rbx,VMXCTX_GUEST_RBX(%rsp) - movq %rbp,VMXCTX_GUEST_RBP(%rsp) - movq %r10,VMXCTX_GUEST_R10(%rsp) - movq %r11,VMXCTX_GUEST_R11(%rsp) - movq %r12,VMXCTX_GUEST_R12(%rsp) - movq %r13,VMXCTX_GUEST_R13(%rsp) - movq %r14,VMXCTX_GUEST_R14(%rsp) - movq %r15,VMXCTX_GUEST_R15(%rsp) + VMX_GUEST_SAVE - movq %cr2,%rdi - movq %rdi,VMXCTX_GUEST_CR2(%rsp) + /* + * Deactivate guest pmap from this cpu. + */ + movq VMXCTX_PMAP(%rdi), %r11 + movl PCPU(CPUID), %r10d + LK btrl %r10d, PM_ACTIVE(%r11) - movq %rsp,%rdi + VMX_HOST_RESTORE + VMX_GUEST_CLOBBER + /* + * To prevent malicious branch target predictions from + * affecting the host, overwrite all entries in the RSB upon + * exiting a guest. + */ + mov $16, %ecx /* 16 iterations, two calls per loop */ + mov %rsp, %rax +0: call 2f /* create an RSB entry. */ +1: pause + call 1b /* capture rogue speculation. */ +2: call 2f /* create an RSB entry. */ +1: pause + call 1b /* capture rogue speculation. */ +2: sub $1, %ecx + jnz 0b + mov %rax, %rsp + + /* + * This will return to the caller of 'vmx_enter_guest()' with a return + * value of VMX_GUEST_VMEXIT. + */ + movl $VMX_GUEST_VMEXIT, %eax + VLEAVE + ret + + .globl vmx_exit_guest +vmx_exit_guest: + /* + * Save guest state that is not automatically saved in the vmcs. + */ + VMX_GUEST_SAVE + + /* * Deactivate guest pmap from this cpu. 
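
The RSB-stuffing sequence added to svm_launch() and used again in vmx_exit_guest_flush_rsb above is restated here purely for annotation; the instructions are identical to the patch, only the comments are expanded. Each executed CALL pushes an entry into the CPU's return stack buffer, and 16 iterations of two calls overwrite 32 entries, the RSB depth assumed for current processors, so a later RET on the host side cannot speculatively consume a guest-trained entry:

	mov	$16, %ecx	/* 16 iterations, two calls per loop */
	mov	%rsp, %rax	/* each call also pushes a return address */
0:	call	2f		/* RSB entry #1; control continues at 2f */
1:	pause
	call	1b		/* reached only speculatively: traps rogue returns */
2:	call	2f		/* RSB entry #2; control continues at the sub */
1:	pause
	call	1b		/* reached only speculatively: traps rogue returns */
2:	sub	$1, %ecx
	jnz	0b
	mov	%rax, %rsp	/* discard the 32 return addresses pushed above */
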
*/ movq VMXCTX_PMAP(%rdi), %r11 movl PCPU(CPUID), %r10d LK btrl %r10d, PM_ACTIVE(%r11) VMX_HOST_RESTORE + + VMX_GUEST_CLOBBER /* * This will return to the caller of 'vmx_enter_guest()' with a return * value of VMX_GUEST_VMEXIT. */ movl $VMX_GUEST_VMEXIT, %eax VLEAVE ret END(vmx_enter_guest) /* * %rdi = interrupt handler entry point * * Calling sequence described in the "Instruction Set Reference" for the "INT" * instruction in Intel SDM, Vol 2. */ ENTRY(vmx_call_isr) VENTER mov %rsp, %r11 /* save %rsp */ and $~0xf, %rsp /* align on 16-byte boundary */ pushq $KERNEL_SS /* %ss */ pushq %r11 /* %rsp */ pushfq /* %rflags */ pushq $KERNEL_CS /* %cs */ cli /* disable interrupts */ callq *%rdi /* push %rip and call isr */ VLEAVE ret END(vmx_call_isr) Index: stable/11 =================================================================== --- stable/11 (revision 330703) +++ stable/11 (revision 330704) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r328011,329162
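
For context, vmx_call_isr() at the end of vmx_support.S hand-builds the frame the CPU would push when delivering an interrupt through a 64-bit gate, so the handler it calls can finish with IRETQ as usual. A sketch of that layout; the struct name is illustrative:

#include <sys/types.h>

/*
 * Illustrative layout of the frame vmx_call_isr() constructs before
 * 'callq *%rdi'.  Members are listed from the lowest stack address
 * (pushed last) upward, matching the amd64 interrupt frame that IRETQ
 * unwinds.
 */
struct isr_call_frame {
	uint64_t	rip;		/* pushed by the callq itself */
	uint64_t	cs;		/* pushq $KERNEL_CS */
	uint64_t	rflags;		/* pushfq */
	uint64_t	rsp;		/* original %rsp saved in %r11 */
	uint64_t	ss;		/* pushq $KERNEL_SS */
};
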