Changeset View
Changeset View
Standalone View
Standalone View
sys/amd64/vmm/intel/vmx.c
Show All 26 Lines | |||||
* SUCH DAMAGE. | * SUCH DAMAGE. | ||||
* | * | ||||
* $FreeBSD$ | * $FreeBSD$ | ||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_bhyve_snapshot.h" | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/pcpu.h> | #include <sys/pcpu.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/pmap.h> | #include <vm/pmap.h> | ||||
#include <machine/psl.h> | #include <machine/psl.h> | ||||
#include <machine/cpufunc.h> | #include <machine/cpufunc.h> | ||||
#include <machine/md_var.h> | #include <machine/md_var.h> | ||||
#include <machine/reg.h> | #include <machine/reg.h> | ||||
#include <machine/segments.h> | #include <machine/segments.h> | ||||
#include <machine/smp.h> | #include <machine/smp.h> | ||||
#include <machine/specialreg.h> | #include <machine/specialreg.h> | ||||
#include <machine/vmparam.h> | #include <machine/vmparam.h> | ||||
#include <machine/vmm.h> | #include <machine/vmm.h> | ||||
#include <machine/vmm_dev.h> | #include <machine/vmm_dev.h> | ||||
#include <machine/vmm_instruction_emul.h> | #include <machine/vmm_instruction_emul.h> | ||||
#include <machine/vmm_snapshot.h> | |||||
#include "vmm_lapic.h" | #include "vmm_lapic.h" | ||||
#include "vmm_host.h" | #include "vmm_host.h" | ||||
#include "vmm_ioport.h" | #include "vmm_ioport.h" | ||||
#include "vmm_ktr.h" | #include "vmm_ktr.h" | ||||
#include "vmm_stat.h" | #include "vmm_stat.h" | ||||
#include "vatpic.h" | #include "vatpic.h" | ||||
#include "vlapic.h" | #include "vlapic.h" | ||||
#include "vlapic_priv.h" | #include "vlapic_priv.h" | ||||
▲ Show 20 Lines • Show All 223 Lines • ▼ Show 20 Lines | |||||
* with a page in system memory. | * with a page in system memory. | ||||
*/ | */ | ||||
#define APIC_ACCESS_ADDRESS 0xFFFFF000 | #define APIC_ACCESS_ADDRESS 0xFFFFF000 | ||||
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); | static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); | ||||
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); | static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); | ||||
static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); | static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); | ||||
static void vmx_inject_pir(struct vlapic *vlapic); | static void vmx_inject_pir(struct vlapic *vlapic); | ||||
#ifdef BHYVE_SNAPSHOT
/*
 * Forward declaration; the third argument is the TSC offset that the
 * definition hands to vmx_set_tsc_offset(), so it is named accordingly.
 */
static int vmx_restore_tsc(void *arg, int vcpu, uint64_t offset);
#endif
#ifdef KTR | #ifdef KTR | ||||
static const char * | static const char * | ||||
exit_reason_to_str(int reason) | exit_reason_to_str(int reason) | ||||
{ | { | ||||
static char reasonbuf[32]; | static char reasonbuf[32]; | ||||
switch (reason) { | switch (reason) { | ||||
▲ Show 20 Lines • Show All 988 Lines • ▼ Show 20 Lines | vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) | ||||
if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) { | if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) { | ||||
vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET; | vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET; | ||||
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); | vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); | ||||
VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting"); | VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting"); | ||||
} | } | ||||
error = vmwrite(VMCS_TSC_OFFSET, offset); | error = vmwrite(VMCS_TSC_OFFSET, offset); | ||||
#ifdef BHYVE_SNAPSHOT | |||||
if (error == 0) | |||||
error = vm_set_tsc_offset(vmx->vm, vcpu, offset); | |||||
#endif | |||||
return (error); | return (error); | ||||
} | } | ||||
/*
 * Combined masks over the VMCS_INTERRUPTIBILITY_* bits:
 *   NMI_BLOCKING    - NMI-blocking or MOV-SS-blocking is set.
 *   HWINTR_BLOCKING - STI-blocking or MOV-SS-blocking is set.
 */
#define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ | #define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ | ||||
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) | ||||
#define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \ | #define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \ | ||||
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) | ||||
▲ Show 20 Lines • Show All 2,560 Lines • ▼ Show 20 Lines | |||||
/*
 * Tear down a vlapic: run vlapic_cleanup() on it and then release it with
 * free() under the M_VLAPIC malloc type.  'arg' is not used.
 */
static void | static void | ||||
vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic) | vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic) | ||||
{ | { | ||||
vlapic_cleanup(vlapic); | vlapic_cleanup(vlapic); | ||||
free(vlapic, M_VLAPIC); | free(vlapic, M_VLAPIC); | ||||
} | } | ||||
#ifdef BHYVE_SNAPSHOT | |||||
static int | |||||
vmx_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta) | |||||
{ | |||||
struct vmx *vmx; | |||||
struct vmxctx *vmxctx; | |||||
int i; | |||||
int ret; | |||||
vmx = arg; | |||||
KASSERT(vmx != NULL, ("%s: arg was NULL", __func__)); | |||||
for (i = 0; i < VM_MAXCPU; i++) { | |||||
SNAPSHOT_BUF_OR_LEAVE(vmx->guest_msrs[i], | |||||
sizeof(vmx->guest_msrs[i]), meta, ret, done); | |||||
vmxctx = &vmx->ctx[i]; | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdi, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rsi, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdx, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rcx, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r8, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r9, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rax, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbx, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbp, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r10, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r11, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r12, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r13, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r14, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r15, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_cr2, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr0, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr1, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr2, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr3, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr6, meta, ret, done); | |||||
} | |||||
done: | |||||
return (ret); | |||||
} | |||||
static int | |||||
vmx_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu) | |||||
{ | |||||
struct vmcs *vmcs; | |||||
struct vmx *vmx; | |||||
int err, run, hostcpu; | |||||
vmx = (struct vmx *)arg; | |||||
err = 0; | |||||
KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); | |||||
vmcs = &vmx->vmcs[vcpu]; | |||||
run = vcpu_is_running(vmx->vm, vcpu, &hostcpu); | |||||
if (run && hostcpu != curcpu) { | |||||
printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu); | |||||
return (EINVAL); | |||||
} | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR0, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR3, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR4, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DR7, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RSP, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RIP, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RFLAGS, meta); | |||||
/* Guest segments */ | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_ES, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_ES, meta); | |||||
pmooney_pfmooney.com: With descriptors being common across the two CPU types, why not use a general mechanism for… | |||||
Not Done Inline ActionsSimply meant to keep code for all data from vmcs bundled in one function - may be useful to move to a generic function to avoid duplication. darius.mihaim_gmail.com: Simply meant to keep code for all data from vmcs bundled in one function - may be useful to… | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CS, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_CS, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_SS, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_SS, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DS, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_DS, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_FS, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_FS, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_GS, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GS, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_TR, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_TR, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_LDTR, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_LDTR, meta); | |||||
Not Done Inline ActionsWhy not save these MSRs in a general fasion? (Same goes for EFER too) pmooney_pfmooney.com: Why not save these MSRs in a general fasion? (Same goes for EFER too) | |||||
Not Done Inline ActionsSame comment as the one for other registers. darius.mihaim_gmail.com: Same comment as the one for other registers. | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_EFER, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_IDTR, meta); | |||||
err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GDTR, meta); | |||||
Not Done Inline ActionsSaving/restoring control state like this with no validation seems dangerous. It's probably safest to allow the host to reconstruct that from present (and requested) features? pmooney_pfmooney.com: Saving/restoring control state like this with no validation seems dangerous. It's probably… | |||||
/* Guest page tables */ | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE0, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE1, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE2, meta); | |||||
err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE3, meta); | |||||
/* Other guest state */ | |||||
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_CS, meta); | |||||
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_ESP, meta); | |||||
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_EIP, meta); | |||||
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_INTERRUPTIBILITY, meta); | |||||
err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_ACTIVITY, meta); | |||||
err += vmcs_snapshot_any(vmcs, run, VMCS_ENTRY_CTLS, meta); | |||||
err += vmcs_snapshot_any(vmcs, run, VMCS_EXIT_CTLS, meta); | |||||
return (err); | |||||
} | |||||
static int | |||||
vmx_restore_tsc(void *arg, int vcpu, uint64_t offset) | |||||
{ | |||||
struct vmcs *vmcs; | |||||
struct vmx *vmx = (struct vmx *)arg; | |||||
int error, running, hostcpu; | |||||
KASSERT(arg != NULL, ("%s: arg was NULL", __func__)); | |||||
vmcs = &vmx->vmcs[vcpu]; | |||||
running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); | |||||
if (running && hostcpu != curcpu) { | |||||
Not Done Inline ActionsIn SmartOS bhyve, we added a CPU-agnostic method for setting the tsc offset in vmm.c, which then VMX/SVM consume in order to do the CPU-specific settings. It might make sense to do something like that here. pmooney_pfmooney.com: In SmartOS bhyve, we added a CPU-agnostic method for setting the tsc offset in vmm.c, which… | |||||
Done Inline ActionsWe will look into it, but it's not a priority for now. Thanks for pointing it out. darius.mihaim_gmail.com: We will look into it, but it's not a priority for now. Thanks for pointing it out. | |||||
printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu); | |||||
return (EINVAL); | |||||
} | |||||
if (!running) | |||||
VMPTRLD(vmcs); | |||||
error = vmx_set_tsc_offset(vmx, vcpu, offset); | |||||
if (!running) | |||||
VMCLEAR(vmcs); | |||||
return (error); | |||||
} | |||||
#endif | |||||
/*
 * Operations table for the Intel (VMX) backend: each member of struct
 * vmm_ops is pointed at the corresponding vmx_* / ept_* routine.  The three
 * snapshot-related entries are only present when BHYVE_SNAPSHOT is defined.
 */
struct vmm_ops vmm_ops_intel = { | struct vmm_ops vmm_ops_intel = { | ||||
.init = vmx_init, | .init = vmx_init, | ||||
.cleanup = vmx_cleanup, | .cleanup = vmx_cleanup, | ||||
.resume = vmx_restore, | .resume = vmx_restore, | ||||
.vminit = vmx_vminit, | .vminit = vmx_vminit, | ||||
.vmrun = vmx_run, | .vmrun = vmx_run, | ||||
.vmcleanup = vmx_vmcleanup, | .vmcleanup = vmx_vmcleanup, | ||||
.vmgetreg = vmx_getreg, | .vmgetreg = vmx_getreg, | ||||
.vmsetreg = vmx_setreg, | .vmsetreg = vmx_setreg, | ||||
.vmgetdesc = vmx_getdesc, | .vmgetdesc = vmx_getdesc, | ||||
.vmsetdesc = vmx_setdesc, | .vmsetdesc = vmx_setdesc, | ||||
.vmgetcap = vmx_getcap, | .vmgetcap = vmx_getcap, | ||||
.vmsetcap = vmx_setcap, | .vmsetcap = vmx_setcap, | ||||
.vmspace_alloc = ept_vmspace_alloc, | .vmspace_alloc = ept_vmspace_alloc, | ||||
.vmspace_free = ept_vmspace_free, | .vmspace_free = ept_vmspace_free, | ||||
.vlapic_init = vmx_vlapic_init, | .vlapic_init = vmx_vlapic_init, | ||||
.vlapic_cleanup = vmx_vlapic_cleanup, | .vlapic_cleanup = vmx_vlapic_cleanup, | ||||
#ifdef BHYVE_SNAPSHOT | |||||
.vmsnapshot = vmx_snapshot_vmi, | |||||
.vmcx_snapshot = vmx_snapshot_vmcx, | |||||
.vm_restore_tsc = vmx_restore_tsc, | |||||
#endif | |||||
}; | }; |
With descriptors being common across the two CPU types, why not use a general mechanism for saving them?