Changeset View
Changeset View
Standalone View
Standalone View
sys/amd64/vmm/intel/vmx.c
Show First 20 Lines • Show All 161 Lines • ▼ Show 20 Lines | |||||
static int cap_halt_exit; | static int cap_halt_exit; | ||||
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, halt_exit, CTLFLAG_RD, &cap_halt_exit, 0, | SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, halt_exit, CTLFLAG_RD, &cap_halt_exit, 0, | ||||
"HLT triggers a VM-exit"); | "HLT triggers a VM-exit"); | ||||
static int cap_pause_exit; | static int cap_pause_exit; | ||||
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit, | SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit, | ||||
0, "PAUSE triggers a VM-exit"); | 0, "PAUSE triggers a VM-exit"); | ||||
static int cap_rdpid; | |||||
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdpid, CTLFLAG_RD, &cap_rdpid, 0, | |||||
"Guests are allowed to use RDPID"); | |||||
static int cap_rdtscp; | |||||
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdtscp, CTLFLAG_RD, &cap_rdtscp, 0, | |||||
"Guests are allowed to use RDTSCP"); | |||||
static int cap_unrestricted_guest; | static int cap_unrestricted_guest; | ||||
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD, | SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD, | ||||
&cap_unrestricted_guest, 0, "Unrestricted guests"); | &cap_unrestricted_guest, 0, "Unrestricted guests"); | ||||
static int cap_monitor_trap; | static int cap_monitor_trap; | ||||
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, monitor_trap, CTLFLAG_RD, | SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, monitor_trap, CTLFLAG_RD, | ||||
&cap_monitor_trap, 0, "Monitor trap flag"); | &cap_monitor_trap, 0, "Monitor trap flag"); | ||||
▲ Show 20 Lines • Show All 120 Lines • ▼ Show 20 Lines | |||||
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); | static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); | ||||
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); | static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); | ||||
static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); | static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); | ||||
static void vmx_inject_pir(struct vlapic *vlapic); | static void vmx_inject_pir(struct vlapic *vlapic); | ||||
#ifdef BHYVE_SNAPSHOT | #ifdef BHYVE_SNAPSHOT | ||||
static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now); | static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now); | ||||
#endif | #endif | ||||
static inline bool | |||||
host_has_rdpid(void) | |||||
{ | |||||
return ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0); | |||||
} | |||||
static inline bool | |||||
host_has_rdtscp(void) | |||||
{ | |||||
return ((amd_feature & AMDID_RDTSCP) != 0); | |||||
} | |||||
#ifdef KTR | #ifdef KTR | ||||
static const char * | static const char * | ||||
exit_reason_to_str(int reason) | exit_reason_to_str(int reason) | ||||
{ | { | ||||
static char reasonbuf[32]; | static char reasonbuf[32]; | ||||
switch (reason) { | switch (reason) { | ||||
case EXIT_REASON_EXCEPTION: | case EXIT_REASON_EXCEPTION: | ||||
▲ Show 20 Lines • Show All 436 Lines • ▼ Show 20 Lines | cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, | ||||
PROCBASED_MTF, 0, | PROCBASED_MTF, 0, | ||||
&tmp) == 0); | &tmp) == 0); | ||||
cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, | cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, | ||||
MSR_VMX_TRUE_PROCBASED_CTLS, | MSR_VMX_TRUE_PROCBASED_CTLS, | ||||
PROCBASED_PAUSE_EXITING, 0, | PROCBASED_PAUSE_EXITING, 0, | ||||
&tmp) == 0); | &tmp) == 0); | ||||
/* | |||||
* Check support for RDPID and/or RDTSCP. | |||||
* | |||||
* Support a pass-through-based implementation of these via the | |||||
* "enable RDTSCP" VM-execution control and the "RDTSC exiting" | |||||
* VM-execution control. | |||||
* | |||||
* The "enable RDTSCP" VM-execution control applies to both RDPID | |||||
* and RDTSCP (see SDM volume 3, section 25.3, "Changes to | |||||
* Instruction Behavior in VMX Non-root operation"); this is why | |||||
* only this VM-execution control needs to be enabled in order to | |||||
* enable passing through whichever of RDPID and/or RDTSCP are | |||||
* supported by the host. | |||||
* | |||||
* The "RDTSC exiting" VM-execution control applies to both RDTSC | |||||
* and RDTSCP (again, per SDM volume 3, section 25.3), and is | |||||
* already set up for RDTSC and RDTSCP pass-through by the current | |||||
* implementation of RDTSC. | |||||
* | |||||
* Although RDPID and RDTSCP are optional capabilities, since there | |||||
* does not currently seem to be a use case for enabling/disabling | |||||
* these via libvmmapi, choose not to support this and, instead, | |||||
* just statically always enable or always disable this support | |||||
* across all vCPUs on all VMs. (Note that there may be some | |||||
* complications to providing this functionality, e.g., the MSR | |||||
* bitmap is currently per-VM rather than per-vCPU while the | |||||
* capability API wants to be able to control capabilities on a | |||||
* per-vCPU basis). | |||||
*/ | |||||
error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, | |||||
MSR_VMX_PROCBASED_CTLS2, | |||||
PROCBASED2_ENABLE_RDTSCP, 0, &tmp); | |||||
cap_rdpid = error == 0 && host_has_rdpid(); | |||||
cap_rdtscp = error == 0 && host_has_rdtscp(); | |||||
if (cap_rdpid || cap_rdtscp) | |||||
procbased_ctls2 |= PROCBASED2_ENABLE_RDTSCP; | |||||
cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, | cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, | ||||
MSR_VMX_PROCBASED_CTLS2, | MSR_VMX_PROCBASED_CTLS2, | ||||
PROCBASED2_UNRESTRICTED_GUEST, 0, | PROCBASED2_UNRESTRICTED_GUEST, 0, | ||||
&tmp) == 0); | &tmp) == 0); | ||||
cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, | cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, | ||||
MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, | MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, | ||||
&tmp) == 0); | &tmp) == 0); | ||||
▲ Show 20 Lines • Show All 236 Lines • ▼ Show 20 Lines | vmx_vminit(struct vm *vm, pmap_t pmap) | ||||
* VM exit and entry respectively. It is also restored from the | * VM exit and entry respectively. It is also restored from the | ||||
* host VMCS area on a VM exit. | * host VMCS area on a VM exit. | ||||
* | * | ||||
* The TSC MSR is exposed read-only. Writes are disallowed as | * The TSC MSR is exposed read-only. Writes are disallowed as | ||||
* that will impact the host TSC. If the guest does a write | * that will impact the host TSC. If the guest does a write | ||||
* the "use TSC offsetting" execution control is enabled and the | * the "use TSC offsetting" execution control is enabled and the | ||||
* difference between the host TSC and the guest TSC is written | * difference between the host TSC and the guest TSC is written | ||||
* into the TSC offset in the VMCS. | * into the TSC offset in the VMCS. | ||||
* | |||||
* Guest TSC_AUX support is enabled if any of guest RDPID and/or | |||||
* guest RDTSCP support are enabled (since, as per Table 2-2 in SDM | |||||
* volume 4, TSC_AUX is supported if any of RDPID and/or RDTSCP are | |||||
* supported). If guest TSC_AUX support is enabled, TSC_AUX is | |||||
* exposed read-only so that the VMM can do one fewer MSR read per | |||||
* exit than if this register were exposed read-write; the guest | |||||
* restore value can be updated during guest writes (expected to be | |||||
* rare) instead of during all exits (common). | |||||
*/ | */ | ||||
if (guest_msr_rw(vmx, MSR_GSBASE) || | if (guest_msr_rw(vmx, MSR_GSBASE) || | ||||
guest_msr_rw(vmx, MSR_FSBASE) || | guest_msr_rw(vmx, MSR_FSBASE) || | ||||
guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) || | guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) || | ||||
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) || | guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) || | ||||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) || | guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) || | ||||
guest_msr_rw(vmx, MSR_EFER) || | guest_msr_rw(vmx, MSR_EFER) || | ||||
guest_msr_ro(vmx, MSR_TSC)) | guest_msr_ro(vmx, MSR_TSC) || | ||||
((cap_rdpid || cap_rdtscp) && guest_msr_ro(vmx, MSR_TSC_AUX))) | |||||
panic("vmx_vminit: error setting guest msr access"); | panic("vmx_vminit: error setting guest msr access"); | ||||
vpid_alloc(vpid, VM_MAXCPU); | vpid_alloc(vpid, VM_MAXCPU); | ||||
if (virtual_interrupt_delivery) { | if (virtual_interrupt_delivery) { | ||||
error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, | error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, | ||||
APIC_ACCESS_ADDRESS); | APIC_ACCESS_ADDRESS); | ||||
/* XXX this should really return an error to the caller */ | /* XXX this should really return an error to the caller */ | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | if (posted_interrupts) { | ||||
error += vmwrite(VMCS_PIR_VECTOR, pirvec); | error += vmwrite(VMCS_PIR_VECTOR, pirvec); | ||||
error += vmwrite(VMCS_PIR_DESC, | error += vmwrite(VMCS_PIR_DESC, | ||||
vtophys(&vmx->pir_desc[i])); | vtophys(&vmx->pir_desc[i])); | ||||
} | } | ||||
VMCLEAR(vmcs); | VMCLEAR(vmcs); | ||||
KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs")); | KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs")); | ||||
vmx->cap[i].set = 0; | vmx->cap[i].set = 0; | ||||
vmx->cap[i].set |= cap_rdpid != 0 ? 1 << VM_CAP_RDPID : 0; | |||||
vmx->cap[i].set |= cap_rdtscp != 0 ? 1 << VM_CAP_RDTSCP : 0; | |||||
vmx->cap[i].proc_ctls = procbased_ctls; | vmx->cap[i].proc_ctls = procbased_ctls; | ||||
vmx->cap[i].proc_ctls2 = procbased_ctls2; | vmx->cap[i].proc_ctls2 = procbased_ctls2; | ||||
vmx->cap[i].exc_bitmap = exc_bitmap; | vmx->cap[i].exc_bitmap = exc_bitmap; | ||||
vmx->state[i].nextrip = ~0; | vmx->state[i].nextrip = ~0; | ||||
vmx->state[i].lastcpu = NOCPU; | vmx->state[i].lastcpu = NOCPU; | ||||
vmx->state[i].vpid = vpid[i]; | vmx->state[i].vpid = vpid[i]; | ||||
▲ Show 20 Lines • Show All 1,895 Lines • ▼ Show 20 Lines | do { | ||||
* LDT, but save and restore it to be safe. | * LDT, but save and restore it to be safe. | ||||
*/ | */ | ||||
sgdt(&gdtr); | sgdt(&gdtr); | ||||
sidt(&idtr); | sidt(&idtr); | ||||
ldt_sel = sldt(); | ldt_sel = sldt(); | ||||
vmx_run_trace(vmx, vcpu); | vmx_run_trace(vmx, vcpu); | ||||
vmx_dr_enter_guest(vmxctx); | vmx_dr_enter_guest(vmxctx); | ||||
/* | |||||
* The TSC_AUX MSR must be saved/restored while interrupts | |||||
* are disabled so that it is not possible for the guest | |||||
* TSC_AUX MSR value to be overwritten by the resume | |||||
* portion of the IPI_SUSPEND codepath. This is why the | |||||
* transition of this MSR is handled separately from those | |||||
* handled by vmx_msr_guest_{enter,exit}(), which are ok to | |||||
* be transitioned with preemption disabled but interrupts | |||||
* enabled. | |||||
* | |||||
* These vmx_msr_guest_{enter,exit}_tsc_aux() calls can be | |||||
* anywhere in this loop so long as they happen with | |||||
* interrupts disabled. This location is chosen for | |||||
* simplicity. | |||||
*/ | |||||
vmx_msr_guest_enter_tsc_aux(vmx, vcpu); | |||||
grehan: I'm still undecided as to whether the debug register save/restore should be closer to the… | |||||
Done Inline Actions
Hmmm, I think the only argument for doing it before debug is that it means that, in theory, you could use DDB to single-step through vmx_msr_guest_enter_tsc_aux(). In practice, I'm not sure it really matters all that much. I think, though, that I might lean towards doing the tsc_aux save/restore "outside" of the debug registers. I would perhaps even do the save before vmx_run_trace(). jhb: > I'm still undecided as to whether the debug register save/restore should be closer to the… | |||||
rc = vmx_enter_guest(vmxctx, vmx, launched); | rc = vmx_enter_guest(vmxctx, vmx, launched); | ||||
vmx_msr_guest_exit_tsc_aux(vmx, vcpu); | |||||
vmx_dr_leave_guest(vmxctx); | vmx_dr_leave_guest(vmxctx); | ||||
bare_lgdt(&gdtr); | bare_lgdt(&gdtr); | ||||
lidt(&idtr); | lidt(&idtr); | ||||
lldt(ldt_sel); | lldt(ldt_sel); | ||||
/* Collect some information for VM exit processing */ | /* Collect some information for VM exit processing */ | ||||
vmexit->rip = rip = vmcs_guest_rip(); | vmexit->rip = rip = vmcs_guest_rip(); | ||||
▲ Show 20 Lines • Show All 323 Lines • ▼ Show 20 Lines | vmx_getcap(void *arg, int vcpu, int type, int *retval) | ||||
case VM_CAP_PAUSE_EXIT: | case VM_CAP_PAUSE_EXIT: | ||||
if (cap_pause_exit) | if (cap_pause_exit) | ||||
ret = 0; | ret = 0; | ||||
break; | break; | ||||
case VM_CAP_MTRAP_EXIT: | case VM_CAP_MTRAP_EXIT: | ||||
if (cap_monitor_trap) | if (cap_monitor_trap) | ||||
ret = 0; | ret = 0; | ||||
break; | break; | ||||
case VM_CAP_RDPID: | |||||
if (cap_rdpid) | |||||
ret = 0; | |||||
break; | |||||
case VM_CAP_RDTSCP: | |||||
if (cap_rdtscp) | |||||
ret = 0; | |||||
break; | |||||
case VM_CAP_UNRESTRICTED_GUEST: | case VM_CAP_UNRESTRICTED_GUEST: | ||||
if (cap_unrestricted_guest) | if (cap_unrestricted_guest) | ||||
ret = 0; | ret = 0; | ||||
break; | break; | ||||
case VM_CAP_ENABLE_INVPCID: | case VM_CAP_ENABLE_INVPCID: | ||||
if (cap_invpcid) | if (cap_invpcid) | ||||
ret = 0; | ret = 0; | ||||
break; | break; | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | vmx_setcap(void *arg, int vcpu, int type, int val) | ||||
case VM_CAP_PAUSE_EXIT: | case VM_CAP_PAUSE_EXIT: | ||||
if (cap_pause_exit) { | if (cap_pause_exit) { | ||||
retval = 0; | retval = 0; | ||||
pptr = &vmx->cap[vcpu].proc_ctls; | pptr = &vmx->cap[vcpu].proc_ctls; | ||||
baseval = *pptr; | baseval = *pptr; | ||||
flag = PROCBASED_PAUSE_EXITING; | flag = PROCBASED_PAUSE_EXITING; | ||||
reg = VMCS_PRI_PROC_BASED_CTLS; | reg = VMCS_PRI_PROC_BASED_CTLS; | ||||
} | } | ||||
break; | |||||
case VM_CAP_RDPID: | |||||
case VM_CAP_RDTSCP: | |||||
if (cap_rdpid || cap_rdtscp) | |||||
/* | |||||
* Choose not to support enabling/disabling | |||||
* RDPID/RDTSCP via libvmmapi since, as per the | |||||
* discussion in vmx_init(), RDPID/RDTSCP are | |||||
* either always enabled or always disabled. | |||||
*/ | |||||
error = EOPNOTSUPP; | |||||
break; | break; | ||||
case VM_CAP_UNRESTRICTED_GUEST: | case VM_CAP_UNRESTRICTED_GUEST: | ||||
if (cap_unrestricted_guest) { | if (cap_unrestricted_guest) { | ||||
retval = 0; | retval = 0; | ||||
pptr = &vmx->cap[vcpu].proc_ctls2; | pptr = &vmx->cap[vcpu].proc_ctls2; | ||||
baseval = *pptr; | baseval = *pptr; | ||||
flag = PROCBASED2_UNRESTRICTED_GUEST; | flag = PROCBASED2_UNRESTRICTED_GUEST; | ||||
reg = VMCS_SEC_PROC_BASED_CTLS; | reg = VMCS_SEC_PROC_BASED_CTLS; | ||||
▲ Show 20 Lines • Show All 641 Lines • Show Last 20 Lines |
I'm still undecided as to whether the debug register save/restore should be closer to the enter_guest call than the tsc_aux save/restore. jhb, any thoughts?