Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F143187956
D26003.id75592.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D26003.id75592.diff
View Options
Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h
+++ sys/amd64/include/vmm.h
@@ -481,6 +481,8 @@
VM_CAP_UNRESTRICTED_GUEST,
VM_CAP_ENABLE_INVPCID,
VM_CAP_BPT_EXIT,
+ VM_CAP_RDPID,
+ VM_CAP_RDTSCP,
VM_CAP_MAX
};
Index: sys/amd64/vmm/intel/vmx.h
===================================================================
--- sys/amd64/vmm/intel/vmx.h
+++ sys/amd64/vmm/intel/vmx.h
@@ -117,6 +117,7 @@
IDX_MSR_SF_MASK,
IDX_MSR_KGSBASE,
IDX_MSR_PAT,
+ IDX_MSR_TSC_AUX,
GUEST_MSR_NUM /* must be the last enumeration */
};
@@ -153,4 +154,18 @@
extern char vmx_exit_guest[];
extern char vmx_exit_guest_flush_rsb[];
+static inline bool
+vmx_have_msr_tsc_aux(struct vmx *vmx)
+{
+ int rdpid_rdtscp_bits = ((1 << VM_CAP_RDPID) | (1 << VM_CAP_RDTSCP));
+
+ /*
+ * Return true if either the RDPID or the RDTSCP capability bit is
+ * set, i.e. if guest TSC_AUX support is enabled.  These bits are
+ * set identically for every vCPU in vmx_vminit() (see also the
+ * discussion in vmx_init()), so vCPU 0 is used and no vcpuid is needed.
+ */
+ return ((vmx->cap[0].set & rdpid_rdtscp_bits) != 0);
+}
+
#endif
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c
+++ sys/amd64/vmm/intel/vmx.c
@@ -167,6 +167,14 @@
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit,
0, "PAUSE triggers a VM-exit");
+static int cap_rdpid;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdpid, CTLFLAG_RD, &cap_rdpid, 0,
+ "Guests are allowed to use RDPID");
+
+static int cap_rdtscp;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdtscp, CTLFLAG_RD, &cap_rdtscp, 0,
+ "Guests are allowed to use RDTSCP");
+
static int cap_unrestricted_guest;
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD,
&cap_unrestricted_guest, 0, "Unrestricted guests");
@@ -303,6 +311,18 @@
static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now);
#endif
+static inline bool
+host_has_rdpid(void)
+{ /* Host probe used by vmx_init() when deriving cap_rdpid. */
+ return ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0);
+}
+
+static inline bool
+host_has_rdtscp(void)
+{ /* Host probe used by vmx_init() when deriving cap_rdtscp. */
+ return ((amd_feature & AMDID_RDTSCP) != 0); /* NOTE(review): presumably amd_feature is vendor-neutral; confirm */
+}
+
#ifdef KTR
static const char *
exit_reason_to_str(int reason)
@@ -755,6 +775,43 @@
PROCBASED_PAUSE_EXITING, 0,
&tmp) == 0);
+ /*
+ * Check support for RDPID and/or RDTSCP.
+ *
+ * Support a pass-through-based implementation of these via the
+ * "enable RDTSCP" VM-execution control and the "RDTSC exiting"
+ * VM-execution control.
+ *
+ * The "enable RDTSCP" VM-execution control applies to both RDPID
+ * and RDTSCP (see SDM volume 3, section 25.3, "Changes to
+ * Instruction Behavior in VMX Non-root operation"); this is why
+ * only this VM-execution control needs to be enabled in order to
+ * pass through whichever of RDPID and RDTSCP are supported by
+ * the host.
+ *
+ * The "RDTSC exiting" VM-execution control applies to both RDTSC
+ * and RDTSCP (again, per SDM volume 3, section 25.3), and is
+ * already set up for RDTSC and RDTSCP pass-through by the current
+ * implementation of RDTSC.
+ *
+ * Although RDPID and RDTSCP are optional capabilities, since there
+ * does not currently seem to be a use case for enabling/disabling
+ * these via libvmmapi, choose not to support this and, instead,
+ * just statically always enable or always disable this support
+ * across all vCPUs on all VMs. (Note that there may be some
+ * complications to providing this functionality, e.g., the MSR
+ * bitmap is currently per-VM rather than per-vCPU while the
+ * capability API wants to be able to control capabilities on a
+ * per-vCPU basis).
+ */
+ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+ MSR_VMX_PROCBASED_CTLS2,
+ PROCBASED2_ENABLE_RDTSCP, 0, &tmp);
+ cap_rdpid = error == 0 && host_has_rdpid();
+ cap_rdtscp = error == 0 && host_has_rdtscp();
+ if (cap_rdpid || cap_rdtscp)
+ procbased_ctls2 |= PROCBASED2_ENABLE_RDTSCP;
+
cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
MSR_VMX_PROCBASED_CTLS2,
PROCBASED2_UNRESTRICTED_GUEST, 0,
@@ -1007,6 +1064,15 @@
* the "use TSC offsetting" execution control is enabled and the
* difference between the host TSC and the guest TSC is written
* into the TSC offset in the VMCS.
+ *
+ * Guest TSC_AUX support is enabled if guest RDPID or guest RDTSCP
+ * support (or both) is enabled, since, as per Table 2-2 in SDM
+ * volume 4, TSC_AUX is supported if either RDPID or RDTSCP is
+ * supported. If guest TSC_AUX support is enabled, TSC_AUX is
+ * exposed read-only so that the VMM can do one fewer MSR read per
+ * exit than if this register were exposed read-write; the guest
+ * restore value can be updated during guest writes (expected to be
+ * rare) instead of during all exits (common).
*/
if (guest_msr_rw(vmx, MSR_GSBASE) ||
guest_msr_rw(vmx, MSR_FSBASE) ||
@@ -1014,7 +1080,8 @@
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
guest_msr_rw(vmx, MSR_EFER) ||
- guest_msr_ro(vmx, MSR_TSC))
+ guest_msr_ro(vmx, MSR_TSC) ||
+ ((cap_rdpid || cap_rdtscp) && guest_msr_ro(vmx, MSR_TSC_AUX)))
panic("vmx_vminit: error setting guest msr access");
vpid_alloc(vpid, VM_MAXCPU);
@@ -1093,6 +1160,8 @@
KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
vmx->cap[i].set = 0;
+ vmx->cap[i].set |= cap_rdpid != 0 ? 1 << VM_CAP_RDPID : 0;
+ vmx->cap[i].set |= cap_rdtscp != 0 ? 1 << VM_CAP_RDTSCP : 0;
vmx->cap[i].proc_ctls = procbased_ctls;
vmx->cap[i].proc_ctls2 = procbased_ctls2;
vmx->cap[i].exc_bitmap = exc_bitmap;
@@ -3004,7 +3073,24 @@
vmx_run_trace(vmx, vcpu);
vmx_dr_enter_guest(vmxctx);
+ /*
+ * The TSC_AUX MSR must be saved/restored while interrupts
+ * are disabled so that it is not possible for the guest
+ * TSC_AUX MSR value to be overwritten by the resume
+ * portion of the IPI_SUSPEND codepath. This is why the
+ * transition of this MSR is handled separately from those
+ * handled by vmx_msr_guest_{enter,exit}(), which are ok to
+ * be transitioned with preemption disabled but interrupts
+ * enabled.
+ *
+ * These vmx_msr_guest_{enter,exit}_tsc_aux() calls can be
+ * anywhere in this loop so long as they happen with
+ * interrupts disabled. This location is chosen for
+ * simplicity.
+ */
+ vmx_msr_guest_enter_tsc_aux(vmx, vcpu);
rc = vmx_enter_guest(vmxctx, vmx, launched);
+ vmx_msr_guest_exit_tsc_aux(vmx, vcpu);
vmx_dr_leave_guest(vmxctx);
bare_lgdt(&gdtr);
@@ -3344,6 +3430,14 @@
if (cap_monitor_trap)
ret = 0;
break;
+ case VM_CAP_RDPID:
+ if (cap_rdpid)
+ ret = 0;
+ break;
+ case VM_CAP_RDTSCP:
+ if (cap_rdtscp)
+ ret = 0;
+ break;
case VM_CAP_UNRESTRICTED_GUEST:
if (cap_unrestricted_guest)
ret = 0;
@@ -3408,6 +3502,17 @@
reg = VMCS_PRI_PROC_BASED_CTLS;
}
break;
+ case VM_CAP_RDPID:
+ case VM_CAP_RDTSCP:
+ if (cap_rdpid || cap_rdtscp)
+ /*
+ * Choose not to support enabling/disabling
+ * RDPID/RDTSCP via libvmmapi since, as per the
+ * discussion in vmx_init(), RDPID/RDTSCP are
+ * either always enabled or always disabled.
+ */
+ error = EOPNOTSUPP;
+ break;
case VM_CAP_UNRESTRICTED_GUEST:
if (cap_unrestricted_guest) {
retval = 0;
Index: sys/amd64/vmm/intel/vmx_msr.h
===================================================================
--- sys/amd64/vmm/intel/vmx_msr.h
+++ sys/amd64/vmm/intel/vmx_msr.h
@@ -35,8 +35,10 @@
void vmx_msr_init(void);
void vmx_msr_guest_init(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid);
void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid);
void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid);
int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
Index: sys/amd64/vmm/intel/vmx_msr.c
===================================================================
--- sys/amd64/vmm/intel/vmx_msr.c
+++ sys/amd64/vmm/intel/vmx_msr.c
@@ -44,6 +44,7 @@
#include "vmx.h"
#include "vmx_msr.h"
+#include "x86.h"
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
@@ -360,6 +361,16 @@
wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
}
+void
+vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid)
+{ /* Install the vCPU's saved TSC_AUX before guest entry; call with interrupts disabled. */
+ uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+ uint32_t cpuid = PCPU_GET(cpuid);
+ /* Skip the write when the guest value equals this CPU's id (presumably what the host keeps in TSC_AUX; confirm). */
+ if (vmx_have_msr_tsc_aux(vmx) && (guest_tsc_aux != cpuid))
+ wrmsr(MSR_TSC_AUX, guest_tsc_aux);
+}
+
void
vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
{
@@ -381,6 +392,23 @@
/* MSR_KGSBASE will be restored on the way back to userspace */
}
+void
+vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid)
+{ /* Undo vmx_msr_guest_enter_tsc_aux() after guest exit; call with interrupts disabled. */
+ uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+ uint32_t cpuid = PCPU_GET(cpuid);
+ /* Same condition as the enter path: nothing to undo if that write was skipped. */
+ if (vmx_have_msr_tsc_aux(vmx) && (guest_tsc_aux != cpuid))
+ /*
+ * Write this CPU's id back to TSC_AUX.  The guest value
+ * need not be saved here:
+ * vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX] is updated
+ * whenever the guest writes the MSR (expected to be very
+ * rare), so it always holds the current guest value.
+ */
+ wrmsr(MSR_TSC_AUX, cpuid);
+}
+
int
vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
{
@@ -473,6 +501,17 @@
case MSR_TSC:
error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
break;
+ case MSR_TSC_AUX:
+ if (vmx_have_msr_tsc_aux(vmx))
+ /*
+ * vmx_msr_guest_enter_tsc_aux() will apply this
+ * value when it is called immediately before guest
+ * entry.
+ */
+ guest_msrs[IDX_MSR_TSC_AUX] = val;
+ else
+ vm_inject_gp(vmx->vm, vcpuid);
+ break;
default:
error = EINVAL;
break;
Index: sys/amd64/vmm/x86.c
===================================================================
--- sys/amd64/vmm/x86.c
+++ sys/amd64/vmm/x86.c
@@ -92,7 +92,8 @@
{
const struct xsave_limits *limits;
uint64_t cr4;
- int error, enable_invpcid, level, width, x2apic_id;
+ int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
+ width, x2apic_id;
unsigned int func, regs[4], logical_cpus;
enum x2apic_state x2apic_state;
uint16_t cores, maxcpus, sockets, threads;
@@ -195,11 +196,13 @@
/* Hide mwaitx/monitorx capability from the guest */
regs[2] &= ~AMDID2_MWAITX;
- /*
- * Hide rdtscp/ia32_tsc_aux until we know how
- * to deal with them.
- */
- regs[3] &= ~AMDID_RDTSCP;
+ /* Advertise RDTSCP if it is enabled. */
+ error = vm_get_capability(vm, vcpu_id,
+ VM_CAP_RDTSCP, &enable_rdtscp);
+ if (error == 0 && enable_rdtscp)
+ regs[3] |= AMDID_RDTSCP;
+ else
+ regs[3] &= ~AMDID_RDTSCP;
break;
case CPUID_8000_0007:
@@ -443,6 +446,12 @@
regs[2] = 0;
regs[3] &= CPUID_STDEXT3_MD_CLEAR;
+ /* Advertise RDPID if it is enabled. */
+ error = vm_get_capability(vm, vcpu_id,
+ VM_CAP_RDPID, &enable_rdpid);
+ if (error == 0 && enable_rdpid)
+ regs[2] |= CPUID_STDEXT2_RDPID;
+
/* Advertise INVPCID if it is enabled. */
error = vm_get_capability(vm, vcpu_id,
VM_CAP_ENABLE_INVPCID, &enable_invpcid);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jan 28, 3:57 AM (10 h, 25 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28063041
Default Alt Text
D26003.id75592.diff (11 KB)
Attached To
Mode
D26003: vmm: intel: Support rdtscp and rdpid
Attached
Detach File
Event Timeline
Log In to Comment