Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F151872834
D54829.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D54829.diff
View Options
diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
--- a/sys/amd64/vmm/io/vlapic.h
+++ b/sys/amd64/vmm/io/vlapic.h
@@ -114,4 +114,6 @@
int vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu);
+bool vlapic_hw_disabled(const struct vlapic *);
+
#endif /* _VLAPIC_H_ */
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -87,7 +87,7 @@
return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0);
}
-static __inline bool
+bool
vlapic_hw_disabled(const struct vlapic *vlapic)
{
return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0);
diff --git a/sys/amd64/vmm/vmm_cpuid.c b/sys/amd64/vmm/vmm_cpuid.c
--- a/sys/amd64/vmm/vmm_cpuid.c
+++ b/sys/amd64/vmm/vmm_cpuid.c
@@ -6,7 +6,7 @@
*
* Copyright 2014 Pluribus Networks Inc.
* Copyright 2018 Joyent, Inc.
- * Copyright 2024 Oxide Computer Company
+ * Copyright 2025 Oxide Computer Company
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -40,13 +40,13 @@
#include <machine/clock.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
-#include <x86/bhyve.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_vm.h>
#include "vmm_host.h"
#include "vmm_util.h"
+#include "vlapic.h"
#include "x86.h"
static MALLOC_DEFINE(M_CPUID, "cpuid", "cpuid");
@@ -180,38 +180,206 @@
}
}
+/*
+ * Updates a previously-populated set of CPUID return values to account for the
+ * runtime state of the executing vCPU, i.e., the values in its control
+ * registers and MSRs that influence the values returned by the CPUID
+ * instruction.
+ *
+ * This function does not account for "static" properties of the vCPU or VM,
+ * such as the enablement of VM-wide features and capabilities (like x2APIC or
+ * INVPCID support) or settings that vary only with the vCPU's ID (like the
+ * values returned from its topology leaves).
+ *
+ * This function assumes that it is called from within VMRUN(), which guarantees
+ * that the guest's FPU state is loaded. This is required to obtain the correct
+ * values for leaves whose values depend on the guest values of %xcr0 and the
+ * IA32_XSS MSR.
+ */
+static void
+cpuid_apply_runtime_reg_state(struct vcpu *vcpu, uint32_t func,
+ uint32_t index, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+ uint64_t cr4;
+ int error;
+ unsigned int regs[4];
+
+ switch (func) {
+ case CPUID_0000_0001:
+ /*
+ * If CPUID2_XSAVE is being advertised and the
+ * guest has set CR4_XSAVE, set CPUID2_OSXSAVE.
+ */
+ *ecx &= ~CPUID2_OSXSAVE;
+ if ((*ecx & CPUID2_XSAVE) != 0) {
+ error = vm_get_register(vcpu,
+ VM_REG_GUEST_CR4, &cr4);
+ if (error)
+ panic("cpuid_apply_runtime_reg_state: error %d "
+ "fetching %%cr4", error);
+ if ((cr4 & CR4_XSAVE) != 0) {
+ *ecx |= CPUID2_OSXSAVE;
+ }
+ }
+
+ /*
+ * AMD APM vol. 3 rev. 3.36 section E.3.2 notes that this bit is
+ * set only if the "APIC exists and is enabled." Vol. 3 of the
+ * June 2024 Intel SDM notes in section 11.4.3 that "[t]he CPUID
+ * feature flag for the APIC ... is also set to 0" when the APIC
+ * enable bit is cleared.
+ */
+ if (vlapic_hw_disabled(vm_lapic(vcpu))) {
+ *edx &= ~CPUID_APIC;
+ }
+ break;
+
+ case CPUID_0000_000D:
+ /*
+ * Leaf D reports XSAVE area sizes that vary with the current
+ * value of %xcr0. Since this function is called with %xcr0
+ * still set to its guest value, the easiest way to get the
+ * correct output is to execute CPUID on the host and copy out
+ * the relevant values.
+ */
+ cpuid_count(func, index, regs);
+ switch (index) {
+ case 0:
+ /*
+ * %eax, %ecx, and %edx return information about the
+ * complete set of features the processor supports, not
+ * just the ones that are enabled. The caller is
+ * presumed to have set these already, so just update
+ * %ebx.
+ */
+ *ebx = regs[1];
+ break;
+ case 1:
+ /*
+ * Subleaf 1 reports the XSAVE area size required for
+ * features enabled in %xcr0 and the IA32_XSS MSR via
+ * %ebx. As with subleaf 0, the caller is presumed to
+ * have set the other three output register values
+ * already.
+ *
+ * AMD APM vol. 3 rev. 3.36 and the June 2024 edition of
+ * volume 2 of the Intel SDM specify slightly different
+ * behavior here: the SDM says that the value returned
+ * in %ebx depends in part on whether %eax advertises
+ * XSAVEC and IA32_XSS support, but the APM does not. To
+ * handle these cases:
+ *
+ * 1. If the guest isn't a VMX guest, just copy the
+ * current reported save area size.
+ * 2. If both the XSAVEC and XSAVES bits are clear in
+ * %eax, return a save area size of 0 in %ebx to
+ * match the SDM description.
+ * 3. Otherwise, copy the host's reported save area
+ * size.
+ *
+ * Note that, because XSAVES saves a superset of the
+ * state saved by XSAVEC, it's OK to report the host's
+ * save area size even if the host and guest report
+ * different feature bits in %eax:
+ *
+ * - If the host supports XSAVES and the guest doesn't,
+ * the reported save area size will be too large, but
+ * the guest can still use XSAVEC safely.
+ * - If the VM's explicit CPUID values advertise XSAVES
+ * support, but the host doesn't support XSAVES, the
+ * host's reported save area size will still be large
+ * enough for the xcr0-controlled state saved by
+ * XSAVEC. The area will be undersized for XSAVES,
+ * but this is OK because the guest can't execute
+ * XSAVES anyway (it will #UD).
+ */
+ if (!vmm_is_intel()) {
+ *ebx = regs[1];
+ } else {
+ if ((*eax & (CPUID_EXTSTATE_XSAVEC |
+ CPUID_EXTSTATE_XSAVES)) == 0) {
+ *ebx = 0;
+ } else {
+ *ebx = regs[1];
+ }
+ }
+ break;
+ default:
+ /*
+ * Other subleaves of leaf D report the relative sizes
+ * and offsets of the state required for specific
+ * features in the relevant offset masks. These don't
+ * depend on the current enabled features (only the
+ * supported ones), so no enabled-feature specialization
+ * is required.
+ */
+ break;
+ }
+ break;
+ }
+}
+
+/*
+ * Emulates the CPUID instruction on the specified vCPU and returns its outputs
+ * in the rax/rbx/rcx/rdx variables.
+ *
+ * This function assumes it is called from within VMRUN(), which guarantees that
+ * certain guest state (e.g. FPU state) remains loaded.
+ */
void
vcpu_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
uint64_t *rcx, uint64_t *rdx)
{
const vcpu_cpuid_config_t *cfg = vm_cpuid_config(vcpu);
+ uint32_t func, index;
KASSERT(rax != NULL, ("rax == NULL"));
KASSERT(rbx != NULL, ("rbx == NULL"));
KASSERT(rcx != NULL, ("rcx == NULL"));
KASSERT(rdx != NULL, ("rdx == NULL"));
+ uint32_t regs[4] = { *rax, 0, *rcx, 0 };
+ func = (uint32_t)*rax;
+ index = (uint32_t)*rcx;
+
/* Fall back to legacy handling if specified */
if ((cfg->vcc_flags & VCC_FLAG_LEGACY_HANDLING) != 0) {
- uint32_t regs[4] = { *rax, 0, *rcx, 0 };
-
legacy_emulate_cpuid(vcpu, &regs[0], &regs[1], &regs[2],
&regs[3]);
- /* CPUID clears the upper 32-bits of the long-mode registers. */
- *rax = regs[0];
- *rbx = regs[1];
- *rcx = regs[2];
- *rdx = regs[3];
- return;
+ } else {
+ const struct vcpu_cpuid_entry *ent = cpuid_find_entry(cfg, func,
+ index);
+ KASSERT(ent != NULL, ("ent == NULL"));
+
+ /*
+ * The function and index in the found entry may differ from
+ * what the guest requested (if the entry was chosen via the
+ * "highest leaf" fallback described above). Use the values
+ * from the entry to ensure that the correct vCPU state fixups
+ * get applied below.
+ *
+ * The found entry may also be an all-zero empty entry (if the
+ * requested leaf is invalid but is less than the maximum valid
+ * leaf). It's OK to fall through in this case because leaf 0
+ * never has any CPU state-based fixups to apply.
+ */
+ func = ent->vce_function;
+ index = ent->vce_index;
+ regs[0] = ent->vce_eax;
+ regs[1] = ent->vce_ebx;
+ regs[2] = ent->vce_ecx;
+ regs[3] = ent->vce_edx;
}
- const struct vcpu_cpuid_entry *ent = cpuid_find_entry(cfg, *rax, *rcx);
- KASSERT(ent != NULL, ("ent == NULL"));
+ /* Fix up any returned values that vary with guest register state. */
+ cpuid_apply_runtime_reg_state(vcpu, func, index, &regs[0],
+ &regs[1], &regs[2], &regs[3]);
+
/* CPUID clears the upper 32-bits of the long-mode registers. */
- *rax = ent->vce_eax;
- *rbx = ent->vce_ebx;
- *rcx = ent->vce_ecx;
- *rdx = ent->vce_edx;
+ *rax = regs[0];
+ *rbx = regs[1];
+ *rcx = regs[2];
+ *rdx = regs[3];
}
/*
@@ -327,38 +495,6 @@
static const char bhyve_id[12] = "bhyve bhyve ";
-#define CPUID_0000_0000 (0x0)
-#define CPUID_0000_0001 (0x1)
-#define CPUID_0000_0002 (0x2)
-#define CPUID_0000_0003 (0x3)
-#define CPUID_0000_0004 (0x4)
-#define CPUID_0000_0006 (0x6)
-#define CPUID_0000_0007 (0x7)
-#define CPUID_0000_000A (0xA)
-#define CPUID_0000_000B (0xB)
-#define CPUID_0000_000D (0xD)
-#define CPUID_0000_000F (0xF)
-#define CPUID_0000_0010 (0x10)
-#define CPUID_0000_0015 (0x15)
-#define CPUID_8000_0000 (0x80000000)
-#define CPUID_8000_0001 (0x80000001)
-#define CPUID_8000_0002 (0x80000002)
-#define CPUID_8000_0003 (0x80000003)
-#define CPUID_8000_0004 (0x80000004)
-#define CPUID_8000_0006 (0x80000006)
-#define CPUID_8000_0007 (0x80000007)
-#define CPUID_8000_0008 (0x80000008)
-#define CPUID_8000_001D (0x8000001D)
-#define CPUID_8000_001E (0x8000001E)
-
-#define CPUID_VM_SIGNATURE 0x40000000
-#define CPUID_VM_HIGH CPUID_BHYVE_FEATURES
-
-/*
- * CPUID instruction Fn0000_0001:
- */
-#define CPUID_0000_0001_APICID_SHIFT 24
-
/*
* Compute ceil(log2(x)). Returns -1 if x is zero.
*/
@@ -380,7 +516,6 @@
struct vm *vm = vcpu_vm(vcpu);
int vcpu_id = vcpu_vcpuid(vcpu);
const struct xsave_limits *limits;
- uint64_t cr4;
int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
width, x2apic_id;
unsigned int func, regs[4], logical_cpus, param;
@@ -645,22 +780,6 @@
if (!(regs[2] & CPUID2_OSXSAVE))
regs[2] &= ~CPUID2_XSAVE;
- /*
- * If CPUID2_XSAVE is being advertised and the
- * guest has set CR4_XSAVE, set
- * CPUID2_OSXSAVE.
- */
- regs[2] &= ~CPUID2_OSXSAVE;
- if (regs[2] & CPUID2_XSAVE) {
- error = vm_get_register(vcpu,
- VM_REG_GUEST_CR4, &cr4);
- if (error)
- panic("x86_emulate_cpuid: error %d "
- "fetching %%cr4", error);
- if (cr4 & CR4_XSAVE)
- regs[2] |= CPUID2_OSXSAVE;
- }
-
/*
* Hide monitor/mwait until we know how to deal with
* these instructions.
diff --git a/sys/amd64/vmm/x86.h b/sys/amd64/vmm/x86.h
--- a/sys/amd64/vmm/x86.h
+++ b/sys/amd64/vmm/x86.h
@@ -26,6 +26,8 @@
* SUCH DAMAGE.
*/
+#include <x86/bhyve.h>
+
#ifndef _X86_H_
#define _X86_H_
@@ -53,6 +55,9 @@
#define CPUID_8000_001D (0x8000001D)
#define CPUID_8000_001E (0x8000001E)
+#define CPUID_VM_SIGNATURE 0x40000000
+#define CPUID_VM_HIGH CPUID_BHYVE_FEATURES
+
/*
* CPUID instruction Fn0000_0001:
*/
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Apr 12, 6:25 AM (8 h, 12 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31333745
Default Alt Text
D54829.diff (10 KB)
Attached To
Mode
D54829: vmm/amd64: Bhyve should keep guest CPUID updated when using explicit CPUID settings
Attached
Detach File
Event Timeline
Log In to Comment