D51552: vmm/amd64: Port bhyve CPUID control from illumos (D51552.id.diff)
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -138,7 +138,8 @@
struct vm_exit exitinfo; /* (x) exit reason and collateral */ \
cpuset_t exitinfo_cpuset; /* (x) storage for vmexit handlers */ \
uint64_t nextrip; /* (x) next instruction to execute */ \
- uint64_t tsc_offset /* (o) TSC offsetting */
+ uint64_t tsc_offset; /* (o) TSC offsetting */ \
+ vcpu_cpuid_config_t cpuid_cfg /* (x) cpuid configuration */
#define VMM_VM_MD_FIELDS \
cpuset_t startup_cpus; /* (i) [r] waiting for startup */ \
@@ -690,4 +691,70 @@
void vm_inject_pf(struct vcpu *vcpu, int error_code, uint64_t cr2);
+/*
+ * Describes an entry for `cpuid` emulation.
+ * Used internally by bhyve (kernel) in addition to the exposed ioctl(2)
+ * interface.
+ */
+struct vcpu_cpuid_entry {
+ uint32_t vce_function;
+ uint32_t vce_index;
+ uint32_t vce_flags;
+ uint32_t vce_eax;
+ uint32_t vce_ebx;
+ uint32_t vce_ecx;
+ uint32_t vce_edx;
+ uint32_t _pad;
+};
+
+/*
+ * Defined flags for vcpu_cpuid_entry`vce_flags are below.
+ */
+
+/* Use index (ecx) input value when matching entry */
+#define VCE_FLAG_MATCH_INDEX (1 << 0)
+
+/* All valid flags for vcpu_cpuid_entry`vce_flags */
+#define VCE_FLAGS_VALID VCE_FLAG_MATCH_INDEX
+
+/*
+ * Defined flags for vcpu_cpuid configuration are below.
+ * These are used by both the ioctl(2) interface via vm_vcpu_cpuid_config and
+ * internally in the kernel vmm.
+ */
+
+/* Use legacy hard-coded cpuid masking tables applied to the host CPU */
+#define VCC_FLAG_LEGACY_HANDLING (1 << 0)
+/*
+ * Emulate Intel-style fallback behavior (emit highest "standard" entry) if the
+ * queried function/index do not match. If not set, emulate AMD-style, where
+ * all zeroes are returned in such cases.
+ */
+#define VCC_FLAG_INTEL_FALLBACK (1 << 1)
+
+/* All valid flags for vm_vcpu_cpuid_config`vvcc_flags */
+#define VCC_FLAGS_VALID \
+ (VCC_FLAG_LEGACY_HANDLING | VCC_FLAG_INTEL_FALLBACK)
+
+/* Maximum vcpu_cpuid_entry records per vCPU */
+#define VMM_MAX_CPUID_ENTRIES 256
+
+#ifdef _KERNEL
+typedef struct vcpu_cpuid_config {
+ uint32_t vcc_flags;
+ uint32_t vcc_nent;
+ struct vcpu_cpuid_entry *vcc_entries;
+} vcpu_cpuid_config_t;
+
+vcpu_cpuid_config_t *vm_cpuid_config(struct vcpu *);
+int vm_get_cpuid(struct vcpu *, vcpu_cpuid_config_t *);
+int vm_set_cpuid(struct vcpu *, const vcpu_cpuid_config_t *);
+void vcpu_emulate_cpuid(struct vcpu *, uint64_t *, uint64_t *, uint64_t *,
+ uint64_t *);
+void legacy_emulate_cpuid(struct vcpu *, uint32_t *, uint32_t *, uint32_t *,
+ uint32_t *);
+void vcpu_cpuid_init(vcpu_cpuid_config_t *);
+void vcpu_cpuid_cleanup(vcpu_cpuid_config_t *);
+#endif
+
#endif /* _VMM_H_ */
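
To illustrate the sorted-table contract the header describes, here is a hypothetical entry table of that shape; the leaf values are placeholders for illustration, not what bhyve would actually report:

/*
 * Hypothetical example of a sorted vcpu_cpuid_entry table. Function 0
 * matches any %ecx input, while the 0xD sub-leaves set
 * VCE_FLAG_MATCH_INDEX so each one is selected only for its own index.
 */
static const struct vcpu_cpuid_entry example_entries[] = {
	{ .vce_function = 0x0, .vce_eax = 0xD },	/* highest std leaf */
	{ .vce_function = 0xD, .vce_index = 0x0,
	    .vce_flags = VCE_FLAG_MATCH_INDEX },	/* XCR0/XSAVE info */
	{ .vce_function = 0xD, .vce_index = 0x1,
	    .vce_flags = VCE_FLAG_MATCH_INDEX },	/* XSAVEOPT details */
	{ .vce_function = 0x80000000, .vce_eax = 0x80000008 },
};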
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -253,6 +253,23 @@
};
_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
+struct vm_vcpu_cpuid_config {
+ int vvcc_vcpuid;
+ uint32_t vvcc_flags;
+ uint32_t vvcc_nent;
+ uint32_t _pad;
+ void *vvcc_entries;
+};
+
+/* Query the computed legacy cpuid value for a vcpuid with VM_LEGACY_CPUID */
+struct vm_legacy_cpuid {
+ int vlc_vcpuid;
+ uint32_t vlc_eax;
+ uint32_t vlc_ebx;
+ uint32_t vlc_ecx;
+ uint32_t vlc_edx;
+};
+
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@@ -320,6 +337,11 @@
IOCNUM_SET_TOPOLOGY = 63,
IOCNUM_GET_TOPOLOGY = 64,
+ /* CPUID override */
+ IOCNUM_GET_CPUID = 65,
+ IOCNUM_SET_CPUID = 66,
+ IOCNUM_LEGACY_CPUID = 67,
+
/* legacy interrupt injection */
IOCNUM_ISA_ASSERT_IRQ = 80,
IOCNUM_ISA_DEASSERT_IRQ = 81,
@@ -436,6 +458,12 @@
_IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
#define VM_GET_TOPOLOGY \
_IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_CPUID \
+ _IOWR('v', IOCNUM_GET_CPUID, struct vm_vcpu_cpuid_config)
+#define VM_SET_CPUID \
+ _IOW('v', IOCNUM_SET_CPUID, struct vm_vcpu_cpuid_config)
+#define VM_LEGACY_CPUID \
+ _IOWR('v', IOCNUM_LEGACY_CPUID, struct vm_legacy_cpuid)
#define VM_GET_GPA_PMAP \
_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
#define VM_GLA2GPA \
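
As a usage sketch for the new ioctls (not part of this change): a userspace VMM might install an explicit per-vCPU leaf table roughly as follows, assuming vmfd is a descriptor for the VM's /dev/vmm device and entries is a table sorted as required.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <err.h>
#include <stdint.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>

/*
 * Hypothetical sketch: hand a sorted entry table for vCPU 0 to the
 * kernel with VM_SET_CPUID, selecting Intel-style fallback for leaves
 * that have no matching entry.
 */
static void
set_cpuid_table(int vmfd, struct vcpu_cpuid_entry *entries, uint32_t nent)
{
	struct vm_vcpu_cpuid_config cfg = {
		.vvcc_vcpuid = 0,
		.vvcc_flags = VCC_FLAG_INTEL_FALLBACK,
		.vvcc_nent = nent,
		.vvcc_entries = entries,
	};

	if (ioctl(vmfd, VM_SET_CPUID, &cfg) != 0)
		err(1, "VM_SET_CPUID");
}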
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -1592,9 +1592,9 @@
break;
case VMCB_EXIT_CPUID:
vmm_stat_incr(vcpu->vcpu, VMEXIT_CPUID, 1);
- handled = x86_emulate_cpuid(vcpu->vcpu,
- &state->rax, &ctx->sctx_rbx, &ctx->sctx_rcx,
- &ctx->sctx_rdx);
+ vcpu_emulate_cpuid(vcpu->vcpu, &state->rax,
+ &ctx->sctx_rbx, &ctx->sctx_rcx, &ctx->sctx_rdx);
+ handled = 1;
break;
case VMCB_EXIT_HLT:
vmm_stat_incr(vcpu->vcpu, VMEXIT_HLT, 1);
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -1247,17 +1247,6 @@
return (vcpu);
}
-static int
-vmx_handle_cpuid(struct vmx_vcpu *vcpu, struct vmxctx *vmxctx)
-{
- int handled;
-
- handled = x86_emulate_cpuid(vcpu->vcpu, (uint64_t *)&vmxctx->guest_rax,
- (uint64_t *)&vmxctx->guest_rbx, (uint64_t *)&vmxctx->guest_rcx,
- (uint64_t *)&vmxctx->guest_rdx);
- return (handled);
-}
-
static __inline void
vmx_run_trace(struct vmx_vcpu *vcpu)
{
@@ -2668,7 +2657,12 @@
case EXIT_REASON_CPUID:
vmm_stat_incr(vcpu->vcpu, VMEXIT_CPUID, 1);
SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpuid, vmexit);
- handled = vmx_handle_cpuid(vcpu, vmxctx);
+ vcpu_emulate_cpuid(vcpu->vcpu,
+ (uint64_t *)&vmxctx->guest_rax,
+ (uint64_t *)&vmxctx->guest_rbx,
+ (uint64_t *)&vmxctx->guest_rcx,
+ (uint64_t *)&vmxctx->guest_rdx);
+ handled = HANDLED;
break;
case EXIT_REASON_EXCEPTION:
vmm_stat_incr(vcpu->vcpu, VMEXIT_EXCEPTION, 1);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -86,6 +86,7 @@
#include "vrtc.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"
+#include "x86.h"
#include "io/ppt.h"
#include "io/iommu.h"
@@ -232,6 +233,9 @@
vcpu->cookie = NULL;
if (destroy) {
vmm_stat_free(vcpu->stats);
+
+ vcpu_cpuid_cleanup(&vcpu->cpuid_cfg);
+
fpu_save_area_free(vcpu->guestfpu);
vcpu_lock_destroy(vcpu);
free(vcpu, M_VM);
@@ -261,6 +265,8 @@
static void
vcpu_init(struct vcpu *vcpu)
{
+ vcpu_cpuid_init(&vcpu->cpuid_cfg);
+
vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
vcpu->vlapic = vmmops_vlapic_init(vcpu->cookie);
vm_set_x2apic_state(vcpu, X2APIC_DISABLED);
@@ -1615,6 +1621,12 @@
return (vmmops_setcap(vcpu->cookie, type, val));
}
+vcpu_cpuid_config_t *
+vm_cpuid_config(struct vcpu *vcpu)
+{
+ return (&vcpu->cpuid_cfg);
+}
+
struct vlapic *
vm_lapic(struct vcpu *vcpu)
{
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/vmm_cpuid.c
copy from sys/amd64/vmm/x86.c
copy to sys/amd64/vmm/vmm_cpuid.c
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/vmm_cpuid.c
@@ -4,6 +4,10 @@
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
+ * Copyright 2014 Pluribus Networks Inc.
+ * Copyright 2018 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -27,16 +31,15 @@
*/
#include <sys/param.h>
-#include <sys/pcpu.h>
-#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/sysctl.h>
+#include <sys/types.h>
+#include <sys/errno.h>
#include <machine/clock.h>
-#include <machine/cpufunc.h>
#include <machine/md_var.h>
-#include <machine/segments.h>
#include <machine/specialreg.h>
-#include <machine/vmm.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_vm.h>
@@ -45,18 +48,273 @@
#include "vmm_util.h"
#include "x86.h"
-SYSCTL_DECL(_hw_vmm);
-static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- NULL);
+static MALLOC_DEFINE(M_CPUID, "cpuid", "cpuid");
-#define CPUID_VM_SIGNATURE 0x40000000
-#define CPUID_BHYVE_FEATURES 0x40000001
-#define CPUID_VM_HIGH CPUID_BHYVE_FEATURES
+/*
+ * CPUID Emulation
+ *
+ * All CPUID instruction exits are handled by the in-kernel emulation.
+ *
+ * ----------------
+ * Legacy Emulation
+ * ----------------
+ *
+ * Originally, the kernel vmm portion of bhyve relied on fixed logic to filter
+ * and/or generate CPUID results based on what was reported by the host CPU, as
+ * well as attributes of the VM (such as CPU topology and enabled features).
+ * This is largely adequate to expose CPU capabilities to the guest in a
+ * manner which allows it to operate properly.
+ *
+ * ------------------------------
+ * Userspace-Controlled Emulation
+ * ------------------------------
+ *
+ * In certain situations, more control over the CPUID emulation results
+ * presented to the guest is desired. Live migration between physical hosts
+ * is one such
+ * example, where the underlying CPUs, or at least their microcode, may differ
+ * between the source and destination. In such cases, where changes to the
+ * CPUID results cannot be tolerated, the userspace portion of the VMM can be in
+ * complete control over the leaves which are presented to the guest. It may
+ * still consult the "legacy" CPUID data for guidance about which CPU features
+ * are safe to expose (due to hypervisor limitations, etc). This leaf
+ * information is configured on a per-vCPU basis.
+ *
+ * The emulation entries provided by userspace are expected to be in sorted
+ * order, running from lowest function and index to highest.
+ *
+ * For example:
+ * (func: 00h idx: 00h) ->
+ * (flags: 0, eax: highest std leaf, ebx-edx: vendor id)
+ * (func: 0Dh idx: 00h) ->
+ * (flags: VCE_FLAG_MATCH_INDEX, eax - edx: XCR0/XSAVE info)
+ * (func: 0Dh idx: 01h) ->
+ * (flags: VCE_FLAG_MATCH_INDEX, eax - edx: XSAVE/XSAVEOPT details)
+ * ...
+ * (func: 0Dh idx: 07h) ->
+ * (flags: VCE_FLAG_MATCH_INDEX, eax - edx: AVX-512 details)
+ * (func: 80000000h idx: 0h) ->
+ * (flags: 0, eax: highest extd leaf ...)
+ * ...
+ */
-/* Features advertised in CPUID_BHYVE_FEATURES %eax */
-#define CPUID_BHYVE_FEAT_EXT_DEST_ID (1UL << 0) /* MSI Extended Dest ID */
-static const char bhyve_id[12] = "bhyve bhyve ";
+#define CPUID_TYPE_MASK 0xf0000000
+#define CPUID_TYPE_STD 0x00000000
+#define CPUID_TYPE_EXTD 0x80000000
+
+static const struct vcpu_cpuid_entry cpuid_empty_entry = { 0 };
+
+/*
+ * Given the CPUID configuration for a vCPU, locate the entry which matches the
+ * provided function/index tuple. The entries list is walked in order, and the
+ * first valid match based on the function/index and flags will be emitted.
+ *
+ * If no match is found, but Intel-style fallback is configured, then the
+ * highest standard leaf encountered will be emitted.
+ */
+static const struct vcpu_cpuid_entry *
+cpuid_find_entry(const vcpu_cpuid_config_t *cfg, uint32_t func, uint32_t idx)
+{
+ const struct vcpu_cpuid_entry *last_std = &cpuid_empty_entry;
+ const bool intel_fallback =
+ (cfg->vcc_flags & VCC_FLAG_INTEL_FALLBACK) != 0;
+ bool matched_leaf = false;
+
+ KASSERT((cfg->vcc_flags & VCC_FLAG_LEGACY_HANDLING) == 0,
+ ("legacy CPUID handling enabled"));
+
+ for (u_int i = 0; i < cfg->vcc_nent; i++) {
+ const struct vcpu_cpuid_entry *ent = &cfg->vcc_entries[i];
+ const bool ent_is_std =
+ (ent->vce_function & CPUID_TYPE_MASK) == CPUID_TYPE_STD;
+ const bool ent_must_match_idx =
+ (ent->vce_flags & VCE_FLAG_MATCH_INDEX) != 0;
+
+ if (ent_is_std) {
+ /*
+ * Keep track of the last "standard" leaf for
+ * Intel-style fallback behavior.
+ *
+ * This does not currently account for the sub-leaf
+ * index matching behavior for fallback described in the
+ * SDM. It is not clear if any consumers rely on such
+ * matching when encountering fallback.
+ */
+ last_std = ent;
+ }
+ if (ent->vce_function == func) {
+ if (ent->vce_index == idx || !ent_must_match_idx) {
+ return (ent);
+ }
+ /*
+ * Make note of when the top-level leaf matches, even
+ * when the index does not.
+ */
+ matched_leaf = true;
+ } else if (ent->vce_function > func) {
+ if ((ent->vce_function & CPUID_TYPE_MASK) ==
+ (func & CPUID_TYPE_MASK)) {
+ /*
+ * We are beyond a valid leaf to match, but have
+ * not exceeded the maximum leaf for this "type"
+ * (standard, extended, hvm, etc), so return an
+ * empty entry.
+ */
+ return (&cpuid_empty_entry);
+ } else {
+ /*
+ * Otherwise, we can stop now, having gone
+ * beyond the last entry which could match the
+ * target function in a sorted list.
+ */
+ break;
+ }
+ }
+ }
+
+ if (matched_leaf || !intel_fallback) {
+ return (&cpuid_empty_entry);
+ } else {
+ return (last_std);
+ }
+}
+
+void
+vcpu_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
+ uint64_t *rcx, uint64_t *rdx)
+{
+ const vcpu_cpuid_config_t *cfg = vm_cpuid_config(vcpu);
+
+ KASSERT(rax != NULL, ("rax == NULL"));
+ KASSERT(rbx != NULL, ("rbx == NULL"));
+ KASSERT(rcx != NULL, ("rcx == NULL"));
+ KASSERT(rdx != NULL, ("rdx == NULL"));
+
+ /* Fall back to legacy handling if specified */
+ if ((cfg->vcc_flags & VCC_FLAG_LEGACY_HANDLING) != 0) {
+ uint32_t regs[4] = { *rax, 0, *rcx, 0 };
+
+ legacy_emulate_cpuid(vcpu, &regs[0], &regs[1], &regs[2],
+ &regs[3]);
+ /* CPUID clears the upper 32-bits of the long-mode registers. */
+ *rax = regs[0];
+ *rbx = regs[1];
+ *rcx = regs[2];
+ *rdx = regs[3];
+ return;
+ }
+
+ const struct vcpu_cpuid_entry *ent = cpuid_find_entry(cfg, *rax, *rcx);
+ KASSERT(ent != NULL, ("ent == NULL"));
+ /* CPUID clears the upper 32-bits of the long-mode registers. */
+ *rax = ent->vce_eax;
+ *rbx = ent->vce_ebx;
+ *rcx = ent->vce_ecx;
+ *rdx = ent->vce_edx;
+}
+
+/*
+ * Get the current CPUID emulation configuration for this vCPU.
+ *
+ * Only the existing flags will be emitted if the vCPU is configured for legacy
+ * operation via the VCC_FLAG_LEGACY_HANDLING flag. If in userspace-controlled
+ * mode, then we will attempt to copy the existing entries into vcc_entries,
+ * its size specified by vcc_nent.
+ *
+ * Regardless of whether vcc_entries is adequately sized (or even present),
+ * vcc_nent will be set to the number of existing entries.
+ */
+int
+vm_get_cpuid(struct vcpu *vcpu, vcpu_cpuid_config_t *res)
+{
+ const vcpu_cpuid_config_t *src = vm_cpuid_config(vcpu);
+ if (src->vcc_nent > res->vcc_nent) {
+ res->vcc_nent = src->vcc_nent;
+ return (E2BIG);
+ } else if (src->vcc_nent != 0) {
+ bcopy(src->vcc_entries, res->vcc_entries,
+ src->vcc_nent * sizeof (struct vcpu_cpuid_entry));
+ }
+ res->vcc_flags = src->vcc_flags;
+ res->vcc_nent = src->vcc_nent;
+ return (0);
+}
+
+/*
+ * Set the CPUID emulation configuration for this vCPU.
+ *
+ * If VCC_FLAG_LEGACY_HANDLING is set in vcc_flags, then vcc_nent is expected to
+ * be set to 0, as configuring a list of entries would be useless when using the
+ * legacy handling.
+ *
+ * Any existing entries which are configured are freed, and the newly provided
+ * ones will be copied into their place.
+ */
+int
+vm_set_cpuid(struct vcpu *vcpu, const vcpu_cpuid_config_t *src)
+{
+ if (src->vcc_nent > VMM_MAX_CPUID_ENTRIES) {
+ return (EINVAL);
+ }
+ if ((src->vcc_flags & ~VCC_FLAGS_VALID) != 0) {
+ return (EINVAL);
+ }
+ if ((src->vcc_flags & VCC_FLAG_LEGACY_HANDLING) != 0 &&
+ src->vcc_nent != 0) {
+ /* No entries should be provided if using legacy handling */
+ return (EINVAL);
+ }
+ for (u_int i = 0; i < src->vcc_nent; i++) {
+ /* Ensure all entries carry valid flags */
+ if ((src->vcc_entries[i].vce_flags & ~VCE_FLAGS_VALID) != 0) {
+ return (EINVAL);
+ }
+ }
+
+ vcpu_cpuid_config_t *cfg = vm_cpuid_config(vcpu);
+
+ /* Free any existing entries first */
+ vcpu_cpuid_cleanup(cfg);
+
+ /* Copy supplied entries into freshly allocated space */
+ if (src->vcc_nent != 0) {
+ const size_t entries_sz =
+ src->vcc_nent * sizeof (struct vcpu_cpuid_entry);
+
+ cfg->vcc_nent = src->vcc_nent;
+ cfg->vcc_entries = malloc(entries_sz, M_CPUID, M_WAITOK);
+ bcopy(src->vcc_entries, cfg->vcc_entries, entries_sz);
+ }
+ cfg->vcc_flags = src->vcc_flags;
+
+ return (0);
+}
+
+void
+vcpu_cpuid_init(vcpu_cpuid_config_t *cfg)
+{
+ /* Default to legacy-style handling */
+ cfg->vcc_flags = VCC_FLAG_LEGACY_HANDLING;
+ cfg->vcc_nent = 0;
+ cfg->vcc_entries = NULL;
+}
+
+void
+vcpu_cpuid_cleanup(vcpu_cpuid_config_t *cfg)
+{
+ if (cfg->vcc_nent != 0) {
+ KASSERT(cfg->vcc_entries != NULL, ("cfg->vcc_entries == NULL"));
+
+ free(cfg->vcc_entries, M_CPUID);
+
+ cfg->vcc_nent = 0;
+ cfg->vcc_entries = NULL;
+ }
+}
+
+SYSCTL_DECL(_hw_vmm);
+static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ NULL);
static uint64_t bhyve_xcpuids;
SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
@@ -66,6 +324,44 @@
SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
&cpuid_leaf_b, 0, NULL);
+static const char bhyve_id[12] = "bhyve bhyve ";
+
+#define CPUID_0000_0000 (0x0)
+#define CPUID_0000_0001 (0x1)
+#define CPUID_0000_0002 (0x2)
+#define CPUID_0000_0003 (0x3)
+#define CPUID_0000_0004 (0x4)
+#define CPUID_0000_0006 (0x6)
+#define CPUID_0000_0007 (0x7)
+#define CPUID_0000_000A (0xA)
+#define CPUID_0000_000B (0xB)
+#define CPUID_0000_000D (0xD)
+#define CPUID_0000_000F (0xF)
+#define CPUID_0000_0010 (0x10)
+#define CPUID_0000_0015 (0x15)
+#define CPUID_8000_0000 (0x80000000)
+#define CPUID_8000_0001 (0x80000001)
+#define CPUID_8000_0002 (0x80000002)
+#define CPUID_8000_0003 (0x80000003)
+#define CPUID_8000_0004 (0x80000004)
+#define CPUID_8000_0006 (0x80000006)
+#define CPUID_8000_0007 (0x80000007)
+#define CPUID_8000_0008 (0x80000008)
+#define CPUID_8000_001D (0x8000001D)
+#define CPUID_8000_001E (0x8000001E)
+
+#define CPUID_VM_SIGNATURE 0x40000000
+#define CPUID_BHYVE_FEATURES 0x40000001
+#define CPUID_VM_HIGH CPUID_BHYVE_FEATURES
+
+/* Features advertised in CPUID_BHYVE_FEATURES %eax */
+#define CPUID_BHYVE_FEAT_EXT_DEST_ID (1UL << 0) /* MSI Extended Dest ID */
+
+/*
+ * CPUID instruction Fn0000_0001:
+ */
+#define CPUID_0000_0001_APICID_SHIFT 24
+
/*
* Compute ceil(log2(x)). Returns -1 if x is zero.
*/
@@ -76,9 +372,13 @@
return (x == 0 ? -1 : order_base_2(x));
}
-int
-x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
- uint64_t *rcx, uint64_t *rdx)
+/*
+ * The "legacy" bhyve cpuid emulation, which largly applies statically defined
+ * masks to the data provided by the host CPU.
+ */
+void
+legacy_emulate_cpuid(struct vcpu *vcpu, uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
{
struct vm *vm = vcpu_vm(vcpu);
int vcpu_id = vcpu_vcpuid(vcpu);
@@ -94,8 +394,8 @@
* The function of CPUID is controlled through the provided value of
* %eax (and secondarily %ecx, for certain leaf data).
*/
- func = (uint32_t)*rax;
- param = (uint32_t)*rcx;
+ func = (uint32_t)*eax;
+ param = (uint32_t)*ecx;
VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param);
@@ -156,8 +456,11 @@
* pkg_id_shift and other OSes may rely on it.
*/
width = MIN(0xF, log2(threads * cores));
+ if (width < 0x4)
+ width = 0;
logical_cpus = MIN(0xFF, threads * cores - 1);
- regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
+ regs[2] = (width << AMDID_COREID_SIZE_SHIFT) |
+ logical_cpus;
}
break;
@@ -201,6 +504,7 @@
break;
case CPUID_8000_0007:
+ cpuid_count(func, param, regs);
/*
* AMD uses this leaf to advertise the processor's
* power monitoring and RAS capabilities. These
@@ -237,7 +541,7 @@
goto default_leaf;
/*
* Similar to Intel, generate a fictitious cache
* topology for the guest with L3 shared by the
* package, and L1 and L2 local to a core.
*/
@@ -269,7 +573,7 @@
logical_cpus = MIN(0xfff, logical_cpus - 1);
regs[0] = (logical_cpus << 14) | (1 << 8) |
(level << 5) | func;
- regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
+ regs[1] = func > 0 ? CACHE_LINE_SIZE - 1 : 0;
/*
* ecx: Number of cache ways for non-fully
@@ -361,7 +665,7 @@
*/
regs[2] &= ~CPUID2_MON;
- /*
+ /*
* Hide the performance and debug features.
*/
regs[2] &= ~CPUID2_PDCM;
@@ -616,9 +920,9 @@
case CPUID_BHYVE_FEATURES:
regs[0] = CPUID_BHYVE_FEAT_EXT_DEST_ID;
regs[1] = 0;
regs[2] = 0;
regs[3] = 0;
break;
default:
@@ -633,126 +937,8 @@
break;
}
- /*
- * CPUID clears the upper 32-bits of the long-mode registers.
- */
- *rax = regs[0];
- *rbx = regs[1];
- *rcx = regs[2];
- *rdx = regs[3];
-
- return (1);
-}
-
-bool
-vm_cpuid_capability(struct vcpu *vcpu, enum vm_cpuid_capability cap)
-{
- bool rv;
-
- KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
- __func__, cap));
-
- /*
- * Simply passthrough the capabilities of the host cpu for now.
- */
- rv = false;
- switch (cap) {
- case VCC_NO_EXECUTE:
- if (amd_feature & AMDID_NX)
- rv = true;
- break;
- case VCC_FFXSR:
- if (amd_feature & AMDID_FFXSR)
- rv = true;
- break;
- case VCC_TCE:
- if (amd_feature2 & AMDID2_TCE)
- rv = true;
- break;
- default:
- panic("%s: unknown vm_cpu_capability %d", __func__, cap);
- }
- return (rv);
-}
-
-int
-vm_rdmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t *val)
-{
- switch (num) {
- case MSR_MTRRcap:
- *val = MTRR_CAP_WC | MTRR_CAP_FIXED | VMM_MTRR_VAR_MAX;
- break;
- case MSR_MTRRdefType:
- *val = mtrr->def_type;
- break;
- case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
- *val = mtrr->fixed4k[num - MSR_MTRR4kBase];
- break;
- case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
- *val = mtrr->fixed16k[num - MSR_MTRR16kBase];
- break;
- case MSR_MTRR64kBase:
- *val = mtrr->fixed64k;
- break;
- case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
- u_int offset = num - MSR_MTRRVarBase;
- if (offset % 2 == 0) {
- *val = mtrr->var[offset / 2].base;
- } else {
- *val = mtrr->var[offset / 2].mask;
- }
- break;
- }
- default:
- return (-1);
- }
-
- return (0);
-}
-
-int
-vm_wrmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t val)
-{
- switch (num) {
- case MSR_MTRRcap:
- /* MTRRCAP is read only */
- return (-1);
- case MSR_MTRRdefType:
- if (val & ~VMM_MTRR_DEF_MASK) {
- /* generate #GP on writes to reserved fields */
- return (-1);
- }
- mtrr->def_type = val;
- break;
- case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
- mtrr->fixed4k[num - MSR_MTRR4kBase] = val;
- break;
- case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
- mtrr->fixed16k[num - MSR_MTRR16kBase] = val;
- break;
- case MSR_MTRR64kBase:
- mtrr->fixed64k = val;
- break;
- case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
- u_int offset = num - MSR_MTRRVarBase;
- if (offset % 2 == 0) {
- if (val & ~VMM_MTRR_PHYSBASE_MASK) {
- /* generate #GP on writes to reserved fields */
- return (-1);
- }
- mtrr->var[offset / 2].base = val;
- } else {
- if (val & ~VMM_MTRR_PHYSMASK_MASK) {
- /* generate #GP on writes to reserved fields */
- return (-1);
- }
- mtrr->var[offset / 2].mask = val;
- }
- break;
- }
- default:
- return (-1);
- }
-
- return (0);
+ *eax = regs[0];
+ *ebx = regs[1];
+ *ecx = regs[2];
+ *edx = regs[3];
}
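
To make the matching and fallback rules of cpuid_find_entry() concrete, here is a standalone, hypothetical userspace mock of the same walk over a small sorted table (simplified types and placeholder values, not the kernel code itself):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define MATCH_INDEX	0x1		/* stands in for VCE_FLAG_MATCH_INDEX */
#define TYPE_MASK	0xf0000000u	/* stands in for CPUID_TYPE_MASK */

struct ent { uint32_t func, idx, flags, eax; };

static const struct ent table[] = {
	{ 0x00000000, 0, 0, 0xD },		/* highest std leaf */
	{ 0x0000000D, 0, MATCH_INDEX, 0x7 },	/* XSAVE main leaf */
	{ 0x0000000D, 1, MATCH_INDEX, 0x1 },	/* XSAVEOPT sub-leaf */
	{ 0x80000000, 0, 0, 0x80000008 },	/* highest extd leaf */
};
static const struct ent empty;

static const struct ent *
find(uint32_t func, uint32_t idx, bool intel_fallback)
{
	const struct ent *last_std = &empty;
	bool matched_leaf = false;

	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		const struct ent *e = &table[i];

		if ((e->func & TYPE_MASK) == 0)
			last_std = e;	/* remember last standard entry */
		if (e->func == func) {
			if (e->idx == idx || (e->flags & MATCH_INDEX) == 0)
				return (e);
			matched_leaf = true;
		} else if (e->func > func) {
			/* Past the target: zeroes if still within the type. */
			if ((e->func & TYPE_MASK) == (func & TYPE_MASK))
				return (&empty);
			break;
		}
	}
	return ((matched_leaf || !intel_fallback) ? &empty : last_std);
}

int
main(void)
{
	printf("%#x\n", find(0xD, 1, true)->eax);	/* exact match: 0x1 */
	printf("%#x\n", find(0x5, 0, true)->eax);	/* in-range miss: 0 */
	printf("%#x\n", find(0x16, 0, true)->eax);	/* Intel fallback: 0x1 */
	printf("%#x\n", find(0x16, 0, false)->eax);	/* AMD-style: 0 */
	return (0);
}

Note that the fallback case emits the last standard entry in the sorted table (here the 0xD/1 sub-leaf), which is what the "highest standard leaf encountered" wording in the header refers to.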
diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c
--- a/sys/amd64/vmm/vmm_dev_machdep.c
+++ b/sys/amd64/vmm/vmm_dev_machdep.c
@@ -33,6 +33,7 @@
#include <sys/conf.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
+#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/proc.h>
@@ -58,6 +59,10 @@
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"
+#include "x86.h"
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
#ifdef COMPAT_FREEBSD13
struct vm_stats_13 {
@@ -163,6 +168,9 @@
VMMDEV_IOCTL(VM_ISA_DEASSERT_IRQ, 0),
VMMDEV_IOCTL(VM_ISA_PULSE_IRQ, 0),
VMMDEV_IOCTL(VM_ISA_SET_IRQ_TRIGGER, 0),
+ VMMDEV_IOCTL(VM_GET_CPUID, VMMDEV_IOCTL_LOCK_ONE_VCPU),
+ VMMDEV_IOCTL(VM_SET_CPUID, VMMDEV_IOCTL_LOCK_ONE_VCPU),
+ VMMDEV_IOCTL(VM_LEGACY_CPUID, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_GET_GPA_PMAP, 0),
VMMDEV_IOCTL(VM_GET_HPET_CAPABILITIES, 0),
VMMDEV_IOCTL(VM_RTC_READ, 0),
@@ -478,6 +486,110 @@
error = vm_get_x2apic_state(vcpu, &x2apic->state);
break;
}
+ case VM_GET_CPUID: {
+ struct vm_vcpu_cpuid_config *cfg = (void *)data;
+ struct vcpu_cpuid_entry *entries = NULL;
+
+ if (cfg->vvcc_vcpuid != vcpu_vcpuid(vcpu)) {
+ error = EINVAL;
+ break;
+ }
+ if (cfg->vvcc_nent > VMM_MAX_CPUID_ENTRIES) {
+ error = EINVAL;
+ break;
+ }
+
+ const size_t entries_size =
+ cfg->vvcc_nent * sizeof (struct vcpu_cpuid_entry);
+ if (entries_size != 0) {
+ entries = malloc(entries_size, M_VMMDEV,
+ M_WAITOK | M_ZERO);
+ }
+
+ vcpu_cpuid_config_t vm_cfg = {
+ .vcc_nent = cfg->vvcc_nent,
+ .vcc_entries = entries,
+ };
+ error = vm_get_cpuid(vcpu, &vm_cfg);
+
+ /*
+ * Only attempt to copy out the resultant entries if we were
+ * able to query them from the instance. The flags and number
+ * of entries are emitted regardless.
+ */
+ cfg->vvcc_flags = vm_cfg.vcc_flags;
+ cfg->vvcc_nent = vm_cfg.vcc_nent;
+ if (entries != NULL) {
+ if (error == 0)
+ error = copyout(entries, cfg->vvcc_entries,
+ entries_size);
+
+ free(entries, M_VMMDEV);
+ }
+
+ /*
+ * If vm_get_cpuid() returned E2BIG, clear the error to allow
+ * flags and number of entries to be returned.
+ */
+ if (error == E2BIG)
+ error = 0;
+ break;
+ }
+ case VM_SET_CPUID: {
+ struct vm_vcpu_cpuid_config *cfg = (void *)data;
+ struct vcpu_cpuid_entry *entries = NULL;
+ size_t entries_size = 0;
+
+ if (cfg->vvcc_vcpuid != vcpu_vcpuid(vcpu)) {
+ error = EINVAL;
+ break;
+ }
+ if (cfg->vvcc_nent > VMM_MAX_CPUID_ENTRIES) {
+ error = EFBIG;
+ break;
+ }
+ if ((cfg->vvcc_flags & VCC_FLAG_LEGACY_HANDLING) != 0) {
+ /*
+ * If we are being instructed to use "legacy" handling,
+ * then no entries should be provided, since the static
+ * in-kernel masking will be used.
+ */
+ if (cfg->vvcc_nent != 0) {
+ error = EINVAL;
+ break;
+ }
+ } else if (cfg->vvcc_nent != 0) {
+ entries_size =
+ cfg->vvcc_nent * sizeof (struct vcpu_cpuid_entry);
+ entries = malloc(entries_size, M_VMMDEV, M_WAITOK);
+
+ error = copyin(cfg->vvcc_entries, entries,
+ entries_size);
+ if (error != 0) {
+ free(entries, M_VMMDEV);
+ break;
+ }
+ }
+
+ vcpu_cpuid_config_t vm_cfg = {
+ .vcc_flags = cfg->vvcc_flags,
+ .vcc_nent = cfg->vvcc_nent,
+ .vcc_entries = entries,
+ };
+ error = vm_set_cpuid(vcpu, &vm_cfg);
+
+ if (entries != NULL) {
+ free(entries, M_VMMDEV);
+ }
+ break;
+ }
+ case VM_LEGACY_CPUID: {
+ struct vm_legacy_cpuid *vlc = (void *)data;
+
+ legacy_emulate_cpuid(vcpu, &vlc->vlc_eax, &vlc->vlc_ebx,
+ &vlc->vlc_ecx, &vlc->vlc_edx);
+ break;
+ }
case VM_GET_GPA_PMAP: {
struct vm_gpa_pte *gpapte;
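
One consequence of the E2BIG handling in the VM_GET_CPUID case above is that userspace can size its buffer in two passes. A minimal sketch, assuming vmfd is the VM's /dev/vmm descriptor (not part of this change):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <err.h>
#include <stdint.h>
#include <stdlib.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>

/*
 * Hypothetical two-pass read of the CPUID entry table for vCPU 0: the
 * first call, with vvcc_nent == 0, only reports the flags and count
 * (the kernel clears the E2BIG); the second call copies the entries out.
 */
static struct vcpu_cpuid_entry *
read_cpuid_table(int vmfd, uint32_t *nentp)
{
	struct vm_vcpu_cpuid_config cfg = { .vvcc_vcpuid = 0 };
	struct vcpu_cpuid_entry *entries;

	if (ioctl(vmfd, VM_GET_CPUID, &cfg) != 0)
		err(1, "VM_GET_CPUID (size query)");
	if (cfg.vvcc_nent == 0) {
		/* e.g. legacy handling is active; no explicit table */
		*nentp = 0;
		return (NULL);
	}

	entries = calloc(cfg.vvcc_nent, sizeof(*entries));
	if (entries == NULL)
		err(1, "calloc");
	cfg.vvcc_entries = entries;

	if (ioctl(vmfd, VM_GET_CPUID, &cfg) != 0)
		err(1, "VM_GET_CPUID");
	*nentp = cfg.vvcc_nent;
	return (entries);
}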
diff --git a/sys/amd64/vmm/x86.h b/sys/amd64/vmm/x86.h
--- a/sys/amd64/vmm/x86.h
+++ b/sys/amd64/vmm/x86.h
@@ -64,9 +64,6 @@
*/
#define CPUID_0000_0001_FEAT0_VMX (1<<5)
-int x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
- uint64_t *rcx, uint64_t *rdx);
-
enum vm_cpuid_capability {
VCC_NONE,
VCC_NO_EXECUTE,
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -45,605 +45,6 @@
#include "vmm_util.h"
#include "x86.h"
-SYSCTL_DECL(_hw_vmm);
-static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- NULL);
-
-#define CPUID_VM_SIGNATURE 0x40000000
-#define CPUID_BHYVE_FEATURES 0x40000001
-#define CPUID_VM_HIGH CPUID_BHYVE_FEATURES
-
-/* Features advertised in CPUID_BHYVE_FEATURES %eax */
-#define CPUID_BHYVE_FEAT_EXT_DEST_ID (1UL << 0) /* MSI Extended Dest ID */
-
-static const char bhyve_id[12] = "bhyve bhyve ";
-
-static uint64_t bhyve_xcpuids;
-SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
- "Number of times an unknown cpuid leaf was accessed");
-
-static int cpuid_leaf_b = 1;
-SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
- &cpuid_leaf_b, 0, NULL);
-
-/*
- * Compute ceil(log2(x)). Returns -1 if x is zero.
- */
-static __inline int
-log2(u_int x)
-{
-
- return (x == 0 ? -1 : order_base_2(x));
-}
-
-int
-x86_emulate_cpuid(struct vcpu *vcpu, uint64_t *rax, uint64_t *rbx,
- uint64_t *rcx, uint64_t *rdx)
-{
- struct vm *vm = vcpu_vm(vcpu);
- int vcpu_id = vcpu_vcpuid(vcpu);
- const struct xsave_limits *limits;
- uint64_t cr4;
- int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
- width, x2apic_id;
- unsigned int func, regs[4], logical_cpus, param;
- enum x2apic_state x2apic_state;
- uint16_t cores, maxcpus, sockets, threads;
-
- /*
- * The function of CPUID is controlled through the provided value of
- * %eax (and secondarily %ecx, for certain leaf data).
- */
- func = (uint32_t)*rax;
- param = (uint32_t)*rcx;
-
- VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param);
-
- /*
- * Requests for invalid CPUID levels should map to the highest
- * available level instead.
- */
- if (cpu_exthigh != 0 && func >= 0x80000000) {
- if (func > cpu_exthigh)
- func = cpu_exthigh;
- } else if (func >= CPUID_VM_SIGNATURE) {
- if (func > CPUID_VM_HIGH)
- func = CPUID_VM_HIGH;
- } else if (func > cpu_high) {
- func = cpu_high;
- }
-
- /*
- * In general the approach used for CPU topology is to
- * advertise a flat topology where all CPUs are packages with
- * no multi-core or SMT.
- */
- switch (func) {
- /*
- * Pass these through to the guest
- */
- case CPUID_0000_0000:
- case CPUID_0000_0002:
- case CPUID_0000_0003:
- case CPUID_8000_0000:
- case CPUID_8000_0002:
- case CPUID_8000_0003:
- case CPUID_8000_0004:
- case CPUID_8000_0006:
- cpuid_count(func, param, regs);
- break;
- case CPUID_8000_0008:
- cpuid_count(func, param, regs);
- if (vmm_is_svm()) {
- /*
- * As on Intel (0000_0007:0, EDX), mask out
- * unsupported or unsafe AMD extended features
- * (8000_0008 EBX).
- */
- regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF |
- AMDFEID_XSAVEERPTR);
-
- vm_get_topology(vm, &sockets, &cores, &threads,
- &maxcpus);
- /*
- * Here, width is ApicIdCoreIdSize, present on
- * at least Family 15h and newer. It
- * represents the "number of bits in the
- * initial apicid that indicate thread id
- * within a package."
- *
- * Our topo_probe_amd() uses it for
- * pkg_id_shift and other OSes may rely on it.
- */
- width = MIN(0xF, log2(threads * cores));
- logical_cpus = MIN(0xFF, threads * cores - 1);
- regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus;
- }
- break;
-
- case CPUID_8000_0001:
- cpuid_count(func, param, regs);
-
- /*
- * Hide SVM from guest.
- */
- regs[2] &= ~AMDID2_SVM;
-
- /*
- * Don't advertise extended performance counter MSRs
- * to the guest.
- */
- regs[2] &= ~AMDID2_PCXC;
- regs[2] &= ~AMDID2_PNXC;
- regs[2] &= ~AMDID2_PTSCEL2I;
-
- /*
- * Don't advertise Instruction Based Sampling feature.
- */
- regs[2] &= ~AMDID2_IBS;
-
- /* NodeID MSR not available */
- regs[2] &= ~AMDID2_NODE_ID;
-
- /* Don't advertise the OS visible workaround feature */
- regs[2] &= ~AMDID2_OSVW;
-
- /* Hide mwaitx/monitorx capability from the guest */
- regs[2] &= ~AMDID2_MWAITX;
-
- /* Advertise RDTSCP if it is enabled. */
- error = vm_get_capability(vcpu,
- VM_CAP_RDTSCP, &enable_rdtscp);
- if (error == 0 && enable_rdtscp)
- regs[3] |= AMDID_RDTSCP;
- else
- regs[3] &= ~AMDID_RDTSCP;
- break;
-
- case CPUID_8000_0007:
- /*
- * AMD uses this leaf to advertise the processor's
- * power monitoring and RAS capabilities. These
- * features are hardware-specific and exposing
- * them to a guest doesn't make a lot of sense.
- *
- * Intel uses this leaf only to advertise the
- * "Invariant TSC" feature with all other bits
- * being reserved (set to zero).
- */
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
-
- /*
- * "Invariant TSC" can be advertised to the guest if:
- * - host TSC frequency is invariant
- * - host TSCs are synchronized across physical cpus
- *
- * XXX This still falls short because the vcpu
- * can observe the TSC moving backwards as it
- * migrates across physical cpus. But at least
- * it should discourage the guest from using the
- * TSC to keep track of time.
- */
- if (tsc_is_invariant && smp_tsc)
- regs[3] |= AMDPM_TSC_INVARIANT;
- break;
-
- case CPUID_8000_001D:
- /* AMD Cache topology, like 0000_0004 for Intel. */
- if (!vmm_is_svm())
- goto default_leaf;
-
- /*
- * Similar to Intel, generate a fictitious cache
- * topology for the guest with L3 shared by the
- * package, and L1 and L2 local to a core.
- */
- vm_get_topology(vm, &sockets, &cores, &threads,
- &maxcpus);
- switch (param) {
- case 0:
- logical_cpus = threads;
- level = 1;
- func = 1; /* data cache */
- break;
- case 1:
- logical_cpus = threads;
- level = 2;
- func = 3; /* unified cache */
- break;
- case 2:
- logical_cpus = threads * cores;
- level = 3;
- func = 3; /* unified cache */
- break;
- default:
- logical_cpus = sockets * threads * cores;
- level = 0;
- func = 0;
- break;
- }
-
- logical_cpus = MIN(0xfff, logical_cpus - 1);
- regs[0] = (logical_cpus << 14) | (1 << 8) |
- (level << 5) | func;
- regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0;
-
- /*
- * ecx: Number of cache ways for non-fully
- * associative cache, minus 1. Reported value
- * of zero means there is one way.
- */
- regs[2] = 0;
-
- regs[3] = 0;
- break;
-
- case CPUID_8000_001E:
- /*
- * AMD Family 16h+ and Hygon Family 18h additional
- * identifiers.
- */
- if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16)
- goto default_leaf;
-
- vm_get_topology(vm, &sockets, &cores, &threads,
- &maxcpus);
- regs[0] = vcpu_id;
- threads = MIN(0xFF, threads - 1);
- regs[1] = (threads << 8) |
- (vcpu_id >> log2(threads + 1));
- /*
- * XXX Bhyve topology cannot yet represent >1 node per
- * processor.
- */
- regs[2] = 0;
- regs[3] = 0;
- break;
-
- case CPUID_0000_0001:
- do_cpuid(1, regs);
-
- error = vm_get_x2apic_state(vcpu, &x2apic_state);
- if (error) {
- panic("x86_emulate_cpuid: error %d "
- "fetching x2apic state", error);
- }
-
- /*
- * Override the APIC ID only in ebx
- */
- regs[1] &= ~(CPUID_LOCAL_APIC_ID);
- regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
-
- /*
- * Don't expose VMX, SpeedStep, TME or SMX capability.
- * Advertise x2APIC capability and Hypervisor guest.
- */
- regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
- regs[2] &= ~(CPUID2_SMX);
-
- regs[2] |= CPUID2_HV;
-
- if (x2apic_state != X2APIC_DISABLED)
- regs[2] |= CPUID2_X2APIC;
- else
- regs[2] &= ~CPUID2_X2APIC;
-
- /*
- * Only advertise CPUID2_XSAVE in the guest if
- * the host is using XSAVE.
- */
- if (!(regs[2] & CPUID2_OSXSAVE))
- regs[2] &= ~CPUID2_XSAVE;
-
- /*
- * If CPUID2_XSAVE is being advertised and the
- * guest has set CR4_XSAVE, set
- * CPUID2_OSXSAVE.
- */
- regs[2] &= ~CPUID2_OSXSAVE;
- if (regs[2] & CPUID2_XSAVE) {
- error = vm_get_register(vcpu,
- VM_REG_GUEST_CR4, &cr4);
- if (error)
- panic("x86_emulate_cpuid: error %d "
- "fetching %%cr4", error);
- if (cr4 & CR4_XSAVE)
- regs[2] |= CPUID2_OSXSAVE;
- }
-
- /*
- * Hide monitor/mwait until we know how to deal with
- * these instructions.
- */
- regs[2] &= ~CPUID2_MON;
-
- /*
- * Hide the performance and debug features.
- */
- regs[2] &= ~CPUID2_PDCM;
-
- /*
- * No TSC deadline support in the APIC yet
- */
- regs[2] &= ~CPUID2_TSCDLT;
-
- /*
- * Hide thermal monitoring
- */
- regs[3] &= ~(CPUID_ACPI | CPUID_TM);
-
- /*
- * Hide the debug store capability.
- */
- regs[3] &= ~CPUID_DS;
-
- /*
- * Advertise the Machine Check and MTRR capability.
- *
- * Some guest OSes (e.g. Windows) will not boot if
- * these features are absent.
- */
- regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
-
- vm_get_topology(vm, &sockets, &cores, &threads,
- &maxcpus);
- logical_cpus = threads * cores;
- regs[1] &= ~CPUID_HTT_CORES;
- regs[1] |= (logical_cpus & 0xff) << 16;
- regs[3] |= CPUID_HTT;
- break;
-
- case CPUID_0000_0004:
- cpuid_count(func, param, regs);
-
- if (regs[0] || regs[1] || regs[2] || regs[3]) {
- vm_get_topology(vm, &sockets, &cores, &threads,
- &maxcpus);
- regs[0] &= 0x3ff;
- regs[0] |= (cores - 1) << 26;
- /*
- * Cache topology:
- * - L1 and L2 are shared only by the logical
- * processors in a single core.
- * - L3 and above are shared by all logical
- * processors in the package.
- */
- logical_cpus = threads;
- level = (regs[0] >> 5) & 0x7;
- if (level >= 3)
- logical_cpus *= cores;
- regs[0] |= (logical_cpus - 1) << 14;
- }
- break;
-
- case CPUID_0000_0007:
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
-
- /* leaf 0 */
- if (param == 0) {
- cpuid_count(func, param, regs);
-
- /* Only leaf 0 is supported */
- regs[0] = 0;
-
- /*
- * Expose known-safe features.
- */
- regs[1] &= CPUID_STDEXT_FSGSBASE |
- CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
- CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP |
- CPUID_STDEXT_BMI2 |
- CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
- CPUID_STDEXT_AVX512F |
- CPUID_STDEXT_AVX512DQ |
- CPUID_STDEXT_RDSEED |
- CPUID_STDEXT_SMAP |
- CPUID_STDEXT_AVX512PF |
- CPUID_STDEXT_AVX512ER |
- CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA |
- CPUID_STDEXT_AVX512BW |
- CPUID_STDEXT_AVX512VL;
- regs[2] &= CPUID_STDEXT2_VAES |
- CPUID_STDEXT2_VPCLMULQDQ;
- regs[3] &= CPUID_STDEXT3_MD_CLEAR;
-
- /* Advertise RDPID if it is enabled. */
- error = vm_get_capability(vcpu, VM_CAP_RDPID,
- &enable_rdpid);
- if (error == 0 && enable_rdpid)
- regs[2] |= CPUID_STDEXT2_RDPID;
-
- /* Advertise INVPCID if it is enabled. */
- error = vm_get_capability(vcpu,
- VM_CAP_ENABLE_INVPCID, &enable_invpcid);
- if (error == 0 && enable_invpcid)
- regs[1] |= CPUID_STDEXT_INVPCID;
- }
- break;
-
- case CPUID_0000_0006:
- regs[0] = CPUTPM1_ARAT;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- break;
-
- case CPUID_0000_000A:
- /*
- * Handle the access, but report 0 for
- * all options
- */
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- break;
-
- case CPUID_0000_000B:
- /*
- * Intel processor topology enumeration
- */
- if (vmm_is_intel()) {
- vm_get_topology(vm, &sockets, &cores, &threads,
- &maxcpus);
- if (param == 0) {
- logical_cpus = threads;
- width = log2(logical_cpus);
- level = CPUID_TYPE_SMT;
- x2apic_id = vcpu_id;
- }
-
- if (param == 1) {
- logical_cpus = threads * cores;
- width = log2(logical_cpus);
- level = CPUID_TYPE_CORE;
- x2apic_id = vcpu_id;
- }
-
- if (!cpuid_leaf_b || param >= 2) {
- width = 0;
- logical_cpus = 0;
- level = 0;
- x2apic_id = 0;
- }
-
- regs[0] = width & 0x1f;
- regs[1] = logical_cpus & 0xffff;
- regs[2] = (level << 8) | (param & 0xff);
- regs[3] = x2apic_id;
- } else {
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- }
- break;
-
- case CPUID_0000_000D:
- limits = vmm_get_xsave_limits();
- if (!limits->xsave_enabled) {
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- break;
- }
-
- cpuid_count(func, param, regs);
- switch (param) {
- case 0:
- /*
- * Only permit the guest to use bits
- * that are active in the host in
- * %xcr0. Also, claim that the
- * maximum save area size is
- * equivalent to the host's current
- * save area size. Since this runs
- * "inside" of vmrun(), it runs with
- * the guest's xcr0, so the current
- * save area size is correct as-is.
- */
- regs[0] &= limits->xcr0_allowed;
- regs[2] = limits->xsave_max_size;
- regs[3] &= (limits->xcr0_allowed >> 32);
- break;
- case 1:
- /* Only permit XSAVEOPT. */
- regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- break;
- default:
- /*
- * If the leaf is for a permitted feature,
- * pass through as-is, otherwise return
- * all zeroes.
- */
- if (!(limits->xcr0_allowed & (1ul << param))) {
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- }
- break;
- }
- break;
-
- case CPUID_0000_000F:
- case CPUID_0000_0010:
- /*
- * Do not report any Resource Director Technology
- * capabilities. Exposing control of cache or memory
- * controller resource partitioning to the guest is not
- * at all sensible.
- *
- * This is already hidden at a high level by masking of
- * leaf 0x7. Even still, a guest may look here for
- * detailed capability information.
- */
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- break;
-
- case CPUID_0000_0015:
- /*
- * Don't report CPU TSC/Crystal ratio and clock
- * values since guests may use these to derive the
- * local APIC frequency..
- */
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- break;
-
- case CPUID_VM_SIGNATURE:
- regs[0] = CPUID_VM_HIGH;
- bcopy(bhyve_id, &regs[1], 4);
- bcopy(bhyve_id + 4, &regs[2], 4);
- bcopy(bhyve_id + 8, &regs[3], 4);
- break;
-
- case CPUID_BHYVE_FEATURES:
- regs[0] = CPUID_BHYVE_FEAT_EXT_DEST_ID;
- regs[1] = 0;
- regs[2] = 0;
- regs[3] = 0;
- break;
-
- default:
-default_leaf:
- /*
- * The leaf value has already been clamped so
- * simply pass this through, keeping count of
- * how many unhandled leaf values have been seen.
- */
- atomic_add_long(&bhyve_xcpuids, 1);
- cpuid_count(func, param, regs);
- break;
- }
-
- /*
- * CPUID clears the upper 32-bits of the long-mode registers.
- */
- *rax = regs[0];
- *rbx = regs[1];
- *rcx = regs[2];
- *rdx = regs[3];
-
- return (1);
-}
-
bool
vm_cpuid_capability(struct vcpu *vcpu, enum vm_cpuid_capability cap)
{
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -81,7 +81,8 @@
opt_bhyve_snapshot.h \
opt_ddb.h
-SRCS+= vmm_host.c \
+SRCS+= vmm_cpuid.c \
+ vmm_host.c \
vmm_ioport.c \
vmm_lapic.c \
vmm_mem_machdep.c \