diff --git a/sys/amd64/vmm/amd/svm_msr.c b/sys/amd64/vmm/amd/svm_msr.c index 12046de4dbb9..1a22f16cf48e 100644 --- a/sys/amd64/vmm/amd/svm_msr.c +++ b/sys/amd64/vmm/amd/svm_msr.c @@ -1,180 +1,188 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014, Neel Natu (neel@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bhyve_snapshot.h" #include #include #include #include #include #include #include "svm.h" #include "vmcb.h" #include "svm_softc.h" #include "svm_msr.h" #ifndef MSR_AMDK8_IPM #define MSR_AMDK8_IPM 0xc0010055 #endif enum { IDX_MSR_LSTAR, IDX_MSR_CSTAR, IDX_MSR_STAR, IDX_MSR_SF_MASK, HOST_MSR_NUM /* must be the last enumeration */ }; static uint64_t host_msrs[HOST_MSR_NUM]; void svm_msr_init(void) { /* * It is safe to cache the values of the following MSRs because they * don't change based on curcpu, curproc or curthread. */ host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); } void svm_msr_guest_init(struct svm_softc *sc, int vcpu) { /* * All the MSRs accessible to the guest are either saved/restored by * hardware on every #VMEXIT/VMRUN (e.g., G_PAT) or are saved/restored * by VMSAVE/VMLOAD (e.g., MSR_GSBASE). * * There are no guest MSRs that are saved/restored "by hand" so nothing * more to do here. */ return; } void svm_msr_guest_enter(struct svm_softc *sc, int vcpu) { /* * Save host MSRs (if any) and restore guest MSRs (if any). */ } void svm_msr_guest_exit(struct svm_softc *sc, int vcpu) { /* * Save guest MSRs (if any) and restore host MSRs. */ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); /* MSR_KGSBASE will be restored on the way back to userspace */ } int svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, bool *retu) { int error = 0; switch (num) { case MSR_MCG_CAP: case MSR_MCG_STATUS: *result = 0; break; case MSR_MTRRcap: case MSR_MTRRdefType: - case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: + case MSR_MTRRVarBase ... 
MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: + if (vm_rdmtrr(&sc->mtrr[vcpu], num, result) != 0) { + vm_inject_gp(sc->vm, vcpu); + } + break; case MSR_SYSCFG: case MSR_AMDK8_IPM: case MSR_EXTFEATURES: *result = 0; break; default: error = EINVAL; break; } return (error); } int svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) { int error = 0; switch (num) { case MSR_MCG_CAP: case MSR_MCG_STATUS: break; /* ignore writes */ case MSR_MTRRcap: - vm_inject_gp(sc->vm, vcpu); - break; case MSR_MTRRdefType: - case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: + case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: + if (vm_wrmtrr(&sc->mtrr[vcpu], num, val) != 0) { + vm_inject_gp(sc->vm, vcpu); + } + break; case MSR_SYSCFG: break; /* Ignore writes */ case MSR_AMDK8_IPM: /* * Ignore writes to the "Interrupt Pending Message" MSR. */ break; case MSR_K8_UCODE_UPDATE: /* * Ignore writes to microcode update register. */ break; #ifdef BHYVE_SNAPSHOT case MSR_TSC: error = svm_set_tsc_offset(sc, vcpu, val - rdtsc()); break; #endif case MSR_EXTFEATURES: break; default: error = EINVAL; break; } return (error); } diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h index 8735353bb472..5f6a267617d2 100644 --- a/sys/amd64/vmm/amd/svm_softc.h +++ b/sys/amd64/vmm/amd/svm_softc.h @@ -1,116 +1,119 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SVM_SOFTC_H_ #define _SVM_SOFTC_H_ +#include "x86.h" + #define SVM_IO_BITMAP_SIZE (3 * PAGE_SIZE) #define SVM_MSR_BITMAP_SIZE (2 * PAGE_SIZE) struct asid { uint64_t gen; /* range is [1, ~0UL] */ uint32_t num; /* range is [1, nasid - 1] */ }; /* * XXX separate out 'struct vmcb' from 'svm_vcpu' to avoid wasting space * due to VMCB alignment requirements. 
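The svm_rdmsr()/svm_wrmsr() hunks above also fix an off-by-one in the fixed-range cases: there are exactly eight MTRRfix4K registers (0x268 through 0x26F), so the guarded range is now MSR_MTRR4kBase ... MSR_MTRR4kBase + 7 instead of + 8, and the ten variable-range base/mask pairs starting at MSR_MTRRVarBase are routed through vm_rdmtrr()/vm_wrmtrr(). The following standalone sketch reproduces that MSR-number-to-slot mapping outside the kernel; the MSR constants are the architectural values (in the tree they come from <machine/specialreg.h>), and the slot names mirror the struct vm_mtrr fields introduced later in this diff.

#include <stdint.h>
#include <stdio.h>

/* Architectural MTRR MSR numbers (see specialreg.h / the Intel SDM). */
#define MSR_MTRRcap	0x0fe
#define MSR_MTRRVarBase	0x200	/* MTRRphysBase0; base/mask pairs follow */
#define MSR_MTRR64kBase	0x250	/* MTRRfix64K_00000 */
#define MSR_MTRR16kBase	0x258	/* MTRRfix16K_80000, MTRRfix16K_A0000 */
#define MSR_MTRR4kBase	0x268	/* MTRRfix4K_C0000 .. MTRRfix4K_F8000 */
#define MSR_MTRRdefType	0x2ff

#define VMM_MTRR_VAR_MAX 10	/* number of emulated variable ranges */

/* Print which emulated MTRR slot an MSR number selects. */
static void
classify(unsigned int num)
{
	if (num == MSR_MTRRcap || num == MSR_MTRRdefType)
		printf("%#x: cap/defType register\n", num);
	else if (num >= MSR_MTRR4kBase && num <= MSR_MTRR4kBase + 7)
		printf("%#x: fixed4k[%u]\n", num, num - MSR_MTRR4kBase);
	else if (num >= MSR_MTRR16kBase && num <= MSR_MTRR16kBase + 1)
		printf("%#x: fixed16k[%u]\n", num, num - MSR_MTRR16kBase);
	else if (num == MSR_MTRR64kBase)
		printf("%#x: fixed64k\n", num);
	else if (num >= MSR_MTRRVarBase &&
	    num <= MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1) {
		unsigned int off = num - MSR_MTRRVarBase;

		printf("%#x: var[%u].%s\n", num, off / 2,
		    (off % 2 == 0) ? "base" : "mask");
	} else
		printf("%#x: not an emulated MTRR MSR\n", num);
}

int
main(void)
{
	classify(0x268);	/* fixed4k[0] */
	classify(0x26f);	/* fixed4k[7], the last fixed 4K register */
	classify(0x270);	/* not an MTRR; the old "+ 8" bound matched it */
	classify(0x201);	/* var[0].mask (MTRRphysMask0) */
	return (0);
}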
*/ struct svm_vcpu { struct vmcb vmcb; /* hardware saved vcpu context */ struct svm_regctx swctx; /* software saved vcpu context */ uint64_t vmcb_pa; /* VMCB physical address */ uint64_t nextrip; /* next instruction to be executed by guest */ int lastcpu; /* host cpu that the vcpu last ran on */ uint32_t dirty; /* state cache bits that must be cleared */ long eptgen; /* pmap->pm_eptgen when the vcpu last ran */ struct asid asid; } __aligned(PAGE_SIZE); /* * SVM softc, one per virtual machine. */ struct svm_softc { uint8_t apic_page[VM_MAXCPU][PAGE_SIZE]; struct svm_vcpu vcpu[VM_MAXCPU]; vm_offset_t nptp; /* nested page table */ uint8_t *iopm_bitmap; /* shared by all vcpus */ uint8_t *msr_bitmap; /* shared by all vcpus */ struct vm *vm; + struct vm_mtrr mtrr[VM_MAXCPU]; }; CTASSERT((offsetof(struct svm_softc, nptp) & PAGE_MASK) == 0); static __inline struct svm_vcpu * svm_get_vcpu(struct svm_softc *sc, int vcpu) { return (&(sc->vcpu[vcpu])); } static __inline struct vmcb * svm_get_vmcb(struct svm_softc *sc, int vcpu) { return (&(sc->vcpu[vcpu].vmcb)); } static __inline struct vmcb_state * svm_get_vmcb_state(struct svm_softc *sc, int vcpu) { return (&(sc->vcpu[vcpu].vmcb.state)); } static __inline struct vmcb_ctrl * svm_get_vmcb_ctrl(struct svm_softc *sc, int vcpu) { return (&(sc->vcpu[vcpu].vmcb.ctrl)); } static __inline struct svm_regctx * svm_get_guest_regctx(struct svm_softc *sc, int vcpu) { return (&(sc->vcpu[vcpu].swctx)); } static __inline void svm_set_dirty(struct svm_softc *sc, int vcpu, uint32_t dirtybits) { struct svm_vcpu *vcpustate; vcpustate = svm_get_vcpu(sc, vcpu); vcpustate->dirty |= dirtybits; } #endif /* _SVM_SOFTC_H_ */ diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h index 57499a3a6869..81e508e30d3d 100644 --- a/sys/amd64/vmm/intel/vmx.h +++ b/sys/amd64/vmm/intel/vmx.h @@ -1,170 +1,172 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _VMX_H_ #define _VMX_H_ #include "vmcs.h" +#include "x86.h" struct pmap; struct vmxctx { register_t guest_rdi; /* Guest state */ register_t guest_rsi; register_t guest_rdx; register_t guest_rcx; register_t guest_r8; register_t guest_r9; register_t guest_rax; register_t guest_rbx; register_t guest_rbp; register_t guest_r10; register_t guest_r11; register_t guest_r12; register_t guest_r13; register_t guest_r14; register_t guest_r15; register_t guest_cr2; register_t guest_dr0; register_t guest_dr1; register_t guest_dr2; register_t guest_dr3; register_t guest_dr6; register_t host_r15; /* Host state */ register_t host_r14; register_t host_r13; register_t host_r12; register_t host_rbp; register_t host_rsp; register_t host_rbx; register_t host_dr0; register_t host_dr1; register_t host_dr2; register_t host_dr3; register_t host_dr6; register_t host_dr7; uint64_t host_debugctl; int host_tf; int inst_fail_status; /* * The pmap needs to be deactivated in vmx_enter_guest() * so keep a copy of the 'pmap' in each vmxctx. */ struct pmap *pmap; }; struct vmxcap { int set; uint32_t proc_ctls; uint32_t proc_ctls2; uint32_t exc_bitmap; }; struct vmxstate { uint64_t nextrip; /* next instruction to be executed by guest */ int lastcpu; /* host cpu that this 'vcpu' last ran on */ uint16_t vpid; }; struct apic_page { uint32_t reg[PAGE_SIZE / 4]; }; CTASSERT(sizeof(struct apic_page) == PAGE_SIZE); /* Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM) */ struct pir_desc { uint64_t pir[4]; uint64_t pending; uint64_t unused[3]; } __aligned(64); CTASSERT(sizeof(struct pir_desc) == 64); /* Index into the 'guest_msrs[]' array */ enum { IDX_MSR_LSTAR, IDX_MSR_CSTAR, IDX_MSR_STAR, IDX_MSR_SF_MASK, IDX_MSR_KGSBASE, IDX_MSR_PAT, IDX_MSR_TSC_AUX, GUEST_MSR_NUM /* must be the last enumeration */ }; /* virtual machine softc */ struct vmx { struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */ struct apic_page apic_page[VM_MAXCPU]; /* one apic page per vcpu */ char msr_bitmap[PAGE_SIZE]; struct pir_desc pir_desc[VM_MAXCPU]; uint64_t guest_msrs[VM_MAXCPU][GUEST_MSR_NUM]; struct vmxctx ctx[VM_MAXCPU]; struct vmxcap cap[VM_MAXCPU]; struct vmxstate state[VM_MAXCPU]; uint64_t eptp; struct vm *vm; long eptgen[MAXCPU]; /* cached pmap->pm_eptgen */ + struct vm_mtrr mtrr[VM_MAXCPU]; }; CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0); CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0); CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0); #define VMX_GUEST_VMEXIT 0 #define VMX_VMRESUME_ERROR 1 #define VMX_VMLAUNCH_ERROR 2 int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched); void vmx_call_isr(uintptr_t entry); u_long vmx_fix_cr0(u_long cr0); u_long vmx_fix_cr4(u_long cr4); int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset); extern char vmx_exit_guest[]; extern char vmx_exit_guest_flush_rsb[]; static inline bool vmx_have_msr_tsc_aux(struct vmx *vmx) { int rdpid_rdtscp_bits = ((1 << VM_CAP_RDPID) | (1 << VM_CAP_RDTSCP)); /* * Since the values of these bits are uniform across all vCPUs * (see discussion in vmx_modinit() and initialization of these bits * in vmx_init()), just always use vCPU-zero's capability set and * remove the need to require a vcpuid argument. 
*/ return ((vmx->cap[0].set & rdpid_rdtscp_bits) != 0); } #endif diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c index db9e659bab5b..a135518cb1c3 100644 --- a/sys/amd64/vmm/intel/vmx_msr.c +++ b/sys/amd64/vmm/intel/vmx_msr.c @@ -1,521 +1,526 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "vmx.h" #include "vmx_msr.h" #include "x86.h" static bool vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) { return ((msr_val & (1UL << (bitpos + 32))) != 0); } static bool vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) { return ((msr_val & (1UL << bitpos)) == 0); } uint32_t vmx_revision(void) { return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); } /* * Generate a bitmask to be used for the VMCS execution control fields. * * The caller specifies what bits should be set to one in 'ones_mask' * and what bits should be set to zero in 'zeros_mask'. The don't-care * bits are set to the default value. The default values are obtained * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining * VMX Capabilities". * * Returns zero on success and non-zero on error. 
*/ int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, uint32_t zeros_mask, uint32_t *retval) { int i; uint64_t val, trueval; bool true_ctls_avail, one_allowed, zero_allowed; /* We cannot ask the same bit to be set to both '1' and '0' */ if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) return (EINVAL); true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0; val = rdmsr(ctl_reg); if (true_ctls_avail) trueval = rdmsr(true_ctl_reg); /* step c */ else trueval = val; /* step a */ for (i = 0; i < 32; i++) { one_allowed = vmx_ctl_allows_one_setting(trueval, i); zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); KASSERT(one_allowed || zero_allowed, ("invalid zero/one setting for bit %d of ctl 0x%0x, " "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); if (zero_allowed && !one_allowed) { /* b(i),c(i) */ if (ones_mask & (1 << i)) return (EINVAL); *retval &= ~(1 << i); } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ if (zeros_mask & (1 << i)) return (EINVAL); *retval |= 1 << i; } else { if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ *retval &= ~(1 << i); else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ *retval |= 1 << i; else if (!true_ctls_avail) *retval &= ~(1 << i); /* b(iii) */ else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ *retval &= ~(1 << i); else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ *retval |= 1 << i; else { panic("vmx_set_ctlreg: unable to determine " "correct value of ctl bit %d for msr " "0x%0x and true msr 0x%0x", i, ctl_reg, true_ctl_reg); } } } return (0); } void msr_bitmap_initialize(char *bitmap) { memset(bitmap, 0xff, PAGE_SIZE); } int msr_bitmap_change_access(char *bitmap, u_int msr, int access) { int byte, bit; if (msr <= 0x00001FFF) byte = msr / 8; else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) byte = 1024 + (msr - 0xC0000000) / 8; else return (EINVAL); bit = msr & 0x7; if (access & MSR_BITMAP_ACCESS_READ) bitmap[byte] &= ~(1 << bit); else bitmap[byte] |= 1 << bit; byte += 2048; if (access & MSR_BITMAP_ACCESS_WRITE) bitmap[byte] &= ~(1 << bit); else bitmap[byte] |= 1 << bit; return (0); } static uint64_t misc_enable; static uint64_t platform_info; static uint64_t turbo_ratio_limit; static uint64_t host_msrs[GUEST_MSR_NUM]; static bool nehalem_cpu(void) { u_int family, model; /* * The family:model numbers belonging to the Nehalem microarchitecture * are documented in Section 35.5, Intel SDM dated Feb 2014. */ family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); if (family == 0x6) { switch (model) { case 0x1A: case 0x1E: case 0x1F: case 0x2E: return (true); default: break; } } return (false); } static bool westmere_cpu(void) { u_int family, model; /* * The family:model numbers belonging to the Westmere microarchitecture * are documented in Section 35.6, Intel SDM dated Feb 2014. */ family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); if (family == 0x6) { switch (model) { case 0x25: case 0x2C: return (true); default: break; } } return (false); } static bool pat_valid(uint64_t val) { int i, pa; /* * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT" * * Extract PA0 through PA7 and validate that each one encodes a * valid memory type. */ for (i = 0; i < 8; i++) { pa = (val >> (i * 8)) & 0xff; if (pa == 2 || pa == 3 || pa >= 8) return (false); } return (true); } void vmx_msr_init(void) { uint64_t bus_freq, ratio; int i; /* * It is safe to cache the values of the following MSRs because * they don't change based on curcpu, curproc or curthread. 
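msr_bitmap_change_access() above encodes the VMX MSR permission bitmap layout: one 4 KiB page in which bytes 0-1023 form the read bitmap for low MSRs (0x0-0x1FFF), bytes 1024-2047 the read bitmap for high MSRs (0xC0000000-0xC0001FFF), and the write bitmaps occupy the same layout shifted by 2048 bytes; a set bit forces a VM exit, a cleared bit grants the guest direct access. The sketch below is a userland restatement of that byte/bit arithmetic so the offsets can be checked in isolation; the ACCESS_* flags are local stand-ins for the kernel's MSR_BITMAP_ACCESS_* definitions.

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	4096
#define ACCESS_READ	0x1	/* stand-in for MSR_BITMAP_ACCESS_READ */
#define ACCESS_WRITE	0x2	/* stand-in for MSR_BITMAP_ACCESS_WRITE */

/* Same index arithmetic as msr_bitmap_change_access() in vmx_msr.c. */
static int
bitmap_change_access(unsigned char *bitmap, unsigned int msr, int access)
{
	int byte, bit;

	if (msr <= 0x00001FFF)
		byte = msr / 8;			/* read bitmap, low MSRs */
	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
		byte = 1024 + (msr - 0xC0000000) / 8; /* read bitmap, high MSRs */
	else
		return (EINVAL);

	bit = msr & 0x7;

	if (access & ACCESS_READ)
		bitmap[byte] &= ~(1 << bit);	/* clear: no exit on rdmsr */
	else
		bitmap[byte] |= 1 << bit;

	byte += 2048;				/* write bitmaps follow */
	if (access & ACCESS_WRITE)
		bitmap[byte] &= ~(1 << bit);	/* clear: no exit on wrmsr */
	else
		bitmap[byte] |= 1 << bit;

	return (0);
}

int
main(void)
{
	unsigned char bitmap[PAGE_SIZE];

	memset(bitmap, 0xff, sizeof(bitmap));	/* intercept everything */
	/* Allow guest reads and writes of MSR_LSTAR (0xC0000082). */
	bitmap_change_access(bitmap, 0xC0000082, ACCESS_READ | ACCESS_WRITE);
	printf("read byte 1040 = %#x, write byte 3088 = %#x\n",
	    bitmap[1024 + 0x82 / 8], bitmap[3072 + 0x82 / 8]);
	return (0);
}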
*/ host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); /* * Initialize emulated MSRs */ misc_enable = rdmsr(MSR_IA32_MISC_ENABLE); /* * Set mandatory bits * 11: branch trace disabled * 12: PEBS unavailable * Clear unsupported features * 16: SpeedStep enable * 18: enable MONITOR FSM */ misc_enable |= (1 << 12) | (1 << 11); misc_enable &= ~((1 << 18) | (1 << 16)); if (nehalem_cpu() || westmere_cpu()) bus_freq = 133330000; /* 133Mhz */ else bus_freq = 100000000; /* 100Mhz */ /* * XXXtime * The ratio should really be based on the virtual TSC frequency as * opposed to the host TSC. */ ratio = (tsc_freq / bus_freq) & 0xff; /* * The register definition is based on the micro-architecture * but the following bits are always the same: * [15:8] Maximum Non-Turbo Ratio * [28] Programmable Ratio Limit for Turbo Mode * [29] Programmable TDC-TDP Limit for Turbo Mode * [47:40] Maximum Efficiency Ratio * * The other bits can be safely set to 0 on all * micro-architectures up to Haswell. */ platform_info = (ratio << 8) | (ratio << 40); /* * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is * dependent on the maximum cores per package supported by the micro- * architecture. For e.g., Westmere supports 6 cores per package and * uses the low 48 bits. Sandybridge support 8 cores per package and * uses up all 64 bits. * * However, the unused bits are reserved so we pretend that all bits * in this MSR are valid. */ for (i = 0; i < 8; i++) turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio; } void vmx_msr_guest_init(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs; guest_msrs = vmx->guest_msrs[vcpuid]; /* * The permissions bitmap is shared between all vcpus so initialize it * once when initializing the vBSP. */ if (vcpuid == 0) { guest_msr_rw(vmx, MSR_LSTAR); guest_msr_rw(vmx, MSR_CSTAR); guest_msr_rw(vmx, MSR_STAR); guest_msr_rw(vmx, MSR_SF_MASK); guest_msr_rw(vmx, MSR_KGSBASE); } /* * Initialize guest IA32_PAT MSR with default value after reset. 
*/ guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); return; } void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ update_pcb_bases(curpcb); wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); } void vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid) { uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX]; uint32_t host_aux = cpu_auxmsr(); if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux) wrmsr(MSR_TSC_AUX, guest_tsc_aux); } void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; /* Save guest MSRs */ guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); /* Restore host MSRs */ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); /* MSR_KGSBASE will be restored on the way back to userspace */ } void vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid) { uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX]; uint32_t host_aux = cpu_auxmsr(); if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux) /* * Note that it is not necessary to save the guest value * here; vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX] always * contains the current value since it is updated whenever * the guest writes to it (which is expected to be very * rare). */ wrmsr(MSR_TSC_AUX, host_aux); } int vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) { const uint64_t *guest_msrs; int error; guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; switch (num) { case MSR_MCG_CAP: case MSR_MCG_STATUS: *val = 0; break; case MSR_MTRRcap: case MSR_MTRRdefType: - case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: - *val = 0; + case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: + if (vm_rdmtrr(&vmx->mtrr[vcpuid], num, val) != 0) { + vm_inject_gp(vmx->vm, vcpuid); + } break; case MSR_IA32_MISC_ENABLE: *val = misc_enable; break; case MSR_PLATFORM_INFO: *val = platform_info; break; case MSR_TURBO_RATIO_LIMIT: case MSR_TURBO_RATIO_LIMIT1: *val = turbo_ratio_limit; break; case MSR_PAT: *val = guest_msrs[IDX_MSR_PAT]; break; default: error = EINVAL; break; } return (error); } int vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) { uint64_t *guest_msrs; uint64_t changed; int error; guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; switch (num) { case MSR_MCG_CAP: case MSR_MCG_STATUS: break; /* ignore writes */ case MSR_MTRRcap: - vm_inject_gp(vmx->vm, vcpuid); - break; case MSR_MTRRdefType: - case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: case MSR_MTRR16kBase ... 
MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: - break; /* Ignore writes */ + case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: + if (vm_wrmtrr(&vmx->mtrr[vcpuid], num, val) != 0) { + vm_inject_gp(vmx->vm, vcpuid); + } + break; case MSR_IA32_MISC_ENABLE: changed = val ^ misc_enable; /* * If the host has disabled the NX feature then the guest * also cannot use it. However, a Linux guest will try to * enable the NX feature by writing to the MISC_ENABLE MSR. * * This can be safely ignored because the memory management * code looks at CPUID.80000001H:EDX.NX to check if the * functionality is actually enabled. */ changed &= ~(1UL << 34); /* * Punt to userspace if any other bits are being modified. */ if (changed) error = EINVAL; break; case MSR_PAT: if (pat_valid(val)) guest_msrs[IDX_MSR_PAT] = val; else vm_inject_gp(vmx->vm, vcpuid); break; case MSR_TSC: error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc()); break; case MSR_TSC_AUX: if (vmx_have_msr_tsc_aux(vmx)) /* * vmx_msr_guest_enter_tsc_aux() will apply this * value when it is called immediately before guest * entry. */ guest_msrs[IDX_MSR_TSC_AUX] = val; else vm_inject_gp(vmx->vm, vcpuid); break; default: error = EINVAL; break; } return (error); } diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c index c43a3c870211..c97cb91af4f6 100644 --- a/sys/amd64/vmm/x86.c +++ b/sys/amd64/vmm/x86.c @@ -1,650 +1,732 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
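The MSR_PAT case in vmx_wrmsr() above only accepts a guest PAT value that passes pat_valid(); otherwise a #GP is injected. Each of the eight one-byte PAT entries must encode a valid memory type, and the encodings 0x02, 0x03 and anything at or above 0x08 are reserved, which is exactly what pat_valid() rejects. Below is a standalone restatement of that check; the 0x0007040600070406 test vector is the architectural power-on PAT value and matches the guest default built in vmx_msr_guest_init() above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Same test as pat_valid() in vmx_msr.c: every PAT entry (PA0..PA7)
 * must be a valid memory type; 2, 3 and values >= 8 are reserved.
 */
static bool
pat_valid(uint64_t val)
{
	int i, pa;

	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

int
main(void)
{
	/* Power-on default: WB, WT, UC-, UC repeated twice. */
	uint64_t reset_pat = 0x0007040600070406ULL;
	/* 0x02 in PA0 is a reserved encoding and must be rejected. */
	uint64_t bad_pat = 0x0007040600070402ULL;

	printf("reset PAT valid: %d\n", pat_valid(reset_pat));	/* prints 1 */
	printf("bad PAT valid:   %d\n", pat_valid(bad_pat));	/* prints 0 */
	return (0);
}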
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "vmm_host.h" #include "vmm_ktr.h" #include "vmm_util.h" #include "x86.h" SYSCTL_DECL(_hw_vmm); static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, NULL); #define CPUID_VM_HIGH 0x40000000 static const char bhyve_id[12] = "bhyve bhyve "; static uint64_t bhyve_xcpuids; SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0, "Number of times an unknown cpuid leaf was accessed"); #if __FreeBSD_version < 1200060 /* Remove after 11 EOL helps MFCing */ extern u_int threads_per_core; SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN, &threads_per_core, 0, NULL); extern u_int cores_per_package; SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN, &cores_per_package, 0, NULL); #endif static int cpuid_leaf_b = 1; SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN, &cpuid_leaf_b, 0, NULL); /* * Round up to the next power of two, if necessary, and then take log2. * Returns -1 if argument is zero. */ static __inline int log2(u_int x) { return (fls(x << (1 - powerof2(x))) - 1); } int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, uint64_t *rcx, uint64_t *rdx) { const struct xsave_limits *limits; uint64_t cr4; int error, enable_invpcid, enable_rdpid, enable_rdtscp, level, width, x2apic_id; unsigned int func, regs[4], logical_cpus, param; enum x2apic_state x2apic_state; uint16_t cores, maxcpus, sockets, threads; /* * The function of CPUID is controlled through the provided value of * %eax (and secondarily %ecx, for certain leaf data). */ func = (uint32_t)*rax; param = (uint32_t)*rcx; VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", func, param); /* * Requests for invalid CPUID levels should map to the highest * available level instead. */ if (cpu_exthigh != 0 && func >= 0x80000000) { if (func > cpu_exthigh) func = cpu_exthigh; } else if (func >= 0x40000000) { if (func > CPUID_VM_HIGH) func = CPUID_VM_HIGH; } else if (func > cpu_high) { func = cpu_high; } /* * In general the approach used for CPU topology is to * advertise a flat topology where all CPUs are packages with * no multi-core or SMT. */ switch (func) { /* * Pass these through to the guest */ case CPUID_0000_0000: case CPUID_0000_0002: case CPUID_0000_0003: case CPUID_8000_0000: case CPUID_8000_0002: case CPUID_8000_0003: case CPUID_8000_0004: case CPUID_8000_0006: cpuid_count(func, param, regs); break; case CPUID_8000_0008: cpuid_count(func, param, regs); if (vmm_is_svm()) { /* * As on Intel (0000_0007:0, EDX), mask out * unsupported or unsafe AMD extended features * (8000_0008 EBX). */ regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF | AMDFEID_XSAVEERPTR); vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); /* * Here, width is ApicIdCoreIdSize, present on * at least Family 15h and newer. It * represents the "number of bits in the * initial apicid that indicate thread id * within a package." * * Our topo_probe_amd() uses it for * pkg_id_shift and other OSes may rely on it. */ width = MIN(0xF, log2(threads * cores)); if (width < 0x4) width = 0; logical_cpus = MIN(0xFF, threads * cores - 1); regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus; } break; case CPUID_8000_0001: cpuid_count(func, param, regs); /* * Hide SVM from guest. */ regs[2] &= ~AMDID2_SVM; /* * Don't advertise extended performance counter MSRs * to the guest. 
*/ regs[2] &= ~AMDID2_PCXC; regs[2] &= ~AMDID2_PNXC; regs[2] &= ~AMDID2_PTSCEL2I; /* * Don't advertise Instruction Based Sampling feature. */ regs[2] &= ~AMDID2_IBS; /* NodeID MSR not available */ regs[2] &= ~AMDID2_NODE_ID; /* Don't advertise the OS visible workaround feature */ regs[2] &= ~AMDID2_OSVW; /* Hide mwaitx/monitorx capability from the guest */ regs[2] &= ~AMDID2_MWAITX; /* Advertise RDTSCP if it is enabled. */ error = vm_get_capability(vm, vcpu_id, VM_CAP_RDTSCP, &enable_rdtscp); if (error == 0 && enable_rdtscp) regs[3] |= AMDID_RDTSCP; else regs[3] &= ~AMDID_RDTSCP; break; case CPUID_8000_0007: /* * AMD uses this leaf to advertise the processor's * power monitoring and RAS capabilities. These * features are hardware-specific and exposing * them to a guest doesn't make a lot of sense. * * Intel uses this leaf only to advertise the * "Invariant TSC" feature with all other bits * being reserved (set to zero). */ regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; /* * "Invariant TSC" can be advertised to the guest if: * - host TSC frequency is invariant * - host TSCs are synchronized across physical cpus * * XXX This still falls short because the vcpu * can observe the TSC moving backwards as it * migrates across physical cpus. But at least * it should discourage the guest from using the * TSC to keep track of time. */ if (tsc_is_invariant && smp_tsc) regs[3] |= AMDPM_TSC_INVARIANT; break; case CPUID_8000_001D: /* AMD Cache topology, like 0000_0004 for Intel. */ if (!vmm_is_svm()) goto default_leaf; /* * Similar to Intel, generate a ficticious cache * topology for the guest with L3 shared by the * package, and L1 and L2 local to a core. */ vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); switch (param) { case 0: logical_cpus = threads; level = 1; func = 1; /* data cache */ break; case 1: logical_cpus = threads; level = 2; func = 3; /* unified cache */ break; case 2: logical_cpus = threads * cores; level = 3; func = 3; /* unified cache */ break; default: logical_cpus = 0; level = 0; func = 0; break; } logical_cpus = MIN(0xfff, logical_cpus - 1); regs[0] = (logical_cpus << 14) | (1 << 8) | (level << 5) | func; regs[1] = (func > 0) ? (CACHE_LINE_SIZE - 1) : 0; regs[2] = 0; regs[3] = 0; break; case CPUID_8000_001E: /* * AMD Family 16h+ and Hygon Family 18h additional * identifiers. */ if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16) goto default_leaf; vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); regs[0] = vcpu_id; threads = MIN(0xFF, threads - 1); regs[1] = (threads << 8) | (vcpu_id >> log2(threads + 1)); /* * XXX Bhyve topology cannot yet represent >1 node per * processor. */ regs[2] = 0; regs[3] = 0; break; case CPUID_0000_0001: do_cpuid(1, regs); error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state); if (error) { panic("x86_emulate_cpuid: error %d " "fetching x2apic state", error); } /* * Override the APIC ID only in ebx */ regs[1] &= ~(CPUID_LOCAL_APIC_ID); regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT); /* * Don't expose VMX, SpeedStep, TME or SMX capability. * Advertise x2APIC capability and Hypervisor guest. */ regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2); regs[2] &= ~(CPUID2_SMX); regs[2] |= CPUID2_HV; if (x2apic_state != X2APIC_DISABLED) regs[2] |= CPUID2_X2APIC; else regs[2] &= ~CPUID2_X2APIC; /* * Only advertise CPUID2_XSAVE in the guest if * the host is using XSAVE. */ if (!(regs[2] & CPUID2_OSXSAVE)) regs[2] &= ~CPUID2_XSAVE; /* * If CPUID2_XSAVE is being advertised and the * guest has set CR4_XSAVE, set * CPUID2_OSXSAVE. 
*/ regs[2] &= ~CPUID2_OSXSAVE; if (regs[2] & CPUID2_XSAVE) { error = vm_get_register(vm, vcpu_id, VM_REG_GUEST_CR4, &cr4); if (error) panic("x86_emulate_cpuid: error %d " "fetching %%cr4", error); if (cr4 & CR4_XSAVE) regs[2] |= CPUID2_OSXSAVE; } /* * Hide monitor/mwait until we know how to deal with * these instructions. */ regs[2] &= ~CPUID2_MON; /* * Hide the performance and debug features. */ regs[2] &= ~CPUID2_PDCM; /* * No TSC deadline support in the APIC yet */ regs[2] &= ~CPUID2_TSCDLT; /* * Hide thermal monitoring */ regs[3] &= ~(CPUID_ACPI | CPUID_TM); /* * Hide the debug store capability. */ regs[3] &= ~CPUID_DS; /* * Advertise the Machine Check and MTRR capability. * * Some guest OSes (e.g. Windows) will not boot if * these features are absent. */ regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR); vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); logical_cpus = threads * cores; regs[1] &= ~CPUID_HTT_CORES; regs[1] |= (logical_cpus & 0xff) << 16; regs[3] |= CPUID_HTT; break; case CPUID_0000_0004: cpuid_count(func, param, regs); if (regs[0] || regs[1] || regs[2] || regs[3]) { vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); regs[0] &= 0x3ff; regs[0] |= (cores - 1) << 26; /* * Cache topology: * - L1 and L2 are shared only by the logical * processors in a single core. * - L3 and above are shared by all logical * processors in the package. */ logical_cpus = threads; level = (regs[0] >> 5) & 0x7; if (level >= 3) logical_cpus *= cores; regs[0] |= (logical_cpus - 1) << 14; } break; case CPUID_0000_0007: regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; /* leaf 0 */ if (param == 0) { cpuid_count(func, param, regs); /* Only leaf 0 is supported */ regs[0] = 0; /* * Expose known-safe features. */ regs[1] &= (CPUID_STDEXT_FSGSBASE | CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE | CPUID_STDEXT_AVX2 | CPUID_STDEXT_SMEP | CPUID_STDEXT_BMI2 | CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM | CPUID_STDEXT_AVX512F | CPUID_STDEXT_RDSEED | CPUID_STDEXT_SMAP | CPUID_STDEXT_AVX512PF | CPUID_STDEXT_AVX512ER | CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA); regs[2] = 0; regs[3] &= CPUID_STDEXT3_MD_CLEAR; /* Advertise RDPID if it is enabled. */ error = vm_get_capability(vm, vcpu_id, VM_CAP_RDPID, &enable_rdpid); if (error == 0 && enable_rdpid) regs[2] |= CPUID_STDEXT2_RDPID; /* Advertise INVPCID if it is enabled. 
*/ error = vm_get_capability(vm, vcpu_id, VM_CAP_ENABLE_INVPCID, &enable_invpcid); if (error == 0 && enable_invpcid) regs[1] |= CPUID_STDEXT_INVPCID; } break; case CPUID_0000_0006: regs[0] = CPUTPM1_ARAT; regs[1] = 0; regs[2] = 0; regs[3] = 0; break; case CPUID_0000_000A: /* * Handle the access, but report 0 for * all options */ regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; break; case CPUID_0000_000B: /* * Intel processor topology enumeration */ if (vmm_is_intel()) { vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); if (param == 0) { logical_cpus = threads; width = log2(logical_cpus); level = CPUID_TYPE_SMT; x2apic_id = vcpu_id; } if (param == 1) { logical_cpus = threads * cores; width = log2(logical_cpus); level = CPUID_TYPE_CORE; x2apic_id = vcpu_id; } if (!cpuid_leaf_b || param >= 2) { width = 0; logical_cpus = 0; level = 0; x2apic_id = 0; } regs[0] = width & 0x1f; regs[1] = logical_cpus & 0xffff; regs[2] = (level << 8) | (param & 0xff); regs[3] = x2apic_id; } else { regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; } break; case CPUID_0000_000D: limits = vmm_get_xsave_limits(); if (!limits->xsave_enabled) { regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; break; } cpuid_count(func, param, regs); switch (param) { case 0: /* * Only permit the guest to use bits * that are active in the host in * %xcr0. Also, claim that the * maximum save area size is * equivalent to the host's current * save area size. Since this runs * "inside" of vmrun(), it runs with * the guest's xcr0, so the current * save area size is correct as-is. */ regs[0] &= limits->xcr0_allowed; regs[2] = limits->xsave_max_size; regs[3] &= (limits->xcr0_allowed >> 32); break; case 1: /* Only permit XSAVEOPT. */ regs[0] &= CPUID_EXTSTATE_XSAVEOPT; regs[1] = 0; regs[2] = 0; regs[3] = 0; break; default: /* * If the leaf is for a permitted feature, * pass through as-is, otherwise return * all zeroes. */ if (!(limits->xcr0_allowed & (1ul << param))) { regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; } break; } break; case CPUID_0000_0015: /* * Don't report CPU TSC/Crystal ratio and clock * values since guests may use these to derive the * local APIC frequency.. */ regs[0] = 0; regs[1] = 0; regs[2] = 0; regs[3] = 0; break; case 0x40000000: regs[0] = CPUID_VM_HIGH; bcopy(bhyve_id, ®s[1], 4); bcopy(bhyve_id + 4, ®s[2], 4); bcopy(bhyve_id + 8, ®s[3], 4); break; default: default_leaf: /* * The leaf value has already been clamped so * simply pass this through, keeping count of * how many unhandled leaf values have been seen. */ atomic_add_long(&bhyve_xcpuids, 1); cpuid_count(func, param, regs); break; } /* * CPUID clears the upper 32-bits of the long-mode registers. */ *rax = regs[0]; *rbx = regs[1]; *rcx = regs[2]; *rdx = regs[3]; return (1); } bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap) { bool rv; KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d", __func__, cap)); /* * Simply passthrough the capabilities of the host cpu for now. 
  */
 	rv = false;
 	switch (cap) {
 	case VCC_NO_EXECUTE:
 		if (amd_feature & AMDID_NX)
 			rv = true;
 		break;
 	case VCC_FFXSR:
 		if (amd_feature & AMDID_FFXSR)
 			rv = true;
 		break;
 	case VCC_TCE:
 		if (amd_feature2 & AMDID2_TCE)
 			rv = true;
 		break;
 	default:
 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
 	}
 	return (rv);
 }
+
+int
+vm_rdmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t *val)
+{
+	switch (num) {
+	case MSR_MTRRcap:
+		*val = MTRR_CAP_WC | MTRR_CAP_FIXED | VMM_MTRR_VAR_MAX;
+		break;
+	case MSR_MTRRdefType:
+		*val = mtrr->def_type;
+		break;
+	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+		*val = mtrr->fixed4k[num - MSR_MTRR4kBase];
+		break;
+	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+		*val = mtrr->fixed16k[num - MSR_MTRR16kBase];
+		break;
+	case MSR_MTRR64kBase:
+		*val = mtrr->fixed64k;
+		break;
+	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
+		u_int offset = num - MSR_MTRRVarBase;
+		if (offset % 2 == 0) {
+			*val = mtrr->var[offset / 2].base;
+		} else {
+			*val = mtrr->var[offset / 2].mask;
+		}
+		break;
+	}
+	default:
+		return (-1);
+	}
+
+	return (0);
+}
+
+int
+vm_wrmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t val)
+{
+	switch (num) {
+	case MSR_MTRRcap:
+		/* MTRRCAP is read only */
+		return (-1);
+	case MSR_MTRRdefType:
+		if (val & ~VMM_MTRR_DEF_MASK) {
+			/* generate #GP on writes to reserved fields */
+			return (-1);
+		}
+		mtrr->def_type = val;
+		break;
+	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+		mtrr->fixed4k[num - MSR_MTRR4kBase] = val;
+		break;
+	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+		mtrr->fixed16k[num - MSR_MTRR16kBase] = val;
+		break;
+	case MSR_MTRR64kBase:
+		mtrr->fixed64k = val;
+		break;
+	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: {
+		u_int offset = num - MSR_MTRRVarBase;
+		if (offset % 2 == 0) {
+			if (val & ~VMM_MTRR_PHYSBASE_MASK) {
+				/* generate #GP on writes to reserved fields */
+				return (-1);
+			}
+			mtrr->var[offset / 2].base = val;
+		} else {
+			if (val & ~VMM_MTRR_PHYSMASK_MASK) {
+				/* generate #GP on writes to reserved fields */
+				return (-1);
+			}
+			mtrr->var[offset / 2].mask = val;
+		}
+		break;
+	}
+	default:
+		return (-1);
+	}
+
+	return (0);
+}
diff --git a/sys/amd64/vmm/x86.h b/sys/amd64/vmm/x86.h
index 7c8fccf78f28..318f0b5cf871 100644
--- a/sys/amd64/vmm/x86.h
+++ b/sys/amd64/vmm/x86.h
@@ -1,83 +1,103 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _X86_H_
 #define _X86_H_
 
 #define CPUID_0000_0000 (0x0)
 #define CPUID_0000_0001 (0x1)
 #define CPUID_0000_0002 (0x2)
 #define CPUID_0000_0003 (0x3)
 #define CPUID_0000_0004 (0x4)
 #define CPUID_0000_0006 (0x6)
 #define CPUID_0000_0007 (0x7)
 #define CPUID_0000_000A (0xA)
 #define CPUID_0000_000B (0xB)
 #define CPUID_0000_000D (0xD)
 #define CPUID_0000_0015 (0x15)
 #define CPUID_8000_0000 (0x80000000)
 #define CPUID_8000_0001 (0x80000001)
 #define CPUID_8000_0002 (0x80000002)
 #define CPUID_8000_0003 (0x80000003)
 #define CPUID_8000_0004 (0x80000004)
 #define CPUID_8000_0006 (0x80000006)
 #define CPUID_8000_0007 (0x80000007)
 #define CPUID_8000_0008 (0x80000008)
 #define CPUID_8000_001D (0x8000001D)
 #define CPUID_8000_001E (0x8000001E)
 
 /*
  * CPUID instruction Fn0000_0001:
  */
 #define CPUID_0000_0001_APICID_MASK (0xff<<24)
 #define CPUID_0000_0001_APICID_SHIFT 24
 
 /*
  * CPUID instruction Fn0000_0001 ECX
  */
 #define CPUID_0000_0001_FEAT0_VMX (1<<5)
 
 int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax,
     uint64_t *rbx, uint64_t *rcx, uint64_t *rdx);
 
 enum vm_cpuid_capability {
 	VCC_NONE,
 	VCC_NO_EXECUTE,
 	VCC_FFXSR,
 	VCC_TCE,
 	VCC_LAST
 };
 
 /*
  * Return 'true' if the capability 'cap' is enabled in this virtual cpu
  * and 'false' otherwise.
  */
 bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability);
+
+#define VMM_MTRR_VAR_MAX 10
+#define VMM_MTRR_DEF_MASK \
+	(MTRR_DEF_ENABLE | MTRR_DEF_FIXED_ENABLE | MTRR_DEF_TYPE)
+#define VMM_MTRR_PHYSBASE_MASK (MTRR_PHYSBASE_PHYSBASE | MTRR_PHYSBASE_TYPE)
+#define VMM_MTRR_PHYSMASK_MASK (MTRR_PHYSMASK_PHYSMASK | MTRR_PHYSMASK_VALID)
+struct vm_mtrr {
+	uint64_t def_type;
+	uint64_t fixed4k[8];
+	uint64_t fixed16k[2];
+	uint64_t fixed64k;
+	struct {
+		uint64_t base;
+		uint64_t mask;
+	} var[VMM_MTRR_VAR_MAX];
+};
+
+int vm_rdmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t *val);
+int vm_wrmtrr(struct vm_mtrr *mtrr, u_int num, uint64_t val);
+
 #endif
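To illustrate how the new interface is meant to be driven, here is a small userland harness built around the vm_wrmtrr() calling convention used by svm_wrmsr() and vmx_wrmsr() above: a non-zero return is translated into a #GP for the guest. The struct definition, the reserved-bit masks and the #GP stub are simplified local copies (the real ones live in x86.h and <machine/specialreg.h>), and only the variable-range base/mask pairs are modeled, so treat this as a sketch of the call pattern rather than kernel code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MSR_MTRRVarBase		0x200
#define VMM_MTRR_VAR_MAX	10

/* Simplified stand-ins for the masks built from <machine/specialreg.h>. */
#define PHYSBASE_VALID_BITS	0x000ffffffffff0ffULL	/* base + type */
#define PHYSMASK_VALID_BITS	0x000ffffffffff800ULL	/* mask + valid */

/* Local mirror of the per-vCPU state added to x86.h (variable ranges only). */
struct vm_mtrr {
	struct {
		uint64_t base;
		uint64_t mask;
	} var[VMM_MTRR_VAR_MAX];
};

/* Trimmed-down vm_wrmtrr(): only the variable-range base/mask pairs. */
static int
vm_wrmtrr(struct vm_mtrr *mtrr, unsigned int num, uint64_t val)
{
	unsigned int offset;

	if (num < MSR_MTRRVarBase ||
	    num > MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1)
		return (-1);
	offset = num - MSR_MTRRVarBase;
	if (offset % 2 == 0) {
		if (val & ~PHYSBASE_VALID_BITS)
			return (-1);	/* reserved bits set in PhysBase */
		mtrr->var[offset / 2].base = val;
	} else {
		if (val & ~PHYSMASK_VALID_BITS)
			return (-1);	/* reserved bits set in PhysMask */
		mtrr->var[offset / 2].mask = val;
	}
	return (0);
}

/* Stub for vm_inject_gp(): the kernel queues a #GP for the vcpu instead. */
static void
inject_gp(int vcpu)
{
	printf("vcpu %d: inject #GP\n", vcpu);
}

/* Caller-side pattern used by svm_wrmsr()/vmx_wrmsr() in this diff. */
static void
handle_wrmsr(struct vm_mtrr *mtrr, int vcpu, unsigned int num, uint64_t val)
{
	if (vm_wrmtrr(mtrr, num, val) != 0)
		inject_gp(vcpu);
}

int
main(void)
{
	struct vm_mtrr mtrr;

	memset(&mtrr, 0, sizeof(mtrr));
	/* MTRRphysBase0: base 1 MiB, write-back (type 6) -> accepted. */
	handle_wrmsr(&mtrr, 0, MSR_MTRRVarBase + 0, 0x0000000000100006ULL);
	/* MTRRphysMask0 with a reserved low bit set -> rejected, #GP. */
	handle_wrmsr(&mtrr, 0, MSR_MTRRVarBase + 1, 0x0000000000000001ULL);
	printf("var[0].base = %#llx, var[0].mask = %#llx\n",
	    (unsigned long long)mtrr.var[0].base,
	    (unsigned long long)mtrr.var[0].mask);
	return (0);
}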