diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index aa265315f15e..a007ce5515a5 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -1,479 +1,474 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ struct vm_snapshot_meta; -#ifdef _KERNEL -void vmmdev_init(void); -int vmmdev_cleanup(void); -#endif - struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ vm_ooffset_t segoff; /* offset into memory segment */ size_t len; /* mmap length */ int prot; /* RWX */ int flags; }; #define VM_MEMMAP_F_WIRED 0x01 #define VM_MEMMAP_F_IOMMU 0x02 struct vm_munmap { vm_paddr_t gpa; size_t len; }; #define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? 
(m)->name : NULL) struct vm_memseg { int segid; size_t len; char name[VM_MAX_SUFFIXLEN + 1]; }; struct vm_memseg_fbsd12 { int segid; size_t len; char name[64]; }; _Static_assert(sizeof(struct vm_memseg_fbsd12) == 80, "COMPAT_FREEBSD12 ABI"); struct vm_register { int cpuid; int regnum; /* enum vm_reg_name */ uint64_t regval; }; struct vm_seg_desc { /* data or code segment */ int cpuid; int regnum; /* enum vm_reg_name */ struct seg_desc desc; }; struct vm_register_set { int cpuid; unsigned int count; const int *regnums; /* enum vm_reg_name */ uint64_t *regvals; }; struct vm_run { int cpuid; cpuset_t *cpuset; /* CPU set storage */ size_t cpusetsize; struct vm_exit *vm_exit; }; struct vm_exception { int cpuid; int vector; uint32_t error_code; int error_code_valid; int restart_instruction; }; struct vm_lapic_msi { uint64_t msg; uint64_t addr; }; struct vm_lapic_irq { int cpuid; int vector; }; struct vm_ioapic_irq { int irq; }; struct vm_isa_irq { int atpic_irq; int ioapic_irq; }; struct vm_isa_irq_trigger { int atpic_irq; enum vm_intr_trigger trigger; }; struct vm_capability { int cpuid; enum vm_cap_type captype; int capval; int allcpus; }; struct vm_pptdev { int bus; int slot; int func; }; struct vm_pptdev_mmio { int bus; int slot; int func; vm_paddr_t gpa; vm_paddr_t hpa; size_t len; }; struct vm_pptdev_msi { int vcpu; /* unused */ int bus; int slot; int func; int numvec; /* 0 means disabled */ uint64_t msg; uint64_t addr; }; struct vm_pptdev_msix { int vcpu; /* unused */ int bus; int slot; int func; int idx; uint64_t msg; uint32_t vector_control; uint64_t addr; }; struct vm_nmi { int cpuid; }; #define MAX_VM_STATS 64 struct vm_stats { int cpuid; /* in */ int index; /* in */ int num_entries; /* out */ struct timeval tv; uint64_t statbuf[MAX_VM_STATS]; }; struct vm_stat_desc { int index; /* in */ char desc[128]; /* out */ }; struct vm_x2apic { int cpuid; enum x2apic_state state; }; struct vm_gpa_pte { uint64_t gpa; /* in */ uint64_t pte[4]; /* out */ int ptenum; }; struct vm_hpet_cap { uint32_t capabilities; /* lower 32 bits of HPET capabilities */ }; struct vm_suspend { enum vm_suspend_how how; }; struct vm_gla2gpa { int vcpuid; /* inputs */ int prot; /* PROT_READ or PROT_WRITE */ uint64_t gla; struct vm_guest_paging paging; int fault; /* outputs */ uint64_t gpa; }; struct vm_activate_cpu { int vcpuid; }; struct vm_cpuset { int which; int cpusetsize; cpuset_t *cpus; }; #define VM_ACTIVE_CPUS 0 #define VM_SUSPENDED_CPUS 1 #define VM_DEBUG_CPUS 2 struct vm_intinfo { int vcpuid; uint64_t info1; uint64_t info2; }; struct vm_rtc_time { time_t secs; }; struct vm_rtc_data { int offset; uint8_t value; }; struct vm_cpu_topology { uint16_t sockets; uint16_t cores; uint16_t threads; uint16_t maxcpus; }; struct vm_readwrite_kernemu_device { int vcpuid; unsigned access_width : 3; unsigned _unused : 29; uint64_t gpa; uint64_t value; }; _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); enum { /* general routines */ IOCNUM_ABIVERS = 0, IOCNUM_RUN = 1, IOCNUM_SET_CAPABILITY = 2, IOCNUM_GET_CAPABILITY = 3, IOCNUM_SUSPEND = 4, IOCNUM_REINIT = 5, /* memory apis */ IOCNUM_MAP_MEMORY = 10, /* deprecated */ IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */ IOCNUM_GET_GPA_PMAP = 12, IOCNUM_GLA2GPA = 13, IOCNUM_ALLOC_MEMSEG = 14, IOCNUM_GET_MEMSEG = 15, IOCNUM_MMAP_MEMSEG = 16, IOCNUM_MMAP_GETNEXT = 17, IOCNUM_GLA2GPA_NOFAULT = 18, IOCNUM_MUNMAP_MEMSEG = 19, /* register/state accessors */ IOCNUM_SET_REGISTER = 20, IOCNUM_GET_REGISTER = 21, IOCNUM_SET_SEGMENT_DESCRIPTOR = 22, 
IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, IOCNUM_SET_REGISTER_SET = 24, IOCNUM_GET_REGISTER_SET = 25, IOCNUM_GET_KERNEMU_DEV = 26, IOCNUM_SET_KERNEMU_DEV = 27, /* interrupt injection */ IOCNUM_GET_INTINFO = 28, IOCNUM_SET_INTINFO = 29, IOCNUM_INJECT_EXCEPTION = 30, IOCNUM_LAPIC_IRQ = 31, IOCNUM_INJECT_NMI = 32, IOCNUM_IOAPIC_ASSERT_IRQ = 33, IOCNUM_IOAPIC_DEASSERT_IRQ = 34, IOCNUM_IOAPIC_PULSE_IRQ = 35, IOCNUM_LAPIC_MSI = 36, IOCNUM_LAPIC_LOCAL_IRQ = 37, IOCNUM_IOAPIC_PINCOUNT = 38, IOCNUM_RESTART_INSTRUCTION = 39, /* PCI pass-thru */ IOCNUM_BIND_PPTDEV = 40, IOCNUM_UNBIND_PPTDEV = 41, IOCNUM_MAP_PPTDEV_MMIO = 42, IOCNUM_PPTDEV_MSI = 43, IOCNUM_PPTDEV_MSIX = 44, IOCNUM_PPTDEV_DISABLE_MSIX = 45, IOCNUM_UNMAP_PPTDEV_MMIO = 46, /* statistics */ IOCNUM_VM_STATS = 50, IOCNUM_VM_STAT_DESC = 51, /* kernel device state */ IOCNUM_SET_X2APIC_STATE = 60, IOCNUM_GET_X2APIC_STATE = 61, IOCNUM_GET_HPET_CAPABILITIES = 62, /* CPU Topology */ IOCNUM_SET_TOPOLOGY = 63, IOCNUM_GET_TOPOLOGY = 64, /* legacy interrupt injection */ IOCNUM_ISA_ASSERT_IRQ = 80, IOCNUM_ISA_DEASSERT_IRQ = 81, IOCNUM_ISA_PULSE_IRQ = 82, IOCNUM_ISA_SET_IRQ_TRIGGER = 83, /* vm_cpuset */ IOCNUM_ACTIVATE_CPU = 90, IOCNUM_GET_CPUSET = 91, IOCNUM_SUSPEND_CPU = 92, IOCNUM_RESUME_CPU = 93, /* RTC */ IOCNUM_RTC_READ = 100, IOCNUM_RTC_WRITE = 101, IOCNUM_RTC_SETTIME = 102, IOCNUM_RTC_GETTIME = 103, /* checkpoint */ IOCNUM_SNAPSHOT_REQ = 113, IOCNUM_RESTORE_TIME = 115 }; #define VM_RUN \ _IOW('v', IOCNUM_RUN, struct vm_run) #define VM_SUSPEND \ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_REINIT \ _IO('v', IOCNUM_REINIT) #define VM_ALLOC_MEMSEG_FBSD12 \ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_fbsd12) #define VM_ALLOC_MEMSEG \ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) #define VM_GET_MEMSEG_FBSD12 \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_fbsd12) #define VM_GET_MEMSEG \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) #define VM_MMAP_MEMSEG \ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) #define VM_MMAP_GETNEXT \ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) #define VM_MUNMAP_MEMSEG \ _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap) #define VM_SET_REGISTER \ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) #define VM_GET_REGISTER \ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) #define VM_SET_SEGMENT_DESCRIPTOR \ _IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_GET_SEGMENT_DESCRIPTOR \ _IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_SET_REGISTER_SET \ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) #define VM_GET_REGISTER_SET \ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set) #define VM_SET_KERNEMU_DEV \ _IOW('v', IOCNUM_SET_KERNEMU_DEV, \ struct vm_readwrite_kernemu_device) #define VM_GET_KERNEMU_DEV \ _IOWR('v', IOCNUM_GET_KERNEMU_DEV, \ struct vm_readwrite_kernemu_device) #define VM_INJECT_EXCEPTION \ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) #define VM_LAPIC_IRQ \ _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq) #define VM_LAPIC_LOCAL_IRQ \ _IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq) #define VM_LAPIC_MSI \ _IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi) #define VM_IOAPIC_ASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_DEASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_PULSE_IRQ \ _IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_PINCOUNT \ _IOR('v', IOCNUM_IOAPIC_PINCOUNT, int) #define VM_ISA_ASSERT_IRQ 
\ _IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq) #define VM_ISA_DEASSERT_IRQ \ _IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq) #define VM_ISA_PULSE_IRQ \ _IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq) #define VM_ISA_SET_IRQ_TRIGGER \ _IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger) #define VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) #define VM_BIND_PPTDEV \ _IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev) #define VM_UNBIND_PPTDEV \ _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev) #define VM_MAP_PPTDEV_MMIO \ _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio) #define VM_PPTDEV_MSI \ _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi) #define VM_PPTDEV_MSIX \ _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix) #define VM_PPTDEV_DISABLE_MSIX \ _IOW('v', IOCNUM_PPTDEV_DISABLE_MSIX, struct vm_pptdev) #define VM_UNMAP_PPTDEV_MMIO \ _IOW('v', IOCNUM_UNMAP_PPTDEV_MMIO, struct vm_pptdev_mmio) #define VM_INJECT_NMI \ _IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi) #define VM_STATS \ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) #define VM_STAT_DESC \ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) #define VM_SET_X2APIC_STATE \ _IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_X2APIC_STATE \ _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_HPET_CAPABILITIES \ _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) #define VM_SET_TOPOLOGY \ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) #define VM_GET_TOPOLOGY \ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #define VM_GLA2GPA \ _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa) #define VM_GLA2GPA_NOFAULT \ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa) #define VM_ACTIVATE_CPU \ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) #define VM_GET_CPUS \ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) #define VM_SUSPEND_CPU \ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) #define VM_RESUME_CPU \ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) #define VM_SET_INTINFO \ _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) #define VM_GET_INTINFO \ _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) #define VM_RTC_WRITE \ _IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data) #define VM_RTC_READ \ _IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data) #define VM_RTC_SETTIME \ _IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time) #define VM_RTC_GETTIME \ _IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time) #define VM_RESTART_INSTRUCTION \ _IOW('v', IOCNUM_RESTART_INSTRUCTION, int) #define VM_SNAPSHOT_REQ \ _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta) #define VM_RESTORE_TIME \ _IOWR('v', IOCNUM_RESTORE_TIME, int) #endif diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index d0259b3b125f..20006e63cfeb 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1,3066 +1,3066 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_bhyve_snapshot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include +#include #include #include "vmm_ioport.h" #include "vmm_host.h" #include "vmm_mem.h" #include "vmm_util.h" #include "vatpic.h" #include "vatpit.h" #include "vhpet.h" #include "vioapic.h" #include "vlapic.h" #include "vpmtmr.h" #include "vrtc.h" #include "vmm_stat.h" #include "vmm_lapic.h" #include "io/ppt.h" #include "io/iommu.h" struct vlapic; /* * Initialization: * (a) allocated when vcpu is created * (i) initialized when vcpu is created and when it is reinitialized * (o) initialized the first time the vcpu is created * (x) initialized before use */ struct vcpu { struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ enum vcpu_state state; /* (o) vcpu state */ int vcpuid; /* (o) */ int hostcpu; /* (o) vcpu's host cpu */ int reqidle; /* (i) request vcpu to idle */ struct vm *vm; /* (o) */ void *cookie; /* (i) cpu-specific data */ struct vlapic *vlapic; /* (i) APIC device model */ enum x2apic_state x2apic_state; /* (i) APIC mode */ uint64_t exitintinfo; /* (i) events pending at VM exit */ int nmi_pending; /* (i) NMI pending */ int extint_pending; /* (i) INTR pending */ int exception_pending; /* (i) exception pending */ int exc_vector; /* (x) exception collateral */ int exc_errcode_valid; uint32_t exc_errcode; struct savefpu *guestfpu; /* (a,i) guest fpu state */ uint64_t guest_xcr0; /* (i) guest %xcr0 register */ void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ cpuset_t exitinfo_cpuset; /* (x) storage for vmexit handlers */ uint64_t nextrip; /* (x) next instruction to execute */ uint64_t tsc_offset; /* (o) TSC offsetting */ }; #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) struct mem_seg { size_t len; bool sysmem; struct vm_object *object; }; #define VM_MAX_MEMSEGS 4 struct mem_map { vm_paddr_t gpa; size_t len; vm_ooffset_t segoff; int segid; int prot; int flags; }; #define VM_MAX_MEMMAPS 8 /* * Initialization: * (o) initialized the first time the VM is created * (i) initialized when 
VM is created and when it is reinitialized * (x) initialized before use * * Locking: * [m] mem_segs_lock * [r] rendezvous_mtx * [v] reads require one frozen vcpu, writes require freezing all vcpus */ struct vm { void *cookie; /* (i) cpu-specific data */ void *iommu; /* (x) iommu-specific data */ struct vhpet *vhpet; /* (i) virtual HPET */ struct vioapic *vioapic; /* (i) virtual ioapic */ struct vatpic *vatpic; /* (i) virtual atpic */ struct vatpit *vatpit; /* (i) virtual atpit */ struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ struct vrtc *vrtc; /* (o) virtual RTC */ volatile cpuset_t active_cpus; /* (i) active vcpus */ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ cpuset_t startup_cpus; /* (i) [r] waiting for startup */ int suspend; /* (i) stop VM execution */ bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */ cpuset_t rendezvous_done_cpus; /* (x) [r] rendezvous finished */ void *rendezvous_arg; /* (x) [r] rendezvous func/arg */ vm_rendezvous_func_t rendezvous_func; struct mtx rendezvous_mtx; /* (o) rendezvous lock */ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) [m+v] guest address space */ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) [m+v] guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (o) guest vcpus */ /* The following describe the vm cpu topology */ uint16_t sockets; /* (o) num of sockets */ uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* (o) max pluggable cpus */ struct sx mem_segs_lock; /* (o) */ struct sx vcpus_init_lock; /* (o) */ }; #define VMM_CTR0(vcpu, format) \ VCPU_CTR0((vcpu)->vm, (vcpu)->vcpuid, format) #define VMM_CTR1(vcpu, format, p1) \ VCPU_CTR1((vcpu)->vm, (vcpu)->vcpuid, format, p1) #define VMM_CTR2(vcpu, format, p1, p2) \ VCPU_CTR2((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2) #define VMM_CTR3(vcpu, format, p1, p2, p3) \ VCPU_CTR3((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3) #define VMM_CTR4(vcpu, format, p1, p2, p3, p4) \ VCPU_CTR4((vcpu)->vm, (vcpu)->vcpuid, format, p1, p2, p3, p4) static int vmm_initialized; static void vmmops_panic(void); static void vmmops_panic(void) { panic("vmm_ops func called when !vmm_is_intel() && !vmm_is_svm()"); } #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ DEFINE_IFUNC(static, ret_type, vmmops_##opname, args) \ { \ if (vmm_is_intel()) \ return (vmm_ops_intel.opname); \ else if (vmm_is_svm()) \ return (vmm_ops_amd.opname); \ else \ return ((ret_type (*)args)vmmops_panic); \ } DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum)) DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) DEFINE_VMMOPS_IFUNC(void, modresume, (void)) DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t rip, struct pmap *pmap, struct vm_eventinfo *info)) DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, int vcpu_id)) DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) DEFINE_VMMOPS_IFUNC(int, getdesc, (void *vcpui, int num, struct seg_desc *desc)) DEFINE_VMMOPS_IFUNC(int, setdesc, (void *vcpui, int num, 
struct seg_desc *desc)) DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, vm_offset_t max)) DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) DEFINE_VMMOPS_IFUNC(struct vlapic *, vlapic_init, (void *vcpui)) DEFINE_VMMOPS_IFUNC(void, vlapic_cleanup, (struct vlapic *vlapic)) #ifdef BHYVE_SNAPSHOT DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui, struct vm_snapshot_meta *meta)) DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now)) #endif SDT_PROVIDER_DEFINE(vmm); static MALLOC_DEFINE(M_VM, "vm", "vm"); /* statistics */ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); /* * Halt the guest if all vcpus are executing a HLT instruction with * interrupts disabled. */ static int halt_detection_enabled = 1; SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, &halt_detection_enabled, 0, "Halt VM if all vcpus execute HLT with interrupts disabled"); static int vmm_ipinum; SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); static int trace_guest_exceptions; SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, &trace_guest_exceptions, 0, "Trap into hypervisor on all guest exceptions and reflect them back"); static int trap_wbinvd; SYSCTL_INT(_hw_vmm, OID_AUTO, trap_wbinvd, CTLFLAG_RDTUN, &trap_wbinvd, 0, "WBINVD triggers a VM-exit"); u_int vm_maxcpu; SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &vm_maxcpu, 0, "Maximum number of vCPUs"); static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); /* global statistics */ VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus"); VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt"); VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted"); VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted"); VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted"); VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted"); VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits"); VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted"); VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening"); VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening"); VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted"); VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted"); VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault"); VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit"); VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); /* * Upper limit on vm_maxcpu. Limited by use of uint16_t types for CPU * counts as well as range of vpid values for VT-x and by the capacity * of cpuset_t masks. 
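 *
 * As a worked example (hypothetical build parameters): a kernel
 * compiled with CPU_SETSIZE of 256 resolves VM_MAXCPU below to
 * MIN(0xffff - 1, 256) = 256, i.e. the cpuset_t width binds long
 * before the 16-bit vpid limit does.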
The call to new_unrhdr() in vpid_init() in * vmx.c requires 'vm_maxcpu + 1 <= 0xffff', hence the '- 1' below. */ #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) #ifdef KTR static const char * vcpu_state2str(enum vcpu_state state) { switch (state) { case VCPU_IDLE: return ("idle"); case VCPU_FROZEN: return ("frozen"); case VCPU_RUNNING: return ("running"); case VCPU_SLEEPING: return ("sleeping"); default: return ("unknown"); } } #endif static void vcpu_cleanup(struct vcpu *vcpu, bool destroy) { vmmops_vlapic_cleanup(vcpu->vlapic); vmmops_vcpu_cleanup(vcpu->cookie); vcpu->cookie = NULL; if (destroy) { vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); free(vcpu, M_VM); } } static struct vcpu * vcpu_alloc(struct vm *vm, int vcpu_id) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_init: invalid vcpu %d", vcpu_id)); vcpu = malloc(sizeof(*vcpu), M_VM, M_WAITOK | M_ZERO); vcpu_lock_init(vcpu); vcpu->state = VCPU_IDLE; vcpu->hostcpu = NOCPU; vcpu->vcpuid = vcpu_id; vcpu->vm = vm; vcpu->guestfpu = fpu_save_area_alloc(); vcpu->stats = vmm_stat_alloc(); vcpu->tsc_offset = 0; return (vcpu); } static void vcpu_init(struct vcpu *vcpu) { vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); vcpu->vlapic = vmmops_vlapic_init(vcpu->cookie); vm_set_x2apic_state(vcpu, X2APIC_DISABLED); vcpu->reqidle = 0; vcpu->exitintinfo = 0; vcpu->nmi_pending = 0; vcpu->extint_pending = 0; vcpu->exception_pending = 0; vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; fpu_save_area_reset(vcpu->guestfpu); vmm_stat_init(vcpu->stats); } int vcpu_trace_exceptions(struct vcpu *vcpu) { return (trace_guest_exceptions); } int vcpu_trap_wbinvd(struct vcpu *vcpu) { return (trap_wbinvd); } struct vm_exit * vm_exitinfo(struct vcpu *vcpu) { return (&vcpu->exitinfo); } cpuset_t * vm_exitinfo_cpuset(struct vcpu *vcpu) { return (&vcpu->exitinfo_cpuset); } static int vmm_init(void) { int error; if (!vmm_is_hw_supported()) return (ENXIO); vm_maxcpu = mp_ncpus; TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); if (vm_maxcpu > VM_MAXCPU) { printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); vm_maxcpu = VM_MAXCPU; } if (vm_maxcpu == 0) vm_maxcpu = 1; vmm_host_state_init(); vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) : &IDTVEC(justreturn)); if (vmm_ipinum < 0) vmm_ipinum = IPI_AST; error = vmm_mem_init(); if (error) return (error); vmm_resume_p = vmmops_modresume; return (vmmops_modinit(vmm_ipinum)); } static int vmm_handler(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: if (vmm_is_hw_supported()) { vmmdev_init(); error = vmm_init(); if (error == 0) vmm_initialized = 1; } else { error = ENXIO; } break; case MOD_UNLOAD: if (vmm_is_hw_supported()) { error = vmmdev_cleanup(); if (error == 0) { vmm_resume_p = NULL; iommu_cleanup(); if (vmm_ipinum != IPI_AST) lapic_ipi_free(vmm_ipinum); error = vmmops_modcleanup(); /* * Something bad happened - prevent new * VMs from being created */ if (error) vmm_initialized = 0; } } else { error = 0; } break; default: error = 0; break; } return (error); } static moduledata_t vmm_kmod = { "vmm", vmm_handler, NULL }; /* * vmm initialization has the following dependencies: * * - VT-x initialization requires smp_rendezvous() and therefore must happen * after SMP is fully functional (after SI_SUB_SMP). 
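 *
 * A related knob: vmm_init() above consumes the "hw.vmm.maxcpu"
 * loader tunable via TUNABLE_INT_FETCH() before clamping to
 * VM_MAXCPU, so a line such as hw.vmm.maxcpu="16" (hypothetical
 * value) in loader.conf(5) bounds every VM this module creates.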
*/ DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); vm->iommu = NULL; vm->vioapic = vioapic_init(vm); vm->vhpet = vhpet_init(vm); vm->vatpic = vatpic_init(vm); vm->vatpit = vatpit_init(vm); vm->vpmtmr = vpmtmr_init(vm); if (create) vm->vrtc = vrtc_init(vm); CPU_ZERO(&vm->active_cpus); CPU_ZERO(&vm->debug_cpus); CPU_ZERO(&vm->startup_cpus); vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); if (!create) { for (int i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_init(vm->vcpu[i]); } } } void vm_disable_vcpu_creation(struct vm *vm) { sx_xlock(&vm->vcpus_init_lock); vm->dying = true; sx_xunlock(&vm->vcpus_init_lock); } struct vcpu * vm_alloc_vcpu(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) return (vcpu); sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); /* * Ensure vCPU is fully created before updating pointer * to permit unlocked reads above. */ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], (uintptr_t)vcpu); } sx_xunlock(&vm->vcpus_init_lock); return (vcpu); } void vm_slock_vcpus(struct vm *vm) { sx_slock(&vm->vcpus_init_lock); } void vm_unlock_vcpus(struct vm *vm) { sx_unlock(&vm->vcpus_init_lock); } /* * The default CPU topology is a single thread per package. */ u_int cores_per_package = 1; u_int threads_per_core = 1; int vm_create(const char *name, struct vm **retvm) { struct vm *vm; struct vmspace *vmspace; /* * If vmm.ko could not be successfully initialized then don't attempt * to create the virtual machine. */ if (!vmm_initialized) return (ENXIO); if (name == NULL || strnlen(name, VM_MAX_NAMELEN + 1) == VM_MAX_NAMELEN + 1) return (EINVAL); vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48); if (vmspace == NULL) return (ENOMEM); vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); sx_init(&vm->mem_segs_lock, "vm mem_segs"); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK | M_ZERO); vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ vm->threads = threads_per_core; /* XXX backwards compatibility */ vm->maxcpus = vm_maxcpu; vm_init(vm, true); *retvm = vm; return (0); } void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus) { *sockets = vm->sockets; *cores = vm->cores; *threads = vm->threads; *maxcpus = vm->maxcpus; } uint16_t vm_get_maxcpus(struct vm *vm) { return (vm->maxcpus); } int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus __unused) { /* Ignore maxcpus. 
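 * The parameter is accepted but has no effect: the limit remains
 * vm->maxcpus, fixed at vm_create() time from vm_maxcpu.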
*/ if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); vm->sockets = sockets; vm->cores = cores; vm->threads = threads; return(0); } static void vm_cleanup(struct vm *vm, bool destroy) { struct mem_map *mm; int i; if (destroy) vm_xlock_memsegs(vm); ppt_unassign_all(vm); if (vm->iommu != NULL) iommu_destroy_domain(vm->iommu); if (destroy) vrtc_cleanup(vm->vrtc); else vrtc_reset(vm->vrtc); vpmtmr_cleanup(vm->vpmtmr); vatpit_cleanup(vm->vatpit); vhpet_cleanup(vm->vhpet); vatpic_cleanup(vm->vatpic); vioapic_cleanup(vm->vioapic); for (i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_cleanup(vm->vcpu[i], destroy); } vmmops_cleanup(vm->cookie); /* * System memory is removed from the guest address space only when * the VM is destroyed. This is because the mapping remains the same * across VM reset. * * Device memory can be relocated by the guest (e.g. using PCI BARs) * so those mappings are removed on a VM reset. */ for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (destroy || !sysmem_mapping(vm, mm)) vm_free_memmap(vm, i); } if (destroy) { for (i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); vm_unlock_memsegs(vm); vmmops_vmspace_free(vm->vmspace); vm->vmspace = NULL; free(vm->vcpu, M_VM); sx_destroy(&vm->vcpus_init_lock); sx_destroy(&vm->mem_segs_lock); mtx_destroy(&vm->rendezvous_mtx); } } void vm_destroy(struct vm *vm) { vm_cleanup(vm, true); free(vm, M_VM); } int vm_reinit(struct vm *vm) { int error; /* * A virtual machine can be reset only if all vcpus are suspended. */ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { vm_cleanup(vm, false); vm_init(vm, false); error = 0; } else { error = EBUSY; } return (error); } const char * vm_name(struct vm *vm) { return (vm->name); } void vm_slock_memsegs(struct vm *vm) { sx_slock(&vm->mem_segs_lock); } void vm_xlock_memsegs(struct vm *vm) { sx_xlock(&vm->mem_segs_lock); } void vm_unlock_memsegs(struct vm *vm) { sx_unlock(&vm->mem_segs_lock); } int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { vm_object_t obj; if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) return (ENOMEM); else return (0); } int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) { vmm_mmio_free(vm->vmspace, gpa, len); return (0); } /* * Return 'true' if 'gpa' is allocated in the guest address space. * * This function is called in the context of a running vcpu which acts as * an implicit lock on 'vm->mem_maps[]'. 
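 *
 * A minimal usage sketch (illustrative only; the surrounding
 * emulation logic is elided):
 *
 *	if (vm_mem_allocated(vcpu, gpa))
 *		... 'gpa' stays backed while this vcpu keeps running ...
 *	else
 *		... unbacked: reflect the access out to userspace ...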
*/ bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) { struct vm *vm = vcpu->vm; struct mem_map *mm; int i; #ifdef INVARIANTS int hostcpu, state; state = vcpu_get_state(vcpu, &hostcpu); KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); #endif for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) return (true); /* 'gpa' is sysmem or devmem */ } if (ppt_is_mmio(vm, gpa)) return (true); /* 'gpa' is pci passthru mmio */ return (false); } int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) { struct mem_seg *seg; vm_object_t obj; sx_assert(&vm->mem_segs_lock, SX_XLOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); if (len == 0 || (len & PAGE_MASK)) return (EINVAL); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { if (seg->len == len && seg->sysmem == sysmem) return (EEXIST); else return (EINVAL); } obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT); if (obj == NULL) return (ENOMEM); seg->len = len; seg->object = obj; seg->sysmem = sysmem; return (0); } int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, vm_object_t *objptr) { struct mem_seg *seg; sx_assert(&vm->mem_segs_lock, SX_LOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[ident]; if (len) *len = seg->len; if (sysmem) *sysmem = seg->sysmem; if (objptr) *objptr = seg->object; return (0); } void vm_free_memseg(struct vm *vm, int ident) { struct mem_seg *seg; KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, ("%s: invalid memseg ident %d", __func__, ident)); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { vm_object_deallocate(seg->object); bzero(seg, sizeof(struct mem_seg)); } } int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, size_t len, int prot, int flags) { struct mem_seg *seg; struct mem_map *m, *map; vm_ooffset_t last; int i, error; if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) return (EINVAL); if (flags & ~VM_MEMMAP_F_WIRED) return (EINVAL); if (segid < 0 || segid >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[segid]; if (seg->object == NULL) return (EINVAL); last = first + len; if (first < 0 || first >= last || last > seg->len) return (EINVAL); if ((gpa | first | last) & PAGE_MASK) return (EINVAL); map = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->len == 0) { map = m; break; } } if (map == NULL) return (ENOSPC); error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, len, 0, VMFS_NO_SPACE, prot, prot, 0); if (error != KERN_SUCCESS) return (EFAULT); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); return (error == KERN_RESOURCE_SHORTAGE ? 
ENOMEM : EFAULT); } } map->gpa = gpa; map->len = len; map->segoff = first; map->segid = segid; map->prot = prot; map->flags = flags; return (0); } int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) { struct mem_map *m; int i; for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->gpa == gpa && m->len == len && (m->flags & VM_MEMMAP_F_IOMMU) == 0) { vm_free_memmap(vm, i); return (0); } } return (EINVAL); } int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) { struct mem_map *mm, *mmnext; int i; mmnext = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len == 0 || mm->gpa < *gpa) continue; if (mmnext == NULL || mm->gpa < mmnext->gpa) mmnext = mm; } if (mmnext != NULL) { *gpa = mmnext->gpa; if (segid) *segid = mmnext->segid; if (segoff) *segoff = mmnext->segoff; if (len) *len = mmnext->len; if (prot) *prot = mmnext->prot; if (flags) *flags = mmnext->flags; return (0); } else { return (ENOENT); } } static void vm_free_memmap(struct vm *vm, int ident) { struct mem_map *mm; int error __diagused; mm = &vm->mem_maps[ident]; if (mm->len) { error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, mm->gpa + mm->len); KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", __func__, error)); bzero(mm, sizeof(struct mem_map)); } } static __inline bool sysmem_mapping(struct vm *vm, struct mem_map *mm) { if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) return (true); else return (false); } vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm) { struct mem_map *mm; vm_paddr_t maxaddr; int i; maxaddr = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (sysmem_mapping(vm, mm)) { if (maxaddr < mm->gpa + mm->len) maxaddr = mm->gpa + mm->len; } } return (maxaddr); } static void vm_iommu_map(struct vm *vm) { vm_paddr_t gpa, hpa; struct mem_map *mm; int i; sx_assert(&vm->mem_segs_lock, SX_LOCKED); for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (!sysmem_mapping(vm, mm)) continue; KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, ("iommu map found invalid memmap %#lx/%#lx/%#x", mm->gpa, mm->len, mm->flags)); if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) continue; mm->flags |= VM_MEMMAP_F_IOMMU; for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) { hpa = pmap_extract(vmspace_pmap(vm->vmspace), gpa); /* * All mappings in the vmm vmspace must be * present since they are managed by vmm in this way. * Because we are in pass-through mode, the * mappings must also be wired. This implies * that all pages must be mapped and wired, * allowing to use pmap_extract() and avoiding the * need to use vm_gpa_hold_global(). * * This could change if/when we start * supporting page faults on IOMMU maps. 
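 *
 * Concretely: were a sysmem page not wired here, pmap_extract()
 * could return 0 for its gpa and the loop below would hand the
 * IOMMU a bogus hpa, which is why only VM_MEMMAP_F_WIRED mappings
 * are walked at all.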
*/ KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(hpa)), ("vm_iommu_map: vm %p gpa %jx hpa %jx not wired", vm, (uintmax_t)gpa, (uintmax_t)hpa)); iommu_create_mapping(vm->iommu, gpa, hpa, PAGE_SIZE); } } iommu_invalidate_tlb(iommu_host_domain()); } static void vm_iommu_unmap(struct vm *vm) { vm_paddr_t gpa; struct mem_map *mm; int i; sx_assert(&vm->mem_segs_lock, SX_LOCKED); for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (!sysmem_mapping(vm, mm)) continue; if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) continue; mm->flags &= ~VM_MEMMAP_F_IOMMU; KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, ("iommu unmap found invalid memmap %#lx/%#lx/%#x", mm->gpa, mm->len, mm->flags)); for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) { KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(pmap_extract( vmspace_pmap(vm->vmspace), gpa))), ("vm_iommu_unmap: vm %p gpa %jx not wired", vm, (uintmax_t)gpa)); iommu_remove_mapping(vm->iommu, gpa, PAGE_SIZE); } } /* * Invalidate the cached translations associated with the domain * from which pages were removed. */ iommu_invalidate_tlb(vm->iommu); } int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) { int error; error = ppt_unassign_device(vm, bus, slot, func); if (error) return (error); if (ppt_assigned_devices(vm) == 0) vm_iommu_unmap(vm); return (0); } int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) { int error; vm_paddr_t maxaddr; /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */ if (ppt_assigned_devices(vm) == 0) { KASSERT(vm->iommu == NULL, ("vm_assign_pptdev: iommu must be NULL")); maxaddr = vmm_sysmem_maxaddr(vm); vm->iommu = iommu_create_domain(maxaddr); if (vm->iommu == NULL) return (ENXIO); vm_iommu_map(vm); } error = ppt_assign_device(vm, bus, slot, func); return (error); } static void * _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { int i, count, pageoff; struct mem_map *mm; vm_page_t m; pageoff = gpa & PAGE_MASK; if (len > PAGE_SIZE - pageoff) panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); count = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) { count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); break; } } if (count == 1) { *cookie = m; return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); } else { *cookie = NULL; return (NULL); } } void * vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { #ifdef INVARIANTS /* * The current vcpu should be frozen to ensure 'vm_memmap[]' * stability. 
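 *
 * Typical caller pattern, as a minimal sketch (the access itself
 * is elided; 'len' may not cross a page boundary):
 *
 *	void *cookie, *hva;
 *
 *	hva = vm_gpa_hold(vcpu, gpa, len, VM_PROT_READ, &cookie);
 *	if (hva != NULL) {
 *		... up to 'len' bytes are valid at 'hva' ...
 *		vm_gpa_release(cookie);
 *	}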
*/ int state = vcpu_get_state(vcpu, NULL); KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", __func__, state)); #endif return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); } void * vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { sx_assert(&vm->mem_segs_lock, SX_LOCKED); return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); } void vm_gpa_release(void *cookie) { vm_page_t m = cookie; vm_page_unwire(m, PQ_ACTIVE); } int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { if (reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); } int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; if (reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_RIP) return (error); /* Set 'nextrip' to match the value of %rip */ VMM_CTR1(vcpu, "Setting nextrip to %#lx", val); vcpu->nextrip = val; return (0); } static bool is_descriptor_table(int reg) { switch (reg) { case VM_REG_GUEST_IDTR: case VM_REG_GUEST_GDTR: return (true); default: return (false); } } static bool is_segment_register(int reg) { switch (reg) { case VM_REG_GUEST_ES: case VM_REG_GUEST_CS: case VM_REG_GUEST_SS: case VM_REG_GUEST_DS: case VM_REG_GUEST_FS: case VM_REG_GUEST_GS: case VM_REG_GUEST_TR: case VM_REG_GUEST_LDTR: return (true); default: return (false); } } int vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) { if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); return (vmmops_getdesc(vcpu->cookie, reg, desc)); } int vm_set_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc) { if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); return (vmmops_setdesc(vcpu->cookie, reg, desc)); } static void restore_guest_fpustate(struct vcpu *vcpu) { /* flush host state to the pcb */ fpuexit(curthread); /* restore guest FPU state */ fpu_enable(); fpurestore(vcpu->guestfpu); /* restore guest XCR0 if XSAVE is enabled in the host */ if (rcr4() & CR4_XSAVE) load_xcr(0, vcpu->guest_xcr0); /* * The FPU is now "dirty" with the guest's state so disable * the FPU to trap any access by the host. */ fpu_disable(); } static void save_guest_fpustate(struct vcpu *vcpu) { if ((rcr0() & CR0_TS) == 0) panic("fpu emulation not enabled in host!"); /* save guest XCR0 and restore host XCR0 */ if (rcr4() & CR4_XSAVE) { vcpu->guest_xcr0 = rxcr(0); load_xcr(0, vmm_get_host_xcr0()); } /* save guest FPU state */ fpu_enable(); fpusave(vcpu->guestfpu); fpu_disable(); } static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_assert_locked(vcpu); /* * State transitions from the vmmdev_ioctl() must always begin from * the VCPU_IDLE state. This guarantees that there is only a single * ioctl() operating on a vcpu at any point. 
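 *
 * The ioctl-side pattern this enforces is, as a sketch:
 *
 *	error = vcpu_set_state(vcpu, VCPU_FROZEN, true);
 *	if (error == 0) {
 *		... operate on the stopped vcpu ...
 *		(void)vcpu_set_state(vcpu, VCPU_IDLE, false);
 *	}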
*/ if (from_idle) { while (vcpu->state != VCPU_IDLE) { vcpu->reqidle = 1; vcpu_notify_event_locked(vcpu, false); VMM_CTR1(vcpu, "vcpu state change from %s to " "idle requested", vcpu_state2str(vcpu->state)); msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); } } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); } if (vcpu->state == VCPU_RUNNING) { KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " "mismatch for running vcpu", curcpu, vcpu->hostcpu)); } else { KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " "vcpu that is not running", vcpu->hostcpu)); } /* * The following state transitions are allowed: * IDLE -> FROZEN -> IDLE * FROZEN -> RUNNING -> FROZEN * FROZEN -> SLEEPING -> FROZEN */ switch (vcpu->state) { case VCPU_IDLE: case VCPU_RUNNING: case VCPU_SLEEPING: error = (newstate != VCPU_FROZEN); break; case VCPU_FROZEN: error = (newstate == VCPU_FROZEN); break; default: error = 1; break; } if (error) return (EBUSY); VMM_CTR2(vcpu, "vcpu state changed from %s to %s", vcpu_state2str(vcpu->state), vcpu_state2str(newstate)); vcpu->state = newstate; if (newstate == VCPU_RUNNING) vcpu->hostcpu = curcpu; else vcpu->hostcpu = NOCPU; if (newstate == VCPU_IDLE) wakeup(&vcpu->state); return (0); } static void vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d\n", error, newstate); } static void vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d", error, newstate); } static int vm_handle_rendezvous(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; struct thread *td; int error, vcpuid; error = 0; vcpuid = vcpu->vcpuid; td = curthread; mtx_lock(&vm->rendezvous_mtx); while (vm->rendezvous_func != NULL) { /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus); if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { VMM_CTR0(vcpu, "Calling rendezvous func"); (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); CPU_SET(vcpuid, &vm->rendezvous_done_cpus); } if (CPU_CMP(&vm->rendezvous_req_cpus, &vm->rendezvous_done_cpus) == 0) { VMM_CTR0(vcpu, "Rendezvous completed"); CPU_ZERO(&vm->rendezvous_req_cpus); vm->rendezvous_func = NULL; wakeup(&vm->rendezvous_func); break; } VMM_CTR0(vcpu, "Wait for rendezvous completion"); mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, "vmrndv", hz); if (td_ast_pending(td, TDA_SUSPEND)) { mtx_unlock(&vm->rendezvous_mtx); error = thread_check_susp(td, true); if (error != 0) return (error); mtx_lock(&vm->rendezvous_mtx); } } mtx_unlock(&vm->rendezvous_mtx); return (0); } /* * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. */ static int vm_handle_hlt(struct vcpu *vcpu, bool intr_disabled, bool *retu) { struct vm *vm = vcpu->vm; const char *wmesg; struct thread *td; int error, t, vcpuid, vcpu_halted, vm_halted; vcpuid = vcpu->vcpuid; vcpu_halted = 0; vm_halted = 0; error = 0; td = curthread; KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); vcpu_lock(vcpu); while (1) { /* * Do a final check for pending NMI or interrupts before * really putting this thread to sleep. Also check for * software events that would cause this vcpu to wakeup. 
* * These interrupts/events could have happened after the * vcpu returned from vmmops_run() and before it acquired the * vcpu lock above. */ if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) break; if (vm_nmi_pending(vcpu)) break; if (!intr_disabled) { if (vm_extint_pending(vcpu) || vlapic_pending_intr(vcpu->vlapic, NULL)) { break; } } /* Don't go to sleep if the vcpu thread needs to yield */ if (vcpu_should_yield(vcpu)) break; if (vcpu_debugged(vcpu)) break; /* * Some Linux guests implement "halt" by having all vcpus * execute HLT with interrupts disabled. 'halted_cpus' keeps * track of the vcpus that have entered this state. When all * vcpus enter the halted state the virtual machine is halted. */ if (intr_disabled) { wmesg = "vmhalt"; VMM_CTR0(vcpu, "Halted"); if (!vcpu_halted && halt_detection_enabled) { vcpu_halted = 1; CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); } if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { vm_halted = 1; break; } } else { wmesg = "vmidle"; } t = ticks; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); /* * XXX msleep_spin() cannot be interrupted by signals so * wake up periodically to check pending signals. */ msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); vmm_stat_incr(vcpu, VCPU_IDLE_TICKS, ticks - t); if (td_ast_pending(td, TDA_SUSPEND)) { vcpu_unlock(vcpu); error = thread_check_susp(td, false); if (error != 0) { if (vcpu_halted) { CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); } return (error); } vcpu_lock(vcpu); } } if (vcpu_halted) CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); vcpu_unlock(vcpu); if (vm_halted) vm_suspend(vm, VM_SUSPEND_HALT); return (0); } static int vm_handle_paging(struct vcpu *vcpu, bool *retu) { struct vm *vm = vcpu->vm; int rv, ftype; struct vm_map *map; struct vm_exit *vme; vme = &vcpu->exitinfo; KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", __func__, vme->inst_length)); ftype = vme->u.paging.fault_type; KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, ("vm_handle_paging: invalid fault_type %d", ftype)); if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), vme->u.paging.gpa, ftype); if (rv == 0) { VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx", ftype == VM_PROT_READ ? 
"accessed" : "dirty", vme->u.paging.gpa); goto done; } } map = &vm->vmspace->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, " "ftype = %d", rv, vme->u.paging.gpa, ftype); if (rv != KERN_SUCCESS) return (EFAULT); done: return (0); } static int vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) { struct vie *vie; struct vm_exit *vme; uint64_t gla, gpa, cs_base; struct vm_guest_paging *paging; mem_region_read_t mread; mem_region_write_t mwrite; enum vm_cpu_mode cpu_mode; int cs_d, error, fault; vme = &vcpu->exitinfo; KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", __func__, vme->inst_length)); gla = vme->u.inst_emul.gla; gpa = vme->u.inst_emul.gpa; cs_base = vme->u.inst_emul.cs_base; cs_d = vme->u.inst_emul.cs_d; vie = &vme->u.inst_emul.vie; paging = &vme->u.inst_emul.paging; cpu_mode = paging->cpu_mode; VMM_CTR1(vcpu, "inst_emul fault accessing gpa %#lx", gpa); /* Fetch, decode and emulate the faulting instruction */ if (vie->num_valid == 0) { error = vmm_fetch_instruction(vcpu, paging, vme->rip + cs_base, VIE_INST_SIZE, vie, &fault); } else { /* * The instruction bytes have already been copied into 'vie' */ error = fault = 0; } if (error || fault) return (error); if (vmm_decode_instruction(vcpu, gla, cpu_mode, cs_d, vie) != 0) { VMM_CTR1(vcpu, "Error decoding instruction at %#lx", vme->rip + cs_base); *retu = true; /* dump instruction bytes in userspace */ return (0); } /* * Update 'nextrip' based on the length of the emulated instruction. */ vme->inst_length = vie->num_processed; vcpu->nextrip += vie->num_processed; VMM_CTR1(vcpu, "nextrip updated to %#lx after instruction decoding", vcpu->nextrip); /* return to userland unless this is an in-kernel emulated device */ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { mread = lapic_mmio_read; mwrite = lapic_mmio_write; } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { mread = vioapic_mmio_read; mwrite = vioapic_mmio_write; } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { mread = vhpet_mmio_read; mwrite = vhpet_mmio_write; } else { *retu = true; return (0); } error = vmm_emulate_instruction(vcpu, gpa, vie, paging, mread, mwrite, retu); return (error); } static int vm_handle_suspend(struct vcpu *vcpu, bool *retu) { struct vm *vm = vcpu->vm; int error, i; struct thread *td; error = 0; td = curthread; CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); /* * Wait until all 'active_cpus' have suspended themselves. * * Since a VM may be suspended at any time including when one or * more vcpus are doing a rendezvous we need to call the rendezvous * handler while we are waiting to prevent a deadlock. */ vcpu_lock(vcpu); while (error == 0) { if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { VMM_CTR0(vcpu, "All vcpus suspended"); break; } if (vm->rendezvous_func == NULL) { VMM_CTR0(vcpu, "Sleeping during suspend"); vcpu_require_state_locked(vcpu, VCPU_SLEEPING); msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); if (td_ast_pending(td, TDA_SUSPEND)) { vcpu_unlock(vcpu); error = thread_check_susp(td, false); vcpu_lock(vcpu); } } else { VMM_CTR0(vcpu, "Rendezvous during suspend"); vcpu_unlock(vcpu); error = vm_handle_rendezvous(vcpu); vcpu_lock(vcpu); } } vcpu_unlock(vcpu); /* * Wakeup the other sleeping vcpus and return to userspace. 
*/ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { vcpu_notify_event(vm_vcpu(vm, i), false); } } *retu = true; return (error); } static int vm_handle_reqidle(struct vcpu *vcpu, bool *retu) { vcpu_lock(vcpu); KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle)); vcpu->reqidle = 0; vcpu_unlock(vcpu); *retu = true; return (0); } static int vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { int error, fault; uint64_t rsp; uint64_t rflags; struct vm_copyinfo copyinfo; *retu = true; if (!vme->u.dbg.pushf_intercept || vme->u.dbg.tf_shadow_val != 0) { return (0); } vm_get_register(vcpu, VM_REG_GUEST_RSP, &rsp); error = vm_copy_setup(vcpu, &vme->u.dbg.paging, rsp, sizeof(uint64_t), VM_PROT_RW, ©info, 1, &fault); if (error != 0 || fault != 0) { *retu = false; return (EINVAL); } /* Read pushed rflags value from top of stack. */ vm_copyin(©info, &rflags, sizeof(uint64_t)); /* Clear TF bit. */ rflags &= ~(PSL_T); /* Write updated value back to memory. */ vm_copyout(&rflags, ©info, sizeof(uint64_t)); vm_copy_teardown(©info, 1); return (0); } int vm_suspend(struct vm *vm, enum vm_suspend_how how) { int i; if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); } VM_CTR1(vm, "virtual machine successfully suspended %d", how); /* * Notify all active vcpus that they are now suspended. */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i), false); } return (0); } void vm_exit_suspended(struct vcpu *vcpu, uint64_t rip) { struct vm *vm = vcpu->vm; struct vm_exit *vmexit; KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_SUSPENDED; vmexit->u.suspended.how = vm->suspend; } void vm_exit_debug(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_DEBUG; } void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; vmm_stat_incr(vcpu, VMEXIT_RENDEZVOUS, 1); } void vm_exit_reqidle(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_REQIDLE; vmm_stat_incr(vcpu, VMEXIT_REQIDLE, 1); } void vm_exit_astpending(struct vcpu *vcpu, uint64_t rip) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vcpu); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_BOGUS; vmm_stat_incr(vcpu, VMEXIT_ASTPENDING, 1); } int vm_run(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; struct vm_eventinfo evinfo; int error, vcpuid; struct pcb *pcb; uint64_t tscval; struct vm_exit *vme; bool retu, intr_disabled; pmap_t pmap; vcpuid = vcpu->vcpuid; if (!CPU_ISSET(vcpuid, &vm->active_cpus)) return (EINVAL); if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); pmap = vmspace_pmap(vm->vmspace); vme = &vcpu->exitinfo; evinfo.rptr = &vm->rendezvous_req_cpus; evinfo.sptr = &vm->suspend; evinfo.iptr = &vcpu->reqidle; restart: critical_enter(); KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), ("vm_run: absurd pm_active")); tscval = 
rdtsc(); pcb = PCPU_GET(curpcb); set_pcb_flags(pcb, PCB_FULL_IRET); restore_guest_fpustate(vcpu); vcpu_require_state(vcpu, VCPU_RUNNING); error = vmmops_run(vcpu->cookie, vcpu->nextrip, pmap, &evinfo); vcpu_require_state(vcpu, VCPU_FROZEN); save_guest_fpustate(vcpu); vmm_stat_incr(vcpu, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); critical_exit(); if (error == 0) { retu = false; vcpu->nextrip = vme->rip + vme->inst_length; switch (vme->exitcode) { case VM_EXITCODE_REQIDLE: error = vm_handle_reqidle(vcpu, &retu); break; case VM_EXITCODE_SUSPENDED: error = vm_handle_suspend(vcpu, &retu); break; case VM_EXITCODE_IOAPIC_EOI: vioapic_process_eoi(vm, vme->u.ioapic_eoi.vector); break; case VM_EXITCODE_RENDEZVOUS: error = vm_handle_rendezvous(vcpu); break; case VM_EXITCODE_HLT: intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); error = vm_handle_hlt(vcpu, intr_disabled, &retu); break; case VM_EXITCODE_PAGING: error = vm_handle_paging(vcpu, &retu); break; case VM_EXITCODE_INST_EMUL: error = vm_handle_inst_emul(vcpu, &retu); break; case VM_EXITCODE_INOUT: case VM_EXITCODE_INOUT_STR: error = vm_handle_inout(vcpu, vme, &retu); break; case VM_EXITCODE_DB: error = vm_handle_db(vcpu, vme, &retu); break; case VM_EXITCODE_MONITOR: case VM_EXITCODE_MWAIT: case VM_EXITCODE_VMINSN: vm_inject_ud(vcpu); break; default: retu = true; /* handled in userland */ break; } } /* * VM_EXITCODE_INST_EMUL could access the apic which could transform the * exit code into VM_EXITCODE_IPI. */ if (error == 0 && vme->exitcode == VM_EXITCODE_IPI) error = vm_handle_ipi(vcpu, vme, &retu); if (error == 0 && retu == false) goto restart; vmm_stat_incr(vcpu, VMEXIT_USERSPACE, 1); VMM_CTR2(vcpu, "retu %d/%d", error, vme->exitcode); return (error); } int vm_restart_instruction(struct vcpu *vcpu) { enum vcpu_state state; uint64_t rip; int error __diagused; state = vcpu_get_state(vcpu, NULL); if (state == VCPU_RUNNING) { /* * When a vcpu is "running" the next instruction is determined * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'. * Thus setting 'inst_length' to zero will cause the current * instruction to be restarted. */ vcpu->exitinfo.inst_length = 0; VMM_CTR1(vcpu, "restarting instruction at %#lx by " "setting inst_length to zero", vcpu->exitinfo.rip); } else if (state == VCPU_FROZEN) { /* * When a vcpu is "frozen" it is outside the critical section * around vmmops_run() and 'nextrip' points to the next * instruction. Thus instruction restart is achieved by setting * 'nextrip' to the vcpu's %rip. 
*/ error = vm_get_register(vcpu, VM_REG_GUEST_RIP, &rip); KASSERT(!error, ("%s: error %d getting rip", __func__, error)); VMM_CTR2(vcpu, "restarting instruction by updating " "nextrip from %#lx to %#lx", vcpu->nextrip, rip); vcpu->nextrip = rip; } else { panic("%s: invalid state %d", __func__, state); } return (0); } int vm_exit_intinfo(struct vcpu *vcpu, uint64_t info) { int type, vector; if (info & VM_INTINFO_VALID) { type = info & VM_INTINFO_TYPE; vector = info & 0xff; if (type == VM_INTINFO_NMI && vector != IDT_NMI) return (EINVAL); if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) return (EINVAL); if (info & VM_INTINFO_RSVD) return (EINVAL); } else { info = 0; } VMM_CTR2(vcpu, "%s: info1(%#lx)", __func__, info); vcpu->exitintinfo = info; return (0); } enum exc_class { EXC_BENIGN, EXC_CONTRIBUTORY, EXC_PAGEFAULT }; #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ static enum exc_class exception_class(uint64_t info) { int type, vector; KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); type = info & VM_INTINFO_TYPE; vector = info & 0xff; /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ switch (type) { case VM_INTINFO_HWINTR: case VM_INTINFO_SWINTR: case VM_INTINFO_NMI: return (EXC_BENIGN); default: /* * Hardware exception. * * SVM and VT-x use identical type values to represent NMI, * hardware interrupt and software interrupt. * * SVM uses type '3' for all exceptions. VT-x uses type '3' * for exceptions except #BP and #OF. #BP and #OF use a type * value of '5' or '6'. Therefore we don't check for explicit * values of 'type' to classify 'intinfo' into a hardware * exception. */ break; } switch (vector) { case IDT_PF: case IDT_VE: return (EXC_PAGEFAULT); case IDT_DE: case IDT_TS: case IDT_NP: case IDT_SS: case IDT_GP: return (EXC_CONTRIBUTORY); default: return (EXC_BENIGN); } } static int nested_fault(struct vcpu *vcpu, uint64_t info1, uint64_t info2, uint64_t *retinfo) { enum exc_class exc1, exc2; int type1, vector1; KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); /* * If an exception occurs while attempting to call the double-fault * handler the processor enters shutdown mode (aka triple fault). */ type1 = info1 & VM_INTINFO_TYPE; vector1 = info1 & 0xff; if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { VMM_CTR2(vcpu, "triple fault: info1(%#lx), info2(%#lx)", info1, info2); vm_suspend(vcpu->vm, VM_SUSPEND_TRIPLEFAULT); *retinfo = 0; return (0); } /* * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 */ exc1 = exception_class(info1); exc2 = exception_class(info2); if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { /* Convert nested fault into a double fault. 
*/ *retinfo = IDT_DF; *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; *retinfo |= VM_INTINFO_DEL_ERRCODE; } else { /* Handle exceptions serially */ *retinfo = info2; } return (1); } static uint64_t vcpu_exception_intinfo(struct vcpu *vcpu) { uint64_t info = 0; if (vcpu->exception_pending) { info = vcpu->exc_vector & 0xff; info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; if (vcpu->exc_errcode_valid) { info |= VM_INTINFO_DEL_ERRCODE; info |= (uint64_t)vcpu->exc_errcode << 32; } } return (info); } int vm_entry_intinfo(struct vcpu *vcpu, uint64_t *retinfo) { uint64_t info1, info2; int valid; info1 = vcpu->exitintinfo; vcpu->exitintinfo = 0; info2 = 0; if (vcpu->exception_pending) { info2 = vcpu_exception_intinfo(vcpu); vcpu->exception_pending = 0; VMM_CTR2(vcpu, "Exception %d delivered: %#lx", vcpu->exc_vector, info2); } if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { valid = nested_fault(vcpu, info1, info2, retinfo); } else if (info1 & VM_INTINFO_VALID) { *retinfo = info1; valid = 1; } else if (info2 & VM_INTINFO_VALID) { *retinfo = info2; valid = 1; } else { valid = 0; } if (valid) { VMM_CTR4(vcpu, "%s: info1(%#lx), info2(%#lx), " "retinfo(%#lx)", __func__, info1, info2, *retinfo); } return (valid); } int vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2) { *info1 = vcpu->exitintinfo; *info2 = vcpu_exception_intinfo(vcpu); return (0); } int vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid, uint32_t errcode, int restart_instruction) { uint64_t regval; int error __diagused; if (vector < 0 || vector >= 32) return (EINVAL); /* * A double fault exception should never be injected directly into * the guest. It is a derived exception that results from specific * combinations of nested faults. */ if (vector == IDT_DF) return (EINVAL); if (vcpu->exception_pending) { VMM_CTR2(vcpu, "Unable to inject exception %d due to " "pending exception %d", vector, vcpu->exc_vector); return (EBUSY); } if (errcode_valid) { /* * Exceptions don't deliver an error code in real mode. */ error = vm_get_register(vcpu, VM_REG_GUEST_CR0, &regval); KASSERT(!error, ("%s: error %d getting CR0", __func__, error)); if (!(regval & CR0_PE)) errcode_valid = 0; } /* * From section 26.6.1 "Interruptibility State" in Intel SDM: * * Event blocking by "STI" or "MOV SS" is cleared after guest executes * one instruction or incurs an exception.
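Two concrete pairings make the classification in exception_class() and nested_fault() above easier to follow; both are derived directly from the code as written:

/*
 * 1. A #GP is raised while delivering a #PF:
 *    exc1 = exception_class(#PF) == EXC_PAGEFAULT
 *    exc2 = exception_class(#GP) == EXC_CONTRIBUTORY != EXC_BENIGN
 *    -> nested_fault() synthesizes a #DF with a zero error code.
 *
 * 2. A #DB (benign) is raised while delivering a #PF:
 *    exc1 == EXC_PAGEFAULT, exc2 == EXC_BENIGN
 *    -> handled serially: *retinfo = info2, i.e. the #DB is
 *       injected and no double fault is generated.
 */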
*/ error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW, 0); KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", __func__, error)); if (restart_instruction) vm_restart_instruction(vcpu); vcpu->exception_pending = 1; vcpu->exc_vector = vector; vcpu->exc_errcode = errcode; vcpu->exc_errcode_valid = errcode_valid; VMM_CTR1(vcpu, "Exception %d pending", vector); return (0); } void vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid, int errcode) { int error __diagused, restart_instruction; restart_instruction = 1; error = vm_inject_exception(vcpu, vector, errcode_valid, errcode, restart_instruction); KASSERT(error == 0, ("vm_inject_exception error %d", error)); } void vm_inject_pf(struct vcpu *vcpu, int error_code, uint64_t cr2) { int error __diagused; VMM_CTR2(vcpu, "Injecting page fault: error_code %#x, cr2 %#lx", error_code, cr2); error = vm_set_register(vcpu, VM_REG_GUEST_CR2, cr2); KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); vm_inject_fault(vcpu, IDT_PF, 1, error_code); } static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); int vm_inject_nmi(struct vcpu *vcpu) { vcpu->nmi_pending = 1; vcpu_notify_event(vcpu, false); return (0); } int vm_nmi_pending(struct vcpu *vcpu) { return (vcpu->nmi_pending); } void vm_nmi_clear(struct vcpu *vcpu) { if (vcpu->nmi_pending == 0) panic("vm_nmi_clear: inconsistent nmi_pending state"); vcpu->nmi_pending = 0; vmm_stat_incr(vcpu, VCPU_NMI_COUNT, 1); } static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); int vm_inject_extint(struct vcpu *vcpu) { vcpu->extint_pending = 1; vcpu_notify_event(vcpu, false); return (0); } int vm_extint_pending(struct vcpu *vcpu) { return (vcpu->extint_pending); } void vm_extint_clear(struct vcpu *vcpu) { if (vcpu->extint_pending == 0) panic("vm_extint_clear: inconsistent extint_pending state"); vcpu->extint_pending = 0; vmm_stat_incr(vcpu, VCPU_EXTINT_COUNT, 1); } int vm_get_capability(struct vcpu *vcpu, int type, int *retval) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_getcap(vcpu->cookie, type, retval)); } int vm_set_capability(struct vcpu *vcpu, int type, int val) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_setcap(vcpu->cookie, type, val)); } struct vm * vcpu_vm(struct vcpu *vcpu) { return (vcpu->vm); } int vcpu_vcpuid(struct vcpu *vcpu) { return (vcpu->vcpuid); } struct vcpu * vm_vcpu(struct vm *vm, int vcpuid) { return (vm->vcpu[vcpuid]); } struct vlapic * vm_lapic(struct vcpu *vcpu) { return (vcpu->vlapic); } struct vioapic * vm_ioapic(struct vm *vm) { return (vm->vioapic); } struct vhpet * vm_hpet(struct vm *vm) { return (vm->vhpet); } bool vmm_is_pptdev(int bus, int slot, int func) { int b, f, i, n, s; char *val, *cp, *cp2; bool found; /* * XXX * The length of an environment variable is limited to 128 bytes which * puts an upper limit on the number of passthru devices that may be * specified using a single environment variable. * * Work around this by scanning multiple environment variable * names instead of a single one - yuck! 
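vm_inject_pf() above is the canonical way an emulation path reflects a failed guest translation back to the guest: %cr2 is loaded first, then a restartable #PF is queued. A hypothetical call site (PGEX_W is the standard x86 page-fault error-code bit for a write access):

	/* Hypothetical: a guest write to 'gla' failed to translate. */
	vm_inject_pf(vcpu, PGEX_W, gla);
	return (0);	/* #PF delivered on next entry; instruction restarts */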
*/ const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ found = false; for (i = 0; names[i] != NULL && !found; i++) { cp = val = kern_getenv(names[i]); while (cp != NULL && *cp != '\0') { if ((cp2 = strchr(cp, ' ')) != NULL) *cp2 = '\0'; n = sscanf(cp, "%d/%d/%d", &b, &s, &f); if (n == 3 && bus == b && slot == s && func == f) { found = true; break; } if (cp2 != NULL) *cp2++ = ' '; cp = cp2; } freeenv(val); } return (found); } void * vm_iommu_domain(struct vm *vm) { return (vm->iommu); } int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_lock(vcpu); error = vcpu_set_state_locked(vcpu, newstate, from_idle); vcpu_unlock(vcpu); return (error); } enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu) { enum vcpu_state state; vcpu_lock(vcpu); state = vcpu->state; if (hostcpu != NULL) *hostcpu = vcpu->hostcpu; vcpu_unlock(vcpu); return (state); } int vm_activate_cpu(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EBUSY); VMM_CTR0(vcpu, "activated"); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); return (0); } int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { vm->debug_cpus = vm->active_cpus; for (int i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i), false); } } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EINVAL); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); vcpu_notify_event(vcpu, false); } return (0); } int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { CPU_ZERO(&vm->debug_cpus); } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) return (EINVAL); CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); } return (0); } int vcpu_debugged(struct vcpu *vcpu) { return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); } cpuset_t vm_active_cpus(struct vm *vm) { return (vm->active_cpus); } cpuset_t vm_debug_cpus(struct vm *vm) { return (vm->debug_cpus); } cpuset_t vm_suspended_cpus(struct vm *vm) { return (vm->suspended_cpus); } /* * Returns the subset of vCPUs in tostart that are awaiting startup. * These vCPUs are also marked as no longer awaiting startup. */ cpuset_t vm_start_cpus(struct vm *vm, const cpuset_t *tostart) { cpuset_t set; mtx_lock(&vm->rendezvous_mtx); CPU_AND(&set, &vm->startup_cpus, tostart); CPU_ANDNOT(&vm->startup_cpus, &vm->startup_cpus, &set); mtx_unlock(&vm->rendezvous_mtx); return (set); } void vm_await_start(struct vm *vm, const cpuset_t *waiting) { mtx_lock(&vm->rendezvous_mtx); CPU_OR(&vm->startup_cpus, &vm->startup_cpus, waiting); mtx_unlock(&vm->rendezvous_mtx); } void * vcpu_stats(struct vcpu *vcpu) { return (vcpu->stats); } int vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state) { *state = vcpu->x2apic_state; return (0); } int vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state) { if (state >= X2APIC_STATE_LAST) return (EINVAL); vcpu->x2apic_state = state; vlapic_set_x2apic_state(vcpu, state); return (0); } /* * This function is called to ensure that a vcpu "sees" a pending event * as soon as possible: * - If the vcpu thread is sleeping then it is woken up. * - If the vcpu is running on a different host_cpu then an IPI will be directed * to the host_cpu to cause the vcpu to trap into the hypervisor. 
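For reference, the tunables scanned by vmm_is_pptdev() above are set from the loader; something like the following in /boot/loader.conf (illustrative bus/slot/function triples, space-separated, spilling into pptdevs2 once a single variable gets long) marks devices for passthrough:

	pptdevs="2/0/0 4/5/6"
	pptdevs2="10/0/0"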
*/ static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr) { int hostcpu; hostcpu = vcpu->hostcpu; if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { if (lapic_intr) { vlapic_post_intr(vcpu->vlapic, hostcpu, vmm_ipinum); } else { ipi_cpu(hostcpu, vmm_ipinum); } } else { /* * If the 'vcpu' is running on 'curcpu' then it must * be sending a notification to itself (e.g. SELF_IPI). * The pending event will be picked up when the vcpu * transitions back to guest context. */ } } else { KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " "with hostcpu %d", vcpu->state, hostcpu)); if (vcpu->state == VCPU_SLEEPING) wakeup_one(vcpu); } } void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr) { vcpu_lock(vcpu); vcpu_notify_event_locked(vcpu, lapic_intr); vcpu_unlock(vcpu); } struct vmspace * vm_get_vmspace(struct vm *vm) { return (vm->vmspace); } int vm_apicid2vcpuid(struct vm *vm, int apicid) { /* * XXX apic id is assumed to be numerically identical to vcpu id */ return (apicid); } int vm_smp_rendezvous(struct vcpu *vcpu, cpuset_t dest, vm_rendezvous_func_t func, void *arg) { struct vm *vm = vcpu->vm; int error, i; /* * Enforce that this function is called without any locks */ WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); restart: mtx_lock(&vm->rendezvous_mtx); if (vm->rendezvous_func != NULL) { /* * If a rendezvous is already in progress then we need to * call the rendezvous handler in case this 'vcpu' is one * of the targets of the rendezvous. */ VMM_CTR0(vcpu, "Rendezvous already in progress"); mtx_unlock(&vm->rendezvous_mtx); error = vm_handle_rendezvous(vcpu); if (error != 0) return (error); goto restart; } KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " "rendezvous is still in progress")); VMM_CTR0(vcpu, "Initiating rendezvous"); vm->rendezvous_req_cpus = dest; CPU_ZERO(&vm->rendezvous_done_cpus); vm->rendezvous_arg = arg; vm->rendezvous_func = func; mtx_unlock(&vm->rendezvous_mtx); /* * Wake up any sleeping vcpus and trigger a VM-exit in any running * vcpus so they handle the rendezvous as soon as possible. 
*/ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &dest)) vcpu_notify_event(vm_vcpu(vm, i), false); } return (vm_handle_rendezvous(vcpu)); } struct vatpic * vm_atpic(struct vm *vm) { return (vm->vatpic); } struct vatpit * vm_atpit(struct vm *vm) { return (vm->vatpit); } struct vpmtmr * vm_pmtmr(struct vm *vm) { return (vm->vpmtmr); } struct vrtc * vm_rtc(struct vm *vm) { return (vm->vrtc); } enum vm_reg_name vm_segment_name(int seg) { static enum vm_reg_name seg_names[] = { VM_REG_GUEST_ES, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_FS, VM_REG_GUEST_GS }; KASSERT(seg >= 0 && seg < nitems(seg_names), ("%s: invalid segment encoding %d", __func__, seg)); return (seg_names[seg]); } void vm_copy_teardown(struct vm_copyinfo *copyinfo, int num_copyinfo) { int idx; for (idx = 0; idx < num_copyinfo; idx++) { if (copyinfo[idx].cookie != NULL) vm_gpa_release(copyinfo[idx].cookie); } bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); } int vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, int num_copyinfo, int *fault) { int error, idx, nused; size_t n, off, remaining; void *hva, *cookie; uint64_t gpa; bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); nused = 0; remaining = len; while (remaining > 0) { KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault); if (error || *fault) return (error); off = gpa & PAGE_MASK; n = min(remaining, PAGE_SIZE - off); copyinfo[nused].gpa = gpa; copyinfo[nused].len = n; remaining -= n; gla += n; nused++; } for (idx = 0; idx < nused; idx++) { hva = vm_gpa_hold(vcpu, copyinfo[idx].gpa, copyinfo[idx].len, prot, &cookie); if (hva == NULL) break; copyinfo[idx].hva = hva; copyinfo[idx].cookie = cookie; } if (idx != nused) { vm_copy_teardown(copyinfo, num_copyinfo); return (EFAULT); } else { *fault = 0; return (0); } } void vm_copyin(struct vm_copyinfo *copyinfo, void *kaddr, size_t len) { char *dst; int idx; dst = kaddr; idx = 0; while (len > 0) { bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); len -= copyinfo[idx].len; dst += copyinfo[idx].len; idx++; } } void vm_copyout(const void *kaddr, struct vm_copyinfo *copyinfo, size_t len) { const char *src; int idx; src = kaddr; idx = 0; while (len > 0) { bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); len -= copyinfo[idx].len; src += copyinfo[idx].len; idx++; } } /* * Return the amount of in-use and wired memory for the VM. 
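A caller drives vm_smp_rendezvous() above by supplying a callback that is invoked once on every targeted vcpu while all of them are parked in the rendezvous. A minimal sketch, assuming the vm_rendezvous_func_t signature (struct vcpu *, void *) and a hypothetical handler:

/* Hypothetical handler: runs once per vcpu in 'dest'. */
static void
sample_rendezvous_cb(struct vcpu *vcpu, void *arg)
{
	/* per-vcpu work, e.g. resynchronizing cached state */
}

	cpuset_t dest;
	int error;

	dest = vm_active_cpus(vm);
	error = vm_smp_rendezvous(vcpu, dest, sample_rendezvous_cb, NULL);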
Since * these are global stats, only return the values for vCPU 0 */ VMM_STAT_DECLARE(VMM_MEM_RESIDENT); VMM_STAT_DECLARE(VMM_MEM_WIRED); static void vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat) { if (vcpu->vcpuid == 0) { vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE * vmspace_resident_count(vcpu->vm->vmspace)); } } static void vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat) { if (vcpu->vcpuid == 0) { vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE * pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace))); } } VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); #ifdef BHYVE_SNAPSHOT static int vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta) { uint64_t tsc, now; int ret; struct vcpu *vcpu; uint16_t i, maxcpus; now = rdtsc(); maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { vcpu = vm->vcpu[i]; if (vcpu == NULL) continue; SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done); /* * Save the absolute TSC value by adding now to tsc_offset. * * It will be turned back into an actual offset when the * TSC restore function is called. */ tsc = now + vcpu->tsc_offset; SNAPSHOT_VAR_OR_LEAVE(tsc, meta, ret, done); if (meta->op == VM_SNAPSHOT_RESTORE) vcpu->tsc_offset = tsc; } done: return (ret); } static int vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta) { int ret; ret = vm_snapshot_vcpus(vm, meta); if (ret != 0) goto done; SNAPSHOT_VAR_OR_LEAVE(vm->startup_cpus, meta, ret, done); done: return (ret); } static int vm_snapshot_vcpu(struct vm *vm, struct vm_snapshot_meta *meta) { int error; struct vcpu *vcpu; uint16_t i, maxcpus; error = 0; maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { vcpu = vm->vcpu[i]; if (vcpu == NULL) continue; error = vmmops_vcpu_snapshot(vcpu->cookie, meta); if (error != 0) { printf("%s: failed to snapshot vmcs/vmcb data for " "vCPU: %d; error: %d\n", __func__, i, error); goto done; } } done: return (error); } /* * Save kernel-side structures to user-space for snapshotting.
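The TSC handling just above pairs with vm_restore_time() later in this file: the snapshot stores an absolute TSC reading rather than an offset, so a matching offset can be rebuilt on a host whose own TSC is unrelated. Schematically:

/*
 * snapshot:  tsc_saved  = rdtsc() at save time + tsc_offset
 * restore:   new_offset = tsc_saved - rdtsc() at restore time
 *            (vm_restore_time() hands vcpu->tsc_offset - now
 *            to vmmops_restore_tsc())
 * The guest-visible TSC is host TSC + offset, so the guest clock
 * resumes from the value it had when the snapshot was taken.
 */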
*/ int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta) { int ret = 0; switch (meta->dev_req) { case STRUCT_VMCX: ret = vm_snapshot_vcpu(vm, meta); break; case STRUCT_VM: ret = vm_snapshot_vm(vm, meta); break; case STRUCT_VIOAPIC: ret = vioapic_snapshot(vm_ioapic(vm), meta); break; case STRUCT_VLAPIC: ret = vlapic_snapshot(vm, meta); break; case STRUCT_VHPET: ret = vhpet_snapshot(vm_hpet(vm), meta); break; case STRUCT_VATPIC: ret = vatpic_snapshot(vm_atpic(vm), meta); break; case STRUCT_VATPIT: ret = vatpit_snapshot(vm_atpit(vm), meta); break; case STRUCT_VPMTMR: ret = vpmtmr_snapshot(vm_pmtmr(vm), meta); break; case STRUCT_VRTC: ret = vrtc_snapshot(vm_rtc(vm), meta); break; default: printf("%s: failed to find the requested type %#x\n", __func__, meta->dev_req); ret = (EINVAL); } return (ret); } void vm_set_tsc_offset(struct vcpu *vcpu, uint64_t offset) { vcpu->tsc_offset = offset; } int vm_restore_time(struct vm *vm) { int error; uint64_t now; struct vcpu *vcpu; uint16_t i, maxcpus; now = rdtsc(); error = vhpet_restore_time(vm_hpet(vm)); if (error) return (error); maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { vcpu = vm->vcpu[i]; if (vcpu == NULL) continue; error = vmmops_restore_tsc(vcpu->cookie, vcpu->tsc_offset - now); if (error) return (error); } return (0); } #endif diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c deleted file mode 100644 index 9acbfbb454e0..000000000000 --- a/sys/amd64/vmm/vmm_dev.c +++ /dev/null @@ -1,1452 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "opt_bhyve_snapshot.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "vmm_lapic.h" -#include "vmm_stat.h" -#include "vmm_mem.h" -#include "io/ppt.h" -#include "io/vatpic.h" -#include "io/vioapic.h" -#include "io/vhpet.h" -#include "io/vrtc.h" - -#ifdef COMPAT_FREEBSD13 -struct vm_stats_old { - int cpuid; /* in */ - int num_entries; /* out */ - struct timeval tv; - uint64_t statbuf[MAX_VM_STATS]; -}; - -#define VM_STATS_OLD \ - _IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old) - -struct vm_snapshot_meta_old { - void *ctx; /* unused */ - void *dev_data; - const char *dev_name; /* identify userspace devices */ - enum snapshot_req dev_req; /* identify kernel structs */ - - struct vm_snapshot_buffer buffer; - - enum vm_snapshot_op op; -}; - -#define VM_SNAPSHOT_REQ_OLD \ - _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old) - -struct vm_exit_ipi_13 { - uint32_t mode; - uint8_t vector; - __BITSET_DEFINE(, 256) dmask; -}; - -struct vm_exit_13 { - uint32_t exitcode; - int32_t inst_length; - uint64_t rip; - uint64_t u[120 / sizeof(uint64_t)]; -}; - -struct vm_run_13 { - int cpuid; - struct vm_exit_13 vm_exit; -}; - -#define VM_RUN_13 \ - _IOWR('v', IOCNUM_RUN, struct vm_run_13) - -#endif /* COMPAT_FREEBSD13 */ - -struct devmem_softc { - int segid; - char *name; - struct cdev *cdev; - struct vmmdev_softc *sc; - SLIST_ENTRY(devmem_softc) link; -}; - -struct vmmdev_softc { - struct vm *vm; /* vm instance cookie */ - struct cdev *cdev; - struct ucred *ucred; - SLIST_ENTRY(vmmdev_softc) link; - SLIST_HEAD(, devmem_softc) devmem; - int flags; -}; -#define VSC_LINKED 0x01 - -static SLIST_HEAD(, vmmdev_softc) head; - -static unsigned pr_allow_flag; -static struct mtx vmmdev_mtx; -MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF); - -static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); - -SYSCTL_DECL(_hw_vmm); - -static int vmm_priv_check(struct ucred *ucred); -static int devmem_create_cdev(const char *vmname, int id, char *devmem); -static void devmem_destroy(void *arg); - -static int -vmm_priv_check(struct ucred *ucred) -{ - - if (jailed(ucred) && - !(ucred->cr_prison->pr_allow & pr_allow_flag)) - return (EPERM); - - return (0); -} - -static int -vcpu_lock_one(struct vcpu *vcpu) -{ - return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); -} - -static void -vcpu_unlock_one(struct vcpu *vcpu) -{ - enum vcpu_state state; - - state = vcpu_get_state(vcpu, NULL); - if (state != VCPU_FROZEN) { - panic("vcpu %s(%d) has invalid state %d", - vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state); - } - - vcpu_set_state(vcpu, VCPU_IDLE, false); -} - -static int -vcpu_lock_all(struct vmmdev_softc *sc) -{ - struct vcpu *vcpu; - int error; - uint16_t i, j, maxcpus; - - error = 0; - vm_slock_vcpus(sc->vm); - maxcpus = vm_get_maxcpus(sc->vm); - for (i = 0; i < maxcpus; i++) { - vcpu = vm_vcpu(sc->vm, i); - if (vcpu == NULL) - continue; - error = vcpu_lock_one(vcpu); - if (error) - break; - } - - if (error) { - for (j = 0; j < i; j++) { - vcpu = vm_vcpu(sc->vm, j); - if (vcpu == NULL) - continue; - vcpu_unlock_one(vcpu); - } - vm_unlock_vcpus(sc->vm); - } - - return (error); -} - -static void -vcpu_unlock_all(struct vmmdev_softc *sc) -{ - struct vcpu *vcpu; - uint16_t i, maxcpus; - - maxcpus = vm_get_maxcpus(sc->vm); - for (i = 0; i < maxcpus; i++) { - vcpu = vm_vcpu(sc->vm, i); - if 
(vcpu == NULL) - continue; - vcpu_unlock_one(vcpu); - } - vm_unlock_vcpus(sc->vm); -} - -static struct vmmdev_softc * -vmmdev_lookup(const char *name) -{ - struct vmmdev_softc *sc; - - mtx_assert(&vmmdev_mtx, MA_OWNED); - - SLIST_FOREACH(sc, &head, link) { - if (strcmp(name, vm_name(sc->vm)) == 0) - break; - } - - if (sc == NULL) - return (NULL); - - if (cr_cansee(curthread->td_ucred, sc->ucred)) - return (NULL); - - return (sc); -} - -static struct vmmdev_softc * -vmmdev_lookup2(struct cdev *cdev) -{ - - return (cdev->si_drv1); -} - -static int -vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) -{ - int error, off, c, prot; - vm_paddr_t gpa, maxaddr; - void *hpa, *cookie; - struct vmmdev_softc *sc; - - error = vmm_priv_check(curthread->td_ucred); - if (error) - return (error); - - sc = vmmdev_lookup2(cdev); - if (sc == NULL) - return (ENXIO); - - /* - * Get a read lock on the guest memory map. - */ - vm_slock_memsegs(sc->vm); - - prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); - maxaddr = vmm_sysmem_maxaddr(sc->vm); - while (uio->uio_resid > 0 && error == 0) { - gpa = uio->uio_offset; - off = gpa & PAGE_MASK; - c = min(uio->uio_resid, PAGE_SIZE - off); - - /* - * The VM has a hole in its physical memory map. If we want to - * use 'dd' to inspect memory beyond the hole we need to - * provide bogus data for memory that lies in the hole. - * - * Since this device does not support lseek(2), dd(1) will - * read(2) blocks of data to simulate the lseek(2). - */ - hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); - if (hpa == NULL) { - if (uio->uio_rw == UIO_READ && gpa < maxaddr) - error = uiomove(__DECONST(void *, zero_region), - c, uio); - else - error = EFAULT; - } else { - error = uiomove(hpa, c, uio); - vm_gpa_release(cookie); - } - } - vm_unlock_memsegs(sc->vm); - return (error); -} - -CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1); - -static int -get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) -{ - struct devmem_softc *dsc; - int error; - bool sysmem; - - error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); - if (error || mseg->len == 0) - return (error); - - if (!sysmem) { - SLIST_FOREACH(dsc, &sc->devmem, link) { - if (dsc->segid == mseg->segid) - break; - } - KASSERT(dsc != NULL, ("%s: devmem segment %d not found", - __func__, mseg->segid)); - error = copystr(dsc->name, mseg->name, len, NULL); - } else { - bzero(mseg->name, len); - } - - return (error); -} - -static int -alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) -{ - char *name; - int error; - bool sysmem; - - error = 0; - name = NULL; - sysmem = true; - - /* - * The allocation is lengthened by 1 to hold a terminating NUL. It'll - * be stripped off when devfs processes the full string.
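The vmmdev_rw() handler above is what lets ordinary tools read guest-physical memory through /dev/vmm/<name>, with holes in the memory map reading back as zeroes. An illustrative dump of 1 MiB at guest-physical 16 MiB (hypothetical VM name):

	# 4 KiB blocks: skip 4096 blocks (16 MiB), copy 256 blocks (1 MiB)
	dd if=/dev/vmm/testvm of=guest.bin bs=4k skip=4096 count=256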
- */ - if (VM_MEMSEG_NAME(mseg)) { - sysmem = false; - name = malloc(len, M_VMMDEV, M_WAITOK); - error = copystr(mseg->name, name, len, NULL); - if (error) - goto done; - } - - error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); - if (error) - goto done; - - if (VM_MEMSEG_NAME(mseg)) { - error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); - if (error) - vm_free_memseg(sc->vm, mseg->segid); - else - name = NULL; /* freed when 'cdev' is destroyed */ - } -done: - free(name, M_VMMDEV); - return (error); -} - -static int -vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, - uint64_t *regval) -{ - int error, i; - - error = 0; - for (i = 0; i < count; i++) { - error = vm_get_register(vcpu, regnum[i], &regval[i]); - if (error) - break; - } - return (error); -} - -static int -vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, - uint64_t *regval) -{ - int error, i; - - error = 0; - for (i = 0; i < count; i++) { - error = vm_set_register(vcpu, regnum[i], regval[i]); - if (error) - break; - } - return (error); -} - -static int -vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, - struct thread *td) -{ - int error, vcpuid, size; - cpuset_t *cpuset; - struct vmmdev_softc *sc; - struct vcpu *vcpu; - struct vm_register *vmreg; - struct vm_seg_desc *vmsegdesc; - struct vm_register_set *vmregset; - struct vm_run *vmrun; -#ifdef COMPAT_FREEBSD13 - struct vm_run_13 *vmrun_13; -#endif - struct vm_exception *vmexc; - struct vm_lapic_irq *vmirq; - struct vm_lapic_msi *vmmsi; - struct vm_ioapic_irq *ioapic_irq; - struct vm_isa_irq *isa_irq; - struct vm_isa_irq_trigger *isa_irq_trigger; - struct vm_capability *vmcap; - struct vm_pptdev *pptdev; - struct vm_pptdev_mmio *pptmmio; - struct vm_pptdev_msi *pptmsi; - struct vm_pptdev_msix *pptmsix; -#ifdef COMPAT_FREEBSD13 - struct vm_stats_old *vmstats_old; -#endif - struct vm_stats *vmstats; - struct vm_stat_desc *statdesc; - struct vm_x2apic *x2apic; - struct vm_gpa_pte *gpapte; - struct vm_suspend *vmsuspend; - struct vm_gla2gpa *gg; - struct vm_cpuset *vm_cpuset; - struct vm_intinfo *vmii; - struct vm_rtc_time *rtctime; - struct vm_rtc_data *rtcdata; - struct vm_memmap *mm; - struct vm_munmap *mu; - struct vm_cpu_topology *topology; - struct vm_readwrite_kernemu_device *kernemu; - uint64_t *regvals; - int *regnums; - enum { NONE, SINGLE, ALL } vcpus_locked; - bool memsegs_locked; -#ifdef BHYVE_SNAPSHOT - struct vm_snapshot_meta *snapshot_meta; -#ifdef COMPAT_FREEBSD13 - struct vm_snapshot_meta_old *snapshot_old; -#endif -#endif - - error = vmm_priv_check(curthread->td_ucred); - if (error) - return (error); - - sc = vmmdev_lookup2(cdev); - if (sc == NULL) - return (ENXIO); - - vcpuid = -1; - vcpu = NULL; - vcpus_locked = NONE; - memsegs_locked = false; - - /* - * For VMM ioctls that operate on a single vCPU, lookup the - * vcpu. For VMM ioctls which require one or more vCPUs to - * not be running, lock necessary vCPUs. - * - * XXX fragile, handle with care - * Most of these assume that the first field of the ioctl data - * is the vcpuid. - */
- */ - switch (cmd) { - case VM_RUN: -#ifdef COMPAT_FREEBSD13 - case VM_RUN_13: -#endif - case VM_GET_REGISTER: - case VM_SET_REGISTER: - case VM_GET_SEGMENT_DESCRIPTOR: - case VM_SET_SEGMENT_DESCRIPTOR: - case VM_GET_REGISTER_SET: - case VM_SET_REGISTER_SET: - case VM_INJECT_EXCEPTION: - case VM_GET_CAPABILITY: - case VM_SET_CAPABILITY: - case VM_SET_X2APIC_STATE: - case VM_GLA2GPA: - case VM_GLA2GPA_NOFAULT: - case VM_ACTIVATE_CPU: - case VM_SET_INTINFO: - case VM_GET_INTINFO: - case VM_RESTART_INSTRUCTION: - case VM_GET_KERNEMU_DEV: - case VM_SET_KERNEMU_DEV: - /* - * ioctls that can operate only on vcpus that are not running. - */ - vcpuid = *(int *)data; - vcpu = vm_alloc_vcpu(sc->vm, vcpuid); - if (vcpu == NULL) { - error = EINVAL; - goto done; - } - error = vcpu_lock_one(vcpu); - if (error) - goto done; - vcpus_locked = SINGLE; - break; - -#ifdef COMPAT_FREEBSD12 - case VM_ALLOC_MEMSEG_FBSD12: -#endif - case VM_ALLOC_MEMSEG: - case VM_BIND_PPTDEV: - case VM_UNBIND_PPTDEV: - case VM_MMAP_MEMSEG: - case VM_MUNMAP_MEMSEG: - case VM_REINIT: - /* - * ioctls that modify the memory map must lock memory - * segments exclusively. - */ - vm_xlock_memsegs(sc->vm); - memsegs_locked = true; - /* FALLTHROUGH */ - case VM_MAP_PPTDEV_MMIO: - case VM_UNMAP_PPTDEV_MMIO: -#ifdef BHYVE_SNAPSHOT - case VM_SNAPSHOT_REQ: -#ifdef COMPAT_FREEBSD13 - case VM_SNAPSHOT_REQ_OLD: -#endif - case VM_RESTORE_TIME: -#endif - /* - * ioctls that operate on the entire virtual machine must - * prevent all vcpus from running. - */ - error = vcpu_lock_all(sc); - if (error) - goto done; - vcpus_locked = ALL; - break; - -#ifdef COMPAT_FREEBSD12 - case VM_GET_MEMSEG_FBSD12: -#endif - case VM_GET_MEMSEG: - case VM_MMAP_GETNEXT: - /* - * Lock the memory map while it is being inspected. - */ - vm_slock_memsegs(sc->vm); - memsegs_locked = true; - break; - -#ifdef COMPAT_FREEBSD13 - case VM_STATS_OLD: -#endif - case VM_STATS: - case VM_INJECT_NMI: - case VM_LAPIC_IRQ: - case VM_GET_X2APIC_STATE: - /* - * These do not need the vCPU locked but do operate on - * a specific vCPU. - */ - vcpuid = *(int *)data; - vcpu = vm_alloc_vcpu(sc->vm, vcpuid); - if (vcpu == NULL) { - error = EINVAL; - goto done; - } - break; - - case VM_LAPIC_LOCAL_IRQ: - case VM_SUSPEND_CPU: - case VM_RESUME_CPU: - /* - * These can either operate on all CPUs via a vcpuid of - * -1 or on a specific vCPU. 
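The "first field is the vcpuid" convention described above means a userland caller simply fills in cpuid before issuing any of the per-vcpu ioctls; for example, with a hypothetical 'vmfd' opened on the VM's cdev:

	struct vm_register vmreg = {
		.cpuid = 1,			/* read as *(int *)data above */
		.regnum = VM_REG_GUEST_RIP,
	};

	if (ioctl(vmfd, VM_GET_REGISTER, &vmreg) == 0)
		printf("vcpu1 rip %#lx\n", vmreg.regval);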
- */ - vcpuid = *(int *)data; - if (vcpuid == -1) - break; - vcpu = vm_alloc_vcpu(sc->vm, vcpuid); - if (vcpu == NULL) { - error = EINVAL; - goto done; - } - break; - - default: - break; - } - - switch (cmd) { - case VM_RUN: { - struct vm_exit *vme; - - vmrun = (struct vm_run *)data; - vme = vm_exitinfo(vcpu); - - error = vm_run(vcpu); - if (error != 0) - break; - - error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); - if (error != 0) - break; - if (vme->exitcode == VM_EXITCODE_IPI) { - error = copyout(vm_exitinfo_cpuset(vcpu), - vmrun->cpuset, - min(vmrun->cpusetsize, sizeof(cpuset_t))); - if (error != 0) - break; - if (sizeof(cpuset_t) < vmrun->cpusetsize) { - uint8_t *p; - - p = (uint8_t *)vmrun->cpuset + - sizeof(cpuset_t); - while (p < (uint8_t *)vmrun->cpuset + - vmrun->cpusetsize) { - if (subyte(p++, 0) != 0) { - error = EFAULT; - break; - } - } - } - } - break; - } -#ifdef COMPAT_FREEBSD13 - case VM_RUN_13: { - struct vm_exit *vme; - struct vm_exit_13 *vme_13; - - vmrun_13 = (struct vm_run_13 *)data; - vme_13 = &vmrun_13->vm_exit; - vme = vm_exitinfo(vcpu); - - error = vm_run(vcpu); - if (error == 0) { - vme_13->exitcode = vme->exitcode; - vme_13->inst_length = vme->inst_length; - vme_13->rip = vme->rip; - memcpy(vme_13->u, &vme->u, sizeof(vme_13->u)); - if (vme->exitcode == VM_EXITCODE_IPI) { - struct vm_exit_ipi_13 *ipi; - cpuset_t *dmask; - int cpu; - - dmask = vm_exitinfo_cpuset(vcpu); - ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0]; - BIT_ZERO(256, &ipi->dmask); - CPU_FOREACH_ISSET(cpu, dmask) { - if (cpu >= 256) - break; - BIT_SET(256, cpu, &ipi->dmask); - } - } - } - break; - } -#endif - case VM_SUSPEND: - vmsuspend = (struct vm_suspend *)data; - error = vm_suspend(sc->vm, vmsuspend->how); - break; - case VM_REINIT: - error = vm_reinit(sc->vm); - break; - case VM_STAT_DESC: { - statdesc = (struct vm_stat_desc *)data; - error = vmm_stat_desc_copy(statdesc->index, - statdesc->desc, sizeof(statdesc->desc)); - break; - } -#ifdef COMPAT_FREEBSD13 - case VM_STATS_OLD: - vmstats_old = (struct vm_stats_old *)data; - getmicrotime(&vmstats_old->tv); - error = vmm_stat_copy(vcpu, 0, - nitems(vmstats_old->statbuf), - &vmstats_old->num_entries, - vmstats_old->statbuf); - break; -#endif - case VM_STATS: { - vmstats = (struct vm_stats *)data; - getmicrotime(&vmstats->tv); - error = vmm_stat_copy(vcpu, vmstats->index, - nitems(vmstats->statbuf), - &vmstats->num_entries, vmstats->statbuf); - break; - } - case VM_PPTDEV_MSI: - pptmsi = (struct vm_pptdev_msi *)data; - error = ppt_setup_msi(sc->vm, - pptmsi->bus, pptmsi->slot, pptmsi->func, - pptmsi->addr, pptmsi->msg, - pptmsi->numvec); - break; - case VM_PPTDEV_MSIX: - pptmsix = (struct vm_pptdev_msix *)data; - error = ppt_setup_msix(sc->vm, - pptmsix->bus, pptmsix->slot, - pptmsix->func, pptmsix->idx, - pptmsix->addr, pptmsix->msg, - pptmsix->vector_control); - break; - case VM_PPTDEV_DISABLE_MSIX: - pptdev = (struct vm_pptdev *)data; - error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot, - pptdev->func); - break; - case VM_MAP_PPTDEV_MMIO: - pptmmio = (struct vm_pptdev_mmio *)data; - error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, - pptmmio->func, pptmmio->gpa, pptmmio->len, - pptmmio->hpa); - break; - case VM_UNMAP_PPTDEV_MMIO: - pptmmio = (struct vm_pptdev_mmio *)data; - error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot, - pptmmio->func, pptmmio->gpa, pptmmio->len); - break; - case VM_BIND_PPTDEV: - pptdev = (struct vm_pptdev *)data; - error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, - pptdev->func); - 
break; - case VM_UNBIND_PPTDEV: - pptdev = (struct vm_pptdev *)data; - error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, - pptdev->func); - break; - case VM_INJECT_EXCEPTION: - vmexc = (struct vm_exception *)data; - error = vm_inject_exception(vcpu, - vmexc->vector, vmexc->error_code_valid, vmexc->error_code, - vmexc->restart_instruction); - break; - case VM_INJECT_NMI: - error = vm_inject_nmi(vcpu); - break; - case VM_LAPIC_IRQ: - vmirq = (struct vm_lapic_irq *)data; - error = lapic_intr_edge(vcpu, vmirq->vector); - break; - case VM_LAPIC_LOCAL_IRQ: - vmirq = (struct vm_lapic_irq *)data; - error = lapic_set_local_intr(sc->vm, vcpu, vmirq->vector); - break; - case VM_LAPIC_MSI: - vmmsi = (struct vm_lapic_msi *)data; - error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); - break; - case VM_IOAPIC_ASSERT_IRQ: - ioapic_irq = (struct vm_ioapic_irq *)data; - error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); - break; - case VM_IOAPIC_DEASSERT_IRQ: - ioapic_irq = (struct vm_ioapic_irq *)data; - error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); - break; - case VM_IOAPIC_PULSE_IRQ: - ioapic_irq = (struct vm_ioapic_irq *)data; - error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); - break; - case VM_IOAPIC_PINCOUNT: - *(int *)data = vioapic_pincount(sc->vm); - break; - case VM_SET_KERNEMU_DEV: - case VM_GET_KERNEMU_DEV: { - mem_region_write_t mwrite; - mem_region_read_t mread; - bool arg; - - kernemu = (void *)data; - - if (kernemu->access_width > 0) - size = (1u << kernemu->access_width); - else - size = 1; - - if (kernemu->gpa >= DEFAULT_APIC_BASE && kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { - mread = lapic_mmio_read; - mwrite = lapic_mmio_write; - } else if (kernemu->gpa >= VIOAPIC_BASE && kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { - mread = vioapic_mmio_read; - mwrite = vioapic_mmio_write; - } else if (kernemu->gpa >= VHPET_BASE && kernemu->gpa < VHPET_BASE + VHPET_SIZE) { - mread = vhpet_mmio_read; - mwrite = vhpet_mmio_write; - } else { - error = EINVAL; - break; - } - - if (cmd == VM_SET_KERNEMU_DEV) - error = mwrite(vcpu, kernemu->gpa, - kernemu->value, size, &arg); - else - error = mread(vcpu, kernemu->gpa, - &kernemu->value, size, &arg); - break; - } - case VM_ISA_ASSERT_IRQ: - isa_irq = (struct vm_isa_irq *)data; - error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); - if (error == 0 && isa_irq->ioapic_irq != -1) - error = vioapic_assert_irq(sc->vm, - isa_irq->ioapic_irq); - break; - case VM_ISA_DEASSERT_IRQ: - isa_irq = (struct vm_isa_irq *)data; - error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); - if (error == 0 && isa_irq->ioapic_irq != -1) - error = vioapic_deassert_irq(sc->vm, - isa_irq->ioapic_irq); - break; - case VM_ISA_PULSE_IRQ: - isa_irq = (struct vm_isa_irq *)data; - error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); - if (error == 0 && isa_irq->ioapic_irq != -1) - error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); - break; - case VM_ISA_SET_IRQ_TRIGGER: - isa_irq_trigger = (struct vm_isa_irq_trigger *)data; - error = vatpic_set_irq_trigger(sc->vm, - isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); - break; - case VM_MMAP_GETNEXT: - mm = (struct vm_memmap *)data; - error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, - &mm->segoff, &mm->len, &mm->prot, &mm->flags); - break; - case VM_MMAP_MEMSEG: - mm = (struct vm_memmap *)data; - error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, - mm->len, mm->prot, mm->flags); - break; - case VM_MUNMAP_MEMSEG: - mu = (struct vm_munmap *)data; - error = 
vm_munmap_memseg(sc->vm, mu->gpa, mu->len); - break; -#ifdef COMPAT_FREEBSD12 - case VM_ALLOC_MEMSEG_FBSD12: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg_fbsd12 *)0)->name)); - break; -#endif - case VM_ALLOC_MEMSEG: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg *)0)->name)); - break; -#ifdef COMPAT_FREEBSD12 - case VM_GET_MEMSEG_FBSD12: - error = get_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg_fbsd12 *)0)->name)); - break; -#endif - case VM_GET_MEMSEG: - error = get_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg *)0)->name)); - break; - case VM_GET_REGISTER: - vmreg = (struct vm_register *)data; - error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); - break; - case VM_SET_REGISTER: - vmreg = (struct vm_register *)data; - error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); - break; - case VM_SET_SEGMENT_DESCRIPTOR: - vmsegdesc = (struct vm_seg_desc *)data; - error = vm_set_seg_desc(vcpu, - vmsegdesc->regnum, - &vmsegdesc->desc); - break; - case VM_GET_SEGMENT_DESCRIPTOR: - vmsegdesc = (struct vm_seg_desc *)data; - error = vm_get_seg_desc(vcpu, - vmsegdesc->regnum, - &vmsegdesc->desc); - break; - case VM_GET_REGISTER_SET: - vmregset = (struct vm_register_set *)data; - if (vmregset->count > VM_REG_LAST) { - error = EINVAL; - break; - } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * - vmregset->count); - if (error == 0) - error = vm_get_register_set(vcpu, - vmregset->count, regnums, regvals); - if (error == 0) - error = copyout(regvals, vmregset->regvals, - sizeof(regvals[0]) * vmregset->count); - free(regvals, M_VMMDEV); - free(regnums, M_VMMDEV); - break; - case VM_SET_REGISTER_SET: - vmregset = (struct vm_register_set *)data; - if (vmregset->count > VM_REG_LAST) { - error = EINVAL; - break; - } - regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, - M_WAITOK); - error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * - vmregset->count); - if (error == 0) - error = copyin(vmregset->regvals, regvals, - sizeof(regvals[0]) * vmregset->count); - if (error == 0) - error = vm_set_register_set(vcpu, - vmregset->count, regnums, regvals); - free(regvals, M_VMMDEV); - free(regnums, M_VMMDEV); - break; - case VM_GET_CAPABILITY: - vmcap = (struct vm_capability *)data; - error = vm_get_capability(vcpu, - vmcap->captype, - &vmcap->capval); - break; - case VM_SET_CAPABILITY: - vmcap = (struct vm_capability *)data; - error = vm_set_capability(vcpu, - vmcap->captype, - vmcap->capval); - break; - case VM_SET_X2APIC_STATE: - x2apic = (struct vm_x2apic *)data; - error = vm_set_x2apic_state(vcpu, x2apic->state); - break; - case VM_GET_X2APIC_STATE: - x2apic = (struct vm_x2apic *)data; - error = vm_get_x2apic_state(vcpu, &x2apic->state); - break; - case VM_GET_GPA_PMAP: - gpapte = (struct vm_gpa_pte *)data; - pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), - gpapte->gpa, gpapte->pte, &gpapte->ptenum); - error = 0; - break; - case VM_GET_HPET_CAPABILITIES: - error = vhpet_getcap((struct vm_hpet_cap *)data); - break; - case VM_GLA2GPA: { - CTASSERT(PROT_READ == VM_PROT_READ); - CTASSERT(PROT_WRITE == VM_PROT_WRITE); - CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); - gg = (struct vm_gla2gpa *)data; - error = 
vm_gla2gpa(vcpu, &gg->paging, gg->gla, - gg->prot, &gg->gpa, &gg->fault); - KASSERT(error == 0 || error == EFAULT, - ("%s: vm_gla2gpa unknown error %d", __func__, error)); - break; - } - case VM_GLA2GPA_NOFAULT: - gg = (struct vm_gla2gpa *)data; - error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, - gg->prot, &gg->gpa, &gg->fault); - KASSERT(error == 0 || error == EFAULT, - ("%s: vm_gla2gpa unknown error %d", __func__, error)); - break; - case VM_ACTIVATE_CPU: - error = vm_activate_cpu(vcpu); - break; - case VM_GET_CPUS: - error = 0; - vm_cpuset = (struct vm_cpuset *)data; - size = vm_cpuset->cpusetsize; - if (size < 1 || size > CPU_MAXSIZE / NBBY) { - error = ERANGE; - break; - } - cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP, - M_WAITOK | M_ZERO); - if (vm_cpuset->which == VM_ACTIVE_CPUS) - *cpuset = vm_active_cpus(sc->vm); - else if (vm_cpuset->which == VM_SUSPENDED_CPUS) - *cpuset = vm_suspended_cpus(sc->vm); - else if (vm_cpuset->which == VM_DEBUG_CPUS) - *cpuset = vm_debug_cpus(sc->vm); - else - error = EINVAL; - if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY)) - error = ERANGE; - if (error == 0) - error = copyout(cpuset, vm_cpuset->cpus, size); - free(cpuset, M_TEMP); - break; - case VM_SUSPEND_CPU: - error = vm_suspend_cpu(sc->vm, vcpu); - break; - case VM_RESUME_CPU: - error = vm_resume_cpu(sc->vm, vcpu); - break; - case VM_SET_INTINFO: - vmii = (struct vm_intinfo *)data; - error = vm_exit_intinfo(vcpu, vmii->info1); - break; - case VM_GET_INTINFO: - vmii = (struct vm_intinfo *)data; - error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2); - break; - case VM_RTC_WRITE: - rtcdata = (struct vm_rtc_data *)data; - error = vrtc_nvram_write(sc->vm, rtcdata->offset, - rtcdata->value); - break; - case VM_RTC_READ: - rtcdata = (struct vm_rtc_data *)data; - error = vrtc_nvram_read(sc->vm, rtcdata->offset, - &rtcdata->value); - break; - case VM_RTC_SETTIME: - rtctime = (struct vm_rtc_time *)data; - error = vrtc_set_time(sc->vm, rtctime->secs); - break; - case VM_RTC_GETTIME: - error = 0; - rtctime = (struct vm_rtc_time *)data; - rtctime->secs = vrtc_get_time(sc->vm); - break; - case VM_RESTART_INSTRUCTION: - error = vm_restart_instruction(vcpu); - break; - case VM_SET_TOPOLOGY: - topology = (struct vm_cpu_topology *)data; - error = vm_set_topology(sc->vm, topology->sockets, - topology->cores, topology->threads, topology->maxcpus); - break; - case VM_GET_TOPOLOGY: - topology = (struct vm_cpu_topology *)data; - vm_get_topology(sc->vm, &topology->sockets, &topology->cores, - &topology->threads, &topology->maxcpus); - error = 0; - break; -#ifdef BHYVE_SNAPSHOT - case VM_SNAPSHOT_REQ: - snapshot_meta = (struct vm_snapshot_meta *)data; - error = vm_snapshot_req(sc->vm, snapshot_meta); - break; -#ifdef COMPAT_FREEBSD13 - case VM_SNAPSHOT_REQ_OLD: - /* - * The old structure just has an additional pointer at - * the start that is ignored. - */ - snapshot_old = (struct vm_snapshot_meta_old *)data; - snapshot_meta = - (struct vm_snapshot_meta *)&snapshot_old->dev_data; - error = vm_snapshot_req(sc->vm, snapshot_meta); - break; -#endif - case VM_RESTORE_TIME: - error = vm_restore_time(sc->vm); - break; -#endif - default: - error = ENOTTY; - break; - } - -done: - if (vcpus_locked == SINGLE) - vcpu_unlock_one(vcpu); - else if (vcpus_locked == ALL) - vcpu_unlock_all(sc); - if (memsegs_locked) - vm_unlock_memsegs(sc->vm); - - /* - * Make sure that no handler returns a kernel-internal - * error value to userspace. 
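The VM_GET_REGISTER_SET path above batches individual register reads into a single ioctl; the userland side looks roughly like this (hypothetical 'vmfd', arbitrary register choice):

	const int regnums[] = {
		VM_REG_GUEST_RIP, VM_REG_GUEST_RSP, VM_REG_GUEST_RFLAGS,
	};
	uint64_t regvals[3];
	struct vm_register_set vmregset = {
		.cpuid = 0,
		.count = 3,
		.regnums = regnums,
		.regvals = regvals,
	};

	if (ioctl(vmfd, VM_GET_REGISTER_SET, &vmregset) == 0)
		printf("rip %#lx rsp %#lx rflags %#lx\n",
		    regvals[0], regvals[1], regvals[2]);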
- */ - KASSERT(error == ERESTART || error >= 0, - ("vmmdev_ioctl: invalid error return %d", error)); - return (error); -} - -static int -vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, - struct vm_object **objp, int nprot) -{ - struct vmmdev_softc *sc; - vm_paddr_t gpa; - size_t len; - vm_ooffset_t segoff, first, last; - int error, found, segid; - bool sysmem; - - error = vmm_priv_check(curthread->td_ucred); - if (error) - return (error); - - first = *offset; - last = first + mapsize; - if ((nprot & PROT_EXEC) || first < 0 || first >= last) - return (EINVAL); - - sc = vmmdev_lookup2(cdev); - if (sc == NULL) { - /* virtual machine is in the process of being created */ - return (EINVAL); - } - - /* - * Get a read lock on the guest memory map. - */ - vm_slock_memsegs(sc->vm); - - gpa = 0; - found = 0; - while (!found) { - error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, - NULL, NULL); - if (error) - break; - - if (first >= gpa && last <= gpa + len) - found = 1; - else - gpa += len; - } - - if (found) { - error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); - KASSERT(error == 0 && *objp != NULL, - ("%s: invalid memory segment %d", __func__, segid)); - if (sysmem) { - vm_object_reference(*objp); - *offset = segoff + (first - gpa); - } else { - error = EINVAL; - } - } - vm_unlock_memsegs(sc->vm); - return (error); -} - -static void -vmmdev_destroy(void *arg) -{ - struct vmmdev_softc *sc = arg; - struct devmem_softc *dsc; - int error __diagused; - - vm_disable_vcpu_creation(sc->vm); - error = vcpu_lock_all(sc); - KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); - vm_unlock_vcpus(sc->vm); - - while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { - KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); - SLIST_REMOVE_HEAD(&sc->devmem, link); - free(dsc->name, M_VMMDEV); - free(dsc, M_VMMDEV); - } - - if (sc->cdev != NULL) - destroy_dev(sc->cdev); - - if (sc->vm != NULL) - vm_destroy(sc->vm); - - if (sc->ucred != NULL) - crfree(sc->ucred); - - if ((sc->flags & VSC_LINKED) != 0) { - mtx_lock(&vmmdev_mtx); - SLIST_REMOVE(&head, sc, vmmdev_softc, link); - mtx_unlock(&vmmdev_mtx); - } - - free(sc, M_VMMDEV); -} - -static int -sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) -{ - struct devmem_softc *dsc; - struct vmmdev_softc *sc; - struct cdev *cdev; - char *buf; - int error, buflen; - - error = vmm_priv_check(req->td->td_ucred); - if (error) - return (error); - - buflen = VM_MAX_NAMELEN + 1; - buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); - error = sysctl_handle_string(oidp, buf, buflen, req); - if (error != 0 || req->newptr == NULL) - goto out; - - mtx_lock(&vmmdev_mtx); - sc = vmmdev_lookup(buf); - if (sc == NULL || sc->cdev == NULL) { - mtx_unlock(&vmmdev_mtx); - error = EINVAL; - goto out; - } - - /* - * Setting 'sc->cdev' to NULL is used to indicate that the VM - * is scheduled for destruction. - */ - cdev = sc->cdev; - sc->cdev = NULL; - mtx_unlock(&vmmdev_mtx); - - /* - * Destroy all cdevs: - * - * - any new operations on the 'cdev' will return an error (ENXIO). 
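The hw.vmm.destroy handler above and its hw.vmm.create counterpart below were the lifecycle interface for this driver; from a shell it is just (illustrative VM name):

	# create /dev/vmm/testvm, then schedule its teardown
	sysctl hw.vmm.create=testvm
	sysctl hw.vmm.destroy=testvm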
- * - * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' - */ - SLIST_FOREACH(dsc, &sc->devmem, link) { - KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); - destroy_dev(dsc->cdev); - devmem_destroy(dsc); - } - destroy_dev(cdev); - vmmdev_destroy(sc); - error = 0; - -out: - free(buf, M_VMMDEV); - return (error); -} -SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, - CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, - NULL, 0, sysctl_vmm_destroy, "A", - NULL); - -static struct cdevsw vmmdevsw = { - .d_name = "vmmdev", - .d_version = D_VERSION, - .d_ioctl = vmmdev_ioctl, - .d_mmap_single = vmmdev_mmap_single, - .d_read = vmmdev_rw, - .d_write = vmmdev_rw, -}; - -static int -sysctl_vmm_create(SYSCTL_HANDLER_ARGS) -{ - struct vm *vm; - struct cdev *cdev; - struct vmmdev_softc *sc, *sc2; - char *buf; - int error, buflen; - - error = vmm_priv_check(req->td->td_ucred); - if (error) - return (error); - - buflen = VM_MAX_NAMELEN + 1; - buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); - error = sysctl_handle_string(oidp, buf, buflen, req); - if (error != 0 || req->newptr == NULL) - goto out; - - mtx_lock(&vmmdev_mtx); - sc = vmmdev_lookup(buf); - mtx_unlock(&vmmdev_mtx); - if (sc != NULL) { - error = EEXIST; - goto out; - } - - error = vm_create(buf, &vm); - if (error != 0) - goto out; - - sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); - sc->ucred = crhold(curthread->td_ucred); - sc->vm = vm; - SLIST_INIT(&sc->devmem); - - /* - * Lookup the name again just in case somebody sneaked in when we - * dropped the lock. - */ - mtx_lock(&vmmdev_mtx); - sc2 = vmmdev_lookup(buf); - if (sc2 == NULL) { - SLIST_INSERT_HEAD(&head, sc, link); - sc->flags |= VSC_LINKED; - } - mtx_unlock(&vmmdev_mtx); - - if (sc2 != NULL) { - vmmdev_destroy(sc); - error = EEXIST; - goto out; - } - - error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred, - UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); - if (error != 0) { - vmmdev_destroy(sc); - goto out; - } - - mtx_lock(&vmmdev_mtx); - sc->cdev = cdev; - sc->cdev->si_drv1 = sc; - mtx_unlock(&vmmdev_mtx); - -out: - free(buf, M_VMMDEV); - return (error); -} -SYSCTL_PROC(_hw_vmm, OID_AUTO, create, - CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, - NULL, 0, sysctl_vmm_create, "A", - NULL); - -void -vmmdev_init(void) -{ - pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, - "Allow use of vmm in a jail."); -} - -int -vmmdev_cleanup(void) -{ - int error; - - if (SLIST_EMPTY(&head)) - error = 0; - else - error = EBUSY; - - return (error); -} - -static int -devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, - struct vm_object **objp, int nprot) -{ - struct devmem_softc *dsc; - vm_ooffset_t first, last; - size_t seglen; - int error; - bool sysmem; - - dsc = cdev->si_drv1; - if (dsc == NULL) { - /* 'cdev' has been created but is not ready for use */ - return (ENXIO); - } - - first = *offset; - last = *offset + len; - if ((nprot & PROT_EXEC) || first < 0 || first >= last) - return (EINVAL); - - vm_slock_memsegs(dsc->sc->vm); - - error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); - KASSERT(error == 0 && !sysmem && *objp != NULL, - ("%s: invalid devmem segment %d", __func__, dsc->segid)); - - if (seglen >= last) - vm_object_reference(*objp); - else - error = EINVAL; - - vm_unlock_memsegs(dsc->sc->vm); - return (error); -} - -static struct cdevsw devmemsw = { - .d_name = "devmem", - .d_version = D_VERSION, - .d_mmap_single = 
devmem_mmap_single, -}; - -static int -devmem_create_cdev(const char *vmname, int segid, char *devname) -{ - struct devmem_softc *dsc; - struct vmmdev_softc *sc; - struct cdev *cdev; - int error; - - error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, - UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); - if (error) - return (error); - - dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); - - mtx_lock(&vmmdev_mtx); - sc = vmmdev_lookup(vmname); - KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); - if (sc->cdev == NULL) { - /* virtual machine is being created or destroyed */ - mtx_unlock(&vmmdev_mtx); - free(dsc, M_VMMDEV); - destroy_dev_sched_cb(cdev, NULL, 0); - return (ENODEV); - } - - dsc->segid = segid; - dsc->name = devname; - dsc->cdev = cdev; - dsc->sc = sc; - SLIST_INSERT_HEAD(&sc->devmem, dsc, link); - mtx_unlock(&vmmdev_mtx); - - /* The 'cdev' is ready for use after 'si_drv1' is initialized */ - cdev->si_drv1 = dsc; - return (0); -} - -static void -devmem_destroy(void *arg) -{ - struct devmem_softc *dsc = arg; - - KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); - dsc->cdev = NULL; - dsc->sc = NULL; -} diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c new file mode 100644 index 000000000000..792807568212 --- /dev/null +++ b/sys/amd64/vmm/vmm_dev_machdep.c @@ -0,0 +1,526 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_bhyve_snapshot.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "vmm_lapic.h" +#include "vmm_mem.h" +#include "io/ppt.h" +#include "io/vatpic.h" +#include "io/vioapic.h" +#include "io/vhpet.h" +#include "io/vrtc.h" + +#ifdef COMPAT_FREEBSD13 +struct vm_stats_old { + int cpuid; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; + +#define VM_STATS_OLD _IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old) + +struct vm_snapshot_meta_old { + void *ctx; /* unused */ + void *dev_data; + const char *dev_name; /* identify userspace devices */ + enum snapshot_req dev_req; /* identify kernel structs */ + + struct vm_snapshot_buffer buffer; + + enum vm_snapshot_op op; +}; + +#define VM_SNAPSHOT_REQ_OLD \ + _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old) + +struct vm_exit_ipi_13 { + uint32_t mode; + uint8_t vector; + __BITSET_DEFINE(, 256) dmask; +}; + +struct vm_exit_13 { + uint32_t exitcode; + int32_t inst_length; + uint64_t rip; + uint64_t u[120 / sizeof(uint64_t)]; +}; + +struct vm_run_13 { + int cpuid; + struct vm_exit_13 vm_exit; +}; + +#define VM_RUN_13 \ + _IOWR('v', IOCNUM_RUN, struct vm_run_13) + +#endif /* COMPAT_FREEBSD13 */ + +const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = { + VMMDEV_IOCTL(VM_RUN, VMMDEV_IOCTL_LOCK_ONE_VCPU), +#ifdef COMPAT_FREEBSD13 + VMMDEV_IOCTL(VM_RUN_13, VMMDEV_IOCTL_LOCK_ONE_VCPU), +#endif + VMMDEV_IOCTL(VM_GET_SEGMENT_DESCRIPTOR, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_SEGMENT_DESCRIPTOR, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_X2APIC_STATE, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GLA2GPA, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GLA2GPA_NOFAULT, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_INTINFO, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GET_INTINFO, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_RESTART_INSTRUCTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GET_KERNEMU_DEV, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_KERNEMU_DEV, VMMDEV_IOCTL_LOCK_ONE_VCPU), + + VMMDEV_IOCTL(VM_BIND_PPTDEV, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_UNBIND_PPTDEV, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + + VMMDEV_IOCTL(VM_MAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_UNMAP_PPTDEV_MMIO, VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#ifdef BHYVE_SNAPSHOT +#ifdef COMPAT_FREEBSD13 + VMMDEV_IOCTL(VM_SNAPSHOT_REQ_OLD, VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif + VMMDEV_IOCTL(VM_SNAPSHOT_REQ, VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_RESTORE_TIME, VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif + +#ifdef COMPAT_FREEBSD13 + VMMDEV_IOCTL(VM_STATS_OLD, VMMDEV_IOCTL_LOCK_ONE_VCPU), +#endif + VMMDEV_IOCTL(VM_INJECT_NMI, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_LAPIC_IRQ, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GET_X2APIC_STATE, VMMDEV_IOCTL_LOCK_ONE_VCPU), + + VMMDEV_IOCTL(VM_LAPIC_LOCAL_IRQ, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), + + VMMDEV_IOCTL(VM_PPTDEV_MSI, 0), + VMMDEV_IOCTL(VM_PPTDEV_MSIX, 0), + VMMDEV_IOCTL(VM_PPTDEV_DISABLE_MSIX, 0), + VMMDEV_IOCTL(VM_LAPIC_MSI, 0), + VMMDEV_IOCTL(VM_IOAPIC_ASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_IOAPIC_DEASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_IOAPIC_PULSE_IRQ, 0), + VMMDEV_IOCTL(VM_IOAPIC_PINCOUNT, 0), + VMMDEV_IOCTL(VM_ISA_ASSERT_IRQ, 0), + 
VMMDEV_IOCTL(VM_ISA_DEASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_ISA_PULSE_IRQ, 0), + VMMDEV_IOCTL(VM_ISA_SET_IRQ_TRIGGER, 0), + VMMDEV_IOCTL(VM_GET_GPA_PMAP, 0), + VMMDEV_IOCTL(VM_GET_HPET_CAPABILITIES, 0), + VMMDEV_IOCTL(VM_RTC_READ, 0), + VMMDEV_IOCTL(VM_RTC_WRITE, 0), + VMMDEV_IOCTL(VM_RTC_GETTIME, 0), + VMMDEV_IOCTL(VM_RTC_SETTIME, 0), +}; +const size_t vmmdev_machdep_ioctl_count = nitems(vmmdev_machdep_ioctls); + +int +vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, + int fflag, struct thread *td) +{ + struct vm_seg_desc *vmsegdesc; + struct vm_run *vmrun; +#ifdef COMPAT_FREEBSD13 + struct vm_run_13 *vmrun_13; +#endif + struct vm_exception *vmexc; + struct vm_lapic_irq *vmirq; + struct vm_lapic_msi *vmmsi; + struct vm_ioapic_irq *ioapic_irq; + struct vm_isa_irq *isa_irq; + struct vm_isa_irq_trigger *isa_irq_trigger; + struct vm_pptdev *pptdev; + struct vm_pptdev_mmio *pptmmio; + struct vm_pptdev_msi *pptmsi; + struct vm_pptdev_msix *pptmsix; + struct vm_x2apic *x2apic; + struct vm_gpa_pte *gpapte; + struct vm_gla2gpa *gg; + struct vm_intinfo *vmii; + struct vm_rtc_time *rtctime; + struct vm_rtc_data *rtcdata; + struct vm_readwrite_kernemu_device *kernemu; +#ifdef BHYVE_SNAPSHOT + struct vm_snapshot_meta *snapshot_meta; +#ifdef COMPAT_FREEBSD13 + struct vm_snapshot_meta_old *snapshot_old; +#endif +#endif + int error; + + error = 0; + switch (cmd) { + case VM_RUN: { + struct vm_exit *vme; + + vmrun = (struct vm_run *)data; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error != 0) + break; + + error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); + if (error != 0) + break; + if (vme->exitcode == VM_EXITCODE_IPI) { + error = copyout(vm_exitinfo_cpuset(vcpu), + vmrun->cpuset, + min(vmrun->cpusetsize, sizeof(cpuset_t))); + if (error != 0) + break; + if (sizeof(cpuset_t) < vmrun->cpusetsize) { + uint8_t *p; + + p = (uint8_t *)vmrun->cpuset + + sizeof(cpuset_t); + while (p < (uint8_t *)vmrun->cpuset + + vmrun->cpusetsize) { + if (subyte(p++, 0) != 0) { + error = EFAULT; + break; + } + } + } + } + break; + } +#ifdef COMPAT_FREEBSD13 + case VM_RUN_13: { + struct vm_exit *vme; + struct vm_exit_13 *vme_13; + + vmrun_13 = (struct vm_run_13 *)data; + vme_13 = &vmrun_13->vm_exit; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error == 0) { + vme_13->exitcode = vme->exitcode; + vme_13->inst_length = vme->inst_length; + vme_13->rip = vme->rip; + memcpy(vme_13->u, &vme->u, sizeof(vme_13->u)); + if (vme->exitcode == VM_EXITCODE_IPI) { + struct vm_exit_ipi_13 *ipi; + cpuset_t *dmask; + int cpu; + + dmask = vm_exitinfo_cpuset(vcpu); + ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0]; + BIT_ZERO(256, &ipi->dmask); + CPU_FOREACH_ISSET(cpu, dmask) { + if (cpu >= 256) + break; + BIT_SET(256, cpu, &ipi->dmask); + } + } + } + break; + } + case VM_STATS_OLD: { + struct vm_stats_old *vmstats_old; + + vmstats_old = (struct vm_stats_old *)data; + getmicrotime(&vmstats_old->tv); + error = vmm_stat_copy(vcpu, 0, nitems(vmstats_old->statbuf), + &vmstats_old->num_entries, vmstats_old->statbuf); + break; + } +#endif + case VM_PPTDEV_MSI: + pptmsi = (struct vm_pptdev_msi *)data; + error = ppt_setup_msi(vm, + pptmsi->bus, pptmsi->slot, pptmsi->func, + pptmsi->addr, pptmsi->msg, + pptmsi->numvec); + break; + case VM_PPTDEV_MSIX: + pptmsix = (struct vm_pptdev_msix *)data; + error = ppt_setup_msix(vm, + pptmsix->bus, pptmsix->slot, + pptmsix->func, pptmsix->idx, + pptmsix->addr, pptmsix->msg, + pptmsix->vector_control); + break; + case VM_PPTDEV_DISABLE_MSIX: + pptdev = (struct 
vm_pptdev *)data; + error = ppt_disable_msix(vm, pptdev->bus, pptdev->slot, + pptdev->func); + break; + case VM_MAP_PPTDEV_MMIO: + pptmmio = (struct vm_pptdev_mmio *)data; + error = ppt_map_mmio(vm, pptmmio->bus, pptmmio->slot, + pptmmio->func, pptmmio->gpa, pptmmio->len, + pptmmio->hpa); + break; + case VM_UNMAP_PPTDEV_MMIO: + pptmmio = (struct vm_pptdev_mmio *)data; + error = ppt_unmap_mmio(vm, pptmmio->bus, pptmmio->slot, + pptmmio->func, pptmmio->gpa, pptmmio->len); + break; + case VM_BIND_PPTDEV: + pptdev = (struct vm_pptdev *)data; + error = vm_assign_pptdev(vm, pptdev->bus, pptdev->slot, + pptdev->func); + break; + case VM_UNBIND_PPTDEV: + pptdev = (struct vm_pptdev *)data; + error = vm_unassign_pptdev(vm, pptdev->bus, pptdev->slot, + pptdev->func); + break; + case VM_INJECT_EXCEPTION: + vmexc = (struct vm_exception *)data; + error = vm_inject_exception(vcpu, + vmexc->vector, vmexc->error_code_valid, vmexc->error_code, + vmexc->restart_instruction); + break; + case VM_INJECT_NMI: + error = vm_inject_nmi(vcpu); + break; + case VM_LAPIC_IRQ: + vmirq = (struct vm_lapic_irq *)data; + error = lapic_intr_edge(vcpu, vmirq->vector); + break; + case VM_LAPIC_LOCAL_IRQ: + vmirq = (struct vm_lapic_irq *)data; + error = lapic_set_local_intr(vm, vcpu, vmirq->vector); + break; + case VM_LAPIC_MSI: + vmmsi = (struct vm_lapic_msi *)data; + error = lapic_intr_msi(vm, vmmsi->addr, vmmsi->msg); + break; + case VM_IOAPIC_ASSERT_IRQ: + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_assert_irq(vm, ioapic_irq->irq); + break; + case VM_IOAPIC_DEASSERT_IRQ: + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_deassert_irq(vm, ioapic_irq->irq); + break; + case VM_IOAPIC_PULSE_IRQ: + ioapic_irq = (struct vm_ioapic_irq *)data; + error = vioapic_pulse_irq(vm, ioapic_irq->irq); + break; + case VM_IOAPIC_PINCOUNT: + *(int *)data = vioapic_pincount(vm); + break; + case VM_SET_KERNEMU_DEV: + case VM_GET_KERNEMU_DEV: { + mem_region_write_t mwrite; + mem_region_read_t mread; + int size; + bool arg; + + kernemu = (void *)data; + + if (kernemu->access_width > 0) + size = (1u << kernemu->access_width); + else + size = 1; + + if (kernemu->gpa >= DEFAULT_APIC_BASE && + kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { + mread = lapic_mmio_read; + mwrite = lapic_mmio_write; + } else if (kernemu->gpa >= VIOAPIC_BASE && + kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { + mread = vioapic_mmio_read; + mwrite = vioapic_mmio_write; + } else if (kernemu->gpa >= VHPET_BASE && + kernemu->gpa < VHPET_BASE + VHPET_SIZE) { + mread = vhpet_mmio_read; + mwrite = vhpet_mmio_write; + } else { + error = EINVAL; + break; + } + + if (cmd == VM_SET_KERNEMU_DEV) + error = mwrite(vcpu, kernemu->gpa, + kernemu->value, size, &arg); + else + error = mread(vcpu, kernemu->gpa, + &kernemu->value, size, &arg); + break; + } + case VM_ISA_ASSERT_IRQ: + isa_irq = (struct vm_isa_irq *)data; + error = vatpic_assert_irq(vm, isa_irq->atpic_irq); + if (error == 0 && isa_irq->ioapic_irq != -1) + error = vioapic_assert_irq(vm, isa_irq->ioapic_irq); + break; + case VM_ISA_DEASSERT_IRQ: + isa_irq = (struct vm_isa_irq *)data; + error = vatpic_deassert_irq(vm, isa_irq->atpic_irq); + if (error == 0 && isa_irq->ioapic_irq != -1) + error = vioapic_deassert_irq(vm, isa_irq->ioapic_irq); + break; + case VM_ISA_PULSE_IRQ: + isa_irq = (struct vm_isa_irq *)data; + error = vatpic_pulse_irq(vm, isa_irq->atpic_irq); + if (error == 0 && isa_irq->ioapic_irq != -1) + error = vioapic_pulse_irq(vm, isa_irq->ioapic_irq); + break; + case VM_ISA_SET_IRQ_TRIGGER: + 
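/* Unlike the assert/deassert cases above, only the ATPIC pin has a trigger mode to program. */ +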
isa_irq_trigger = (struct vm_isa_irq_trigger *)data; + error = vatpic_set_irq_trigger(vm, + isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); + break; + case VM_SET_SEGMENT_DESCRIPTOR: + vmsegdesc = (struct vm_seg_desc *)data; + error = vm_set_seg_desc(vcpu, + vmsegdesc->regnum, + &vmsegdesc->desc); + break; + case VM_GET_SEGMENT_DESCRIPTOR: + vmsegdesc = (struct vm_seg_desc *)data; + error = vm_get_seg_desc(vcpu, + vmsegdesc->regnum, + &vmsegdesc->desc); + break; + case VM_SET_X2APIC_STATE: + x2apic = (struct vm_x2apic *)data; + error = vm_set_x2apic_state(vcpu, x2apic->state); + break; + case VM_GET_X2APIC_STATE: + x2apic = (struct vm_x2apic *)data; + error = vm_get_x2apic_state(vcpu, &x2apic->state); + break; + case VM_GET_GPA_PMAP: + gpapte = (struct vm_gpa_pte *)data; + pmap_get_mapping(vmspace_pmap(vm_get_vmspace(vm)), + gpapte->gpa, gpapte->pte, &gpapte->ptenum); + error = 0; + break; + case VM_GET_HPET_CAPABILITIES: + error = vhpet_getcap((struct vm_hpet_cap *)data); + break; + case VM_GLA2GPA: { + CTASSERT(PROT_READ == VM_PROT_READ); + CTASSERT(PROT_WRITE == VM_PROT_WRITE); + CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + } + case VM_GLA2GPA_NOFAULT: + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + case VM_SET_INTINFO: + vmii = (struct vm_intinfo *)data; + error = vm_exit_intinfo(vcpu, vmii->info1); + break; + case VM_GET_INTINFO: + vmii = (struct vm_intinfo *)data; + error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2); + break; + case VM_RTC_WRITE: + rtcdata = (struct vm_rtc_data *)data; + error = vrtc_nvram_write(vm, rtcdata->offset, + rtcdata->value); + break; + case VM_RTC_READ: + rtcdata = (struct vm_rtc_data *)data; + error = vrtc_nvram_read(vm, rtcdata->offset, + &rtcdata->value); + break; + case VM_RTC_SETTIME: + rtctime = (struct vm_rtc_time *)data; + error = vrtc_set_time(vm, rtctime->secs); + break; + case VM_RTC_GETTIME: + error = 0; + rtctime = (struct vm_rtc_time *)data; + rtctime->secs = vrtc_get_time(vm); + break; + case VM_RESTART_INSTRUCTION: + error = vm_restart_instruction(vcpu); + break; +#ifdef BHYVE_SNAPSHOT + case VM_SNAPSHOT_REQ: + snapshot_meta = (struct vm_snapshot_meta *)data; + error = vm_snapshot_req(vm, snapshot_meta); + break; +#ifdef COMPAT_FREEBSD13 + case VM_SNAPSHOT_REQ_OLD: + /* + * The old structure just has an additional pointer at + * the start that is ignored. + */ + snapshot_old = (struct vm_snapshot_meta_old *)data; + snapshot_meta = + (struct vm_snapshot_meta *)&snapshot_old->dev_data; + error = vm_snapshot_req(vm, snapshot_meta); + break; +#endif + case VM_RESTORE_TIME: + error = vm_restore_time(vm); + break; +#endif + default: + error = ENOTTY; + break; + } + + return (error); +} diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h index 9e229665a71e..08c237d31046 100644 --- a/sys/arm64/include/vmm_dev.h +++ b/sys/arm64/include/vmm_dev.h @@ -1,272 +1,267 @@ /* * Copyright (C) 2015 Mihai Carabas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ -#ifdef _KERNEL -void vmmdev_init(void); -int vmmdev_cleanup(void); -#endif - struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ vm_ooffset_t segoff; /* offset into memory segment */ size_t len; /* mmap length */ int prot; /* RWX */ int flags; }; #define VM_MEMMAP_F_WIRED 0x01 struct vm_munmap { vm_paddr_t gpa; size_t len; }; #define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL) struct vm_memseg { int segid; size_t len; char name[VM_MAX_SUFFIXLEN + 1]; }; struct vm_register { int cpuid; int regnum; /* enum vm_reg_name */ uint64_t regval; }; struct vm_register_set { int cpuid; unsigned int count; const int *regnums; /* enum vm_reg_name */ uint64_t *regvals; }; struct vm_run { int cpuid; cpuset_t *cpuset; /* CPU set storage */ size_t cpusetsize; struct vm_exit *vm_exit; }; struct vm_exception { int cpuid; uint64_t esr; uint64_t far; }; struct vm_msi { uint64_t msg; uint64_t addr; int bus; int slot; int func; }; struct vm_capability { int cpuid; enum vm_cap_type captype; int capval; int allcpus; }; #define MAX_VM_STATS 64 struct vm_stats { int cpuid; /* in */ int index; /* in */ int num_entries; /* out */ struct timeval tv; uint64_t statbuf[MAX_VM_STATS]; }; struct vm_stat_desc { int index; /* in */ char desc[128]; /* out */ }; struct vm_suspend { enum vm_suspend_how how; }; struct vm_gla2gpa { int vcpuid; /* inputs */ int prot; /* PROT_READ or PROT_WRITE */ uint64_t gla; struct vm_guest_paging paging; int fault; /* outputs */ uint64_t gpa; }; struct vm_activate_cpu { int vcpuid; }; struct vm_cpuset { int which; int cpusetsize; cpuset_t *cpus; }; #define VM_ACTIVE_CPUS 0 #define VM_SUSPENDED_CPUS 1 #define VM_DEBUG_CPUS 2 struct vm_vgic_version { u_int version; u_int flags; }; struct vm_vgic_descr { struct vm_vgic_version ver; union { struct { uint64_t dist_start; uint64_t dist_size; uint64_t redist_start; uint64_t redist_size; } v3_regs; }; }; struct vm_irq { uint32_t irq; }; struct vm_cpu_topology { uint16_t sockets; uint16_t cores; uint16_t threads; uint16_t maxcpus; }; enum { /* general routines */ IOCNUM_ABIVERS = 0, IOCNUM_RUN = 1, IOCNUM_SET_CAPABILITY = 2, IOCNUM_GET_CAPABILITY = 3, IOCNUM_SUSPEND = 4, IOCNUM_REINIT = 5, /* memory apis */ IOCNUM_GET_GPA_PMAP = 12, IOCNUM_GLA2GPA_NOFAULT = 13, IOCNUM_ALLOC_MEMSEG = 14, IOCNUM_GET_MEMSEG = 15, IOCNUM_MMAP_MEMSEG = 16, IOCNUM_MMAP_GETNEXT = 17, IOCNUM_MUNMAP_MEMSEG = 18, /* register/state 
accessors */ IOCNUM_SET_REGISTER = 20, IOCNUM_GET_REGISTER = 21, IOCNUM_SET_REGISTER_SET = 24, IOCNUM_GET_REGISTER_SET = 25, /* statistics */ IOCNUM_VM_STATS = 50, IOCNUM_VM_STAT_DESC = 51, /* CPU Topology */ IOCNUM_SET_TOPOLOGY = 63, IOCNUM_GET_TOPOLOGY = 64, /* interrupt injection */ IOCNUM_ASSERT_IRQ = 80, IOCNUM_DEASSERT_IRQ = 81, IOCNUM_RAISE_MSI = 82, IOCNUM_INJECT_EXCEPTION = 83, /* vm_cpuset */ IOCNUM_ACTIVATE_CPU = 90, IOCNUM_GET_CPUSET = 91, IOCNUM_SUSPEND_CPU = 92, IOCNUM_RESUME_CPU = 93, /* vm_attach_vgic */ IOCNUM_GET_VGIC_VERSION = 110, IOCNUM_ATTACH_VGIC = 111, }; #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) #define VM_SUSPEND \ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_REINIT \ _IO('v', IOCNUM_REINIT) #define VM_ALLOC_MEMSEG \ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) #define VM_GET_MEMSEG \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) #define VM_MMAP_MEMSEG \ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) #define VM_MMAP_GETNEXT \ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) #define VM_MUNMAP_MEMSEG \ _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap) #define VM_SET_REGISTER \ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) #define VM_GET_REGISTER \ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) #define VM_SET_REGISTER_SET \ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) #define VM_GET_REGISTER_SET \ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set) #define VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) #define VM_STATS \ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) #define VM_STAT_DESC \ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) #define VM_ASSERT_IRQ \ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq) #define VM_DEASSERT_IRQ \ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq) #define VM_RAISE_MSI \ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi) #define VM_INJECT_EXCEPTION \ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) #define VM_SET_TOPOLOGY \ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) #define VM_GET_TOPOLOGY \ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) #define VM_GLA2GPA_NOFAULT \ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa) #define VM_ACTIVATE_CPU \ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) #define VM_GET_CPUS \ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) #define VM_SUSPEND_CPU \ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) #define VM_RESUME_CPU \ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) #define VM_GET_VGIC_VERSION \ _IOR('v', IOCNUM_GET_VGIC_VERSION, struct vm_vgic_version) #define VM_ATTACH_VGIC \ _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_vgic_descr) #endif diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index 425e44f81500..fe5f43495262 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -1,1882 +1,1882 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (C) 2015 Mihai Carabas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include +#include #include #include #include "arm64.h" #include "mmu.h" #include "io/vgic.h" #include "io/vtimer.h" struct vcpu { int flags; enum vcpu_state state; struct mtx mtx; int hostcpu; /* host cpuid this vcpu last ran on */ int vcpuid; void *stats; struct vm_exit exitinfo; uint64_t nextpc; /* (x) next instruction to execute */ struct vm *vm; /* (o) */ void *cookie; /* (i) cpu-specific data */ struct vfpstate *guestfpu; /* (a,i) guest fpu state */ }; #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) struct mem_seg { uint64_t gpa; size_t len; bool wired; bool sysmem; vm_object_t object; }; #define VM_MAX_MEMSEGS 3 struct mem_map { vm_paddr_t gpa; size_t len; vm_ooffset_t segoff; int segid; int prot; int flags; }; #define VM_MAX_MEMMAPS 4 struct vmm_mmio_region { uint64_t start; uint64_t end; mem_region_read_t read; mem_region_write_t write; }; #define VM_MAX_MMIO_REGIONS 4 struct vmm_special_reg { uint32_t esr_iss; uint32_t esr_mask; reg_read_t reg_read; reg_write_t reg_write; void *arg; }; #define VM_MAX_SPECIAL_REGS 16 /* * Initialization: * (o) initialized the first time the VM is created * (i) initialized when VM is created and when it is reinitialized * (x) initialized before use */ struct vm { void *cookie; /* (i) cpu-specific data */ volatile cpuset_t active_cpus; /* (i) active vcpus */ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ int suspend; /* (i) stop VM execution */ bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS]; /* The following describe the vm cpu topology */ uint16_t sockets; /* (o) num of sockets */ uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) 
num of threads/core */ uint16_t maxcpus; /* (o) max pluggable cpus */ struct sx mem_segs_lock; /* (o) */ struct sx vcpus_init_lock; /* (o) */ }; static bool vmm_initialized = false; static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu); static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); /* statistics */ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); static int vmm_ipinum; SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); struct vmm_regs { uint64_t id_aa64afr0; uint64_t id_aa64afr1; uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; uint64_t id_aa64isar0; uint64_t id_aa64isar1; uint64_t id_aa64isar2; uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint64_t id_aa64mmfr2; uint64_t id_aa64pfr0; uint64_t id_aa64pfr1; }; static const struct vmm_regs vmm_arch_regs_masks = { .id_aa64dfr0 = ID_AA64DFR0_CTX_CMPs_MASK | ID_AA64DFR0_WRPs_MASK | ID_AA64DFR0_BRPs_MASK | ID_AA64DFR0_PMUVer_3 | ID_AA64DFR0_DebugVer_8, .id_aa64isar0 = ID_AA64ISAR0_TLB_TLBIOSR | ID_AA64ISAR0_SHA3_IMPL | ID_AA64ISAR0_RDM_IMPL | ID_AA64ISAR0_Atomic_IMPL | ID_AA64ISAR0_CRC32_BASE | ID_AA64ISAR0_SHA2_512 | ID_AA64ISAR0_SHA1_BASE | ID_AA64ISAR0_AES_PMULL, .id_aa64mmfr0 = ID_AA64MMFR0_TGran4_IMPL | ID_AA64MMFR0_TGran64_IMPL | ID_AA64MMFR0_TGran16_IMPL | ID_AA64MMFR0_ASIDBits_16 | ID_AA64MMFR0_PARange_4P, .id_aa64mmfr1 = ID_AA64MMFR1_SpecSEI_IMPL | ID_AA64MMFR1_PAN_ATS1E1 | ID_AA64MMFR1_HAFDBS_AF, .id_aa64pfr0 = ID_AA64PFR0_GIC_CPUIF_NONE | ID_AA64PFR0_AdvSIMD_HP | ID_AA64PFR0_FP_HP | ID_AA64PFR0_EL3_64 | ID_AA64PFR0_EL2_64 | ID_AA64PFR0_EL1_64 | ID_AA64PFR0_EL0_64, }; /* Host registers masked by vmm_arch_regs_masks. */ static struct vmm_regs vmm_arch_regs; u_int vm_maxcpu; SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &vm_maxcpu, 0, "Maximum number of vCPUs"); static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception"); VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted"); VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted"); VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted"); VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted"); VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort"); VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort"); VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception"); VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt"); VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception"); VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); /* * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this * is a safe value for now. 
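* It is further capped at CPU_SETSIZE, since cpuset_t masks are used to track vcpus.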
*/ #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) static int vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks) { #define _FETCH_KERN_REG(reg, field) do { \ regs->field = vmm_arch_regs_masks.field; \ if (!get_kernel_reg_masked(reg, &regs->field, masks->field)) \ regs->field = 0; \ } while (0) _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0); _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1); _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0); _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1); _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0); _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1); _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2); _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0); _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1); _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2); _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0); _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1); #undef _FETCH_KERN_REG return (0); } static void vcpu_cleanup(struct vcpu *vcpu, bool destroy) { vmmops_vcpu_cleanup(vcpu->cookie); vcpu->cookie = NULL; if (destroy) { vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); } } static struct vcpu * vcpu_alloc(struct vm *vm, int vcpu_id) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_alloc: invalid vcpu %d", vcpu_id)); vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); vcpu_lock_init(vcpu); vcpu->state = VCPU_IDLE; vcpu->hostcpu = NOCPU; vcpu->vcpuid = vcpu_id; vcpu->vm = vm; vcpu->guestfpu = fpu_save_area_alloc(); vcpu->stats = vmm_stat_alloc(); return (vcpu); } static void vcpu_init(struct vcpu *vcpu) { vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); MPASS(vcpu->cookie != NULL); fpu_save_area_reset(vcpu->guestfpu); vmm_stat_init(vcpu->stats); } struct vm_exit * vm_exitinfo(struct vcpu *vcpu) { return (&vcpu->exitinfo); } static int vmm_init(void) { int error; vm_maxcpu = mp_ncpus; TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); if (vm_maxcpu > VM_MAXCPU) { printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); vm_maxcpu = VM_MAXCPU; } if (vm_maxcpu == 0) vm_maxcpu = 1; error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks); if (error != 0) return (error); return (vmmops_modinit(0)); } static int vmm_handler(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: /* TODO: if (vmm_is_hw_supported()) { */ vmmdev_init(); error = vmm_init(); if (error == 0) vmm_initialized = true; break; case MOD_UNLOAD: /* TODO: if (vmm_is_hw_supported()) { */ error = vmmdev_cleanup(); if (error == 0 && vmm_initialized) { error = vmmops_modcleanup(); if (error) vmm_initialized = false; } break; default: error = 0; break; } return (error); } static moduledata_t vmm_kmod = { "vmm", vmm_handler, NULL }; /* * vmm initialization has the following dependencies: * * - HYP initialization requires smp_rendezvous() and therefore must happen * after SMP is fully functional (after SI_SUB_SMP). 
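* Hence DECLARE_MODULE() below registers the module at SI_SUB_SMP + 1.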
*/ DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { int i; vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); MPASS(vm->cookie != NULL); CPU_ZERO(&vm->active_cpus); CPU_ZERO(&vm->debug_cpus); vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); memset(vm->special_reg, 0, sizeof(vm->special_reg)); if (!create) { for (i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_init(vm->vcpu[i]); } } } void vm_disable_vcpu_creation(struct vm *vm) { sx_xlock(&vm->vcpus_init_lock); vm->dying = true; sx_xunlock(&vm->vcpus_init_lock); } struct vcpu * vm_alloc_vcpu(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); /* Some interrupt controllers may have a CPU limit */ if (vcpuid >= vgic_max_cpu_count(vm->cookie)) return (NULL); vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) return (vcpu); sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); /* * Ensure vCPU is fully created before updating pointer * to permit unlocked reads above. */ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], (uintptr_t)vcpu); } sx_xunlock(&vm->vcpus_init_lock); return (vcpu); } void vm_slock_vcpus(struct vm *vm) { sx_slock(&vm->vcpus_init_lock); } void vm_unlock_vcpus(struct vm *vm) { sx_unlock(&vm->vcpus_init_lock); } int vm_create(const char *name, struct vm **retvm) { struct vm *vm; struct vmspace *vmspace; /* * If vmm.ko could not be successfully initialized then don't attempt * to create the virtual machine. */ if (!vmm_initialized) return (ENXIO); if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); vmspace = vmmops_vmspace_alloc(0, 1ul << 39); if (vmspace == NULL) return (ENOMEM); vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->vmspace = vmspace; sx_init(&vm->mem_segs_lock, "vm mem_segs"); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; vm->cores = 1; /* XXX backwards compatibility */ vm->threads = 1; /* XXX backwards compatibility */ vm->maxcpus = vm_maxcpu; vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, M_WAITOK | M_ZERO); vm_init(vm, true); *retvm = vm; return (0); } void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus) { *sockets = vm->sockets; *cores = vm->cores; *threads = vm->threads; *maxcpus = vm->maxcpus; } uint16_t vm_get_maxcpus(struct vm *vm) { return (vm->maxcpus); } int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus) { /* Ignore maxcpus. */ if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); vm->sockets = sockets; vm->cores = cores; vm->threads = threads; return(0); } static void vm_cleanup(struct vm *vm, bool destroy) { struct mem_map *mm; pmap_t pmap __diagused; int i; if (destroy) { pmap = vmspace_pmap(vm->vmspace); sched_pin(); PCPU_SET(curvmpmap, NULL); sched_unpin(); CPU_FOREACH(i) { MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap); } } vgic_detach_from_vm(vm->cookie); for (i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_cleanup(vm->vcpu[i], destroy); } vmmops_cleanup(vm->cookie); /* * System memory is removed from the guest address space only when * the VM is destroyed. This is because the mapping remains the same * across VM reset. 
* * Device memory can be relocated by the guest (e.g. using PCI BARs) * so those mappings are removed on a VM reset. */ if (!destroy) { for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (destroy || !sysmem_mapping(vm, mm)) vm_free_memmap(vm, i); } } if (destroy) { for (i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); vmmops_vmspace_free(vm->vmspace); vm->vmspace = NULL; for (i = 0; i < vm->maxcpus; i++) free(vm->vcpu[i], M_VMM); free(vm->vcpu, M_VMM); sx_destroy(&vm->vcpus_init_lock); sx_destroy(&vm->mem_segs_lock); } } void vm_destroy(struct vm *vm) { vm_cleanup(vm, true); free(vm, M_VMM); } int vm_reinit(struct vm *vm) { int error; /* * A virtual machine can be reset only if all vcpus are suspended. */ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { vm_cleanup(vm, false); vm_init(vm, false); error = 0; } else { error = EBUSY; } return (error); } const char * vm_name(struct vm *vm) { return (vm->name); } void vm_slock_memsegs(struct vm *vm) { sx_slock(&vm->mem_segs_lock); } void vm_xlock_memsegs(struct vm *vm) { sx_xlock(&vm->mem_segs_lock); } void vm_unlock_memsegs(struct vm *vm) { sx_unlock(&vm->mem_segs_lock); } /* * Return 'true' if 'gpa' is allocated in the guest address space. * * This function is called in the context of a running vcpu which acts as * an implicit lock on 'vm->mem_maps[]'. */ bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) { struct vm *vm = vcpu->vm; struct mem_map *mm; int i; #ifdef INVARIANTS int hostcpu, state; state = vcpu_get_state(vcpu, &hostcpu); KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); #endif for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) return (true); /* 'gpa' is sysmem or devmem */ } return (false); } int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) { struct mem_seg *seg; vm_object_t obj; sx_assert(&vm->mem_segs_lock, SX_XLOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); if (len == 0 || (len & PAGE_MASK)) return (EINVAL); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { if (seg->len == len && seg->sysmem == sysmem) return (EEXIST); else return (EINVAL); } obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); if (obj == NULL) return (ENOMEM); seg->len = len; seg->object = obj; seg->sysmem = sysmem; return (0); } int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, vm_object_t *objptr) { struct mem_seg *seg; sx_assert(&vm->mem_segs_lock, SX_LOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[ident]; if (len) *len = seg->len; if (sysmem) *sysmem = seg->sysmem; if (objptr) *objptr = seg->object; return (0); } void vm_free_memseg(struct vm *vm, int ident) { struct mem_seg *seg; KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, ("%s: invalid memseg ident %d", __func__, ident)); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { vm_object_deallocate(seg->object); bzero(seg, sizeof(struct mem_seg)); } } int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, size_t len, int prot, int flags) { struct mem_seg *seg; struct mem_map *m, *map; vm_ooffset_t last; int i, error; if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) return (EINVAL); if (flags & ~VM_MEMMAP_F_WIRED) return (EINVAL); if (segid < 0 || segid >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[segid]; if (seg->object == NULL) return (EINVAL); last = first + len; if (first < 0 || 
first >= last || last > seg->len) return (EINVAL); if ((gpa | first | last) & PAGE_MASK) return (EINVAL); map = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->len == 0) { map = m; break; } } if (map == NULL) return (ENOSPC); error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, len, 0, VMFS_NO_SPACE, prot, prot, 0); if (error != KERN_SUCCESS) return (EFAULT); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : EFAULT); } } map->gpa = gpa; map->len = len; map->segoff = first; map->segid = segid; map->prot = prot; map->flags = flags; return (0); } int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) { struct mem_map *m; int i; for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->gpa == gpa && m->len == len) { vm_free_memmap(vm, i); return (0); } } return (EINVAL); } int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) { struct mem_map *mm, *mmnext; int i; mmnext = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len == 0 || mm->gpa < *gpa) continue; if (mmnext == NULL || mm->gpa < mmnext->gpa) mmnext = mm; } if (mmnext != NULL) { *gpa = mmnext->gpa; if (segid) *segid = mmnext->segid; if (segoff) *segoff = mmnext->segoff; if (len) *len = mmnext->len; if (prot) *prot = mmnext->prot; if (flags) *flags = mmnext->flags; return (0); } else { return (ENOENT); } } static void vm_free_memmap(struct vm *vm, int ident) { struct mem_map *mm; int error __diagused; mm = &vm->mem_maps[ident]; if (mm->len) { error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, mm->gpa + mm->len); KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", __func__, error)); bzero(mm, sizeof(struct mem_map)); } } static __inline bool sysmem_mapping(struct vm *vm, struct mem_map *mm) { if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) return (true); else return (false); } vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm) { struct mem_map *mm; vm_paddr_t maxaddr; int i; maxaddr = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (sysmem_mapping(vm, mm)) { if (maxaddr < mm->gpa + mm->len) maxaddr = mm->gpa + mm->len; } } return (maxaddr); } int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault) { vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); return (0); } static int vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg) { *rval = 0; return (0); } static int vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg) { *rval = *(uint64_t *)arg; return (0); } static int vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg) { return (0); } static const struct vmm_special_reg vmm_special_regs[] = { #define SPECIAL_REG(_reg, _read, _write) \ { \ .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \ ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ .esr_mask = ISS_MSR_REG_MASK, \ .reg_read = (_read), \ .reg_write = (_write), \ .arg = NULL, \ } #define ID_SPECIAL_REG(_reg, _name) \ { \ .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \ ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \ ((_reg ## _CRn) << 
ISS_MSR_CRn_SHIFT) | \ ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \ ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \ .esr_mask = ISS_MSR_REG_MASK, \ .reg_read = vmm_reg_read_arg, \ .reg_write = vmm_reg_wi, \ .arg = &(vmm_arch_regs._name), \ } /* ID registers */ ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0), ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0), ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0), ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0), ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1), /* * All other ID registers are read as zero. * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space. */ { .esr_iss = (3 << ISS_MSR_OP0_SHIFT) | (0 << ISS_MSR_OP1_SHIFT) | (0 << ISS_MSR_CRn_SHIFT) | (0 << ISS_MSR_CRm_SHIFT), .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK | ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT), .reg_read = vmm_reg_raz, .reg_write = vmm_reg_wi, .arg = NULL, }, /* Counter physical registers */ SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write), SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read, vtimer_phys_cval_write), SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read, vtimer_phys_tval_write), SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write), #undef SPECIAL_REG }; void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, reg_read_t reg_read, reg_write_t reg_write, void *arg) { int i; for (i = 0; i < nitems(vm->special_reg); i++) { if (vm->special_reg[i].esr_iss == 0 && vm->special_reg[i].esr_mask == 0) { vm->special_reg[i].esr_iss = iss; vm->special_reg[i].esr_mask = mask; vm->special_reg[i].reg_read = reg_read; vm->special_reg[i].reg_write = reg_write; vm->special_reg[i].arg = arg; return; } } panic("%s: No free special register slot", __func__); } void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask) { int i; for (i = 0; i < nitems(vm->special_reg); i++) { if (vm->special_reg[i].esr_iss == iss && vm->special_reg[i].esr_mask == mask) { memset(&vm->special_reg[i], 0, sizeof(vm->special_reg[i])); return; } } panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss, mask); } static int vm_handle_reg_emul(struct vcpu *vcpu, bool *retu) { struct vm *vm; struct vm_exit *vme; struct vre *vre; int i, rv; vm = vcpu->vm; vme = &vcpu->exitinfo; vre = &vme->u.reg_emul.vre; for (i = 0; i < nitems(vm->special_reg); i++) { if (vm->special_reg[i].esr_iss == 0 && vm->special_reg[i].esr_mask == 0) continue; if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) == vm->special_reg[i].esr_iss) { rv = vmm_emulate_register(vcpu, vre, vm->special_reg[i].reg_read, vm->special_reg[i].reg_write, vm->special_reg[i].arg); if (rv == 0) { *retu = false; } return (rv); } } for (i = 0; i < nitems(vmm_special_regs); i++) { if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) == vmm_special_regs[i].esr_iss) { rv = vmm_emulate_register(vcpu, vre, vmm_special_regs[i].reg_read, vmm_special_regs[i].reg_write, vmm_special_regs[i].arg); if (rv == 0) { *retu = false; } return (rv); } } *retu = true; return (0); } void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, mem_region_read_t mmio_read, mem_region_write_t mmio_write) { int i; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start == 0 && vm->mmio_region[i].end == 0) { vm->mmio_region[i].start = start; vm->mmio_region[i].end = start + size; vm->mmio_region[i].read = mmio_read; vm->mmio_region[i].write = mmio_write; return; } } panic("%s: No free MMIO region", __func__); } void vm_deregister_inst_handler(struct vm *vm, uint64_t start, 
uint64_t size) { int i; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start == start && vm->mmio_region[i].end == start + size) { memset(&vm->mmio_region[i], 0, sizeof(vm->mmio_region[i])); return; } } panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, start + size); } static int vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) { struct vm *vm; struct vm_exit *vme; struct vie *vie; struct hyp *hyp; uint64_t fault_ipa; struct vm_guest_paging *paging; struct vmm_mmio_region *vmr; int error, i; vm = vcpu->vm; hyp = vm->cookie; if (!hyp->vgic_attached) goto out_user; vme = &vcpu->exitinfo; vie = &vme->u.inst_emul.vie; paging = &vme->u.inst_emul.paging; fault_ipa = vme->u.inst_emul.gpa; vmr = NULL; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start <= fault_ipa && vm->mmio_region[i].end > fault_ipa) { vmr = &vm->mmio_region[i]; break; } } if (vmr == NULL) goto out_user; error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, vmr->read, vmr->write, retu); return (error); out_user: *retu = true; return (0); } int vm_suspend(struct vm *vm, enum vm_suspend_how how) { int i; if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); } VM_CTR1(vm, "virtual machine successfully suspended %d", how); /* * Notify all active vcpus that they are now suspended. */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i)); } return (0); } void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) { struct vm *vm = vcpu->vm; struct vm_exit *vmexit; KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); vmexit = vm_exitinfo(vcpu); vmexit->pc = pc; vmexit->inst_length = 4; vmexit->exitcode = VM_EXITCODE_SUSPENDED; vmexit->u.suspended.how = vm->suspend; } void vm_exit_debug(struct vcpu *vcpu, uint64_t pc) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vcpu); vmexit->pc = pc; vmexit->inst_length = 4; vmexit->exitcode = VM_EXITCODE_DEBUG; } int vm_activate_cpu(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EBUSY); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); return (0); } int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { vm->debug_cpus = vm->active_cpus; for (int i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i)); } } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EINVAL); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); vcpu_notify_event(vcpu); } return (0); } int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { CPU_ZERO(&vm->debug_cpus); } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) return (EINVAL); CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); } return (0); } int vcpu_debugged(struct vcpu *vcpu) { return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); } cpuset_t vm_active_cpus(struct vm *vm) { return (vm->active_cpus); } cpuset_t vm_debug_cpus(struct vm *vm) { return (vm->debug_cpus); } cpuset_t vm_suspended_cpus(struct vm *vm) { return (vm->suspended_cpus); } void * vcpu_stats(struct vcpu *vcpu) { return (vcpu->stats); } /* * This function is called to ensure that a vcpu "sees" a pending event * as soon as possible: * - If the vcpu thread is sleeping then it is woken up. 
* - If the vcpu is running on a different host_cpu then an IPI will be directed * to the host_cpu to cause the vcpu to trap into the hypervisor. */ static void vcpu_notify_event_locked(struct vcpu *vcpu) { int hostcpu; hostcpu = vcpu->hostcpu; if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { ipi_cpu(hostcpu, vmm_ipinum); } else { /* * If the 'vcpu' is running on 'curcpu' then it must * be sending a notification to itself (e.g. SELF_IPI). * The pending event will be picked up when the vcpu * transitions back to guest context. */ } } else { KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " "with hostcpu %d", vcpu->state, hostcpu)); if (vcpu->state == VCPU_SLEEPING) wakeup_one(vcpu); } } void vcpu_notify_event(struct vcpu *vcpu) { vcpu_lock(vcpu); vcpu_notify_event_locked(vcpu); vcpu_unlock(vcpu); } static void restore_guest_fpustate(struct vcpu *vcpu) { /* flush host state to the pcb */ vfp_save_state(curthread, curthread->td_pcb); /* Ensure the VFP state will be re-loaded when exiting the guest */ PCPU_SET(fpcurthread, NULL); /* restore guest FPU state */ vfp_enable(); vfp_restore(vcpu->guestfpu); /* * The FPU is now "dirty" with the guest's state so turn on emulation * to trap any access to the FPU by the host. */ vfp_disable(); } static void save_guest_fpustate(struct vcpu *vcpu) { if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) != CPACR_FPEN_TRAP_ALL1) panic("VFP not enabled in host!"); /* save guest FPU state */ vfp_enable(); vfp_store(vcpu->guestfpu); vfp_disable(); KASSERT(PCPU_GET(fpcurthread) == NULL, ("%s: fpcurthread set with guest registers", __func__)); } static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_assert_locked(vcpu); /* * State transitions from the vmmdev_ioctl() must always begin from * the VCPU_IDLE state. This guarantees that there is only a single * ioctl() operating on a vcpu at any point. 
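* The from_idle loop below re-notifies the vcpu and sleeps until it reaches VCPU_IDLE.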
*/ if (from_idle) { while (vcpu->state != VCPU_IDLE) { vcpu_notify_event_locked(vcpu); msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); } } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); } if (vcpu->state == VCPU_RUNNING) { KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " "mismatch for running vcpu", curcpu, vcpu->hostcpu)); } else { KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " "vcpu that is not running", vcpu->hostcpu)); } /* * The following state transitions are allowed: * IDLE -> FROZEN -> IDLE * FROZEN -> RUNNING -> FROZEN * FROZEN -> SLEEPING -> FROZEN */ switch (vcpu->state) { case VCPU_IDLE: case VCPU_RUNNING: case VCPU_SLEEPING: error = (newstate != VCPU_FROZEN); break; case VCPU_FROZEN: error = (newstate == VCPU_FROZEN); break; default: error = 1; break; } if (error) return (EBUSY); vcpu->state = newstate; if (newstate == VCPU_RUNNING) vcpu->hostcpu = curcpu; else vcpu->hostcpu = NOCPU; if (newstate == VCPU_IDLE) wakeup(&vcpu->state); return (0); } static void vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d\n", error, newstate); } static void vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d", error, newstate); } int vm_get_capability(struct vcpu *vcpu, int type, int *retval) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_getcap(vcpu->cookie, type, retval)); } int vm_set_capability(struct vcpu *vcpu, int type, int val) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_setcap(vcpu->cookie, type, val)); } struct vm * vcpu_vm(struct vcpu *vcpu) { return (vcpu->vm); } int vcpu_vcpuid(struct vcpu *vcpu) { return (vcpu->vcpuid); } void * vcpu_get_cookie(struct vcpu *vcpu) { return (vcpu->cookie); } struct vcpu * vm_vcpu(struct vm *vm, int vcpuid) { return (vm->vcpu[vcpuid]); } int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_lock(vcpu); error = vcpu_set_state_locked(vcpu, newstate, from_idle); vcpu_unlock(vcpu); return (error); } enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu) { enum vcpu_state state; vcpu_lock(vcpu); state = vcpu->state; if (hostcpu != NULL) *hostcpu = vcpu->hostcpu; vcpu_unlock(vcpu); return (state); } static void * _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { int i, count, pageoff; struct mem_map *mm; vm_page_t m; pageoff = gpa & PAGE_MASK; if (len > PAGE_SIZE - pageoff) panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); count = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && gpa < mm->gpa + mm->len) { count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); break; } } if (count == 1) { *cookie = m; return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); } else { *cookie = NULL; return (NULL); } } void * vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { #ifdef INVARIANTS /* * The current vcpu should be frozen to ensure 'vm_memmap[]' * stability. 
*/ int state = vcpu_get_state(vcpu, NULL); KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", __func__, state)); #endif return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); } void * vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { sx_assert(&vm->mem_segs_lock, SX_LOCKED); return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); } void vm_gpa_release(void *cookie) { vm_page_t m = cookie; vm_page_unwire(m, PQ_ACTIVE); } int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { if (reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); } int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; if (reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_PC) return (error); vcpu->nextpc = val; return (0); } void * vm_get_cookie(struct vm *vm) { return (vm->cookie); } int vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far) { return (vmmops_exception(vcpu->cookie, esr, far)); } int vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr) { return (vgic_attach_to_vm(vm->cookie, descr)); } int vm_assert_irq(struct vm *vm, uint32_t irq) { return (vgic_inject_irq(vm->cookie, -1, irq, true)); } int vm_deassert_irq(struct vm *vm, uint32_t irq) { return (vgic_inject_irq(vm->cookie, -1, irq, false)); } int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, int func) { /* TODO: Should we raise an SError? */ return (vgic_inject_msi(vm->cookie, msg, addr)); } static int vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { struct hypctx *hypctx; int i; hypctx = vcpu_get_cookie(vcpu); if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0) return (1); vme->exitcode = VM_EXITCODE_SMCCC; vme->u.smccc_call.func_id = hypctx->tf.tf_x[0]; for (i = 0; i < nitems(vme->u.smccc_call.args); i++) vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1]; *retu = true; return (0); } static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { vcpu_lock(vcpu); while (1) { if (vgic_has_pending_irq(vcpu->cookie)) break; if (vcpu_should_yield(vcpu)) break; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); /* * XXX msleep_spin() cannot be interrupted by signals so * wake up periodically to check pending signals. */ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); } vcpu_unlock(vcpu); *retu = false; return (0); } static int vm_handle_paging(struct vcpu *vcpu, bool *retu) { struct vm *vm = vcpu->vm; struct vm_exit *vme; struct vm_map *map; uint64_t addr, esr; pmap_t pmap; int ftype, rv; vme = &vcpu->exitinfo; pmap = vmspace_pmap(vcpu->vm->vmspace); addr = vme->u.paging.gpa; esr = vme->u.paging.esr; /* The page exists, but the page table needs to be updated. */ if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS) return (0); switch (ESR_ELx_EXCEPTION(esr)) { case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE; break; default: panic("%s: Invalid exception (esr = %lx)", __func__, esr); } map = &vm->vmspace->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); if (rv != KERN_SUCCESS) return (EFAULT); return (0); } static int vm_handle_suspend(struct vcpu *vcpu, bool *retu) { struct vm *vm = vcpu->vm; int error, i; struct thread *td; error = 0; td = curthread; CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); /* * Wait until all 'active_cpus' have suspended themselves. 
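* The last vcpu to suspend makes 'suspended_cpus' equal to 'active_cpus', ending the wait below.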
* * Since a VM may be suspended at any time including when one or * more vcpus are doing a rendezvous we need to call the rendezvous * handler while we are waiting to prevent a deadlock. */ vcpu_lock(vcpu); while (error == 0) { if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) break; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); if (td_ast_pending(td, TDA_SUSPEND)) { vcpu_unlock(vcpu); error = thread_check_susp(td, false); vcpu_lock(vcpu); } } vcpu_unlock(vcpu); /* * Wakeup the other sleeping vcpus and return to userspace. */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { vcpu_notify_event(vm_vcpu(vm, i)); } } *retu = true; return (error); } int vm_run(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; struct vm_eventinfo evinfo; int error, vcpuid; struct vm_exit *vme; bool retu; pmap_t pmap; vcpuid = vcpu->vcpuid; if (!CPU_ISSET(vcpuid, &vm->active_cpus)) return (EINVAL); if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); pmap = vmspace_pmap(vm->vmspace); vme = &vcpu->exitinfo; evinfo.rptr = NULL; evinfo.sptr = &vm->suspend; evinfo.iptr = NULL; restart: critical_enter(); restore_guest_fpustate(vcpu); vcpu_require_state(vcpu, VCPU_RUNNING); error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); vcpu_require_state(vcpu, VCPU_FROZEN); save_guest_fpustate(vcpu); critical_exit(); if (error == 0) { retu = false; switch (vme->exitcode) { case VM_EXITCODE_INST_EMUL: vcpu->nextpc = vme->pc + vme->inst_length; error = vm_handle_inst_emul(vcpu, &retu); break; case VM_EXITCODE_REG_EMUL: vcpu->nextpc = vme->pc + vme->inst_length; error = vm_handle_reg_emul(vcpu, &retu); break; case VM_EXITCODE_HVC: /* * The HVC instruction saves the address for the * next instruction as the return address. */ vcpu->nextpc = vme->pc; /* * The PSCI call can change the exit information in the * case of suspend/reset/poweroff/cpu off/cpu on. */ error = vm_handle_smccc_call(vcpu, vme, &retu); break; case VM_EXITCODE_WFI: vcpu->nextpc = vme->pc + vme->inst_length; error = vm_handle_wfi(vcpu, vme, &retu); break; case VM_EXITCODE_PAGING: vcpu->nextpc = vme->pc; error = vm_handle_paging(vcpu, &retu); break; case VM_EXITCODE_SUSPENDED: vcpu->nextpc = vme->pc; error = vm_handle_suspend(vcpu, &retu); break; default: /* Handle in userland */ vcpu->nextpc = vme->pc; retu = true; break; } } if (error == 0 && retu == false) goto restart; return (error); } diff --git a/sys/arm64/vmm/vmm_dev_machdep.c b/sys/arm64/vmm/vmm_dev_machdep.c new file mode 100644 index 000000000000..a8e0ee50fd81 --- /dev/null +++ b/sys/arm64/vmm/vmm_dev_machdep.c @@ -0,0 +1,137 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include "io/vgic.h" + +const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = { + VMMDEV_IOCTL(VM_RUN, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GLA2GPA_NOFAULT, VMMDEV_IOCTL_LOCK_ONE_VCPU), + + VMMDEV_IOCTL(VM_ATTACH_VGIC, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + + VMMDEV_IOCTL(VM_GET_VGIC_VERSION, 0), + VMMDEV_IOCTL(VM_RAISE_MSI, 0), + VMMDEV_IOCTL(VM_ASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_DEASSERT_IRQ, 0), +}; +const size_t vmmdev_machdep_ioctl_count = nitems(vmmdev_machdep_ioctls); + +int +vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, + int fflag, struct thread *td) +{ + struct vm_run *vmrun; + struct vm_vgic_version *vgv; + struct vm_vgic_descr *vgic; + struct vm_irq *vi; + struct vm_exception *vmexc; + struct vm_gla2gpa *gg; + struct vm_msi *vmsi; + int error; + + error = 0; + switch (cmd) { + case VM_RUN: { + struct vm_exit *vme; + + vmrun = (struct vm_run *)data; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error != 0) + break; + + error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); + if (error != 0) + break; + break; + } + case VM_INJECT_EXCEPTION: + vmexc = (struct vm_exception *)data; + error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far); + break; + case VM_GLA2GPA_NOFAULT: + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + case VM_GET_VGIC_VERSION: + vgv = (struct vm_vgic_version *)data; + /* TODO: Query the vgic driver for this */ + vgv->version = 3; + vgv->flags = 0; + error = 0; + break; + case VM_ATTACH_VGIC: + vgic = (struct vm_vgic_descr *)data; + error = vm_attach_vgic(vm, vgic); + break; + case VM_RAISE_MSI: + vmsi = (struct vm_msi *)data; + error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus, + vmsi->slot, vmsi->func); + break; + case VM_ASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_assert_irq(vm, vi->irq); + break; + case VM_DEASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_deassert_irq(vm, vi->irq); + break; + default: + error = ENOTTY; + break; + } + + return (error); +} diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 3a8818340f75..4cfe87f5a508 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -1,758 +1,759 @@ ## ## Kernel ## kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_devmap.c standard kern/subr_intr.c optional intrng kern/subr_physmem.c standard 
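The vmmdev_machdep_ioctls[] table and vmmdev_machdep_ioctl() above are now the entire arm64-specific ioctl surface: each entry pairs a command with locking flags, and the common driver performs that locking before calling in. The descriptor type lives in sys/dev/vmm/vmm_dev.h, which this diff does not show; what follows is a minimal sketch of what it plausibly looks like, reconstructed from its uses in this patch (the flag names and macro name are taken from the code above; the struct layout, flag values, and macro body are assumptions).

/*
 * Sketch only: reconstructed from the uses of VMMDEV_IOCTL() and the
 * VMMDEV_IOCTL_* flags in this patch.  The real definitions are in
 * sys/dev/vmm/vmm_dev.h; the layout and values here are assumptions.
 */
struct vmmdev_ioctl {
	unsigned long	cmd;	/* ioctl command, e.g. VM_RUN */
	int		flags;	/* locking done before the handler runs */
};

#define	VMMDEV_IOCTL_XLOCK_MEMSEGS	0x01	/* exclusive memseg lock */
#define	VMMDEV_IOCTL_SLOCK_MEMSEGS	0x02	/* shared memseg lock */
#define	VMMDEV_IOCTL_LOCK_ONE_VCPU	0x04	/* freeze the target vcpu */
#define	VMMDEV_IOCTL_LOCK_ALL_VCPUS	0x08	/* freeze every vcpu */
#define	VMMDEV_IOCTL_ALLOC_VCPU		0x10	/* vcpuid must name a vcpu */
#define	VMMDEV_IOCTL_MAYBE_ALLOC_VCPU	0x20	/* vcpuid may also be -1 */

#define	VMMDEV_IOCTL(_cmd, _flags)	{ .cmd = (_cmd), .flags = (_flags) }

The payoff shows up in the rewritten vmmdev_ioctl() later in this diff: the hand-written switch that used to decide which locks each command needed is replaced by a linear scan of vmmdev_ioctls[] and then vmmdev_machdep_ioctls[], with commands found in neither table rejected with ENOTTY, and commands the common switch does not handle forwarded to vmmdev_machdep_ioctl().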
libkern/strlen.c standard libkern/arm64/crc32c_armv8.S standard arm/arm/generic_timer.c standard arm/arm/gic.c standard arm/arm/gic_acpi.c optional acpi arm/arm/gic_fdt.c optional fdt arm/arm/gic_if.m standard arm/arm/pmu.c standard arm/arm/pmu_acpi.c optional acpi arm/arm/pmu_fdt.c optional fdt arm64/acpica/acpi_iort.c optional acpi arm64/acpica/acpi_machdep.c optional acpi arm64/acpica/OsdEnvironment.c optional acpi arm64/acpica/acpi_wakeup.c optional acpi arm64/acpica/pci_cfgreg.c optional acpi pci arm64/arm64/autoconf.c standard arm64/arm64/bus_machdep.c standard arm64/arm64/bus_space_asm.S standard arm64/arm64/busdma_bounce.c standard arm64/arm64/busdma_machdep.c standard arm64/arm64/clock.c standard arm64/arm64/copyinout.S standard arm64/arm64/cpu_errata.c standard arm64/arm64/cpufunc_asm.S standard arm64/arm64/db_disasm.c optional ddb arm64/arm64/db_interface.c optional ddb arm64/arm64/db_trace.c optional ddb arm64/arm64/debug_monitor.c standard arm64/arm64/disassem.c optional ddb arm64/arm64/dump_machdep.c standard arm64/arm64/efirt_machdep.c optional efirt arm64/arm64/elf32_machdep.c optional compat_freebsd32 arm64/arm64/elf_machdep.c standard arm64/arm64/exception.S standard arm64/arm64/exec_machdep.c standard arm64/arm64/freebsd32_machdep.c optional compat_freebsd32 arm64/arm64/gdb_machdep.c optional gdb arm64/arm64/gicv3_its.c optional intrng fdt arm64/arm64/gic_v3.c standard arm64/arm64/gic_v3_acpi.c optional acpi arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/hyp_stub.S standard arm64/arm64/identcpu.c standard arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/machdep_boot.c standard arm64/arm64/mem.c standard arm64/arm64/memcmp.S standard arm64/arm64/memcpy.S standard arm64/arm64/memset.S standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp arm64/arm64/nexus.c standard arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pl031_rtc.c optional fdt pl031 arm64/arm64/ptrauth.c standard \ compile-with "${NORMAL_C:N-mbranch-protection*} -mbranch-protection=bti" arm64/arm64/pmap.c standard arm64/arm64/ptrace_machdep.c standard arm64/arm64/sdt_machdep.c optional kdtrace_hooks arm64/arm64/sigtramp.S standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/strcmp.S standard arm64/arm64/strncmp.S standard arm64/arm64/support_ifunc.c standard arm64/arm64/support.S standard arm64/arm64/swtch.S standard arm64/arm64/sys_machdep.c standard arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/undefined.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack \ compile-with "${NOSAN_C}" arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard arm64/coresight/coresight.c standard arm64/coresight/coresight_acpi.c optional acpi arm64/coresight/coresight_fdt.c optional fdt arm64/coresight/coresight_if.m standard arm64/coresight/coresight_cmd.c standard arm64/coresight/coresight_cpu_debug.c optional fdt arm64/coresight/coresight_etm4x.c standard arm64/coresight/coresight_etm4x_acpi.c optional acpi arm64/coresight/coresight_etm4x_fdt.c optional fdt arm64/coresight/coresight_funnel.c standard arm64/coresight/coresight_funnel_acpi.c optional acpi arm64/coresight/coresight_funnel_fdt.c optional fdt arm64/coresight/coresight_replicator.c standard arm64/coresight/coresight_replicator_acpi.c optional acpi arm64/coresight/coresight_replicator_fdt.c optional fdt arm64/coresight/coresight_tmc.c standard arm64/coresight/coresight_tmc_acpi.c optional acpi arm64/coresight/coresight_tmc_fdt.c 
optional fdt dev/smbios/smbios_subr.c standard arm64/iommu/iommu.c optional iommu arm64/iommu/iommu_if.m optional iommu arm64/iommu/iommu_pmap.c optional iommu arm64/iommu/smmu.c optional iommu arm64/iommu/smmu_acpi.c optional iommu acpi arm64/iommu/smmu_fdt.c optional iommu fdt arm64/iommu/smmu_quirks.c optional iommu dev/iommu/busdma_iommu.c optional iommu dev/iommu/iommu_gas.c optional iommu arm64/vmm/vmm.c optional vmm arm64/vmm/vmm_dev.c optional vmm arm64/vmm/vmm_instruction_emul.c optional vmm arm64/vmm/vmm_arm64.c optional vmm arm64/vmm/vmm_reset.c optional vmm arm64/vmm/vmm_handlers.c optional vmm arm64/vmm/vmm_call.S optional vmm arm64/vmm/vmm_nvhe_exception.S optional vmm \ compile-with "${NOSAN_C} -fpie" \ no-obj arm64/vmm/vmm_nvhe.c optional vmm \ compile-with "${NOSAN_C} -fpie" \ no-obj vmm_hyp_blob.elf.full optional vmm \ dependency "vmm_nvhe.o vmm_nvhe_exception.o" \ compile-with "${SYSTEM_LD_BASECMD} -o ${.TARGET} ${.ALLSRC} --defsym=_start='0x0' --defsym=text_start='0x0'" \ no-obj no-implicit-rule vmm_hyp_blob.elf optional vmm \ dependency "vmm_hyp_blob.elf.full" \ compile-with "${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule vmm_hyp_blob.bin optional vmm \ dependency vmm_hyp_blob.elf \ compile-with "${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule arm64/vmm/vmm_hyp_el2.S optional vmm \ dependency vmm_hyp_blob.bin arm64/vmm/vmm_mmu.c optional vmm arm64/vmm/vmm_vhe.c optional vmm arm64/vmm/vmm_vhe_exception.S optional vmm arm64/vmm/io/vgic.c optional vmm arm64/vmm/io/vgic_v3.c optional vmm arm64/vmm/io/vgic_if.m optional vmm arm64/vmm/io/vtimer.c optional vmm +dev/vmm/vmm_dev.c optional vmm dev/vmm/vmm_stat.c optional vmm crypto/armv8/armv8_crypto.c optional armv8crypto armv8_crypto_wrap.o optional armv8crypto \ dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8 ${WERROR} ${NO_WCAST_QUAL} ${CFLAGS:M-march=*:S/^$/-march=armv8-a/}+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "armv8_crypto_wrap.o" aesv8-armx.o optional armv8crypto | ossl \ dependency "$S/crypto/openssl/aarch64/aesv8-armx.S" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8 -I$S/crypto/openssl ${WERROR} ${NO_WCAST_QUAL} ${CFLAGS:M-march=*:S/^$/-march=armv8-a/}+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "aesv8-armx.o" ghashv8-armx.o optional armv8crypto \ dependency "$S/crypto/openssl/aarch64/ghashv8-armx.S" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8 -I$S/crypto/openssl ${WERROR} ${NO_WCAST_QUAL} ${CFLAGS:M-march=*:S/^$/-march=armv8-a/}+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "ghashv8-armx.o" crypto/des/des_enc.c optional netsmb crypto/openssl/ossl_aarch64.c optional ossl crypto/openssl/aarch64/chacha-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" crypto/openssl/aarch64/poly1305-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" crypto/openssl/aarch64/sha1-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" crypto/openssl/aarch64/sha256-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" crypto/openssl/aarch64/sha512-armv8.S optional ossl \ compile-with "${CC} -c 
${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" crypto/openssl/aarch64/vpaes-armv8.S optional ossl \ compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}" dev/acpica/acpi_bus_if.m optional acpi dev/acpica/acpi_if.m optional acpi dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pxm.c optional acpi dev/ahci/ahci_generic.c optional ahci cddl/dev/dtrace/aarch64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/aarch64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/aarch64/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" # zfs blake3 hash support contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S optional zfs compile-with "${ZFS_S:N-mgeneral-regs-only}" contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S optional zfs compile-with "${ZFS_S:N-mgeneral-regs-only}" # zfs sha2 hash support zfs-sha256-armv8.o optional zfs \ dependency "$S/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S" \ compile-with "${CC} -c ${ZFS_ASM_CFLAGS:N-mgeneral-regs-only} -o ${.TARGET} ${WERROR} $S/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S" \ no-implicit-rule \ clean "zfs-sha256-armv8.o" zfs-sha512-armv8.o optional zfs \ dependency "$S/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S" \ compile-with "${CC} -c ${ZFS_ASM_CFLAGS:N-mgeneral-regs-only} -o ${.TARGET} ${WERROR} $S/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S" \ no-implicit-rule \ clean "zfs-sha512-armv8.o" ## ## ASoC support ## dev/sound/fdt/audio_dai_if.m optional sound fdt dev/sound/fdt/audio_soc.c optional sound fdt dev/sound/fdt/dummy_codec.c optional sound fdt dev/sound/fdt/simple_amplifier.c optional sound fdt ## ## Device drivers ## dev/axgbe/if_axgbe.c optional axa fdt dev/axgbe/xgbe-desc.c optional axa fdt dev/axgbe/xgbe-dev.c optional axa fdt dev/axgbe/xgbe-drv.c optional axa fdt dev/axgbe/xgbe-mdio.c optional axa fdt dev/axgbe/xgbe-sysctl.c optional axa fdt dev/axgbe/xgbe-txrx.c optional axa fdt dev/axgbe/xgbe_osdep.c optional axa fdt dev/axgbe/xgbe-phy-v1.c optional axa fdt dev/cpufreq/cpufreq_dt.c optional cpufreq fdt dev/dpaa2/dpaa2_bp.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_buf.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_channel.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_cmd_if.m optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_con.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_console.c optional soc_nxp_ls dpaa2 fdt dev/dpaa2/dpaa2_io.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_mac.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_mc.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_mc_acpi.c optional soc_nxp_ls dpaa2 acpi dev/dpaa2/dpaa2_mc_fdt.c optional soc_nxp_ls dpaa2 fdt dev/dpaa2/dpaa2_mc_if.m optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_mcp.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_ni.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_rc.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_swp.c optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_swp_if.m optional soc_nxp_ls dpaa2 dev/dpaa2/dpaa2_types.c optional soc_nxp_ls dpaa2 dev/dpaa2/memac_mdio_acpi.c optional soc_nxp_ls dpaa2 acpi dev/dpaa2/memac_mdio_common.c optional soc_nxp_ls dpaa2 acpi | soc_nxp_ls dpaa2 fdt dev/dpaa2/memac_mdio_fdt.c optional soc_nxp_ls dpaa2 fdt dev/dpaa2/memac_mdio_if.m optional soc_nxp_ls dpaa2 acpi | soc_nxp_ls dpaa2 fdt # Synopsys DesignWare Ethernet Controller dev/dwc/if_dwc_rk.c optional fdt dwc_rk soc_rockchip_rk3328 | fdt dwc_rk 
soc_rockchip_rk3399 dev/dwc/if_dwc_socfpga.c optional fdt dwc_socfpga dev/enetc/enetc_mdio.c optional enetc soc_nxp_ls dev/enetc/if_enetc.c optional enetc iflib pci fdt soc_nxp_ls dev/eqos/if_eqos.c optional eqos dev/eqos/if_eqos_if.m optional eqos dev/eqos/if_eqos_fdt.c optional eqos fdt dev/etherswitch/felix/felix.c optional enetc etherswitch fdt felix pci soc_nxp_ls dev/firmware/arm/scmi.c optional fdt scmi dev/firmware/arm/scmi_clk.c optional fdt scmi dev/firmware/arm/scmi_if.m optional fdt scmi dev/firmware/arm/scmi_mailbox.c optional fdt scmi dev/firmware/arm/scmi_smc.c optional fdt scmi dev/firmware/arm/scmi_virtio.c optional fdt scmi virtio dev/firmware/arm/scmi_shmem.c optional fdt scmi dev/gpio/pl061.c optional pl061 gpio dev/gpio/pl061_acpi.c optional pl061 gpio acpi dev/gpio/pl061_fdt.c optional pl061 gpio fdt dev/gpio/qoriq_gpio.c optional soc_nxp_ls gpio fdt dev/hwpmc/hwpmc_arm64.c optional hwpmc dev/hwpmc/hwpmc_arm64_md.c optional hwpmc dev/hwpmc/hwpmc_cmn600.c optional hwpmc acpi arm64/arm64/cmn600.c optional hwpmc acpi dev/hwpmc/hwpmc_dmc620.c optional hwpmc acpi dev/hwpmc/pmu_dmc620.c optional hwpmc acpi # Microsoft Hyper-V dev/hyperv/vmbus/hyperv.c optional hyperv acpi dev/hyperv/vmbus/aarch64/hyperv_aarch64.c optional hyperv acpi dev/hyperv/vmbus/vmbus.c optional hyperv acpi pci dev/hyperv/vmbus/aarch64/vmbus_aarch64.c optional hyperv acpi dev/hyperv/vmbus/vmbus_if.m optional hyperv acpi dev/hyperv/vmbus/vmbus_res.c optional hyperv acpi dev/hyperv/vmbus/vmbus_xact.c optional hyperv acpi dev/hyperv/vmbus/aarch64/hyperv_machdep.c optional hyperv acpi dev/hyperv/vmbus/vmbus_chan.c optional hyperv acpi dev/hyperv/vmbus/hyperv_busdma.c optional hyperv acpi dev/hyperv/vmbus/vmbus_br.c optional hyperv acpi dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv acpi dev/hyperv/utilities/vmbus_timesync.c optional hyperv acpi dev/hyperv/utilities/vmbus_heartbeat.c optional hyperv acpi dev/hyperv/utilities/vmbus_ic.c optional hyperv acpi dev/hyperv/utilities/vmbus_shutdown.c optional hyperv acpi dev/hyperv/utilities/hv_kvp.c optional hyperv acpi dev/hyperv/input/hv_kbd.c optional hyperv acpi dev/hyperv/input/hv_kbdc.c optional hyperv acpi dev/hyperv/netvsc/hn_nvs.c optional hyperv acpi dev/hyperv/netvsc/hn_rndis.c optional hyperv acpi dev/hyperv/netvsc/if_hn.c optional hyperv acpi dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci acpi dev/ice/if_ice_iflib.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_lib.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_osdep.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_resmgr.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_strings.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_iflib_recovery_txrx.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_iflib_txrx.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_common.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_controlq.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_dcb.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_flex_pipe.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_flow.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_nvm.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_sched.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_switch.c optional 
ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_vlan_mode.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_fw_logging.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_fwlog.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/ice_rdma.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" dev/ice/irdma_if.m optional ice pci \ compile-with "${NORMAL_M} -I$S/dev/ice" dev/ice/irdma_di_if.m optional ice pci \ compile-with "${NORMAL_M} -I$S/dev/ice" dev/ice/ice_ddp_common.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" ice_ddp.c optional ice_ddp \ compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01032400 -mice_ddp -c${.TARGET}" \ no-ctfconvert no-implicit-rule before-depend local \ clean "ice_ddp.c" ice_ddp.fwo optional ice_ddp \ dependency "ice_ddp.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ice_ddp.fwo" ice_ddp.fw optional ice_ddp \ dependency "$S/contrib/dev/ice/ice-1.3.36.0.pkg" \ compile-with "${CP} $S/contrib/dev/ice/ice-1.3.36.0.pkg ice_ddp.fw" \ no-obj no-implicit-rule \ clean "ice_ddp.fw" dev/iicbus/controller/twsi/mv_twsi.c optional twsi fdt dev/iicbus/controller/twsi/a10_twsi.c optional twsi fdt dev/iicbus/controller/twsi/twsi.c optional twsi fdt dev/iicbus/controller/rockchip/rk_i2c.c optional rk_i2c fdt dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_bt.c optional ipmi dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/mailbox/arm/arm_doorbell.c optional fdt arm_doorbell dev/mbox/mbox_if.m optional soc_brcm_bcm2837 dev/mmc/host/dwmmc_altera.c optional dwmmc dwmmc_altera fdt dev/mmc/host/dwmmc_hisi.c optional dwmmc dwmmc_hisi fdt dev/mmc/host/dwmmc_rockchip.c optional dwmmc rk_dwmmc fdt dev/neta/if_mvneta_fdt.c optional neta fdt dev/neta/if_mvneta.c optional neta mdio mii fdt dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofw_pci.c optional fdt pci dev/ofw/ofw_pcib.c optional fdt pci dev/pci/controller/pci_n1sdp.c optional pci_n1sdp acpi dev/pci/pci_host_generic.c optional pci dev/pci/pci_host_generic_acpi.c optional pci acpi dev/pci/pci_host_generic_den0115.c optional pci acpi dev/pci/pci_host_generic_fdt.c optional pci fdt dev/pci/pci_dw_mv.c optional pci fdt dev/pci/pci_dw.c optional pci fdt dev/pci/pci_dw_if.m optional pci fdt dev/psci/psci.c standard dev/psci/smccc_arm64.S standard dev/psci/smccc.c standard dev/pwm/controller/allwinner/aw_pwm.c optional fdt aw_pwm dev/pwm/controller//rockchip/rk_pwm.c optional fdt rk_pwm dev/random/armv8rng.c optional armv8_rng !random_loadable dev/safexcel/safexcel.c optional safexcel fdt dev/sdhci/sdhci_xenon.c optional sdhci_xenon sdhci dev/sdhci/sdhci_xenon_acpi.c optional sdhci_xenon sdhci acpi dev/sdhci/sdhci_xenon_fdt.c optional sdhci_xenon sdhci fdt dev/sram/mmio_sram.c optional fdt mmio_sram dev/sram/mmio_sram_if.m optional fdt mmio_sram dev/spibus/controller/allwinner/aw_spi.c optional fdt aw_spi dev/spibus/controller/rockchip/rk_spi.c optional fdt rk_spi dev/uart/uart_cpu_arm64.c optional uart dev/uart/uart_dev_mu.c optional uart uart_mu fdt dev/uart/uart_dev_pl011.c optional uart pl011 dev/usb/controller/dwc_otg_hisi.c optional dwcotg fdt soc_hisi_hi6220 dev/usb/controller/ehci_mv.c optional ehci_mv fdt dev/usb/controller/generic_ehci.c optional ehci dev/usb/controller/generic_ehci_acpi.c optional 
ehci acpi dev/usb/controller/generic_ehci_fdt.c optional ehci fdt dev/usb/controller/generic_ohci.c optional ohci fdt dev/usb/controller/generic_usb_if.m optional ohci fdt dev/usb/controller/musb_otg_allwinner.c optional musb fdt soc_allwinner_a64 dev/usb/controller/usb_nop_xceiv.c optional fdt dev/usb/controller/generic_xhci.c optional xhci dev/usb/controller/generic_xhci_acpi.c optional xhci acpi dev/usb/controller/generic_xhci_fdt.c optional xhci fdt dev/usb/controller/dwc3/dwc3.c optional xhci acpi dwc3 | xhci fdt dwc3 dev/usb/controller/dwc3/aw_dwc3.c optional xhci fdt dwc3 aw_dwc3 dev/usb/controller/dwc3/rk_dwc3.c optional xhci fdt dwc3 rk_dwc3 dev/vnic/mrml_bridge.c optional vnic fdt dev/vnic/nic_main.c optional vnic pci dev/vnic/nicvf_main.c optional vnic pci pci_iov dev/vnic/nicvf_queues.c optional vnic pci pci_iov dev/vnic/thunder_bgx_fdt.c optional soc_cavm_thunderx pci vnic fdt dev/vnic/thunder_bgx.c optional soc_cavm_thunderx pci vnic pci dev/vnic/thunder_mdio_fdt.c optional soc_cavm_thunderx pci vnic fdt dev/vnic/thunder_mdio.c optional soc_cavm_thunderx pci vnic dev/vnic/lmac_if.m optional inet | inet6 | vnic ## ## SoC Support ## # Allwinner common files arm/allwinner/a10_timer.c optional a10_timer fdt arm/allwinner/a10_codec.c optional sound a10_codec fdt arm/allwinner/a31_dmac.c optional a31_dmac fdt arm/allwinner/a33_codec.c optional fdt sound a33_codec arm/allwinner/a64/sun50i_a64_acodec.c optional fdt sound a64_codec arm/allwinner/sunxi_dma_if.m optional a31_dmac arm/allwinner/aw_cir.c optional evdev aw_cir fdt arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt arm/allwinner/aw_i2s.c optional fdt sound aw_i2s arm/allwinner/aw_mmc.c optional mmc aw_mmc fdt | mmccam aw_mmc fdt arm/allwinner/aw_nmi.c optional aw_nmi fdt \ compile-with "${NORMAL_C} -I$S/contrib/device-tree/include" arm/allwinner/aw_r_intc.c optional aw_r_intc fdt arm/allwinner/aw_rsb.c optional aw_rsb fdt arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_sid.c optional aw_sid nvmem fdt arm/allwinner/aw_syscon.c optional aw_syscon syscon fdt arm/allwinner/aw_thermal.c optional aw_thermal nvmem fdt arm/allwinner/aw_usbphy.c optional ehci aw_usbphy fdt arm/allwinner/aw_usb3phy.c optional xhci aw_usbphy fdt arm/allwinner/aw_wdog.c optional aw_wdog fdt arm/allwinner/axp81x.c optional axp81x fdt arm/allwinner/if_awg.c optional awg syscon aw_sid nvmem fdt # Allwinner clock driver dev/clk/allwinner/aw_ccung.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_frac.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_m.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_mipi.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_nkmp.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_nm.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_nmm.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_np.c optional aw_ccu fdt dev/clk/allwinner/aw_clk_prediv_mux.c optional aw_ccu fdt dev/clk/allwinner/ccu_a64.c optional soc_allwinner_a64 aw_ccu fdt dev/clk/allwinner/ccu_h3.c optional soc_allwinner_h5 aw_ccu fdt dev/clk/allwinner/ccu_h6.c optional soc_allwinner_h6 aw_ccu fdt dev/clk/allwinner/ccu_h6_r.c optional soc_allwinner_h6 aw_ccu fdt dev/clk/allwinner/ccu_sun8i_r.c optional aw_ccu fdt dev/clk/allwinner/ccu_de2.c optional aw_ccu fdt # Allwinner padconf files arm/allwinner/a64/a64_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/a64/a64_r_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/h3/h3_padconf.c optional soc_allwinner_h5 fdt arm/allwinner/h3/h3_r_padconf.c optional soc_allwinner_h5 fdt arm/allwinner/h6/h6_padconf.c optional 
soc_allwinner_h6 fdt arm/allwinner/h6/h6_r_padconf.c optional soc_allwinner_h6 fdt # Altera/Intel arm64/intel/stratix10-soc-fpga-mgr.c optional soc_intel_stratix10 fdt arm64/intel/stratix10-svc.c optional soc_intel_stratix10 fdt # Annapurna arm/annapurna/alpine/alpine_ccu.c optional al_ccu fdt arm/annapurna/alpine/alpine_nb_service.c optional al_nb_service fdt arm/annapurna/alpine/alpine_pci.c optional al_pci fdt arm/annapurna/alpine/alpine_pci_msix.c optional al_pci fdt arm/annapurna/alpine/alpine_serdes.c optional al_serdes fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${.IMPSRC}" # Broadcom arm64/broadcom/brcmmdio/mdio_mux_iproc.c optional soc_brcm_ns2 fdt arm64/broadcom/brcmmdio/mdio_nexus_iproc.c optional soc_brcm_ns2 fdt arm64/broadcom/brcmmdio/mdio_ns2_pcie_phy.c optional soc_brcm_ns2 fdt pci arm64/broadcom/genet/if_genet.c optional soc_brcm_bcm2838 fdt genet arm/broadcom/bcm2835/bcm2835_audio.c optional sound vchiq fdt \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" arm/broadcom/bcm2835/bcm2835_bsc.c optional bcm2835_bsc fdt arm/broadcom/bcm2835/bcm2835_clkman.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_cpufreq.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_dma.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_fbd.c optional vt soc_brcm_bcm2837 fdt | vt soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_firmware.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_ft5406.c optional evdev bcm2835_ft5406 fdt arm/broadcom/bcm2835/bcm2835_gpio.c optional gpio soc_brcm_bcm2837 fdt | gpio soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_intr.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_mbox.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_rng.c optional !random_loadable soc_brcm_bcm2837 fdt | !random_loadable soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_sdhci.c optional sdhci soc_brcm_bcm2837 fdt | sdhci soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_sdhost.c optional sdhci soc_brcm_bcm2837 fdt | sdhci soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_spi.c optional bcm2835_spi fdt arm/broadcom/bcm2835/bcm2835_vcbus.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_vcio.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2835_wdog.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm2836.c optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt arm/broadcom/bcm2835/bcm283x_dwc_fdt.c optional dwcotg fdt soc_brcm_bcm2837 | dwcotg fdt soc_brcm_bcm2838 arm/broadcom/bcm2835/bcm2838_pci.c optional soc_brcm_bcm2838 fdt pci arm/broadcom/bcm2835/bcm2838_xhci.c optional soc_brcm_bcm2838 fdt pci xhci arm/broadcom/bcm2835/raspberrypi_gpio.c optional soc_brcm_bcm2837 gpio fdt | soc_brcm_bcm2838 gpio fdt contrib/vchiq/interface/compat/vchi_bsd.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM 
-D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_connected.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_core.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_shim.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_util.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" # Cavium arm64/cavium/thunder_pcie_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_pem.c optional soc_cavm_thunderx pci arm64/cavium/thunder_pcie_pem_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_common.c optional soc_cavm_thunderx pci # i.MX8 Clock support arm64/freescale/imx/imx8mq_ccm.c optional fdt soc_freescale_imx8 arm64/freescale/imx/clk/imx_clk_gate.c optional fdt soc_freescale_imx8 arm64/freescale/imx/clk/imx_clk_mux.c optional fdt soc_freescale_imx8 arm64/freescale/imx/clk/imx_clk_composite.c optional fdt soc_freescale_imx8 arm64/freescale/imx/clk/imx_clk_sscg_pll.c optional fdt soc_freescale_imx8 arm64/freescale/imx/clk/imx_clk_frac_pll.c optional fdt soc_freescale_imx8 # iMX drivers arm/freescale/imx/imx_gpio.c optional gpio soc_freescale_imx8 fdt arm/freescale/imx/imx_i2c.c optional fsliic arm/freescale/imx/imx_machdep.c optional fdt soc_freescale_imx8 arm64/freescale/imx/imx7gpc.c optional fdt soc_freescale_imx8 dev/ffec/if_ffec.c optional ffec # Marvell arm/mv/a37x0_gpio.c optional a37x0_gpio gpio fdt arm/mv/a37x0_iic.c optional a37x0_iic iicbus fdt arm/mv/a37x0_spi.c optional a37x0_spi spibus fdt arm/mv/clk/a37x0_tbg.c optional a37x0_tbg clk fdt syscon arm/mv/clk/a37x0_xtal.c optional a37x0_xtal clk fdt syscon arm/mv/armada38x/armada38x_rtc.c optional mv_rtc fdt arm/mv/gpio.c optional mv_gpio fdt arm/mv/mvebu_gpio.c optional mv_gpio fdt arm/mv/mvebu_pinctrl.c optional mvebu_pinctrl fdt arm/mv/mv_ap806_clock.c optional soc_marvell_8k fdt arm/mv/mv_ap806_gicp.c optional mv_ap806_gicp fdt arm/mv/mv_ap806_sei.c optional mv_ap806_sei fdt arm/mv/mv_cp110_clock.c optional soc_marvell_8k fdt arm/mv/mv_cp110_icu.c optional mv_cp110_icu fdt arm/mv/mv_cp110_icu_bus.c optional mv_cp110_icu fdt arm/mv/mv_thermal.c optional soc_marvell_8k mv_thermal fdt arm/mv/clk/a37x0_tbg_pll.c optional a37x0_tbg clk fdt syscon arm/mv/clk/a37x0_periph_clk_driver.c optional a37x0_nb_periph a37x0_sb_periph clk fdt syscon arm/mv/clk/a37x0_nb_periph_clk_driver.c optional a37x0_nb_periph clk fdt syscon arm/mv/clk/a37x0_sb_periph_clk_driver.c optional a37x0_sb_periph clk fdt syscon arm/mv/clk/periph.c optional a37x0_nb_periph a37x0_sb_periph clk fdt syscon arm/mv/clk/periph_clk_d.c optional a37x0_nb_periph a37x0_sb_periph clk fdt syscon arm/mv/clk/periph_clk_fixed.c optional a37x0_nb_periph a37x0_sb_periph clk fdt syscon arm/mv/clk/periph_clk_gate.c optional a37x0_nb_periph a37x0_sb_periph clk fdt syscon 
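Several entries in this listing (the ice_ddp trio above, tegra210_xusb_fw below, and the vmm_hyp_blob chain earlier) share one build idiom: a binary image is converted into a linkable object, and a small generated C stub registers the bytes at module load. The stub that $S/tools/fw_stub.awk emits is ordinary firmware(9) boilerplate; the sketch below shows the general shape under assumed symbol names (the _binary_* externs and module boilerplate are illustrative, not the actual generated output; only the image name and the 0x01032400 version come from the listing).

/*
 * Illustrative sketch of a fw_stub.awk-style firmware module.  The
 * extern symbol names and module glue are assumptions; the image is
 * registered with firmware(9) so drivers can firmware_get() it.
 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/firmware.h>
#include <sys/systm.h>

extern char _binary_ice_ddp_fw_start[];	/* provided by ice_ddp.fwo */
extern char _binary_ice_ddp_fw_end[];

static const struct firmware *ice_ddp_fp;

static int
ice_ddp_fw_modevent(module_t mod, int type, void *arg)
{
	switch (type) {
	case MOD_LOAD:
		ice_ddp_fp = firmware_register("ice_ddp",
		    _binary_ice_ddp_fw_start,
		    _binary_ice_ddp_fw_end - _binary_ice_ddp_fw_start,
		    0x01032400, NULL);
		return (ice_ddp_fp == NULL ? ENXIO : 0);
	case MOD_UNLOAD:
		return (firmware_unregister("ice_ddp"));
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t ice_ddp_fw_mod = {
	"ice_ddp",
	ice_ddp_fw_modevent,
	NULL
};
DECLARE_MODULE(ice_ddp_fw, ice_ddp_fw_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);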
arm/mv/clk/periph_clk_mux_gate.c optional a37x0_nb_periph a37x0_sb_periph clk fdt syscon # NVidia arm/nvidia/tegra_abpmisc.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_ahci.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_efuse.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_ehci.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_gpio.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_i2c.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_lic.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_mc.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_pcie.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_sdhci.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_soctherm_if.m optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_soctherm.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_uart.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_usbphy.c optional fdt soc_nvidia_tegra210 arm/nvidia/tegra_xhci.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/max77620.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/max77620_gpio.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/max77620_regulators.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/max77620_rtc.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_car.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_clk_per.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_clk_pll.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_clk_super.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_coretemp.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_cpufreq.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_pinmux.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_pmc.c optional fdt soc_nvidia_tegra210 arm64/nvidia/tegra210/tegra210_xusbpadctl.c optional fdt soc_nvidia_tegra210 # Nvidia firmware for Tegra tegra210_xusb_fw.c optional tegra210_xusb_fw \ dependency "$S/conf/files.arm64" \ compile-with "${AWK} -f $S/tools/fw_stub.awk tegra210_xusb.fw:tegra210_xusb_fw -mtegra210_xusb_fw -c${.TARGET}" \ no-ctfconvert no-implicit-rule before-depend local \ clean "tegra210_xusb_fw.c" tegra210_xusb.fwo optional tegra210_xusb_fw \ dependency "tegra210_xusb.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "tegra210_xusb.fwo" tegra210_xusb.fw optional tegra210_xusb_fw \ dependency "$S/contrib/dev/nvidia/tegra210_xusb.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "tegra210_xusb.fw" # NXP dev/iicbus/controller/vybrid/vf_i2c.c optional vf_i2c iicbus soc_nxp_ls dev/iicbus/controller/vybrid/vf_i2c_acpi.c optional vf_i2c iicbus acpi soc_nxp_ls dev/iicbus/controller/vybrid/vf_i2c_fdt.c optional vf_i2c iicbus fdt soc_nxp_ls arm64/qoriq/qoriq_dw_pci.c optional pci fdt soc_nxp_ls arm64/qoriq/qoriq_gpio_pic.c optional gpio fdt soc_nxp_ls arm64/qoriq/qoriq_therm.c optional pci fdt soc_nxp_ls arm64/qoriq/qoriq_therm_if.m optional pci fdt soc_nxp_ls arm64/qoriq/clk/ls1028a_clkgen.c optional clk soc_nxp_ls fdt arm64/qoriq/clk/ls1028a_flexspi_clk.c optional clk soc_nxp_ls fdt arm64/qoriq/clk/ls1046a_clkgen.c optional clk soc_nxp_ls fdt arm64/qoriq/clk/ls1088a_clkgen.c optional clk soc_nxp_ls fdt arm64/qoriq/clk/lx2160a_clkgen.c optional clk soc_nxp_ls fdt arm64/qoriq/clk/qoriq_clk_pll.c optional clk soc_nxp_ls arm64/qoriq/clk/qoriq_clkgen.c optional clk soc_nxp_ls fdt dev/ahci/ahci_fsl_fdt.c optional soc_nxp_ls ahci fdt dev/flash/flexspi/flex_spi.c optional clk flex_spi 
soc_nxp_ls fdt # Qualcomm arm64/qualcomm/qcom_gcc.c optional qcom_gcc fdt dev/qcom_mdio/qcom_mdio_ipq4018.c optional qcom_mdio fdt mdio mii # RockChip Drivers arm64/rockchip/rk3328_codec.c optional fdt rk3328codec soc_rockchip_rk3328 arm64/rockchip/rk3399_emmcphy.c optional fdt rk_emmcphy soc_rockchip_rk3399 arm64/rockchip/rk3568_combphy.c optional fdt rk_combphy soc_rockchip_rk3568 arm64/rockchip/rk3568_pcie.c optional fdt pci soc_rockchip_rk3568 arm64/rockchip/rk3568_pciephy.c optional fdt pci soc_rockchip_rk3568 arm64/rockchip/rk_i2s.c optional fdt sound soc_rockchip_rk3328 | fdt sound soc_rockchip_rk3399 arm64/rockchip/rk_otp.c optional fdt soc_rockchip_rk3568 arm64/rockchip/rk_otp_if.m optional fdt soc_rockchip_rk3568 dev/iicbus/pmic/rockchip/rk8xx.c optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/iicbus/pmic/rockchip/rk8xx_clocks.c optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/iicbus/pmic/rockchip/rk8xx_regulators.c optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/iicbus/pmic/rockchip/rk8xx_rtc.c optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/iicbus/pmic/rockchip/rk805.c optional fdt rk805 soc_rockchip_rk3328 dev/iicbus/pmic/rockchip/rk808.c optional fdt rk805 soc_rockchip_rk3399 dev/iicbus/pmic/rockchip/rk817.c optional fdt rk817 soc_rockchip_rk3568 arm64/rockchip/rk_grf.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 arm64/rockchip/rk_pinctrl.c optional fdt rk_pinctrl soc_rockchip_rk3328 | fdt rk_pinctrl soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 arm64/rockchip/rk_gpio.c optional fdt rk_gpio soc_rockchip_rk3328 | fdt rk_gpio soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 arm64/rockchip/rk_iodomain.c optional fdt rk_iodomain arm64/rockchip/rk_usb2phy.c optional fdt rk_usb2phy soc_rockchip_rk3328 | fdt rk_usb2phy soc_rockchip_rk3399 | fdt rk_usb2phy soc_rockchip_rk3568 arm64/rockchip/rk_typec_phy.c optional fdt rk_typec_phy soc_rockchip_rk3399 arm64/rockchip/rk_tsadc_if.m optional fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 arm64/rockchip/rk_tsadc.c optional fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 arm64/rockchip/rk_pcie.c optional fdt pci soc_rockchip_rk3399 arm64/rockchip/rk_pcie_phy.c optional fdt pci soc_rockchip_rk3399 # RockChip Clock support dev/clk/rockchip/rk_cru.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/clk/rockchip/rk_clk_armclk.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/clk/rockchip/rk_clk_composite.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/clk/rockchip/rk_clk_fract.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/clk/rockchip/rk_clk_gate.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/clk/rockchip/rk_clk_mux.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/clk/rockchip/rk_clk_pll.c optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399 | fdt soc_rockchip_rk3568 dev/clk/rockchip/rk3328_cru.c optional fdt soc_rockchip_rk3328 dev/clk/rockchip/rk3399_cru.c optional fdt soc_rockchip_rk3399 dev/clk/rockchip/rk3399_pmucru.c optional fdt soc_rockchip_rk3399 dev/clk/rockchip/rk3568_cru.c optional fdt soc_rockchip_rk3568 
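The `dev/vmm/vmm_dev.c optional vmm` line added earlier in this listing is what pulls the now-shared character-device driver into arm64 kernels; the hunk that follows is the move of that file out of sys/arm64/vmm. The userspace contract is untouched by the refactor: a VM is still driven through ioctls on its /dev/vmm/<name> node. As a reference point, here is a minimal sketch of the classic VM_RUN loop from a bhyve-like consumer, assuming the struct vm_run layout implied by the VM_RUN handler above (a leading vcpu id that the dispatcher reads, and vm_exit pointing at a caller-supplied buffer; any other fields are zeroed).

/*
 * Userspace sketch, not part of the patch.  Field uses follow the
 * VM_RUN handler above; the device path and exit handling are
 * simplified.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
run_vcpu0(const char *vmname)
{
	struct vm_exit vmexit;
	struct vm_run vmrun;
	char path[64];
	int fd;

	snprintf(path, sizeof(path), "/dev/vmm/%s", vmname);
	fd = open(path, O_RDWR);
	if (fd < 0)
		return (-1);

	memset(&vmrun, 0, sizeof(vmrun));
	vmrun.cpuid = 0;		/* the dispatcher reads this first int */
	vmrun.vm_exit = &vmexit;	/* the handler copyout()s the exit here */

	for (;;) {
		if (ioctl(fd, VM_RUN, &vmrun) == -1)
			break;			/* error or signal */
		if (vmexit.exitcode == VM_EXITCODE_SUSPENDED)
			break;			/* VM_SUSPEND was issued */
		/* emulate or forward all other exit codes here */
	}
	close(fd);
	return (0);
}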
dev/clk/rockchip/rk3568_pmucru.c optional fdt soc_rockchip_rk3568 # Xilinx arm/xilinx/uart_dev_cdnc.c optional uart soc_xilinx_zynq fdt arm/xilinx/zy7_gpio.c optional gpio soc_xilinx_zynq fdt dev/iicbus/controller/cadence/cdnc_i2c.c optional cdnc_i2c iicbus soc_xilinx_zynq fdt dev/usb/controller/xlnx_dwc3.c optional xhci soc_xilinx_zynq fdt dev/firmware/xilinx/zynqmp_firmware.c optional fdt soc_xilinx_zynq dev/firmware/xilinx/zynqmp_firmware_if.m optional fdt soc_xilinx_zynq dev/clk/xilinx/zynqmp_clock.c optional fdt soc_xilinx_zynq dev/clk/xilinx/zynqmp_clk_div.c optional fdt soc_xilinx_zynq dev/clk/xilinx/zynqmp_clk_fixed.c optional fdt soc_xilinx_zynq dev/clk/xilinx/zynqmp_clk_gate.c optional fdt soc_xilinx_zynq dev/clk/xilinx/zynqmp_clk_mux.c optional fdt soc_xilinx_zynq dev/clk/xilinx/zynqmp_clk_pll.c optional fdt soc_xilinx_zynq dev/clk/xilinx/zynqmp_reset.c optional fdt soc_xilinx_zynq diff --git a/sys/arm64/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c similarity index 74% rename from sys/arm64/vmm/vmm_dev.c rename to sys/dev/vmm/vmm_dev.c index 28877739e2ce..554583358168 100644 --- a/sys/arm64/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -1,1054 +1,1011 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * Copyright (C) 2015 Mihai Carabas * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
*/ #include -#include -#include -#include -#include -#include -#include #include -#include -#include #include +#include +#include +#include #include -#include +#include #include +#include +#include +#include +#include + +#include #include -#include -#include #include -#include -#include -#include -#include - -#include "vmm_stat.h" +#include +#include -#include "io/vgic.h" +static int devmem_create_cdev(const char *vmname, int id, char *devmem); struct devmem_softc { int segid; char *name; struct cdev *cdev; struct vmmdev_softc *sc; SLIST_ENTRY(devmem_softc) link; }; struct vmmdev_softc { struct vm *vm; /* vm instance cookie */ struct cdev *cdev; struct ucred *ucred; SLIST_ENTRY(vmmdev_softc) link; SLIST_HEAD(, devmem_softc) devmem; int flags; }; #define VSC_LINKED 0x01 static SLIST_HEAD(, vmmdev_softc) head; static unsigned pr_allow_flag; static struct mtx vmmdev_mtx; MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF); static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); -static int vmm_priv_check(struct ucred *ucred); -static int devmem_create_cdev(const char *vmname, int id, char *devmem); static void devmem_destroy(void *arg); static int vmm_priv_check(struct ucred *ucred) { - if (jailed(ucred) && !(ucred->cr_prison->pr_allow & pr_allow_flag)) return (EPERM); return (0); } static int vcpu_lock_one(struct vcpu *vcpu) { return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); } static void vcpu_unlock_one(struct vcpu *vcpu) { enum vcpu_state state; state = vcpu_get_state(vcpu, NULL); if (state != VCPU_FROZEN) { panic("vcpu %s(%d) has invalid state %d", vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state); } vcpu_set_state(vcpu, VCPU_IDLE, false); } static int vcpu_lock_all(struct vmmdev_softc *sc) { struct vcpu *vcpu; int error; uint16_t i, j, maxcpus; error = 0; vm_slock_vcpus(sc->vm); maxcpus = vm_get_maxcpus(sc->vm); for (i = 0; i < maxcpus; i++) { vcpu = vm_vcpu(sc->vm, i); if (vcpu == NULL) continue; error = vcpu_lock_one(vcpu); if (error) break; } if (error) { for (j = 0; j < i; j++) { vcpu = vm_vcpu(sc->vm, j); if (vcpu == NULL) continue; vcpu_unlock_one(vcpu); } vm_unlock_vcpus(sc->vm); } return (error); } static void vcpu_unlock_all(struct vmmdev_softc *sc) { struct vcpu *vcpu; uint16_t i, maxcpus; maxcpus = vm_get_maxcpus(sc->vm); for (i = 0; i < maxcpus; i++) { vcpu = vm_vcpu(sc->vm, i); if (vcpu == NULL) continue; vcpu_unlock_one(vcpu); } vm_unlock_vcpus(sc->vm); } static struct vmmdev_softc * vmmdev_lookup(const char *name) { struct vmmdev_softc *sc; mtx_assert(&vmmdev_mtx, MA_OWNED); SLIST_FOREACH(sc, &head, link) { if (strcmp(name, vm_name(sc->vm)) == 0) break; } if (sc == NULL) return (NULL); if (cr_cansee(curthread->td_ucred, sc->ucred)) return (NULL); return (sc); } static struct vmmdev_softc * vmmdev_lookup2(struct cdev *cdev) { - return (cdev->si_drv1); } static int vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) { int error, off, c, prot; vm_paddr_t gpa, maxaddr; void *hpa, *cookie; struct vmmdev_softc *sc; error = vmm_priv_check(curthread->td_ucred); if (error) return (error); sc = vmmdev_lookup2(cdev); if (sc == NULL) return (ENXIO); /* * Get a read lock on the guest memory map. */ vm_slock_memsegs(sc->vm); prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); maxaddr = vmm_sysmem_maxaddr(sc->vm); while (uio->uio_resid > 0 && error == 0) { gpa = uio->uio_offset; off = gpa & PAGE_MASK; c = min(uio->uio_resid, PAGE_SIZE - off); /* * The VM has a hole in its physical memory map. 
If we want to * use 'dd' to inspect memory beyond the hole we need to * provide bogus data for memory that lies in the hole. * * Since this device does not support lseek(2), dd(1) will * read(2) blocks of data to simulate the lseek(2). */ hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); if (hpa == NULL) { if (uio->uio_rw == UIO_READ && gpa < maxaddr) error = uiomove(__DECONST(void *, zero_region), c, uio); else error = EFAULT; } else { error = uiomove(hpa, c, uio); vm_gpa_release(cookie); } } vm_unlock_memsegs(sc->vm); return (error); } CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1); static int get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) { struct devmem_softc *dsc; int error; bool sysmem; error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); if (error || mseg->len == 0) return (error); if (!sysmem) { SLIST_FOREACH(dsc, &sc->devmem, link) { if (dsc->segid == mseg->segid) break; } KASSERT(dsc != NULL, ("%s: devmem segment %d not found", __func__, mseg->segid)); error = copystr(dsc->name, mseg->name, len, NULL); } else { bzero(mseg->name, len); } return (error); } static int alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) { char *name; int error; bool sysmem; error = 0; name = NULL; sysmem = true; /* * The allocation is lengthened by 1 to hold a terminating NUL. It'll * be stripped off when devfs processes the full string. */ if (VM_MEMSEG_NAME(mseg)) { sysmem = false; name = malloc(len, M_VMMDEV, M_WAITOK); error = copystr(mseg->name, name, len, NULL); if (error) goto done; } error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); if (error) goto done; if (VM_MEMSEG_NAME(mseg)) { error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); if (error) vm_free_memseg(sc->vm, mseg->segid); else name = NULL; /* freed when 'cdev' is destroyed */ } done: free(name, M_VMMDEV); return (error); } static int vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, uint64_t *regval) { int error, i; error = 0; for (i = 0; i < count; i++) { error = vm_get_register(vcpu, regnum[i], &regval[i]); if (error) break; } return (error); } static int vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, uint64_t *regval) { int error, i; error = 0; for (i = 0; i < count; i++) { error = vm_set_register(vcpu, regnum[i], regval[i]); if (error) break; } return (error); } +static const struct vmmdev_ioctl vmmdev_ioctls[] = { + VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), + +#if defined(__amd64__) && defined(COMPAT_FREEBSD12) + VMMDEV_IOCTL(VM_ALLOC_MEMSEG_FBSD12, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif + VMMDEV_IOCTL(VM_ALLOC_MEMSEG, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_MMAP_MEMSEG, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_MUNMAP_MEMSEG, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + VMMDEV_IOCTL(VM_REINIT, + VMMDEV_IOCTL_XLOCK_MEMSEGS |
VMMDEV_IOCTL_LOCK_ALL_VCPUS), + +#if defined(__amd64__) && defined(COMPAT_FREEBSD12) + VMMDEV_IOCTL(VM_GET_MEMSEG_FBSD12, VMMDEV_IOCTL_SLOCK_MEMSEGS), +#endif + VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS), + VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS), + + VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), + VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU), + + VMMDEV_IOCTL(VM_SUSPEND, 0), + VMMDEV_IOCTL(VM_GET_CPUS, 0), + VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0), + VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0), +}; + static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, - struct thread *td) + struct thread *td) { - int error, vcpuid, size; - cpuset_t *cpuset; struct vmmdev_softc *sc; struct vcpu *vcpu; - struct vm_register *vmreg; - struct vm_register_set *vmregset; - struct vm_run *vmrun; - struct vm_vgic_version *vgv; - struct vm_vgic_descr *vgic; - struct vm_cpuset *vm_cpuset; - struct vm_irq *vi; - struct vm_capability *vmcap; - struct vm_stats *vmstats; - struct vm_stat_desc *statdesc; - struct vm_suspend *vmsuspend; - struct vm_exception *vmexc; - struct vm_gla2gpa *gg; - struct vm_memmap *mm; - struct vm_munmap *mu; - struct vm_msi *vmsi; - struct vm_cpu_topology *topology; - uint64_t *regvals; - int *regnums; - enum { NONE, SINGLE, ALL } vcpus_locked; - bool memsegs_locked; + const struct vmmdev_ioctl *ioctl; + int error, vcpuid; - error = vmm_priv_check(curthread->td_ucred); + error = vmm_priv_check(td->td_ucred); if (error) return (error); sc = vmmdev_lookup2(cdev); if (sc == NULL) return (ENXIO); - error = 0; - vcpuid = -1; - vcpu = NULL; - vcpus_locked = NONE; - memsegs_locked = false; - - /* - * Some VMM ioctls can operate only on vcpus that are not running. - */ - switch (cmd) { - case VM_RUN: - case VM_GET_REGISTER: - case VM_SET_REGISTER: - case VM_GET_REGISTER_SET: - case VM_SET_REGISTER_SET: - case VM_INJECT_EXCEPTION: - case VM_GET_CAPABILITY: - case VM_SET_CAPABILITY: - case VM_GLA2GPA_NOFAULT: - case VM_ACTIVATE_CPU: - /* - * ioctls that can operate only on vcpus that are not running. - */ - vcpuid = *(int *)data; - vcpu = vm_alloc_vcpu(sc->vm, vcpuid); - if (vcpu == NULL) { - error = EINVAL; - goto done; + ioctl = NULL; + for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) { + if (vmmdev_ioctls[i].cmd == cmd) { + ioctl = &vmmdev_ioctls[i]; + break; } - error = vcpu_lock_one(vcpu); - if (error) - goto done; - vcpus_locked = SINGLE; - break; + } + if (ioctl == NULL) { + for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) { + if (vmmdev_machdep_ioctls[i].cmd == cmd) { + ioctl = &vmmdev_machdep_ioctls[i]; + break; + } + } + } + if (ioctl == NULL) + return (ENOTTY); - case VM_ALLOC_MEMSEG: - case VM_MMAP_MEMSEG: - case VM_MUNMAP_MEMSEG: - case VM_REINIT: - case VM_ATTACH_VGIC: - /* - * ioctls that modify the memory map must lock memory - * segments exclusively. - */ + if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0) vm_xlock_memsegs(sc->vm); - memsegs_locked = true; - - /* - * ioctls that operate on the entire virtual machine must - * prevent all vcpus from running. - */ - error = vcpu_lock_all(sc); - if (error) - goto done; - vcpus_locked = ALL; - break; - case VM_GET_MEMSEG: - case VM_MMAP_GETNEXT: - /* - * Lock the memory map while it is being inspected. - */ + else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0) vm_slock_memsegs(sc->vm); - memsegs_locked = true; - break; - - case VM_STATS: - /* - * These do not need the vCPU locked but do operate on - * a specific vCPU. 
- */ - vcpuid = *(int *)data; - vcpu = vm_alloc_vcpu(sc->vm, vcpuid); - if (vcpu == NULL) { - error = EINVAL; - goto done; - } - break; - case VM_SUSPEND_CPU: - case VM_RESUME_CPU: - /* - * These can either operate on all CPUs via a vcpuid of - * -1 or on a specific vCPU. - */ + vcpu = NULL; + vcpuid = -1; + if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU | + VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) { vcpuid = *(int *)data; - if (vcpuid == -1) - break; - vcpu = vm_alloc_vcpu(sc->vm, vcpuid); - if (vcpu == NULL) { - error = EINVAL; - goto done; + if (vcpuid == -1) { + if ((ioctl->flags & + VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) { + error = EINVAL; + goto lockfail; + } + } else { + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); + if (vcpu == NULL) { + error = EINVAL; + goto lockfail; + } + if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) { + error = vcpu_lock_one(vcpu); + if (error) + goto lockfail; + } } - break; - - case VM_ASSERT_IRQ: - vi = (struct vm_irq *)data; - error = vm_assert_irq(sc->vm, vi->irq); - break; - case VM_DEASSERT_IRQ: - vi = (struct vm_irq *)data; - error = vm_deassert_irq(sc->vm, vi->irq); - break; - default: - break; + } + if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) { + error = vcpu_lock_all(sc); + if (error) + goto lockfail; } switch (cmd) { - case VM_RUN: { - struct vm_exit *vme; - - vmrun = (struct vm_run *)data; - vme = vm_exitinfo(vcpu); + case VM_SUSPEND: { + struct vm_suspend *vmsuspend; - error = vm_run(vcpu); - if (error != 0) - break; - - error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); - if (error != 0) - break; - break; - } - case VM_SUSPEND: vmsuspend = (struct vm_suspend *)data; error = vm_suspend(sc->vm, vmsuspend->how); break; + } case VM_REINIT: error = vm_reinit(sc->vm); break; case VM_STAT_DESC: { + struct vm_stat_desc *statdesc; + statdesc = (struct vm_stat_desc *)data; - error = vmm_stat_desc_copy(statdesc->index, - statdesc->desc, sizeof(statdesc->desc)); + error = vmm_stat_desc_copy(statdesc->index, statdesc->desc, + sizeof(statdesc->desc)); break; } case VM_STATS: { - CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); + struct vm_stats *vmstats; + vmstats = (struct vm_stats *)data; getmicrotime(&vmstats->tv); error = vmm_stat_copy(vcpu, vmstats->index, - nitems(vmstats->statbuf), - &vmstats->num_entries, vmstats->statbuf); + nitems(vmstats->statbuf), &vmstats->num_entries, + vmstats->statbuf); break; } - case VM_MMAP_GETNEXT: + case VM_MMAP_GETNEXT: { + struct vm_memmap *mm; + mm = (struct vm_memmap *)data; error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, &mm->segoff, &mm->len, &mm->prot, &mm->flags); break; - case VM_MMAP_MEMSEG: + } + case VM_MMAP_MEMSEG: { + struct vm_memmap *mm; + mm = (struct vm_memmap *)data; error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, mm->len, mm->prot, mm->flags); break; - case VM_MUNMAP_MEMSEG: + } + case VM_MUNMAP_MEMSEG: { + struct vm_munmap *mu; + mu = (struct vm_munmap *)data; error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); break; + } +#if defined(__amd64__) && defined(COMPAT_FREEBSD12) + case VM_ALLOC_MEMSEG_FBSD12: + error = alloc_memseg(sc, (struct vm_memseg *)data, + sizeof(((struct vm_memseg_fbsd12 *)0)->name)); + break; + case VM_GET_MEMSEG_FBSD12: + error = get_memseg(sc, (struct vm_memseg *)data, + sizeof(((struct vm_memseg_fbsd12 *)0)->name)); + break; +#endif case VM_ALLOC_MEMSEG: error = alloc_memseg(sc, (struct vm_memseg *)data, sizeof(((struct vm_memseg *)0)->name)); break; case VM_GET_MEMSEG: error = get_memseg(sc, (struct vm_memseg *)data, 
 	switch (cmd) {
-	case VM_RUN: {
-		struct vm_exit *vme;
-
-		vmrun = (struct vm_run *)data;
-		vme = vm_exitinfo(vcpu);
+	case VM_SUSPEND: {
+		struct vm_suspend *vmsuspend;
 
-		error = vm_run(vcpu);
-		if (error != 0)
-			break;
-
-		error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
-		if (error != 0)
-			break;
-		break;
-	}
-	case VM_SUSPEND:
 		vmsuspend = (struct vm_suspend *)data;
 		error = vm_suspend(sc->vm, vmsuspend->how);
 		break;
+	}
 	case VM_REINIT:
 		error = vm_reinit(sc->vm);
 		break;
 	case VM_STAT_DESC: {
+		struct vm_stat_desc *statdesc;
+
 		statdesc = (struct vm_stat_desc *)data;
-		error = vmm_stat_desc_copy(statdesc->index,
-		    statdesc->desc, sizeof(statdesc->desc));
+		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
+		    sizeof(statdesc->desc));
 		break;
 	}
 	case VM_STATS: {
-		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
+		struct vm_stats *vmstats;
+
 		vmstats = (struct vm_stats *)data;
 		getmicrotime(&vmstats->tv);
 		error = vmm_stat_copy(vcpu, vmstats->index,
-		    nitems(vmstats->statbuf),
-		    &vmstats->num_entries, vmstats->statbuf);
+		    nitems(vmstats->statbuf), &vmstats->num_entries,
+		    vmstats->statbuf);
 		break;
 	}
-	case VM_MMAP_GETNEXT:
+	case VM_MMAP_GETNEXT: {
+		struct vm_memmap *mm;
+
 		mm = (struct vm_memmap *)data;
 		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
 		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
 		break;
-	case VM_MMAP_MEMSEG:
+	}
+	case VM_MMAP_MEMSEG: {
+		struct vm_memmap *mm;
+
 		mm = (struct vm_memmap *)data;
 		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
 		    mm->len, mm->prot, mm->flags);
 		break;
-	case VM_MUNMAP_MEMSEG:
+	}
+	case VM_MUNMAP_MEMSEG: {
+		struct vm_munmap *mu;
+
 		mu = (struct vm_munmap *)data;
 		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
 		break;
+	}
+#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+	case VM_ALLOC_MEMSEG_FBSD12:
+		error = alloc_memseg(sc, (struct vm_memseg *)data,
+		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
+		break;
+	case VM_GET_MEMSEG_FBSD12:
+		error = get_memseg(sc, (struct vm_memseg *)data,
+		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
+		break;
+#endif
 	case VM_ALLOC_MEMSEG:
 		error = alloc_memseg(sc, (struct vm_memseg *)data,
 		    sizeof(((struct vm_memseg *)0)->name));
 		break;
 	case VM_GET_MEMSEG:
 		error = get_memseg(sc, (struct vm_memseg *)data,
 		    sizeof(((struct vm_memseg *)0)->name));
 		break;
-	case VM_GET_REGISTER:
+	case VM_GET_REGISTER: {
+		struct vm_register *vmreg;
+
 		vmreg = (struct vm_register *)data;
 		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
 		break;
-	case VM_SET_REGISTER:
+	}
+	case VM_SET_REGISTER: {
+		struct vm_register *vmreg;
+
 		vmreg = (struct vm_register *)data;
 		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
 		break;
-	case VM_GET_REGISTER_SET:
+	}
+	case VM_GET_REGISTER_SET: {
+		struct vm_register_set *vmregset;
+		uint64_t *regvals;
+		int *regnums;
+
 		vmregset = (struct vm_register_set *)data;
 		if (vmregset->count > VM_REG_LAST) {
 			error = EINVAL;
 			break;
 		}
 		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
 		    vmregset->count);
 		if (error == 0)
-			error = vm_get_register_set(vcpu, vmregset->count,
-			    regnums, regvals);
+			error = vm_get_register_set(vcpu,
+			    vmregset->count, regnums, regvals);
 		if (error == 0)
 			error = copyout(regvals, vmregset->regvals,
 			    sizeof(regvals[0]) * vmregset->count);
 		free(regvals, M_VMMDEV);
 		free(regnums, M_VMMDEV);
 		break;
-	case VM_SET_REGISTER_SET:
+	}
+	case VM_SET_REGISTER_SET: {
+		struct vm_register_set *vmregset;
+		uint64_t *regvals;
+		int *regnums;
+
 		vmregset = (struct vm_register_set *)data;
 		if (vmregset->count > VM_REG_LAST) {
 			error = EINVAL;
 			break;
 		}
 		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
 		    vmregset->count);
 		if (error == 0)
 			error = copyin(vmregset->regvals, regvals,
 			    sizeof(regvals[0]) * vmregset->count);
 		if (error == 0)
-			error = vm_set_register_set(vcpu, vmregset->count,
-			    regnums, regvals);
+			error = vm_set_register_set(vcpu,
+			    vmregset->count, regnums, regvals);
 		free(regvals, M_VMMDEV);
 		free(regnums, M_VMMDEV);
 		break;
-	case VM_GET_CAPABILITY:
+	}
+	case VM_GET_CAPABILITY: {
+		struct vm_capability *vmcap;
+
 		vmcap = (struct vm_capability *)data;
-		error = vm_get_capability(vcpu,
-		    vmcap->captype,
-		    &vmcap->capval);
+		error = vm_get_capability(vcpu, vmcap->captype, &vmcap->capval);
 		break;
-	case VM_SET_CAPABILITY:
+	}
+	case VM_SET_CAPABILITY: {
+		struct vm_capability *vmcap;
+
 		vmcap = (struct vm_capability *)data;
-		error = vm_set_capability(vcpu,
-		    vmcap->captype,
-		    vmcap->capval);
-		break;
-	case VM_INJECT_EXCEPTION:
-		vmexc = (struct vm_exception *)data;
-		error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far);
-		break;
-	case VM_GLA2GPA_NOFAULT:
-		gg = (struct vm_gla2gpa *)data;
-		error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
-		    gg->prot, &gg->gpa, &gg->fault);
-		KASSERT(error == 0 || error == EFAULT,
-		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
+		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
 		break;
+	}
 	case VM_ACTIVATE_CPU:
 		error = vm_activate_cpu(vcpu);
 		break;
-	case VM_GET_CPUS:
+	case VM_GET_CPUS: {
+		struct vm_cpuset *vm_cpuset;
+		cpuset_t *cpuset;
+		int size;
+
 		error = 0;
 		vm_cpuset = (struct vm_cpuset *)data;
 		size = vm_cpuset->cpusetsize;
-		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
+		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
 			error = ERANGE;
 			break;
 		}
-		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
+		    M_WAITOK | M_ZERO);
 		if (vm_cpuset->which == VM_ACTIVE_CPUS)
 			*cpuset = vm_active_cpus(sc->vm);
 		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
 			*cpuset = vm_suspended_cpus(sc->vm);
 		else if (vm_cpuset->which == VM_DEBUG_CPUS)
 			*cpuset = vm_debug_cpus(sc->vm);
 		else
 			error = EINVAL;
+		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
+			error = ERANGE;
 		if (error == 0)
 			error = copyout(cpuset, vm_cpuset->cpus, size);
 		free(cpuset, M_TEMP);
 		break;
+	}
 	case VM_SUSPEND_CPU:
 		error = vm_suspend_cpu(sc->vm, vcpu);
 		break;
 	case VM_RESUME_CPU:
 		error = vm_resume_cpu(sc->vm, vcpu);
 		break;
-	case VM_GET_VGIC_VERSION:
-		vgv = (struct vm_vgic_version *)data;
-		/* TODO: Query the vgic driver for this */
-		vgv->version = 3;
-		vgv->flags = 0;
-		error = 0;
-		break;
-	case VM_ATTACH_VGIC:
-		vgic = (struct vm_vgic_descr *)data;
-		error = vm_attach_vgic(sc->vm, vgic);
-		break;
-	case VM_RAISE_MSI:
-		vmsi = (struct vm_msi *)data;
-		error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus,
-		    vmsi->slot, vmsi->func);
-		break;
-	case VM_SET_TOPOLOGY:
+	case VM_SET_TOPOLOGY: {
+		struct vm_cpu_topology *topology;
+
 		topology = (struct vm_cpu_topology *)data;
 		error = vm_set_topology(sc->vm, topology->sockets,
 		    topology->cores, topology->threads, topology->maxcpus);
 		break;
-	case VM_GET_TOPOLOGY:
+	}
+	case VM_GET_TOPOLOGY: {
+		struct vm_cpu_topology *topology;
+
 		topology = (struct vm_cpu_topology *)data;
 		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
 		    &topology->threads, &topology->maxcpus);
 		error = 0;
 		break;
+	}
 	default:
-		error = ENOTTY;
+		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
+		    td);
 		break;
 	}
 
-done:
-	if (vcpus_locked == SINGLE)
-		vcpu_unlock_one(vcpu);
-	else if (vcpus_locked == ALL)
-		vcpu_unlock_all(sc);
-	if (memsegs_locked)
+	if ((ioctl->flags &
+	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
 		vm_unlock_memsegs(sc->vm);
+	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
+		vcpu_unlock_all(sc);
+	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
+		vcpu_unlock_one(vcpu);
 
 	/*
 	 * Make sure that no handler returns a kernel-internal
 	 * error value to userspace.
 	 */
 	KASSERT(error == ERESTART || error >= 0,
 	    ("vmmdev_ioctl: invalid error return %d", error));
 	return (error);
+
+lockfail:
+	if ((ioctl->flags &
+	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
+		vm_unlock_memsegs(sc->vm);
+	return (error);
 }
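One behavioral subtlety worth noting: VM_SUSPEND_CPU and VM_RESUME_CPU are tagged VMMDEV_IOCTL_MAYBE_ALLOC_VCPU in the table above, so a vcpuid of -1 leaves vcpu as NULL (selecting the whole VM) while any other value allocates that vCPU on demand. A minimal userspace sketch of the "-1 means all" convention, assuming the usual /dev/vmm/<name> device node and struct vm_activate_cpu as the request payload (which matches the *(int *)data vcpuid decoding above):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <fcntl.h>
#include <unistd.h>

static int
suspend_all_vcpus(const char *vmdev)	/* e.g. "/dev/vmm/testvm" */
{
	struct vm_activate_cpu ac;
	int error, fd;

	fd = open(vmdev, O_RDWR);
	if (fd < 0)
		return (-1);
	ac.vcpuid = -1;		/* -1 selects every vCPU */
	error = ioctl(fd, VM_SUSPEND_CPU, &ac);
	close(fd);
	return (error);
}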
 
 static int
 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
     struct vm_object **objp, int nprot)
 {
 	struct vmmdev_softc *sc;
 	vm_paddr_t gpa;
 	size_t len;
 	vm_ooffset_t segoff, first, last;
 	int error, found, segid;
 	bool sysmem;
 
 	error = vmm_priv_check(curthread->td_ucred);
 	if (error)
 		return (error);
 
 	first = *offset;
 	last = first + mapsize;
 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
 		return (EINVAL);
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL) {
 		/* virtual machine is in the process of being created */
 		return (EINVAL);
 	}
 
 	/*
 	 * Get a read lock on the guest memory map.
 	 */
 	vm_slock_memsegs(sc->vm);
 
 	gpa = 0;
 	found = 0;
 	while (!found) {
 		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
 		    NULL, NULL);
 		if (error)
 			break;
 
 		if (first >= gpa && last <= gpa + len)
 			found = 1;
 		else
 			gpa += len;
 	}
 
 	if (found) {
 		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
 		KASSERT(error == 0 && *objp != NULL,
 		    ("%s: invalid memory segment %d", __func__, segid));
 		if (sysmem) {
 			vm_object_reference(*objp);
 			*offset = segoff + (first - gpa);
 		} else {
 			error = EINVAL;
 		}
 	}
 	vm_unlock_memsegs(sc->vm);
 	return (error);
 }
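vmmdev_mmap_single() interprets the mmap offset as a guest physical address and only honors requests that fall entirely inside a single sysmem segment, with PROT_EXEC refused up front. In userspace that means guest RAM can be mapped straight from the VM device node; a sketch, assuming vmfd is an open /dev/vmm/<name> descriptor:

#include <sys/types.h>
#include <sys/mman.h>
#include <stdint.h>

static void *
map_guest_ram(int vmfd, uint64_t gpa, size_t len)
{
	/*
	 * The file offset is the guest physical address.  PROT_EXEC is
	 * rejected with EINVAL, and the range must not straddle memory
	 * segment boundaries.
	 */
	return (mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, vmfd,
	    (off_t)gpa));
}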
 
 static void
 vmmdev_destroy(void *arg)
 {
 	struct vmmdev_softc *sc = arg;
 	struct devmem_softc *dsc;
 	int error __diagused;
 
 	vm_disable_vcpu_creation(sc->vm);
 	error = vcpu_lock_all(sc);
 	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
 	vm_unlock_vcpus(sc->vm);
 
 	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
 		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
 		SLIST_REMOVE_HEAD(&sc->devmem, link);
 		free(dsc->name, M_VMMDEV);
 		free(dsc, M_VMMDEV);
 	}
 
 	if (sc->cdev != NULL)
 		destroy_dev(sc->cdev);
 
 	if (sc->vm != NULL)
 		vm_destroy(sc->vm);
 
 	if (sc->ucred != NULL)
 		crfree(sc->ucred);
 
 	if ((sc->flags & VSC_LINKED) != 0) {
 		mtx_lock(&vmmdev_mtx);
 		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
 		mtx_unlock(&vmmdev_mtx);
 	}
 
 	free(sc, M_VMMDEV);
 }
 
 static int
 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
 {
 	struct devmem_softc *dsc;
 	struct vmmdev_softc *sc;
 	struct cdev *cdev;
 	char *buf;
 	int error, buflen;
 
 	error = vmm_priv_check(req->td->td_ucred);
 	if (error)
 		return (error);
 
 	buflen = VM_MAX_NAMELEN + 1;
 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
 	strlcpy(buf, "beavis", buflen);
 	error = sysctl_handle_string(oidp, buf, buflen, req);
 	if (error != 0 || req->newptr == NULL)
 		goto out;
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	if (sc == NULL || sc->cdev == NULL) {
 		mtx_unlock(&vmmdev_mtx);
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
 	 * is scheduled for destruction.
 	 */
 	cdev = sc->cdev;
 	sc->cdev = NULL;
 	mtx_unlock(&vmmdev_mtx);
 
 	/*
 	 * Destroy all cdevs:
 	 *
 	 * - any new operations on the 'cdev' will return an error (ENXIO).
 	 *
 	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
 	 */
 	SLIST_FOREACH(dsc, &sc->devmem, link) {
 		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
 		destroy_dev(dsc->cdev);
 		devmem_destroy(dsc);
 	}
 	destroy_dev(cdev);
 	vmmdev_destroy(sc);
 	error = 0;
 
 out:
 	free(buf, M_VMMDEV);
 	return (error);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_vmm_destroy, "A",
     NULL);
 
 static struct cdevsw vmmdevsw = {
 	.d_name		= "vmmdev",
 	.d_version	= D_VERSION,
 	.d_ioctl	= vmmdev_ioctl,
 	.d_mmap_single	= vmmdev_mmap_single,
 	.d_read		= vmmdev_rw,
 	.d_write	= vmmdev_rw,
 };
 
 static int
 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
 {
 	struct vm *vm;
 	struct cdev *cdev;
 	struct vmmdev_softc *sc, *sc2;
 	char *buf;
 	int error, buflen;
 
 	error = vmm_priv_check(req->td->td_ucred);
 	if (error)
 		return (error);
 
 	buflen = VM_MAX_NAMELEN + 1;
 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
 	strlcpy(buf, "beavis", buflen);
 	error = sysctl_handle_string(oidp, buf, buflen, req);
 	if (error != 0 || req->newptr == NULL)
 		goto out;
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	mtx_unlock(&vmmdev_mtx);
 	if (sc != NULL) {
 		error = EEXIST;
 		goto out;
 	}
 
 	error = vm_create(buf, &vm);
 	if (error != 0)
 		goto out;
 
 	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
 	sc->ucred = crhold(curthread->td_ucred);
 	sc->vm = vm;
 	SLIST_INIT(&sc->devmem);
 
 	/*
 	 * Lookup the name again just in case somebody sneaked in when we
 	 * dropped the lock.
 	 */
 	mtx_lock(&vmmdev_mtx);
 	sc2 = vmmdev_lookup(buf);
 	if (sc2 == NULL) {
 		SLIST_INSERT_HEAD(&head, sc, link);
 		sc->flags |= VSC_LINKED;
 	}
 	mtx_unlock(&vmmdev_mtx);
 
 	if (sc2 != NULL) {
 		vmmdev_destroy(sc);
 		error = EEXIST;
 		goto out;
 	}
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
 	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
 	if (error != 0) {
 		vmmdev_destroy(sc);
 		goto out;
 	}
 
 	mtx_lock(&vmmdev_mtx);
 	sc->cdev = cdev;
 	sc->cdev->si_drv1 = sc;
 	mtx_unlock(&vmmdev_mtx);
 
 out:
 	free(buf, M_VMMDEV);
 	return (error);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_vmm_create, "A",
     NULL);
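The hw.vmm.create and hw.vmm.destroy nodes above drive the whole VM lifecycle: writing a name creates or tears down the matching /dev/vmm/<name> cdev. From C this is an ordinary string sysctl write; a sketch, with the VM name purely illustrative:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <string.h>

static int
vm_create_by_name(const char *name)	/* e.g. "testvm" */
{
	/* Writing the string invokes sysctl_vmm_create() in the kernel. */
	return (sysctlbyname("hw.vmm.create", NULL, NULL, name,
	    strlen(name) + 1));
}

static int
vm_destroy_by_name(const char *name)
{
	return (sysctlbyname("hw.vmm.destroy", NULL, NULL, name,
	    strlen(name) + 1));
}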
 
 void
 vmmdev_init(void)
 {
 	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
 	    "Allow use of vmm in a jail.");
 }
 
 int
 vmmdev_cleanup(void)
 {
 	int error;
 
 	if (SLIST_EMPTY(&head))
 		error = 0;
 	else
 		error = EBUSY;
 
 	return (error);
 }
 
 static int
 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
     struct vm_object **objp, int nprot)
 {
 	struct devmem_softc *dsc;
 	vm_ooffset_t first, last;
 	size_t seglen;
 	int error;
 	bool sysmem;
 
 	dsc = cdev->si_drv1;
 	if (dsc == NULL) {
 		/* 'cdev' has been created but is not ready for use */
 		return (ENXIO);
 	}
 
 	first = *offset;
 	last = *offset + len;
 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
 		return (EINVAL);
 
 	vm_slock_memsegs(dsc->sc->vm);
 
 	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
 	KASSERT(error == 0 && !sysmem && *objp != NULL,
 	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
 
 	if (seglen >= last)
 		vm_object_reference(*objp);
 	else
 		error = EINVAL;
 
 	vm_unlock_memsegs(dsc->sc->vm);
 	return (error);
 }
 
 static struct cdevsw devmemsw = {
 	.d_name		= "devmem",
 	.d_version	= D_VERSION,
 	.d_mmap_single	= devmem_mmap_single,
 };
 
 static int
 devmem_create_cdev(const char *vmname, int segid, char *devname)
 {
 	struct devmem_softc *dsc;
 	struct vmmdev_softc *sc;
 	struct cdev *cdev;
 	int error;
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
 	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
 	if (error)
 		return (error);
 
 	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(vmname);
 	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
 	if (sc->cdev == NULL) {
 		/* virtual machine is being created or destroyed */
 		mtx_unlock(&vmmdev_mtx);
 		free(dsc, M_VMMDEV);
 		destroy_dev_sched_cb(cdev, NULL, 0);
 		return (ENODEV);
 	}
 
 	dsc->segid = segid;
 	dsc->name = devname;
 	dsc->cdev = cdev;
 	dsc->sc = sc;
 	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
 	mtx_unlock(&vmmdev_mtx);
 
 	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
 	cdev->si_drv1 = dsc;
 	return (0);
 }
 
 static void
 devmem_destroy(void *arg)
 {
 	struct devmem_softc *dsc = arg;
 
 	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
 	dsc->cdev = NULL;
 	dsc->sc = NULL;
 }
diff --git a/sys/dev/vmm/vmm_dev.h b/sys/dev/vmm/vmm_dev.h
new file mode 100644
index 000000000000..a2dc4d11f359
--- /dev/null
+++ b/sys/dev/vmm/vmm_dev.h
@@ -0,0 +1,57 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * Copyright (C) 2015 Mihai Carabas
+ * All rights reserved.
+ */
+
+#ifndef _DEV_VMM_DEV_H_
+#define	_DEV_VMM_DEV_H_
+
+#include <sys/types.h>
+#include <sys/ioccom.h>
+#include <machine/vmm_dev.h>
+
+#ifdef _KERNEL
+struct thread;
+struct vm;
+struct vcpu;
+
+void	vmmdev_init(void);
+int	vmmdev_cleanup(void);
+int	vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd,
+	    caddr_t data, int fflag, struct thread *td);
+
+/*
+ * Entry in an ioctl handler table.  A number of generic ioctls are defined,
+ * plus a table of machine-dependent ioctls.  The flags indicate the
+ * required preconditions for a given ioctl.
+ *
+ * Some ioctls encode a vcpuid as the first member of their ioctl structure.
+ * These ioctls must specify one of the following flags:
+ * - ALLOC_VCPU: create the vCPU if it does not already exist
+ * - LOCK_ONE_VCPU: create the vCPU if it does not already exist
+ *   and lock the vCPU for the duration of the ioctl
+ * - MAYBE_ALLOC_VCPU: if the vcpuid is -1, do nothing, otherwise
+ *   create the vCPU if it does not already exist
+ */
+struct vmmdev_ioctl {
+	unsigned long	cmd;
+#define	VMMDEV_IOCTL_SLOCK_MEMSEGS	0x01
+#define	VMMDEV_IOCTL_XLOCK_MEMSEGS	0x02
+#define	VMMDEV_IOCTL_LOCK_ONE_VCPU	0x04
+#define	VMMDEV_IOCTL_LOCK_ALL_VCPUS	0x08
+#define	VMMDEV_IOCTL_ALLOC_VCPU		0x10
+#define	VMMDEV_IOCTL_MAYBE_ALLOC_VCPU	0x20
+	int		flags;
+};
+
+#define	VMMDEV_IOCTL(_cmd, _flags) { .cmd = (_cmd), .flags = (_flags) }
+
+extern const struct vmmdev_ioctl vmmdev_machdep_ioctls[];
+extern const size_t vmmdev_machdep_ioctl_count;
+
+#endif /* _KERNEL */
+
+#endif /* _DEV_VMM_DEV_H_ */
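The header above declares the two hooks a platform must provide: the table consulted during command lookup and the vmmdev_machdep_ioctl() handler invoked from the default case of vmmdev_ioctl(). The committed vmm_dev_machdep.c is not shown in this excerpt; the following is only a sketch of the handler shape implied by the declaration, reusing handlers that the old arm64 switch statement carried:

#include <sys/param.h>
#include <sys/systm.h>
#include <machine/vmm.h>
#include <dev/vmm/vmm_dev.h>

int
vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	int error;

	error = 0;
	switch (cmd) {
	case VM_ASSERT_IRQ: {
		struct vm_irq *vi;

		vi = (struct vm_irq *)data;
		error = vm_assert_irq(vm, vi->irq);
		break;
	}
	case VM_ATTACH_VGIC:
		/* The table entry carries XLOCK_MEMSEGS | LOCK_ALL_VCPUS. */
		error = vm_attach_vgic(vm, (struct vm_vgic_descr *)data);
		break;
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}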
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
index 92986a364883..47504a053c34
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -1,150 +1,151 @@
 .include <kmod.opts.mk>
 
 KMOD=	vmm
 
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+=	opt_acpi.h \
 	opt_bhyve_snapshot.h \
 	opt_ddb.h
 .endif
 
 SRCS+=	acpi_if.h bus_if.h device_if.h pci_if.h pcib_if.h vnode_if.h
 
 CFLAGS+= -DVMM_KEEP_STATS
 CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm
 CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io
 
 # generic vmm support
 .PATH: ${SRCTOP}/sys/dev/vmm ${SRCTOP}/sys/${MACHINE}/vmm
 SRCS+=	vmm.c \
 	vmm_dev.c \
+	vmm_dev_machdep.c \
 	vmm_instruction_emul.c \
 	vmm_stat.c
 
 .if ${MACHINE_CPUARCH} == "aarch64"
 DPSRCS+=	assym.inc
 
 # TODO: Add the new EL2 code
 SRCS+=	vmm_arm64.c \
 	vmm_reset.c \
 	vmm_call.S \
 	vmm_handlers.c \
 	vmm_mmu.c \
 	vmm_vhe_exception.S \
 	vmm_vhe.c \
 	vmm_hyp_el2.S
 
 .PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
 SRCS+=	vgic.c \
 	vgic_if.h \
 	vgic_if.c \
 	vgic_v3.c \
 	vtimer.c
 
 CLEANFILES+=	vmm_nvhe_exception.o vmm_nvhe.o
 CLEANFILES+=	vmm_hyp_blob.elf.full
 CLEANFILES+=	vmm_hyp_blob.elf vmm_hyp_blob.bin
 
 vmm_nvhe_exception.o: vmm_nvhe_exception.S
 	${CC} -c -x assembler-with-cpp -DLOCORE \
 	    ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} -o ${.TARGET} -fpie
 
 vmm_nvhe.o: vmm_nvhe.c
 	${CC} -c ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} \
 	    -o ${.TARGET} -fpie
 
 vmm_hyp_blob.elf.full: vmm_nvhe_exception.o vmm_nvhe.o
 	${LD} -m ${LD_EMULATION} -Bdynamic -L ${SYSDIR}/conf -T ${SYSDIR}/conf/ldscript.arm64 \
 	    ${_LDFLAGS:N-zbti-report*} --no-warn-mismatch --warn-common --export-dynamic \
 	    --dynamic-linker /red/herring -X -o ${.TARGET} ${.ALLSRC} \
 	    --defsym=_start='0x0' --defsym=text_start='0x0'
 
 vmm_hyp_blob.elf: vmm_hyp_blob.elf.full
 	${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}
 
 vmm_hyp_blob.bin: vmm_hyp_blob.elf
 	${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}
 
 vmm_hyp_el2.o: vmm_hyp_blob.bin
 
 .elif ${MACHINE_CPUARCH} == "amd64"
 DPSRCS+=	vmx_assym.h svm_assym.h
 DPSRCS+=	vmx_genassym.c svm_genassym.c offset.inc
 
 CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
 CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
 
 SRCS+=	vmm_host.c \
 	vmm_ioport.c \
 	vmm_lapic.c \
 	vmm_mem.c \
 	vmm_util.c \
 	x86.c
 
 .PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
 SRCS+=	iommu.c \
 	ppt.c \
 	vatpic.c \
 	vatpit.c \
 	vhpet.c \
 	vioapic.c \
 	vlapic.c \
 	vpmtmr.c \
 	vrtc.c
 
 # intel-specific files
 .PATH: ${SRCTOP}/sys/amd64/vmm/intel
 SRCS+=	ept.c \
 	vmcs.c \
 	vmx_msr.c \
 	vmx_support.S \
 	vmx.c \
 	vtd.c
 
 # amd-specific files
 .PATH: ${SRCTOP}/sys/amd64/vmm/amd
 SRCS+=	vmcb.c \
 	amdiommu.c \
 	ivhd_if.c \
 	ivhd_if.h \
 	svm.c \
 	svm_support.S \
 	npt.c \
 	ivrs_drv.c \
 	amdvi_hw.c \
 	svm_msr.c
 
 SRCS.BHYVE_SNAPSHOT=	vmm_snapshot.c
 
 CLEANFILES+=	vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
 
 OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
 OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
 .endif
 
 vmx_assym.h:	vmx_genassym.o
 	sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
 
 svm_assym.h:	svm_genassym.o
 	sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET}
 
 vmx_support.o:
 	${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
 	    ${.IMPSRC} -o ${.TARGET}
 
 svm_support.o:
 	${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
 	    ${.IMPSRC} -o ${.TARGET}
 
 hyp_genassym.o: offset.inc
 	${CC} -c ${NOSAN_CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
 
 vmx_genassym.o: offset.inc
 	${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC}
 
 svm_genassym.o: offset.inc
 	${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC}
 
 .include <bsd.kmod.mk>