Index: head/sys/amd64/include/vmm.h =================================================================== --- head/sys/amd64/include/vmm.h (revision 349947) +++ head/sys/amd64/include/vmm.h (revision 349948) @@ -1,702 +1,723 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMM_H_ #define _VMM_H_ #include #include #ifdef _KERNEL SDT_PROVIDER_DECLARE(vmm); #endif enum vm_suspend_how { VM_SUSPEND_NONE, VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, VM_SUSPEND_TRIPLEFAULT, VM_SUSPEND_LAST }; /* * Identifiers for architecturally defined registers. */ enum vm_reg_name { VM_REG_GUEST_RAX, VM_REG_GUEST_RBX, VM_REG_GUEST_RCX, VM_REG_GUEST_RDX, VM_REG_GUEST_RSI, VM_REG_GUEST_RDI, VM_REG_GUEST_RBP, VM_REG_GUEST_R8, VM_REG_GUEST_R9, VM_REG_GUEST_R10, VM_REG_GUEST_R11, VM_REG_GUEST_R12, VM_REG_GUEST_R13, VM_REG_GUEST_R14, VM_REG_GUEST_R15, VM_REG_GUEST_CR0, VM_REG_GUEST_CR3, VM_REG_GUEST_CR4, VM_REG_GUEST_DR7, VM_REG_GUEST_RSP, VM_REG_GUEST_RIP, VM_REG_GUEST_RFLAGS, VM_REG_GUEST_ES, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_FS, VM_REG_GUEST_GS, VM_REG_GUEST_LDTR, VM_REG_GUEST_TR, VM_REG_GUEST_IDTR, VM_REG_GUEST_GDTR, VM_REG_GUEST_EFER, VM_REG_GUEST_CR2, VM_REG_GUEST_PDPTE0, VM_REG_GUEST_PDPTE1, VM_REG_GUEST_PDPTE2, VM_REG_GUEST_PDPTE3, VM_REG_GUEST_INTR_SHADOW, VM_REG_GUEST_DR0, VM_REG_GUEST_DR1, VM_REG_GUEST_DR2, VM_REG_GUEST_DR3, VM_REG_GUEST_DR6, VM_REG_LAST }; enum x2apic_state { X2APIC_DISABLED, X2APIC_ENABLED, X2APIC_STATE_LAST }; #define VM_INTINFO_VECTOR(info) ((info) & 0xff) #define VM_INTINFO_DEL_ERRCODE 0x800 #define VM_INTINFO_RSVD 0x7ffff000 #define VM_INTINFO_VALID 0x80000000 #define VM_INTINFO_TYPE 0x700 #define VM_INTINFO_HWINTR (0 << 8) #define VM_INTINFO_NMI (2 << 8) #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) -#ifdef _KERNEL +/* + * The VM name has to fit into the pathname length constraints of devfs, + * governed primarily by SPECNAMELEN. The length is the total number of + * characters in the full path, relative to the mount point and not + * including any leading '/' characters. + * A prefix and a suffix are added to the name specified by the user. 
+ * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters + * longer for future use. + * The suffix is a string that identifies a bootrom image or some similar + * image that is attached to the VM. A separator character gets added to + * the suffix automatically when generating the full path, so it must be + * accounted for, reducing the effective length by 1. + * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 + * bytes for FreeBSD 12. A minimum length is set for safety and supports + * a SPECNAMELEN as small as 32 on old systems. + */ +#define VM_MAX_PREFIXLEN 10 +#define VM_MAX_SUFFIXLEN 15 +#define VM_MIN_NAMELEN 6 +#define VM_MAX_NAMELEN \ + (SPECNAMELEN - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1) -#define VM_MAX_NAMELEN 32 +#ifdef _KERNEL +CTASSERT(VM_MAX_NAMELEN >= VM_MIN_NAMELEN); struct vm; struct vm_exception; struct seg_desc; struct vm_exit; struct vm_run; struct vhpet; struct vioapic; struct vlapic; struct vmspace; struct vm_object; struct vm_guest_paging; struct pmap; struct vm_eventinfo { void *rptr; /* rendezvous cookie */ int *sptr; /* suspend cookie */ int *iptr; /* reqidle cookie */ }; typedef int (*vmm_init_func_t)(int ipinum); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, struct pmap *pmap, struct vm_eventinfo *info); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, uint64_t *retval); typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num, uint64_t val); typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num, struct seg_desc *desc); typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num, struct seg_desc *desc); typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu); typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); struct vmm_ops { vmm_init_func_t init; /* module wide initialization */ vmm_cleanup_func_t cleanup; vmm_resume_func_t resume; vmi_init_func_t vminit; /* vm-specific initialization */ vmi_run_func_t vmrun; vmi_cleanup_func_t vmcleanup; vmi_get_register_t vmgetreg; vmi_set_register_t vmsetreg; vmi_get_desc_t vmgetdesc; vmi_set_desc_t vmsetdesc; vmi_get_cap_t vmgetcap; vmi_set_cap_t vmsetcap; vmi_vmspace_alloc vmspace_alloc; vmi_vmspace_free vmspace_free; vmi_vlapic_init vlapic_init; vmi_vlapic_cleanup vlapic_cleanup; }; extern struct vmm_ops vmm_ops_intel; extern struct vmm_ops vmm_ops_amd; int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); uint16_t vm_get_maxcpus(struct vm *vm); void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus); int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus); /* * APIs that modify the guest memory map require all vcpus to be frozen. 
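To make the VM_MAX_NAMELEN arithmetic above concrete, a minimal standalone sketch follows (not part of the change). The SPECNAMELEN values of 255 and 63 are assumptions standing in for FreeBSD 13 and FreeBSD 12; they reproduce the 229- and 37-byte figures quoted in the comment.

    /*
     * Illustrative only: the prefix/suffix constants mirror the header
     * above; SPECNAMELEN is hardcoded rather than taken from <sys/param.h>.
     */
    #include <stdio.h>

    #define VM_MAX_PREFIXLEN	10
    #define VM_MAX_SUFFIXLEN	15

    int
    main(void)
    {
    	int specnamelen[] = { 255, 63 };	/* assumed: FreeBSD 13, 12 */
    	int i;

    	for (i = 0; i < 2; i++)
    		printf("SPECNAMELEN %3d -> VM_MAX_NAMELEN %d\n",
    		    specnamelen[i],
    		    specnamelen[i] - VM_MAX_PREFIXLEN - VM_MAX_SUFFIXLEN - 1);
    	return (0);	/* prints 229 and 37 */
    }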
*/ int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, size_t len, int prot, int flags); int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); void vm_free_memseg(struct vm *vm, int ident); int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); /* * APIs that inspect the guest memory map require only a *single* vcpu to * be frozen. This acts like a read lock on the guest memory map since any * modification requires *all* vcpus to be frozen. */ int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, struct vm_object **objptr); vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm); void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len, int prot, void **cookie); void vm_gpa_release(void *cookie); bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa); int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val); int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *ret_desc); int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc); int vm_run(struct vm *vm, struct vm_run *vmrun); int vm_suspend(struct vm *vm, enum vm_suspend_how how); int vm_inject_nmi(struct vm *vm, int vcpu); int vm_nmi_pending(struct vm *vm, int vcpuid); void vm_nmi_clear(struct vm *vm, int vcpuid); int vm_inject_extint(struct vm *vm, int vcpu); int vm_extint_pending(struct vm *vm, int vcpuid); void vm_extint_clear(struct vm *vm, int vcpuid); struct vlapic *vm_lapic(struct vm *vm, int cpu); struct vioapic *vm_ioapic(struct vm *vm); struct vhpet *vm_hpet(struct vm *vm); int vm_get_capability(struct vm *vm, int vcpu, int type, int *val); int vm_set_capability(struct vm *vm, int vcpu, int type, int val); int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); int vm_apicid2vcpuid(struct vm *vm, int apicid); int vm_activate_cpu(struct vm *vm, int vcpu); int vm_suspend_cpu(struct vm *vm, int vcpu); int vm_resume_cpu(struct vm *vm, int vcpu); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip); #ifdef _SYS__CPUSET_H_ /* * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. * The rendezvous 'func(arg)' is not allowed to do anything that will * cause the thread to be put to sleep. * * If the rendezvous is being initiated from a vcpu context then the * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. * * The caller cannot hold any locks when initiating the rendezvous. * * The implementation of this API may cause vcpus other than those specified * by 'dest' to be stalled. The caller should not rely on any vcpus making * forward progress when the rendezvous is in progress. 
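The rendezvous contract above is easiest to see in a short kernel-context sketch; the callback and wrapper names here are invented, only vm_smp_rendezvous() and vm_active_cpus() come from this header, and the fragment assumes the usual vmm kernel includes:

    /* Hypothetical: run a non-sleeping callback on every active vcpu. */
    static void
    example_cb(struct vm *vm, int vcpuid, void *arg)
    {
    	/* Per-vcpu work goes here; it must not sleep, per the contract. */
    }

    static void
    example_rendezvous(struct vm *vm)
    {
    	cpuset_t dest;

    	dest = vm_active_cpus(vm);
    	/* vcpuid is -1: the rendezvous is not initiated from a vcpu. */
    	vm_smp_rendezvous(vm, -1, dest, example_cb, NULL);
    }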
*/ typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, vm_rendezvous_func_t func, void *arg); cpuset_t vm_active_cpus(struct vm *vm); cpuset_t vm_debug_cpus(struct vm *vm); cpuset_t vm_suspended_cpus(struct vm *vm); #endif /* _SYS__CPUSET_H_ */ static __inline int vcpu_rendezvous_pending(struct vm_eventinfo *info) { return (*((uintptr_t *)(info->rptr)) != 0); } static __inline int vcpu_suspended(struct vm_eventinfo *info) { return (*info->sptr); } static __inline int vcpu_reqidle(struct vm_eventinfo *info) { return (*info->iptr); } int vcpu_debugged(struct vm *vm, int vcpuid); /* * Return 1 if device indicated by bus/slot/func is supposed to be a * pci passthrough device. * * Return 0 otherwise. */ int vmm_is_pptdev(int bus, int slot, int func); void *vm_iommu_domain(struct vm *vm); enum vcpu_state { VCPU_IDLE, VCPU_FROZEN, VCPU_RUNNING, VCPU_SLEEPING, }; int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state, bool from_idle); enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu); static int __inline vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) { return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING); } #ifdef _SYS_PROC_H_ static int __inline vcpu_should_yield(struct vm *vm, int vcpu) { if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) return (1); else if (curthread->td_owepreempt) return (1); else return (0); } #endif void *vcpu_stats(struct vm *vm, int vcpu); void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr); struct vmspace *vm_get_vmspace(struct vm *vm); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); struct vpmtmr *vm_pmtmr(struct vm *vm); struct vrtc *vm_rtc(struct vm *vm); /* * Inject exception 'vector' into the guest vcpu. This function returns 0 on * success and non-zero on failure. * * Wrapper functions like 'vm_inject_gp()' should be preferred to calling * this function directly because they enforce the trap-like or fault-like * behavior of an exception. * * This function should only be called in the context of the thread that is * executing this vcpu. */ int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid, uint32_t errcode, int restart_instruction); /* * This function is called after a VM-exit that occurred during exception or * interrupt delivery through the IDT. The format of 'intinfo' is described * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. * * If a VM-exit handler completes the event delivery successfully then it * should call vm_exit_intinfo() to extinguish the pending event. For e.g., * if the task switch emulation is triggered via a task gate then it should * call this function with 'intinfo=0' to indicate that the external event * is not pending anymore. * * Return value is 0 on success and non-zero on failure. */ int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); /* * This function is called before every VM-entry to retrieve a pending * event that should be injected into the guest. This function combines * nested events into a double or triple fault. * * Returns 0 if there are no events that need to be injected into the guest * and non-zero otherwise. 
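A sketch of a well-formed 'intinfo' value under the VM_INTINFO_* encoding defined earlier in this header may help. The macro values are copied from above so the fragment stands alone, and the placement of the error code in bits 63:32 is an assumption drawn from the EXITINTINFO format cited in the comment:

    #include <stdint.h>

    #define VM_INTINFO_DEL_ERRCODE	0x800
    #define VM_INTINFO_HWEXCEPTION	(3 << 8)
    #define VM_INTINFO_VALID	0x80000000

    /*
     * Illustrative only: a #GP (vector 13) pending as a hardware
     * exception that delivers a zero error code.
     */
    static uint64_t
    example_gp_intinfo(void)
    {
    	uint64_t intinfo;

    	intinfo = 13;				/* vector, bits 7:0 */
    	intinfo |= VM_INTINFO_HWEXCEPTION;	/* event type field */
    	intinfo |= VM_INTINFO_DEL_ERRCODE;	/* error code is delivered */
    	intinfo |= VM_INTINFO_VALID;		/* bit 31: event pending */
    	/* Assumed: the error code itself occupies bits 63:32 (zero here). */
    	return (intinfo);
    }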
*/ int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); enum vm_reg_name vm_segment_name(int seg_encoding); struct vm_copyinfo { uint64_t gpa; size_t len; void *hva; void *cookie; }; /* * Set up 'copyinfo[]' to copy to/from guest linear address space starting * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for * a copyin or PROT_WRITE for a copyout. * * retval is_fault Interpretation * 0 0 Success * 0 1 An exception was injected into the guest * EFAULT N/A Unrecoverable error * * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if * the return value is 0. The 'copyinfo[]' resources should be freed by calling * 'vm_copy_teardown()' after the copy is done. */ int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, int num_copyinfo, int *is_fault); void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, int num_copyinfo); void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, size_t len); void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, struct vm_copyinfo *copyinfo, size_t len); int vcpu_trace_exceptions(struct vm *vm, int vcpuid); #endif /* KERNEL */ #define VM_MAXCPU 16 /* maximum virtual cpus */ /* * Identifiers for optional vmm capabilities */ enum vm_cap_type { VM_CAP_HALT_EXIT, VM_CAP_MTRAP_EXIT, VM_CAP_PAUSE_EXIT, VM_CAP_UNRESTRICTED_GUEST, VM_CAP_ENABLE_INVPCID, VM_CAP_MAX }; enum vm_intr_trigger { EDGE_TRIGGER, LEVEL_TRIGGER }; /* * The 'access' field has the format specified in Table 21-2 of the Intel * Architecture Manual vol 3b. * * XXX The contents of the 'access' field are architecturally defined except * bit 16 - Segment Unusable. */ struct seg_desc { uint64_t base; uint32_t limit; uint32_t access; }; #define SEG_DESC_TYPE(access) ((access) & 0x001f) #define SEG_DESC_DPL(access) (((access) >> 5) & 0x3) #define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0) #define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0) #define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0) #define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0) enum vm_cpu_mode { CPU_MODE_REAL, CPU_MODE_PROTECTED, CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ }; enum vm_paging_mode { PAGING_MODE_FLAT, PAGING_MODE_32, PAGING_MODE_PAE, PAGING_MODE_64, }; struct vm_guest_paging { uint64_t cr3; int cpl; enum vm_cpu_mode cpu_mode; enum vm_paging_mode paging_mode; }; /* * The data structures 'vie' and 'vie_op' are meant to be opaque to the * consumers of instruction decoding. The only reason why their contents * need to be exposed is because they are part of the 'vm_exit' structure. */ struct vie_op { uint8_t op_byte; /* actual opcode byte */ uint8_t op_type; /* type of operation (e.g. 
MOV) */ uint16_t op_flags; }; #define VIE_INST_SIZE 15 struct vie { uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ uint8_t num_valid; /* size of the instruction */ uint8_t num_processed; uint8_t addrsize:4, opsize:4; /* address and operand sizes */ uint8_t rex_w:1, /* REX prefix */ rex_r:1, rex_x:1, rex_b:1, rex_present:1, repz_present:1, /* REP/REPE/REPZ prefix */ repnz_present:1, /* REPNE/REPNZ prefix */ opsize_override:1, /* Operand size override */ addrsize_override:1, /* Address size override */ segment_override:1; /* Segment override */ uint8_t mod:2, /* ModRM byte */ reg:4, rm:4; uint8_t ss:2, /* SIB byte */ index:4, base:4; uint8_t disp_bytes; uint8_t imm_bytes; uint8_t scale; int base_register; /* VM_REG_GUEST_xyz */ int index_register; /* VM_REG_GUEST_xyz */ int segment_register; /* VM_REG_GUEST_xyz */ int64_t displacement; /* optional addr displacement */ int64_t immediate; /* optional immediate operand */ uint8_t decoded; /* set to 1 if successfully decoded */ struct vie_op op; /* opcode description */ }; enum vm_exitcode { VM_EXITCODE_INOUT, VM_EXITCODE_VMX, VM_EXITCODE_BOGUS, VM_EXITCODE_RDMSR, VM_EXITCODE_WRMSR, VM_EXITCODE_HLT, VM_EXITCODE_MTRAP, VM_EXITCODE_PAUSE, VM_EXITCODE_PAGING, VM_EXITCODE_INST_EMUL, VM_EXITCODE_SPINUP_AP, VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ VM_EXITCODE_RENDEZVOUS, VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_SUSPENDED, VM_EXITCODE_INOUT_STR, VM_EXITCODE_TASK_SWITCH, VM_EXITCODE_MONITOR, VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, VM_EXITCODE_REQIDLE, VM_EXITCODE_DEBUG, VM_EXITCODE_VMINSN, VM_EXITCODE_MAX }; struct vm_inout { uint16_t bytes:3; /* 1 or 2 or 4 */ uint16_t in:1; uint16_t string:1; uint16_t rep:1; uint16_t port; uint32_t eax; /* valid for out */ }; struct vm_inout_str { struct vm_inout inout; /* must be the first element */ struct vm_guest_paging paging; uint64_t rflags; uint64_t cr0; uint64_t index; uint64_t count; /* rep=1 (%rcx), rep=0 (1) */ int addrsize; enum vm_reg_name seg_name; struct seg_desc seg_desc; }; enum task_switch_reason { TSR_CALL, TSR_IRET, TSR_JMP, TSR_IDT_GATE, /* task gate in IDT */ }; struct vm_task_switch { uint16_t tsssel; /* new TSS selector */ int ext; /* task switch due to external event */ uint32_t errcode; int errcode_valid; /* push 'errcode' on the new stack */ enum task_switch_reason reason; struct vm_guest_paging paging; }; struct vm_exit { enum vm_exitcode exitcode; int inst_length; /* 0 means unknown */ uint64_t rip; union { struct vm_inout inout; struct vm_inout_str inout_str; struct { uint64_t gpa; int fault_type; } paging; struct { uint64_t gpa; uint64_t gla; uint64_t cs_base; int cs_d; /* CS.D */ struct vm_guest_paging paging; struct vie vie; } inst_emul; /* * VMX specific payload. Used when there is no "better" * exitcode to represent the VM-exit. */ struct { int status; /* vmx inst status */ /* * 'exit_reason' and 'exit_qualification' are valid * only if 'status' is zero. */ uint32_t exit_reason; uint64_t exit_qualification; /* * 'inst_error' and 'inst_type' are valid * only if 'status' is non-zero. */ int inst_type; int inst_error; } vmx; /* * SVM specific payload. 
*/ struct { uint64_t exitcode; uint64_t exitinfo1; uint64_t exitinfo2; } svm; struct { uint32_t code; /* ecx value */ uint64_t wval; } msr; struct { int vcpu; uint64_t rip; } spinup_ap; struct { uint64_t rflags; uint64_t intr_status; } hlt; struct { int vector; } ioapic_eoi; struct { enum vm_suspend_how how; } suspended; struct vm_task_switch task_switch; } u; }; /* APIs to inject faults into the guest */ void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid, int errcode); static __inline void vm_inject_ud(void *vm, int vcpuid) { vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0); } static __inline void vm_inject_gp(void *vm, int vcpuid) { vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0); } static __inline void vm_inject_ac(void *vm, int vcpuid, int errcode) { vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode); } static __inline void vm_inject_ss(void *vm, int vcpuid, int errcode) { vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode); } void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2); int vm_restart_instruction(void *vm, int vcpuid); #endif /* _VMM_H_ */ Index: head/sys/amd64/include/vmm_dev.h =================================================================== --- head/sys/amd64/include/vmm_dev.h (revision 349947) +++ head/sys/amd64/include/vmm_dev.h (revision 349948) @@ -1,425 +1,425 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ #ifdef _KERNEL void vmmdev_init(void); int vmmdev_cleanup(void); #endif struct vm_memmap { vm_paddr_t gpa; int segid; /* memory segment */ vm_ooffset_t segoff; /* offset into memory segment */ size_t len; /* mmap length */ int prot; /* RWX */ int flags; }; #define VM_MEMMAP_F_WIRED 0x01 #define VM_MEMMAP_F_IOMMU 0x02 #define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? 
(m)->name : NULL) struct vm_memseg { int segid; size_t len; - char name[SPECNAMELEN + 1]; + char name[VM_MAX_SUFFIXLEN + 1]; }; struct vm_register { int cpuid; int regnum; /* enum vm_reg_name */ uint64_t regval; }; struct vm_seg_desc { /* data or code segment */ int cpuid; int regnum; /* enum vm_reg_name */ struct seg_desc desc; }; struct vm_register_set { int cpuid; unsigned int count; const int *regnums; /* enum vm_reg_name */ uint64_t *regvals; }; struct vm_run { int cpuid; struct vm_exit vm_exit; }; struct vm_exception { int cpuid; int vector; uint32_t error_code; int error_code_valid; int restart_instruction; }; struct vm_lapic_msi { uint64_t msg; uint64_t addr; }; struct vm_lapic_irq { int cpuid; int vector; }; struct vm_ioapic_irq { int irq; }; struct vm_isa_irq { int atpic_irq; int ioapic_irq; }; struct vm_isa_irq_trigger { int atpic_irq; enum vm_intr_trigger trigger; }; struct vm_capability { int cpuid; enum vm_cap_type captype; int capval; int allcpus; }; struct vm_pptdev { int bus; int slot; int func; }; struct vm_pptdev_mmio { int bus; int slot; int func; vm_paddr_t gpa; vm_paddr_t hpa; size_t len; }; struct vm_pptdev_msi { int vcpu; int bus; int slot; int func; int numvec; /* 0 means disabled */ uint64_t msg; uint64_t addr; }; struct vm_pptdev_msix { int vcpu; int bus; int slot; int func; int idx; uint64_t msg; uint32_t vector_control; uint64_t addr; }; struct vm_nmi { int cpuid; }; #define MAX_VM_STATS 64 struct vm_stats { int cpuid; /* in */ int num_entries; /* out */ struct timeval tv; uint64_t statbuf[MAX_VM_STATS]; }; struct vm_stat_desc { int index; /* in */ char desc[128]; /* out */ }; struct vm_x2apic { int cpuid; enum x2apic_state state; }; struct vm_gpa_pte { uint64_t gpa; /* in */ uint64_t pte[4]; /* out */ int ptenum; }; struct vm_hpet_cap { uint32_t capabilities; /* lower 32 bits of HPET capabilities */ }; struct vm_suspend { enum vm_suspend_how how; }; struct vm_gla2gpa { int vcpuid; /* inputs */ int prot; /* PROT_READ or PROT_WRITE */ uint64_t gla; struct vm_guest_paging paging; int fault; /* outputs */ uint64_t gpa; }; struct vm_activate_cpu { int vcpuid; }; struct vm_cpuset { int which; int cpusetsize; cpuset_t *cpus; }; #define VM_ACTIVE_CPUS 0 #define VM_SUSPENDED_CPUS 1 #define VM_DEBUG_CPUS 2 struct vm_intinfo { int vcpuid; uint64_t info1; uint64_t info2; }; struct vm_rtc_time { time_t secs; }; struct vm_rtc_data { int offset; uint8_t value; }; struct vm_cpu_topology { uint16_t sockets; uint16_t cores; uint16_t threads; uint16_t maxcpus; }; enum { /* general routines */ IOCNUM_ABIVERS = 0, IOCNUM_RUN = 1, IOCNUM_SET_CAPABILITY = 2, IOCNUM_GET_CAPABILITY = 3, IOCNUM_SUSPEND = 4, IOCNUM_REINIT = 5, /* memory apis */ IOCNUM_MAP_MEMORY = 10, /* deprecated */ IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */ IOCNUM_GET_GPA_PMAP = 12, IOCNUM_GLA2GPA = 13, IOCNUM_ALLOC_MEMSEG = 14, IOCNUM_GET_MEMSEG = 15, IOCNUM_MMAP_MEMSEG = 16, IOCNUM_MMAP_GETNEXT = 17, IOCNUM_GLA2GPA_NOFAULT = 18, /* register/state accessors */ IOCNUM_SET_REGISTER = 20, IOCNUM_GET_REGISTER = 21, IOCNUM_SET_SEGMENT_DESCRIPTOR = 22, IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, IOCNUM_SET_REGISTER_SET = 24, IOCNUM_GET_REGISTER_SET = 25, /* interrupt injection */ IOCNUM_GET_INTINFO = 28, IOCNUM_SET_INTINFO = 29, IOCNUM_INJECT_EXCEPTION = 30, IOCNUM_LAPIC_IRQ = 31, IOCNUM_INJECT_NMI = 32, IOCNUM_IOAPIC_ASSERT_IRQ = 33, IOCNUM_IOAPIC_DEASSERT_IRQ = 34, IOCNUM_IOAPIC_PULSE_IRQ = 35, IOCNUM_LAPIC_MSI = 36, IOCNUM_LAPIC_LOCAL_IRQ = 37, IOCNUM_IOAPIC_PINCOUNT = 38, IOCNUM_RESTART_INSTRUCTION = 39, /* PCI pass-thru */ 
IOCNUM_BIND_PPTDEV = 40, IOCNUM_UNBIND_PPTDEV = 41, IOCNUM_MAP_PPTDEV_MMIO = 42, IOCNUM_PPTDEV_MSI = 43, IOCNUM_PPTDEV_MSIX = 44, /* statistics */ IOCNUM_VM_STATS = 50, IOCNUM_VM_STAT_DESC = 51, /* kernel device state */ IOCNUM_SET_X2APIC_STATE = 60, IOCNUM_GET_X2APIC_STATE = 61, IOCNUM_GET_HPET_CAPABILITIES = 62, /* CPU Topology */ IOCNUM_SET_TOPOLOGY = 63, IOCNUM_GET_TOPOLOGY = 64, /* legacy interrupt injection */ IOCNUM_ISA_ASSERT_IRQ = 80, IOCNUM_ISA_DEASSERT_IRQ = 81, IOCNUM_ISA_PULSE_IRQ = 82, IOCNUM_ISA_SET_IRQ_TRIGGER = 83, /* vm_cpuset */ IOCNUM_ACTIVATE_CPU = 90, IOCNUM_GET_CPUSET = 91, IOCNUM_SUSPEND_CPU = 92, IOCNUM_RESUME_CPU = 93, /* RTC */ IOCNUM_RTC_READ = 100, IOCNUM_RTC_WRITE = 101, IOCNUM_RTC_SETTIME = 102, IOCNUM_RTC_GETTIME = 103, }; #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) #define VM_SUSPEND \ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_REINIT \ _IO('v', IOCNUM_REINIT) #define VM_ALLOC_MEMSEG \ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) #define VM_GET_MEMSEG \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) #define VM_MMAP_MEMSEG \ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) #define VM_MMAP_GETNEXT \ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) #define VM_SET_REGISTER \ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) #define VM_GET_REGISTER \ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) #define VM_SET_SEGMENT_DESCRIPTOR \ _IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_GET_SEGMENT_DESCRIPTOR \ _IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_SET_REGISTER_SET \ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) #define VM_GET_REGISTER_SET \ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set) #define VM_INJECT_EXCEPTION \ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) #define VM_LAPIC_IRQ \ _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq) #define VM_LAPIC_LOCAL_IRQ \ _IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq) #define VM_LAPIC_MSI \ _IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi) #define VM_IOAPIC_ASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_DEASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_PULSE_IRQ \ _IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_PINCOUNT \ _IOR('v', IOCNUM_IOAPIC_PINCOUNT, int) #define VM_ISA_ASSERT_IRQ \ _IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq) #define VM_ISA_DEASSERT_IRQ \ _IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq) #define VM_ISA_PULSE_IRQ \ _IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq) #define VM_ISA_SET_IRQ_TRIGGER \ _IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger) #define VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) #define VM_BIND_PPTDEV \ _IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev) #define VM_UNBIND_PPTDEV \ _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev) #define VM_MAP_PPTDEV_MMIO \ _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio) #define VM_PPTDEV_MSI \ _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi) #define VM_PPTDEV_MSIX \ _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix) #define VM_INJECT_NMI \ _IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi) #define VM_STATS \ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) #define VM_STAT_DESC \ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) #define VM_SET_X2APIC_STATE \ _IOW('v', 
IOCNUM_SET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_X2APIC_STATE \ _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_HPET_CAPABILITIES \ _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) #define VM_SET_TOPOLOGY \ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) #define VM_GET_TOPOLOGY \ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #define VM_GLA2GPA \ _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa) #define VM_GLA2GPA_NOFAULT \ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa) #define VM_ACTIVATE_CPU \ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) #define VM_GET_CPUS \ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) #define VM_SUSPEND_CPU \ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) #define VM_RESUME_CPU \ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) #define VM_SET_INTINFO \ _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) #define VM_GET_INTINFO \ _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) #define VM_RTC_WRITE \ _IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data) #define VM_RTC_READ \ _IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data) #define VM_RTC_SETTIME \ _IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time) #define VM_RTC_GETTIME \ _IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time) #define VM_RESTART_INSTRUCTION \ _IOW('v', IOCNUM_RESTART_INSTRUCTION, int) #endif Index: head/sys/amd64/vmm/vmm_dev.c =================================================================== --- head/sys/amd64/vmm/vmm_dev.c (revision 349947) +++ head/sys/amd64/vmm/vmm_dev.c (revision 349948) @@ -1,1142 +1,1161 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_stat.h" #include "vmm_mem.h" #include "io/ppt.h" #include "io/vatpic.h" #include "io/vioapic.h" #include "io/vhpet.h" #include "io/vrtc.h" struct devmem_softc { int segid; char *name; struct cdev *cdev; struct vmmdev_softc *sc; SLIST_ENTRY(devmem_softc) link; }; struct vmmdev_softc { struct vm *vm; /* vm instance cookie */ struct cdev *cdev; SLIST_ENTRY(vmmdev_softc) link; SLIST_HEAD(, devmem_softc) devmem; int flags; }; #define VSC_LINKED 0x01 static SLIST_HEAD(, vmmdev_softc) head; static unsigned pr_allow_flag; static struct mtx vmmdev_mtx; static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); static int vmm_priv_check(struct ucred *ucred); static int devmem_create_cdev(const char *vmname, int id, char *devmem); static void devmem_destroy(void *arg); static int vmm_priv_check(struct ucred *ucred) { if (jailed(ucred) && !(ucred->cr_prison->pr_allow & pr_allow_flag)) return (EPERM); return (0); } static int vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) { int error; if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm)) return (EINVAL); error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); return (error); } static void vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu) { enum vcpu_state state; state = vcpu_get_state(sc->vm, vcpu, NULL); if (state != VCPU_FROZEN) { panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), vcpu, state); } vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); } static int vcpu_lock_all(struct vmmdev_softc *sc) { int error, vcpu; uint16_t maxcpus; maxcpus = vm_get_maxcpus(sc->vm); for (vcpu = 0; vcpu < maxcpus; vcpu++) { error = vcpu_lock_one(sc, vcpu); if (error) break; } if (error) { while (--vcpu >= 0) vcpu_unlock_one(sc, vcpu); } return (error); } static void vcpu_unlock_all(struct vmmdev_softc *sc) { int vcpu; uint16_t maxcpus; maxcpus = vm_get_maxcpus(sc->vm); for (vcpu = 0; vcpu < maxcpus; vcpu++) vcpu_unlock_one(sc, vcpu); } static struct vmmdev_softc * vmmdev_lookup(const char *name) { struct vmmdev_softc *sc; #ifdef notyet /* XXX kernel is not compiled with invariants */ mtx_assert(&vmmdev_mtx, MA_OWNED); #endif SLIST_FOREACH(sc, &head, link) { if (strcmp(name, vm_name(sc->vm)) == 0) break; } return (sc); } static struct vmmdev_softc * vmmdev_lookup2(struct cdev *cdev) { return (cdev->si_drv1); } static int vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) { int error, off, c, prot; vm_paddr_t gpa, maxaddr; void *hpa, *cookie; struct vmmdev_softc *sc; uint16_t lastcpu; error = vmm_priv_check(curthread->td_ucred); if (error) return (error); sc = vmmdev_lookup2(cdev); if (sc == NULL) return (ENXIO); /* * Get a read lock on the guest memory map by freezing any vcpu. */ lastcpu = vm_get_maxcpus(sc->vm) - 1; error = vcpu_lock_one(sc, lastcpu); if (error) return (error); prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); maxaddr = vmm_sysmem_maxaddr(sc->vm); while (uio->uio_resid > 0 && error == 0) { gpa = uio->uio_offset; off = gpa & PAGE_MASK; c = min(uio->uio_resid, PAGE_SIZE - off); /* * The VM has a hole in its physical memory map. If we want to * use 'dd' to inspect memory beyond the hole we need to * provide bogus data for memory that lies in the hole. 
* * Since this device does not support lseek(2), dd(1) will read(2) blocks of data to simulate the lseek(2). */ hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c, prot, &cookie); if (hpa == NULL) { if (uio->uio_rw == UIO_READ && gpa < maxaddr) error = uiomove(__DECONST(void *, zero_region), c, uio); else error = EFAULT; } else { error = uiomove(hpa, c, uio); vm_gpa_release(cookie); } } vcpu_unlock_one(sc, lastcpu); return (error); } -CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1); +CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1); static int get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) { struct devmem_softc *dsc; int error; bool sysmem; error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); if (error || mseg->len == 0) return (error); if (!sysmem) { SLIST_FOREACH(dsc, &sc->devmem, link) { if (dsc->segid == mseg->segid) break; } KASSERT(dsc != NULL, ("%s: devmem segment %d not found", __func__, mseg->segid)); - error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL); + error = copystr(dsc->name, mseg->name, sizeof(mseg->name), + NULL); } else { bzero(mseg->name, sizeof(mseg->name)); } return (error); } static int alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) { char *name; int error; bool sysmem; error = 0; name = NULL; sysmem = true; + /* + * The allocation is lengthened by 1 to hold a terminating NUL. It'll + * be stripped off when devfs processes the full string. + */ if (VM_MEMSEG_NAME(mseg)) { sysmem = false; - name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK); - error = copystr(mseg->name, name, SPECNAMELEN + 1, 0); + name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK); + error = copystr(mseg->name, name, sizeof(mseg->name), NULL); if (error) goto done; } error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); if (error) goto done; if (VM_MEMSEG_NAME(mseg)) { error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); if (error) vm_free_memseg(sc->vm, mseg->segid); else name = NULL; /* freed when 'cdev' is destroyed */ } done: free(name, M_VMMDEV); return (error); } static int vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, uint64_t *regval) { int error, i; error = 0; for (i = 0; i < count; i++) { error = vm_get_register(vm, vcpu, regnum[i], &regval[i]); if (error) break; } return (error); } static int vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, uint64_t *regval) { int error, i; error = 0; for (i = 0; i < count; i++) { error = vm_set_register(vm, vcpu, regnum[i], regval[i]); if (error) break; } return (error); } static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { int error, vcpu, state_changed, size; cpuset_t *cpuset; struct vmmdev_softc *sc; struct vm_register *vmreg; struct vm_seg_desc *vmsegdesc; struct vm_register_set *vmregset; struct vm_run *vmrun; struct vm_exception *vmexc; struct vm_lapic_irq *vmirq; struct vm_lapic_msi *vmmsi; struct vm_ioapic_irq *ioapic_irq; struct vm_isa_irq *isa_irq; struct vm_isa_irq_trigger *isa_irq_trigger; struct vm_capability *vmcap; struct vm_pptdev *pptdev; struct vm_pptdev_mmio *pptmmio; struct vm_pptdev_msi *pptmsi; struct vm_pptdev_msix *pptmsix; struct vm_nmi *vmnmi; struct vm_stats *vmstats; struct vm_stat_desc *statdesc; struct vm_x2apic *x2apic; struct vm_gpa_pte *gpapte; struct vm_suspend *vmsuspend; struct vm_gla2gpa *gg; struct vm_activate_cpu *vac; struct vm_cpuset *vm_cpuset; struct vm_intinfo *vmii; struct vm_rtc_time
*rtctime; struct vm_rtc_data *rtcdata; struct vm_memmap *mm; struct vm_cpu_topology *topology; uint64_t *regvals; int *regnums; error = vmm_priv_check(curthread->td_ucred); if (error) return (error); sc = vmmdev_lookup2(cdev); if (sc == NULL) return (ENXIO); vcpu = -1; state_changed = 0; /* * Some VMM ioctls can operate only on vcpus that are not running. */ switch (cmd) { case VM_RUN: case VM_GET_REGISTER: case VM_SET_REGISTER: case VM_GET_SEGMENT_DESCRIPTOR: case VM_SET_SEGMENT_DESCRIPTOR: case VM_GET_REGISTER_SET: case VM_SET_REGISTER_SET: case VM_INJECT_EXCEPTION: case VM_GET_CAPABILITY: case VM_SET_CAPABILITY: case VM_PPTDEV_MSI: case VM_PPTDEV_MSIX: case VM_SET_X2APIC_STATE: case VM_GLA2GPA: case VM_GLA2GPA_NOFAULT: case VM_ACTIVATE_CPU: case VM_SET_INTINFO: case VM_GET_INTINFO: case VM_RESTART_INSTRUCTION: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. */ vcpu = *(int *)data; error = vcpu_lock_one(sc, vcpu); if (error) goto done; state_changed = 1; break; case VM_MAP_PPTDEV_MMIO: case VM_BIND_PPTDEV: case VM_UNBIND_PPTDEV: case VM_ALLOC_MEMSEG: case VM_MMAP_MEMSEG: case VM_REINIT: /* * ioctls that operate on the entire virtual machine must * prevent all vcpus from running. */ error = vcpu_lock_all(sc); if (error) goto done; state_changed = 2; break; case VM_GET_MEMSEG: case VM_MMAP_GETNEXT: /* * Lock a vcpu to make sure that the memory map cannot be * modified while it is being inspected. */ vcpu = vm_get_maxcpus(sc->vm) - 1; error = vcpu_lock_one(sc, vcpu); if (error) goto done; state_changed = 1; break; default: break; } switch(cmd) { case VM_RUN: vmrun = (struct vm_run *)data; error = vm_run(sc->vm, vmrun); break; case VM_SUSPEND: vmsuspend = (struct vm_suspend *)data; error = vm_suspend(sc->vm, vmsuspend->how); break; case VM_REINIT: error = vm_reinit(sc->vm); break; case VM_STAT_DESC: { statdesc = (struct vm_stat_desc *)data; error = vmm_stat_desc_copy(statdesc->index, statdesc->desc, sizeof(statdesc->desc)); break; } case VM_STATS: { CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); vmstats = (struct vm_stats *)data; getmicrotime(&vmstats->tv); error = vmm_stat_copy(sc->vm, vmstats->cpuid, &vmstats->num_entries, vmstats->statbuf); break; } case VM_PPTDEV_MSI: pptmsi = (struct vm_pptdev_msi *)data; error = ppt_setup_msi(sc->vm, pptmsi->vcpu, pptmsi->bus, pptmsi->slot, pptmsi->func, pptmsi->addr, pptmsi->msg, pptmsi->numvec); break; case VM_PPTDEV_MSIX: pptmsix = (struct vm_pptdev_msix *)data; error = ppt_setup_msix(sc->vm, pptmsix->vcpu, pptmsix->bus, pptmsix->slot, pptmsix->func, pptmsix->idx, pptmsix->addr, pptmsix->msg, pptmsix->vector_control); break; case VM_MAP_PPTDEV_MMIO: pptmmio = (struct vm_pptdev_mmio *)data; error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, pptmmio->func, pptmmio->gpa, pptmmio->len, pptmmio->hpa); break; case VM_BIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_UNBIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_INJECT_EXCEPTION: vmexc = (struct vm_exception *)data; error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc->vector, vmexc->error_code_valid, vmexc->error_code, vmexc->restart_instruction); break; case VM_INJECT_NMI: vmnmi = (struct vm_nmi *)data; error = vm_inject_nmi(sc->vm, vmnmi->cpuid); break; case VM_LAPIC_IRQ: vmirq = (struct vm_lapic_irq *)data; error = lapic_intr_edge(sc->vm, vmirq->cpuid, 
vmirq->vector); break; case VM_LAPIC_LOCAL_IRQ: vmirq = (struct vm_lapic_irq *)data; error = lapic_set_local_intr(sc->vm, vmirq->cpuid, vmirq->vector); break; case VM_LAPIC_MSI: vmmsi = (struct vm_lapic_msi *)data; error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); break; case VM_IOAPIC_ASSERT_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_DEASSERT_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_PULSE_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_PINCOUNT: *(int *)data = vioapic_pincount(sc->vm); break; case VM_ISA_ASSERT_IRQ: isa_irq = (struct vm_isa_irq *)data; error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_assert_irq(sc->vm, isa_irq->ioapic_irq); break; case VM_ISA_DEASSERT_IRQ: isa_irq = (struct vm_isa_irq *)data; error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_deassert_irq(sc->vm, isa_irq->ioapic_irq); break; case VM_ISA_PULSE_IRQ: isa_irq = (struct vm_isa_irq *)data; error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); break; case VM_ISA_SET_IRQ_TRIGGER: isa_irq_trigger = (struct vm_isa_irq_trigger *)data; error = vatpic_set_irq_trigger(sc->vm, isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); break; case VM_MMAP_GETNEXT: mm = (struct vm_memmap *)data; error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, &mm->segoff, &mm->len, &mm->prot, &mm->flags); break; case VM_MMAP_MEMSEG: mm = (struct vm_memmap *)data; error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, mm->len, mm->prot, mm->flags); break; case VM_ALLOC_MEMSEG: error = alloc_memseg(sc, (struct vm_memseg *)data); break; case VM_GET_MEMSEG: error = get_memseg(sc, (struct vm_memseg *)data); break; case VM_GET_REGISTER: vmreg = (struct vm_register *)data; error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, &vmreg->regval); break; case VM_SET_REGISTER: vmreg = (struct vm_register *)data; error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, vmreg->regval); break; case VM_SET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_REGISTER_SET: vmregset = (struct vm_register_set *)data; if (vmregset->count > VM_REG_LAST) { error = EINVAL; break; } regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, M_WAITOK); regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, M_WAITOK); error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * vmregset->count); if (error == 0) error = vm_get_register_set(sc->vm, vmregset->cpuid, vmregset->count, regnums, regvals); if (error == 0) error = copyout(regvals, vmregset->regvals, sizeof(regvals[0]) * vmregset->count); free(regvals, M_VMMDEV); free(regnums, M_VMMDEV); break; case VM_SET_REGISTER_SET: vmregset = (struct vm_register_set *)data; if (vmregset->count > VM_REG_LAST) { error = EINVAL; break; } regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, M_WAITOK); regnums = 
malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, M_WAITOK); error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * vmregset->count); if (error == 0) error = copyin(vmregset->regvals, regvals, sizeof(regvals[0]) * vmregset->count); if (error == 0) error = vm_set_register_set(sc->vm, vmregset->cpuid, vmregset->count, regnums, regvals); free(regvals, M_VMMDEV); free(regnums, M_VMMDEV); break; case VM_GET_CAPABILITY: vmcap = (struct vm_capability *)data; error = vm_get_capability(sc->vm, vmcap->cpuid, vmcap->captype, &vmcap->capval); break; case VM_SET_CAPABILITY: vmcap = (struct vm_capability *)data; error = vm_set_capability(sc->vm, vmcap->cpuid, vmcap->captype, vmcap->capval); break; case VM_SET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_set_x2apic_state(sc->vm, x2apic->cpuid, x2apic->state); break; case VM_GET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_get_x2apic_state(sc->vm, x2apic->cpuid, &x2apic->state); break; case VM_GET_GPA_PMAP: gpapte = (struct vm_gpa_pte *)data; pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), gpapte->gpa, gpapte->pte, &gpapte->ptenum); error = 0; break; case VM_GET_HPET_CAPABILITIES: error = vhpet_getcap((struct vm_hpet_cap *)data); break; case VM_GLA2GPA: { CTASSERT(PROT_READ == VM_PROT_READ); CTASSERT(PROT_WRITE == VM_PROT_WRITE); CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, gg->prot, &gg->gpa, &gg->fault); KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; } case VM_GLA2GPA_NOFAULT: gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging, gg->gla, gg->prot, &gg->gpa, &gg->fault); KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); break; case VM_ACTIVATE_CPU: vac = (struct vm_activate_cpu *)data; error = vm_activate_cpu(sc->vm, vac->vcpuid); break; case VM_GET_CPUS: error = 0; vm_cpuset = (struct vm_cpuset *)data; size = vm_cpuset->cpusetsize; if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { error = ERANGE; break; } cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); if (vm_cpuset->which == VM_ACTIVE_CPUS) *cpuset = vm_active_cpus(sc->vm); else if (vm_cpuset->which == VM_SUSPENDED_CPUS) *cpuset = vm_suspended_cpus(sc->vm); else if (vm_cpuset->which == VM_DEBUG_CPUS) *cpuset = vm_debug_cpus(sc->vm); else error = EINVAL; if (error == 0) error = copyout(cpuset, vm_cpuset->cpus, size); free(cpuset, M_TEMP); break; case VM_SUSPEND_CPU: vac = (struct vm_activate_cpu *)data; error = vm_suspend_cpu(sc->vm, vac->vcpuid); break; case VM_RESUME_CPU: vac = (struct vm_activate_cpu *)data; error = vm_resume_cpu(sc->vm, vac->vcpuid); break; case VM_SET_INTINFO: vmii = (struct vm_intinfo *)data; error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); break; case VM_GET_INTINFO: vmii = (struct vm_intinfo *)data; error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, &vmii->info2); break; case VM_RTC_WRITE: rtcdata = (struct vm_rtc_data *)data; error = vrtc_nvram_write(sc->vm, rtcdata->offset, rtcdata->value); break; case VM_RTC_READ: rtcdata = (struct vm_rtc_data *)data; error = vrtc_nvram_read(sc->vm, rtcdata->offset, &rtcdata->value); break; case VM_RTC_SETTIME: rtctime = (struct vm_rtc_time *)data; error = vrtc_set_time(sc->vm, rtctime->secs); break; case VM_RTC_GETTIME: error = 0; rtctime = (struct vm_rtc_time *)data; rtctime->secs = vrtc_get_time(sc->vm); break; case 
VM_RESTART_INSTRUCTION: error = vm_restart_instruction(sc->vm, vcpu); break; case VM_SET_TOPOLOGY: topology = (struct vm_cpu_topology *)data; error = vm_set_topology(sc->vm, topology->sockets, topology->cores, topology->threads, topology->maxcpus); break; case VM_GET_TOPOLOGY: topology = (struct vm_cpu_topology *)data; vm_get_topology(sc->vm, &topology->sockets, &topology->cores, &topology->threads, &topology->maxcpus); error = 0; break; default: error = ENOTTY; break; } if (state_changed == 1) vcpu_unlock_one(sc, vcpu); else if (state_changed == 2) vcpu_unlock_all(sc); done: /* Make sure that no handler returns a bogus value like ERESTART */ KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error)); return (error); } static int vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, struct vm_object **objp, int nprot) { struct vmmdev_softc *sc; vm_paddr_t gpa; size_t len; vm_ooffset_t segoff, first, last; int error, found, segid; uint16_t lastcpu; bool sysmem; error = vmm_priv_check(curthread->td_ucred); if (error) return (error); first = *offset; last = first + mapsize; if ((nprot & PROT_EXEC) || first < 0 || first >= last) return (EINVAL); sc = vmmdev_lookup2(cdev); if (sc == NULL) { /* virtual machine is in the process of being created */ return (EINVAL); } /* * Get a read lock on the guest memory map by freezing any vcpu. */ lastcpu = vm_get_maxcpus(sc->vm) - 1; error = vcpu_lock_one(sc, lastcpu); if (error) return (error); gpa = 0; found = 0; while (!found) { error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, NULL, NULL); if (error) break; if (first >= gpa && last <= gpa + len) found = 1; else gpa += len; } if (found) { error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); KASSERT(error == 0 && *objp != NULL, ("%s: invalid memory segment %d", __func__, segid)); if (sysmem) { vm_object_reference(*objp); *offset = segoff + (first - gpa); } else { error = EINVAL; } } vcpu_unlock_one(sc, lastcpu); return (error); } static void vmmdev_destroy(void *arg) { struct vmmdev_softc *sc = arg; struct devmem_softc *dsc; int error; error = vcpu_lock_all(sc); KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); SLIST_REMOVE_HEAD(&sc->devmem, link); free(dsc->name, M_VMMDEV); free(dsc, M_VMMDEV); } if (sc->cdev != NULL) destroy_dev(sc->cdev); if (sc->vm != NULL) vm_destroy(sc->vm); if ((sc->flags & VSC_LINKED) != 0) { mtx_lock(&vmmdev_mtx); SLIST_REMOVE(&head, sc, vmmdev_softc, link); mtx_unlock(&vmmdev_mtx); } free(sc, M_VMMDEV); } static int sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) { - int error; - char buf[VM_MAX_NAMELEN]; struct devmem_softc *dsc; struct vmmdev_softc *sc; struct cdev *cdev; + char *buf; + int error, buflen; error = vmm_priv_check(req->td->td_ucred); if (error) return (error); - strlcpy(buf, "beavis", sizeof(buf)); - error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + buflen = VM_MAX_NAMELEN + 1; + buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); + strlcpy(buf, "beavis", buflen); + error = sysctl_handle_string(oidp, buf, buflen, req); if (error != 0 || req->newptr == NULL) - return (error); + goto out; mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); if (sc == NULL || sc->cdev == NULL) { mtx_unlock(&vmmdev_mtx); - return (EINVAL); + error = EINVAL; + goto out; } /* * The 'cdev' will be destroyed asynchronously when 'si_threadcount' * goes down to 0 so we should not do it again in the callback. 
* * Setting 'sc->cdev' to NULL is also used to indicate that the VM * is scheduled for destruction. */ cdev = sc->cdev; sc->cdev = NULL; mtx_unlock(&vmmdev_mtx); /* * Schedule all cdevs to be destroyed: * * - any new operations on the 'cdev' will return an error (ENXIO). * * - when the 'si_threadcount' dwindles down to zero the 'cdev' will * be destroyed and the callback will be invoked in a taskqueue * context. * * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' */ SLIST_FOREACH(dsc, &sc->devmem, link) { KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc); } destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); - return (0); + error = 0; + +out: + free(buf, M_VMMDEV); + return (error); } SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, NULL, 0, sysctl_vmm_destroy, "A", NULL); static struct cdevsw vmmdevsw = { .d_name = "vmmdev", .d_version = D_VERSION, .d_ioctl = vmmdev_ioctl, .d_mmap_single = vmmdev_mmap_single, .d_read = vmmdev_rw, .d_write = vmmdev_rw, }; static int sysctl_vmm_create(SYSCTL_HANDLER_ARGS) { - int error; struct vm *vm; struct cdev *cdev; struct vmmdev_softc *sc, *sc2; - char buf[VM_MAX_NAMELEN]; + char *buf; + int error, buflen; error = vmm_priv_check(req->td->td_ucred); if (error) return (error); - strlcpy(buf, "beavis", sizeof(buf)); - error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + buflen = VM_MAX_NAMELEN + 1; + buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); + strlcpy(buf, "beavis", buflen); + error = sysctl_handle_string(oidp, buf, buflen, req); if (error != 0 || req->newptr == NULL) - return (error); + goto out; mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); mtx_unlock(&vmmdev_mtx); - if (sc != NULL) - return (EEXIST); + if (sc != NULL) { + error = EEXIST; + goto out; + } error = vm_create(buf, &vm); if (error != 0) - return (error); + goto out; sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); sc->vm = vm; SLIST_INIT(&sc->devmem); /* * Lookup the name again just in case somebody sneaked in when we * dropped the lock. 
*/ mtx_lock(&vmmdev_mtx); sc2 = vmmdev_lookup(buf); if (sc2 == NULL) { SLIST_INSERT_HEAD(&head, sc, link); sc->flags |= VSC_LINKED; } mtx_unlock(&vmmdev_mtx); if (sc2 != NULL) { vmmdev_destroy(sc); - return (EEXIST); + error = EEXIST; + goto out; } error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); if (error != 0) { vmmdev_destroy(sc); - return (error); + goto out; } mtx_lock(&vmmdev_mtx); sc->cdev = cdev; sc->cdev->si_drv1 = sc; mtx_unlock(&vmmdev_mtx); - return (0); +out: + free(buf, M_VMMDEV); + return (error); } SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, NULL, 0, sysctl_vmm_create, "A", NULL); void vmmdev_init(void) { mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, "Allow use of vmm in a jail."); } int vmmdev_cleanup(void) { int error; if (SLIST_EMPTY(&head)) error = 0; else error = EBUSY; return (error); } static int devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, struct vm_object **objp, int nprot) { struct devmem_softc *dsc; vm_ooffset_t first, last; size_t seglen; int error; uint16_t lastcpu; bool sysmem; dsc = cdev->si_drv1; if (dsc == NULL) { /* 'cdev' has been created but is not ready for use */ return (ENXIO); } first = *offset; last = *offset + len; if ((nprot & PROT_EXEC) || first < 0 || first >= last) return (EINVAL); lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1; error = vcpu_lock_one(dsc->sc, lastcpu); if (error) return (error); error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); KASSERT(error == 0 && !sysmem && *objp != NULL, ("%s: invalid devmem segment %d", __func__, dsc->segid)); vcpu_unlock_one(dsc->sc, lastcpu); if (seglen >= last) { vm_object_reference(*objp); return (0); } else { return (EINVAL); } } static struct cdevsw devmemsw = { .d_name = "devmem", .d_version = D_VERSION, .d_mmap_single = devmem_mmap_single, }; static int devmem_create_cdev(const char *vmname, int segid, char *devname) { struct devmem_softc *dsc; struct vmmdev_softc *sc; struct cdev *cdev; int error; error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); if (error) return (error); dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(vmname); KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); if (sc->cdev == NULL) { /* virtual machine is being created or destroyed */ mtx_unlock(&vmmdev_mtx); free(dsc, M_VMMDEV); destroy_dev_sched_cb(cdev, NULL, 0); return (ENODEV); } dsc->segid = segid; dsc->name = devname; dsc->cdev = cdev; dsc->sc = sc; SLIST_INSERT_HEAD(&sc->devmem, dsc, link); mtx_unlock(&vmmdev_mtx); /* The 'cdev' is ready for use after 'si_drv1' is initialized */ cdev->si_drv1 = dsc; return (0); } static void devmem_destroy(void *arg) { struct devmem_softc *dsc = arg; KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); dsc->cdev = NULL; dsc->sc = NULL; }
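For reference, a minimal userland sketch of driving the hw.vmm.create and hw.vmm.destroy handlers changed above; this is modeled on how libvmmapi is assumed to use these string sysctls, and the VM name must fit in VM_MAX_NAMELEN now that the handlers copy it into a VM_MAX_NAMELEN + 1 buffer:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
    	const char *name = "testvm";	/* hypothetical VM name */

    	/* Writing the name as the new value runs sysctl_vmm_create(). */
    	if (sysctlbyname("hw.vmm.create", NULL, NULL, name,
    	    strlen(name)) != 0)
    		perror("create");	/* e.g. EEXIST if the name is taken */
    	/* The same pattern drives sysctl_vmm_destroy(). */
    	if (sysctlbyname("hw.vmm.destroy", NULL, NULL, name,
    	    strlen(name)) != 0)
    		perror("destroy");
    	return (0);
    }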