diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 94546efd03c9..6bdcb51777d4 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -1,1035 +1,1037 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas
 * All rights reserved.
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
struct vm_memseg_fbsd12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_fbsd12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_FBSD12 \
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_fbsd12)
#define	VM_GET_MEMSEG_FBSD12 \
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_fbsd12)
#endif

static int devmem_create_cdev(const char *vmname, int id, char *devmem);

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED	0x01

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static void devmem_destroy(void *arg);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

	mtx_assert(&vmmdev_mtx, MA_OWNED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(curthread->td_ucred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
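/*
 * For example, the read/write path above lets guest physical memory be
 * inspected from the host with ordinary tools; the VM name below is
 * illustrative only:
 *
 *	dd if=/dev/vmm/testvm bs=4096 count=16 | hexdump -C
 *
 * Reads that land in a hole below vmm_sysmem_maxaddr() are satisfied from
 * zero_region instead of failing, so a sequential reader can step over gaps
 * in the guest physical map.
 */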
CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}
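/*
 * Locking requirements for the generic ioctls below, consumed by
 * vmmdev_ioctl(): XLOCK_MEMSEGS/SLOCK_MEMSEGS take the memory-segment lock
 * exclusive or shared, LOCK_ONE_VCPU freezes the target vcpu,
 * LOCK_ALL_VCPUS freezes every vcpu before the handler runs, and
 * MAYBE_ALLOC_VCPU allows the handler to be invoked with a vcpuid of -1
 * (vcpu == NULL).
 */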
static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_FBSD12, VMMDEV_IOCTL_XLOCK_MEMSEGS |
	    VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG, VMMDEV_IOCTL_XLOCK_MEMSEGS |
	    VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG, VMMDEV_IOCTL_XLOCK_MEMSEGS |
	    VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG, VMMDEV_IOCTL_XLOCK_MEMSEGS |
	    VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT, VMMDEV_IOCTL_XLOCK_MEMSEGS |
	    VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_FBSD12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};
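/*
 * Central ioctl dispatcher: the command is looked up first in the generic
 * table above and then in the machine-dependent table
 * (vmmdev_machdep_ioctls); unknown commands fail with ENOTTY.  The flags of
 * the matching entry decide which locks are taken before the switch runs
 * and released once it returns.
 */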
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	int error, vcpuid;

	error = vmm_priv_check(td->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
	case VM_ALLOC_MEMSEG_FBSD12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
	case VM_GET_MEMSEG_FBSD12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums,
		    sizeof(regnums[0]) * vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu, vmregset->count,
			    regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count,
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums,
		    sizeof(regnums[0]) * vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu, vmregset->count,
			    regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype,
		    &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype,
		    vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}
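/*
 * A minimal userspace sketch of driving one of these handlers directly with
 * ioctl(2); bhyve normally goes through libvmmapi, and the VM name and
 * topology values below are illustrative only:
 *
 *	int fd = open("/dev/vmm/testvm", O_RDWR);
 *	struct vm_cpu_topology topo = {
 *		.sockets = 1, .cores = 2, .threads = 1, .maxcpus = 0,
 *	};
 *	if (fd >= 0 && ioctl(fd, VM_SET_TOPOLOGY, &topo) != 0)
 *		warn("VM_SET_TOPOLOGY");
 */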
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
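/*
 * The mmap_single handler above backs mmap(2) of the VM cdev: the file
 * offset is interpreted as a guest-physical address and the requested range
 * must fall entirely inside one sysmem segment.  An illustrative mapping of
 * the first 1 MB of guest memory (fd as in the earlier sketch):
 *
 *	void *p = mmap(NULL, 1024 * 1024, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 *
 * PROT_EXEC mappings are rejected outright.
 */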
static void
-vmmdev_destroy(void *arg)
+vmmdev_destroy(struct vmmdev_softc *sc)
{
-	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error __diagused;

+	/*
+	 * Destroy all cdevs:
+	 *
+	 * - any new operations on the 'cdev' will return an error (ENXIO).
+	 *
+	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
+	 */
+	SLIST_FOREACH(dsc, &sc->devmem, link) {
+		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+		destroy_dev(dsc->cdev);
+		devmem_destroy(dsc);
+	}
+
	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}
	free(sc, M_VMMDEV);
}
static int
-sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
-	struct devmem_softc *dsc;
-	struct vmmdev_softc *sc;
	struct cdev *cdev;
-	char *buf;
-	int error, buflen;
-
-	error = vmm_priv_check(req->td->td_ucred);
-	if (error)
-		return (error);
-
-	buflen = VM_MAX_NAMELEN + 1;
-	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
-	strlcpy(buf, "beavis", buflen);
-	error = sysctl_handle_string(oidp, buf, buflen, req);
-	if (error != 0 || req->newptr == NULL)
-		goto out;
+	struct vmmdev_softc *sc;

	mtx_lock(&vmmdev_mtx);
-	sc = vmmdev_lookup(buf);
+	sc = vmmdev_lookup(name);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
-		error = EINVAL;
-		goto out;
+		return (EINVAL);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

-	/*
-	 * Destroy all cdevs:
-	 *
-	 * - any new operations on the 'cdev' will return an error (ENXIO).
-	 *
-	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
-	 */
-	SLIST_FOREACH(dsc, &sc->devmem, link) {
-		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
-		destroy_dev(dsc->cdev);
-		devmem_destroy(dsc);
-	}
	destroy_dev(cdev);
	vmmdev_destroy(sc);
-	error = 0;
-out:
+	return (0);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+	char *buf;
+	int error, buflen;
+
+	error = vmm_priv_check(req->td->td_ucred);
+	if (error)
+		return (error);
+
+	buflen = VM_MAX_NAMELEN + 1;
+	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+	strlcpy(buf, "beavis", buflen);
+	error = sysctl_handle_string(oidp, buf, buflen, req);
+	if (error == 0 && req->newptr != NULL)
+		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A", NULL);
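/*
 * The knob registered above is the user-facing teardown path: a VM created
 * via hw.vmm.create is destroyed by writing its name back, e.g. (VM name
 * illustrative):
 *
 *	sysctl hw.vmm.destroy=testvm
 */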
static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	struct vm *vm;
	int error;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(name);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL)
		return (EEXIST);

	error = vm_create(name, &vm);
	if (error != 0)
		return (error);
	sc = vmmdev_alloc(vm, cred);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(name);
	if (sc2 != NULL) {
		mtx_unlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (EEXIST);
	}
	sc->flags |= VSC_LINKED;
	SLIST_INSERT_HEAD(&head, sc, link);
	mtx_unlock(&vmmdev_mtx);

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", name);
	if (error != 0) {
		vmmdev_destroy(sc);
		return (error);
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A", NULL);

void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}
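/*
 * Lifecycle of a devmem segment, as implemented above: alloc_memseg() with a
 * named (non-sysmem) segment calls devmem_create_cdev(), which exposes the
 * segment as /dev/vmm.io/<vmname>.<segname>; devmem_mmap_single() lets that
 * node be mmap(2)ed, and the node is torn down together with the VM in
 * vmmdev_destroy().
 */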