diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -219,6 +219,10 @@ extern const struct vmm_ops vmm_ops_amd; int vm_create(const char *name, struct vm **retvm); +struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); +void vm_disable_vcpu_creation(struct vm *vm); +void vm_slock_vcpus(struct vm *vm); +void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -1840,6 +1840,7 @@ vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta) { int ret; + struct vcpu *vcpu; struct vlapic *vlapic; struct LAPIC *lapic; uint32_t ccr; @@ -1851,7 +1852,10 @@ maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vlapic = vm_lapic(vm_vcpu(vm, i)); + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vlapic = vm_lapic(vcpu); /* snapshot the page first; timer period depends on icr_timer */ lapic = vlapic->apic_page; diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -127,7 +127,6 @@ uint64_t tsc_offset; /* (o) TSC offsetting */ }; -#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -175,6 +174,7 @@ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ cpuset_t startup_cpus; /* (i) [r] waiting for startup */ int suspend; /* (i) stop VM execution */ + bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ cpuset_t rendezvous_req_cpus; /* (x) [r] rendezvous requested */ @@ -186,13 +186,14 @@ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* 
(o) [m+v] guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ - struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ + struct vcpu *vcpu[VM_MAXCPU]; /* (x) guest vcpus */ /* The following describe the vm cpu topology */ uint16_t sockets; /* (o) num of sockets */ uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* (o) max pluggable cpus */ struct sx mem_segs_lock; /* (o) */ + struct sx vcpus_init_lock; /* (o) */ }; #define VMM_CTR0(vcpu, format) \ @@ -319,10 +320,8 @@ #endif static void -vcpu_cleanup(struct vm *vm, int i, bool destroy) +vcpu_cleanup(struct vcpu *vcpu, bool destroy) { - struct vcpu *vcpu = &vm->vcpu[i]; - vmmops_vlapic_cleanup(vcpu->vlapic); vmmops_vcpu_cleanup(vcpu->cookie); vcpu->cookie = NULL; @@ -333,30 +332,30 @@ } } -static void -vcpu_init(struct vm *vm, int vcpu_id, bool create) +static struct vcpu * +vcpu_alloc(struct vm *vm, int vcpu_id) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_init: invalid vcpu %d", vcpu_id)); - - vcpu = &vm->vcpu[vcpu_id]; - - if (create) { - KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " - "initialized", vcpu_id)); - vcpu_lock_init(vcpu); - vcpu->state = VCPU_IDLE; - vcpu->hostcpu = NOCPU; - vcpu->vcpuid = vcpu_id; - vcpu->vm = vm; - vcpu->guestfpu = fpu_save_area_alloc(); - vcpu->stats = vmm_stat_alloc(); - vcpu->tsc_offset = 0; - } - vcpu->cookie = vmmops_vcpu_init(vm->cookie, vcpu, vcpu_id); + vcpu = malloc(sizeof(*vcpu), M_VM, M_WAITOK | M_ZERO); + vcpu_lock_init(vcpu); + vcpu->state = VCPU_IDLE; + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; + vcpu->vm = vm; + vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->stats = vmm_stat_alloc(); + vcpu->tsc_offset = 0; + return (vcpu); +} + +static void +vcpu_init(struct vcpu *vcpu) +{ + vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); vcpu->vlapic = 
vmmops_vlapic_init(vcpu->cookie); vm_set_x2apic_state(vcpu, X2APIC_DISABLED); vcpu->reqidle = 0; @@ -473,8 +472,6 @@ static void vm_init(struct vm *vm, bool create) { - int i; - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); vm->iommu = NULL; vm->vioapic = vioapic_init(vm); @@ -492,8 +489,61 @@ vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); - for (i = 0; i < vm->maxcpus; i++) - vcpu_init(vm, i, create); + if (!create) { + for (int i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_init(vm->vcpu[i]); + } + } +} + +void +vm_disable_vcpu_creation(struct vm *vm) +{ + sx_xlock(&vm->vcpus_init_lock); + vm->dying = true; + sx_xunlock(&vm->vcpus_init_lock); +} + +struct vcpu * +vm_alloc_vcpu(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) + return (NULL); + + vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); + if (__predict_true(vcpu != NULL)) + return (vcpu); + + sx_xlock(&vm->vcpus_init_lock); + vcpu = vm->vcpu[vcpuid]; + if (vcpu == NULL && !vm->dying) { + vcpu = vcpu_alloc(vm, vcpuid); + vcpu_init(vcpu); + + /* + * Ensure vCPU is fully created before updating pointer + * to permit unlocked reads above. 
+ */ + atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], + (uintptr_t)vcpu); + } + sx_xunlock(&vm->vcpus_init_lock); + return (vcpu); +} + +void +vm_slock_vcpus(struct vm *vm) +{ + sx_slock(&vm->vcpus_init_lock); +} + +void +vm_unlock_vcpus(struct vm *vm) +{ + sx_unlock(&vm->vcpus_init_lock); } /* @@ -528,6 +578,7 @@ vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); sx_init(&vm->mem_segs_lock, "vm mem_segs"); + sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ @@ -558,17 +609,14 @@ int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, - uint16_t threads, uint16_t maxcpus) + uint16_t threads, uint16_t maxcpus __unused) { - if (maxcpus != 0) - return (EINVAL); /* XXX remove when supported */ + /* Ignore maxcpus. */ if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); - /* XXX need to check sockets * cores * threads == vCPU, how? */ vm->sockets = sockets; vm->cores = cores; vm->threads = threads; - vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */ return(0); } @@ -593,8 +641,10 @@ vatpic_cleanup(vm->vatpic); vioapic_cleanup(vm->vioapic); - for (i = 0; i < vm->maxcpus; i++) - vcpu_cleanup(vm, i, destroy); + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_cleanup(vm->vcpu[i], destroy); + } vmmops_cleanup(vm->cookie); @@ -619,6 +669,7 @@ vmmops_vmspace_free(vm->vmspace); vm->vmspace = NULL; + sx_destroy(&vm->vcpus_init_lock); sx_destroy(&vm->mem_segs_lock); mtx_destroy(&vm->rendezvous_mtx); } @@ -2250,7 +2301,7 @@ struct vcpu * vm_vcpu(struct vm *vm, int vcpuid) { - return (&vm->vcpu[vcpuid]); + return (vm->vcpu[vcpuid]); } struct vlapic * @@ -2761,7 +2812,9 @@ now = rdtsc(); maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = &vm->vcpu[i]; + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done); 
SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done); @@ -2811,7 +2864,9 @@ maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = &vm->vcpu[i]; + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; error = vmmops_vcpu_snapshot(vcpu->cookie, meta); if (error != 0) { @@ -2894,7 +2949,9 @@ maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = &vm->vcpu[i]; + vcpu = vm->vcpu[i]; + if (vcpu == NULL) + continue; error = vmmops_restore_tsc(vcpu->cookie, vcpu->tsc_offset - now); diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -125,24 +125,16 @@ } static int -vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) +vcpu_lock_one(struct vcpu *vcpu) { - int error; - - if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm)) - return (EINVAL); - - error = vcpu_set_state(vm_vcpu(sc->vm, vcpu), VCPU_FROZEN, true); - return (error); + return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); } static void -vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid) +vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu) { - struct vcpu *vcpu; enum vcpu_state state; - vcpu = vm_vcpu(sc->vm, vcpuid); state = vcpu_get_state(vcpu, NULL); if (state != VCPU_FROZEN) { panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), @@ -155,19 +147,29 @@ static int vcpu_lock_all(struct vmmdev_softc *sc) { - int error, vcpu; - uint16_t maxcpus; + struct vcpu *vcpu; + int error, i; + uint16_t maxcpus; + vm_slock_vcpus(sc->vm); maxcpus = vm_get_maxcpus(sc->vm); - for (vcpu = 0; vcpu < maxcpus; vcpu++) { - error = vcpu_lock_one(sc, vcpu); + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + error = vcpu_lock_one(vcpu); if (error) break; } if (error) { - while (--vcpu >= 0) - vcpu_unlock_one(sc, vcpu); + while (--i >= 0) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + vcpu_unlock_one(sc, i, vcpu); + } + vm_unlock_vcpus(sc->vm); } return 
(error); @@ -176,12 +178,17 @@ static void vcpu_unlock_all(struct vmmdev_softc *sc) { - int vcpu; - uint16_t maxcpus; + struct vcpu *vcpu; + uint16_t i, maxcpus; maxcpus = vm_get_maxcpus(sc->vm); - for (vcpu = 0; vcpu < maxcpus; vcpu++) - vcpu_unlock_one(sc, vcpu); + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + vcpu_unlock_one(sc, i, vcpu); + } + vm_unlock_vcpus(sc->vm); } static struct vmmdev_softc * @@ -363,20 +370,11 @@ return (error); } -static struct vcpu * -lookup_vcpu(struct vm *vm, int vcpuid) -{ - if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) - return (NULL); - - return (vm_vcpu(vm, vcpuid)); -} - static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { - int error, vcpuid, state_changed, size; + int error, vcpuid, size; cpuset_t *cpuset; struct vmmdev_softc *sc; struct vcpu *vcpu; @@ -414,6 +412,7 @@ struct vm_readwrite_kernemu_device *kernemu; uint64_t *regvals; int *regnums; + enum { NONE, SINGLE, ALL } vcpus_locked; bool memsegs_locked; #ifdef BHYVE_SNAPSHOT struct vm_snapshot_meta *snapshot_meta; @@ -429,7 +428,7 @@ vcpuid = -1; vcpu = NULL; - state_changed = 0; + vcpus_locked = NONE; memsegs_locked = false; /* @@ -465,11 +464,15 @@ * ioctls that can operate only on vcpus that are not running. */ vcpuid = *(int *)data; - error = vcpu_lock_one(sc, vcpuid); + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); + if (vcpu == NULL) { + error = EINVAL; + goto done; + } + error = vcpu_lock_one(vcpu); if (error) goto done; - state_changed = 1; - vcpu = vm_vcpu(sc->vm, vcpuid); + vcpus_locked = SINGLE; break; #ifdef COMPAT_FREEBSD12 @@ -501,7 +504,7 @@ error = vcpu_lock_all(sc); if (error) goto done; - state_changed = 2; + vcpus_locked = ALL; break; #ifdef COMPAT_FREEBSD12 @@ -528,7 +531,7 @@ * a specific vCPU. 
*/ vcpuid = *(int *)data; - vcpu = lookup_vcpu(sc->vm, vcpuid); + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); if (vcpu == NULL) { error = EINVAL; goto done; @@ -545,7 +548,7 @@ vcpuid = *(int *)data; if (vcpuid == -1) break; - vcpu = lookup_vcpu(sc->vm, vcpuid); + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); if (vcpu == NULL) { error = EINVAL; goto done; @@ -957,9 +960,9 @@ break; } - if (state_changed == 1) - vcpu_unlock_one(sc, vcpuid); - else if (state_changed == 2) + if (vcpus_locked == SINGLE) + vcpu_unlock_one(sc, vcpuid, vcpu); + else if (vcpus_locked == ALL) vcpu_unlock_all(sc); if (memsegs_locked) vm_unlock_memsegs(sc->vm); @@ -1041,8 +1044,10 @@ struct devmem_softc *dsc; int error __diagused; + vm_disable_vcpu_creation(sc->vm); error = vcpu_lock_all(sc); KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); + vm_unlock_vcpus(sc->vm); while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));