diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -155,7 +155,7 @@ const int *regnums, uint64_t *regvals); int vm_get_register_set(struct vcpu *vcpu, unsigned int count, const int *regnums, uint64_t *regvals); -int vm_run(struct vcpu *vcpu, struct vm_exit *ret_vmexit); +int vm_run(struct vcpu *vcpu, struct vm_run *vmrun); int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how); int vm_reinit(struct vmctx *ctx); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -721,16 +721,9 @@ } int -vm_run(struct vcpu *vcpu, struct vm_exit *vmexit) +vm_run(struct vcpu *vcpu, struct vm_run *vmrun) { - int error; - struct vm_run vmrun; - - bzero(&vmrun, sizeof(vmrun)); - - error = vcpu_ioctl(vcpu, VM_RUN, &vmrun); - bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); - return (error); + return (vcpu_ioctl(vcpu, VM_RUN, vmrun)); } int diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -273,7 +273,7 @@ struct seg_desc *ret_desc); int vm_set_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *desc); -int vm_run(struct vcpu *vcpu, struct vm_exit *vme_user); +int vm_run(struct vcpu *vcpu); int vm_suspend(struct vm *vm, enum vm_suspend_how how); int vm_inject_nmi(struct vcpu *vcpu); int vm_nmi_pending(struct vcpu *vcpu); @@ -297,6 +297,7 @@ int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu); int vm_restart_instruction(struct vcpu *vcpu); struct vm_exit *vm_exitinfo(struct vcpu *vcpu); +cpuset_t *vm_exitinfo_cpuset(struct vcpu *vcpu); void vm_exit_suspended(struct vcpu *vcpu, uint64_t rip); void vm_exit_debug(struct vcpu *vcpu, uint64_t rip); void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t rip); @@ -754,9 +755,13 @@ enum vm_suspend_how how; } suspended; struct { + /* + * The destination vCPU mask is saved in 
struct vcpu's + * exitinfo_cpuset and is copied out to userspace + * separately to avoid ABI concerns. + */ uint32_t mode; uint8_t vector; - cpuset_t dmask; } ipi; struct vm_task_switch task_switch; } u; diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -89,7 +89,9 @@ struct vm_run { int cpuid; - struct vm_exit vm_exit; + cpuset_t *cpuset; /* CPU set storage */ + size_t cpusetsize; /* size of *cpuset, in bytes */ + struct vm_exit *vm_exit; /* exit info is copied out here */ }; struct vm_exception { @@ -349,7 +351,7 @@ }; #define VM_RUN \ - _IOWR('v', IOCNUM_RUN, struct vm_run) + _IOW('v', IOCNUM_RUN, struct vm_run) #define VM_SUSPEND \ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_REINIT \ diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -1146,7 +1146,7 @@ vmexit->exitcode = VM_EXITCODE_IPI; vmexit->u.ipi.mode = mode; vmexit->u.ipi.vector = vec; - vmexit->u.ipi.dmask = ipimask; + *vm_exitinfo_cpuset(vlapic->vcpu) = ipimask; *retu = true; } @@ -1166,7 +1166,7 @@ vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { struct vlapic *vlapic = vm_lapic(vcpu); - cpuset_t *dmask = &vme->u.ipi.dmask; + cpuset_t *dmask = vm_exitinfo_cpuset(vcpu); uint8_t vec = vme->u.ipi.vector; *retu = true; diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -123,6 +123,7 @@ uint64_t guest_xcr0; /* (i) guest %xcr0 register */ void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ + cpuset_t exitinfo_cpuset; /* (x) storage for vmexit handlers */ uint64_t nextrip; /* (x) next instruction to execute */ uint64_t tsc_offset; /* (o) TSC offsetting */ }; @@ -399,6 +400,12 @@ return (&vcpu->exitinfo); } +cpuset_t * +vm_exitinfo_cpuset(struct vcpu *vcpu) +{ + return (&vcpu->exitinfo_cpuset); +} + static int vmm_init(void) { @@ -1837,7 +1844,7 @@ } int -vm_run(struct vcpu *vcpu, struct 
vm_exit *vme_user) +vm_run(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; struct vm_eventinfo evinfo; @@ -1938,8 +1945,6 @@ vmm_stat_incr(vcpu, VMEXIT_USERSPACE, 1); VMM_CTR2(vcpu, "retu %d/%d", error, vme->exitcode); - /* copy the exit information */ - *vme_user = *vme; return (error); } diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -93,7 +93,29 @@ #define VM_SNAPSHOT_REQ_OLD \ _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old) -#endif + +struct vm_exit_ipi_13 { + uint32_t mode; + uint8_t vector; + __BITSET_DEFINE(, 256) dmask; +}; + +struct vm_exit_13 { + uint32_t exitcode; + int32_t inst_length; + uint64_t rip; + uint64_t u[120 / sizeof(uint64_t)]; +}; + +struct vm_run_13 { + int cpuid; + struct vm_exit_13 vm_exit; +}; + +#define VM_RUN_13 \ + _IOWR('v', IOCNUM_RUN, struct vm_run_13) + +#endif /* COMPAT_FREEBSD13 */ struct devmem_softc { int segid; @@ -396,6 +418,9 @@ struct vm_seg_desc *vmsegdesc; struct vm_register_set *vmregset; struct vm_run *vmrun; +#ifdef COMPAT_FREEBSD13 + struct vm_run_13 *vmrun_13; +#endif struct vm_exception *vmexc; struct vm_lapic_irq *vmirq; struct vm_lapic_msi *vmmsi; @@ -459,6 +484,9 @@ */ switch (cmd) { case VM_RUN: +#ifdef COMPAT_FREEBSD13 + case VM_RUN_13: +#endif case VM_GET_REGISTER: case VM_SET_REGISTER: case VM_GET_SEGMENT_DESCRIPTOR: @@ -579,11 +607,73 @@ break; } - switch(cmd) { - case VM_RUN: + switch (cmd) { + case VM_RUN: { + struct vm_exit *vme; + vmrun = (struct vm_run *)data; - error = vm_run(vcpu, &vmrun->vm_exit); + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error != 0) + break; + + error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); + if (error != 0) + break; + if (vme->exitcode == VM_EXITCODE_IPI) { + error = copyout(vm_exitinfo_cpuset(vcpu), + vmrun->cpuset, + min(vmrun->cpusetsize, sizeof(cpuset_t))); + if (error != 0) + break; + if (sizeof(cpuset_t) < vmrun->cpusetsize) { + uint8_t *p; + + p = (uint8_t 
*)vmrun->cpuset + + sizeof(cpuset_t); + while (error == 0 && + p < (uint8_t *)vmrun->cpuset + + vmrun->cpusetsize) { + error = subyte(p++, 0) != 0 ? EFAULT : 0; + } + } + } break; + } +#ifdef COMPAT_FREEBSD13 + case VM_RUN_13: { + struct vm_exit *vme; + struct vm_exit_13 *vme_13; + + vmrun_13 = (struct vm_run_13 *)data; + vme_13 = &vmrun_13->vm_exit; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error == 0) { + vme_13->exitcode = vme->exitcode; + vme_13->inst_length = vme->inst_length; + vme_13->rip = vme->rip; + memcpy(vme_13->u, &vme->u, sizeof(vme_13->u)); + if (vme->exitcode == VM_EXITCODE_IPI) { + struct vm_exit_ipi_13 *ipi; + cpuset_t *dmask; + int cpu; + + dmask = vm_exitinfo_cpuset(vcpu); + ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0]; + BIT_ZERO(256, &ipi->dmask); + CPU_FOREACH_ISSET(cpu, dmask) { + if (cpu >= 256) + break; + BIT_SET(256, cpu, &ipi->dmask); + } + } + } + break; + } +#endif case VM_SUSPEND: vmsuspend = (struct vm_suspend *)data; error = vm_suspend(sc->vm, vmsuspend->how); diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h --- a/usr.sbin/bhyve/bhyverun.h +++ b/usr.sbin/bhyve/bhyverun.h @@ -39,7 +39,7 @@ struct vcpu; struct vmctx; -struct vm_exit; +struct vm_run; void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len); #ifdef BHYVE_SNAPSHOT @@ -48,6 +48,6 @@ int fbsdrun_virtio_msix(void); -int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_exit *); +int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *); #endif diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -184,7 +184,7 @@ [EXIT_REASON_XRSTORS] = "XRSTORS" }; -typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_exit *); +typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *); int guest_ncpus; uint16_t cpu_cores, cpu_sockets, cpu_threads; @@ -592,11 +592,13 @@ } static int -vmexit_inout(struct vmctx *ctx, 
struct vcpu *vcpu, struct vm_exit *vme) +vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { + struct vm_exit *vme; int error; int bytes, port, in, out; + vme = vmrun->vm_exit; port = vme->u.inout.port; bytes = vme->u.inout.bytes; in = vme->u.inout.in; @@ -621,12 +623,16 @@ } static int -vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_exit *vme) +vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun) { + struct vm_exit *vme; uint64_t val; uint32_t eax, edx; int error; + vme = vmrun->vm_exit; + val = 0; error = emulate_rdmsr(vcpu, vme->u.msr.code, &val); if (error != 0) { @@ -650,10 +656,14 @@ } static int -vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_exit *vme) +vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun) { + struct vm_exit *vme; int error; + vme = vmrun->vm_exit; + error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval); if (error != 0) { fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", @@ -685,8 +695,11 @@ } static int -vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_exit *vme) +vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { + struct vm_exit *vme; + + vme = vmrun->vm_exit; fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); fprintf(stderr, "\treason\t\tVMX\n"); @@ -718,8 +731,11 @@ } static int -vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_exit *vme) +vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) { + struct vm_exit *vme; + + vme = vmrun->vm_exit; fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); fprintf(stderr, "\treason\t\tSVM\n"); @@ -733,10 +749,9 @@ static int vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_exit *vme) + struct vm_run *vmrun) { - - assert(vme->inst_length == 0); + assert(vmrun->vm_exit->inst_length == 0); stats.vmexit_bogus++; @@ -745,10 +760,9 @@ static int 
vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_exit *vme) + struct vm_run *vmrun) { - - assert(vme->inst_length == 0); + assert(vmrun->vm_exit->inst_length == 0); stats.vmexit_reqidle++; @@ -757,9 +771,8 @@ static int vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_exit *vme __unused) + struct vm_run *vmrun __unused) { - stats.vmexit_hlt++; /* @@ -772,9 +785,8 @@ static int vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_exit *vme __unused) + struct vm_run *vmrun __unused) { - stats.vmexit_pause++; return (VMEXIT_CONTINUE); @@ -782,10 +794,9 @@ static int vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_exit *vme) + struct vm_run *vmrun) { - - assert(vme->inst_length == 0); + assert(vmrun->vm_exit->inst_length == 0); stats.vmexit_mtrap++; @@ -802,12 +813,15 @@ static int vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_exit *vme) + struct vm_run *vmrun) { - int err, i, cs_d; + struct vm_exit *vme; struct vie *vie; + int err, i, cs_d; enum vm_cpu_mode mode; + vme = vmrun->vm_exit; + stats.vmexit_inst_emul++; vie = &vme->u.inst_emul.vie; @@ -852,11 +866,14 @@ static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; static int -vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_exit *vme) +vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { + struct vm_exit *vme; enum vm_suspend_how how; int vcpuid = vcpu_id(vcpu); + vme = vmrun->vm_exit; + how = vme->u.suspended.how; fbsdrun_deletecpu(vcpuid); @@ -894,7 +911,7 @@ static int vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_exit *vme __unused) + struct vm_run *vmrun __unused) { #ifdef BHYVE_SNAPSHOT @@ -914,22 +931,27 @@ static int vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_exit *vme) + struct vm_run *vmrun) { - - gdb_cpu_breakpoint(vcpu, vme); + gdb_cpu_breakpoint(vcpu, 
vmrun->vm_exit); return (VMEXIT_CONTINUE); } static int vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_exit *vme) + struct vm_run *vmrun) { + struct vm_exit *vme; + cpuset_t *dmask; int error = -1; int i; + + dmask = vmrun->cpuset; + vme = vmrun->vm_exit; + switch (vme->u.ipi.mode) { case APIC_DELMODE_INIT: - CPU_FOREACH_ISSET(i, &vme->u.ipi.dmask) { + CPU_FOREACH_ISSET(i, dmask) { error = vm_suspend_cpu(vcpu_info[i].vcpu); if (error) { warnx("%s: failed to suspend cpu %d\n", @@ -939,7 +961,7 @@ } break; case APIC_DELMODE_STARTUP: - CPU_FOREACH_ISSET(i, &vme->u.ipi.dmask) { + CPU_FOREACH_ISSET(i, dmask) { spinup_ap(vcpu_info[i].vcpu, vme->u.ipi.vector << PAGE_SHIFT); } @@ -974,15 +996,20 @@ vm_loop(struct vmctx *ctx, struct vcpu *vcpu) { struct vm_exit vme; + struct vm_run vmrun; int error, rc; enum vm_exitcode exitcode; - cpuset_t active_cpus; + cpuset_t active_cpus, dmask; error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu_id(vcpu), &active_cpus)); + vmrun.vm_exit = &vme; + vmrun.cpuset = &dmask; + vmrun.cpusetsize = sizeof(dmask); + while (1) { - error = vm_run(vcpu, &vme); + error = vm_run(vcpu, &vmrun); if (error != 0) break; @@ -993,7 +1020,7 @@ exit(4); } - rc = (*handler[exitcode])(ctx, vcpu, &vme); + rc = (*handler[exitcode])(ctx, vcpu, &vmrun); switch (rc) { case VMEXIT_CONTINUE: diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -704,7 +704,7 @@ } while (0) int -vmexit_task_switch(struct vmctx *ctx, struct vcpu *vcpu, struct vm_exit *vmexit) +vmexit_task_switch(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { struct seg_desc nt; struct tss32 oldtss, newtss; @@ -712,12 +712,14 @@ struct vm_guest_paging *paging, sup_paging; struct user_segment_descriptor nt_desc, ot_desc; struct iovec nt_iov[2], ot_iov[2]; + struct vm_exit *vmexit; uint64_t cr0, ot_base; uint32_t eip, ot_lim, access; int error, ext, 
fault, minlimit, nt_type, ot_type; enum task_switch_reason reason; uint16_t nt_sel, ot_sel; + vmexit = vmrun->vm_exit; task_switch = &vmexit->u.task_switch; nt_sel = task_switch->tsssel; ext = vmexit->u.task_switch.ext;