diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -77,6 +77,7 @@ usb_mouse.c \ vga.c \ virtio.c \ + vmexit.c \ vmgenc.c \ xmsr.c diff --git a/usr.sbin/bhyve/amd64/vmexit.c b/usr.sbin/bhyve/amd64/vmexit.c new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/amd64/vmexit.c @@ -0,0 +1,493 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "bhyverun.h" +#include "config.h" +#include "debug.h" +#include "gdb.h" +#include "inout.h" +#include "mem.h" +#ifdef BHYVE_SNAPSHOT +#include "snapshot.h" +#endif +#include "spinup_ap.h" +#include "vmexit.h" +#include "xmsr.h" + +static int +vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) +{ + struct vm_exit *vme; + int error; + int bytes, port, in; + + vme = vmrun->vm_exit; + port = vme->u.inout.port; + bytes = vme->u.inout.bytes; + in = vme->u.inout.in; + + error = emulate_inout(ctx, vcpu, vme); + if (error) { + fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", + in ? "in" : "out", + bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), + port, vme->rip); + return (VMEXIT_ABORT); + } else { + return (VMEXIT_CONTINUE); + } +} + +static int +vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun) +{ + struct vm_exit *vme; + uint64_t val; + uint32_t eax, edx; + int error; + + vme = vmrun->vm_exit; + + val = 0; + error = emulate_rdmsr(vcpu, vme->u.msr.code, &val); + if (error != 0) { + fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", + vme->u.msr.code, vcpu_id(vcpu)); + if (get_config_bool("x86.strictmsr")) { + vm_inject_gp(vcpu); + return (VMEXIT_CONTINUE); + } + } + + eax = val; + error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax); + assert(error == 0); + + edx = val >> 32; + error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx); + assert(error == 0); + + return (VMEXIT_CONTINUE); +} + +static int +vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun) +{ + struct vm_exit *vme; + int error; + + vme = vmrun->vm_exit; + + error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval); + if (error != 0) { + fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", + vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu)); + if (get_config_bool("x86.strictmsr")) { + vm_inject_gp(vcpu); + return (VMEXIT_CONTINUE); + } + } + return (VMEXIT_CONTINUE); +} + +static const char * const vmx_exit_reason_desc[] = { + [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", + [EXIT_REASON_EXT_INTR] = "External interrupt", + [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", + [EXIT_REASON_INIT] = "INIT signal", + [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", + [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", + [EXIT_REASON_SMI] = "Other SMI", + [EXIT_REASON_INTR_WINDOW] = "Interrupt window", + [EXIT_REASON_NMI_WINDOW] = "NMI window", + [EXIT_REASON_TASK_SWITCH] = "Task switch", + [EXIT_REASON_CPUID] = "CPUID", + [EXIT_REASON_GETSEC] = "GETSEC", + [EXIT_REASON_HLT] = "HLT", + [EXIT_REASON_INVD] = "INVD", + [EXIT_REASON_INVLPG] = "INVLPG", + [EXIT_REASON_RDPMC] = "RDPMC", + [EXIT_REASON_RDTSC] = "RDTSC", + [EXIT_REASON_RSM] = "RSM", + [EXIT_REASON_VMCALL] = "VMCALL", + [EXIT_REASON_VMCLEAR] = "VMCLEAR", + [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", + [EXIT_REASON_VMPTRLD] = "VMPTRLD", + [EXIT_REASON_VMPTRST] = "VMPTRST", + [EXIT_REASON_VMREAD] = "VMREAD", + [EXIT_REASON_VMRESUME] = "VMRESUME", + [EXIT_REASON_VMWRITE] = "VMWRITE", + [EXIT_REASON_VMXOFF] = "VMXOFF", + [EXIT_REASON_VMXON] = "VMXON", + [EXIT_REASON_CR_ACCESS] = "Control-register accesses", + [EXIT_REASON_DR_ACCESS] = "MOV DR", + [EXIT_REASON_INOUT] = "I/O instruction", + [EXIT_REASON_RDMSR] = "RDMSR", + [EXIT_REASON_WRMSR] = "WRMSR", + [EXIT_REASON_INVAL_VMCS] = + "VM-entry failure due to invalid guest state", + [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", + [EXIT_REASON_MWAIT] = "MWAIT", + [EXIT_REASON_MTF] = "Monitor trap flag", + [EXIT_REASON_MONITOR] = "MONITOR", + [EXIT_REASON_PAUSE] = "PAUSE", + [EXIT_REASON_MCE_DURING_ENTRY] = + "VM-entry failure due to machine-check event", + [EXIT_REASON_TPR] = "TPR below threshold", + [EXIT_REASON_APIC_ACCESS] = "APIC access", + [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", + [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", + [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", + [EXIT_REASON_EPT_FAULT] = "EPT violation", + [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", + [EXIT_REASON_INVEPT] = "INVEPT", + [EXIT_REASON_RDTSCP] = "RDTSCP", + [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", + [EXIT_REASON_INVVPID] = "INVVPID", + [EXIT_REASON_WBINVD] = "WBINVD", + [EXIT_REASON_XSETBV] = "XSETBV", + [EXIT_REASON_APIC_WRITE] = "APIC write", + [EXIT_REASON_RDRAND] = "RDRAND", + [EXIT_REASON_INVPCID] = "INVPCID", + [EXIT_REASON_VMFUNC] = "VMFUNC", + [EXIT_REASON_ENCLS] = "ENCLS", + [EXIT_REASON_RDSEED] = "RDSEED", + [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", + [EXIT_REASON_XSAVES] = "XSAVES", + [EXIT_REASON_XRSTORS] = "XRSTORS" +}; + +static const char * +vmexit_vmx_desc(uint32_t exit_reason) +{ + + if (exit_reason >= nitems(vmx_exit_reason_desc) || + vmx_exit_reason_desc[exit_reason] == NULL) + return ("Unknown"); + return (vmx_exit_reason_desc[exit_reason]); +} + +#define DEBUG_EPT_MISCONFIG +#ifdef DEBUG_EPT_MISCONFIG +#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 + +static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; +static int ept_misconfig_ptenum; +#endif + +static int +vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) +{ + struct vm_exit *vme; + + vme = vmrun->vm_exit; + + fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); + fprintf(stderr, "\treason\t\tVMX\n"); + fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); + fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); + fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status); + fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason, + vmexit_vmx_desc(vme->u.vmx.exit_reason)); + fprintf(stderr, "\tqualification\t0x%016lx\n", + vme->u.vmx.exit_qualification); + fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type); + fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error); +#ifdef DEBUG_EPT_MISCONFIG + if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { + vm_get_register(vcpu, + VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), + &ept_misconfig_gpa); + vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, + &ept_misconfig_ptenum); + fprintf(stderr, "\tEPT misconfiguration:\n"); + fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); + fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", + ept_misconfig_ptenum, ept_misconfig_pte[0], + ept_misconfig_pte[1], ept_misconfig_pte[2], + ept_misconfig_pte[3]); + } +#endif /* DEBUG_EPT_MISCONFIG */ + return (VMEXIT_ABORT); +} + +static int +vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) +{ + struct vm_exit *vme; + + vme = vmrun->vm_exit; + + fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); + fprintf(stderr, "\treason\t\tSVM\n"); + fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); + fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); + fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode); + fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1); + fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2); + return (VMEXIT_ABORT); +} + +static int +vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, + struct vm_run *vmrun) +{ + assert(vmrun->vm_exit->inst_length == 0); + + return (VMEXIT_CONTINUE); +} + +static int +vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, + struct vm_run *vmrun) +{ + assert(vmrun->vm_exit->inst_length == 0); + + return (VMEXIT_CONTINUE); +} + +static int +vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, + struct vm_run *vmrun __unused) +{ + /* + * Just continue execution with the next instruction. We use + * the HLT VM exit as a way to be friendly with the host + * scheduler. + */ + return (VMEXIT_CONTINUE); +} + +static int +vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, + struct vm_run *vmrun __unused) +{ + return (VMEXIT_CONTINUE); +} + +static int +vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun) +{ + assert(vmrun->vm_exit->inst_length == 0); + +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_suspend(vcpu_id(vcpu)); +#endif + gdb_cpu_mtrap(vcpu); +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_resume(vcpu_id(vcpu)); +#endif + + return (VMEXIT_CONTINUE); +} + +static int +vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun) +{ + struct vm_exit *vme; + struct vie *vie; + int err, i, cs_d; + enum vm_cpu_mode mode; + + vme = vmrun->vm_exit; + + vie = &vme->u.inst_emul.vie; + if (!vie->decoded) { + /* + * Attempt to decode in userspace as a fallback. This allows + * updating instruction decode in bhyve without rebooting the + * kernel (rapid prototyping), albeit with much slower + * emulation. + */ + vie_restart(vie); + mode = vme->u.inst_emul.paging.cpu_mode; + cs_d = vme->u.inst_emul.cs_d; + if (vmm_decode_instruction(mode, cs_d, vie) != 0) + goto fail; + if (vm_set_register(vcpu, VM_REG_GUEST_RIP, + vme->rip + vie->num_processed) != 0) + goto fail; + } + + err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie, + &vme->u.inst_emul.paging); + if (err) { + if (err == ESRCH) { + EPRINTLN("Unhandled memory access to 0x%lx\n", + vme->u.inst_emul.gpa); + } + goto fail; + } + + return (VMEXIT_CONTINUE); + +fail: + fprintf(stderr, "Failed to emulate instruction sequence [ "); + for (i = 0; i < vie->num_valid; i++) + fprintf(stderr, "%02x", vie->inst[i]); + FPRINTLN(stderr, " ] at 0x%lx", vme->rip); + return (VMEXIT_ABORT); +} + +static int +vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) +{ + struct vm_exit *vme; + enum vm_suspend_how how; + int vcpuid = vcpu_id(vcpu); + + vme = vmrun->vm_exit; + + how = vme->u.suspended.how; + + fbsdrun_deletecpu(vcpuid); + + switch (how) { + case VM_SUSPEND_RESET: + exit(0); + case VM_SUSPEND_POWEROFF: + if (get_config_bool_default("destroy_on_poweroff", false)) + vm_destroy(ctx); + exit(1); + case VM_SUSPEND_HALT: + exit(2); + case VM_SUSPEND_TRIPLEFAULT: + exit(3); + default: + fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); + exit(100); + } + return (0); /* NOTREACHED */ +} + +static int +vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun __unused) +{ + +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_suspend(vcpu_id(vcpu)); +#endif + gdb_cpu_suspend(vcpu); +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_resume(vcpu_id(vcpu)); +#endif + /* + * XXX-MJ sleep for a short period to avoid chewing up the CPU in the + * window between activation of the vCPU thread and the STARTUP IPI. + */ + usleep(1000); + return (VMEXIT_CONTINUE); +} + +static int +vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu, + struct vm_run *vmrun) +{ + gdb_cpu_breakpoint(vcpu, vmrun->vm_exit); + return (VMEXIT_CONTINUE); +} + +static int +vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, + struct vm_run *vmrun) +{ + struct vm_exit *vme; + cpuset_t *dmask; + int error = -1; + int i; + + dmask = vmrun->cpuset; + vme = vmrun->vm_exit; + + switch (vme->u.ipi.mode) { + case APIC_DELMODE_INIT: + CPU_FOREACH_ISSET(i, dmask) { + error = fbsdrun_suspendcpu(i); + if (error) { + warnx("failed to suspend cpu %d", i); + break; + } + } + break; + case APIC_DELMODE_STARTUP: + CPU_FOREACH_ISSET(i, dmask) { + spinup_ap(fbsdrun_vcpu(i), + vme->u.ipi.vector << PAGE_SHIFT); + } + error = 0; + break; + default: + break; + } + + return (error); +} + +int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *); + +const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = { + [VM_EXITCODE_INOUT] = vmexit_inout, + [VM_EXITCODE_INOUT_STR] = vmexit_inout, + [VM_EXITCODE_VMX] = vmexit_vmx, + [VM_EXITCODE_SVM] = vmexit_svm, + [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_REQIDLE] = vmexit_reqidle, + [VM_EXITCODE_RDMSR] = vmexit_rdmsr, + [VM_EXITCODE_WRMSR] = vmexit_wrmsr, + [VM_EXITCODE_MTRAP] = vmexit_mtrap, + [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, + [VM_EXITCODE_SUSPENDED] = vmexit_suspend, + [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, + [VM_EXITCODE_DEBUG] = vmexit_debug, + [VM_EXITCODE_BPT] = vmexit_breakpoint, + [VM_EXITCODE_IPI] = vmexit_ipi, + [VM_EXITCODE_HLT] = vmexit_hlt, + [VM_EXITCODE_PAUSE] = vmexit_pause, +}; diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h --- a/usr.sbin/bhyve/bhyverun.h +++ b/usr.sbin/bhyve/bhyverun.h @@ -46,6 +46,13 @@ uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr); #endif +struct vcpu; +struct vcpu *fbsdrun_vcpu(int vcpuid); +void fbsdrun_deletecpu(int vcpuid); +int fbsdrun_suspendcpu(int vcpuid); + int fbsdrun_virtio_msix(void); +typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *); + #endif diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -45,11 +45,7 @@ #include #endif -#include -#include - #include -#include #ifndef WITHOUT_CAPSICUM #include @@ -77,11 +73,6 @@ #include #endif -#include -#ifndef WITHOUT_CAPSICUM -#include -#endif -#include #include #include "bhyverun.h" @@ -115,83 +106,14 @@ #ifdef BHYVE_SNAPSHOT #include "snapshot.h" #endif -#include "xmsr.h" -#include "spinup_ap.h" #include "rtc.h" #include "vmgenc.h" +#include "vmexit.h" +#include "xmsr.h" #define MB (1024UL * 1024) #define GB (1024UL * MB) -static const char * const vmx_exit_reason_desc[] = { - [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", - [EXIT_REASON_EXT_INTR] = "External interrupt", - [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", - [EXIT_REASON_INIT] = "INIT signal", - [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", - [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", - [EXIT_REASON_SMI] = "Other SMI", - [EXIT_REASON_INTR_WINDOW] = "Interrupt window", - [EXIT_REASON_NMI_WINDOW] = "NMI window", - [EXIT_REASON_TASK_SWITCH] = "Task switch", - [EXIT_REASON_CPUID] = "CPUID", - [EXIT_REASON_GETSEC] = "GETSEC", - [EXIT_REASON_HLT] = "HLT", - [EXIT_REASON_INVD] = "INVD", - [EXIT_REASON_INVLPG] = "INVLPG", - [EXIT_REASON_RDPMC] = "RDPMC", - [EXIT_REASON_RDTSC] = "RDTSC", - [EXIT_REASON_RSM] = "RSM", - [EXIT_REASON_VMCALL] = "VMCALL", - [EXIT_REASON_VMCLEAR] = "VMCLEAR", - [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", - [EXIT_REASON_VMPTRLD] = "VMPTRLD", - [EXIT_REASON_VMPTRST] = "VMPTRST", - [EXIT_REASON_VMREAD] = "VMREAD", - [EXIT_REASON_VMRESUME] = "VMRESUME", - [EXIT_REASON_VMWRITE] = "VMWRITE", - [EXIT_REASON_VMXOFF] = "VMXOFF", - [EXIT_REASON_VMXON] = "VMXON", - [EXIT_REASON_CR_ACCESS] = "Control-register accesses", - [EXIT_REASON_DR_ACCESS] = "MOV DR", - [EXIT_REASON_INOUT] = "I/O instruction", - [EXIT_REASON_RDMSR] = "RDMSR", - [EXIT_REASON_WRMSR] = "WRMSR", - [EXIT_REASON_INVAL_VMCS] = - "VM-entry failure due to invalid guest state", - [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", - [EXIT_REASON_MWAIT] = "MWAIT", - [EXIT_REASON_MTF] = "Monitor trap flag", - [EXIT_REASON_MONITOR] = "MONITOR", - [EXIT_REASON_PAUSE] = "PAUSE", - [EXIT_REASON_MCE_DURING_ENTRY] = - "VM-entry failure due to machine-check event", - [EXIT_REASON_TPR] = "TPR below threshold", - [EXIT_REASON_APIC_ACCESS] = "APIC access", - [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", - [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", - [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", - [EXIT_REASON_EPT_FAULT] = "EPT violation", - [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", - [EXIT_REASON_INVEPT] = "INVEPT", - [EXIT_REASON_RDTSCP] = "RDTSCP", - [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", - [EXIT_REASON_INVVPID] = "INVVPID", - [EXIT_REASON_WBINVD] = "WBINVD", - [EXIT_REASON_XSETBV] = "XSETBV", - [EXIT_REASON_APIC_WRITE] = "APIC write", - [EXIT_REASON_RDRAND] = "RDRAND", - [EXIT_REASON_INVPCID] = "INVPCID", - [EXIT_REASON_VMFUNC] = "VMFUNC", - [EXIT_REASON_ENCLS] = "ENCLS", - [EXIT_REASON_RDSEED] = "RDSEED", - [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", - [EXIT_REASON_XSAVES] = "XSAVES", - [EXIT_REASON_XRSTORS] = "XRSTORS" -}; - -typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *); - int guest_ncpus; uint16_t cpu_cores, cpu_sockets, cpu_threads; @@ -511,6 +433,12 @@ return (get_config_bool_default("virtio_msix", true)); } +struct vcpu * +fbsdrun_vcpu(int vcpuid) +{ + return (vcpu_info[vcpuid].vcpu); +} + static void * fbsdrun_start_thread(void *param) { @@ -557,7 +485,7 @@ assert(error == 0); } -static void +void fbsdrun_deletecpu(int vcpu) { static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; @@ -584,376 +512,12 @@ pthread_mutex_unlock(&resetcpu_mtx); } -static int -vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) -{ - struct vm_exit *vme; - int error; - int bytes, port, in; - - vme = vmrun->vm_exit; - port = vme->u.inout.port; - bytes = vme->u.inout.bytes; - in = vme->u.inout.in; - - error = emulate_inout(ctx, vcpu, vme); - if (error) { - fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", - in ? "in" : "out", - bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), - port, vme->rip); - return (VMEXIT_ABORT); - } else { - return (VMEXIT_CONTINUE); - } -} - -static int -vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_run *vmrun) -{ - struct vm_exit *vme; - uint64_t val; - uint32_t eax, edx; - int error; - - vme = vmrun->vm_exit; - - val = 0; - error = emulate_rdmsr(vcpu, vme->u.msr.code, &val); - if (error != 0) { - fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", - vme->u.msr.code, vcpu_id(vcpu)); - if (get_config_bool("x86.strictmsr")) { - vm_inject_gp(vcpu); - return (VMEXIT_CONTINUE); - } - } - - eax = val; - error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax); - assert(error == 0); - - edx = val >> 32; - error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx); - assert(error == 0); - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_run *vmrun) -{ - struct vm_exit *vme; - int error; - - vme = vmrun->vm_exit; - - error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval); - if (error != 0) { - fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", - vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu)); - if (get_config_bool("x86.strictmsr")) { - vm_inject_gp(vcpu); - return (VMEXIT_CONTINUE); - } - } - return (VMEXIT_CONTINUE); -} - -#define DEBUG_EPT_MISCONFIG -#ifdef DEBUG_EPT_MISCONFIG -#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 - -static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; -static int ept_misconfig_ptenum; -#endif - -static const char * -vmexit_vmx_desc(uint32_t exit_reason) -{ - - if (exit_reason >= nitems(vmx_exit_reason_desc) || - vmx_exit_reason_desc[exit_reason] == NULL) - return ("Unknown"); - return (vmx_exit_reason_desc[exit_reason]); -} - -static int -vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) -{ - struct vm_exit *vme; - - vme = vmrun->vm_exit; - - fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); - fprintf(stderr, "\treason\t\tVMX\n"); - fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); - fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); - fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status); - fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason, - vmexit_vmx_desc(vme->u.vmx.exit_reason)); - fprintf(stderr, "\tqualification\t0x%016lx\n", - vme->u.vmx.exit_qualification); - fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type); - fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error); -#ifdef DEBUG_EPT_MISCONFIG - if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { - vm_get_register(vcpu, - VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), - &ept_misconfig_gpa); - vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, - &ept_misconfig_ptenum); - fprintf(stderr, "\tEPT misconfiguration:\n"); - fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); - fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", - ept_misconfig_ptenum, ept_misconfig_pte[0], - ept_misconfig_pte[1], ept_misconfig_pte[2], - ept_misconfig_pte[3]); - } -#endif /* DEBUG_EPT_MISCONFIG */ - return (VMEXIT_ABORT); -} - -static int -vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) -{ - struct vm_exit *vme; - - vme = vmrun->vm_exit; - - fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); - fprintf(stderr, "\treason\t\tSVM\n"); - fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); - fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); - fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode); - fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1); - fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2); - return (VMEXIT_ABORT); -} - -static int -vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_run *vmrun) -{ - assert(vmrun->vm_exit->inst_length == 0); - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_run *vmrun) -{ - assert(vmrun->vm_exit->inst_length == 0); - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_run *vmrun __unused) -{ - /* - * Just continue execution with the next instruction. We use - * the HLT VM exit as a way to be friendly with the host - * scheduler. - */ - return (VMEXIT_CONTINUE); -} - -static int -vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_run *vmrun __unused) -{ - return (VMEXIT_CONTINUE); -} - -static int -vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_run *vmrun) -{ - assert(vmrun->vm_exit->inst_length == 0); - -#ifdef BHYVE_SNAPSHOT - checkpoint_cpu_suspend(vcpu_id(vcpu)); -#endif - gdb_cpu_mtrap(vcpu); -#ifdef BHYVE_SNAPSHOT - checkpoint_cpu_resume(vcpu_id(vcpu)); -#endif - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_run *vmrun) -{ - struct vm_exit *vme; - struct vie *vie; - int err, i, cs_d; - enum vm_cpu_mode mode; - - vme = vmrun->vm_exit; - - vie = &vme->u.inst_emul.vie; - if (!vie->decoded) { - /* - * Attempt to decode in userspace as a fallback. This allows - * updating instruction decode in bhyve without rebooting the - * kernel (rapid prototyping), albeit with much slower - * emulation. - */ - vie_restart(vie); - mode = vme->u.inst_emul.paging.cpu_mode; - cs_d = vme->u.inst_emul.cs_d; - if (vmm_decode_instruction(mode, cs_d, vie) != 0) - goto fail; - if (vm_set_register(vcpu, VM_REG_GUEST_RIP, - vme->rip + vie->num_processed) != 0) - goto fail; - } - - err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie, - &vme->u.inst_emul.paging); - if (err) { - if (err == ESRCH) { - EPRINTLN("Unhandled memory access to 0x%lx\n", - vme->u.inst_emul.gpa); - } - goto fail; - } - - return (VMEXIT_CONTINUE); - -fail: - fprintf(stderr, "Failed to emulate instruction sequence [ "); - for (i = 0; i < vie->num_valid; i++) - fprintf(stderr, "%02x", vie->inst[i]); - FPRINTLN(stderr, " ] at 0x%lx", vme->rip); - return (VMEXIT_ABORT); -} - -static int -vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) -{ - struct vm_exit *vme; - enum vm_suspend_how how; - int vcpuid = vcpu_id(vcpu); - - vme = vmrun->vm_exit; - - how = vme->u.suspended.how; - - fbsdrun_deletecpu(vcpuid); - - switch (how) { - case VM_SUSPEND_RESET: - exit(0); - case VM_SUSPEND_POWEROFF: - if (get_config_bool_default("destroy_on_poweroff", false)) - vm_destroy(ctx); - exit(1); - case VM_SUSPEND_HALT: - exit(2); - case VM_SUSPEND_TRIPLEFAULT: - exit(3); - default: - fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); - exit(100); - } - return (0); /* NOTREACHED */ -} - -static int -vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_run *vmrun __unused) -{ - -#ifdef BHYVE_SNAPSHOT - checkpoint_cpu_suspend(vcpu_id(vcpu)); -#endif - gdb_cpu_suspend(vcpu); -#ifdef BHYVE_SNAPSHOT - checkpoint_cpu_resume(vcpu_id(vcpu)); -#endif - /* - * XXX-MJ sleep for a short period to avoid chewing up the CPU in the - * window between activation of the vCPU thread and the STARTUP IPI. - */ - usleep(1000); - return (VMEXIT_CONTINUE); -} - -static int -vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu, - struct vm_run *vmrun) -{ - gdb_cpu_breakpoint(vcpu, vmrun->vm_exit); - return (VMEXIT_CONTINUE); -} - -static int -vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, - struct vm_run *vmrun) +int +fbsdrun_suspendcpu(int vcpuid) { - struct vm_exit *vme; - cpuset_t *dmask; - int error = -1; - int i; - - dmask = vmrun->cpuset; - vme = vmrun->vm_exit; - - switch (vme->u.ipi.mode) { - case APIC_DELMODE_INIT: - CPU_FOREACH_ISSET(i, dmask) { - error = vm_suspend_cpu(vcpu_info[i].vcpu); - if (error) { - warnx("%s: failed to suspend cpu %d\n", - __func__, i); - break; - } - } - break; - case APIC_DELMODE_STARTUP: - CPU_FOREACH_ISSET(i, dmask) { - spinup_ap(vcpu_info[i].vcpu, - vme->u.ipi.vector << PAGE_SHIFT); - } - error = 0; - break; - default: - break; - } - - return (error); + return (vm_suspend_cpu(vcpu_info[vcpuid].vcpu)); } -int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *); - -static const vmexit_handler_t handler[VM_EXITCODE_MAX] = { - [VM_EXITCODE_INOUT] = vmexit_inout, - [VM_EXITCODE_INOUT_STR] = vmexit_inout, - [VM_EXITCODE_VMX] = vmexit_vmx, - [VM_EXITCODE_SVM] = vmexit_svm, - [VM_EXITCODE_BOGUS] = vmexit_bogus, - [VM_EXITCODE_REQIDLE] = vmexit_reqidle, - [VM_EXITCODE_RDMSR] = vmexit_rdmsr, - [VM_EXITCODE_WRMSR] = vmexit_wrmsr, - [VM_EXITCODE_MTRAP] = vmexit_mtrap, - [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, - [VM_EXITCODE_SUSPENDED] = vmexit_suspend, - [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, - [VM_EXITCODE_DEBUG] = vmexit_debug, - [VM_EXITCODE_BPT] = vmexit_breakpoint, - [VM_EXITCODE_IPI] = vmexit_ipi, - [VM_EXITCODE_HLT] = vmexit_hlt, - [VM_EXITCODE_PAUSE] = vmexit_pause, -}; - static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu) { @@ -976,13 +540,13 @@ break; exitcode = vme.exitcode; - if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { - fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", - exitcode); + if (exitcode >= VM_EXITCODE_MAX || + vmexit_handlers[exitcode] == NULL) { + warnx("vm_loop: unexpected exitcode 0x%x", exitcode); exit(4); } - rc = (*handler[exitcode])(ctx, vcpu, &vmrun); + rc = (*vmexit_handlers[exitcode])(ctx, vcpu, &vmrun); switch (rc) { case VMEXIT_CONTINUE: diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/vmexit.h copy from usr.sbin/bhyve/bhyverun.h copy to usr.sbin/bhyve/vmexit.h --- a/usr.sbin/bhyve/bhyverun.h +++ b/usr.sbin/bhyve/vmexit.h @@ -24,28 +24,11 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#ifndef _FBSDRUN_H_ -#define _FBSDRUN_H_ - -#define VMEXIT_CONTINUE (0) -#define VMEXIT_ABORT (-1) - -extern int guest_ncpus; -extern uint16_t cpu_cores, cpu_sockets, cpu_threads; - -struct vcpu; -struct vmctx; -struct vm_run; - -void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len); -#ifdef BHYVE_SNAPSHOT -uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr); -#endif +#ifndef _VMEXIT_H_ +#define _VMEXIT_H_ -int fbsdrun_virtio_msix(void); +extern const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX]; -#endif +#endif /* !_VMEXIT_H_ */