Index: head/usr.sbin/bhyve/Makefile =================================================================== --- head/usr.sbin/bhyve/Makefile (revision 288521) +++ head/usr.sbin/bhyve/Makefile (revision 288522) @@ -1,51 +1,52 @@ # # $FreeBSD$ # PROG= bhyve DEBUG_FLAGS= -g -O0 MAN= bhyve.8 SRCS= \ atkbdc.c \ acpi.c \ bhyverun.c \ block_if.c \ bootrom.c \ consport.c \ dbgport.c \ + fwctl.c \ inout.c \ ioapic.c \ mem.c \ mevent.c \ mptbl.c \ pci_ahci.c \ pci_emul.c \ pci_hostbridge.c \ pci_irq.c \ pci_lpc.c \ pci_passthru.c \ pci_virtio_block.c \ pci_virtio_net.c \ pci_virtio_rnd.c \ pci_uart.c \ pm.c \ post.c \ rtc.c \ smbiostbl.c \ task_switch.c \ uart_emul.c \ virtio.c \ xmsr.c \ spinup_ap.c .PATH: ${.CURDIR}/../../sys/amd64/vmm SRCS+= vmm_instruction_emul.c LIBADD= vmmapi md pthread WARNS?= 2 .include Index: head/usr.sbin/bhyve/bhyverun.c =================================================================== --- head/usr.sbin/bhyve/bhyverun.c (revision 288521) +++ head/usr.sbin/bhyve/bhyverun.c (revision 288522) @@ -1,969 +1,973 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "acpi.h" #include "inout.h" #include "dbgport.h" +#include "fwctl.h" #include "ioapic.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "smbiostbl.h" #include "xmsr.h" #include "spinup_ap.h" #include "rtc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ #define MB (1024UL * 1024) #define GB (1024UL * MB) typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); char *vmname; int guest_ncpus; char *guest_uuid_str; static int guest_vmexit_on_hlt, guest_vmexit_on_pause; static int virtio_msix = 1; static int x2apic_mode = 0; /* default is xAPIC */ static int strictio; static int strictmsr = 1; static int acpi; static char *progname; static const int BSP = 0; static cpuset_t cpumask; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; uint64_t vmexit_inst_emul; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; } stats; struct mt_vmm_info { pthread_t mt_thr; struct vmctx *mt_ctx; int mt_vcpu; } mt_vmm_info[VM_MAXCPU]; static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; static void usage(int code) { fprintf(stderr, "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g ] [-l ]\n" " %*s [-m mem] [-p vcpu:hostcpu] [-s ] [-U uuid] \n" " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" " -c: # cpus (default 1)\n" " -C: include guest memory in core file\n" " -e: exit on unhandled I/O access\n" " -g: gdb port\n" " -h: help\n" " -H: vmexit from the guest on hlt\n" " -l: LPC device configuration\n" " -m: memory size in MB\n" " -p: pin 'vcpu' to 'hostcpu'\n" " -P: vmexit from the guest on pause\n" " -s: PCI slot config\n" " -S: guest memory cannot be swapped\n" " -u: RTC keeps UTC time\n" " -U: uuid\n" " -w: ignore unimplemented MSRs\n" " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", progname, (int)strlen(progname), ""); exit(code); } static int pincpu_parse(const char *opt) { int vcpu, pcpu; if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { fprintf(stderr, "invalid format: %s\n", opt); return (-1); } if (vcpu < 0 || vcpu >= VM_MAXCPU) { fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", vcpu, VM_MAXCPU - 1); return (-1); } if (pcpu < 0 || pcpu >= CPU_SETSIZE) { fprintf(stderr, "hostcpu '%d' outside valid range from " "0 to %d\n", pcpu, CPU_SETSIZE - 1); return (-1); } if (vcpumap[vcpu] == NULL) { if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { perror("malloc"); return (-1); } CPU_ZERO(vcpumap[vcpu]); } CPU_SET(pcpu, vcpumap[vcpu]); return (0); } void vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, int errcode) { struct vmctx *ctx; int error, restart_instruction; ctx = arg; restart_instruction = 1; error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, restart_instruction); assert(error == 0); } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } int fbsdrun_vmexit_on_pause(void) { return (guest_vmexit_on_pause); } int fbsdrun_vmexit_on_hlt(void) { return (guest_vmexit_on_hlt); } int fbsdrun_virtio_msix(void) { return (virtio_msix); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct mt_vmm_info *mtp; int vcpu; mtp = param; vcpu = mtp->mt_vcpu; snprintf(tname, sizeof(tname), "vcpu %d", vcpu); pthread_set_name_np(mtp->mt_thr, tname); vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ exit(1); return (NULL); } void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) { int error; assert(fromcpu == BSP); /* * The 'newcpu' must be activated in the context of 'fromcpu'. If * vm_activate_cpu() is delayed until newcpu's pthread starts running * then vmm.ko is out-of-sync with bhyve and this can create a race * with vm_suspend(). */ error = vm_activate_cpu(ctx, newcpu); assert(error == 0); CPU_SET_ATOMIC(newcpu, &cpumask); /* * Set up the vmexit struct to allow execution to start * at the given RIP */ vmexit[newcpu].rip = rip; vmexit[newcpu].inst_length = 0; mt_vmm_info[newcpu].mt_ctx = ctx; mt_vmm_info[newcpu].mt_vcpu = newcpu; error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, fbsdrun_start_thread, &mt_vmm_info[newcpu]); assert(error == 0); } static int fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) { if (!CPU_ISSET(vcpu, &cpumask)) { fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); exit(1); } CPU_CLR_ATOMIC(vcpu, &cpumask); return (CPU_EMPTY(&cpumask)); } static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) { #if BHYVE_DEBUG /* * put guest-driven debug here */ #endif return (VMEXIT_CONTINUE); } static int vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; int bytes, port, in, out, string; int vcpu; vcpu = *pvcpu; port = vme->u.inout.port; bytes = vme->u.inout.bytes; string = vme->u.inout.string; in = vme->u.inout.in; out = !in; /* Extra-special case of host notifications */ if (out && port == GUEST_NIO_PORT) { error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); return (error); } error = emulate_inout(ctx, vcpu, vme, strictio); if (error) { fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port, vmexit->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); } } static int vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { uint64_t val; uint32_t eax, edx; int error; val = 0; error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); if (error != 0) { fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } eax = val; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); assert(error == 0); edx = val >> 32; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); assert(error == 0); return (VMEXIT_CONTINUE); } static int vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); if (error != 0) { fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } return (VMEXIT_CONTINUE); } static int vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int newcpu; int retval = VMEXIT_CONTINUE; newcpu = spinup_ap(ctx, *pvcpu, vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); return (retval); } #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG #define EXIT_REASON_EPT_MISCONFIG 49 #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 #define VMCS_IDENT(x) ((x) | 0x80000000) static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tVMX\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); fprintf(stderr, "\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); #ifdef DEBUG_EPT_MISCONFIG if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { vm_get_register(ctx, *pvcpu, VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), &ept_misconfig_gpa); vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, &ept_misconfig_ptenum); fprintf(stderr, "\tEPT misconfiguration:\n"); fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", ept_misconfig_ptenum, ept_misconfig_pte[0], ept_misconfig_pte[1], ept_misconfig_pte[2], ept_misconfig_pte[3]); } #endif /* DEBUG_EPT_MISCONFIG */ return (VMEXIT_ABORT); } static int vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tSVM\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); return (VMEXIT_ABORT); } static int vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_bogus++; return (VMEXIT_CONTINUE); } static int vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_reqidle++; return (VMEXIT_CONTINUE); } static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_hlt++; /* * Just continue execution with the next instruction. We use * the HLT VM exit as a way to be friendly with the host * scheduler. */ return (VMEXIT_CONTINUE); } static int vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_pause++; return (VMEXIT_CONTINUE); } static int vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_mtrap++; return (VMEXIT_CONTINUE); } static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { int err, i; struct vie *vie; stats.vmexit_inst_emul++; vie = &vmexit->u.inst_emul.vie; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie, &vmexit->u.inst_emul.paging); if (err) { if (err == ESRCH) { fprintf(stderr, "Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } fprintf(stderr, "Failed to emulate instruction ["); for (i = 0; i < vie->num_valid; i++) { fprintf(stderr, "0x%02x%s", vie->inst[i], i != (vie->num_valid - 1) ? " " : ""); } fprintf(stderr, "] at 0x%lx\n", vmexit->rip); return (VMEXIT_ABORT); } return (VMEXIT_CONTINUE); } static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; static int vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { enum vm_suspend_how how; how = vmexit->u.suspended.how; fbsdrun_deletecpu(ctx, *pvcpu); if (*pvcpu != BSP) { pthread_mutex_lock(&resetcpu_mtx); pthread_cond_signal(&resetcpu_cond); pthread_mutex_unlock(&resetcpu_mtx); pthread_exit(NULL); } pthread_mutex_lock(&resetcpu_mtx); while (!CPU_EMPTY(&cpumask)) { pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); } pthread_mutex_unlock(&resetcpu_mtx); switch (how) { case VM_SUSPEND_RESET: exit(0); case VM_SUSPEND_POWEROFF: exit(1); case VM_SUSPEND_HALT: exit(2); case VM_SUSPEND_TRIPLEFAULT: exit(3); default: fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); exit(100); } return (0); /* NOTREACHED */ } static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, }; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) { int error, rc, prevcpu; enum vm_exitcode exitcode; cpuset_t active_cpus; if (vcpumap[vcpu] != NULL) { error = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), vcpumap[vcpu]); assert(error == 0); } error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu, &active_cpus)); error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); assert(error == 0); while (1) { error = vm_run(ctx, vcpu, &vmexit[vcpu]); if (error != 0) break; prevcpu = vcpu; exitcode = vmexit[vcpu].exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(1); } rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); switch (rc) { case VMEXIT_CONTINUE: break; case VMEXIT_ABORT: abort(); default: exit(1); } } fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx) { int tmp, error; error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. */ if (error == 0) return (VM_MAXCPU); else return (1); } void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) { int err, tmp; if (fbsdrun_vmexit_on_hlt()) { err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { fprintf(stderr, "VM exit on HLT not supported\n"); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (fbsdrun_vmexit_on_pause()) { /* * pause exit support required for this mode */ err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { fprintf(stderr, "SMP mux requested, no pause support\n"); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (x2apic_mode) err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); else err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); if (err) { fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } static struct vmctx * do_open(const char *vmname) { struct vmctx *ctx; int error; bool reinit, romboot; reinit = romboot = false; if (lpc_bootrom()) romboot = true; error = vm_create(vmname); if (error) { if (errno == EEXIST) { if (romboot) { reinit = true; } else { /* * The virtual machine has been setup by the * userspace bootloader. */ } } else { perror("vm_create"); exit(1); } } else { if (!romboot) { /* * If the virtual machine was just created then a * bootrom must be configured to boot it. */ fprintf(stderr, "virtual machine cannot be booted\n"); exit(1); } } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(1); } if (reinit) { error = vm_reinit(ctx); if (error) { perror("vm_reinit"); exit(1); } } return (ctx); } int main(int argc, char *argv[]) { int c, error, gdb_port, err, bvmcons; int max_vcpus, mptgen, memflags; int rtc_localtime; struct vmctx *ctx; uint64_t rip; size_t memsize; char *optstr; bvmcons = 0; progname = basename(argv[0]); gdb_port = 0; guest_ncpus = 1; memsize = 256 * MB; mptgen = 1; rtc_localtime = 1; memflags = 0; optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': x2apic_mode = 0; break; case 'A': acpi = 1; break; case 'b': bvmcons = 1; break; case 'p': if (pincpu_parse(optarg) != 0) { errx(EX_USAGE, "invalid vcpu pinning " "configuration '%s'", optarg); } break; case 'c': guest_ncpus = atoi(optarg); break; case 'C': memflags |= VM_MEM_F_INCORE; break; case 'g': gdb_port = atoi(optarg); break; case 'l': if (lpc_device_parse(optarg) != 0) { errx(EX_USAGE, "invalid lpc device " "configuration '%s'", optarg); } break; case 's': if (pci_parse_slot(optarg) != 0) exit(1); else break; case 'S': memflags |= VM_MEM_F_WIRED; break; case 'm': error = vm_parse_memsize(optarg, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", optarg); break; case 'H': guest_vmexit_on_hlt = 1; break; case 'I': /* * The "-I" option was used to add an ioapic to the * virtual machine. * * An ioapic is now provided unconditionally for each * virtual machine and this option is now deprecated. */ break; case 'P': guest_vmexit_on_pause = 1; break; case 'e': strictio = 1; break; case 'u': rtc_localtime = 0; break; case 'U': guest_uuid_str = optarg; break; case 'w': strictmsr = 0; break; case 'W': virtio_msix = 0; break; case 'x': x2apic_mode = 1; break; case 'Y': mptgen = 0; break; case 'h': usage(0); default: usage(1); } } argc -= optind; argv += optind; if (argc != 1) usage(1); vmname = argv[0]; ctx = do_open(vmname); if (guest_ncpus < 1) { fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); exit(1); } max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(1); } fbsdrun_set_capabilities(ctx, BSP); vm_set_memflags(ctx, memflags); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(1); } error = init_msr(); if (error) { fprintf(stderr, "init_msr error %d", error); exit(1); } init_mem(); init_inout(); pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx, rtc_localtime); sci_init(ctx); /* * Exit if a device emulation finds an error in it's initilization */ if (init_pci(ctx) != 0) exit(1); if (gdb_port != 0) init_dbgport(gdb_port); if (bvmcons) init_bvmcons(); if (lpc_bootrom()) { if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { fprintf(stderr, "ROM boot failed: unrestricted guest " "capability not available\n"); exit(1); } error = vcpu_reset(ctx, BSP); assert(error == 0); } error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); /* * build the guest tables, MP etc. */ if (mptgen) { error = mptable_build(ctx, guest_ncpus); if (error) exit(1); } error = smbios_build(ctx); assert(error == 0); if (acpi) { error = acpi_build(ctx, guest_ncpus); assert(error == 0); } + + if (lpc_bootrom()) + fwctl_init(); /* * Change the proc title to include the VM name. */ setproctitle("%s", vmname); /* * Add CPU 0 */ fbsdrun_addcpu(ctx, BSP, BSP, rip); /* * Head off to the main event dispatch loop */ mevent_dispatch(); exit(1); } Index: head/usr.sbin/bhyve/fwctl.c =================================================================== --- head/usr.sbin/bhyve/fwctl.c (nonexistent) +++ head/usr.sbin/bhyve/fwctl.c (revision 288522) @@ -0,0 +1,549 @@ +/*- + * Copyright (c) 2015 Peter Grehan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Guest firmware interface. Uses i/o ports x510/x511 as Qemu does, + * but with a request/response messaging protocol. + */ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "bhyverun.h" +#include "inout.h" +#include "fwctl.h" + +/* + * Messaging protocol base operations + */ +#define OP_NULL 1 +#define OP_ECHO 2 +#define OP_GET 3 +#define OP_GET_LEN 4 +#define OP_SET 5 +#define OP_MAX OP_SET + +/* I/O ports */ +#define FWCTL_OUT 0x510 +#define FWCTL_IN 0x511 + +/* + * Back-end state-machine + */ +enum state { + DORMANT, + IDENT_WAIT, + IDENT_SEND, + REQ, + RESP +} be_state = DORMANT; + +static uint8_t sig[] = { 'B', 'H', 'Y', 'V' }; +static u_int ident_idx; + +struct op_info { + int op; + int (*op_start)(int len); + void (*op_data)(uint32_t data, int len); + int (*op_result)(struct iovec **data); + void (*op_done)(struct iovec *data); +}; +static struct op_info *ops[OP_MAX+1]; + +/* Return 0-padded uint32_t */ +static uint32_t +fwctl_send_rest(uint32_t *data, size_t len) +{ + union { + uint8_t c[4]; + uint32_t w; + } u; + uint8_t *cdata; + int i; + + cdata = (uint8_t *) data; + u.w = 0; + + for (i = 0, u.w = 0; i < len; i++) + u.c[i] = *cdata++; + + return (u.w); +} + +/* + * error op dummy proto - drop all data sent and return an error +*/ +static int errop_code; + +static void +errop_set(int err) +{ + + errop_code = err; +} + +static int +errop_start(int len) +{ + errop_code = ENOENT; + + /* accept any length */ + return (errop_code); +} + +static void +errop_data(uint32_t data, int len) +{ + + /* ignore */ +} + +static int +errop_result(struct iovec **data) +{ + + /* no data to send back; always successful */ + *data = NULL; + return (errop_code); +} + +static void +errop_done(struct iovec *data) +{ + + /* assert data is NULL */ +} + +static struct op_info errop_info = { + .op_start = errop_start, + .op_data = errop_data, + .op_result = errop_result, + .op_done = errop_done +}; + +/* OID search */ +SET_DECLARE(ctl_set, struct ctl); + +CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus)); + +static struct ctl * +ctl_locate(const char *str, int maxlen) +{ + struct ctl *cp, **cpp; + + SET_FOREACH(cpp, ctl_set) { + cp = *cpp; + if (!strncmp(str, cp->c_oid, maxlen)) + return (cp); + } + + return (NULL); +} + +/* uefi-sysctl get-len */ +#define FGET_STRSZ 80 +static struct iovec fget_biov[2]; +static char fget_str[FGET_STRSZ]; +static struct { + size_t f_sz; + uint32_t f_data[1024]; +} fget_buf; +static int fget_cnt; +static size_t fget_size; + +static int +fget_start(int len) +{ + + if (len > FGET_STRSZ) + return(E2BIG); + + fget_cnt = 0; + + return (0); +} + +static void +fget_data(uint32_t data, int len) +{ + + *((uint32_t *) &fget_str[fget_cnt]) = data; + fget_cnt += sizeof(uint32_t); +} + +static int +fget_result(struct iovec **data, int val) +{ + struct ctl *cp; + int err; + + err = 0; + + /* Locate the OID */ + cp = ctl_locate(fget_str, fget_cnt); + if (cp == NULL) { + *data = NULL; + err = ENOENT; + } else { + if (val) { + /* For now, copy the len/data into a buffer */ + memset(&fget_buf, 0, sizeof(fget_buf)); + fget_buf.f_sz = cp->c_len; + memcpy(fget_buf.f_data, cp->c_data, cp->c_len); + fget_biov[0].iov_base = (char *)&fget_buf; + fget_biov[0].iov_len = sizeof(fget_buf.f_sz) + + cp->c_len; + } else { + fget_size = cp->c_len; + fget_biov[0].iov_base = (char *)&fget_size; + fget_biov[0].iov_len = sizeof(fget_size); + } + + fget_biov[1].iov_base = NULL; + fget_biov[1].iov_len = 0; + *data = fget_biov; + } + + return (err); +} + +static void +fget_done(struct iovec *data) +{ + + /* nothing needs to be freed */ +} + +static int +fget_len_result(struct iovec **data) +{ + return (fget_result(data, 0)); +} + +static int +fget_val_result(struct iovec **data) +{ + return (fget_result(data, 1)); +} + +static struct op_info fgetlen_info = { + .op_start = fget_start, + .op_data = fget_data, + .op_result = fget_len_result, + .op_done = fget_done +}; + +static struct op_info fgetval_info = { + .op_start = fget_start, + .op_data = fget_data, + .op_result = fget_val_result, + .op_done = fget_done +}; + +static struct req_info { + int req_error; + u_int req_count; + uint32_t req_size; + uint32_t req_type; + uint32_t req_txid; + struct op_info *req_op; + int resp_error; + int resp_count; + int resp_size; + int resp_off; + struct iovec *resp_biov; +} rinfo; + +static void +fwctl_response_done(void) +{ + + (*rinfo.req_op->op_done)(rinfo.resp_biov); + + /* reinit the req data struct */ + memset(&rinfo, 0, sizeof(rinfo)); +} + +static void +fwctl_request_done(void) +{ + + rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov); + + /* XXX only a single vector supported at the moment */ + rinfo.resp_off = 0; + if (rinfo.resp_biov == NULL) { + rinfo.resp_size = 0; + } else { + rinfo.resp_size = rinfo.resp_biov[0].iov_len; + } +} + +static int +fwctl_request_start(void) +{ + int err; + + /* Data size doesn't include header */ + rinfo.req_size -= 12; + + rinfo.req_op = &errop_info; + if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL) + rinfo.req_op = ops[rinfo.req_type]; + + err = (*rinfo.req_op->op_start)(rinfo.req_size); + + if (err) { + errop_set(err); + rinfo.req_op = &errop_info; + } + + /* Catch case of zero-length message here */ + if (rinfo.req_size == 0) { + fwctl_request_done(); + return (1); + } + + return (0); +} + +static int +fwctl_request_data(uint32_t value) +{ + int remlen; + + /* Make sure remaining size is >= 0 */ + rinfo.req_size -= sizeof(uint32_t); + remlen = (rinfo.req_size > 0) ? rinfo.req_size: 0; + + (*rinfo.req_op->op_data)(value, remlen); + + if (rinfo.req_size < sizeof(uint32_t)) { + fwctl_request_done(); + return (1); + } + + return (0); +} + +static int +fwctl_request(uint32_t value) +{ + + int ret; + + ret = 0; + + switch (rinfo.req_count) { + case 0: + /* Verify size */ + if (value < 12) { + printf("msg size error"); + exit(1); + } + rinfo.req_size = value; + rinfo.req_count = 1; + break; + case 1: + rinfo.req_type = value; + rinfo.req_count++; + break; + case 2: + rinfo.req_txid = value; + rinfo.req_count++; + ret = fwctl_request_start(); + break; + default: + ret = fwctl_request_data(value); + break; + } + + return (ret); +} + +static int +fwctl_response(uint32_t *retval) +{ + uint32_t *dp; + int remlen; + + switch(rinfo.resp_count) { + case 0: + /* 4 x u32 header len + data */ + *retval = 4*sizeof(uint32_t) + + roundup(rinfo.resp_size, sizeof(uint32_t)); + rinfo.resp_count++; + break; + case 1: + *retval = rinfo.req_type; + rinfo.resp_count++; + break; + case 2: + *retval = rinfo.req_txid; + rinfo.resp_count++; + break; + case 3: + *retval = rinfo.resp_error; + rinfo.resp_count++; + break; + default: + remlen = rinfo.resp_size - rinfo.resp_off; + dp = (uint32_t *) + ((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off); + if (remlen >= sizeof(uint32_t)) { + *retval = *dp; + } else if (remlen > 0) { + *retval = fwctl_send_rest(dp, remlen); + } + rinfo.resp_off += sizeof(uint32_t); + break; + } + + if (rinfo.resp_count > 3 && + rinfo.resp_size - rinfo.resp_off <= 0) { + fwctl_response_done(); + return (1); + } + + return (0); +} + + +/* + * i/o port handling. + */ +static uint8_t +fwctl_inb(void) +{ + uint8_t retval; + + retval = 0xff; + + switch (be_state) { + case IDENT_SEND: + retval = sig[ident_idx++]; + if (ident_idx >= sizeof(sig)) + be_state = REQ; + break; + default: + break; + } + + return (retval); +} + +static void +fwctl_outw(uint16_t val) +{ + switch (be_state) { + case IDENT_WAIT: + if (val == 0) { + be_state = IDENT_SEND; + ident_idx = 0; + } + break; + default: + /* ignore */ + break; + } +} + +static uint32_t +fwctl_inl(void) +{ + uint32_t retval; + + switch (be_state) { + case RESP: + if (fwctl_response(&retval)) + be_state = REQ; + break; + default: + retval = 0xffffffff; + break; + } + + return (retval); +} + +static void +fwctl_outl(uint32_t val) +{ + + switch (be_state) { + case REQ: + if (fwctl_request(val)) + be_state = RESP; + default: + break; + } + +} + +static int +fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + + if (in) { + if (bytes == 1) + *eax = fwctl_inb(); + else if (bytes == 4) + *eax = fwctl_inl(); + else + *eax = 0xffff; + } else { + if (bytes == 2) + fwctl_outw(*eax); + else if (bytes == 4) + fwctl_outl(*eax); + } + + return (0); +} +INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler); +INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_OUT, fwctl_handler); + +void +fwctl_init(void) +{ + + ops[OP_GET_LEN] = &fgetlen_info; + ops[OP_GET] = &fgetval_info; + + be_state = IDENT_WAIT; +} Property changes on: head/usr.sbin/bhyve/fwctl.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/usr.sbin/bhyve/fwctl.h =================================================================== --- head/usr.sbin/bhyve/fwctl.h (nonexistent) +++ head/usr.sbin/bhyve/fwctl.h (revision 288522) @@ -0,0 +1,54 @@ +/*- + * Copyright (c) 2015 Peter Grehan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _FWCTL_H_ +#define _FWCTL_H_ + +#include + +/* + * Linker set api for export of information to guest firmware via + * a sysctl-like OID interface + */ +struct ctl { + const char *c_oid; + const void *c_data; + const int c_len; +}; + +#define CTL_NODE(oid, data, len) \ + static struct ctl __CONCAT(__ctl, __LINE__) = { \ + oid, \ + (data), \ + (len), \ + }; \ + DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__)) + +void fwctl_init(void); + +#endif /* _FWCTL_H_ */ Property changes on: head/usr.sbin/bhyve/fwctl.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property