Index: head/usr.sbin/bhyve/bhyverun.c =================================================================== --- head/usr.sbin/bhyve/bhyverun.c (revision 359948) +++ head/usr.sbin/bhyve/bhyverun.c (revision 359949) @@ -1,1245 +1,1247 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include "bhyverun.h" #include "acpi.h" #include "atkbdc.h" +#include "bootrom.h" #include "inout.h" #include "dbgport.h" #include "fwctl.h" #include "gdb.h" #include "ioapic.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "smbiostbl.h" #include "xmsr.h" #include "spinup_ap.h" #include "rtc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ #define MB (1024UL * 1024) #define GB (1024UL * MB) static const char * const vmx_exit_reason_desc[] = { [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", [EXIT_REASON_EXT_INTR] = "External interrupt", [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", [EXIT_REASON_INIT] = "INIT signal", [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", [EXIT_REASON_SMI] = "Other SMI", [EXIT_REASON_INTR_WINDOW] = "Interrupt window", [EXIT_REASON_NMI_WINDOW] = "NMI window", [EXIT_REASON_TASK_SWITCH] = "Task switch", [EXIT_REASON_CPUID] = "CPUID", [EXIT_REASON_GETSEC] = "GETSEC", [EXIT_REASON_HLT] = "HLT", [EXIT_REASON_INVD] = "INVD", [EXIT_REASON_INVLPG] = "INVLPG", [EXIT_REASON_RDPMC] = "RDPMC", [EXIT_REASON_RDTSC] = "RDTSC", [EXIT_REASON_RSM] = "RSM", [EXIT_REASON_VMCALL] = "VMCALL", [EXIT_REASON_VMCLEAR] = "VMCLEAR", [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", [EXIT_REASON_VMPTRLD] = "VMPTRLD", [EXIT_REASON_VMPTRST] = "VMPTRST", [EXIT_REASON_VMREAD] = "VMREAD", [EXIT_REASON_VMRESUME] = "VMRESUME", [EXIT_REASON_VMWRITE] = "VMWRITE", [EXIT_REASON_VMXOFF] = "VMXOFF", [EXIT_REASON_VMXON] = "VMXON", [EXIT_REASON_CR_ACCESS] = 
"Control-register accesses",
	[EXIT_REASON_DR_ACCESS] = "MOV DR",
	[EXIT_REASON_INOUT] = "I/O instruction",
	[EXIT_REASON_RDMSR] = "RDMSR",
	[EXIT_REASON_WRMSR] = "WRMSR",
	[EXIT_REASON_INVAL_VMCS] =
	    "VM-entry failure due to invalid guest state",
	[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
	[EXIT_REASON_MWAIT] = "MWAIT",
	[EXIT_REASON_MTF] = "Monitor trap flag",
	[EXIT_REASON_MONITOR] = "MONITOR",
	[EXIT_REASON_PAUSE] = "PAUSE",
	[EXIT_REASON_MCE_DURING_ENTRY] =
	    "VM-entry failure due to machine-check event",
	[EXIT_REASON_TPR] = "TPR below threshold",
	[EXIT_REASON_APIC_ACCESS] = "APIC access",
	[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
	[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
	[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
	[EXIT_REASON_EPT_FAULT] = "EPT violation",
	[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
	[EXIT_REASON_INVEPT] = "INVEPT",
	[EXIT_REASON_RDTSCP] = "RDTSCP",
	[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
	[EXIT_REASON_INVVPID] = "INVVPID",
	[EXIT_REASON_WBINVD] = "WBINVD",
	[EXIT_REASON_XSETBV] = "XSETBV",
	[EXIT_REASON_APIC_WRITE] = "APIC write",
	[EXIT_REASON_RDRAND] = "RDRAND",
	[EXIT_REASON_INVPCID] = "INVPCID",
	[EXIT_REASON_VMFUNC] = "VMFUNC",
	[EXIT_REASON_ENCLS] = "ENCLS",
	[EXIT_REASON_RDSEED] = "RDSEED",
	[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
	[EXIT_REASON_XSAVES] = "XSAVES",
	[EXIT_REASON_XRSTORS] = "XRSTORS"
};

/*
 * Signature shared by every entry of the handler[] dispatch table: the VM
 * context, the exit record of the vCPU that exited, and a pointer to that
 * vCPU's id.  Handlers return VMEXIT_CONTINUE or VMEXIT_ABORT.
 */
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);

/* Name of the virtual machine; main() takes it from the last argument. */
char *vmname;

/* vCPU count and topology; defaults set in main(), overridden by -c. */
int guest_ncpus;
uint16_t cores, maxcpus, sockets, threads;

/* Argument of -U, stored unparsed. */
char *guest_uuid_str;

int raw_stdio = 0;

static int gdb_port = 0;		/* -G; 0 means no gdb stub */
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;	/* -H, -P */
static int virtio_msix = 1;		/* cleared by -W */
static int x2apic_mode = 0;	/* default is xAPIC */

static int strictio;			/* -e: unhandled I/O is fatal */
static int strictmsr = 1;		/* cleared by -w */

static int acpi;			/* -A: build ACPI tables */

static char *progname;
static const int BSP = 0;		/* vCPU id of the boot processor */

/* vCPUs added by fbsdrun_addcpu() and not yet removed by fbsdrun_deletecpu(). */
static cpuset_t cpumask;

static void
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; uint64_t vmexit_inst_emul; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; } stats; struct mt_vmm_info { pthread_t mt_thr; struct vmctx *mt_ctx; int mt_vcpu; } mt_vmm_info[VM_MAXCPU]; static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; static void usage(int code) { fprintf(stderr, "Usage: %s [-abehuwxACHPSWY]\n" " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" " %*s [-g ] [-l ]\n" " %*s [-m mem] [-p vcpu:hostcpu] [-s ] [-U uuid] \n" " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" " -c: number of cpus and/or topology specification\n" " -C: include guest memory in core file\n" " -e: exit on unhandled I/O access\n" " -g: gdb port\n" " -h: help\n" " -H: vmexit from the guest on hlt\n" " -l: LPC device configuration\n" " -m: memory size in MB\n" " -p: pin 'vcpu' to 'hostcpu'\n" " -P: vmexit from the guest on pause\n" " -s: PCI slot config\n" " -S: guest memory cannot be swapped\n" " -u: RTC keeps UTC time\n" " -U: uuid\n" " -w: ignore unimplemented MSRs\n" " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", progname, (int)strlen(progname), "", (int)strlen(progname), "", (int)strlen(progname), ""); exit(code); } /* * XXX This parser is known to have the following issues: * 1. It accepts null key=value tokens ",,". * 2. It accepts whitespace after = and before value. * 3. Values out of range of INT are silently wrapped. * 4. It doesn't check non-final values. * 5. The apparently bogus limits of UINT16_MAX are for future expansion. * * The acceptance of a null specification ('-c ""') is by design to match the * manual page syntax specification, this results in a topology of 1 vCPU. 
*/ static int topology_parse(const char *opt) { uint64_t ncpus; int c, chk, n, s, t, tmp; char *cp, *str; bool ns, scts; c = 1, n = 1, s = 1, t = 1; ns = false, scts = false; str = strdup(opt); if (str == NULL) goto out; while ((cp = strsep(&str, ",")) != NULL) { if (sscanf(cp, "%i%n", &tmp, &chk) == 1) { n = tmp; ns = true; } else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) { n = tmp; ns = true; } else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) { s = tmp; scts = true; } else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) { c = tmp; scts = true; } else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) { t = tmp; scts = true; #ifdef notyet /* Do not expose this until vmm.ko implements it */ } else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) { m = tmp; #endif /* Skip the empty argument case from -c "" */ } else if (cp[0] == '\0') continue; else goto out; /* Any trailing garbage causes an error */ if (cp[chk] != '\0') goto out; } free(str); str = NULL; /* * Range check 1 <= n <= UINT16_MAX all values */ if (n < 1 || s < 1 || c < 1 || t < 1 || n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX || t > UINT16_MAX) return (-1); /* If only the cpus was specified, use that as sockets */ if (!scts) s = n; /* * Compute sockets * cores * threads avoiding overflow * The range check above insures these are 16 bit values * If n was specified check it against computed ncpus */ ncpus = (uint64_t)s * c * t; if (ncpus > UINT16_MAX || (ns && n != ncpus)) return (-1); guest_ncpus = ncpus; sockets = s; cores = c; threads = t; return(0); out: free(str); return (-1); } static int pincpu_parse(const char *opt) { int vcpu, pcpu; if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { fprintf(stderr, "invalid format: %s\n", opt); return (-1); } if (vcpu < 0 || vcpu >= VM_MAXCPU) { fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", vcpu, VM_MAXCPU - 1); return (-1); } if (pcpu < 0 || pcpu >= CPU_SETSIZE) { fprintf(stderr, "hostcpu '%d' outside valid range from " "0 to %d\n", 
pcpu, CPU_SETSIZE - 1); return (-1); } if (vcpumap[vcpu] == NULL) { if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { perror("malloc"); return (-1); } CPU_ZERO(vcpumap[vcpu]); } CPU_SET(pcpu, vcpumap[vcpu]); return (0); } void vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, int errcode) { struct vmctx *ctx; int error, restart_instruction; ctx = arg; restart_instruction = 1; error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, restart_instruction); assert(error == 0); } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } int fbsdrun_vmexit_on_pause(void) { return (guest_vmexit_on_pause); } int fbsdrun_vmexit_on_hlt(void) { return (guest_vmexit_on_hlt); } int fbsdrun_virtio_msix(void) { return (virtio_msix); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct mt_vmm_info *mtp; int vcpu; mtp = param; vcpu = mtp->mt_vcpu; snprintf(tname, sizeof(tname), "vcpu %d", vcpu); pthread_set_name_np(mtp->mt_thr, tname); if (gdb_port != 0) gdb_cpu_add(vcpu); vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ exit(1); return (NULL); } void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) { int error; assert(fromcpu == BSP); /* * The 'newcpu' must be activated in the context of 'fromcpu'. If * vm_activate_cpu() is delayed until newcpu's pthread starts running * then vmm.ko is out-of-sync with bhyve and this can create a race * with vm_suspend(). 
*/ error = vm_activate_cpu(ctx, newcpu); if (error != 0) err(EX_OSERR, "could not activate CPU %d", newcpu); CPU_SET_ATOMIC(newcpu, &cpumask); /* * Set up the vmexit struct to allow execution to start * at the given RIP */ vmexit[newcpu].rip = rip; vmexit[newcpu].inst_length = 0; mt_vmm_info[newcpu].mt_ctx = ctx; mt_vmm_info[newcpu].mt_vcpu = newcpu; error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, fbsdrun_start_thread, &mt_vmm_info[newcpu]); assert(error == 0); } static int fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) { if (!CPU_ISSET(vcpu, &cpumask)) { fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); exit(4); } CPU_CLR_ATOMIC(vcpu, &cpumask); return (CPU_EMPTY(&cpumask)); } static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) { #if BHYVE_DEBUG /* * put guest-driven debug here */ #endif return (VMEXIT_CONTINUE); } static int vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; int bytes, port, in, out; int vcpu; vcpu = *pvcpu; port = vme->u.inout.port; bytes = vme->u.inout.bytes; in = vme->u.inout.in; out = !in; /* Extra-special case of host notifications */ if (out && port == GUEST_NIO_PORT) { error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); return (error); } error = emulate_inout(ctx, vcpu, vme, strictio); if (error) { fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), port, vmexit->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); } } static int vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { uint64_t val; uint32_t eax, edx; int error; val = 0; error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); if (error != 0) { fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } eax = val; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); assert(error == 0); edx = val >> 32; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); assert(error == 0); return (VMEXIT_CONTINUE); } static int vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); if (error != 0) { fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } return (VMEXIT_CONTINUE); } static int vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { (void)spinup_ap(ctx, *pvcpu, vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); return (VMEXIT_CONTINUE); } #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif static const char * vmexit_vmx_desc(uint32_t exit_reason) { if (exit_reason >= nitems(vmx_exit_reason_desc) || vmx_exit_reason_desc[exit_reason] == NULL) return ("Unknown"); return (vmx_exit_reason_desc[exit_reason]); } static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tVMX\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 
fprintf(stderr, "\texit_reason\t%u (%s)\n",
	    vmexit->u.vmx.exit_reason,
	    vmexit_vmx_desc(vmexit->u.vmx.exit_reason));
	fprintf(stderr, "\tqualification\t0x%016lx\n",
	    vmexit->u.vmx.exit_qualification);
	fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
	fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
#ifdef DEBUG_EPT_MISCONFIG
	if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
		/*
		 * Fetch the faulting guest physical address from the VMCS
		 * and the page-table entries that map it, then dump them.
		 * The return values of both lookups are not checked, and
		 * all four PTE slots are printed regardless of how many
		 * ept_misconfig_ptenum reports.
		 */
		vm_get_register(ctx, *pvcpu,
		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
		    &ept_misconfig_gpa);
		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
		    &ept_misconfig_ptenum);
		fprintf(stderr, "\tEPT misconfiguration:\n");
		fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
		fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
		    ept_misconfig_ptenum, ept_misconfig_pte[0],
		    ept_misconfig_pte[1], ept_misconfig_pte[2],
		    ept_misconfig_pte[3]);
	}
#endif	/* DEBUG_EPT_MISCONFIG */
	/* An unhandled VMX exit is always fatal for the VM. */
	return (VMEXIT_ABORT);
}

/*
 * Handler for SVM exits that nothing else consumed: dump the exit code and
 * both exitinfo words on stderr and abort the VM.
 */
static int
vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
	fprintf(stderr, "\treason\t\tSVM\n");
	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
	fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
	fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
	fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
	return (VMEXIT_ABORT);
}

/*
 * Handler for VM_EXITCODE_BOGUS: count the exit and resume the guest.
 * The exit must not have consumed an instruction.
 */
static int
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	assert(vmexit->inst_length == 0);

	stats.vmexit_bogus++;

	return (VMEXIT_CONTINUE);
}

/*
 * Handler for VM_EXITCODE_REQIDLE: count the exit and resume the guest.
 * The exit must not have consumed an instruction.
 */
static int
vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	assert(vmexit->inst_length == 0);

	stats.vmexit_reqidle++;

	return (VMEXIT_CONTINUE);
}

/*
 * Handler for HLT exits; installed by fbsdrun_set_capabilities() only when
 * exits on HLT were requested.  Counts the exit and resumes the guest.
 */
static int
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	stats.vmexit_hlt++;

	/*
	 * Just continue execution with the next instruction. We use
	 * the HLT VM exit as a way to be friendly with the host
	 * scheduler.
*/ return (VMEXIT_CONTINUE); } static int vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_pause++; return (VMEXIT_CONTINUE); } static int vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_mtrap++; if (gdb_port == 0) { fprintf(stderr, "vm_loop: unexpected VMEXIT_MTRAP\n"); exit(4); } gdb_cpu_mtrap(*pvcpu); return (VMEXIT_CONTINUE); } static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { int err, i; struct vie *vie; stats.vmexit_inst_emul++; vie = &vmexit->u.inst_emul.vie; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie, &vmexit->u.inst_emul.paging); if (err) { if (err == ESRCH) { fprintf(stderr, "Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } fprintf(stderr, "Failed to emulate instruction ["); for (i = 0; i < vie->num_valid; i++) { fprintf(stderr, "0x%02x%s", vie->inst[i], i != (vie->num_valid - 1) ? " " : ""); } fprintf(stderr, "] at 0x%lx\n", vmexit->rip); return (VMEXIT_ABORT); } return (VMEXIT_CONTINUE); } static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; static int vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { enum vm_suspend_how how; how = vmexit->u.suspended.how; fbsdrun_deletecpu(ctx, *pvcpu); if (*pvcpu != BSP) { pthread_mutex_lock(&resetcpu_mtx); pthread_cond_signal(&resetcpu_cond); pthread_mutex_unlock(&resetcpu_mtx); pthread_exit(NULL); } pthread_mutex_lock(&resetcpu_mtx); while (!CPU_EMPTY(&cpumask)) { pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); } pthread_mutex_unlock(&resetcpu_mtx); switch (how) { case VM_SUSPEND_RESET: exit(0); case VM_SUSPEND_POWEROFF: exit(1); case VM_SUSPEND_HALT: exit(2); case VM_SUSPEND_TRIPLEFAULT: exit(3); default: fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); exit(100); } return (0); /* NOTREACHED */ } static int vmexit_debug(struct vmctx 
*ctx, struct vm_exit *vmexit, int *pvcpu) { if (gdb_port == 0) { fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n"); exit(4); } gdb_cpu_suspend(*pvcpu); return (VMEXIT_CONTINUE); } static int vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { if (gdb_port == 0) { fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n"); exit(4); } gdb_cpu_breakpoint(*pvcpu, vmexit); return (VMEXIT_CONTINUE); } static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, [VM_EXITCODE_DEBUG] = vmexit_debug, [VM_EXITCODE_BPT] = vmexit_breakpoint, }; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) { int error, rc; enum vm_exitcode exitcode; cpuset_t active_cpus; if (vcpumap[vcpu] != NULL) { error = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), vcpumap[vcpu]); assert(error == 0); } error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu, &active_cpus)); error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); assert(error == 0); while (1) { error = vm_run(ctx, vcpu, &vmexit[vcpu]); if (error != 0) break; exitcode = vmexit[vcpu].exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(4); } rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); switch (rc) { case VMEXIT_CONTINUE: break; case VMEXIT_ABORT: abort(); default: exit(4); } } fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx) { 
int tmp, error; error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. */ if (error == 0) return (VM_MAXCPU); else return (1); } void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) { int err, tmp; if (fbsdrun_vmexit_on_hlt()) { err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { fprintf(stderr, "VM exit on HLT not supported\n"); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (fbsdrun_vmexit_on_pause()) { /* * pause exit support required for this mode */ err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { fprintf(stderr, "SMP mux requested, no pause support\n"); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (x2apic_mode) err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); else err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); if (err) { fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } static struct vmctx * do_open(const char *vmname) { struct vmctx *ctx; int error; bool reinit, romboot; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; const cap_ioctl_t *cmds; size_t ncmds; #endif reinit = romboot = false; if (lpc_bootrom()) romboot = true; error = vm_create(vmname); if (error) { if (errno == EEXIST) { if (romboot) { reinit = true; } else { /* * The virtual machine has been setup by the * userspace bootloader. */ } } else { perror("vm_create"); exit(4); } } else { if (!romboot) { /* * If the virtual machine was just created then a * bootrom must be configured to boot it. 
*/
			fprintf(stderr, "virtual machine cannot be booted\n");
			exit(4);
		}
	}

	ctx = vm_open(vmname);
	if (ctx == NULL) {
		perror("vm_open");
		exit(4);
	}

#ifndef WITHOUT_CAPSICUM
	/* Restrict the vmm device descriptor to ioctl and read/write mmap. */
	cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
	if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	/*
	 * Two calls: the first fills in the number of ioctl commands, the
	 * second returns the list itself, which is freed once the limit
	 * has been applied.
	 * NOTE(review): the return value of the first call is discarded --
	 * confirm vm_get_ioctls() allocates nothing when given a length
	 * pointer.
	 */
	vm_get_ioctls(&ncmds);
	cmds = vm_get_ioctls(NULL);
	if (cmds == NULL)
		errx(EX_OSERR, "out of memory");
	if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	free((cap_ioctl_t *)cmds);
#endif

	/* Set only when the VM already existed and a bootrom is configured. */
	if (reinit) {
		error = vm_reinit(ctx);
		if (error) {
			perror("vm_reinit");
			exit(4);
		}
	}
	error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
	if (error)
		errx(EX_OSERR, "vm_set_topology");
	return (ctx);
}

/*
 * Program entry point: parse the command line, open the VM, initialise
 * memory and the device models, build the guest firmware tables, enter the
 * sandbox, start the BSP and run the event loop.
 */
int
main(int argc, char *argv[])
{
	int c, error, dbg_port, err, bvmcons;
	int max_vcpus, mptgen, memflags;
	int rtc_localtime;
	bool gdb_stop;
	struct vmctx *ctx;
	uint64_t rip;
	size_t memsize;
	char *optstr;

	/* Defaults: 1 vCPU, 256MB, MP table generated, RTC in local time. */
	bvmcons = 0;
	progname = basename(argv[0]);
	dbg_port = 0;
	gdb_stop = false;
	guest_ncpus = 1;
	sockets = cores = threads = 1;
	maxcpus = 0;
	memsize = 256 * MB;
	mptgen = 1;
	rtc_localtime = 1;
	memflags = 0;

	optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:";
	while ((c = getopt(argc, argv, optstr)) != -1) {
		switch (c) {
		case 'a':
			x2apic_mode = 0;
			break;
		case 'A':
			acpi = 1;
			break;
		case 'b':
			bvmcons = 1;
			break;
		case 'p':
			if (pincpu_parse(optarg) != 0) {
				errx(EX_USAGE, "invalid vcpu pinning "
				    "configuration '%s'", optarg);
			}
			break;
		case 'c':
			if (topology_parse(optarg) != 0) {
				errx(EX_USAGE, "invalid cpu topology "
				    "'%s'", optarg);
			}
			break;
		case 'C':
			memflags |= VM_MEM_F_INCORE;
			break;
		case 'g':
			dbg_port = atoi(optarg);
			break;
		case 'G':
			/*
			 * A leading 'w' sets gdb_stop, which is later passed
			 * to init_gdb(); the rest is the port number.
			 */
			if (optarg[0] == 'w') {
				gdb_stop = true;
				optarg++;
			}
			gdb_port = atoi(optarg);
			break;
		case 'l':
			/*
			 * Only strlen(optarg) characters are compared, so
			 * any prefix of "help" (including the empty string)
			 * selects the device listing.
			 */
			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
				lpc_print_supported_devices();
				exit(0);
			} else if (lpc_device_parse(optarg) != 0) {
				errx(EX_USAGE, "invalid lpc device "
"configuration '%s'", optarg); } break; case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { pci_print_supported_devices(); exit(0); } else if (pci_parse_slot(optarg) != 0) exit(4); else break; case 'S': memflags |= VM_MEM_F_WIRED; break; case 'm': error = vm_parse_memsize(optarg, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", optarg); break; case 'H': guest_vmexit_on_hlt = 1; break; case 'I': /* * The "-I" option was used to add an ioapic to the * virtual machine. * * An ioapic is now provided unconditionally for each * virtual machine and this option is now deprecated. */ break; case 'P': guest_vmexit_on_pause = 1; break; case 'e': strictio = 1; break; case 'u': rtc_localtime = 0; break; case 'U': guest_uuid_str = optarg; break; case 'w': strictmsr = 0; break; case 'W': virtio_msix = 0; break; case 'x': x2apic_mode = 1; break; case 'Y': mptgen = 0; break; case 'h': usage(0); default: usage(1); } } argc -= optind; argv += optind; if (argc != 1) usage(1); vmname = argv[0]; ctx = do_open(vmname); max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(4); } fbsdrun_set_capabilities(ctx, BSP); vm_set_memflags(ctx, memflags); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(4); } error = init_msr(); if (error) { fprintf(stderr, "init_msr error %d", error); exit(4); } init_mem(); init_inout(); + init_bootrom(ctx); atkbdc_init(ctx); pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx, rtc_localtime); sci_init(ctx); /* * Exit if a device emulation finds an error in its initilization */ if (init_pci(ctx) != 0) { perror("device emulation initialization error"); exit(4); } if (dbg_port != 0) init_dbgport(dbg_port); if (gdb_port != 0) init_gdb(ctx, gdb_port, gdb_stop); if (bvmcons) init_bvmcons(); if (lpc_bootrom()) { if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) 
{
			fprintf(stderr, "ROM boot failed: unrestricted guest "
			    "capability not available\n");
			exit(4);
		}
		/* Reset the BSP before it starts executing the ROM. */
		error = vcpu_reset(ctx, BSP);
		assert(error == 0);
	}

	/* The BSP starts at whatever RIP the vCPU currently holds. */
	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
	assert(error == 0);

	/*
	 * build the guest tables, MP etc.
	 */
	if (mptgen) {
		error = mptable_build(ctx, guest_ncpus);
		if (error) {
			perror("error to build the guest tables");
			exit(4);
		}
	}

	error = smbios_build(ctx);
	assert(error == 0);

	if (acpi) {
		error = acpi_build(ctx, guest_ncpus);
		assert(error == 0);
	}

	/* fwctl is only initialised when booting from a ROM. */
	if (lpc_bootrom())
		fwctl_init();

	/*
	 * Change the proc title to include the VM name.
	 */
	setproctitle("%s", vmname);

#ifndef WITHOUT_CAPSICUM
	/*
	 * Limit stdout/stderr and enter the sandbox only after all of the
	 * initialisation above has run.
	 */
	caph_cache_catpages();

	if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");

	if (caph_enter() == -1)
		errx(EX_OSERR, "cap_enter() failed");
#endif

	/*
	 * Add CPU 0
	 */
	fbsdrun_addcpu(ctx, BSP, BSP, rip);

	/*
	 * Head off to the main event dispatch loop
	 */
	mevent_dispatch();

	/* Reached only if the event loop returns. */
	exit(4);
}
Index: head/usr.sbin/bhyve/bootrom.c
===================================================================
--- head/usr.sbin/bhyve/bootrom.c (revision 359948)
+++ head/usr.sbin/bhyve/bootrom.c (revision 359949)
@@ -1,114 +1,181 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015 Neel Natu <neel@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include +#include #include #include #include #include #include #include #include #include "bhyverun.h" #include "bootrom.h" #include "debug.h" -#define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ +#define BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ +/* + * ROM region is 16 MB at the top of 4GB ("low") memory. + * + * The size is limited so it doesn't encroach into reserved MMIO space (e.g., + * APIC, HPET, MSI). + * + * It is allocated in page-multiple blocks on a first-come first-serve basis, + * from high to low, during initialization, and does not change at runtime. + */ +static char *romptr; /* Pointer to userspace-mapped bootrom region. */ +static vm_paddr_t gpa_base; /* GPA of low end of region. */ +static vm_paddr_t gpa_allocbot; /* Low GPA of free region. */ +static vm_paddr_t gpa_alloctop; /* High GPA, minus 1, of free region. 
*/ + +void +init_bootrom(struct vmctx *ctx) +{ + romptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", BOOTROM_SIZE); + if (romptr == MAP_FAILED) + err(4, "%s: vm_create_devmem", __func__); + gpa_base = (1ULL << 32) - BOOTROM_SIZE; + gpa_allocbot = gpa_base; + gpa_alloctop = (1ULL << 32) - 1; +} + int -bootrom_init(struct vmctx *ctx, const char *romfile) +bootrom_alloc(struct vmctx *ctx, size_t len, int prot, int flags, + char **region_out, uint64_t *gpa_out) { - struct stat sbuf; + static const int bootrom_valid_flags = BOOTROM_ALLOC_TOP; + vm_paddr_t gpa; + vm_ooffset_t segoff; + + if (flags & ~bootrom_valid_flags) { + warnx("%s: Invalid flags: %x", __func__, + flags & ~bootrom_valid_flags); + return (EINVAL); + } + if (prot & ~_PROT_ALL) { + warnx("%s: Invalid protection: %x", __func__, + prot & ~_PROT_ALL); + return (EINVAL); + } + + if (len == 0 || len > BOOTROM_SIZE) { + warnx("ROM size %zu is invalid", len); + return (EINVAL); + } + if (len & PAGE_MASK) { + warnx("ROM size %zu is not a multiple of the page size", + len); + return (EINVAL); + } + + if (flags & BOOTROM_ALLOC_TOP) { + gpa = (gpa_alloctop - len) + 1; + if (gpa < gpa_allocbot) { + warnx("No room for %zu ROM in bootrom region", len); + return (ENOMEM); + } + } else { + gpa = gpa_allocbot; + if (gpa > (gpa_alloctop - len) + 1) { + warnx("No room for %zu ROM in bootrom region", len); + return (ENOMEM); + } + } + + segoff = gpa - gpa_base; + if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, segoff, len, prot) != 0) { + int serrno = errno; + warn("%s: vm_mmap_mapseg", __func__); + return (serrno); + } + + if (flags & BOOTROM_ALLOC_TOP) + gpa_alloctop = gpa - 1; + else + gpa_allocbot = gpa + len; + + *region_out = romptr + segoff; + if (gpa_out != NULL) + *gpa_out = gpa; + return (0); +} + +int +bootrom_loadrom(struct vmctx *ctx, const char *romfile) +{ + struct stat sbuf; ssize_t rlen; char *ptr; - int fd, i, rv, prot; + int fd, i, rv; rv = -1; fd = open(romfile, O_RDONLY); if (fd < 0) { EPRINTLN("Error 
opening bootrom \"%s\": %s", romfile, strerror(errno)); goto done; } if (fstat(fd, &sbuf) < 0) { EPRINTLN("Could not fstat bootrom file \"%s\": %s", romfile, strerror(errno)); goto done; } - /* - * Limit bootrom size to 16MB so it doesn't encroach into reserved - * MMIO space (e.g. APIC, HPET, MSI). - */ - if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) { - EPRINTLN("Invalid bootrom size %ld", sbuf.st_size); - goto done; - } - - if (sbuf.st_size & PAGE_MASK) { - EPRINTLN("Bootrom size %ld is not a multiple of the " - "page size", sbuf.st_size); - goto done; - } - - ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size); - if (ptr == MAP_FAILED) - goto done; - /* Map the bootrom into the guest address space */ - prot = PROT_READ | PROT_EXEC; - gpa = (1ULL << 32) - sbuf.st_size; - if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0) + if (bootrom_alloc(ctx, sbuf.st_size, PROT_READ | PROT_EXEC, + BOOTROM_ALLOC_TOP, &ptr, NULL) != 0) goto done; /* Read 'romfile' into the guest address space */ for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) { rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE); if (rlen != PAGE_SIZE) { EPRINTLN("Incomplete read of page %d of bootrom " "file %s: %ld bytes", i, romfile, rlen); goto done; } } rv = 0; done: if (fd >= 0) close(fd); return (rv); } Index: head/usr.sbin/bhyve/bootrom.h =================================================================== --- head/usr.sbin/bhyve/bootrom.h (revision 359948) +++ head/usr.sbin/bhyve/bootrom.h (revision 359949) @@ -1,40 +1,50 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BOOTROM_H_ #define _BOOTROM_H_ #include +#include +#include +#include struct vmctx; -int bootrom_init(struct vmctx *ctx, const char *romfile); +void init_bootrom(struct vmctx *ctx); +enum { + BOOTROM_ALLOC_TOP = 0x80, + _FORCE_INT = INT_MIN, +}; +int bootrom_alloc(struct vmctx *ctx, size_t len, int prot, int flags, + char **region_out, uint64_t *gpa_out); +int bootrom_loadrom(struct vmctx *ctx, const char *romfile); #endif Index: head/usr.sbin/bhyve/pci_lpc.c =================================================================== --- head/usr.sbin/bhyve/pci_lpc.c (revision 359948) +++ head/usr.sbin/bhyve/pci_lpc.c (revision 359949) @@ -1,463 +1,463 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Neel Natu * Copyright (c) 2013 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "acpi.h" #include "debug.h" #include "bootrom.h" #include "inout.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "uart_emul.h" #define IO_ICU1 0x20 #define IO_ICU2 0xA0 SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt); SET_DECLARE(lpc_sysres_set, struct lpc_sysres); #define ELCR_PORT 0x4d0 SYSRES_IO(ELCR_PORT, 2); #define IO_TIMER1_PORT 0x40 #define NMISC_PORT 0x61 SYSRES_IO(NMISC_PORT, 1); static struct pci_devinst *lpc_bridge; static const char *romfile; #define LPC_UART_NUM 2 static struct lpc_uart_softc { struct uart_softc *uart_softc; const char *opts; int iobase; int irq; int enabled; } lpc_uart_softc[LPC_UART_NUM]; static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" }; /* * LPC device configuration is in the following form: * [,] * For e.g. 
"com1,stdio" or "bootrom,/var/romfile" */ int lpc_device_parse(const char *opts) { int unit, error; char *str, *cpy, *lpcdev; error = -1; str = cpy = strdup(opts); lpcdev = strsep(&str, ","); if (lpcdev != NULL) { if (strcasecmp(lpcdev, "bootrom") == 0) { romfile = str; error = 0; goto done; } for (unit = 0; unit < LPC_UART_NUM; unit++) { if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) { lpc_uart_softc[unit].opts = str; error = 0; goto done; } } } done: if (error) free(cpy); return (error); } void lpc_print_supported_devices() { size_t i; printf("bootrom\n"); for (i = 0; i < LPC_UART_NUM; i++) printf("%s\n", lpc_uart_names[i]); } const char * lpc_bootrom(void) { return (romfile); } static void lpc_uart_intr_assert(void *arg) { struct lpc_uart_softc *sc = arg; assert(sc->irq >= 0); vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq); } static void lpc_uart_intr_deassert(void *arg) { /* * The COM devices on the LPC bus generate edge triggered interrupts, * so nothing more to do here. 
*/ } static int lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { int offset; struct lpc_uart_softc *sc = arg; offset = port - sc->iobase; switch (bytes) { case 1: if (in) *eax = uart_read(sc->uart_softc, offset); else uart_write(sc->uart_softc, offset, *eax); break; case 2: if (in) { *eax = uart_read(sc->uart_softc, offset); *eax |= uart_read(sc->uart_softc, offset + 1) << 8; } else { uart_write(sc->uart_softc, offset, *eax); uart_write(sc->uart_softc, offset + 1, *eax >> 8); } break; default: return (-1); } return (0); } static int lpc_init(struct vmctx *ctx) { struct lpc_uart_softc *sc; struct inout_port iop; const char *name; int unit, error; if (romfile != NULL) { - error = bootrom_init(ctx, romfile); + error = bootrom_loadrom(ctx, romfile); if (error) return (error); } /* COM1 and COM2 */ for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; name = lpc_uart_names[unit]; if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) { EPRINTLN("Unable to allocate resources for " "LPC device %s", name); return (-1); } pci_irq_reserve(sc->irq); sc->uart_softc = uart_init(lpc_uart_intr_assert, lpc_uart_intr_deassert, sc); if (uart_set_backend(sc->uart_softc, sc->opts) != 0) { EPRINTLN("Unable to initialize backend '%s' " "for LPC device %s", sc->opts, name); return (-1); } bzero(&iop, sizeof(struct inout_port)); iop.name = name; iop.port = sc->iobase; iop.size = UART_IO_BAR_SIZE; iop.flags = IOPORT_F_INOUT; iop.handler = lpc_uart_io_handler; iop.arg = sc; error = register_inout(&iop); assert(error == 0); sc->enabled = 1; } return (0); } static void pci_lpc_write_dsdt(struct pci_devinst *pi) { struct lpc_dsdt **ldpp, *ldp; dsdt_line(""); dsdt_line("Device (ISA)"); dsdt_line("{"); dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func); dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)"); dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)"); dsdt_line(" {"); dsdt_line(" Offset 
(0x60),"); dsdt_line(" PIRA, 8,"); dsdt_line(" PIRB, 8,"); dsdt_line(" PIRC, 8,"); dsdt_line(" PIRD, 8,"); dsdt_line(" Offset (0x68),"); dsdt_line(" PIRE, 8,"); dsdt_line(" PIRF, 8,"); dsdt_line(" PIRG, 8,"); dsdt_line(" PIRH, 8"); dsdt_line(" }"); dsdt_line(""); dsdt_indent(1); SET_FOREACH(ldpp, lpc_dsdt_set) { ldp = *ldpp; ldp->handler(); } dsdt_line(""); dsdt_line("Device (PIC)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0000\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_ICU1, 2); dsdt_fixed_ioport(IO_ICU2, 2); dsdt_fixed_irq(2); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); dsdt_line(""); dsdt_line("Device (TIMR)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0100\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_TIMER1_PORT, 4); dsdt_fixed_irq(0); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); dsdt_unindent(1); dsdt_line("}"); } static void pci_lpc_sysres_dsdt(void) { struct lpc_sysres **lspp, *lsp; dsdt_line(""); dsdt_line("Device (SIO)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0C02\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); SET_FOREACH(lspp, lpc_sysres_set) { lsp = *lspp; switch (lsp->type) { case LPC_SYSRES_IO: dsdt_fixed_ioport(lsp->base, lsp->length); break; case LPC_SYSRES_MEM: dsdt_fixed_mem32(lsp->base, lsp->length); break; } } dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } LPC_DSDT(pci_lpc_sysres_dsdt); static void pci_lpc_uart_dsdt(void) { struct lpc_uart_softc *sc; int unit; for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; if (!sc->enabled) continue; dsdt_line(""); dsdt_line("Device (%s)", lpc_uart_names[unit]); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0501\"))"); dsdt_line(" Name (_UID, %d)", unit + 1); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); 
dsdt_fixed_ioport(sc->iobase, UART_IO_BAR_SIZE); dsdt_fixed_irq(sc->irq); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } } LPC_DSDT(pci_lpc_uart_dsdt); static int pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, int bytes, uint32_t val) { int pirq_pin; if (bytes == 1) { pirq_pin = 0; if (coff >= 0x60 && coff <= 0x63) pirq_pin = coff - 0x60 + 1; if (coff >= 0x68 && coff <= 0x6b) pirq_pin = coff - 0x68 + 5; if (pirq_pin != 0) { pirq_write(ctx, pirq_pin, val); pci_set_cfgdata8(pi, coff, pirq_read(pirq_pin)); return (0); } } return (-1); } static void pci_lpc_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { } static uint64_t pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { return (0); } #define LPC_DEV 0x7000 #define LPC_VENDOR 0x8086 static int pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { /* * Do not allow more than one LPC bridge to be configured. */ if (lpc_bridge != NULL) { EPRINTLN("Only one LPC bridge is allowed."); return (-1); } /* * Enforce that the LPC can only be configured on bus 0. This * simplifies the ACPI DSDT because it can provide a decode for * all legacy i/o ports behind bus 0. 
*/ if (pi->pi_bus != 0) { EPRINTLN("LPC bridge can be present only on bus 0."); return (-1); } if (lpc_init(ctx) != 0) return (-1); /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV); pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA); lpc_bridge = pi; return (0); } char * lpc_pirq_name(int pin) { char *name; if (lpc_bridge == NULL) return (NULL); asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1); return (name); } void lpc_pirq_routed(void) { int pin; if (lpc_bridge == NULL) return; for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x60 + pin, pirq_read(pin + 1)); for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5)); } struct pci_devemu pci_de_lpc = { .pe_emu = "lpc", .pe_init = pci_lpc_init, .pe_write_dsdt = pci_lpc_write_dsdt, .pe_cfgwrite = pci_lpc_cfgwrite, .pe_barwrite = pci_lpc_write, .pe_barread = pci_lpc_read }; PCI_EMUL_SET(pci_de_lpc);