diff --git a/usr.sbin/bhyve/amd64/bhyverun_machdep.c b/usr.sbin/bhyve/amd64/bhyverun_machdep.c index c453092107d5..d51ad3a5fc05 100644 --- a/usr.sbin/bhyve/amd64/bhyverun_machdep.c +++ b/usr.sbin/bhyve/amd64/bhyverun_machdep.c @@ -1,367 +1,383 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include "acpi.h" #include "atkbdc.h" #include "bhyverun.h" +#include "bootrom.h" #include "config.h" #include "debug.h" #include "e820.h" #include "fwctl.h" #include "ioapic.h" #include "inout.h" #include "kernemu_dev.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "rtc.h" #include "smbiostbl.h" #include "xmsr.h" void bhyve_init_config(void) { init_config(); /* Set default values prior to option parsing. */ set_config_bool("acpi_tables", true); set_config_bool("acpi_tables_in_memory", true); set_config_value("memory.size", "256M"); set_config_bool("x86.strictmsr", true); set_config_value("lpc.fwcfg", "bhyve"); } void bhyve_usage(int code) { const char *progname; progname = getprogname(); fprintf(stderr, "Usage: %s [-aCDeHhPSuWwxY]\n" " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" " %*s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n" " %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n" " -a: local apic is in xAPIC mode (deprecated)\n" " -C: include guest memory in core file\n" " -c: number of CPUs and/or topology specification\n" " -D: destroy on power-off\n" " -e: exit on unhandled I/O access\n" " -G: start a debug server\n" " -H: vmexit from the guest on HLT\n" " -h: help\n" " -k: key=value flat config file\n" " -K: PS2 keyboard layout\n" " -l: LPC device configuration\n" " -m: memory size\n" " -o: set config 'var' to 'value'\n" " -P: vmexit from the guest on pause\n" " -p: pin 'vcpu' to 'hostcpu'\n" #ifdef BHYVE_SNAPSHOT " -r: path to checkpoint file\n" #endif " -S: guest memory cannot be swapped\n" " -s: PCI slot config\n" " -U: UUID\n" " -u: RTC keeps UTC time\n" " -W: force virtio to use single-vector MSI\n" " -w: ignore unimplemented MSRs\n" " -x: local APIC is in x2APIC mode\n" " -Y: disable MPtable generation\n", progname, (int)strlen(progname), "", (int)strlen(progname), "", (int)strlen(progname), ""); 
exit(code); } void bhyve_optparse(int argc, char **argv) { const char *optstr; int c; #ifdef BHYVE_SNAPSHOT optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:"; #else optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:"; #endif while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': set_config_bool("x86.x2apic", false); break; case 'A': /* * NOP. For backward compatibility. Most systems don't * work properly without sane ACPI tables. Therefore, * we're always generating them. */ break; case 'D': set_config_bool("destroy_on_poweroff", true); break; case 'p': if (bhyve_pincpu_parse(optarg) != 0) { errx(EX_USAGE, "invalid vcpu pinning " "configuration '%s'", optarg); } break; case 'c': if (bhyve_topology_parse(optarg) != 0) { errx(EX_USAGE, "invalid cpu topology " "'%s'", optarg); } break; case 'C': set_config_bool("memory.guest_in_core", true); break; case 'f': if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) { errx(EX_USAGE, "invalid fwcfg item '%s'", optarg); } break; case 'G': bhyve_parse_gdb_options(optarg); break; case 'k': bhyve_parse_simple_config_file(optarg); break; case 'K': set_config_value("keyboard.layout", optarg); break; case 'l': if (strncmp(optarg, "help", strlen(optarg)) == 0) { lpc_print_supported_devices(); exit(0); } else if (lpc_device_parse(optarg) != 0) { errx(EX_USAGE, "invalid lpc device " "configuration '%s'", optarg); } break; #ifdef BHYVE_SNAPSHOT case 'r': restore_file = optarg; break; #endif case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { pci_print_supported_devices(); exit(0); } else if (pci_parse_slot(optarg) != 0) exit(4); else break; case 'S': set_config_bool("memory.wired", true); break; case 'm': set_config_value("memory.size", optarg); break; case 'o': if (!bhyve_parse_config_option(optarg)) { errx(EX_USAGE, "invalid configuration option '%s'", optarg); } break; case 'H': set_config_bool("x86.vmexit_on_hlt", true); break; case 'I': /* * The "-I" option was used to add an ioapic to the * virtual machine. 
* * An ioapic is now provided unconditionally for each * virtual machine and this option is now deprecated. */ break; case 'P': set_config_bool("x86.vmexit_on_pause", true); break; case 'e': set_config_bool("x86.strictio", true); break; case 'u': set_config_bool("rtc.use_localtime", false); break; case 'U': set_config_value("uuid", optarg); break; case 'w': set_config_bool("x86.strictmsr", false); break; case 'W': set_config_bool("virtio_msix", false); break; case 'x': set_config_bool("x86.x2apic", true); break; case 'Y': set_config_bool("x86.mptable", false); break; case 'h': bhyve_usage(0); default: bhyve_usage(1); } } + + /* Handle backwards compatibility aliases in config options. */ + if (get_config_value("lpc.bootrom") != NULL && + get_config_value("bootrom") == NULL) { + warnx("lpc.bootrom is deprecated, use '-o bootrom' instead"); + set_config_value("bootrom", get_config_value("lpc.bootrom")); + } + if (get_config_value("lpc.bootvars") != NULL && + get_config_value("bootvars") == NULL) { + warnx("lpc.bootvars is deprecated, use '-o bootvars' instead"); + set_config_value("bootvars", get_config_value("lpc.bootvars")); + } } void bhyve_init_vcpu(struct vcpu *vcpu) { int err, tmp; if (get_config_bool_default("x86.vmexit_on_hlt", false)) { err = vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { EPRINTLN("VM exit on HLT not supported"); exit(4); } vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1); } if (get_config_bool_default("x86.vmexit_on_pause", false)) { /* * pause exit support required for this mode */ err = vm_get_capability(vcpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { EPRINTLN("SMP mux requested, no pause support"); exit(4); } vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1); } if (get_config_bool_default("x86.x2apic", false)) err = vm_set_x2apic_state(vcpu, X2APIC_ENABLED); else err = vm_set_x2apic_state(vcpu, X2APIC_DISABLED); if (err) { EPRINTLN("Unable to set x2apic state (%d)", err); exit(4); } vm_set_capability(vcpu, VM_CAP_ENABLE_INVPCID, 
1); err = vm_set_capability(vcpu, VM_CAP_IPI_EXIT, 1); assert(err == 0); } void bhyve_start_vcpu(struct vcpu *vcpu, bool bsp) { int error; if (bsp) { - if (lpc_bootrom()) { + if (bootrom_boot()) { error = vm_set_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); if (error != 0) { err(4, "ROM boot failed: unrestricted guest " "capability not available"); } error = vcpu_reset(vcpu); assert(error == 0); } } else { bhyve_init_vcpu(vcpu); /* * Enable the 'unrestricted guest' mode for APs. * * APs startup in power-on 16-bit mode. */ error = vm_set_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); assert(error == 0); } fbsdrun_addcpu(vcpu_id(vcpu)); } int bhyve_init_platform(struct vmctx *ctx, struct vcpu *bsp __unused) { int error; error = init_msr(); if (error != 0) return (error); init_inout(); kernemu_dev_init(); atkbdc_init(ctx); pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx); sci_init(ctx); error = e820_init(ctx); + if (error != 0) + return (error); + error = bootrom_loadrom(ctx); if (error != 0) return (error); return (0); } int bhyve_init_platform_late(struct vmctx *ctx, struct vcpu *bsp __unused) { int error; if (get_config_bool_default("x86.mptable", true)) { error = mptable_build(ctx, guest_ncpus); if (error != 0) return (error); } error = smbios_build(ctx); if (error != 0) return (error); error = e820_finalize(); if (error != 0) return (error); - if (lpc_bootrom() && strcmp(lpc_fwcfg(), "bhyve") == 0) + if (bootrom_boot() && strcmp(lpc_fwcfg(), "bhyve") == 0) fwctl_init(); if (get_config_bool("acpi_tables")) { error = acpi_build(ctx, guest_ncpus); assert(error == 0); } return (0); } diff --git a/usr.sbin/bhyve/amd64/ioapic.c b/usr.sbin/bhyve/amd64/ioapic.c index 9ad1c501fbae..494fb0c7ae82 100644 --- a/usr.sbin/bhyve/amd64/ioapic.c +++ b/usr.sbin/bhyve/amd64/ioapic.c @@ -1,80 +1,81 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include +#include "bootrom.h" #include "ioapic.h" #include "pci_emul.h" #include "pci_lpc.h" /* * Assign PCI INTx interrupts to I/O APIC pins in a round-robin * fashion. Note that we have no idea what the HPET is using, but the * HPET is also programmable whereas this is intended for hardwired * PCI interrupts. * * This assumes a single I/O APIC where pins >= 16 are permitted for * PCI devices. */ static int pci_pins; void ioapic_init(struct vmctx *ctx) { if (vm_ioapic_pincount(ctx, &pci_pins) < 0) { pci_pins = 0; return; } /* Ignore the first 16 pins. 
*/ if (pci_pins <= 16) { pci_pins = 0; return; } pci_pins -= 16; } int ioapic_pci_alloc_irq(struct pci_devinst *pi) { static int last_pin; if (pci_pins == 0) return (-1); - if (lpc_bootrom()) { + if (bootrom_boot()) { /* For external bootrom use fixed mapping. */ return (16 + (4 + pi->pi_slot + pi->pi_lintr.pin) % 8); } return (16 + (last_pin++ % pci_pins)); } diff --git a/usr.sbin/bhyve/amd64/pci_irq.c b/usr.sbin/bhyve/amd64/pci_irq.c index 7e1aee7fbb1d..fea6d9a2591c 100644 --- a/usr.sbin/bhyve/amd64/pci_irq.c +++ b/usr.sbin/bhyve/amd64/pci_irq.c @@ -1,377 +1,378 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include "acpi.h" +#include "bootrom.h" #include "inout.h" #include "ioapic.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" /* * Implement an 8 pin PCI interrupt router compatible with the router * present on Intel's ICH10 chip. */ /* Fields in each PIRQ register. */ #define PIRQ_DIS 0x80 #define PIRQ_IRQ 0x0f /* Only IRQs 3-7, 9-12, and 14-15 are permitted. */ #define PERMITTED_IRQS 0xdef8 #define IRQ_PERMITTED(irq) (((1U << (irq)) & PERMITTED_IRQS) != 0) /* IRQ count to disable an IRQ. */ #define IRQ_DISABLED 0xff #define NPIRQS 8 static struct pirq { uint8_t reg; int use_count; int active_count; pthread_mutex_t lock; } pirqs[NPIRQS]; #define NIRQ_COUNTS 16 static u_char irq_counts[NIRQ_COUNTS]; static int pirq_cold = 1; /* * Returns true if this pin is enabled with a valid IRQ. Setting the * register to a reserved IRQ causes interrupts to not be asserted as * if the pin was disabled. */ static bool pirq_valid_irq(int reg) { if (reg & PIRQ_DIS) return (false); return (IRQ_PERMITTED(reg & PIRQ_IRQ)); } uint8_t pirq_read(int pin) { assert(pin > 0 && pin <= NPIRQS); return (pirqs[pin - 1].reg); } void pirq_write(struct vmctx *ctx, int pin, uint8_t val) { struct pirq *pirq; assert(pin > 0 && pin <= NPIRQS); pirq = &pirqs[pin - 1]; pthread_mutex_lock(&pirq->lock); if (pirq->reg != (val & (PIRQ_DIS | PIRQ_IRQ))) { if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg)) vm_isa_deassert_irq(ctx, pirq->reg & PIRQ_IRQ, -1); pirq->reg = val & (PIRQ_DIS | PIRQ_IRQ); if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg)) vm_isa_assert_irq(ctx, pirq->reg & PIRQ_IRQ, -1); } pthread_mutex_unlock(&pirq->lock); } void pci_irq_reserve(int irq) { assert(irq >= 0 && irq < NIRQ_COUNTS); assert(pirq_cold); assert(irq_counts[irq] == 0 || irq_counts[irq] == IRQ_DISABLED); irq_counts[irq] = IRQ_DISABLED; } void pci_irq_use(int irq) { assert(irq >= 0 && irq < NIRQ_COUNTS); assert(pirq_cold); 
assert(irq_counts[irq] != IRQ_DISABLED); irq_counts[irq]++; } void pci_irq_init(struct vmctx *ctx __unused) { int i; for (i = 0; i < NPIRQS; i++) { pirqs[i].reg = PIRQ_DIS; pirqs[i].use_count = 0; pirqs[i].active_count = 0; pthread_mutex_init(&pirqs[i].lock, NULL); } for (i = 0; i < NIRQ_COUNTS; i++) { if (IRQ_PERMITTED(i)) irq_counts[i] = 0; else irq_counts[i] = IRQ_DISABLED; } } void pci_irq_assert(struct pci_devinst *pi) { struct pirq *pirq; int pin; pin = pi->pi_lintr.irq.pirq_pin; if (pin > 0) { assert(pin <= NPIRQS); pirq = &pirqs[pin - 1]; pthread_mutex_lock(&pirq->lock); pirq->active_count++; if (pirq->active_count == 1 && pirq_valid_irq(pirq->reg)) { vm_isa_assert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ, pi->pi_lintr.irq.ioapic_irq); pthread_mutex_unlock(&pirq->lock); return; } pthread_mutex_unlock(&pirq->lock); } vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.irq.ioapic_irq); } void pci_irq_deassert(struct pci_devinst *pi) { struct pirq *pirq; int pin; pin = pi->pi_lintr.irq.pirq_pin; if (pin > 0) { assert(pin <= NPIRQS); pirq = &pirqs[pin - 1]; pthread_mutex_lock(&pirq->lock); pirq->active_count--; if (pirq->active_count == 0 && pirq_valid_irq(pirq->reg)) { vm_isa_deassert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ, pi->pi_lintr.irq.ioapic_irq); pthread_mutex_unlock(&pirq->lock); return; } pthread_mutex_unlock(&pirq->lock); } vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.irq.ioapic_irq); } static int pirq_alloc_pin(struct pci_devinst *pi) { struct vmctx *ctx = pi->pi_vmctx; int best_count, best_irq, best_pin, irq, pin; pirq_cold = 0; - if (lpc_bootrom()) { + if (bootrom_boot()) { /* For external bootrom use fixed mapping. */ best_pin = (4 + pi->pi_slot + pi->pi_lintr.pin) % 8; } else { /* Find the least-used PIRQ pin. 
*/ best_pin = 0; best_count = pirqs[0].use_count; for (pin = 1; pin < NPIRQS; pin++) { if (pirqs[pin].use_count < best_count) { best_pin = pin; best_count = pirqs[pin].use_count; } } } pirqs[best_pin].use_count++; /* Second, route this pin to an IRQ. */ if (pirqs[best_pin].reg == PIRQ_DIS) { best_irq = -1; best_count = 0; for (irq = 0; irq < NIRQ_COUNTS; irq++) { if (irq_counts[irq] == IRQ_DISABLED) continue; if (best_irq == -1 || irq_counts[irq] < best_count) { best_irq = irq; best_count = irq_counts[irq]; } } assert(best_irq >= 0); irq_counts[best_irq]++; pirqs[best_pin].reg = best_irq; vm_isa_set_irq_trigger(ctx, best_irq, LEVEL_TRIGGER); } return (best_pin + 1); } int pirq_irq(int pin) { assert(pin > 0 && pin <= NPIRQS); return (pirqs[pin - 1].reg & PIRQ_IRQ); } void pci_irq_route(struct pci_devinst *pi, struct pci_irq *irq) { /* * Attempt to allocate an I/O APIC pin for this intpin if one * is not yet assigned. */ if (irq->ioapic_irq == 0) irq->ioapic_irq = ioapic_pci_alloc_irq(pi); assert(irq->ioapic_irq > 0); /* * Attempt to allocate a PIRQ pin for this intpin if one is * not yet assigned. */ if (irq->pirq_pin == 0) irq->pirq_pin = pirq_alloc_pin(pi); assert(irq->pirq_pin > 0); } /* XXX: Generate $PIR table. */ static void pirq_dsdt(void) { char *irq_prs, *old; int irq, pin; irq_prs = NULL; for (irq = 0; irq < NIRQ_COUNTS; irq++) { if (!IRQ_PERMITTED(irq)) continue; if (irq_prs == NULL) asprintf(&irq_prs, "%d", irq); else { old = irq_prs; asprintf(&irq_prs, "%s,%d", old, irq); free(old); } } /* * A helper method to validate a link register's value. This * duplicates pirq_valid_irq(). 
*/ dsdt_line(""); dsdt_line("Method (PIRV, 1, NotSerialized)"); dsdt_line("{"); dsdt_line(" If (And (Arg0, 0x%02X))", PIRQ_DIS); dsdt_line(" {"); dsdt_line(" Return (0x00)"); dsdt_line(" }"); dsdt_line(" And (Arg0, 0x%02X, Local0)", PIRQ_IRQ); dsdt_line(" If (LLess (Local0, 0x03))"); dsdt_line(" {"); dsdt_line(" Return (0x00)"); dsdt_line(" }"); dsdt_line(" If (LEqual (Local0, 0x08))"); dsdt_line(" {"); dsdt_line(" Return (0x00)"); dsdt_line(" }"); dsdt_line(" If (LEqual (Local0, 0x0D))"); dsdt_line(" {"); dsdt_line(" Return (0x00)"); dsdt_line(" }"); dsdt_line(" Return (0x01)"); dsdt_line("}"); for (pin = 0; pin < NPIRQS; pin++) { dsdt_line(""); dsdt_line("Device (LNK%c)", 'A' + pin); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0C0F\"))"); dsdt_line(" Name (_UID, 0x%02X)", pin + 1); dsdt_line(" Method (_STA, 0, NotSerialized)"); dsdt_line(" {"); dsdt_line(" If (PIRV (PIR%c))", 'A' + pin); dsdt_line(" {"); dsdt_line(" Return (0x0B)"); dsdt_line(" }"); dsdt_line(" Else"); dsdt_line(" {"); dsdt_line(" Return (0x09)"); dsdt_line(" }"); dsdt_line(" }"); dsdt_line(" Name (_PRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_line(" IRQ (Level, ActiveLow, Shared, )"); dsdt_line(" {%s}", irq_prs); dsdt_line(" })"); dsdt_line(" Name (CB%02X, ResourceTemplate ()", pin + 1); dsdt_line(" {"); dsdt_line(" IRQ (Level, ActiveLow, Shared, )"); dsdt_line(" {}"); dsdt_line(" })"); dsdt_line(" CreateWordField (CB%02X, 0x01, CIR%c)", pin + 1, 'A' + pin); dsdt_line(" Method (_CRS, 0, NotSerialized)"); dsdt_line(" {"); dsdt_line(" And (PIR%c, 0x%02X, Local0)", 'A' + pin, PIRQ_DIS | PIRQ_IRQ); dsdt_line(" If (PIRV (Local0))"); dsdt_line(" {"); dsdt_line(" ShiftLeft (0x01, Local0, CIR%c)", 'A' + pin); dsdt_line(" }"); dsdt_line(" Else"); dsdt_line(" {"); dsdt_line(" Store (0x00, CIR%c)", 'A' + pin); dsdt_line(" }"); dsdt_line(" Return (CB%02X)", pin + 1); dsdt_line(" }"); dsdt_line(" Method (_DIS, 0, NotSerialized)"); dsdt_line(" {"); dsdt_line(" Store (0x80, PIR%c)", 'A' + pin); 
dsdt_line(" }"); dsdt_line(" Method (_SRS, 1, NotSerialized)"); dsdt_line(" {"); dsdt_line(" CreateWordField (Arg0, 0x01, SIR%c)", 'A' + pin); dsdt_line(" FindSetRightBit (SIR%c, Local0)", 'A' + pin); dsdt_line(" Store (Decrement (Local0), PIR%c)", 'A' + pin); dsdt_line(" }"); dsdt_line("}"); } free(irq_prs); } LPC_DSDT(pirq_dsdt); diff --git a/usr.sbin/bhyve/amd64/pci_lpc.c b/usr.sbin/bhyve/amd64/pci_lpc.c index 57d2333edcc6..ed41a800a2ea 100644 --- a/usr.sbin/bhyve/amd64/pci_lpc.c +++ b/usr.sbin/bhyve/amd64/pci_lpc.c @@ -1,620 +1,605 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Neel Natu * Copyright (c) 2013 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include "acpi.h" #include "debug.h" #include "bootrom.h" #include "config.h" #include "inout.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "pci_passthru.h" #include "pctestdev.h" #include "tpm_device.h" #include "uart_emul.h" #define IO_ICU1 0x20 #define IO_ICU2 0xA0 SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt); SET_DECLARE(lpc_sysres_set, struct lpc_sysres); #define ELCR_PORT 0x4d0 SYSRES_IO(ELCR_PORT, 2); #define IO_TIMER1_PORT 0x40 #define NMISC_PORT 0x61 SYSRES_IO(NMISC_PORT, 1); static struct pci_devinst *lpc_bridge; #define LPC_UART_NUM 4 static struct lpc_uart_softc { struct uart_ns16550_softc *uart_softc; int iobase; int irq; int enabled; } lpc_uart_softc[LPC_UART_NUM]; static const char *lpc_uart_names[LPC_UART_NUM] = { "com1", "com2", "com3", "com4" }; static const char *lpc_uart_acpi_names[LPC_UART_NUM] = { "COM1", "COM2", "COM3", "COM4" }; /* * LPC device configuration is in the following form: * <lpc_device_name>[,<options>] * For e.g. 
"com1,stdio" or "bootrom,/var/romfile" */ int lpc_device_parse(const char *opts) { int unit, error; char *str, *cpy, *lpcdev, *node_name; const char *romfile, *varfile, *tpm_type, *tpm_path; error = -1; str = cpy = strdup(opts); lpcdev = strsep(&str, ","); if (lpcdev != NULL) { if (strcasecmp(lpcdev, "bootrom") == 0) { romfile = strsep(&str, ","); if (romfile == NULL) { errx(4, "invalid bootrom option \"%s\"", opts); } - set_config_value("lpc.bootrom", romfile); + set_config_value("bootrom", romfile); varfile = strsep(&str, ","); if (varfile == NULL) { error = 0; goto done; } if (strchr(varfile, '=') == NULL) { - set_config_value("lpc.bootvars", varfile); + set_config_value("bootvars", varfile); } else { /* varfile doesn't exist, it's another config * option */ pci_parse_legacy_config(find_config_node("lpc"), varfile); } pci_parse_legacy_config(find_config_node("lpc"), str); error = 0; goto done; } if (strcasecmp(lpcdev, "tpm") == 0) { nvlist_t *nvl = create_config_node("tpm"); tpm_type = strsep(&str, ","); if (tpm_type == NULL) { errx(4, "invalid tpm type \"%s\"", opts); } set_config_value_node(nvl, "type", tpm_type); tpm_path = strsep(&str, ","); if (tpm_path == NULL) { errx(4, "invalid tpm path \"%s\"", opts); } set_config_value_node(nvl, "path", tpm_path); pci_parse_legacy_config(find_config_node("tpm"), str); set_config_value_node_if_unset(nvl, "version", "2.0"); error = 0; goto done; } for (unit = 0; unit < LPC_UART_NUM; unit++) { if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) { asprintf(&node_name, "lpc.%s.path", lpc_uart_names[unit]); set_config_value(node_name, str); free(node_name); error = 0; goto done; } } if (strcasecmp(lpcdev, pctestdev_getname()) == 0) { asprintf(&node_name, "lpc.%s", pctestdev_getname()); set_config_bool(node_name, true); free(node_name); error = 0; goto done; } } done: free(cpy); return (error); } void lpc_print_supported_devices(void) { size_t i; printf("bootrom\n"); for (i = 0; i < LPC_UART_NUM; i++) printf("%s\n", 
lpc_uart_names[i]); printf("tpm\n"); printf("%s\n", pctestdev_getname()); } -const char * -lpc_bootrom(void) -{ - - return (get_config_value("lpc.bootrom")); -} - const char * lpc_fwcfg(void) { return (get_config_value("lpc.fwcfg")); } static void lpc_uart_intr_assert(void *arg) { struct lpc_uart_softc *sc = arg; assert(sc->irq >= 0); vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq); } static void lpc_uart_intr_deassert(void *arg __unused) { /* * The COM devices on the LPC bus generate edge triggered interrupts, * so nothing more to do here. */ } static int lpc_uart_io_handler(struct vmctx *ctx __unused, int in, int port, int bytes, uint32_t *eax, void *arg) { int offset; struct lpc_uart_softc *sc = arg; offset = port - sc->iobase; switch (bytes) { case 1: if (in) *eax = uart_ns16550_read(sc->uart_softc, offset); else uart_ns16550_write(sc->uart_softc, offset, *eax); break; case 2: if (in) { *eax = uart_ns16550_read(sc->uart_softc, offset); *eax |= uart_ns16550_read(sc->uart_softc, offset + 1) << 8; } else { uart_ns16550_write(sc->uart_softc, offset, *eax); uart_ns16550_write(sc->uart_softc, offset + 1, *eax >> 8); } break; default: return (-1); } return (0); } static int lpc_init(struct vmctx *ctx) { struct lpc_uart_softc *sc; struct inout_port iop; const char *backend, *name; char *node_name; int unit, error; - const nvlist_t *nvl; - - nvl = find_config_node("lpc"); - if (nvl != NULL && nvlist_exists(nvl, "bootrom")) { - error = bootrom_loadrom(ctx, nvl); - if (error) - return (error); - } /* COM1 and COM2 */ for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; name = lpc_uart_names[unit]; if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) { EPRINTLN("Unable to allocate resources for " "LPC device %s", name); return (-1); } pci_irq_reserve(sc->irq); sc->uart_softc = uart_ns16550_init(lpc_uart_intr_assert, lpc_uart_intr_deassert, sc); asprintf(&node_name, "lpc.%s.path", name); backend = get_config_value(node_name); 
free(node_name); if (backend != NULL && uart_ns16550_tty_open(sc->uart_softc, backend) != 0) { EPRINTLN("Unable to initialize backend '%s' " "for LPC device %s", backend, name); return (-1); } bzero(&iop, sizeof(struct inout_port)); iop.name = name; iop.port = sc->iobase; iop.size = UART_NS16550_IO_BAR_SIZE; iop.flags = IOPORT_F_INOUT; iop.handler = lpc_uart_io_handler; iop.arg = sc; error = register_inout(&iop); assert(error == 0); sc->enabled = 1; } /* pc-testdev */ asprintf(&node_name, "lpc.%s", pctestdev_getname()); if (get_config_bool_default(node_name, false)) { error = pctestdev_init(ctx); if (error) return (error); } free(node_name); return (0); } static void pci_lpc_write_dsdt(struct pci_devinst *pi) { struct lpc_dsdt **ldpp, *ldp; dsdt_line(""); dsdt_line("Device (ISA)"); dsdt_line("{"); dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func); dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)"); dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)"); dsdt_line(" {"); dsdt_line(" Offset (0x60),"); dsdt_line(" PIRA, 8,"); dsdt_line(" PIRB, 8,"); dsdt_line(" PIRC, 8,"); dsdt_line(" PIRD, 8,"); dsdt_line(" Offset (0x68),"); dsdt_line(" PIRE, 8,"); dsdt_line(" PIRF, 8,"); dsdt_line(" PIRG, 8,"); dsdt_line(" PIRH, 8"); dsdt_line(" }"); dsdt_line(""); dsdt_indent(1); SET_FOREACH(ldpp, lpc_dsdt_set) { ldp = *ldpp; ldp->handler(); } dsdt_line(""); dsdt_line("Device (PIC)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0000\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_ICU1, 2); dsdt_fixed_ioport(IO_ICU2, 2); dsdt_fixed_irq(2); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); dsdt_line(""); dsdt_line("Device (TIMR)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0100\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_TIMER1_PORT, 4); dsdt_fixed_irq(0); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); 
dsdt_unindent(1); dsdt_line("}"); } static void pci_lpc_sysres_dsdt(void) { struct lpc_sysres **lspp, *lsp; dsdt_line(""); dsdt_line("Device (SIO)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0C02\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); SET_FOREACH(lspp, lpc_sysres_set) { lsp = *lspp; switch (lsp->type) { case LPC_SYSRES_IO: dsdt_fixed_ioport(lsp->base, lsp->length); break; case LPC_SYSRES_MEM: dsdt_fixed_mem32(lsp->base, lsp->length); break; } } dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } LPC_DSDT(pci_lpc_sysres_dsdt); static void pci_lpc_uart_dsdt(void) { struct lpc_uart_softc *sc; int unit; for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; if (!sc->enabled) continue; dsdt_line(""); dsdt_line("Device (%s)", lpc_uart_acpi_names[unit]); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0501\"))"); dsdt_line(" Name (_UID, %d)", unit + 1); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(sc->iobase, UART_NS16550_IO_BAR_SIZE); dsdt_fixed_irq(sc->irq); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } } LPC_DSDT(pci_lpc_uart_dsdt); static int pci_lpc_cfgwrite(struct pci_devinst *pi, int coff, int bytes, uint32_t val) { int pirq_pin; if (bytes == 1) { pirq_pin = 0; if (coff >= 0x60 && coff <= 0x63) pirq_pin = coff - 0x60 + 1; if (coff >= 0x68 && coff <= 0x6b) pirq_pin = coff - 0x68 + 5; if (pirq_pin != 0) { pirq_write(pi->pi_vmctx, pirq_pin, val); pci_set_cfgdata8(pi, coff, pirq_read(pirq_pin)); return (0); } } return (-1); } static void pci_lpc_write(struct pci_devinst *pi __unused, int baridx __unused, uint64_t offset __unused, int size __unused, uint64_t value __unused) { } static uint64_t pci_lpc_read(struct pci_devinst *pi __unused, int baridx __unused, uint64_t offset __unused, int size __unused) { return (0); } #define LPC_DEV 0x7000 #define LPC_VENDOR 0x8086 #define LPC_REVID 0x00 #define LPC_SUBVEND_0 0x0000 #define 
LPC_SUBDEV_0 0x0000 static int pci_lpc_get_sel(struct pcisel *const sel) { assert(sel != NULL); memset(sel, 0, sizeof(*sel)); for (uint8_t slot = 0; slot <= PCI_SLOTMAX; ++slot) { uint8_t max_func = 0; sel->pc_dev = slot; sel->pc_func = 0; if (pci_host_read_config(sel, PCIR_HDRTYPE, 1) & PCIM_MFDEV) max_func = PCI_FUNCMAX; for (uint8_t func = 0; func <= max_func; ++func) { sel->pc_func = func; if (pci_host_read_config(sel, PCIR_CLASS, 1) == PCIC_BRIDGE && pci_host_read_config(sel, PCIR_SUBCLASS, 1) == PCIS_BRIDGE_ISA) { return (0); } } } warnx("%s: Unable to find host selector of LPC bridge.", __func__); return (-1); } static int pci_lpc_init(struct pci_devinst *pi, nvlist_t *nvl) { struct pcisel sel = { 0 }; struct pcisel *selp = NULL; uint16_t device, subdevice, subvendor, vendor; uint8_t revid; /* * Do not allow more than one LPC bridge to be configured. */ if (lpc_bridge != NULL) { EPRINTLN("Only one LPC bridge is allowed."); return (-1); } /* * Enforce that the LPC can only be configured on bus 0. This * simplifies the ACPI DSDT because it can provide a decode for * all legacy i/o ports behind bus 0. 
*/ if (pi->pi_bus != 0) { EPRINTLN("LPC bridge can be present only on bus 0."); return (-1); } if (lpc_init(pi->pi_vmctx) != 0) return (-1); if (pci_lpc_get_sel(&sel) == 0) selp = &sel; vendor = pci_config_read_reg(selp, nvl, PCIR_VENDOR, 2, LPC_VENDOR); device = pci_config_read_reg(selp, nvl, PCIR_DEVICE, 2, LPC_DEV); revid = pci_config_read_reg(selp, nvl, PCIR_REVID, 1, LPC_REVID); subvendor = pci_config_read_reg(selp, nvl, PCIR_SUBVEND_0, 2, LPC_SUBVEND_0); subdevice = pci_config_read_reg(selp, nvl, PCIR_SUBDEV_0, 2, LPC_SUBDEV_0); /* initialize config space */ pci_set_cfgdata16(pi, PCIR_VENDOR, vendor); pci_set_cfgdata16(pi, PCIR_DEVICE, device); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA); pci_set_cfgdata8(pi, PCIR_REVID, revid); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, subvendor); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, subdevice); lpc_bridge = pi; return (0); } char * lpc_pirq_name(int pin) { char *name; if (lpc_bridge == NULL) return (NULL); asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1); return (name); } void lpc_pirq_routed(void) { int pin; if (lpc_bridge == NULL) return; for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x60 + pin, pirq_read(pin + 1)); for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5)); } #ifdef BHYVE_SNAPSHOT static int pci_lpc_snapshot(struct vm_snapshot_meta *meta) { int unit, ret; struct uart_ns16550_softc *sc; for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = lpc_uart_softc[unit].uart_softc; ret = uart_ns16550_snapshot(sc, meta); if (ret != 0) goto done; } done: return (ret); } #endif static const struct pci_devemu pci_de_lpc = { .pe_emu = "lpc", .pe_init = pci_lpc_init, .pe_write_dsdt = pci_lpc_write_dsdt, .pe_cfgwrite = pci_lpc_cfgwrite, .pe_barwrite = pci_lpc_write, .pe_barread = pci_lpc_read, #ifdef BHYVE_SNAPSHOT .pe_snapshot = pci_lpc_snapshot, #endif }; PCI_EMUL_SET(pci_de_lpc); diff --git 
a/usr.sbin/bhyve/amd64/pci_lpc.h b/usr.sbin/bhyve/amd64/pci_lpc.h index 2dca8f7bec24..402eae082545 100644 --- a/usr.sbin/bhyve/amd64/pci_lpc.h +++ b/usr.sbin/bhyve/amd64/pci_lpc.h @@ -1,75 +1,74 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _LPC_H_ #define _LPC_H_ #include typedef void (*lpc_write_dsdt_t)(void); struct lpc_dsdt { lpc_write_dsdt_t handler; }; #define LPC_DSDT(handler) \ static struct lpc_dsdt __CONCAT(__lpc_dsdt, __LINE__) = { \ (handler), \ }; \ DATA_SET(lpc_dsdt_set, __CONCAT(__lpc_dsdt, __LINE__)) enum lpc_sysres_type { LPC_SYSRES_IO, LPC_SYSRES_MEM }; struct lpc_sysres { enum lpc_sysres_type type; uint32_t base; uint32_t length; }; #define LPC_SYSRES(type, base, length) \ static struct lpc_sysres __CONCAT(__lpc_sysres, __LINE__) = { \ (type), \ (base), \ (length) \ }; \ DATA_SET(lpc_sysres_set, __CONCAT(__lpc_sysres, __LINE__)) #define SYSRES_IO(base, length) LPC_SYSRES(LPC_SYSRES_IO, base, length) #define SYSRES_MEM(base, length) LPC_SYSRES(LPC_SYSRES_MEM, base, length) int lpc_device_parse(const char *opt); void lpc_print_supported_devices(void); char *lpc_pirq_name(int pin); void lpc_pirq_routed(void); -const char *lpc_bootrom(void); const char *lpc_fwcfg(void); #endif diff --git a/usr.sbin/bhyve/bhyve_config.5 b/usr.sbin/bhyve/bhyve_config.5 index d0e5c8ae47d3..ebbb206cca9f 100644 --- a/usr.sbin/bhyve/bhyve_config.5 +++ b/usr.sbin/bhyve/bhyve_config.5 @@ -1,737 +1,737 @@ .\" SPDX-License-Identifier: BSD-2-Clause .\" .\" Copyright (c) 2021 John H. Baldwin .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. 
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 20, 2023 +.Dd August 13, 2024 .Dt BHYVE_CONFIG 5 .Os .Sh NAME .Nm bhyve_config .Nd "bhyve configuration variables" .Sh DESCRIPTION .Xr bhyve 8 uses a hierarchical tree of configuration variables to describe global and per-device settings. Internal nodes in this tree do not have a value, only leaf nodes have values. This manual describes the configuration variables understood by .Xr bhyve 8 . If additional variables are defined, .Xr bhyve 8 will ignore them and will not emit errors for unknown variables. However, these additional variables can be referenced by other variables as described below. .Sh VARIABLE VALUES Configuration variable values are stored as strings. A configuration variable value may refer to one or more other configuration values by name. Instances of the pattern .Sq % Ns Pq Ar var are replaced by the value of the configuration variable .Va var . To avoid unwanted expansion, .Sq % characters can be escaped by a leading .Sq % . 
For example, if a configuration variable .Va disk uses the value .Pa /dev/zvol/bhyve/%(name) , then the final value of the .Va disk variable will be set to the path of a ZFS volume whose name matches the name of the virtual machine on the pool .Pa bhyve . .Pp Some configuration variables may be interpreted as a boolean value. For those variables the following case-insensitive values may be used to indicate true: .Pp .Bl -bullet -offset indent -compact .It true .It on .It yes .It 1 .El .Pp The following values may be used to indicate false: .Pp .Bl -bullet -offset indent -compact .It false .It off .It no .It 0 .El .Pp Some configuration variables may be interpreted as an integer. For those variables, any syntax supported by .Xr strtol 3 may be used. .Sh GLOBAL SETTINGS .Ss Architecture Neutral Settings .Bl -column "memory.guest_in_core" "integer" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va name Ta string Ta Ta The name of the VM. .It Va cpus Ta integer Ta 1 Ta The total number of virtual CPUs. .It Va cores Ta integer Ta 1 Ta The number of virtual cores in each virtual socket. .It Va threads Ta integer Ta 1 Ta The number of virtual CPUs in each virtual core. .It Va sockets Ta integer Ta 1 Ta The number of virtual sockets. .It Va memory.guest_in_core Ta bool Ta false Ta Include guest memory in core file. .It Va memory.size Ta string Ta 256M Ta Guest physical memory size in bytes. The value must be formatted as described in .Xr expand_number 3 . .It Va memory.wired Ta bool Ta false Ta Wire guest memory. +.It Va bootrom Ta path Ta Ta +Path to a boot ROM. +During initialization of the guest, the contents of this file are copied into +the guest's memory. +If a boot ROM is present, a firmware interface device is +also enabled for use by the boot ROM. +.It Va bootvars Ta path Ta Ta +Path to boot VARS. +The contents of this file are copied beneath the boot ROM. +Firmware can write to it to save variables.
+All variables will be persistent even on reboots of the guest. .It Va acpi_tables Ta bool Ta true Ta Generate ACPI tables. .It Va acpi_tables_in_memory Ta bool Ta true Ta .Xr bhyve 8 always exposes ACPI tables by FwCfg. For backward compatibility bhyve copies them into the guest memory as well. This can cause problems if the guest uses the in-memory version, since certain advanced features, such as TPM emulation, are exposed only via FwCfg. Therefore, it is recommended to set this flag to false when running Windows guests. .It Va destroy_on_poweroff Ta bool Ta false Ta Destroy the VM on guest-initiated power-off. .It Va gdb.address Ta string Ta localhost Ta Hostname, IP address, or IPv6 address for the debug server. .It Va gdb.port Ta integer Ta 0 Ta TCP port number for the debug server. If this is set to a non-zero value, a debug server will listen for connections on this port. .It Va gdb.wait Ta bool Ta false Ta If the debug server is enabled, wait for a debugger to connect before starting the guest. .It Va keyboard.layout Ta string Ta Ta Specify the keyboard layout name with the file name in .Ar /usr/share/bhyve/kbdlayout . This value only works when loaded with UEFI mode for VNC, and when using a VNC client that doesn't support QEMU Extended Key Event Message (e.g. TightVNC). .It Va tpm.path Ta string Ta Ta Path to the host TPM device. This is typically /dev/tpm0. .It Va tpm.type Ta string Ta Ta Type of the TPM device passed to the guest. Currently, only "passthru" is supported. .It Va tpm.version Ta string Ta 2.0 Ta Version of the TPM device according to the TCG specification. Currently, only version 2.0 is supported. .It Va rtc.use_localtime Ta bool Ta true Ta The real time clock uses the local time of the host. If this is set to false, the real time clock uses UTC. .It Va uuid Ta string Ta Ta The universally unique identifier (UUID) to use in the guest's System Management BIOS System Information structure.
If an explicit value is not set, a valid UUID is generated from the host's hostname and the VM name. .It Va virtio_msix Ta bool Ta true Ta Use MSI-X interrupts for PCI VirtIO devices. If set to false, MSI interrupts are used instead. .It Va config.dump Ta bool Ta false Ta If this value is set to true after .Xr bhyve 8 has finished parsing command line options, then .Xr bhyve 8 will write all of its configuration variables to stdout and exit. No VM will be started. .It Va bios.vendor Ta string Ta BHYVE Ta This value is used for the guest's System Management BIOS System Information structure. .It Va bios.version Ta string Ta 14.0 Ta This value is used for the guest's System Management BIOS System Information structure. .It Va bios.release_date Ta string Ta 10/17/2021 Ta This value is used for the guest's System Management BIOS System Information structure. .It Va system.family_name Ta string Ta Virtual Machine Ta Family the computer belongs to. This value is used for the guest's System Management BIOS System Information structure. .It Va system.manufacturer Ta string Ta FreeBSD Ta This value is used for the guest's System Management BIOS System Information structure. .It Va system.product_name Ta string Ta BHYVE Ta This value is used for the guest's System Management BIOS System Information structure. .It Va system.serial_number Ta string Ta None Ta This value is used for the guest's System Management BIOS System Information structure. .It Va system.sku Ta string Ta None Ta Stock keeping unit of the computer. It's also called product ID or purchase order number. This value is used for the guest's System Management BIOS System Information structure. .It Va system.version Ta string Ta 1.0 Ta This value is used for the guest's System Management BIOS System Information structure. .It Va board.manufacturer Ta string Ta FreeBSD Ta This value is used for the guest's System Management BIOS System Information structure. 
.It Va board.product_name Ta string Ta BHYVE Ta This value is used for the guest's System Management BIOS System Information structure. .It Va board.version Ta string Ta 1.0 Ta This value is used for the guest's System Management BIOS System Information structure. .It Va board.serial_number Ta string Ta None Ta This value is used for the guest's System Management BIOS System Information structure. .It Va board.asset_tag Ta string Ta None Ta This value is used for the guest's System Management BIOS System Information structure. .It Va board.location Ta string Ta None Ta Describes the board's location within the chassis. This value is used for the guest's System Management BIOS System Information structure. .It Va chassis.manufacturer Ta string Ta FreeBSD Ta This value is used for the guest's System Management BIOS System Information structure. .It Va chassis.version Ta string Ta 1.0 Ta This value is used for the guest's System Management BIOS System Information structure. .It Va chassis.serial_number Ta string Ta None Ta This value is used for the guest's System Management BIOS System Information structure. .It Va chassis.asset_tag Ta string Ta None Ta This value is used for the guest's System Management BIOS System Information structure. .It Va chassis.sku Ta string Ta None Ta Stock keeping unit of the chassis. It's also called product ID or purchase order number. This value is used for the guest's System Management BIOS System Information structure. .El .Ss x86-Specific Settings .Bl -column "x86.vmexit_on_pause" "integer" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va x86.mptable Ta bool Ta true Ta Generate an MPTable. .It Va x86.x2apic Ta bool Ta false Ta Configure guest's local APICs in x2APIC mode. .It Va x86.strictio Ta bool Ta false Ta Exit if a guest accesses an I/O port that is not emulated. By default, writes are ignored and reads return all bits set. 
.It Va x86.strictmsr Ta bool Ta true Ta Inject a general protection fault if a guest accesses a Model Specific Register (MSR) that is not emulated. If this is false, writes are ignored and reads return zero. .It Va x86.vmexit_on_hlt Ta bool Ta false Ta Force a VM exit when a guest CPU executes the .Dv HLT instruction. This allows idle guest CPUs to yield the host CPU. .It Va x86.vmexit_on_pause Ta bool Ta false Ta Force a VM exit when a guest CPU executes the .Dv PAUSE instruction. .El .Sh DEVICE SETTINGS Device settings are stored under a device node. The device node's name is set by the parent bus of the device. .Ss PCI Device Settings PCI devices are described by a device node named .Dq pci . Ns Ar bus . Ns Ar slot . Ns Ar function where each of .Ar bus , .Ar slot , and .Ar function are formatted as decimal values with no padding. All PCI device nodes must contain a configuration variable named .Dq device which specifies the device model to use. The following PCI device models are supported: .Bl -tag -indent .It Li hostbridge Provide a simple PCI-Host bridge device. This is usually configured at pci0:0:0 and is required by most guest operating systems. .It Li ahci AHCI storage controller. .It Li e1000 Intel e82545 network interface. .It Li fbuf VGA framebuffer device attached to VNC server. .It Li lpc LPC PCI-ISA bridge with COM1-COM4 16550 serial ports, a boot ROM, an optional fwcfg type, and an optional debug/test device. This device must be configured on bus 0. .It Li hda High Definition audio controller. .It Li nvme NVM Express (NVMe) controller. .It Li passthru PCI pass-through device. .It Li uart PCI 16550 serial device. .It Li virtio-9p VirtIO 9p (VirtFS) interface. .It Li virtio-blk VirtIO block storage interface. .It Li virtio-console VirtIO console interface. .It Li virtio-input VirtIO input interface. .It Li virtio-net VirtIO network interface. .It Li virtio-rnd VirtIO RNG interface. .It Li virtio-scsi VirtIO SCSI interface. 
.It Li xhci Extensible Host Controller Interface (XHCI) USB controller. .El .Ss USB Device Settings USB controller devices contain zero or more child USB devices attached to slots. Each USB device stores its settings in a node named .Dq slot. Ns Va N under the controller's device node. .Va N is the number of the slot to which the USB device is attached. Note that USB slot numbers begin at 1. All USB device nodes must contain a configuration variable named .Dq device which specifies the device model to use. The following USB device models are supported: .Bl -tag -indent .It Li tablet A USB tablet device which provides precise cursor synchronization when using VNC. .El .Ss Block Device Settings Block devices use the following settings to configure their backing store. These settings are stored in the configuration node of the respective device. .Bl -column "sectorsize" "logical[/physical]" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It path Ta string Ta Ta The path of the file or disk device to use as the backing store. .It nocache Ta bool Ta false Ta Disable caching on the backing file by opening the backing file with .Dv O_DIRECT . .It nodelete Ta bool Ta false Ta Disable emulation of guest trim requests via .Dv DIOCGDELETE requests. .It sync Ta bool Ta false Ta Write changes to the backing file with synchronous writes. .It direct Ta bool Ta false Ta An alias for .Va sync . .It ro Ta bool Ta false Ta Disable writes to the backing file. .It sectorsize Ta Va logical Ns Op / Ns Va physical Ta Ta Specify the logical and physical sector size of the emulated disk. If the physical size is not specified, it is equal to the logical size. .El .Ss Network Backend Settings Network devices use the following settings to configure their backend. The backend is responsible for passing packets between the device model and a desired destination. Configuring a backend requires setting the .Va backend variable. 
The type of a backend can either be set explicitly via the .Va type variable or it can be inferred from the value of .Va backend . .Pp The following types of backends are supported: .Bl -tag -width "netgraph" .It tap Use the .Xr tap 4 interface named in .Va backend as the backend. .It netgraph Use a .Xr netgraph 4 socket hook as the backend. This backend uses the following additional variables: .Bl -column "peerhook" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va path Ta string Ta Ta The name of the .Xr netgraph 4 destination node. .It Va peerhook Ta string Ta Ta The name of the destination hook. .It Va socket Ta string Ta Ta The name of the created .Xr ng_socket 4 node. .It Va hook Ta string Ta vmlink Ta The name of the source hook on the created .Xr ng_socket 4 node. .El .It netmap Use .Xr netmap 4 either on a network interface or a port on a .Xr vale 4 bridge as the backend. The value of .Va backend is passed to .Xr nm_open to connect to a netmap port. .It slirp Use the slirp backend to provide a userspace network stack. The .Va hostfwd variable is used to configure how packets from the host are translated before being sent to the guest. .Bl -column "peerhook" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va hostfwd Ta string Ta Ta A semicolon-separated list of host forwarding rules, each of the form .Ar proto:haddr:hport-gaddr:gport , where .Ar proto is either .Ql tcp or .Ql udp . If the guest address is equal to the empty string, packets will be forwarded to the first DHCP-assigned address in the guest. 
.El .El .Pp If .Va type is not specified explicitly, then it is inferred from .Va backend based on the following patterns: .Bl -column -offset indent "valuebridge:port" .It Sy Pattern Ta Sy Type .It tap Ns Va N Ta tap .It vmnet Ns Va N Ta tap .It netgraph Ta netgraph .It netmap: Ns Va interface Ta netmap .It vale Ns Va bridge : Ns Va port Ta netmap .El .Ss UART Device Settings .Bl -column "Name" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va path Ta path Ta Ta Backend device for the serial port. Either the pathname of a character device or .Dq stdio to use standard input and output of the .Xr bhyve 8 process. .El .Ss Host Bridge Settings .Bl -column "pcireg.*" "integer" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va pcireg.* Ta integer Ta Ta Values of PCI register. .Bl -column "device" "Default" .It Sy Name Ta Sy Default .It Va vendor Ta integer Ta 0x1275 Ta .It Va device Ta integer Ta 0x1275 Ta .El .El .Ss AHCI Controller Settings AHCI controller devices contain zero or more ports each of which provides a storage device. Each port stores its settings in a node named .Dq port. Ns Va N under the controller's device node. The .Va N values are formatted as successive decimal values starting with 0. In addition to the block device settings described above, each port supports the following settings: .Bl -column "model" "integer" "generated" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va type Ta string Ta Ta The type of storage device to emulate. Must be set to either .Dq cd or .Dq hd . .It Va nmrr Ta integer Ta 0 Ta Nominal Media Rotation Rate, also known as RPM. A value of 1 indicates a device with no rate such as a Solid State Disk. .It Va ser Ta string Ta generated Ta Serial number of up to twenty characters. A default serial number is generated using a hash of the backing store's pathname. .It Va rev Ta string Ta 001 Ta Revision number of up to eight characters.
.It Va model Ta string Ta Ta Model number of up to forty characters. Separate default model strings are used for .Dq cd and .Dq hd device types. .El .Ss e1000 Settings In addition to the network backend settings, Intel e82545 network interfaces support the following variables: .Bl -column "Name" "MAC address" "generated" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va mac Ta MAC address Ta generated Ta MAC address. If an explicit address is not provided, a MAC address is generated from a hash of the device's PCI address. .El .Ss Frame Buffer Settings .Bl -column "password" "[IP:]port" "127.0.0.1:5900" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va wait Ta bool Ta false Ta Wait for a remote connection before starting the VM. .It Va rfb Ta Oo Ar IP Ns : Oc Ns Ar port Ta 127.0.0.1:5900 Ta TCP address to listen on for remote connections. The IP address must be given as a numeric address. IPv6 addresses must be enclosed in square brackets and support scoped identifiers as described in .Xr getaddrinfo 3 . A bare port number may be given in which case the IPv4 localhost address is used. .It Va vga Ta string Ta io Ta VGA configuration. More details are provided in .Xr bhyve 8 . .It Va w Ta integer Ta 1024 Ta Frame buffer width in pixels. .It Va h Ta integer Ta 768 Ta Frame buffer height in pixels. .It Va password Ta string Ta Ta Password to use for VNC authentication. This type of authentication is known to be cryptographically weak and is not intended for use on untrusted networks. .El .Ss High Definition Audio Settings .Bl -column "Name" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va play Ta path Ta Ta Host playback device, typically .Pa /dev/dsp0 . .It Va rec Ta path Ta Ta Host recording device, typically .Pa /dev/dsp0 . .El .Ss LPC Device Settings The LPC bridge stores its configuration under a top-level .Va lpc node rather than under the PCI LPC device's node. 
The following nodes are available under .Va lpc : .Bl -column "pc-testdev" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description -.It Va bootrom Ta path Ta Ta -Path to a boot ROM. -The contents of this file are copied into the guest's -memory ending just before the 4GB physical address. -If a boot ROM is present, a firmware interface device is -also enabled for use by the boot ROM. -.It Va bootvars Ta path Ta Ta -Path to boot VARS. -The contents of this file are copied beneath the boot ROM. -Firmware can write to it to save variables. -All variables will be persistent even on reboots of the guest. .It Va com1 Ta node Ta Ta Settings for the COM1 serial port device. .It Va com2 Ta node Ta Ta Settings for the COM2 serial port device. .It Va com3 Ta node Ta Ta Settings for the COM3 serial port device. .It Va com4 Ta node Ta Ta Settings for the COM4 serial port device. .It Va fwcfg Ta string Ta bhyve Ta The fwcfg type to be used. Supported values are .Dq bhyve for fwctl and .Dq qemu for fwcfg. .It Va pc-testdev Ta bool Ta false Ta Enable the PC debug/test device. .It Va pcireg.* Ta integer Ta Ta Values of PCI register. It also accepts the value .Ar host to use the pci id of the host system. This value is required for the Intel GOP driver to work properly. .Bl -column "subvendor" "Default" .It Sy Name Ta Sy Default .It Va vendor Ta 0x8086 .It Va device Ta 0x7000 .It Va revid Ta 0 .It Va subvendor Ta 0 .It Va subdevice Ta 0 .El .El .Ss NVMe Controller Settings Each NVMe controller supports a single storage device. The device can be backed either by a memory disk described by the .Va ram variable, or a block device using the block device settings described above. In addition, each controller supports the following settings: .Bl -column "ioslots" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va maxq Ta integer Ta 16 Ta Maximum number of I/O submission and completion queue pairs. 
.It Va qsz Ta integer Ta 2058 Ta Number of elements in each I/O queue. .It Va ioslots Ta integer Ta 8 Ta Maximum number of concurrent I/O requests. .It Va sectsz Ta integer Ta Ta Sector size. Can be one of 512, 4096, or 8192. Devices backed by a memory disk use 4096 as the default. Devices backed by a block device use the block device's sector size as the default. .It Va ser Ta string Ta Ta Serial number of up to twenty characters. A default serial number is generated using a hash of the device's PCI address. .It Va eui64 Ta integer Ta Ta IEEE Extended Unique Identifier. If an EUI is not provided, a default is generated using a checksum of the device's PCI address. .It Va dsm Ta string Ta auto Ta Whether or not to advertise DataSet Management support. One of .Dq auto , .Dq enable , or .Dq disable . The .Dq auto setting only advertises support if the backing store supports resource freeing, for example via TRIM. .It Va ram Ta integer Ta Ta If set, allocate a memory disk as the backing store. The value of this variable is the size of the memory disk in megabytes. .El .Ss PCI Passthrough Settings The .Xr ppt 4 device driver must be attached to the PCI device being passed through. The device to pass through can be identified either by name or its host PCI bus location. .Bl -column "Name" "integer" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va bus Ta integer Ta Ta Host PCI bus address of device to pass through. .It Va slot Ta integer Ta Ta Host PCI slot address of device to pass through. .It Va func Ta integer Ta Ta Host PCI function address of device to pass through. .It Va pptdev Ta string Ta Ta Name of a .Xr ppt 4 device to pass through. .It Va rom Ta path Ta Ta ROM file of the device which will be executed by OVMF to init the device. .El .Ss VirtIO 9p Settings Each VirtIO 9p device exposes a single filesystem from a host path. 
.Bl -column "sharename" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va sharename Ta string Ta Ta The share name exposed to the guest. .It Va path Ta path Ta Ta The path of a directory on the host to export to the guest. .It Va ro Ta bool Ta false Ta If true, the guest filesystem is read-only. .El .Ss VirtIO Block Device Settings In addition to the block device settings described above, each VirtIO block device supports the following settings: .Bl -column "model" "integer" "generated" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va ser Ta string Ta generated Ta Serial number of up to twenty characters. A default serial number is generated using a hash of the backing store's pathname. .El .Ss VirtIO Console Device Settings Each VirtIO Console device contains one or more console ports. Each port stores its settings in a node named .Dq port. Ns Va N under the controller's device node. The .Va N values are formatted as successive decimal values starting with 0. Each port supports the following settings: .Bl -column "Name" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va name Ta string Ta Ta The name of the port exposed to the guest. .It Va path Ta path Ta Ta The path of a UNIX domain socket providing the host connection for the port. .El .Ss VirtIO Input Interface Settings Each VirtIO Input device contains one input event device. All input events of the input event device are sent to the guest by the VirtIO Input interface.
VirtIO Input Interfaces support the following variables: .Bl -column "Name" "Format" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va path Ta path Ta Ta The path of the input event device exposed to the guest .El .Ss VirtIO Network Interface Settings In addition to the network backend settings, VirtIO network interfaces support the following variables: .Bl -column "Name" "MAC address" "generated" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va mac Ta MAC address Ta generated Ta MAC address. If an explicit address is not provided, a MAC address is generated from a hash of the device's PCI address. .It Va mtu Ta integer Ta 1500 Ta The largest supported MTU advertised to the guest. .El .Ss VirtIO SCSI Settings .Bl -column "Name" "integer" "Default" .It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description .It Va dev Ta path Ta Ta The path of a CAM target layer (CTL) device to export: .Pa /dev/cam/ctl Ns Oo Ar pp . Ns Ar vp Oc . .It Va iid Ta integer Ta 0 Ta Initiator ID to use when sending requests to the CTL port. .El .Sh SEE ALSO .Xr expand_number 3 , .Xr getaddrinfo 3 , .Xr strtol 3 , .Xr netgraph 4 , .Xr netmap 4 , .Xr ng_socket 4 , .Xr tap 4 , .Xr vale 4 , .Xr vmnet 4 , .Xr bhyve 8 diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index f844da90e76c..41655a188bf9 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -1,882 +1,877 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #ifndef WITHOUT_CAPSICUM #include #endif #include #ifdef BHYVE_SNAPSHOT #include #include #endif #include #ifdef BHYVE_SNAPSHOT #include #endif #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #ifdef BHYVE_SNAPSHOT #include #endif #include #include #include #include #include #include #include #include #ifdef BHYVE_SNAPSHOT #include #include #include #endif #include #include "acpi.h" #include "bhyverun.h" #include "bootrom.h" #include "config.h" #include "debug.h" #ifdef BHYVE_GDB #include "gdb.h" #endif #include "mem.h" #include "mevent.h" #include "pci_emul.h" #ifdef __amd64__ #include "amd64/pci_lpc.h" #endif #include "qemu_fwcfg.h" #ifdef BHYVE_SNAPSHOT #include "snapshot.h" #endif #include "tpm_device.h" #include "vmgenc.h" #include "vmexit.h" #define MB (1024UL * 1024) #define GB (1024UL * MB) int guest_ncpus; uint16_t cpu_cores, cpu_sockets, cpu_threads; int raw_stdio = 0; #ifdef BHYVE_SNAPSHOT char *restore_file; #endif static const int BSP = 0; static cpuset_t 
cpumask; static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu); static struct vcpu_info { struct vmctx *ctx; struct vcpu *vcpu; int vcpuid; } *vcpu_info; static cpuset_t **vcpumap; /* * XXX This parser is known to have the following issues: * 1. It accepts null key=value tokens ",," as setting "cpus" to an * empty string. * * The acceptance of a null specification ('-c ""') is by design to match the * manual page syntax specification, this results in a topology of 1 vCPU. */ int bhyve_topology_parse(const char *opt) { char *cp, *str, *tofree; if (*opt == '\0') { set_config_value("sockets", "1"); set_config_value("cores", "1"); set_config_value("threads", "1"); set_config_value("cpus", "1"); return (0); } tofree = str = strdup(opt); if (str == NULL) errx(4, "Failed to allocate memory"); while ((cp = strsep(&str, ",")) != NULL) { if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) set_config_value("cpus", cp + strlen("cpus=")); else if (strncmp(cp, "sockets=", strlen("sockets=")) == 0) set_config_value("sockets", cp + strlen("sockets=")); else if (strncmp(cp, "cores=", strlen("cores=")) == 0) set_config_value("cores", cp + strlen("cores=")); else if (strncmp(cp, "threads=", strlen("threads=")) == 0) set_config_value("threads", cp + strlen("threads=")); else if (strchr(cp, '=') != NULL) goto out; else set_config_value("cpus", cp); } free(tofree); return (0); out: free(tofree); return (-1); } static int parse_int_value(const char *key, const char *value, int minval, int maxval) { char *cp; long lval; errno = 0; lval = strtol(value, &cp, 0); if (errno != 0 || *cp != '\0' || cp == value || lval < minval || lval > maxval) errx(4, "Invalid value for %s: '%s'", key, value); return (lval); } /* * Set the sockets, cores, threads, and guest_cpus variables based on * the configured topology. * * The limits of UINT16_MAX are due to the types passed to * vm_set_topology(). vmm.ko may enforce tighter limits. 
*/ static void calc_topology(void) { const char *value; bool explicit_cpus; uint64_t ncpus; value = get_config_value("cpus"); if (value != NULL) { guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX); explicit_cpus = true; } else { guest_ncpus = 1; explicit_cpus = false; } value = get_config_value("cores"); if (value != NULL) cpu_cores = parse_int_value("cores", value, 1, UINT16_MAX); else cpu_cores = 1; value = get_config_value("threads"); if (value != NULL) cpu_threads = parse_int_value("threads", value, 1, UINT16_MAX); else cpu_threads = 1; value = get_config_value("sockets"); if (value != NULL) cpu_sockets = parse_int_value("sockets", value, 1, UINT16_MAX); else cpu_sockets = guest_ncpus; /* * Compute sockets * cores * threads avoiding overflow. The * range check above insures these are 16 bit values. */ ncpus = (uint64_t)cpu_sockets * cpu_cores * cpu_threads; if (ncpus > UINT16_MAX) errx(4, "Computed number of vCPUs too high: %ju", (uintmax_t)ncpus); if (explicit_cpus) { if (guest_ncpus != (int)ncpus) errx(4, "Topology (%d sockets, %d cores, %d threads) " "does not match %d vCPUs", cpu_sockets, cpu_cores, cpu_threads, guest_ncpus); } else guest_ncpus = ncpus; } int bhyve_pincpu_parse(const char *opt) { const char *value; char *newval; char key[16]; int vcpu, pcpu; if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { fprintf(stderr, "invalid format: %s\n", opt); return (-1); } if (vcpu < 0) { fprintf(stderr, "invalid vcpu '%d'\n", vcpu); return (-1); } if (pcpu < 0 || pcpu >= CPU_SETSIZE) { fprintf(stderr, "hostcpu '%d' outside valid range from " "0 to %d\n", pcpu, CPU_SETSIZE - 1); return (-1); } snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); value = get_config_value(key); if (asprintf(&newval, "%s%s%d", value != NULL ? value : "", value != NULL ? 
"," : "", pcpu) == -1) { perror("failed to build new cpuset string"); return (-1); } set_config_value(key, newval); free(newval); return (0); } static void parse_cpuset(int vcpu, const char *list, cpuset_t *set) { char *cp, *token; int pcpu, start; CPU_ZERO(set); start = -1; token = __DECONST(char *, list); for (;;) { pcpu = strtoul(token, &cp, 0); if (cp == token) errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); if (pcpu < 0 || pcpu >= CPU_SETSIZE) errx(4, "hostcpu '%d' outside valid range from 0 to %d", pcpu, CPU_SETSIZE - 1); switch (*cp) { case ',': case '\0': if (start >= 0) { if (start > pcpu) errx(4, "Invalid hostcpu range %d-%d", start, pcpu); while (start < pcpu) { CPU_SET(start, set); start++; } start = -1; } CPU_SET(pcpu, set); break; case '-': if (start >= 0) errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); start = pcpu; break; default: errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); } if (*cp == '\0') break; token = cp + 1; } } static void build_vcpumaps(void) { char key[16]; const char *value; int vcpu; vcpumap = calloc(guest_ncpus, sizeof(*vcpumap)); for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); value = get_config_value(key); if (value == NULL) continue; vcpumap[vcpu] = malloc(sizeof(cpuset_t)); if (vcpumap[vcpu] == NULL) err(4, "Failed to allocate cpuset for vcpu %d", vcpu); parse_cpuset(vcpu, value, vcpumap[vcpu]); } } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } #ifdef BHYVE_SNAPSHOT uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr) { return (vm_rev_map_gpa(ctx, addr)); } #endif int fbsdrun_virtio_msix(void) { return (get_config_bool_default("virtio_msix", true)); } struct vcpu * fbsdrun_vcpu(int vcpuid) { return (vcpu_info[vcpuid].vcpu); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct vcpu_info *vi = param; int error; snprintf(tname, sizeof(tname), "vcpu %d", 
vi->vcpuid); pthread_set_name_np(pthread_self(), tname); if (vcpumap[vi->vcpuid] != NULL) { error = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), vcpumap[vi->vcpuid]); assert(error == 0); } #ifdef BHYVE_SNAPSHOT checkpoint_cpu_add(vi->vcpuid); #endif #ifdef BHYVE_GDB gdb_cpu_add(vi->vcpu); #endif vm_loop(vi->ctx, vi->vcpu); /* not reached */ exit(1); return (NULL); } void fbsdrun_addcpu(int vcpuid) { struct vcpu_info *vi; pthread_t thr; int error; vi = &vcpu_info[vcpuid]; error = vm_activate_cpu(vi->vcpu); if (error != 0) err(EX_OSERR, "could not activate CPU %d", vi->vcpuid); CPU_SET_ATOMIC(vcpuid, &cpumask); error = vm_suspend_cpu(vi->vcpu); assert(error == 0); error = pthread_create(&thr, NULL, fbsdrun_start_thread, vi); assert(error == 0); } void fbsdrun_deletecpu(int vcpu) { static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; pthread_mutex_lock(&resetcpu_mtx); if (!CPU_ISSET(vcpu, &cpumask)) { EPRINTLN("Attempting to delete unknown cpu %d", vcpu); exit(4); } CPU_CLR(vcpu, &cpumask); if (vcpu != BSP) { pthread_cond_signal(&resetcpu_cond); pthread_mutex_unlock(&resetcpu_mtx); pthread_exit(NULL); /* NOTREACHED */ } while (!CPU_EMPTY(&cpumask)) { pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); } pthread_mutex_unlock(&resetcpu_mtx); } int fbsdrun_suspendcpu(int vcpuid) { return (vm_suspend_cpu(vcpu_info[vcpuid].vcpu)); } static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu) { struct vm_exit vme; struct vm_run vmrun; int error, rc; enum vm_exitcode exitcode; cpuset_t active_cpus, dmask; error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu_id(vcpu), &active_cpus)); vmrun.vm_exit = &vme; vmrun.cpuset = &dmask; vmrun.cpusetsize = sizeof(dmask); while (1) { error = vm_run(vcpu, &vmrun); if (error != 0) break; exitcode = vme.exitcode; if (exitcode >= VM_EXITCODE_MAX || vmexit_handlers[exitcode] == NULL) { warnx("vm_loop: unexpected exitcode 0x%x", exitcode); exit(4); 
} rc = (*vmexit_handlers[exitcode])(ctx, vcpu, &vmrun); switch (rc) { case VMEXIT_CONTINUE: break; case VMEXIT_ABORT: abort(); default: exit(4); } } EPRINTLN("vm_run error %d, errno %d", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx, struct vcpu *vcpu) { uint16_t sockets, cores, threads, maxcpus; int tmp, error; /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. */ error = vm_get_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, &tmp); if (error != 0) return (1); error = vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus); if (error == 0) return (maxcpus); else return (1); } static struct vmctx * do_open(const char *vmname) { struct vmctx *ctx; int error; bool reinit, romboot; reinit = false; -#ifdef __amd64__ - romboot = lpc_bootrom() != NULL; -#else - romboot = true; -#endif - + romboot = bootrom_boot(); error = vm_create(vmname); if (error) { if (errno == EEXIST) { if (romboot) { reinit = true; } else { /* * The virtual machine has been setup by the * userspace bootloader. */ } } else { perror("vm_create"); exit(4); } } else { if (!romboot) { /* * If the virtual machine was just created then a * bootrom must be configured to boot it. 
*/ fprintf(stderr, "virtual machine cannot be booted\n"); exit(4); } } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(4); } #ifndef WITHOUT_CAPSICUM if (vm_limit_rights(ctx) != 0) err(EX_OSERR, "vm_limit_rights"); #endif if (reinit) { error = vm_reinit(ctx); if (error) { perror("vm_reinit"); exit(4); } } error = vm_set_topology(ctx, cpu_sockets, cpu_cores, cpu_threads, 0); if (error) errx(EX_OSERR, "vm_set_topology"); return (ctx); } bool bhyve_parse_config_option(const char *option) { const char *value; char *path; value = strchr(option, '='); if (value == NULL || value[1] == '\0') return (false); path = strndup(option, value - option); if (path == NULL) err(4, "Failed to allocate memory"); set_config_value(path, value + 1); free(path); return (true); } void bhyve_parse_simple_config_file(const char *path) { FILE *fp; char *line, *cp; size_t linecap; unsigned int lineno; fp = fopen(path, "r"); if (fp == NULL) err(4, "Failed to open configuration file %s", path); line = NULL; linecap = 0; lineno = 1; for (lineno = 1; getline(&line, &linecap, fp) > 0; lineno++) { if (*line == '#' || *line == '\n') continue; cp = strchr(line, '\n'); if (cp != NULL) *cp = '\0'; if (!bhyve_parse_config_option(line)) errx(4, "%s line %u: invalid config option '%s'", path, lineno, line); } free(line); fclose(fp); } #ifdef BHYVE_GDB void bhyve_parse_gdb_options(const char *opt) { const char *sport; char *colon; if (opt[0] == 'w') { set_config_bool("gdb.wait", true); opt++; } colon = strrchr(opt, ':'); if (colon == NULL) { sport = opt; } else { *colon = '\0'; colon++; sport = colon; set_config_value("gdb.address", opt); } set_config_value("gdb.port", sport); } #endif int main(int argc, char *argv[]) { int error; int max_vcpus, memflags; struct vcpu *bsp; struct vmctx *ctx; size_t memsize; const char *value, *vmname; #ifdef BHYVE_SNAPSHOT struct restore_state rstate; #endif bhyve_init_config(); bhyve_optparse(argc, argv); argc -= optind; argv += optind; if (argc > 1) 
bhyve_usage(1); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { error = load_restore_file(restore_file, &rstate); if (error) { fprintf(stderr, "Failed to read checkpoint info from " "file: '%s'.\n", restore_file); exit(1); } vmname = lookup_vmname(&rstate); if (vmname != NULL) set_config_value("name", vmname); } #endif if (argc == 1) set_config_value("name", argv[0]); vmname = get_config_value("name"); if (vmname == NULL) bhyve_usage(1); if (get_config_bool_default("config.dump", false)) { dump_config(); exit(1); } calc_topology(); build_vcpumaps(); value = get_config_value("memory.size"); error = vm_parse_memsize(value, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", value); ctx = do_open(vmname); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { guest_ncpus = lookup_guest_ncpus(&rstate); memflags = lookup_memflags(&rstate); memsize = lookup_memsize(&rstate); } if (guest_ncpus < 1) { fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); exit(1); } #endif bsp = vm_vcpu_open(ctx, BSP); max_vcpus = num_vcpus_allowed(ctx, bsp); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(4); } bhyve_init_vcpu(bsp); /* Allocate per-VCPU resources. 
*/ vcpu_info = calloc(guest_ncpus, sizeof(*vcpu_info)); for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) { vcpu_info[vcpuid].ctx = ctx; vcpu_info[vcpuid].vcpuid = vcpuid; if (vcpuid == BSP) vcpu_info[vcpuid].vcpu = bsp; else vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid); } memflags = 0; if (get_config_bool_default("memory.wired", false)) memflags |= VM_MEM_F_WIRED; if (get_config_bool_default("memory.guest_in_core", false)) memflags |= VM_MEM_F_INCORE; vm_set_memflags(ctx, memflags); error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (error) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(4); } init_mem(guest_ncpus); init_bootrom(ctx); if (bhyve_init_platform(ctx, bsp) != 0) exit(4); if (qemu_fwcfg_init(ctx) != 0) { fprintf(stderr, "qemu fwcfg initialization error\n"); exit(4); } if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu", sizeof(guest_ncpus), &guest_ncpus) != 0) { fprintf(stderr, "Could not add qemu fwcfg opt/bhyve/hw.ncpu\n"); exit(4); } /* * Exit if a device emulation finds an error in its initialization */ if (init_pci(ctx) != 0) { EPRINTLN("Device emulation initialization error: %s", strerror(errno)); exit(4); } if (init_tpm(ctx) != 0) { EPRINTLN("Failed to init TPM device"); exit(4); } /* * Initialize after PCI, to allow a bootrom file to reserve the high * region. */ if (get_config_bool("acpi_tables")) vmgenc_init(ctx); #ifdef BHYVE_GDB init_gdb(ctx); #endif /* * Add all vCPUs. 
*/ for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) bhyve_start_vcpu(vcpu_info[vcpuid].vcpu, vcpuid == BSP); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { FPRINTLN(stdout, "Pausing pci devs..."); if (vm_pause_devices() != 0) { EPRINTLN("Failed to pause PCI device state."); exit(1); } FPRINTLN(stdout, "Restoring vm mem..."); if (restore_vm_mem(ctx, &rstate) != 0) { EPRINTLN("Failed to restore VM memory."); exit(1); } FPRINTLN(stdout, "Restoring pci devs..."); if (vm_restore_devices(&rstate) != 0) { EPRINTLN("Failed to restore PCI device state."); exit(1); } FPRINTLN(stdout, "Restoring kernel structs..."); if (vm_restore_kern_structs(ctx, &rstate) != 0) { EPRINTLN("Failed to restore kernel structs."); exit(1); } FPRINTLN(stdout, "Resuming pci devs..."); if (vm_resume_devices() != 0) { EPRINTLN("Failed to resume PCI device state."); exit(1); } } #endif if (bhyve_init_platform_late(ctx, bsp) != 0) exit(4); /* * Change the proc title to include the VM name. */ setproctitle("%s", vmname); #ifdef BHYVE_SNAPSHOT /* * checkpointing thread for communication with bhyvectl */ if (init_checkpoint_thread(ctx) != 0) errx(EX_OSERR, "Failed to start checkpoint thread"); #endif #ifndef WITHOUT_CAPSICUM caph_cache_catpages(); if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_enter() == -1) errx(EX_OSERR, "cap_enter() failed"); #endif #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { destroy_restore_state(&rstate); if (vm_restore_time(ctx) < 0) err(EX_OSERR, "Unable to restore time"); for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) vm_resume_cpu(vcpu_info[vcpuid].vcpu); } else #endif vm_resume_cpu(bsp); /* * Head off to the main event dispatch loop */ mevent_dispatch(); exit(4); } diff --git a/usr.sbin/bhyve/bootrom.c b/usr.sbin/bhyve/bootrom.c index 1d461ba76597..e4adaca55947 100644 --- a/usr.sbin/bhyve/bootrom.c +++ b/usr.sbin/bhyve/bootrom.c @@ -1,316 +1,325 @@ /*- * SPDX-License-Identifier: 
BSD-2-Clause * * Copyright (c) 2015 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "bootrom.h" #include "debug.h" #include "mem.h" #define BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ /* * ROM region is 16 MB at the top of 4GB ("low") memory. * * The size is limited so it doesn't encroach into reserved MMIO space (e.g., * APIC, HPET, MSI). * * It is allocated in page-multiple blocks on a first-come first-serve basis, * from high to low, during initialization, and does not change at runtime. */ static char *romptr; /* Pointer to userspace-mapped bootrom region. 
*/ static vm_paddr_t gpa_base; /* GPA of low end of region. */ static vm_paddr_t gpa_allocbot; /* Low GPA of free region. */ static vm_paddr_t gpa_alloctop; /* High GPA, minus 1, of free region. */ #define CFI_BCS_WRITE_BYTE 0x10 #define CFI_BCS_CLEAR_STATUS 0x50 #define CFI_BCS_READ_STATUS 0x70 #define CFI_BCS_READ_ARRAY 0xff static struct bootrom_var_state { uint8_t *mmap; uint64_t gpa; off_t size; uint8_t cmd; } var = { NULL, 0, 0, CFI_BCS_READ_ARRAY }; /* * Emulate just those CFI basic commands that will convince EDK II * that the Firmware Volume area is writable and persistent. */ static int bootrom_var_mem_handler(struct vcpu *vcpu __unused, int dir, uint64_t addr, int size, uint64_t *val, void *arg1 __unused, long arg2 __unused) { off_t offset; offset = addr - var.gpa; if (offset + size > var.size || offset < 0 || offset + size <= offset) return (EINVAL); if (dir == MEM_F_WRITE) { switch (var.cmd) { case CFI_BCS_WRITE_BYTE: memcpy(var.mmap + offset, val, size); var.cmd = CFI_BCS_READ_ARRAY; break; default: var.cmd = *(uint8_t *)val; } } else { switch (var.cmd) { case CFI_BCS_CLEAR_STATUS: case CFI_BCS_READ_STATUS: memset(val, 0, size); var.cmd = CFI_BCS_READ_ARRAY; break; default: memcpy(val, var.mmap + offset, size); break; } } return (0); } void init_bootrom(struct vmctx *ctx) { vm_paddr_t highmem; romptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", BOOTROM_SIZE); if (romptr == MAP_FAILED) err(4, "%s: vm_create_devmem", __func__); highmem = vm_get_highmem_base(ctx); gpa_base = highmem - BOOTROM_SIZE; gpa_allocbot = gpa_base; gpa_alloctop = highmem - 1; } int bootrom_alloc(struct vmctx *ctx, size_t len, int prot, int flags, char **region_out, uint64_t *gpa_out) { static const int bootrom_valid_flags = BOOTROM_ALLOC_TOP; vm_paddr_t gpa; vm_ooffset_t segoff; if (flags & ~bootrom_valid_flags) { warnx("%s: Invalid flags: %x", __func__, flags & ~bootrom_valid_flags); return (EINVAL); } if (prot & ~_PROT_ALL) { warnx("%s: Invalid protection: %x", __func__, prot 
& ~_PROT_ALL); return (EINVAL); } if (len == 0 || len > BOOTROM_SIZE) { warnx("ROM size %zu is invalid", len); return (EINVAL); } if (len & PAGE_MASK) { warnx("ROM size %zu is not a multiple of the page size", len); return (EINVAL); } if (flags & BOOTROM_ALLOC_TOP) { gpa = (gpa_alloctop - len) + 1; if (gpa < gpa_allocbot) { warnx("No room for %zu ROM in bootrom region", len); return (ENOMEM); } } else { gpa = gpa_allocbot; if (gpa > (gpa_alloctop - len) + 1) { warnx("No room for %zu ROM in bootrom region", len); return (ENOMEM); } } segoff = gpa - gpa_base; if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, segoff, len, prot) != 0) { int serrno = errno; warn("%s: vm_mmap_mapseg", __func__); return (serrno); } if (flags & BOOTROM_ALLOC_TOP) gpa_alloctop = gpa - 1; else gpa_allocbot = gpa + len; *region_out = romptr + segoff; if (gpa_out != NULL) *gpa_out = gpa; return (0); } int -bootrom_loadrom(struct vmctx *ctx, const nvlist_t *nvl) +bootrom_loadrom(struct vmctx *ctx) { struct stat sbuf; ssize_t rlen; off_t rom_size, var_size, total_size; char *ptr, *romfile; int fd, varfd, i, rv; const char *bootrom, *varfile; rv = -1; varfd = -1; - bootrom = get_config_value_node(nvl, "bootrom"); + bootrom = get_config_value("bootrom"); if (bootrom == NULL) { - return (-1); + return (0); } /* * get_config_value_node may use a thread local buffer to return * variables. So, when we query the second variable, the first variable * might get overwritten. For that reason, the bootrom should be * duplicated. 
*/ romfile = strdup(bootrom); if (romfile == NULL) { return (-1); } fd = open(romfile, O_RDONLY); if (fd < 0) { EPRINTLN("Error opening bootrom \"%s\": %s", romfile, strerror(errno)); goto done; } if (fstat(fd, &sbuf) < 0) { EPRINTLN("Could not fstat bootrom file \"%s\": %s", romfile, strerror(errno)); goto done; } rom_size = sbuf.st_size; - varfile = get_config_value_node(nvl, "bootvars"); + varfile = get_config_value("bootvars"); var_size = 0; if (varfile != NULL) { varfd = open(varfile, O_RDWR); if (varfd < 0) { EPRINTLN("Error opening bootrom variable file " "\"%s\": %s", varfile, strerror(errno)); goto done; } if (fstat(varfd, &sbuf) < 0) { EPRINTLN( "Could not fstat bootrom variable file \"%s\": %s", varfile, strerror(errno)); goto done; } var_size = sbuf.st_size; } if (var_size > BOOTROM_SIZE || (var_size != 0 && var_size < PAGE_SIZE)) { EPRINTLN("Invalid bootrom variable size %ld", var_size); goto done; } total_size = rom_size + var_size; if (total_size > BOOTROM_SIZE) { EPRINTLN("Invalid bootrom and variable aggregate size %ld", total_size); goto done; } /* Map the bootrom into the guest address space */ if (bootrom_alloc(ctx, rom_size, PROT_READ | PROT_EXEC, BOOTROM_ALLOC_TOP, &ptr, NULL) != 0) { goto done; } /* Read 'romfile' into the guest address space */ for (i = 0; i < rom_size / PAGE_SIZE; i++) { rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE); if (rlen != PAGE_SIZE) { EPRINTLN("Incomplete read of page %d of bootrom " "file %s: %ld bytes", i, romfile, rlen); goto done; } } if (varfd >= 0) { var.mmap = mmap(NULL, var_size, PROT_READ | PROT_WRITE, MAP_SHARED, varfd, 0); if (var.mmap == MAP_FAILED) goto done; var.size = var_size; var.gpa = (gpa_alloctop - var_size) + 1; gpa_alloctop = var.gpa - 1; rv = register_mem(&(struct mem_range){ .name = "bootrom variable", .flags = MEM_F_RW, .handler = bootrom_var_mem_handler, .base = var.gpa, .size = var.size, }); if (rv != 0) goto done; } rv = 0; done: if (varfd >= 0) close(varfd); if (fd >= 0) close(fd); 
free(romfile); return (rv); } + +/* + * Are we relying on a bootrom to initialize the guest's CPU context? + */ +bool +bootrom_boot(void) +{ + return (get_config_value("bootrom") != NULL); +} diff --git a/usr.sbin/bhyve/bootrom.h b/usr.sbin/bhyve/bootrom.h index d22ac3718fa2..0477b0f35218 100644 --- a/usr.sbin/bhyve/bootrom.h +++ b/usr.sbin/bhyve/bootrom.h @@ -1,50 +1,51 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _BOOTROM_H_ #define _BOOTROM_H_ #include #include #include #include #include "config.h" struct vmctx; void init_bootrom(struct vmctx *ctx); enum { BOOTROM_ALLOC_TOP = 0x80, _FORCE_INT = INT_MIN, }; int bootrom_alloc(struct vmctx *ctx, size_t len, int prot, int flags, char **region_out, uint64_t *gpa_out); -int bootrom_loadrom(struct vmctx *ctx, const nvlist_t *nvl); +bool bootrom_boot(void); +int bootrom_loadrom(struct vmctx *ctx); #endif