diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index fcf6858f40b9..c8fc65cb3b6e 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -1,825 +1,825 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * bhyve ACPI table generator.
 *
 * Create the minimal set of ACPI tables required to boot FreeBSD (and
 * hopefully other o/s's).
 *
 * The tables are placed in the guest's ROM area just below 1MB physical,
 * above the MPTable.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "bhyverun.h"
#include "acpi.h"
#include "basl.h"
#include "pci_emul.h"
#include "vmgenc.h"

#define BHYVE_ASL_TEMPLATE	"bhyve.XXXXXXX"
#define BHYVE_ASL_SUFFIX	".aml"
#define BHYVE_ASL_COMPILER	"/usr/sbin/iasl"

#define BHYVE_ADDRESS_IOAPIC	0xFEC00000
#define BHYVE_ADDRESS_HPET	0xFED00000
#define BHYVE_ADDRESS_LAPIC	0xFEE00000

static int basl_keep_temps;
static int basl_verbose_iasl;
static int basl_ncpu;
static uint32_t hpet_capabilities;

/*
 * Contains the full pathname of the template to be passed
 * to mkstemp/mktemps(3)
 */
static char basl_template[MAXPATHLEN];
static char basl_stemplate[MAXPATHLEN];

/*
 * State for dsdt_line(), dsdt_indent(), and dsdt_unindent().
 */
static FILE *dsdt_fp;
static int dsdt_indent_level;
static int dsdt_error;

static struct basl_table *rsdt;
static struct basl_table *xsdt;

struct basl_fio {
	int	fd;
	FILE	*fp;
	char	f_name[MAXPATHLEN];
};

#define EFPRINTF(...) \
	if (fprintf(__VA_ARGS__) < 0) goto err_exit

#define EFFLUSH(x) \
	if (fflush(x) != 0) goto err_exit

/*
 * A list for additional ACPI devices like a TPM.
 */
struct acpi_device_list_entry {
	SLIST_ENTRY(acpi_device_list_entry) chain;
	const struct acpi_device *dev;
};
static SLIST_HEAD(acpi_device_list, acpi_device_list_entry) acpi_devices =
    SLIST_HEAD_INITIALIZER(acpi_devices);

int
acpi_tables_add_device(const struct acpi_device *const dev)
{
	struct acpi_device_list_entry *const entry = calloc(1, sizeof(*entry));
	if (entry == NULL) {
		return (ENOMEM);
	}

	entry->dev = dev;
	SLIST_INSERT_HEAD(&acpi_devices, entry, chain);

	return (0);
}

/*
 * Helper routines for writing to the DSDT from other modules.
*/ void dsdt_line(const char *fmt, ...) { va_list ap; if (dsdt_error != 0) return; if (strcmp(fmt, "") != 0) { if (dsdt_indent_level != 0) EFPRINTF(dsdt_fp, "%*c", dsdt_indent_level * 2, ' '); va_start(ap, fmt); if (vfprintf(dsdt_fp, fmt, ap) < 0) { va_end(ap); goto err_exit; } va_end(ap); } EFPRINTF(dsdt_fp, "\n"); return; err_exit: dsdt_error = errno; } void dsdt_indent(int levels) { dsdt_indent_level += levels; assert(dsdt_indent_level >= 0); } void dsdt_unindent(int levels) { assert(dsdt_indent_level >= levels); dsdt_indent_level -= levels; } void dsdt_fixed_ioport(uint16_t iobase, uint16_t length) { dsdt_line("IO (Decode16,"); dsdt_line(" 0x%04X, // Range Minimum", iobase); dsdt_line(" 0x%04X, // Range Maximum", iobase); dsdt_line(" 0x01, // Alignment"); dsdt_line(" 0x%02X, // Length", length); dsdt_line(" )"); } void dsdt_fixed_irq(uint8_t irq) { dsdt_line("IRQNoFlags ()"); dsdt_line(" {%d}", irq); } void dsdt_fixed_mem32(uint32_t base, uint32_t length) { dsdt_line("Memory32Fixed (ReadWrite,"); dsdt_line(" 0x%08X, // Address Base", base); dsdt_line(" 0x%08X, // Address Length", length); dsdt_line(" )"); } static int basl_fwrite_dsdt(FILE *fp) { dsdt_fp = fp; dsdt_error = 0; dsdt_indent_level = 0; dsdt_line("/*"); dsdt_line(" * bhyve DSDT template"); dsdt_line(" */"); dsdt_line("DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2," "\"BHYVE \", \"BVDSDT \", 0x00000001)"); dsdt_line("{"); dsdt_line(" Name (_S5, Package ()"); dsdt_line(" {"); dsdt_line(" 0x05,"); dsdt_line(" Zero,"); dsdt_line(" })"); pci_write_dsdt(); dsdt_line(""); dsdt_line(" Scope (_SB.PC00)"); dsdt_line(" {"); dsdt_line(" Device (HPET)"); dsdt_line(" {"); dsdt_line(" Name (_HID, EISAID(\"PNP0103\"))"); dsdt_line(" Name (_UID, 0)"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(4); dsdt_fixed_mem32(0xFED00000, 0x400); dsdt_unindent(4); dsdt_line(" })"); dsdt_line(" }"); dsdt_line(" }"); vmgenc_write_dsdt(); const struct acpi_device_list_entry *entry; SLIST_FOREACH(entry, &acpi_devices, chain) { BASL_EXEC(acpi_device_write_dsdt(entry->dev)); } dsdt_line("}"); if (dsdt_error != 0) return (dsdt_error); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_open(struct basl_fio *bf, int suffix) { int err; err = 0; if (suffix) { strlcpy(bf->f_name, basl_stemplate, MAXPATHLEN); bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX)); } else { strlcpy(bf->f_name, basl_template, MAXPATHLEN); bf->fd = mkstemp(bf->f_name); } if (bf->fd > 0) { bf->fp = fdopen(bf->fd, "w+"); if (bf->fp == NULL) { unlink(bf->f_name); close(bf->fd); } } else { err = 1; } return (err); } static void basl_close(struct basl_fio *bf) { if (!basl_keep_temps) unlink(bf->f_name); fclose(bf->fp); } static int basl_start(struct basl_fio *in, struct basl_fio *out) { int err; err = basl_open(in, 0); if (!err) { err = basl_open(out, 1); if (err) { basl_close(in); } } return (err); } static void basl_end(struct basl_fio *in, struct basl_fio *out) { basl_close(in); basl_close(out); } static int basl_load(struct vmctx *ctx, int fd) { struct stat sb; void *addr; if (fstat(fd, &sb) < 0) return (errno); addr = calloc(1, sb.st_size); if (addr == NULL) return (EFAULT); if (read(fd, addr, sb.st_size) < 0) return (errno); struct basl_table *table; uint8_t name[ACPI_NAMESEG_SIZE + 1] = { 0 }; memcpy(name, addr, sizeof(name) - 1 /* last char is '\0' */); BASL_EXEC(basl_table_create(&table, ctx, name, BASL_TABLE_ALIGNMENT)); BASL_EXEC(basl_table_append_bytes(table, addr, sb.st_size)); return (0); } static int basl_compile(struct 
vmctx *ctx, int (*fwrite_section)(FILE *)) { struct basl_fio io[2]; static char iaslbuf[3*MAXPATHLEN + 10]; const char *fmt; int err; err = basl_start(&io[0], &io[1]); if (!err) { err = (*fwrite_section)(io[0].fp); if (!err) { /* * iasl sends the results of the compilation to * stdout. Shut this down by using the shell to * redirect stdout to /dev/null, unless the user * has requested verbose output for debugging * purposes */ fmt = basl_verbose_iasl ? "%s -p %s %s" : "/bin/sh -c \"%s -p %s %s\" 1> /dev/null"; snprintf(iaslbuf, sizeof(iaslbuf), fmt, BHYVE_ASL_COMPILER, io[1].f_name, io[0].f_name); err = system(iaslbuf); if (!err) { /* * Copy the aml output file into guest * memory at the specified location */ err = basl_load(ctx, io[1].fd); } } basl_end(&io[0], &io[1]); } return (err); } static int basl_make_templates(void) { const char *tmpdir; int err; int len; err = 0; /* * */ if ((tmpdir = getenv("BHYVE_TMPDIR")) == NULL || *tmpdir == '\0' || (tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') { tmpdir = _PATH_TMP; } len = strlen(tmpdir); if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1) < MAXPATHLEN) { strcpy(basl_template, tmpdir); while (len > 0 && basl_template[len - 1] == '/') len--; basl_template[len] = '/'; strcpy(&basl_template[len + 1], BHYVE_ASL_TEMPLATE); } else err = E2BIG; if (!err) { /* - * len has been intialized (and maybe adjusted) above + * len has been initialized (and maybe adjusted) above */ if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1 + sizeof(BHYVE_ASL_SUFFIX)) < MAXPATHLEN) { strcpy(basl_stemplate, tmpdir); basl_stemplate[len] = '/'; strcpy(&basl_stemplate[len + 1], BHYVE_ASL_TEMPLATE); len = strlen(basl_stemplate); strcpy(&basl_stemplate[len], BHYVE_ASL_SUFFIX); } else err = E2BIG; } return (err); } static int build_dsdt(struct vmctx *const ctx) { BASL_EXEC(basl_compile(ctx, basl_fwrite_dsdt)); return (0); } static int build_facs(struct vmctx *const ctx) { ACPI_TABLE_FACS facs; struct basl_table *table; BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_FACS, BASL_TABLE_ALIGNMENT_FACS)); memset(&facs, 0, sizeof(facs)); memcpy(facs.Signature, ACPI_SIG_FACS, ACPI_NAMESEG_SIZE); facs.Length = sizeof(facs); facs.Version = htole32(2); BASL_EXEC(basl_table_append_bytes(table, &facs, sizeof(facs))); return (0); } static int build_fadt(struct vmctx *const ctx) { ACPI_TABLE_FADT fadt; struct basl_table *table; BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_FADT, BASL_TABLE_ALIGNMENT)); memset(&fadt, 0, sizeof(fadt)); BASL_EXEC(basl_table_append_header(table, ACPI_SIG_FADT, 5, 1)); fadt.Facs = htole32(0); /* patched by basl */ fadt.Dsdt = htole32(0); /* patched by basl */ fadt.SciInterrupt = htole16(SCI_INT); fadt.SmiCommand = htole32(SMI_CMD); fadt.AcpiEnable = BHYVE_ACPI_ENABLE; fadt.AcpiDisable = BHYVE_ACPI_DISABLE; fadt.Pm1aEventBlock = htole32(PM1A_EVT_ADDR); fadt.Pm1aControlBlock = htole32(PM1A_CNT_ADDR); fadt.PmTimerBlock = htole32(IO_PMTMR); fadt.Gpe0Block = htole32(IO_GPE0_BLK); fadt.Pm1EventLength = 4; fadt.Pm1ControlLength = 2; fadt.PmTimerLength = 4; fadt.Gpe0BlockLength = IO_GPE0_LEN; fadt.Century = 0x32; fadt.BootFlags = htole16(ACPI_FADT_NO_VGA | ACPI_FADT_NO_ASPM); fadt.Flags = htole32(ACPI_FADT_WBINVD | ACPI_FADT_C1_SUPPORTED | ACPI_FADT_SLEEP_BUTTON | ACPI_FADT_32BIT_TIMER | ACPI_FADT_RESET_REGISTER | ACPI_FADT_HEADLESS | ACPI_FADT_APIC_PHYSICAL); basl_fill_gas(&fadt.ResetRegister, ACPI_ADR_SPACE_SYSTEM_IO, 8, 0, ACPI_GAS_ACCESS_WIDTH_BYTE, 0xCF9); fadt.ResetValue = 6; fadt.MinorRevision = 1; fadt.XFacs = htole64(0); /* patched by basl */ fadt.XDsdt = 
htole64(0); /* patched by basl */ basl_fill_gas(&fadt.XPm1aEventBlock, ACPI_ADR_SPACE_SYSTEM_IO, 0x20, 0, ACPI_GAS_ACCESS_WIDTH_WORD, PM1A_EVT_ADDR); basl_fill_gas(&fadt.XPm1bEventBlock, ACPI_ADR_SPACE_SYSTEM_IO, 0, 0, ACPI_GAS_ACCESS_WIDTH_UNDEFINED, 0); basl_fill_gas(&fadt.XPm1aControlBlock, ACPI_ADR_SPACE_SYSTEM_IO, 0x10, 0, ACPI_GAS_ACCESS_WIDTH_WORD, PM1A_CNT_ADDR); basl_fill_gas(&fadt.XPm1bControlBlock, ACPI_ADR_SPACE_SYSTEM_IO, 0, 0, ACPI_GAS_ACCESS_WIDTH_UNDEFINED, 0); basl_fill_gas(&fadt.XPm2ControlBlock, ACPI_ADR_SPACE_SYSTEM_IO, 8, 0, ACPI_GAS_ACCESS_WIDTH_UNDEFINED, 0); basl_fill_gas(&fadt.XPmTimerBlock, ACPI_ADR_SPACE_SYSTEM_IO, 0x20, 0, ACPI_GAS_ACCESS_WIDTH_DWORD, IO_PMTMR); basl_fill_gas(&fadt.XGpe0Block, ACPI_ADR_SPACE_SYSTEM_IO, IO_GPE0_LEN * 8, 0, ACPI_GAS_ACCESS_WIDTH_BYTE, IO_GPE0_BLK); basl_fill_gas(&fadt.XGpe1Block, ACPI_ADR_SPACE_SYSTEM_IO, 0, 0, ACPI_GAS_ACCESS_WIDTH_UNDEFINED, 0); basl_fill_gas(&fadt.SleepControl, ACPI_ADR_SPACE_SYSTEM_IO, 8, 0, ACPI_GAS_ACCESS_WIDTH_BYTE, 0); basl_fill_gas(&fadt.SleepStatus, ACPI_ADR_SPACE_SYSTEM_IO, 8, 0, ACPI_GAS_ACCESS_WIDTH_BYTE, 0); BASL_EXEC(basl_table_append_content(table, &fadt, sizeof(fadt))); BASL_EXEC(basl_table_add_pointer(table, ACPI_SIG_FACS, offsetof(ACPI_TABLE_FADT, Facs), sizeof(fadt.Facs))); BASL_EXEC(basl_table_add_pointer(table, ACPI_SIG_DSDT, offsetof(ACPI_TABLE_FADT, Dsdt), sizeof(fadt.Dsdt))); BASL_EXEC(basl_table_add_pointer(table, ACPI_SIG_FACS, offsetof(ACPI_TABLE_FADT, XFacs), sizeof(fadt.XFacs))); BASL_EXEC(basl_table_add_pointer(table, ACPI_SIG_DSDT, offsetof(ACPI_TABLE_FADT, XDsdt), sizeof(fadt.XDsdt))); BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_FADT, ACPI_RSDT_ENTRY_SIZE)); BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_FADT, ACPI_XSDT_ENTRY_SIZE)); return (0); } static int build_hpet(struct vmctx *const ctx) { ACPI_TABLE_HPET hpet; struct basl_table *table; BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_HPET, BASL_TABLE_ALIGNMENT)); memset(&hpet, 0, sizeof(hpet)); BASL_EXEC(basl_table_append_header(table, ACPI_SIG_HPET, 1, 1)); hpet.Id = htole32(hpet_capabilities); basl_fill_gas(&hpet.Address, ACPI_ADR_SPACE_SYSTEM_MEMORY, 0, 0, ACPI_GAS_ACCESS_WIDTH_LEGACY, BHYVE_ADDRESS_HPET); hpet.Flags = ACPI_HPET_PAGE_PROTECT4; BASL_EXEC(basl_table_append_content(table, &hpet, sizeof(hpet))); BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_HPET, ACPI_RSDT_ENTRY_SIZE)); BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_HPET, ACPI_XSDT_ENTRY_SIZE)); return (0); } static int build_madt(struct vmctx *const ctx) { ACPI_TABLE_MADT madt; ACPI_MADT_LOCAL_APIC madt_lapic; ACPI_MADT_IO_APIC madt_ioapic; ACPI_MADT_INTERRUPT_OVERRIDE madt_irq_override; ACPI_MADT_LOCAL_APIC_NMI madt_lapic_nmi; struct basl_table *table; BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_MADT, BASL_TABLE_ALIGNMENT)); memset(&madt, 0, sizeof(madt)); BASL_EXEC(basl_table_append_header(table, ACPI_SIG_MADT, 1, 1)); madt.Address = htole32(BHYVE_ADDRESS_LAPIC); madt.Flags = htole32(ACPI_MADT_PCAT_COMPAT); BASL_EXEC(basl_table_append_content(table, &madt, sizeof(madt))); /* Local APIC for each CPU */ for (int i = 0; i < basl_ncpu; ++i) { memset(&madt_lapic, 0, sizeof(madt_lapic)); madt_lapic.Header.Type = ACPI_MADT_TYPE_LOCAL_APIC; madt_lapic.Header.Length = sizeof(madt_lapic); madt_lapic.ProcessorId = i; madt_lapic.Id = i; madt_lapic.LapicFlags = htole32(ACPI_MADT_ENABLED); BASL_EXEC(basl_table_append_bytes(table, &madt_lapic, sizeof(madt_lapic))); } /* I/O APIC */ memset(&madt_ioapic, 0, sizeof(madt_ioapic)); 
madt_ioapic.Header.Type = ACPI_MADT_TYPE_IO_APIC; madt_ioapic.Header.Length = sizeof(madt_ioapic); madt_ioapic.Address = htole32(BHYVE_ADDRESS_IOAPIC); BASL_EXEC( basl_table_append_bytes(table, &madt_ioapic, sizeof(madt_ioapic))); /* Legacy IRQ0 is connected to pin 2 of the I/O APIC */ memset(&madt_irq_override, 0, sizeof(madt_irq_override)); madt_irq_override.Header.Type = ACPI_MADT_TYPE_INTERRUPT_OVERRIDE; madt_irq_override.Header.Length = sizeof(madt_irq_override); madt_irq_override.GlobalIrq = htole32(2); madt_irq_override.IntiFlags = htole16( ACPI_MADT_POLARITY_ACTIVE_HIGH | ACPI_MADT_TRIGGER_EDGE); BASL_EXEC(basl_table_append_bytes(table, &madt_irq_override, sizeof(madt_irq_override))); memset(&madt_irq_override, 0, sizeof(madt_irq_override)); madt_irq_override.Header.Type = ACPI_MADT_TYPE_INTERRUPT_OVERRIDE; madt_irq_override.Header.Length = sizeof(madt_irq_override); madt_irq_override.SourceIrq = SCI_INT; madt_irq_override.GlobalIrq = htole32(SCI_INT); madt_irq_override.IntiFlags = htole16( ACPI_MADT_POLARITY_ACTIVE_LOW | ACPI_MADT_TRIGGER_LEVEL); BASL_EXEC(basl_table_append_bytes(table, &madt_irq_override, sizeof(madt_irq_override))); /* Local APIC NMI is conntected to LINT 1 on all CPUs */ memset(&madt_lapic_nmi, 0, sizeof(madt_lapic_nmi)); madt_lapic_nmi.Header.Type = ACPI_MADT_TYPE_LOCAL_APIC_NMI; madt_lapic_nmi.Header.Length = sizeof(madt_lapic_nmi); madt_lapic_nmi.ProcessorId = 0xFF; madt_lapic_nmi.IntiFlags = htole16( ACPI_MADT_POLARITY_ACTIVE_HIGH | ACPI_MADT_TRIGGER_EDGE); madt_lapic_nmi.Lint = 1; BASL_EXEC(basl_table_append_bytes(table, &madt_lapic_nmi, sizeof(madt_lapic_nmi))); BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_MADT, ACPI_RSDT_ENTRY_SIZE)); BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_MADT, ACPI_XSDT_ENTRY_SIZE)); return (0); } static int build_mcfg(struct vmctx *const ctx) { ACPI_TABLE_MCFG mcfg; ACPI_MCFG_ALLOCATION mcfg_allocation; struct basl_table *table; BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_MCFG, BASL_TABLE_ALIGNMENT)); memset(&mcfg, 0, sizeof(mcfg)); BASL_EXEC(basl_table_append_header(table, ACPI_SIG_MCFG, 1, 1)); BASL_EXEC(basl_table_append_content(table, &mcfg, sizeof(mcfg))); memset(&mcfg_allocation, 0, sizeof(mcfg_allocation)); mcfg_allocation.Address = htole64(pci_ecfg_base()); mcfg_allocation.EndBusNumber = 0xFF; BASL_EXEC(basl_table_append_bytes(table, &mcfg_allocation, sizeof(mcfg_allocation))); BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_MCFG, ACPI_RSDT_ENTRY_SIZE)); BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_MCFG, ACPI_XSDT_ENTRY_SIZE)); return (0); } static int build_rsdp(struct vmctx *const ctx) { ACPI_TABLE_RSDP rsdp; struct basl_table *table; BASL_EXEC(basl_table_create(&table, ctx, ACPI_RSDP_NAME, BASL_TABLE_ALIGNMENT)); memset(&rsdp, 0, sizeof(rsdp)); memcpy(rsdp.Signature, ACPI_SIG_RSDP, 8); rsdp.Checksum = 0; /* patched by basl */ memcpy(rsdp.OemId, "BHYVE ", ACPI_OEM_ID_SIZE); rsdp.Revision = 2; rsdp.RsdtPhysicalAddress = htole32(0); /* patched by basl */ rsdp.Length = htole32(0); /* patched by basl */ rsdp.XsdtPhysicalAddress = htole64(0); /* patched by basl */ rsdp.ExtendedChecksum = 0; /* patched by basl */ BASL_EXEC(basl_table_append_bytes(table, &rsdp, sizeof(rsdp))); BASL_EXEC(basl_table_add_checksum(table, offsetof(ACPI_TABLE_RSDP, Checksum), 0, 20)); BASL_EXEC(basl_table_add_pointer(table, ACPI_SIG_RSDT, offsetof(ACPI_TABLE_RSDP, RsdtPhysicalAddress), sizeof(rsdp.RsdtPhysicalAddress))); BASL_EXEC(basl_table_add_length(table, offsetof(ACPI_TABLE_RSDP, Length), sizeof(rsdp.Length))); 
BASL_EXEC(basl_table_add_pointer(table, ACPI_SIG_XSDT, offsetof(ACPI_TABLE_RSDP, XsdtPhysicalAddress), sizeof(rsdp.XsdtPhysicalAddress))); BASL_EXEC(basl_table_add_checksum(table, offsetof(ACPI_TABLE_RSDP, ExtendedChecksum), 0, BASL_TABLE_CHECKSUM_LEN_FULL_TABLE)); return (0); } static int build_rsdt(struct vmctx *const ctx) { BASL_EXEC( basl_table_create(&rsdt, ctx, ACPI_SIG_RSDT, BASL_TABLE_ALIGNMENT)); /* Header */ BASL_EXEC(basl_table_append_header(rsdt, ACPI_SIG_RSDT, 1, 1)); /* Pointers (added by other build_XXX funcs) */ return (0); } static int build_spcr(struct vmctx *const ctx) { ACPI_TABLE_SPCR spcr; struct basl_table *table; BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_SPCR, BASL_TABLE_ALIGNMENT)); memset(&spcr, 0, sizeof(spcr)); BASL_EXEC(basl_table_append_header(table, ACPI_SIG_SPCR, 1, 1)); spcr.InterfaceType = ACPI_DBG2_16550_COMPATIBLE; basl_fill_gas(&spcr.SerialPort, ACPI_ADR_SPACE_SYSTEM_IO, 8, 0, ACPI_GAS_ACCESS_WIDTH_LEGACY, 0x3F8); spcr.InterruptType = ACPI_SPCR_INTERRUPT_TYPE_8259; spcr.PcInterrupt = 4; spcr.BaudRate = ACPI_SPCR_BAUD_RATE_115200; spcr.Parity = ACPI_SPCR_PARITY_NO_PARITY; spcr.StopBits = ACPI_SPCR_STOP_BITS_1; spcr.FlowControl = 3; /* RTS/CTS | DCD */ spcr.TerminalType = ACPI_SPCR_TERMINAL_TYPE_VT_UTF8; BASL_EXEC(basl_table_append_content(table, &spcr, sizeof(spcr))); BASL_EXEC(basl_table_append_pointer(rsdt, ACPI_SIG_SPCR, ACPI_RSDT_ENTRY_SIZE)); BASL_EXEC(basl_table_append_pointer(xsdt, ACPI_SIG_SPCR, ACPI_XSDT_ENTRY_SIZE)); return (0); } static int build_xsdt(struct vmctx *const ctx) { BASL_EXEC( basl_table_create(&xsdt, ctx, ACPI_SIG_XSDT, BASL_TABLE_ALIGNMENT)); /* Header */ BASL_EXEC(basl_table_append_header(xsdt, ACPI_SIG_XSDT, 1, 1)); /* Pointers (added by other build_XXX funcs) */ return (0); } int acpi_build(struct vmctx *ctx, int ncpu) { int err; basl_ncpu = ncpu; err = vm_get_hpet_capabilities(ctx, &hpet_capabilities); if (err != 0) return (err); /* * For debug, allow the user to have iasl compiler output sent * to stdout rather than /dev/null */ if (getenv("BHYVE_ACPI_VERBOSE_IASL")) basl_verbose_iasl = 1; /* * Allow the user to keep the generated ASL files for debugging * instead of deleting them following use */ if (getenv("BHYVE_ACPI_KEEPTMPS")) basl_keep_temps = 1; BASL_EXEC(basl_init()); BASL_EXEC(basl_make_templates()); /* * Generate ACPI tables and copy them into guest memory. * * According to UEFI Specification v6.3 chapter 5.1 the FADT should be * the first table pointed to by XSDT. For that reason, build it as the * first table after XSDT. */ BASL_EXEC(build_rsdp(ctx)); BASL_EXEC(build_rsdt(ctx)); BASL_EXEC(build_xsdt(ctx)); BASL_EXEC(build_fadt(ctx)); BASL_EXEC(build_madt(ctx)); BASL_EXEC(build_hpet(ctx)); BASL_EXEC(build_mcfg(ctx)); BASL_EXEC(build_facs(ctx)); BASL_EXEC(build_spcr(ctx)); /* Build ACPI device-specific tables such as a TPM2 table. */ const struct acpi_device_list_entry *entry; SLIST_FOREACH(entry, &acpi_devices, chain) { BASL_EXEC(acpi_device_build_table(entry->dev)); } BASL_EXEC(build_dsdt(ctx)); BASL_EXEC(basl_finish()); return (0); } diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index ef5968ba6385..df9e1924a525 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -1,1655 +1,1655 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #ifdef BHYVE_SNAPSHOT #include #include #endif #include #ifdef BHYVE_SNAPSHOT #include #endif #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #ifdef BHYVE_SNAPSHOT #include #endif #include #include #include #include #include #include #include #include #ifdef BHYVE_SNAPSHOT #include #include #include #endif #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include "bhyverun.h" #include "acpi.h" #include "atkbdc.h" #include "bootrom.h" #include "config.h" #include "inout.h" #include "debug.h" #include "e820.h" #include "fwctl.h" #include "gdb.h" #include "ioapic.h" #include "kernemu_dev.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "qemu_fwcfg.h" #include "smbiostbl.h" #ifdef BHYVE_SNAPSHOT #include "snapshot.h" #endif #include "xmsr.h" #include "spinup_ap.h" #include "rtc.h" #include "vmgenc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ #define MB (1024UL * 1024) #define GB (1024UL * MB) static const char * const vmx_exit_reason_desc[] = { [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", [EXIT_REASON_EXT_INTR] = "External interrupt", [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", [EXIT_REASON_INIT] = "INIT signal", [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", [EXIT_REASON_SMI] = "Other SMI", [EXIT_REASON_INTR_WINDOW] = "Interrupt window", [EXIT_REASON_NMI_WINDOW] = "NMI window", [EXIT_REASON_TASK_SWITCH] = "Task switch", [EXIT_REASON_CPUID] = "CPUID", [EXIT_REASON_GETSEC] = "GETSEC", [EXIT_REASON_HLT] = "HLT", [EXIT_REASON_INVD] = "INVD", [EXIT_REASON_INVLPG] = "INVLPG", [EXIT_REASON_RDPMC] = "RDPMC", [EXIT_REASON_RDTSC] = "RDTSC", [EXIT_REASON_RSM] = "RSM", [EXIT_REASON_VMCALL] = "VMCALL", [EXIT_REASON_VMCLEAR] = "VMCLEAR", [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", [EXIT_REASON_VMPTRLD] = "VMPTRLD", [EXIT_REASON_VMPTRST] = "VMPTRST", [EXIT_REASON_VMREAD] = "VMREAD", [EXIT_REASON_VMRESUME] = "VMRESUME", [EXIT_REASON_VMWRITE] = "VMWRITE", [EXIT_REASON_VMXOFF] = "VMXOFF", [EXIT_REASON_VMXON] = "VMXON", [EXIT_REASON_CR_ACCESS] = "Control-register accesses", [EXIT_REASON_DR_ACCESS] = "MOV DR", [EXIT_REASON_INOUT] = "I/O instruction", [EXIT_REASON_RDMSR] 
= "RDMSR", [EXIT_REASON_WRMSR] = "WRMSR", [EXIT_REASON_INVAL_VMCS] = "VM-entry failure due to invalid guest state", [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", [EXIT_REASON_MWAIT] = "MWAIT", [EXIT_REASON_MTF] = "Monitor trap flag", [EXIT_REASON_MONITOR] = "MONITOR", [EXIT_REASON_PAUSE] = "PAUSE", [EXIT_REASON_MCE_DURING_ENTRY] = "VM-entry failure due to machine-check event", [EXIT_REASON_TPR] = "TPR below threshold", [EXIT_REASON_APIC_ACCESS] = "APIC access", [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", [EXIT_REASON_EPT_FAULT] = "EPT violation", [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", [EXIT_REASON_INVEPT] = "INVEPT", [EXIT_REASON_RDTSCP] = "RDTSCP", [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", [EXIT_REASON_INVVPID] = "INVVPID", [EXIT_REASON_WBINVD] = "WBINVD", [EXIT_REASON_XSETBV] = "XSETBV", [EXIT_REASON_APIC_WRITE] = "APIC write", [EXIT_REASON_RDRAND] = "RDRAND", [EXIT_REASON_INVPCID] = "INVPCID", [EXIT_REASON_VMFUNC] = "VMFUNC", [EXIT_REASON_ENCLS] = "ENCLS", [EXIT_REASON_RDSEED] = "RDSEED", [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", [EXIT_REASON_XSAVES] = "XSAVES", [EXIT_REASON_XRSTORS] = "XRSTORS" }; typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *); int guest_ncpus; uint16_t cpu_cores, cpu_sockets, cpu_threads; int raw_stdio = 0; static char *progname; static const int BSP = 0; static cpuset_t cpumask; static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu); static struct vcpu_info { struct vmctx *ctx; struct vcpu *vcpu; int vcpuid; } *vcpu_info; static cpuset_t **vcpumap; static void usage(int code) { fprintf(stderr, "Usage: %s [-AaCDeHhPSuWwxY]\n" " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" " %*s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n" " %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n" " -A: create ACPI tables\n" " -a: local apic is in xAPIC mode (deprecated)\n" " -C: include guest memory in core file\n" " -c: number of CPUs and/or topology specification\n" " -D: destroy on power-off\n" " -e: exit on unhandled I/O access\n" " -G: start a debug server\n" " -H: vmexit from the guest on HLT\n" " -h: help\n" " -k: key=value flat config file\n" " -K: PS2 keyboard layout\n" " -l: LPC device configuration\n" " -m: memory size\n" " -o: set config 'var' to 'value'\n" " -P: vmexit from the guest on pause\n" " -p: pin 'vcpu' to 'hostcpu'\n" #ifdef BHYVE_SNAPSHOT " -r: path to checkpoint file\n" #endif " -S: guest memory cannot be swapped\n" " -s: PCI slot config\n" " -U: UUID\n" " -u: RTC keeps UTC time\n" " -W: force virtio to use single-vector MSI\n" " -w: ignore unimplemented MSRs\n" " -x: local APIC is in x2APIC mode\n" " -Y: disable MPtable generation\n", progname, (int)strlen(progname), "", (int)strlen(progname), "", (int)strlen(progname), ""); exit(code); } /* * XXX This parser is known to have the following issues: * 1. It accepts null key=value tokens ",," as setting "cpus" to an * empty string. * * The acceptance of a null specification ('-c ""') is by design to match the * manual page syntax specification, this results in a topology of 1 vCPU. 
*/ static int topology_parse(const char *opt) { char *cp, *str, *tofree; if (*opt == '\0') { set_config_value("sockets", "1"); set_config_value("cores", "1"); set_config_value("threads", "1"); set_config_value("cpus", "1"); return (0); } tofree = str = strdup(opt); if (str == NULL) errx(4, "Failed to allocate memory"); while ((cp = strsep(&str, ",")) != NULL) { if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) set_config_value("cpus", cp + strlen("cpus=")); else if (strncmp(cp, "sockets=", strlen("sockets=")) == 0) set_config_value("sockets", cp + strlen("sockets=")); else if (strncmp(cp, "cores=", strlen("cores=")) == 0) set_config_value("cores", cp + strlen("cores=")); else if (strncmp(cp, "threads=", strlen("threads=")) == 0) set_config_value("threads", cp + strlen("threads=")); #ifdef notyet /* Do not expose this until vmm.ko implements it */ else if (strncmp(cp, "maxcpus=", strlen("maxcpus=")) == 0) set_config_value("maxcpus", cp + strlen("maxcpus=")); #endif else if (strchr(cp, '=') != NULL) goto out; else set_config_value("cpus", cp); } free(tofree); return (0); out: free(tofree); return (-1); } static int parse_int_value(const char *key, const char *value, int minval, int maxval) { char *cp; long lval; errno = 0; lval = strtol(value, &cp, 0); if (errno != 0 || *cp != '\0' || cp == value || lval < minval || lval > maxval) errx(4, "Invalid value for %s: '%s'", key, value); return (lval); } /* * Set the sockets, cores, threads, and guest_cpus variables based on * the configured topology. * * The limits of UINT16_MAX are due to the types passed to * vm_set_topology(). vmm.ko may enforce tighter limits. */ static void calc_topology(void) { const char *value; bool explicit_cpus; uint64_t ncpus; value = get_config_value("cpus"); if (value != NULL) { guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX); explicit_cpus = true; } else { guest_ncpus = 1; explicit_cpus = false; } value = get_config_value("cores"); if (value != NULL) cpu_cores = parse_int_value("cores", value, 1, UINT16_MAX); else cpu_cores = 1; value = get_config_value("threads"); if (value != NULL) cpu_threads = parse_int_value("threads", value, 1, UINT16_MAX); else cpu_threads = 1; value = get_config_value("sockets"); if (value != NULL) cpu_sockets = parse_int_value("sockets", value, 1, UINT16_MAX); else cpu_sockets = guest_ncpus; /* * Compute sockets * cores * threads avoiding overflow. The * range check above insures these are 16 bit values. */ ncpus = (uint64_t)cpu_sockets * cpu_cores * cpu_threads; if (ncpus > UINT16_MAX) errx(4, "Computed number of vCPUs too high: %ju", (uintmax_t)ncpus); if (explicit_cpus) { if (guest_ncpus != (int)ncpus) errx(4, "Topology (%d sockets, %d cores, %d threads) " "does not match %d vCPUs", cpu_sockets, cpu_cores, cpu_threads, guest_ncpus); } else guest_ncpus = ncpus; } static int pincpu_parse(const char *opt) { const char *value; char *newval; char key[16]; int vcpu, pcpu; if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { fprintf(stderr, "invalid format: %s\n", opt); return (-1); } if (vcpu < 0) { fprintf(stderr, "invalid vcpu '%d'\n", vcpu); return (-1); } if (pcpu < 0 || pcpu >= CPU_SETSIZE) { fprintf(stderr, "hostcpu '%d' outside valid range from " "0 to %d\n", pcpu, CPU_SETSIZE - 1); return (-1); } snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); value = get_config_value(key); if (asprintf(&newval, "%s%s%d", value != NULL ? value : "", value != NULL ? 
"," : "", pcpu) == -1) { perror("failed to build new cpuset string"); return (-1); } set_config_value(key, newval); free(newval); return (0); } static void parse_cpuset(int vcpu, const char *list, cpuset_t *set) { char *cp, *token; int pcpu, start; CPU_ZERO(set); start = -1; token = __DECONST(char *, list); for (;;) { pcpu = strtoul(token, &cp, 0); if (cp == token) errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); if (pcpu < 0 || pcpu >= CPU_SETSIZE) errx(4, "hostcpu '%d' outside valid range from 0 to %d", pcpu, CPU_SETSIZE - 1); switch (*cp) { case ',': case '\0': if (start >= 0) { if (start > pcpu) errx(4, "Invalid hostcpu range %d-%d", start, pcpu); while (start < pcpu) { CPU_SET(start, set); start++; } start = -1; } CPU_SET(pcpu, set); break; case '-': if (start >= 0) errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); start = pcpu; break; default: errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); } if (*cp == '\0') break; token = cp + 1; } } static void build_vcpumaps(void) { char key[16]; const char *value; int vcpu; vcpumap = calloc(guest_ncpus, sizeof(*vcpumap)); for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); value = get_config_value(key); if (value == NULL) continue; vcpumap[vcpu] = malloc(sizeof(cpuset_t)); if (vcpumap[vcpu] == NULL) err(4, "Failed to allocate cpuset for vcpu %d", vcpu); parse_cpuset(vcpu, value, vcpumap[vcpu]); } } void vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid, int errcode) { int error, restart_instruction; restart_instruction = 1; error = vm_inject_exception(vcpu, vector, errcode_valid, errcode, restart_instruction); assert(error == 0); } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } #ifdef BHYVE_SNAPSHOT uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr) { return (vm_rev_map_gpa(ctx, addr)); } #endif int fbsdrun_virtio_msix(void) { return (get_config_bool_default("virtio_msix", true)); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct vcpu_info *vi = param; int error; snprintf(tname, sizeof(tname), "vcpu %d", vi->vcpuid); pthread_set_name_np(pthread_self(), tname); if (vcpumap[vi->vcpuid] != NULL) { error = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), vcpumap[vi->vcpuid]); assert(error == 0); } #ifdef BHYVE_SNAPSHOT checkpoint_cpu_add(vi->vcpuid); #endif gdb_cpu_add(vi->vcpu); vm_loop(vi->ctx, vi->vcpu); /* not reached */ exit(1); return (NULL); } static void fbsdrun_addcpu(struct vcpu_info *vi) { pthread_t thr; int error; error = vm_activate_cpu(vi->vcpu); if (error != 0) err(EX_OSERR, "could not activate CPU %d", vi->vcpuid); CPU_SET_ATOMIC(vi->vcpuid, &cpumask); vm_suspend_cpu(vi->vcpu); error = pthread_create(&thr, NULL, fbsdrun_start_thread, vi); assert(error == 0); } static int fbsdrun_deletecpu(int vcpu) { if (!CPU_ISSET(vcpu, &cpumask)) { fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); exit(4); } CPU_CLR_ATOMIC(vcpu, &cpumask); return (CPU_EMPTY(&cpumask)); } static int vmexit_handle_notify(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, struct vm_exit *vme __unused, uint32_t eax __unused) { #if BHYVE_DEBUG /* * put guest-driven debug here */ #endif return (VMEXIT_CONTINUE); } static int vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { struct vm_exit *vme; int error; int bytes, port, in, out; vme = vmrun->vm_exit; port = vme->u.inout.port; bytes = vme->u.inout.bytes; in = vme->u.inout.in; 
out = !in; /* Extra-special case of host notifications */ if (out && port == GUEST_NIO_PORT) { error = vmexit_handle_notify(ctx, vcpu, vme, vme->u.inout.eax); return (error); } error = emulate_inout(ctx, vcpu, vme); if (error) { fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port, vme->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); } } static int vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) { struct vm_exit *vme; uint64_t val; uint32_t eax, edx; int error; vme = vmrun->vm_exit; val = 0; error = emulate_rdmsr(vcpu, vme->u.msr.code, &val); if (error != 0) { fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, vcpu_id(vcpu)); if (get_config_bool("x86.strictmsr")) { vm_inject_gp(vcpu); return (VMEXIT_CONTINUE); } } eax = val; error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax); assert(error == 0); edx = val >> 32; error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx); assert(error == 0); return (VMEXIT_CONTINUE); } static int vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) { struct vm_exit *vme; int error; vme = vmrun->vm_exit; error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval); if (error != 0) { fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu)); if (get_config_bool("x86.strictmsr")) { vm_inject_gp(vcpu); return (VMEXIT_CONTINUE); } } return (VMEXIT_CONTINUE); } #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif static const char * vmexit_vmx_desc(uint32_t exit_reason) { if (exit_reason >= nitems(vmx_exit_reason_desc) || vmx_exit_reason_desc[exit_reason] == NULL) return ("Unknown"); return (vmx_exit_reason_desc[exit_reason]); } static int vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { struct vm_exit *vme; vme = vmrun->vm_exit; fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); fprintf(stderr, "\treason\t\tVMX\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status); fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason, vmexit_vmx_desc(vme->u.vmx.exit_reason)); fprintf(stderr, "\tqualification\t0x%016lx\n", vme->u.vmx.exit_qualification); fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type); fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error); #ifdef DEBUG_EPT_MISCONFIG if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { vm_get_register(vcpu, VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), &ept_misconfig_gpa); vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, &ept_misconfig_ptenum); fprintf(stderr, "\tEPT misconfiguration:\n"); fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", ept_misconfig_ptenum, ept_misconfig_pte[0], ept_misconfig_pte[1], ept_misconfig_pte[2], ept_misconfig_pte[3]); } #endif /* DEBUG_EPT_MISCONFIG */ return (VMEXIT_ABORT); } static int vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) { struct vm_exit *vme; vme = vmrun->vm_exit; fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); fprintf(stderr, "\treason\t\tSVM\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); 
fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode); fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1); fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2); return (VMEXIT_ABORT); } static int vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, struct vm_run *vmrun) { assert(vmrun->vm_exit->inst_length == 0); return (VMEXIT_CONTINUE); } static int vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, struct vm_run *vmrun) { assert(vmrun->vm_exit->inst_length == 0); return (VMEXIT_CONTINUE); } static int vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, struct vm_run *vmrun __unused) { /* * Just continue execution with the next instruction. We use * the HLT VM exit as a way to be friendly with the host * scheduler. */ return (VMEXIT_CONTINUE); } static int vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, struct vm_run *vmrun __unused) { return (VMEXIT_CONTINUE); } static int vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) { assert(vmrun->vm_exit->inst_length == 0); #ifdef BHYVE_SNAPSHOT checkpoint_cpu_suspend(vcpu_id(vcpu)); #endif gdb_cpu_mtrap(vcpu); #ifdef BHYVE_SNAPSHOT checkpoint_cpu_resume(vcpu_id(vcpu)); #endif return (VMEXIT_CONTINUE); } static int vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) { struct vm_exit *vme; struct vie *vie; int err, i, cs_d; enum vm_cpu_mode mode; vme = vmrun->vm_exit; vie = &vme->u.inst_emul.vie; if (!vie->decoded) { /* * Attempt to decode in userspace as a fallback. This allows * updating instruction decode in bhyve without rebooting the * kernel (rapid prototyping), albeit with much slower * emulation. */ vie_restart(vie); mode = vme->u.inst_emul.paging.cpu_mode; cs_d = vme->u.inst_emul.cs_d; if (vmm_decode_instruction(mode, cs_d, vie) != 0) goto fail; if (vm_set_register(vcpu, VM_REG_GUEST_RIP, vme->rip + vie->num_processed) != 0) goto fail; } err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie, &vme->u.inst_emul.paging); if (err) { if (err == ESRCH) { EPRINTLN("Unhandled memory access to 0x%lx\n", vme->u.inst_emul.gpa); } goto fail; } return (VMEXIT_CONTINUE); fail: fprintf(stderr, "Failed to emulate instruction sequence [ "); for (i = 0; i < vie->num_valid; i++) fprintf(stderr, "%02x", vie->inst[i]); FPRINTLN(stderr, " ] at 0x%lx", vme->rip); return (VMEXIT_ABORT); } static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; static int vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) { struct vm_exit *vme; enum vm_suspend_how how; int vcpuid = vcpu_id(vcpu); vme = vmrun->vm_exit; how = vme->u.suspended.how; fbsdrun_deletecpu(vcpuid); if (vcpuid != BSP) { pthread_mutex_lock(&resetcpu_mtx); pthread_cond_signal(&resetcpu_cond); pthread_mutex_unlock(&resetcpu_mtx); pthread_exit(NULL); } pthread_mutex_lock(&resetcpu_mtx); while (!CPU_EMPTY(&cpumask)) { pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); } pthread_mutex_unlock(&resetcpu_mtx); switch (how) { case VM_SUSPEND_RESET: exit(0); case VM_SUSPEND_POWEROFF: if (get_config_bool_default("destroy_on_poweroff", false)) vm_destroy(ctx); exit(1); case VM_SUSPEND_HALT: exit(2); case VM_SUSPEND_TRIPLEFAULT: exit(3); default: fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); exit(100); } return (0); /* NOTREACHED */ } static int vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun __unused) { #ifdef 
BHYVE_SNAPSHOT checkpoint_cpu_suspend(vcpu_id(vcpu)); #endif gdb_cpu_suspend(vcpu); #ifdef BHYVE_SNAPSHOT checkpoint_cpu_resume(vcpu_id(vcpu)); #endif /* * XXX-MJ sleep for a short period to avoid chewing up the CPU in the * window between activation of the vCPU thread and the STARTUP IPI. */ usleep(1000); return (VMEXIT_CONTINUE); } static int vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) { gdb_cpu_breakpoint(vcpu, vmrun->vm_exit); return (VMEXIT_CONTINUE); } static int vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, struct vm_run *vmrun) { struct vm_exit *vme; cpuset_t *dmask; int error = -1; int i; dmask = vmrun->cpuset; vme = vmrun->vm_exit; switch (vme->u.ipi.mode) { case APIC_DELMODE_INIT: CPU_FOREACH_ISSET(i, dmask) { error = vm_suspend_cpu(vcpu_info[i].vcpu); if (error) { warnx("%s: failed to suspend cpu %d\n", __func__, i); break; } } break; case APIC_DELMODE_STARTUP: CPU_FOREACH_ISSET(i, dmask) { spinup_ap(vcpu_info[i].vcpu, vme->u.ipi.vector << PAGE_SHIFT); } error = 0; break; default: break; } return (error); } static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, [VM_EXITCODE_DEBUG] = vmexit_debug, [VM_EXITCODE_BPT] = vmexit_breakpoint, [VM_EXITCODE_IPI] = vmexit_ipi, }; static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu) { struct vm_exit vme; struct vm_run vmrun; int error, rc; enum vm_exitcode exitcode; cpuset_t active_cpus, dmask; error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu_id(vcpu), &active_cpus)); vmrun.vm_exit = &vme; vmrun.cpuset = &dmask; vmrun.cpusetsize = sizeof(dmask); while (1) { error = vm_run(vcpu, &vmrun); if (error != 0) break; exitcode = vme.exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(4); } rc = (*handler[exitcode])(ctx, vcpu, &vmrun); switch (rc) { case VMEXIT_CONTINUE: break; case VMEXIT_ABORT: abort(); default: exit(4); } } fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx, struct vcpu *vcpu) { uint16_t sockets, cores, threads, maxcpus; int tmp, error; /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. 
*/ error = vm_get_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, &tmp); if (error != 0) return (1); error = vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus); if (error == 0) return (maxcpus); else return (1); } static void fbsdrun_set_capabilities(struct vcpu *vcpu, bool bsp) { int err, tmp; if (get_config_bool_default("x86.vmexit_on_hlt", false)) { err = vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { fprintf(stderr, "VM exit on HLT not supported\n"); exit(4); } vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1); if (bsp) handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (get_config_bool_default("x86.vmexit_on_pause", false)) { /* * pause exit support required for this mode */ err = vm_get_capability(vcpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { fprintf(stderr, "SMP mux requested, no pause support\n"); exit(4); } vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1); if (bsp) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (get_config_bool_default("x86.x2apic", false)) err = vm_set_x2apic_state(vcpu, X2APIC_ENABLED); else err = vm_set_x2apic_state(vcpu, X2APIC_DISABLED); if (err) { fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(4); } vm_set_capability(vcpu, VM_CAP_ENABLE_INVPCID, 1); err = vm_set_capability(vcpu, VM_CAP_IPI_EXIT, 1); assert(err == 0); } static struct vmctx * do_open(const char *vmname) { struct vmctx *ctx; int error; bool reinit, romboot; reinit = romboot = false; if (lpc_bootrom()) romboot = true; error = vm_create(vmname); if (error) { if (errno == EEXIST) { if (romboot) { reinit = true; } else { /* * The virtual machine has been setup by the * userspace bootloader. */ } } else { perror("vm_create"); exit(4); } } else { if (!romboot) { /* * If the virtual machine was just created then a * bootrom must be configured to boot it. */ fprintf(stderr, "virtual machine cannot be booted\n"); exit(4); } } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(4); } #ifndef WITHOUT_CAPSICUM if (vm_limit_rights(ctx) != 0) err(EX_OSERR, "vm_limit_rights"); #endif if (reinit) { error = vm_reinit(ctx); if (error) { perror("vm_reinit"); exit(4); } } error = vm_set_topology(ctx, cpu_sockets, cpu_cores, cpu_threads, 0 /* maxcpus, unimplemented */); if (error) errx(EX_OSERR, "vm_set_topology"); return (ctx); } static void spinup_vcpu(struct vcpu_info *vi, bool bsp) { int error; if (!bsp) { fbsdrun_set_capabilities(vi->vcpu, false); /* * Enable the 'unrestricted guest' mode for APs. * * APs startup in power-on 16-bit mode. 
*/ error = vm_set_capability(vi->vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); assert(error == 0); } fbsdrun_addcpu(vi); } static bool parse_config_option(const char *option) { const char *value; char *path; value = strchr(option, '='); if (value == NULL || value[1] == '\0') return (false); path = strndup(option, value - option); if (path == NULL) err(4, "Failed to allocate memory"); set_config_value(path, value + 1); return (true); } static void parse_simple_config_file(const char *path) { FILE *fp; char *line, *cp; size_t linecap; unsigned int lineno; fp = fopen(path, "r"); if (fp == NULL) err(4, "Failed to open configuration file %s", path); line = NULL; linecap = 0; lineno = 1; for (lineno = 1; getline(&line, &linecap, fp) > 0; lineno++) { if (*line == '#' || *line == '\n') continue; cp = strchr(line, '\n'); if (cp != NULL) *cp = '\0'; if (!parse_config_option(line)) errx(4, "%s line %u: invalid config option '%s'", path, lineno, line); } free(line); fclose(fp); } static void parse_gdb_options(const char *opt) { const char *sport; char *colon; if (opt[0] == 'w') { set_config_bool("gdb.wait", true); opt++; } colon = strrchr(opt, ':'); if (colon == NULL) { sport = opt; } else { *colon = '\0'; colon++; sport = colon; set_config_value("gdb.address", opt); } set_config_value("gdb.port", sport); } static void set_defaults(void) { set_config_bool("acpi_tables", false); set_config_value("memory.size", "256M"); set_config_bool("x86.strictmsr", true); set_config_value("lpc.fwcfg", "bhyve"); } int main(int argc, char *argv[]) { int c, error; int max_vcpus, memflags; struct vcpu *bsp; struct vmctx *ctx; struct qemu_fwcfg_item *e820_fwcfg_item; uint64_t rip; size_t memsize; const char *optstr, *value, *vmname; #ifdef BHYVE_SNAPSHOT char *restore_file; struct restore_state rstate; restore_file = NULL; #endif init_config(); set_defaults(); progname = basename(argv[0]); #ifdef BHYVE_SNAPSHOT optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:"; #else optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:"; #endif while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': set_config_bool("x86.x2apic", false); break; case 'A': set_config_bool("acpi_tables", true); break; case 'D': set_config_bool("destroy_on_poweroff", true); break; case 'p': if (pincpu_parse(optarg) != 0) { errx(EX_USAGE, "invalid vcpu pinning " "configuration '%s'", optarg); } break; case 'c': if (topology_parse(optarg) != 0) { errx(EX_USAGE, "invalid cpu topology " "'%s'", optarg); } break; case 'C': set_config_bool("memory.guest_in_core", true); break; case 'f': if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) { errx(EX_USAGE, "invalid fwcfg item '%s'", optarg); } break; case 'G': parse_gdb_options(optarg); break; case 'k': parse_simple_config_file(optarg); break; case 'K': set_config_value("keyboard.layout", optarg); break; case 'l': if (strncmp(optarg, "help", strlen(optarg)) == 0) { lpc_print_supported_devices(); exit(0); } else if (lpc_device_parse(optarg) != 0) { errx(EX_USAGE, "invalid lpc device " "configuration '%s'", optarg); } break; #ifdef BHYVE_SNAPSHOT case 'r': restore_file = optarg; break; #endif case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { pci_print_supported_devices(); exit(0); } else if (pci_parse_slot(optarg) != 0) exit(4); else break; case 'S': set_config_bool("memory.wired", true); break; case 'm': set_config_value("memory.size", optarg); break; case 'o': if (!parse_config_option(optarg)) errx(EX_USAGE, "invalid configuration option '%s'", optarg); break; case 'H': set_config_bool("x86.vmexit_on_hlt", 
true); break; case 'I': /* * The "-I" option was used to add an ioapic to the * virtual machine. * * An ioapic is now provided unconditionally for each * virtual machine and this option is now deprecated. */ break; case 'P': set_config_bool("x86.vmexit_on_pause", true); break; case 'e': set_config_bool("x86.strictio", true); break; case 'u': set_config_bool("rtc.use_localtime", false); break; case 'U': set_config_value("uuid", optarg); break; case 'w': set_config_bool("x86.strictmsr", false); break; case 'W': set_config_bool("virtio_msix", false); break; case 'x': set_config_bool("x86.x2apic", true); break; case 'Y': set_config_bool("x86.mptable", false); break; case 'h': usage(0); default: usage(1); } } argc -= optind; argv += optind; if (argc > 1) usage(1); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { error = load_restore_file(restore_file, &rstate); if (error) { fprintf(stderr, "Failed to read checkpoint info from " "file: '%s'.\n", restore_file); exit(1); } vmname = lookup_vmname(&rstate); if (vmname != NULL) set_config_value("name", vmname); } #endif if (argc == 1) set_config_value("name", argv[0]); vmname = get_config_value("name"); if (vmname == NULL) usage(1); if (get_config_bool_default("config.dump", false)) { dump_config(); exit(1); } calc_topology(); build_vcpumaps(); value = get_config_value("memory.size"); error = vm_parse_memsize(value, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", value); ctx = do_open(vmname); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { guest_ncpus = lookup_guest_ncpus(&rstate); memflags = lookup_memflags(&rstate); memsize = lookup_memsize(&rstate); } if (guest_ncpus < 1) { fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); exit(1); } #endif bsp = vm_vcpu_open(ctx, BSP); max_vcpus = num_vcpus_allowed(ctx, bsp); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(4); } fbsdrun_set_capabilities(bsp, true); /* Allocate per-VCPU resources. */ vcpu_info = calloc(guest_ncpus, sizeof(*vcpu_info)); for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) { vcpu_info[vcpuid].ctx = ctx; vcpu_info[vcpuid].vcpuid = vcpuid; if (vcpuid == BSP) vcpu_info[vcpuid].vcpu = bsp; else vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid); } memflags = 0; if (get_config_bool_default("memory.wired", false)) memflags |= VM_MEM_F_WIRED; if (get_config_bool_default("memory.guest_in_core", false)) memflags |= VM_MEM_F_INCORE; vm_set_memflags(ctx, memflags); error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (error) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(4); } error = init_msr(); if (error) { fprintf(stderr, "init_msr error %d", error); exit(4); } init_mem(guest_ncpus); init_inout(); kernemu_dev_init(); init_bootrom(ctx); atkbdc_init(ctx); pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx); sci_init(ctx); if (qemu_fwcfg_init(ctx) != 0) { fprintf(stderr, "qemu fwcfg initialization error"); exit(4); } if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu", sizeof(guest_ncpus), &guest_ncpus) != 0) { fprintf(stderr, "Could not add qemu fwcfg opt/bhyve/hw.ncpu"); exit(4); } if (e820_init(ctx) != 0) { fprintf(stderr, "Unable to setup E820"); exit(4); } /* - * Exit if a device emulation finds an error in its initilization + * Exit if a device emulation finds an error in its initialization */ if (init_pci(ctx) != 0) { perror("device emulation initialization error"); exit(4); } /* * Initialize after PCI, to allow a bootrom file to reserve the high * region. 
*/ if (get_config_bool("acpi_tables")) vmgenc_init(ctx); init_gdb(ctx); if (lpc_bootrom()) { if (vm_set_capability(bsp, VM_CAP_UNRESTRICTED_GUEST, 1)) { fprintf(stderr, "ROM boot failed: unrestricted guest " "capability not available\n"); exit(4); } error = vcpu_reset(bsp); assert(error == 0); } /* * Add all vCPUs. */ for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) spinup_vcpu(&vcpu_info[vcpuid], vcpuid == BSP); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { fprintf(stdout, "Pausing pci devs...\r\n"); if (vm_pause_user_devs() != 0) { fprintf(stderr, "Failed to pause PCI device state.\n"); exit(1); } fprintf(stdout, "Restoring vm mem...\r\n"); if (restore_vm_mem(ctx, &rstate) != 0) { fprintf(stderr, "Failed to restore VM memory.\n"); exit(1); } fprintf(stdout, "Restoring pci devs...\r\n"); if (vm_restore_user_devs(&rstate) != 0) { fprintf(stderr, "Failed to restore PCI device state.\n"); exit(1); } fprintf(stdout, "Restoring kernel structs...\r\n"); if (vm_restore_kern_structs(ctx, &rstate) != 0) { fprintf(stderr, "Failed to restore kernel structs.\n"); exit(1); } fprintf(stdout, "Resuming pci devs...\r\n"); if (vm_resume_user_devs() != 0) { fprintf(stderr, "Failed to resume PCI device state.\n"); exit(1); } } #endif error = vm_get_register(bsp, VM_REG_GUEST_RIP, &rip); assert(error == 0); /* * build the guest tables, MP etc. */ if (get_config_bool_default("x86.mptable", true)) { error = mptable_build(ctx, guest_ncpus); if (error) { perror("error to build the guest tables"); exit(4); } } error = smbios_build(ctx); if (error != 0) exit(4); if (get_config_bool("acpi_tables")) { error = acpi_build(ctx, guest_ncpus); assert(error == 0); } e820_fwcfg_item = e820_get_fwcfg_item(); if (e820_fwcfg_item == NULL) { fprintf(stderr, "invalid e820 table"); exit(4); } if (qemu_fwcfg_add_file("etc/e820", e820_fwcfg_item->size, e820_fwcfg_item->data) != 0) { fprintf(stderr, "could not add qemu fwcfg etc/e820"); exit(4); } free(e820_fwcfg_item); if (lpc_bootrom() && strcmp(lpc_fwcfg(), "bhyve") == 0) { fwctl_init(); } /* * Change the proc title to include the VM name. */ setproctitle("%s", vmname); #ifdef BHYVE_SNAPSHOT /* initialize mutex/cond variables */ init_snapshot(); /* * checkpointing thread for communication with bhyvectl */ if (init_checkpoint_thread(ctx) != 0) errx(EX_OSERR, "Failed to start checkpoint thread"); #endif #ifndef WITHOUT_CAPSICUM caph_cache_catpages(); if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_enter() == -1) errx(EX_OSERR, "cap_enter() failed"); #endif #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { destroy_restore_state(&rstate); if (vm_restore_time(ctx) < 0) err(EX_OSERR, "Unable to restore time"); for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) vm_resume_cpu(vcpu_info[vcpuid].vcpu); } else #endif vm_resume_cpu(bsp); /* * Head off to the main event dispatch loop */ mevent_dispatch(); exit(4); } diff --git a/usr.sbin/bhyve/config.c b/usr.sbin/bhyve/config.c index ba51f81dd011..c844d30f2f74 100644 --- a/usr.sbin/bhyve/config.c +++ b/usr.sbin/bhyve/config.c @@ -1,466 +1,466 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 John H. Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "config.h" static nvlist_t *config_root; void init_config(void) { config_root = nvlist_create(0); if (config_root == NULL) err(4, "Failed to create configuration root nvlist"); } static nvlist_t * _lookup_config_node(nvlist_t *parent, const char *path, bool create) { char *copy, *name, *tofree; nvlist_t *nvl, *new_nvl; copy = strdup(path); if (copy == NULL) errx(4, "Failed to allocate memory"); tofree = copy; nvl = parent; while ((name = strsep(©, ".")) != NULL) { if (*name == '\0') { warnx("Invalid configuration node: %s", path); nvl = NULL; break; } if (nvlist_exists_nvlist(nvl, name)) /* * XXX-MJ it is incorrect to cast away the const * qualifier like this since the contract with nvlist * says that values are immutable, and some consumers * will indeed add nodes to the returned nvlist. In * practice, however, it appears to be harmless with the * current nvlist implementation, so we just live with * it until the implementation is reworked. */ nvl = __DECONST(nvlist_t *, nvlist_get_nvlist(nvl, name)); else if (nvlist_exists(nvl, name)) { for (copy = tofree; copy < name; copy++) if (*copy == '\0') *copy = '.'; warnx( "Configuration node %s is a child of existing variable %s", path, tofree); nvl = NULL; break; } else if (create) { /* * XXX-MJ as with the case above, "new_nvl" shouldn't be * mutated after its ownership is given to "nvl". 
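 * nvlist_move_nvlist() transfers ownership of the child list to "nvl",
 * so "new_nvl" must not be freed separately afterwards.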
*/ new_nvl = nvlist_create(0); if (new_nvl == NULL) errx(4, "Failed to allocate memory"); nvlist_move_nvlist(nvl, name, new_nvl); nvl = new_nvl; } else { nvl = NULL; break; } } free(tofree); return (nvl); } nvlist_t * create_config_node(const char *path) { return (_lookup_config_node(config_root, path, true)); } nvlist_t * find_config_node(const char *path) { return (_lookup_config_node(config_root, path, false)); } nvlist_t * create_relative_config_node(nvlist_t *parent, const char *path) { return (_lookup_config_node(parent, path, true)); } nvlist_t * find_relative_config_node(nvlist_t *parent, const char *path) { return (_lookup_config_node(parent, path, false)); } void set_config_value_node(nvlist_t *parent, const char *name, const char *value) { if (strchr(name, '.') != NULL) errx(4, "Invalid config node name %s", name); if (parent == NULL) parent = config_root; if (nvlist_exists_string(parent, name)) nvlist_free_string(parent, name); else if (nvlist_exists(parent, name)) errx(4, - "Attemping to add value %s to existing node %s of list %p", + "Attempting to add value %s to existing node %s of list %p", value, name, parent); nvlist_add_string(parent, name, value); } void set_config_value_node_if_unset(nvlist_t *const parent, const char *const name, const char *const value) { if (get_config_value_node(parent, name) != NULL) { return; } set_config_value_node(parent, name, value); } void set_config_value(const char *path, const char *value) { const char *name; char *node_name; nvlist_t *nvl; /* Look for last separator. */ name = strrchr(path, '.'); if (name == NULL) { nvl = config_root; name = path; } else { node_name = strndup(path, name - path); if (node_name == NULL) errx(4, "Failed to allocate memory"); nvl = create_config_node(node_name); if (nvl == NULL) errx(4, "Failed to create configuration node %s", node_name); free(node_name); /* Skip over '.'. */ name++; } if (nvlist_exists_nvlist(nvl, name)) errx(4, "Attempting to add value %s to existing node %s", value, path); set_config_value_node(nvl, name, value); } void set_config_value_if_unset(const char *const path, const char *const value) { if (get_config_value(path) != NULL) { return; } set_config_value(path, value); } static const char * get_raw_config_value(const char *path) { const char *name; char *node_name; nvlist_t *nvl; /* Look for last separator. */ name = strrchr(path, '.'); if (name == NULL) { nvl = config_root; name = path; } else { node_name = strndup(path, name - path); if (node_name == NULL) errx(4, "Failed to allocate memory"); nvl = find_config_node(node_name); free(node_name); if (nvl == NULL) return (NULL); /* Skip over '.'. 
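 * For a path such as "lpc.com1.path", the lookup above resolves the
 * "lpc.com1" nvlist; after skipping the '.', "name" points at the
 * final component, "path".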
*/ name++; } if (nvlist_exists_string(nvl, name)) return (nvlist_get_string(nvl, name)); if (nvlist_exists_nvlist(nvl, name)) warnx("Attempting to fetch value of node %s", path); return (NULL); } static char * _expand_config_value(const char *value, int depth) { FILE *valfp; const char *cp, *vp; char *nestedval, *path, *valbuf; size_t valsize; valfp = open_memstream(&valbuf, &valsize); if (valfp == NULL) errx(4, "Failed to allocate memory"); vp = value; while (*vp != '\0') { switch (*vp) { case '%': if (depth > 15) { warnx( "Too many recursive references in configuration value"); fputc('%', valfp); vp++; break; } if (vp[1] != '(' || vp[2] == '\0') cp = NULL; else cp = strchr(vp + 2, ')'); if (cp == NULL) { warnx( "Invalid reference in configuration value \"%s\"", value); fputc('%', valfp); vp++; break; } vp += 2; if (cp == vp) { warnx( "Empty reference in configuration value \"%s\"", value); vp++; break; } /* Allocate a C string holding the path. */ path = strndup(vp, cp - vp); if (path == NULL) errx(4, "Failed to allocate memory"); /* Advance 'vp' past the reference. */ vp = cp + 1; /* Fetch the referenced value. */ cp = get_raw_config_value(path); if (cp == NULL) warnx( "Failed to fetch referenced configuration variable %s", path); else { nestedval = _expand_config_value(cp, depth + 1); fputs(nestedval, valfp); free(nestedval); } free(path); break; case '\\': vp++; if (*vp == '\0') { warnx( "Trailing \\ in configuration value \"%s\"", value); break; } /* FALLTHROUGH */ default: fputc(*vp, valfp); vp++; break; } } fclose(valfp); return (valbuf); } static const char * expand_config_value(const char *value) { static char *valbuf; if (strchr(value, '%') == NULL) return (value); free(valbuf); valbuf = _expand_config_value(value, 0); return (valbuf); } const char * get_config_value(const char *path) { const char *value; value = get_raw_config_value(path); if (value == NULL) return (NULL); return (expand_config_value(value)); } const char * get_config_value_node(const nvlist_t *parent, const char *name) { if (strchr(name, '.') != NULL) errx(4, "Invalid config node name %s", name); if (parent == NULL) parent = config_root; if (nvlist_exists_nvlist(parent, name)) warnx("Attempt to fetch value of node %s of list %p", name, parent); if (!nvlist_exists_string(parent, name)) return (NULL); return (expand_config_value(nvlist_get_string(parent, name))); } static bool _bool_value(const char *name, const char *value) { if (strcasecmp(value, "true") == 0 || strcasecmp(value, "on") == 0 || strcasecmp(value, "yes") == 0 || strcmp(value, "1") == 0) return (true); if (strcasecmp(value, "false") == 0 || strcasecmp(value, "off") == 0 || strcasecmp(value, "no") == 0 || strcmp(value, "0") == 0) return (false); err(4, "Invalid value %s for boolean variable %s", value, name); } bool get_config_bool(const char *path) { const char *value; value = get_config_value(path); if (value == NULL) err(4, "Failed to fetch boolean variable %s", path); return (_bool_value(path, value)); } bool get_config_bool_default(const char *path, bool def) { const char *value; value = get_config_value(path); if (value == NULL) return (def); return (_bool_value(path, value)); } bool get_config_bool_node(const nvlist_t *parent, const char *name) { const char *value; value = get_config_value_node(parent, name); if (value == NULL) err(4, "Failed to fetch boolean variable %s", name); return (_bool_value(name, value)); } bool get_config_bool_node_default(const nvlist_t *parent, const char *name, bool def) { const char *value; value = 
get_config_value_node(parent, name); if (value == NULL) return (def); return (_bool_value(name, value)); } void set_config_bool(const char *path, bool value) { set_config_value(path, value ? "true" : "false"); } void set_config_bool_node(nvlist_t *parent, const char *name, bool value) { set_config_value_node(parent, name, value ? "true" : "false"); } static void dump_tree(const char *prefix, const nvlist_t *nvl) { const char *name; void *cookie; int type; cookie = NULL; while ((name = nvlist_next(nvl, &type, &cookie)) != NULL) { if (type == NV_TYPE_NVLIST) { char *new_prefix; asprintf(&new_prefix, "%s%s.", prefix, name); dump_tree(new_prefix, nvlist_get_nvlist(nvl, name)); free(new_prefix); } else { assert(type == NV_TYPE_STRING); printf("%s%s=%s\n", prefix, name, nvlist_get_string(nvl, name)); } } } void dump_config(void) { dump_tree("", config_root); } diff --git a/usr.sbin/bhyve/net_backends.h b/usr.sbin/bhyve/net_backends.h index 844024e7bc9d..cc8c7b5cff32 100644 --- a/usr.sbin/bhyve/net_backends.h +++ b/usr.sbin/bhyve/net_backends.h @@ -1,96 +1,96 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 Vincenzo Maffione * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __NET_BACKENDS_H__ #define __NET_BACKENDS_H__ #include /* Opaque type representing a network backend. */ typedef struct net_backend net_backend_t; /* Interface between network frontends and the network backends. */ typedef void (*net_be_rxeof_t)(int, enum ev_type, void *param); int netbe_init(net_backend_t **be, nvlist_t *nvl, net_be_rxeof_t cb, void *param); int netbe_legacy_config(nvlist_t *nvl, const char *opts); void netbe_cleanup(net_backend_t *be); uint64_t netbe_get_cap(net_backend_t *be); int netbe_set_cap(net_backend_t *be, uint64_t cap, unsigned vnet_hdr_len); size_t netbe_get_vnet_hdr_len(net_backend_t *be); ssize_t netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt); ssize_t netbe_peek_recvlen(net_backend_t *be); ssize_t netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt); ssize_t netbe_rx_discard(net_backend_t *be); void netbe_rx_disable(net_backend_t *be); void netbe_rx_enable(net_backend_t *be); /* * Network device capabilities taken from the VirtIO standard. 
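 * (The bit positions below follow the VirtIO feature-bit numbering, e.g.
 * VIRTIO_NET_F_CSUM is feature bit 0 and VIRTIO_NET_F_MRG_RXBUF is bit 15.)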
- * Despite the name, these capabilities can be used by different frontents + * Despite the name, these capabilities can be used by different frontends * (virtio-net, ptnet) and supported by different backends (netmap, tap, ...). */ #define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ #define VIRTIO_NET_F_MTU (1 << 3) /* initial MTU advice */ #define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ #define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ #define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ #define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ #define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ #define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ #define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ #define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ #define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ #define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ #define VIRTIO_NET_F_GUEST_ANNOUNCE \ (1 << 21) /* guest can send gratuitous pkts */ #define VIRTIO_NET_F_MQ (1 << 22) /* host supports multiple VQ pairs */ /* * Fixed network header size */ struct virtio_net_rxhdr { uint8_t vrh_flags; uint8_t vrh_gso_type; uint16_t vrh_hdr_len; uint16_t vrh_gso_size; uint16_t vrh_csum_start; uint16_t vrh_csum_offset; uint16_t vrh_bufs; } __packed; #endif /* __NET_BACKENDS_H__ */ diff --git a/usr.sbin/bhyve/pci_nvme.c b/usr.sbin/bhyve/pci_nvme.c index f3f10cba7502..de5865220155 100644 --- a/usr.sbin/bhyve/pci_nvme.c +++ b/usr.sbin/bhyve/pci_nvme.c @@ -1,3342 +1,3342 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2017 Shunsuke Mie * Copyright (c) 2018 Leon Dang * Copyright (c) 2020 Chuck Tuffli * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * bhyve PCIe-NVMe device emulation. 
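 * A typical slot configuration, using placeholder values, might be:
 *   -s 3,nvme,/path/to/disk.img,maxq=4,ser=BHYVE0001
 * with the individual options documented below.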
* * options: * -s ,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=#,dsm= * * accepted devpath: * /dev/blockdev * /path/to/image * ram=size_in_MiB * * maxq = max number of queues * qsz = max elements in each queue * ioslots = max number of concurrent io requests * sectsz = sector size (defaults to blockif sector size) * ser = serial number (20-chars max) * eui64 = IEEE Extended Unique Identifier (8 byte value) * dsm = DataSet Management support. Option is one of auto, enable,disable * */ /* TODO: - create async event for smart and log - intr coalesce */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "block_if.h" #include "config.h" #include "debug.h" #include "pci_emul.h" static int nvme_debug = 0; #define DPRINTF(fmt, args...) if (nvme_debug) PRINTLN(fmt, ##args) #define WPRINTF(fmt, args...) PRINTLN(fmt, ##args) /* defaults; can be overridden */ #define NVME_MSIX_BAR 4 #define NVME_IOSLOTS 8 /* The NVMe spec defines bits 13:4 in BAR0 as reserved */ #define NVME_MMIO_SPACE_MIN (1 << 14) #define NVME_QUEUES 16 #define NVME_MAX_QENTRIES 2048 /* Memory Page size Minimum reported in CAP register */ #define NVME_MPSMIN 0 /* MPSMIN converted to bytes */ #define NVME_MPSMIN_BYTES (1 << (12 + NVME_MPSMIN)) #define NVME_PRP2_ITEMS (PAGE_SIZE/sizeof(uint64_t)) #define NVME_MDTS 9 /* Note the + 1 allows for the initial descriptor to not be page aligned */ #define NVME_MAX_IOVEC ((1 << NVME_MDTS) + 1) #define NVME_MAX_DATA_SIZE ((1 << NVME_MDTS) * NVME_MPSMIN_BYTES) /* This is a synthetic status code to indicate there is no status */ #define NVME_NO_STATUS 0xffff #define NVME_COMPLETION_VALID(c) ((c).status != NVME_NO_STATUS) /* Reported temperature in Kelvin (i.e. 
room temperature) */ #define NVME_TEMPERATURE 296 /* helpers */ /* Convert a zero-based value into a one-based value */ #define ONE_BASED(zero) ((zero) + 1) /* Convert a one-based value into a zero-based value */ #define ZERO_BASED(one) ((one) - 1) /* Encode number of SQ's and CQ's for Set/Get Features */ #define NVME_FEATURE_NUM_QUEUES(sc) \ (ZERO_BASED((sc)->num_squeues) & 0xffff) | \ (ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16 #define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell) enum nvme_controller_register_offsets { NVME_CR_CAP_LOW = 0x00, NVME_CR_CAP_HI = 0x04, NVME_CR_VS = 0x08, NVME_CR_INTMS = 0x0c, NVME_CR_INTMC = 0x10, NVME_CR_CC = 0x14, NVME_CR_CSTS = 0x1c, NVME_CR_NSSR = 0x20, NVME_CR_AQA = 0x24, NVME_CR_ASQ_LOW = 0x28, NVME_CR_ASQ_HI = 0x2c, NVME_CR_ACQ_LOW = 0x30, NVME_CR_ACQ_HI = 0x34, }; enum nvme_cmd_cdw11 { NVME_CMD_CDW11_PC = 0x0001, NVME_CMD_CDW11_IEN = 0x0002, NVME_CMD_CDW11_IV = 0xFFFF0000, }; enum nvme_copy_dir { NVME_COPY_TO_PRP, NVME_COPY_FROM_PRP, }; #define NVME_CQ_INTEN 0x01 #define NVME_CQ_INTCOAL 0x02 struct nvme_completion_queue { struct nvme_completion *qbase; pthread_mutex_t mtx; uint32_t size; uint16_t tail; /* nvme progress */ uint16_t head; /* guest progress */ uint16_t intr_vec; uint32_t intr_en; }; struct nvme_submission_queue { struct nvme_command *qbase; pthread_mutex_t mtx; uint32_t size; uint16_t head; /* nvme progress */ uint16_t tail; /* guest progress */ uint16_t cqid; /* completion queue id */ int qpriority; }; enum nvme_storage_type { NVME_STOR_BLOCKIF = 0, NVME_STOR_RAM = 1, }; struct pci_nvme_blockstore { enum nvme_storage_type type; void *ctx; uint64_t size; uint32_t sectsz; uint32_t sectsz_bits; uint64_t eui64; uint32_t deallocate:1; }; /* * Calculate the number of additional page descriptors for guest IO requests * based on the advertised Max Data Transfer (MDTS) and given the number of * default iovec's in a struct blockif_req. */ #define MDTS_PAD_SIZE \ ( NVME_MAX_IOVEC > BLOCKIF_IOV_MAX ? 
\ NVME_MAX_IOVEC - BLOCKIF_IOV_MAX : \ 0 ) struct pci_nvme_ioreq { struct pci_nvme_softc *sc; STAILQ_ENTRY(pci_nvme_ioreq) link; struct nvme_submission_queue *nvme_sq; uint16_t sqid; /* command information */ uint16_t opc; uint16_t cid; uint32_t nsid; uint64_t prev_gpaddr; size_t prev_size; size_t bytes; struct blockif_req io_req; struct iovec iovpadding[MDTS_PAD_SIZE]; }; enum nvme_dsm_type { /* Dataset Management bit in ONCS reflects backing storage capability */ NVME_DATASET_MANAGEMENT_AUTO, /* Unconditionally set Dataset Management bit in ONCS */ NVME_DATASET_MANAGEMENT_ENABLE, /* Unconditionally clear Dataset Management bit in ONCS */ NVME_DATASET_MANAGEMENT_DISABLE, }; struct pci_nvme_softc; struct nvme_feature_obj; typedef void (*nvme_feature_cb)(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); struct nvme_feature_obj { uint32_t cdw11; nvme_feature_cb set; nvme_feature_cb get; bool namespace_specific; }; #define NVME_FID_MAX (NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION + 1) typedef enum { PCI_NVME_AE_TYPE_ERROR = 0, PCI_NVME_AE_TYPE_SMART, PCI_NVME_AE_TYPE_NOTICE, PCI_NVME_AE_TYPE_IO_CMD = 6, PCI_NVME_AE_TYPE_VENDOR = 7, PCI_NVME_AE_TYPE_MAX /* Must be last */ } pci_nvme_async_type; /* Asynchronous Event Requests */ struct pci_nvme_aer { STAILQ_ENTRY(pci_nvme_aer) link; uint16_t cid; /* Command ID of the submitted AER */ }; /** Asynchronous Event Information - Notice */ typedef enum { PCI_NVME_AEI_NOTICE_NS_ATTR_CHANGED = 0, PCI_NVME_AEI_NOTICE_FW_ACTIVATION, PCI_NVME_AEI_NOTICE_TELEMETRY_CHANGE, PCI_NVME_AEI_NOTICE_ANA_CHANGE, PCI_NVME_AEI_NOTICE_PREDICT_LATENCY_CHANGE, PCI_NVME_AEI_NOTICE_LBA_STATUS_ALERT, PCI_NVME_AEI_NOTICE_ENDURANCE_GROUP_CHANGE, PCI_NVME_AEI_NOTICE_MAX, } pci_nvme_async_event_info_notice; #define PCI_NVME_AEI_NOTICE_SHIFT 8 #define PCI_NVME_AEI_NOTICE_MASK(event) (1 << (event + PCI_NVME_AEI_NOTICE_SHIFT)) /* Asynchronous Event Notifications */ struct pci_nvme_aen { pci_nvme_async_type atype; uint32_t event_data; bool posted; }; /* * By default, enable all Asynchrnous Event Notifications: * SMART / Health Critical Warnings * Namespace Attribute Notices */ #define PCI_NVME_AEN_DEFAULT_MASK 0x11f typedef enum { NVME_CNTRLTYPE_IO = 1, NVME_CNTRLTYPE_DISCOVERY = 2, NVME_CNTRLTYPE_ADMIN = 3, } pci_nvme_cntrl_type; struct pci_nvme_softc { struct pci_devinst *nsc_pi; pthread_mutex_t mtx; struct nvme_registers regs; struct nvme_namespace_data nsdata; struct nvme_controller_data ctrldata; struct nvme_error_information_entry err_log; struct nvme_health_information_page health_log; struct nvme_firmware_page fw_log; struct nvme_ns_list ns_log; struct pci_nvme_blockstore nvstore; uint16_t max_qentries; /* max entries per queue */ uint32_t max_queues; /* max number of IO SQ's or CQ's */ uint32_t num_cqueues; uint32_t num_squeues; bool num_q_is_set; /* Has host set Number of Queues */ struct pci_nvme_ioreq *ioreqs; STAILQ_HEAD(, pci_nvme_ioreq) ioreqs_free; /* free list of ioreqs */ uint32_t pending_ios; uint32_t ioslots; sem_t iosemlock; /* * Memory mapped Submission and Completion queues * Each array includes both Admin and IO queues */ struct nvme_completion_queue *compl_queues; struct nvme_submission_queue *submit_queues; struct nvme_feature_obj feat[NVME_FID_MAX]; enum nvme_dsm_type dataset_management; /* Accounting for SMART data */ __uint128_t read_data_units; __uint128_t write_data_units; __uint128_t read_commands; __uint128_t write_commands; uint32_t read_dunits_remainder; uint32_t write_dunits_remainder; 
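	/*
	 * Asynchronous Event Request/Notification state: aer_list is
	 * protected by aer_mtx; the aen[] array and condition variable
	 * by aen_mtx.
	 */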
STAILQ_HEAD(, pci_nvme_aer) aer_list; pthread_mutex_t aer_mtx; uint32_t aer_count; struct pci_nvme_aen aen[PCI_NVME_AE_TYPE_MAX]; pthread_t aen_tid; pthread_mutex_t aen_mtx; pthread_cond_t aen_cond; }; static void pci_nvme_cq_update(struct pci_nvme_softc *sc, struct nvme_completion_queue *cq, uint32_t cdw0, uint16_t cid, uint16_t sqid, uint16_t status); static struct pci_nvme_ioreq *pci_nvme_get_ioreq(struct pci_nvme_softc *); static void pci_nvme_release_ioreq(struct pci_nvme_softc *, struct pci_nvme_ioreq *); static void pci_nvme_io_done(struct blockif_req *, int); /* Controller Configuration utils */ #define NVME_CC_GET_EN(cc) \ ((cc) >> NVME_CC_REG_EN_SHIFT & NVME_CC_REG_EN_MASK) #define NVME_CC_GET_CSS(cc) \ ((cc) >> NVME_CC_REG_CSS_SHIFT & NVME_CC_REG_CSS_MASK) #define NVME_CC_GET_SHN(cc) \ ((cc) >> NVME_CC_REG_SHN_SHIFT & NVME_CC_REG_SHN_MASK) #define NVME_CC_GET_IOSQES(cc) \ ((cc) >> NVME_CC_REG_IOSQES_SHIFT & NVME_CC_REG_IOSQES_MASK) #define NVME_CC_GET_IOCQES(cc) \ ((cc) >> NVME_CC_REG_IOCQES_SHIFT & NVME_CC_REG_IOCQES_MASK) #define NVME_CC_WRITE_MASK \ ((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \ (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \ (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT)) #define NVME_CC_NEN_WRITE_MASK \ ((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \ (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \ (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT)) /* Controller Status utils */ #define NVME_CSTS_GET_RDY(sts) \ ((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK) #define NVME_CSTS_RDY (1 << NVME_CSTS_REG_RDY_SHIFT) #define NVME_CSTS_CFS (1 << NVME_CSTS_REG_CFS_SHIFT) /* Completion Queue status word utils */ #define NVME_STATUS_P (1 << NVME_STATUS_P_SHIFT) #define NVME_STATUS_MASK \ ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\ (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) #define NVME_ONCS_DSM (NVME_CTRLR_DATA_ONCS_DSM_MASK << \ NVME_CTRLR_DATA_ONCS_DSM_SHIFT) static void nvme_feature_invalid_cb(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void nvme_feature_temperature(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void nvme_feature_num_queues(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void nvme_feature_iv_config(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void nvme_feature_async_event(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void *aen_thr(void *arg); static __inline void cpywithpad(char *dst, size_t dst_size, const char *src, char pad) { size_t len; len = strnlen(src, dst_size); memset(dst, pad, dst_size); memcpy(dst, src, len); } static __inline void pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code) { *status &= ~NVME_STATUS_MASK; *status |= (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT | (code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; } static __inline void pci_nvme_status_genc(uint16_t *status, uint16_t code) { pci_nvme_status_tc(status, NVME_SCT_GENERIC, code); } /* * Initialize the requested number or IO Submission and Completion Queues. * Admin queues are allocated implicitly. 
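 * (Each array gets one extra slot so that index 0 can hold the Admin
 * queue; the IO queues occupy indices 1..n.)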
*/ static void pci_nvme_init_queues(struct pci_nvme_softc *sc, uint32_t nsq, uint32_t ncq) { uint32_t i; /* * Allocate and initialize the Submission Queues */ if (nsq > NVME_QUEUES) { WPRINTF("%s: clamping number of SQ from %u to %u", __func__, nsq, NVME_QUEUES); nsq = NVME_QUEUES; } sc->num_squeues = nsq; sc->submit_queues = calloc(sc->num_squeues + 1, sizeof(struct nvme_submission_queue)); if (sc->submit_queues == NULL) { WPRINTF("%s: SQ allocation failed", __func__); sc->num_squeues = 0; } else { struct nvme_submission_queue *sq = sc->submit_queues; for (i = 0; i < sc->num_squeues + 1; i++) pthread_mutex_init(&sq[i].mtx, NULL); } /* * Allocate and initialize the Completion Queues */ if (ncq > NVME_QUEUES) { WPRINTF("%s: clamping number of CQ from %u to %u", __func__, ncq, NVME_QUEUES); ncq = NVME_QUEUES; } sc->num_cqueues = ncq; sc->compl_queues = calloc(sc->num_cqueues + 1, sizeof(struct nvme_completion_queue)); if (sc->compl_queues == NULL) { WPRINTF("%s: CQ allocation failed", __func__); sc->num_cqueues = 0; } else { struct nvme_completion_queue *cq = sc->compl_queues; for (i = 0; i < sc->num_cqueues + 1; i++) pthread_mutex_init(&cq[i].mtx, NULL); } } static void pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) { struct nvme_controller_data *cd = &sc->ctrldata; cd->vid = 0xFB5D; cd->ssvid = 0x0000; cpywithpad((char *)cd->mn, sizeof(cd->mn), "bhyve-NVMe", ' '); cpywithpad((char *)cd->fr, sizeof(cd->fr), "1.0", ' '); /* Num of submission commands that we can handle at a time (2^rab) */ cd->rab = 4; /* FreeBSD OUI */ cd->ieee[0] = 0x58; cd->ieee[1] = 0x9c; cd->ieee[2] = 0xfc; cd->mic = 0; cd->mdts = NVME_MDTS; /* max data transfer size (2^mdts * CAP.MPSMIN) */ cd->ver = NVME_REV(1,4); cd->cntrltype = NVME_CNTRLTYPE_IO; cd->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT; cd->oaes = NVMEB(NVME_CTRLR_DATA_OAES_NS_ATTR); cd->acl = 2; cd->aerl = 4; /* Advertise 1, Read-only firmware slot */ cd->frmw = NVMEB(NVME_CTRLR_DATA_FRMW_SLOT1_RO) | (1 << NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT); cd->lpa = 0; /* TODO: support some simple things like SMART */ cd->elpe = 0; /* max error log page entries */ /* * Report a single power state (zero-based value) * power_state[] values are left as zero to indicate "Not reported" */ cd->npss = 0; /* Warning Composite Temperature Threshold */ cd->wctemp = 0x0157; cd->cctemp = 0x0157; /* SANICAP must not be 0 for Revision 1.4 and later NVMe Controllers */ cd->sanicap = (NVME_CTRLR_DATA_SANICAP_NODMMAS_NO << NVME_CTRLR_DATA_SANICAP_NODMMAS_SHIFT); cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) | (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT); cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) | (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT); cd->nn = 1; /* number of namespaces */ cd->oncs = 0; switch (sc->dataset_management) { case NVME_DATASET_MANAGEMENT_AUTO: if (sc->nvstore.deallocate) cd->oncs |= NVME_ONCS_DSM; break; case NVME_DATASET_MANAGEMENT_ENABLE: cd->oncs |= NVME_ONCS_DSM; break; default: break; } cd->fna = NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK << NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT; cd->vwc = NVME_CTRLR_DATA_VWC_ALL_NO << NVME_CTRLR_DATA_VWC_ALL_SHIFT; } static void pci_nvme_init_nsdata_size(struct pci_nvme_blockstore *nvstore, struct nvme_namespace_data *nd) { /* Get capacity and block size information from backing store */ nd->nsze = nvstore->size / nvstore->sectsz; nd->ncap = nd->nsze; nd->nuse = nd->nsze; } static void pci_nvme_init_nsdata(struct pci_nvme_softc *sc, struct nvme_namespace_data *nd, uint32_t nsid, struct pci_nvme_blockstore *nvstore) { 
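	/*
	 * Fill in the Identify Namespace structure: sizes come from the
	 * backing store, a single LBA format is advertised, and an EUI-64
	 * is derived from the VM name and PCI address when the user did
	 * not supply one.
	 */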
pci_nvme_init_nsdata_size(nvstore, nd); if (nvstore->type == NVME_STOR_BLOCKIF) nvstore->deallocate = blockif_candelete(nvstore->ctx); nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */ nd->flbas = 0; /* Create an EUI-64 if user did not provide one */ if (nvstore->eui64 == 0) { char *data = NULL; uint64_t eui64 = nvstore->eui64; asprintf(&data, "%s%u%u%u", get_config_value("name"), sc->nsc_pi->pi_bus, sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); if (data != NULL) { eui64 = OUI_FREEBSD_NVME_LOW | crc16(0, data, strlen(data)); free(data); } nvstore->eui64 = (eui64 << 16) | (nsid & 0xffff); } be64enc(nd->eui64, nvstore->eui64); /* LBA data-sz = 2^lbads */ nd->lbaf[0] = nvstore->sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; } static void pci_nvme_init_logpages(struct pci_nvme_softc *sc) { __uint128_t power_cycles = 1; memset(&sc->err_log, 0, sizeof(sc->err_log)); memset(&sc->health_log, 0, sizeof(sc->health_log)); memset(&sc->fw_log, 0, sizeof(sc->fw_log)); memset(&sc->ns_log, 0, sizeof(sc->ns_log)); /* Set read/write remainder to round up according to spec */ sc->read_dunits_remainder = 999; sc->write_dunits_remainder = 999; /* Set nominal Health values checked by implementations */ sc->health_log.temperature = NVME_TEMPERATURE; sc->health_log.available_spare = 100; sc->health_log.available_spare_threshold = 10; /* Set Active Firmware Info to slot 1 */ sc->fw_log.afi = (1 << NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT); memcpy(&sc->fw_log.revision[0], sc->ctrldata.fr, sizeof(sc->fw_log.revision[0])); memcpy(&sc->health_log.power_cycles, &power_cycles, sizeof(sc->health_log.power_cycles)); } static void pci_nvme_init_features(struct pci_nvme_softc *sc) { enum nvme_feature fid; for (fid = 0; fid < NVME_FID_MAX; fid++) { switch (fid) { case NVME_FEAT_ARBITRATION: case NVME_FEAT_POWER_MANAGEMENT: case NVME_FEAT_INTERRUPT_COALESCING: //XXX case NVME_FEAT_WRITE_ATOMICITY: /* Mandatory but no special handling required */ //XXX hang - case NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG: //XXX hang - case NVME_FEAT_HOST_BEHAVIOR_SUPPORT: // this returns a data buffer break; case NVME_FEAT_TEMPERATURE_THRESHOLD: sc->feat[fid].set = nvme_feature_temperature; break; case NVME_FEAT_ERROR_RECOVERY: sc->feat[fid].namespace_specific = true; break; case NVME_FEAT_NUMBER_OF_QUEUES: sc->feat[fid].set = nvme_feature_num_queues; break; case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: sc->feat[fid].set = nvme_feature_iv_config; break; case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: sc->feat[fid].set = nvme_feature_async_event; /* Enable all AENs by default */ sc->feat[fid].cdw11 = PCI_NVME_AEN_DEFAULT_MASK; break; default: sc->feat[fid].set = nvme_feature_invalid_cb; sc->feat[fid].get = nvme_feature_invalid_cb; } } } static void pci_nvme_aer_reset(struct pci_nvme_softc *sc) { STAILQ_INIT(&sc->aer_list); sc->aer_count = 0; } static void pci_nvme_aer_init(struct pci_nvme_softc *sc) { pthread_mutex_init(&sc->aer_mtx, NULL); pci_nvme_aer_reset(sc); } static void pci_nvme_aer_destroy(struct pci_nvme_softc *sc) { struct pci_nvme_aer *aer = NULL; pthread_mutex_lock(&sc->aer_mtx); while (!STAILQ_EMPTY(&sc->aer_list)) { aer = STAILQ_FIRST(&sc->aer_list); STAILQ_REMOVE_HEAD(&sc->aer_list, link); free(aer); } pthread_mutex_unlock(&sc->aer_mtx); pci_nvme_aer_reset(sc); } static bool pci_nvme_aer_available(struct pci_nvme_softc *sc) { return (sc->aer_count != 0); } static bool pci_nvme_aer_limit_reached(struct pci_nvme_softc *sc) { struct nvme_controller_data *cd = &sc->ctrldata; /* AERL is a zero based value while aer_count is one's 
based */ return (sc->aer_count == (cd->aerl + 1U)); } /* * Add an Async Event Request * * Stores an AER to be returned later if the Controller needs to notify the * host of an event. * Note that while the NVMe spec doesn't require Controllers to return AER's * in order, this implementation does preserve the order. */ static int pci_nvme_aer_add(struct pci_nvme_softc *sc, uint16_t cid) { struct pci_nvme_aer *aer = NULL; aer = calloc(1, sizeof(struct pci_nvme_aer)); if (aer == NULL) return (-1); /* Save the Command ID for use in the completion message */ aer->cid = cid; pthread_mutex_lock(&sc->aer_mtx); sc->aer_count++; STAILQ_INSERT_TAIL(&sc->aer_list, aer, link); pthread_mutex_unlock(&sc->aer_mtx); return (0); } /* * Get an Async Event Request structure * * Returns a pointer to an AER previously submitted by the host or NULL if * no AER's exist. Caller is responsible for freeing the returned struct. */ static struct pci_nvme_aer * pci_nvme_aer_get(struct pci_nvme_softc *sc) { struct pci_nvme_aer *aer = NULL; pthread_mutex_lock(&sc->aer_mtx); aer = STAILQ_FIRST(&sc->aer_list); if (aer != NULL) { STAILQ_REMOVE_HEAD(&sc->aer_list, link); sc->aer_count--; } pthread_mutex_unlock(&sc->aer_mtx); return (aer); } static void pci_nvme_aen_reset(struct pci_nvme_softc *sc) { uint32_t atype; memset(sc->aen, 0, PCI_NVME_AE_TYPE_MAX * sizeof(struct pci_nvme_aen)); for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) { sc->aen[atype].atype = atype; } } static void pci_nvme_aen_init(struct pci_nvme_softc *sc) { char nstr[80]; pci_nvme_aen_reset(sc); pthread_mutex_init(&sc->aen_mtx, NULL); pthread_create(&sc->aen_tid, NULL, aen_thr, sc); snprintf(nstr, sizeof(nstr), "nvme-aen-%d:%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); pthread_set_name_np(sc->aen_tid, nstr); } static void pci_nvme_aen_destroy(struct pci_nvme_softc *sc) { pci_nvme_aen_reset(sc); } /* Notify the AEN thread of pending work */ static void pci_nvme_aen_notify(struct pci_nvme_softc *sc) { pthread_cond_signal(&sc->aen_cond); } /* * Post an Asynchronous Event Notification */ static int32_t pci_nvme_aen_post(struct pci_nvme_softc *sc, pci_nvme_async_type atype, uint32_t event_data) { struct pci_nvme_aen *aen; if (atype >= PCI_NVME_AE_TYPE_MAX) { return(EINVAL); } pthread_mutex_lock(&sc->aen_mtx); aen = &sc->aen[atype]; /* Has the controller already posted an event of this type? */ if (aen->posted) { pthread_mutex_unlock(&sc->aen_mtx); return(EALREADY); } aen->event_data = event_data; aen->posted = true; pthread_mutex_unlock(&sc->aen_mtx); pci_nvme_aen_notify(sc); return(0); } static void pci_nvme_aen_process(struct pci_nvme_softc *sc) { struct pci_nvme_aer *aer; struct pci_nvme_aen *aen; pci_nvme_async_type atype; uint32_t mask; uint16_t status; uint8_t lid; assert(pthread_mutex_isowned_np(&sc->aen_mtx)); for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) { aen = &sc->aen[atype]; /* Previous iterations may have depleted the available AER's */ if (!pci_nvme_aer_available(sc)) { DPRINTF("%s: no AER", __func__); break; } if (!aen->posted) { DPRINTF("%s: no AEN posted for atype=%#x", __func__, atype); continue; } status = NVME_SC_SUCCESS; /* Is the event masked? 
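 * The mask is the CDW11 value last written via the Asynchronous Event
 * Configuration feature; a cleared bit suppresses that class of
 * notification.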
*/ mask = sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11; DPRINTF("%s: atype=%#x mask=%#x event_data=%#x", __func__, atype, mask, aen->event_data); switch (atype) { case PCI_NVME_AE_TYPE_ERROR: lid = NVME_LOG_ERROR; break; case PCI_NVME_AE_TYPE_SMART: mask &= 0xff; if ((mask & aen->event_data) == 0) continue; lid = NVME_LOG_HEALTH_INFORMATION; break; case PCI_NVME_AE_TYPE_NOTICE: if (aen->event_data >= PCI_NVME_AEI_NOTICE_MAX) { EPRINTLN("%s unknown AEN notice type %u", __func__, aen->event_data); status = NVME_SC_INTERNAL_DEVICE_ERROR; lid = 0; break; } if ((PCI_NVME_AEI_NOTICE_MASK(aen->event_data) & mask) == 0) continue; switch (aen->event_data) { case PCI_NVME_AEI_NOTICE_NS_ATTR_CHANGED: lid = NVME_LOG_CHANGED_NAMESPACE; break; case PCI_NVME_AEI_NOTICE_FW_ACTIVATION: lid = NVME_LOG_FIRMWARE_SLOT; break; case PCI_NVME_AEI_NOTICE_TELEMETRY_CHANGE: lid = NVME_LOG_TELEMETRY_CONTROLLER_INITIATED; break; case PCI_NVME_AEI_NOTICE_ANA_CHANGE: lid = NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS; break; case PCI_NVME_AEI_NOTICE_PREDICT_LATENCY_CHANGE: lid = NVME_LOG_PREDICTABLE_LATENCY_EVENT_AGGREGATE; break; case PCI_NVME_AEI_NOTICE_LBA_STATUS_ALERT: lid = NVME_LOG_LBA_STATUS_INFORMATION; break; case PCI_NVME_AEI_NOTICE_ENDURANCE_GROUP_CHANGE: lid = NVME_LOG_ENDURANCE_GROUP_EVENT_AGGREGATE; break; default: lid = 0; } break; default: /* bad type?!? */ EPRINTLN("%s unknown AEN type %u", __func__, atype); status = NVME_SC_INTERNAL_DEVICE_ERROR; lid = 0; break; } aer = pci_nvme_aer_get(sc); assert(aer != NULL); DPRINTF("%s: CID=%#x CDW0=%#x", __func__, aer->cid, (lid << 16) | (aen->event_data << 8) | atype); pci_nvme_cq_update(sc, &sc->compl_queues[0], (lid << 16) | (aen->event_data << 8) | atype, /* cdw0 */ aer->cid, 0, /* SQID */ status); aen->event_data = 0; aen->posted = false; pci_generate_msix(sc->nsc_pi, 0); } } static void * aen_thr(void *arg) { struct pci_nvme_softc *sc; sc = arg; pthread_mutex_lock(&sc->aen_mtx); for (;;) { pci_nvme_aen_process(sc); pthread_cond_wait(&sc->aen_cond, &sc->aen_mtx); } pthread_mutex_unlock(&sc->aen_mtx); pthread_exit(NULL); return (NULL); } static void pci_nvme_reset_locked(struct pci_nvme_softc *sc) { uint32_t i; DPRINTF("%s", __func__); sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | (1 << NVME_CAP_LO_REG_CQR_SHIFT) | (60 << NVME_CAP_LO_REG_TO_SHIFT); sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT; sc->regs.vs = NVME_REV(1,4); /* NVMe v1.4 */ sc->regs.cc = 0; assert(sc->submit_queues != NULL); for (i = 0; i < sc->num_squeues + 1; i++) { sc->submit_queues[i].qbase = NULL; sc->submit_queues[i].size = 0; sc->submit_queues[i].cqid = 0; sc->submit_queues[i].tail = 0; sc->submit_queues[i].head = 0; } assert(sc->compl_queues != NULL); for (i = 0; i < sc->num_cqueues + 1; i++) { sc->compl_queues[i].qbase = NULL; sc->compl_queues[i].size = 0; sc->compl_queues[i].tail = 0; sc->compl_queues[i].head = 0; } sc->num_q_is_set = false; pci_nvme_aer_destroy(sc); pci_nvme_aen_destroy(sc); /* * Clear CSTS.RDY last to prevent the host from enabling Controller * before cleanup completes */ sc->regs.csts = 0; } static void pci_nvme_reset(struct pci_nvme_softc *sc) { pthread_mutex_lock(&sc->mtx); pci_nvme_reset_locked(sc); pthread_mutex_unlock(&sc->mtx); } static int pci_nvme_init_controller(struct pci_nvme_softc *sc) { uint16_t acqs, asqs; DPRINTF("%s", __func__); /* * NVMe 2.0 states that "enabling a controller while this field is * cleared to 0h produces undefined results" for both ACQS and * ASQS. If zero, set CFS and do not become ready. 
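 * ONE_BASED(0) evaluates to 1, so the "< 2" checks below reject the
 * all-zeroes case as well as one-entry queues (queues must contain at
 * least two entries).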
*/ asqs = ONE_BASED(sc->regs.aqa & NVME_AQA_REG_ASQS_MASK); if (asqs < 2) { EPRINTLN("%s: illegal ASQS value %#x (aqa=%#x)", __func__, asqs - 1, sc->regs.aqa); sc->regs.csts |= NVME_CSTS_CFS; return (-1); } sc->submit_queues[0].size = asqs; sc->submit_queues[0].qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, sc->regs.asq, sizeof(struct nvme_command) * asqs); if (sc->submit_queues[0].qbase == NULL) { EPRINTLN("%s: ASQ vm_map_gpa(%lx) failed", __func__, sc->regs.asq); sc->regs.csts |= NVME_CSTS_CFS; return (-1); } DPRINTF("%s mapping Admin-SQ guest 0x%lx, host: %p", __func__, sc->regs.asq, sc->submit_queues[0].qbase); acqs = ONE_BASED((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & NVME_AQA_REG_ACQS_MASK); if (acqs < 2) { EPRINTLN("%s: illegal ACQS value %#x (aqa=%#x)", __func__, acqs - 1, sc->regs.aqa); sc->regs.csts |= NVME_CSTS_CFS; return (-1); } sc->compl_queues[0].size = acqs; sc->compl_queues[0].qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, sc->regs.acq, sizeof(struct nvme_completion) * acqs); if (sc->compl_queues[0].qbase == NULL) { EPRINTLN("%s: ACQ vm_map_gpa(%lx) failed", __func__, sc->regs.acq); sc->regs.csts |= NVME_CSTS_CFS; return (-1); } sc->compl_queues[0].intr_en = NVME_CQ_INTEN; DPRINTF("%s mapping Admin-CQ guest 0x%lx, host: %p", __func__, sc->regs.acq, sc->compl_queues[0].qbase); return (0); } static int nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *b, size_t len, enum nvme_copy_dir dir) { uint8_t *p; size_t bytes; if (len > (8 * 1024)) { return (-1); } /* Copy from the start of prp1 to the end of the physical page */ bytes = PAGE_SIZE - (prp1 & PAGE_MASK); bytes = MIN(bytes, len); p = vm_map_gpa(ctx, prp1, bytes); if (p == NULL) { return (-1); } if (dir == NVME_COPY_TO_PRP) memcpy(p, b, bytes); else memcpy(b, p, bytes); b += bytes; len -= bytes; if (len == 0) { return (0); } len = MIN(len, PAGE_SIZE); p = vm_map_gpa(ctx, prp2, len); if (p == NULL) { return (-1); } if (dir == NVME_COPY_TO_PRP) memcpy(p, b, len); else memcpy(b, p, len); return (0); } /* * Write a Completion Queue Entry update * * Write the completion and update the doorbell value */ static void pci_nvme_cq_update(struct pci_nvme_softc *sc, struct nvme_completion_queue *cq, uint32_t cdw0, uint16_t cid, uint16_t sqid, uint16_t status) { struct nvme_submission_queue *sq = &sc->submit_queues[sqid]; struct nvme_completion *cqe; assert(cq->qbase != NULL); pthread_mutex_lock(&cq->mtx); cqe = &cq->qbase[cq->tail]; /* Flip the phase bit */ status |= (cqe->status ^ NVME_STATUS_P) & NVME_STATUS_P_MASK; cqe->cdw0 = cdw0; cqe->sqhd = sq->head; cqe->sqid = sqid; cqe->cid = cid; cqe->status = status; cq->tail++; if (cq->tail >= cq->size) { cq->tail = 0; } pthread_mutex_unlock(&cq->mtx); } static int nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; DPRINTF("%s DELETE_IO_SQ %u", __func__, qid); if (qid == 0 || qid > sc->num_squeues || (sc->submit_queues[qid].qbase == NULL)) { WPRINTF("%s NOT PERMITTED queue id %u / num_squeues %u", __func__, qid, sc->num_squeues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } sc->submit_queues[qid].qbase = NULL; sc->submit_queues[qid].cqid = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { if (command->cdw11 & NVME_CMD_CDW11_PC) { uint16_t qid = command->cdw10 & 
0xffff; struct nvme_submission_queue *nsq; if ((qid == 0) || (qid > sc->num_squeues) || (sc->submit_queues[qid].qbase != NULL)) { WPRINTF("%s queue index %u > num_squeues %u", __func__, qid, sc->num_squeues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } nsq = &sc->submit_queues[qid]; nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); DPRINTF("%s size=%u (max=%u)", __func__, nsq->size, sc->max_qentries); if ((nsq->size < 2) || (nsq->size > sc->max_qentries)) { /* * Queues must specify at least two entries * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec */ pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); return (1); } nsq->head = nsq->tail = 0; nsq->cqid = (command->cdw11 >> 16) & 0xffff; if ((nsq->cqid == 0) || (nsq->cqid > sc->num_cqueues)) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } if (sc->compl_queues[nsq->cqid].qbase == NULL) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_COMPLETION_QUEUE_INVALID); return (1); } nsq->qpriority = (command->cdw11 >> 1) & 0x03; nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)nsq->size); DPRINTF("%s sq %u size %u gaddr %p cqid %u", __func__, qid, nsq->size, nsq->qbase, nsq->cqid); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); DPRINTF("%s completed creating IOSQ qid %u", __func__, qid); } else { /* * Guest sent non-cont submission queue request. * This setting is unsupported by this emulation. */ WPRINTF("%s unsupported non-contig (list-based) " "create i/o submission queue", __func__); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } return (1); } static int nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; uint16_t sqid; DPRINTF("%s DELETE_IO_CQ %u", __func__, qid); if (qid == 0 || qid > sc->num_cqueues || (sc->compl_queues[qid].qbase == NULL)) { WPRINTF("%s queue index %u / num_cqueues %u", __func__, qid, sc->num_cqueues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } /* Deleting an Active CQ is an error */ for (sqid = 1; sqid < sc->num_squeues + 1; sqid++) if (sc->submit_queues[sqid].cqid == qid) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_DELETION); return (1); } sc->compl_queues[qid].qbase = NULL; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { struct nvme_completion_queue *ncq; uint16_t qid = command->cdw10 & 0xffff; /* Only support Physically Contiguous queues */ if ((command->cdw11 & NVME_CMD_CDW11_PC) == 0) { WPRINTF("%s unsupported non-contig (list-based) " "create i/o completion queue", __func__); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } if ((qid == 0) || (qid > sc->num_cqueues) || (sc->compl_queues[qid].qbase != NULL)) { WPRINTF("%s queue index %u > num_cqueues %u", __func__, qid, sc->num_cqueues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } ncq = &sc->compl_queues[qid]; ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; ncq->intr_vec = (command->cdw11 
>> 16) & 0xffff; if (ncq->intr_vec > (sc->max_queues + 1)) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_INTERRUPT_VECTOR); return (1); } ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); if ((ncq->size < 2) || (ncq->size > sc->max_qentries)) { /* * Queues must specify at least two entries * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec */ pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); return (1); } ncq->head = ncq->tail = 0; ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)ncq->size); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint64_t logoff; uint32_t logsize; uint8_t logpage; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); /* * Command specifies the number of dwords to return in fields NUMDU * and NUMDL. This is a zero-based value. */ logpage = command->cdw10 & 0xFF; logsize = ((command->cdw11 << 16) | (command->cdw10 >> 16)) + 1; logsize *= sizeof(uint32_t); logoff = ((uint64_t)(command->cdw13) << 32) | command->cdw12; DPRINTF("%s log page %u len %u", __func__, logpage, logsize); switch (logpage) { case NVME_LOG_ERROR: if (logoff >= sizeof(sc->err_log)) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); break; } nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->err_log + logoff, MIN(logsize - logoff, sizeof(sc->err_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_HEALTH_INFORMATION: if (logoff >= sizeof(sc->health_log)) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); break; } pthread_mutex_lock(&sc->mtx); memcpy(&sc->health_log.data_units_read, &sc->read_data_units, sizeof(sc->health_log.data_units_read)); memcpy(&sc->health_log.data_units_written, &sc->write_data_units, sizeof(sc->health_log.data_units_written)); memcpy(&sc->health_log.host_read_commands, &sc->read_commands, sizeof(sc->health_log.host_read_commands)); memcpy(&sc->health_log.host_write_commands, &sc->write_commands, sizeof(sc->health_log.host_write_commands)); pthread_mutex_unlock(&sc->mtx); nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->health_log + logoff, MIN(logsize - logoff, sizeof(sc->health_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_FIRMWARE_SLOT: if (logoff >= sizeof(sc->fw_log)) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); break; } nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->fw_log + logoff, MIN(logsize - logoff, sizeof(sc->fw_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_CHANGED_NAMESPACE: if (logoff >= sizeof(sc->ns_log)) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); break; } nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->ns_log + logoff, MIN(logsize - logoff, sizeof(sc->ns_log)), NVME_COPY_TO_PRP); memset(&sc->ns_log, 0, sizeof(sc->ns_log)); break; default: DPRINTF("%s get log page %x command not supported", __func__, logpage); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_LOG_PAGE); } return (1); } static int nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { void *dest; uint16_t status; DPRINTF("%s identify 0x%x nsid 0x%x", __func__, command->cdw10 & 0xFF, 
command->nsid); status = 0; pci_nvme_status_genc(&status, NVME_SC_SUCCESS); switch (command->cdw10 & 0xFF) { case 0x00: /* return Identify Namespace data structure */ /* Global NS only valid with NS Management */ if (command->nsid == NVME_GLOBAL_NAMESPACE_TAG) { pci_nvme_status_genc(&status, NVME_SC_INVALID_NAMESPACE_OR_FORMAT); break; } nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata), NVME_COPY_TO_PRP); break; case 0x01: /* return Identify Controller data structure */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->ctrldata, sizeof(sc->ctrldata), NVME_COPY_TO_PRP); break; case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); /* All unused entries shall be zero */ memset(dest, 0, sizeof(uint32_t) * 1024); ((uint32_t *)dest)[0] = 1; break; case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */ if (command->nsid != 1) { pci_nvme_status_genc(&status, NVME_SC_INVALID_NAMESPACE_OR_FORMAT); break; } dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); /* All bytes after the descriptor shall be zero */ memset(dest, 0, sizeof(uint32_t) * 1024); /* Return NIDT=1 (i.e. EUI64) descriptor */ ((uint8_t *)dest)[0] = 1; ((uint8_t *)dest)[1] = sizeof(uint64_t); memcpy(((uint8_t *)dest) + 4, sc->nsdata.eui64, sizeof(uint64_t)); break; case 0x13: /* * Controller list is optional but used by UNH tests. Return * a valid but empty list. */ dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint16_t) * 2048); memset(dest, 0, sizeof(uint16_t) * 2048); break; default: DPRINTF("%s unsupported identify command requested 0x%x", __func__, command->cdw10 & 0xFF); pci_nvme_status_genc(&status, NVME_SC_INVALID_FIELD); break; } compl->status = status; return (1); } static const char * nvme_fid_to_name(uint8_t fid) { const char *name; switch (fid) { case NVME_FEAT_ARBITRATION: name = "Arbitration"; break; case NVME_FEAT_POWER_MANAGEMENT: name = "Power Management"; break; case NVME_FEAT_LBA_RANGE_TYPE: name = "LBA Range Type"; break; case NVME_FEAT_TEMPERATURE_THRESHOLD: name = "Temperature Threshold"; break; case NVME_FEAT_ERROR_RECOVERY: name = "Error Recovery"; break; case NVME_FEAT_VOLATILE_WRITE_CACHE: name = "Volatile Write Cache"; break; case NVME_FEAT_NUMBER_OF_QUEUES: name = "Number of Queues"; break; case NVME_FEAT_INTERRUPT_COALESCING: name = "Interrupt Coalescing"; break; case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: name = "Interrupt Vector Configuration"; break; case NVME_FEAT_WRITE_ATOMICITY: name = "Write Atomicity Normal"; break; case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: name = "Asynchronous Event Configuration"; break; case NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: name = "Autonomous Power State Transition"; break; case NVME_FEAT_HOST_MEMORY_BUFFER: name = "Host Memory Buffer"; break; case NVME_FEAT_TIMESTAMP: name = "Timestamp"; break; case NVME_FEAT_KEEP_ALIVE_TIMER: name = "Keep Alive Timer"; break; case NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT: name = "Host Controlled Thermal Management"; break; case NVME_FEAT_NON_OP_POWER_STATE_CONFIG: name = "Non-Operation Power State Config"; break; case NVME_FEAT_READ_RECOVERY_LEVEL_CONFIG: name = "Read Recovery Level Config"; break; case NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG: name = "Predictable Latency Mode Config"; break; case NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW: name = "Predictable Latency Mode Window"; break; case 
NVME_FEAT_LBA_STATUS_INFORMATION_ATTRIBUTES: name = "LBA Status Information Report Interval"; break; case NVME_FEAT_HOST_BEHAVIOR_SUPPORT: name = "Host Behavior Support"; break; case NVME_FEAT_SANITIZE_CONFIG: name = "Sanitize Config"; break; case NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION: name = "Endurance Group Event Configuration"; break; case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: name = "Software Progress Marker"; break; case NVME_FEAT_HOST_IDENTIFIER: name = "Host Identifier"; break; case NVME_FEAT_RESERVATION_NOTIFICATION_MASK: name = "Reservation Notification Mask"; break; case NVME_FEAT_RESERVATION_PERSISTENCE: name = "Reservation Persistence"; break; case NVME_FEAT_NAMESPACE_WRITE_PROTECTION_CONFIG: name = "Namespace Write Protection Config"; break; default: name = "Unknown"; break; } return (name); } static void nvme_feature_invalid_cb(struct pci_nvme_softc *sc __unused, struct nvme_feature_obj *feat __unused, struct nvme_command *command __unused, struct nvme_completion *compl) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } static void nvme_feature_iv_config(struct pci_nvme_softc *sc, struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { uint32_t i; uint32_t cdw11 = command->cdw11; uint16_t iv; bool cd; pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); iv = cdw11 & 0xffff; cd = cdw11 & (1 << 16); if (iv > (sc->max_queues + 1)) { return; } /* No Interrupt Coalescing (i.e. not Coalescing Disable) for Admin Q */ if ((iv == 0) && !cd) return; /* Requested Interrupt Vector must be used by a CQ */ for (i = 0; i < sc->num_cqueues + 1; i++) { if (sc->compl_queues[i].intr_vec == iv) { pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); } } } #define NVME_ASYNC_EVENT_ENDURANCE_GROUP (0x4000) static void nvme_feature_async_event(struct pci_nvme_softc *sc __unused, struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { if (command->cdw11 & NVME_ASYNC_EVENT_ENDURANCE_GROUP) pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } #define NVME_TEMP_THRESH_OVER 0 #define NVME_TEMP_THRESH_UNDER 1 static void nvme_feature_temperature(struct pci_nvme_softc *sc, struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { uint16_t tmpth; /* Temperature Threshold */ uint8_t tmpsel; /* Threshold Temperature Select */ uint8_t thsel; /* Threshold Type Select */ bool set_crit = false; bool report_crit; tmpth = command->cdw11 & 0xffff; tmpsel = (command->cdw11 >> 16) & 0xf; thsel = (command->cdw11 >> 20) & 0x3; DPRINTF("%s: tmpth=%#x tmpsel=%#x thsel=%#x", __func__, tmpth, tmpsel, thsel); /* Check for unsupported values */ if (((tmpsel != 0) && (tmpsel != 0xf)) || (thsel > NVME_TEMP_THRESH_UNDER)) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return; } if (((thsel == NVME_TEMP_THRESH_OVER) && (NVME_TEMPERATURE >= tmpth)) || ((thsel == NVME_TEMP_THRESH_UNDER) && (NVME_TEMPERATURE <= tmpth))) set_crit = true; pthread_mutex_lock(&sc->mtx); if (set_crit) sc->health_log.critical_warning |= NVME_CRIT_WARN_ST_TEMPERATURE; else sc->health_log.critical_warning &= ~NVME_CRIT_WARN_ST_TEMPERATURE; pthread_mutex_unlock(&sc->mtx); report_crit = sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11 & NVME_CRIT_WARN_ST_TEMPERATURE; if (set_crit && report_crit) pci_nvme_aen_post(sc, PCI_NVME_AE_TYPE_SMART, sc->health_log.critical_warning); DPRINTF("%s: set_crit=%c critical_warning=%#x status=%#x", __func__, set_crit ? 
'T':'F', sc->health_log.critical_warning, compl->status); } static void nvme_feature_num_queues(struct pci_nvme_softc *sc, struct nvme_feature_obj *feat __unused, struct nvme_command *command, struct nvme_completion *compl) { uint16_t nqr; /* Number of Queues Requested */ if (sc->num_q_is_set) { WPRINTF("%s: Number of Queues already set", __func__); pci_nvme_status_genc(&compl->status, NVME_SC_COMMAND_SEQUENCE_ERROR); return; } nqr = command->cdw11 & 0xFFFF; if (nqr == 0xffff) { WPRINTF("%s: Illegal NSQR value %#x", __func__, nqr); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return; } sc->num_squeues = ONE_BASED(nqr); if (sc->num_squeues > sc->max_queues) { DPRINTF("NSQR=%u is greater than max %u", sc->num_squeues, sc->max_queues); sc->num_squeues = sc->max_queues; } nqr = (command->cdw11 >> 16) & 0xFFFF; if (nqr == 0xffff) { WPRINTF("%s: Illegal NCQR value %#x", __func__, nqr); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return; } sc->num_cqueues = ONE_BASED(nqr); if (sc->num_cqueues > sc->max_queues) { DPRINTF("NCQR=%u is greater than max %u", sc->num_cqueues, sc->max_queues); sc->num_cqueues = sc->max_queues; } /* Patch the command value which will be saved on callback's return */ command->cdw11 = NVME_FEATURE_NUM_QUEUES(sc); compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); sc->num_q_is_set = true; } static int nvme_opc_set_features(struct pci_nvme_softc *sc, struct nvme_command *command, struct nvme_completion *compl) { struct nvme_feature_obj *feat; uint32_t nsid = command->nsid; uint8_t fid = NVMEV(NVME_FEAT_SET_FID, command->cdw10); bool sv = NVMEV(NVME_FEAT_SET_SV, command->cdw10); DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); if (fid >= NVME_FID_MAX) { DPRINTF("%s invalid feature 0x%x", __func__, fid); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } if (sv) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_FEATURE_NOT_SAVEABLE); return (1); } feat = &sc->feat[fid]; if (feat->namespace_specific && (nsid == NVME_GLOBAL_NAMESPACE_TAG)) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } if (!feat->namespace_specific && !((nsid == 0) || (nsid == NVME_GLOBAL_NAMESPACE_TAG))) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_FEATURE_NOT_NS_SPECIFIC); return (1); } compl->cdw0 = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); if (feat->set) feat->set(sc, feat, command, compl); else { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_FEATURE_NOT_CHANGEABLE); return (1); } DPRINTF("%s: status=%#x cdw11=%#x", __func__, compl->status, command->cdw11); if (compl->status == NVME_SC_SUCCESS) { feat->cdw11 = command->cdw11; if ((fid == NVME_FEAT_ASYNC_EVENT_CONFIGURATION) && (command->cdw11 != 0)) pci_nvme_aen_notify(sc); } return (0); } #define NVME_FEATURES_SEL_SUPPORTED 0x3 #define NVME_FEATURES_NS_SPECIFIC (1 << 1) static int nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { struct nvme_feature_obj *feat; uint8_t fid = command->cdw10 & 0xFF; uint8_t sel = (command->cdw10 >> 8) & 0x7; DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); if (fid >= NVME_FID_MAX) { DPRINTF("%s invalid feature 0x%x", __func__, fid); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } compl->cdw0 = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); feat = &sc->feat[fid]; if (feat->get) { feat->get(sc, feat, command, 
compl); } if (compl->status == NVME_SC_SUCCESS) { if ((sel == NVME_FEATURES_SEL_SUPPORTED) && feat->namespace_specific) compl->cdw0 = NVME_FEATURES_NS_SPECIFIC; else compl->cdw0 = feat->cdw11; } return (0); } static int nvme_opc_format_nvm(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint8_t ses, lbaf, pi; /* Only supports Secure Erase Setting - User Data Erase */ ses = (command->cdw10 >> 9) & 0x7; if (ses > 0x1) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } /* Only supports a single LBA Format */ lbaf = command->cdw10 & 0xf; if (lbaf != 0) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_FORMAT); return (1); } - /* Doesn't support Protection Infomation */ + /* Doesn't support Protection Information */ pi = (command->cdw10 >> 5) & 0x7; if (pi != 0) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } if (sc->nvstore.type == NVME_STOR_RAM) { if (sc->nvstore.ctx) free(sc->nvstore.ctx); sc->nvstore.ctx = calloc(1, sc->nvstore.size); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); } else { struct pci_nvme_ioreq *req; int err; req = pci_nvme_get_ioreq(sc); if (req == NULL) { pci_nvme_status_genc(&compl->status, NVME_SC_INTERNAL_DEVICE_ERROR); WPRINTF("%s: unable to allocate IO req", __func__); return (1); } req->nvme_sq = &sc->submit_queues[0]; req->sqid = 0; req->opc = command->opc; req->cid = command->cid; req->nsid = command->nsid; req->io_req.br_offset = 0; req->io_req.br_resid = sc->nvstore.size; req->io_req.br_callback = pci_nvme_io_done; err = blockif_delete(sc->nvstore.ctx, &req->io_req); if (err) { pci_nvme_status_genc(&compl->status, NVME_SC_INTERNAL_DEVICE_ERROR); pci_nvme_release_ioreq(sc, req); } else compl->status = NVME_NO_STATUS; } return (1); } static int nvme_opc_abort(struct pci_nvme_softc *sc __unused, struct nvme_command *command, struct nvme_completion *compl) { DPRINTF("%s submission queue %u, command ID 0x%x", __func__, command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF); /* TODO: search for the command ID and abort it */ compl->cdw0 = 1; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_async_event_req(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { DPRINTF("%s async event request count=%u aerl=%u cid=%#x", __func__, sc->aer_count, sc->ctrldata.aerl, command->cid); /* Don't exceed the Async Event Request Limit (AERL). */ if (pci_nvme_aer_limit_reached(sc)) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); return (1); } if (pci_nvme_aer_add(sc, command->cid)) { pci_nvme_status_tc(&compl->status, NVME_SCT_GENERIC, NVME_SC_INTERNAL_DEVICE_ERROR); return (1); } /* * Raise events when they happen based on the Set Features cmd. * These events happen async, so only set completion successful if * there is an event reflective of the request to get event. 
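* Leaving the status as NVME_NO_STATUS below means no completion entry is posted now; the outstanding AER is expected to be completed later, once a matching enabled event is posted.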
*/ compl->status = NVME_NO_STATUS; pci_nvme_aen_notify(sc); return (0); } static void pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) { struct nvme_completion compl; struct nvme_command *cmd; struct nvme_submission_queue *sq; struct nvme_completion_queue *cq; uint16_t sqhead; DPRINTF("%s index %u", __func__, (uint32_t)value); sq = &sc->submit_queues[0]; cq = &sc->compl_queues[0]; pthread_mutex_lock(&sq->mtx); sqhead = sq->head; DPRINTF("sqhead %u, tail %u", sqhead, sq->tail); while (sqhead != atomic_load_acq_short(&sq->tail)) { cmd = &(sq->qbase)[sqhead]; compl.cdw0 = 0; compl.status = 0; switch (cmd->opc) { case NVME_OPC_DELETE_IO_SQ: DPRINTF("%s command DELETE_IO_SQ", __func__); nvme_opc_delete_io_sq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_SQ: DPRINTF("%s command CREATE_IO_SQ", __func__); nvme_opc_create_io_sq(sc, cmd, &compl); break; case NVME_OPC_DELETE_IO_CQ: DPRINTF("%s command DELETE_IO_CQ", __func__); nvme_opc_delete_io_cq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_CQ: DPRINTF("%s command CREATE_IO_CQ", __func__); nvme_opc_create_io_cq(sc, cmd, &compl); break; case NVME_OPC_GET_LOG_PAGE: DPRINTF("%s command GET_LOG_PAGE", __func__); nvme_opc_get_log_page(sc, cmd, &compl); break; case NVME_OPC_IDENTIFY: DPRINTF("%s command IDENTIFY", __func__); nvme_opc_identify(sc, cmd, &compl); break; case NVME_OPC_ABORT: DPRINTF("%s command ABORT", __func__); nvme_opc_abort(sc, cmd, &compl); break; case NVME_OPC_SET_FEATURES: DPRINTF("%s command SET_FEATURES", __func__); nvme_opc_set_features(sc, cmd, &compl); break; case NVME_OPC_GET_FEATURES: DPRINTF("%s command GET_FEATURES", __func__); nvme_opc_get_features(sc, cmd, &compl); break; case NVME_OPC_FIRMWARE_ACTIVATE: DPRINTF("%s command FIRMWARE_ACTIVATE", __func__); pci_nvme_status_tc(&compl.status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_FIRMWARE_SLOT); break; case NVME_OPC_ASYNC_EVENT_REQUEST: DPRINTF("%s command ASYNC_EVENT_REQ", __func__); nvme_opc_async_event_req(sc, cmd, &compl); break; case NVME_OPC_FORMAT_NVM: DPRINTF("%s command FORMAT_NVM", __func__); if ((sc->ctrldata.oacs & (1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT)) == 0) { pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); break; } nvme_opc_format_nvm(sc, cmd, &compl); break; case NVME_OPC_SECURITY_SEND: case NVME_OPC_SECURITY_RECEIVE: case NVME_OPC_SANITIZE: case NVME_OPC_GET_LBA_STATUS: DPRINTF("%s command OPC=%#x (unsupported)", __func__, cmd->opc); /* Valid but unsupported opcodes */ pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_FIELD); break; default: DPRINTF("%s command OPC=%#X (not implemented)", __func__, cmd->opc); pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); } sqhead = (sqhead + 1) % sq->size; if (NVME_COMPLETION_VALID(compl)) { pci_nvme_cq_update(sc, &sc->compl_queues[0], compl.cdw0, cmd->cid, 0, /* SQID */ compl.status); } } DPRINTF("setting sqhead %u", sqhead); sq->head = sqhead; if (cq->head != cq->tail) pci_generate_msix(sc->nsc_pi, 0); pthread_mutex_unlock(&sq->mtx); } /* * Update the Write and Read statistics reported in SMART data * * NVMe defines "data unit" as thousand's of 512 byte blocks and is rounded up. * E.g. 1 data unit is 1 - 1,000 512 byte blocks. 3 data units are 2,001 - 3,000 - * 512 byte blocks. Rounding up is acheived by initializing the remainder to 999. + * 512 byte blocks. Rounding up is achieved by initializing the remainder to 999. 
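* For example, starting from an initialized remainder of 999, a 1 MiB write adds 2,048 blocks for a running total of 3,047, which the loop below reduces to 3 data units with a remainder of 47.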
*/ static void pci_nvme_stats_write_read_update(struct pci_nvme_softc *sc, uint8_t opc, size_t bytes, uint16_t status) { pthread_mutex_lock(&sc->mtx); switch (opc) { case NVME_OPC_WRITE: sc->write_commands++; if (status != NVME_SC_SUCCESS) break; sc->write_dunits_remainder += (bytes / 512); while (sc->write_dunits_remainder >= 1000) { sc->write_data_units++; sc->write_dunits_remainder -= 1000; } break; case NVME_OPC_READ: sc->read_commands++; if (status != NVME_SC_SUCCESS) break; sc->read_dunits_remainder += (bytes / 512); while (sc->read_dunits_remainder >= 1000) { sc->read_data_units++; sc->read_dunits_remainder -= 1000; } break; default: DPRINTF("%s: Invalid OPC 0x%02x for stats", __func__, opc); break; } pthread_mutex_unlock(&sc->mtx); } /* * Check if the combination of Starting LBA (slba) and number of blocks * exceeds the range of the underlying storage. * * Because NVMe specifies the SLBA in blocks as a uint64_t and blockif stores * the capacity in bytes as a uint64_t, care must be taken to avoid integer * overflow. */ static bool pci_nvme_out_of_range(struct pci_nvme_blockstore *nvstore, uint64_t slba, uint32_t nblocks) { size_t offset, bytes; /* Overflow check of multiplying Starting LBA by the sector size */ if (slba >> (64 - nvstore->sectsz_bits)) return (true); offset = slba << nvstore->sectsz_bits; bytes = nblocks << nvstore->sectsz_bits; /* Overflow check of Number of Logical Blocks */ if ((nvstore->size <= offset) || ((nvstore->size - offset) < bytes)) return (true); return (false); } static int pci_nvme_append_iov_req(struct pci_nvme_softc *sc __unused, struct pci_nvme_ioreq *req, uint64_t gpaddr, size_t size, uint64_t offset) { int iovidx; bool range_is_contiguous; if (req == NULL) return (-1); if (req->io_req.br_iovcnt == NVME_MAX_IOVEC) { return (-1); } /* * Minimize the number of IOVs by concatenating contiguous address * ranges. If the IOV count is zero, there is no previous range to * concatenate. 
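* For example, if the previous range ended at guest physical address 0x10FFF and the next PRP entry starts at 0x11000, the existing iovec is extended rather than consuming another br_iov slot.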
*/ if (req->io_req.br_iovcnt == 0) range_is_contiguous = false; else range_is_contiguous = (req->prev_gpaddr + req->prev_size) == gpaddr; if (range_is_contiguous) { iovidx = req->io_req.br_iovcnt - 1; req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, req->prev_gpaddr, size); if (req->io_req.br_iov[iovidx].iov_base == NULL) return (-1); req->prev_size += size; req->io_req.br_resid += size; req->io_req.br_iov[iovidx].iov_len = req->prev_size; } else { iovidx = req->io_req.br_iovcnt; if (iovidx == 0) { req->io_req.br_offset = offset; req->io_req.br_resid = 0; req->io_req.br_param = req; } req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, gpaddr, size); if (req->io_req.br_iov[iovidx].iov_base == NULL) return (-1); req->io_req.br_iov[iovidx].iov_len = size; req->prev_gpaddr = gpaddr; req->prev_size = size; req->io_req.br_resid += size; req->io_req.br_iovcnt++; } return (0); } static void pci_nvme_set_completion(struct pci_nvme_softc *sc, struct nvme_submission_queue *sq, int sqid, uint16_t cid, uint16_t status) { struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; DPRINTF("%s sqid %d cqid %u cid %u status: 0x%x 0x%x", __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status)); pci_nvme_cq_update(sc, cq, 0, cid, sqid, status); if (cq->head != cq->tail) { if (cq->intr_en & NVME_CQ_INTEN) { pci_generate_msix(sc->nsc_pi, cq->intr_vec); } else { DPRINTF("%s: CQ%u interrupt disabled", __func__, sq->cqid); } } } static void pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req) { req->sc = NULL; req->nvme_sq = NULL; req->sqid = 0; pthread_mutex_lock(&sc->mtx); STAILQ_INSERT_TAIL(&sc->ioreqs_free, req, link); sc->pending_ios--; /* when no more IO pending, can set to ready if device reset/enabled */ if (sc->pending_ios == 0 && NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts))) sc->regs.csts |= NVME_CSTS_RDY; pthread_mutex_unlock(&sc->mtx); sem_post(&sc->iosemlock); } static struct pci_nvme_ioreq * pci_nvme_get_ioreq(struct pci_nvme_softc *sc) { struct pci_nvme_ioreq *req = NULL; sem_wait(&sc->iosemlock); pthread_mutex_lock(&sc->mtx); req = STAILQ_FIRST(&sc->ioreqs_free); assert(req != NULL); STAILQ_REMOVE_HEAD(&sc->ioreqs_free, link); req->sc = sc; sc->pending_ios++; pthread_mutex_unlock(&sc->mtx); req->io_req.br_iovcnt = 0; req->io_req.br_offset = 0; req->io_req.br_resid = 0; req->io_req.br_param = req; req->prev_gpaddr = 0; req->prev_size = 0; return req; } static void pci_nvme_io_done(struct blockif_req *br, int err) { struct pci_nvme_ioreq *req = br->br_param; struct nvme_submission_queue *sq = req->nvme_sq; uint16_t code, status; DPRINTF("%s error %d %s", __func__, err, strerror(err)); /* TODO return correct error */ code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; status = 0; pci_nvme_status_genc(&status, code); pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, status); pci_nvme_stats_write_read_update(req->sc, req->opc, req->bytes, status); pci_nvme_release_ioreq(req->sc, req); } /* * Implements the Flush command. The specification states: * If a volatile write cache is not present, Flush commands complete * successfully and have no effect * in the description of the Volatile Write Cache (VWC) field of the Identify * Controller data. Therefore, set status to Success if the command is * not supported (i.e. RAM or as indicated by the blockif). 
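* For the same reason, a blockif_flush() that fails with EOPNOTSUPP is reported as Success below.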
*/ static bool nvme_opc_flush(struct pci_nvme_softc *sc __unused, struct nvme_command *cmd __unused, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) { bool pending = false; if (nvstore->type == NVME_STOR_RAM) { pci_nvme_status_genc(status, NVME_SC_SUCCESS); } else { int err; req->io_req.br_callback = pci_nvme_io_done; err = blockif_flush(nvstore->ctx, &req->io_req); switch (err) { case 0: pending = true; break; case EOPNOTSUPP: pci_nvme_status_genc(status, NVME_SC_SUCCESS); break; default: pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); } } return (pending); } static uint16_t nvme_write_read_ram(struct pci_nvme_softc *sc, struct pci_nvme_blockstore *nvstore, uint64_t prp1, uint64_t prp2, size_t offset, uint64_t bytes, bool is_write) { uint8_t *buf = nvstore->ctx; enum nvme_copy_dir dir; uint16_t status; if (is_write) dir = NVME_COPY_TO_PRP; else dir = NVME_COPY_FROM_PRP; status = 0; if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, prp1, prp2, buf + offset, bytes, dir)) pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); else pci_nvme_status_genc(&status, NVME_SC_SUCCESS); return (status); } static uint16_t nvme_write_read_blockif(struct pci_nvme_softc *sc, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint64_t prp1, uint64_t prp2, size_t offset, uint64_t bytes, bool is_write) { uint64_t size; int err; uint16_t status = NVME_NO_STATUS; size = MIN(PAGE_SIZE - (prp1 % PAGE_SIZE), bytes); if (pci_nvme_append_iov_req(sc, req, prp1, size, offset)) { err = -1; goto out; } offset += size; bytes -= size; if (bytes == 0) { ; } else if (bytes <= PAGE_SIZE) { size = bytes; if (pci_nvme_append_iov_req(sc, req, prp2, size, offset)) { err = -1; goto out; } } else { void *vmctx = sc->nsc_pi->pi_vmctx; uint64_t *prp_list = &prp2; uint64_t *last = prp_list; /* PRP2 is pointer to a physical region page list */ while (bytes) { /* Last entry in list points to the next list */ if ((prp_list == last) && (bytes > PAGE_SIZE)) { uint64_t prp = *prp_list; prp_list = paddr_guest2host(vmctx, prp, PAGE_SIZE - (prp % PAGE_SIZE)); if (prp_list == NULL) { err = -1; goto out; } last = prp_list + (NVME_PRP2_ITEMS - 1); } size = MIN(bytes, PAGE_SIZE); if (pci_nvme_append_iov_req(sc, req, *prp_list, size, offset)) { err = -1; goto out; } offset += size; bytes -= size; prp_list++; } } req->io_req.br_callback = pci_nvme_io_done; if (is_write) err = blockif_write(nvstore->ctx, &req->io_req); else err = blockif_read(nvstore->ctx, &req->io_req); out: if (err) pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); return (status); } static bool nvme_opc_write_read(struct pci_nvme_softc *sc, struct nvme_command *cmd, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) { uint64_t lba, nblocks, bytes; size_t offset; bool is_write = cmd->opc == NVME_OPC_WRITE; bool pending = false; lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; nblocks = (cmd->cdw12 & 0xFFFF) + 1; bytes = nblocks << nvstore->sectsz_bits; if (bytes > NVME_MAX_DATA_SIZE) { WPRINTF("%s command would exceed MDTS", __func__); pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD); goto out; } if (pci_nvme_out_of_range(nvstore, lba, nblocks)) { WPRINTF("%s command would exceed LBA range(slba=%#lx nblocks=%#lx)", __func__, lba, nblocks); pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); goto out; } offset = lba << nvstore->sectsz_bits; req->bytes = bytes; req->io_req.br_offset = lba; /* PRP bits 1:0 must be zero */ cmd->prp1 &= ~0x3UL; cmd->prp2 &= ~0x3UL; if 
(nvstore->type == NVME_STOR_RAM) { *status = nvme_write_read_ram(sc, nvstore, cmd->prp1, cmd->prp2, offset, bytes, is_write); } else { *status = nvme_write_read_blockif(sc, nvstore, req, cmd->prp1, cmd->prp2, offset, bytes, is_write); if (*status == NVME_NO_STATUS) pending = true; } out: if (!pending) pci_nvme_stats_write_read_update(sc, cmd->opc, bytes, *status); return (pending); } static void pci_nvme_dealloc_sm(struct blockif_req *br, int err) { struct pci_nvme_ioreq *req = br->br_param; struct pci_nvme_softc *sc = req->sc; bool done = true; uint16_t status; status = 0; if (err) { pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); } else if ((req->prev_gpaddr + 1) == (req->prev_size)) { pci_nvme_status_genc(&status, NVME_SC_SUCCESS); } else { struct iovec *iov = req->io_req.br_iov; req->prev_gpaddr++; iov += req->prev_gpaddr; /* The iov_* values already include the sector size */ req->io_req.br_offset = (off_t)iov->iov_base; req->io_req.br_resid = iov->iov_len; if (blockif_delete(sc->nvstore.ctx, &req->io_req)) { pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); } else done = false; } if (done) { pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, req->cid, status); pci_nvme_release_ioreq(sc, req); } } static bool nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, struct nvme_command *cmd, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) { struct nvme_dsm_range *range = NULL; uint32_t nr, r, non_zero, dr; int err; bool pending = false; if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) { pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE); goto out; } nr = cmd->cdw10 & 0xff; /* copy locally because a range entry could straddle PRPs */ range = calloc(1, NVME_MAX_DSM_TRIM); if (range == NULL) { pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); goto out; } nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2, (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP); /* Check for invalid ranges and the number of non-zero lengths */ non_zero = 0; for (r = 0; r <= nr; r++) { if (pci_nvme_out_of_range(nvstore, range[r].starting_lba, range[r].length)) { pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); goto out; } if (range[r].length != 0) non_zero++; } if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) { size_t offset, bytes; int sectsz_bits = sc->nvstore.sectsz_bits; /* * DSM calls are advisory only, and compliant controllers * may choose to take no actions (i.e. return Success). */ if (!nvstore->deallocate) { pci_nvme_status_genc(status, NVME_SC_SUCCESS); goto out; } /* If all ranges have a zero length, return Success */ if (non_zero == 0) { pci_nvme_status_genc(status, NVME_SC_SUCCESS); goto out; } if (req == NULL) { pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); goto out; } offset = range[0].starting_lba << sectsz_bits; bytes = range[0].length << sectsz_bits; /* * If the request is for more than a single range, store * the ranges in the br_iov. Optimize for the common case * of a single range. 
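* Multi-range deallocations are driven by the pci_nvme_dealloc_sm() completion callback, which issues one blockif_delete() per remaining range.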
* * Note that NVMe Number of Ranges is a zero based value */ req->io_req.br_iovcnt = 0; req->io_req.br_offset = offset; req->io_req.br_resid = bytes; if (nr == 0) { req->io_req.br_callback = pci_nvme_io_done; } else { struct iovec *iov = req->io_req.br_iov; for (r = 0, dr = 0; r <= nr; r++) { offset = range[r].starting_lba << sectsz_bits; bytes = range[r].length << sectsz_bits; if (bytes == 0) continue; if ((nvstore->size - offset) < bytes) { pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); goto out; } iov[dr].iov_base = (void *)offset; iov[dr].iov_len = bytes; dr++; } req->io_req.br_callback = pci_nvme_dealloc_sm; /* * Use prev_gpaddr to track the current entry and * prev_size to track the number of entries */ req->prev_gpaddr = 0; req->prev_size = dr; } err = blockif_delete(nvstore->ctx, &req->io_req); if (err) pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); else pending = true; } out: free(range); return (pending); } static void pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) { struct nvme_submission_queue *sq; uint16_t status; uint16_t sqhead; /* handle all submissions up to sq->tail index */ sq = &sc->submit_queues[idx]; pthread_mutex_lock(&sq->mtx); sqhead = sq->head; DPRINTF("nvme_handle_io qid %u head %u tail %u cmdlist %p", idx, sqhead, sq->tail, sq->qbase); while (sqhead != atomic_load_acq_short(&sq->tail)) { struct nvme_command *cmd; struct pci_nvme_ioreq *req; uint32_t nsid; bool pending; pending = false; req = NULL; status = 0; cmd = &sq->qbase[sqhead]; sqhead = (sqhead + 1) % sq->size; nsid = le32toh(cmd->nsid); if ((nsid == 0) || (nsid > sc->ctrldata.nn)) { pci_nvme_status_genc(&status, NVME_SC_INVALID_NAMESPACE_OR_FORMAT); status |= NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT; goto complete; } req = pci_nvme_get_ioreq(sc); if (req == NULL) { pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); WPRINTF("%s: unable to allocate IO req", __func__); goto complete; } req->nvme_sq = sq; req->sqid = idx; req->opc = cmd->opc; req->cid = cmd->cid; req->nsid = cmd->nsid; switch (cmd->opc) { case NVME_OPC_FLUSH: pending = nvme_opc_flush(sc, cmd, &sc->nvstore, req, &status); break; case NVME_OPC_WRITE: case NVME_OPC_READ: pending = nvme_opc_write_read(sc, cmd, &sc->nvstore, req, &status); break; case NVME_OPC_WRITE_ZEROES: /* TODO: write zeroes WPRINTF("%s write zeroes lba 0x%lx blocks %u", __func__, lba, cmd->cdw12 & 0xFFFF); */ pci_nvme_status_genc(&status, NVME_SC_SUCCESS); break; case NVME_OPC_DATASET_MANAGEMENT: pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req, &status); break; default: WPRINTF("%s unhandled io command 0x%x", __func__, cmd->opc); pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE); } complete: if (!pending) { pci_nvme_set_completion(sc, sq, idx, cmd->cid, status); if (req != NULL) pci_nvme_release_ioreq(sc, req); } } sq->head = sqhead; pthread_mutex_unlock(&sq->mtx); } static void pci_nvme_handle_doorbell(struct pci_nvme_softc* sc, uint64_t idx, int is_sq, uint64_t value) { DPRINTF("nvme doorbell %lu, %s, val 0x%lx", idx, is_sq ? 
"SQ" : "CQ", value & 0xFFFF); if (is_sq) { if (idx > sc->num_squeues) { WPRINTF("%s queue index %lu overflow from " "guest (max %u)", __func__, idx, sc->num_squeues); return; } atomic_store_short(&sc->submit_queues[idx].tail, (uint16_t)value); if (idx == 0) { pci_nvme_handle_admin_cmd(sc, value); } else { /* submission queue; handle new entries in SQ */ if (idx > sc->num_squeues) { WPRINTF("%s SQ index %lu overflow from " "guest (max %u)", __func__, idx, sc->num_squeues); return; } pci_nvme_handle_io_cmd(sc, (uint16_t)idx); } } else { if (idx > sc->num_cqueues) { WPRINTF("%s queue index %lu overflow from " "guest (max %u)", __func__, idx, sc->num_cqueues); return; } atomic_store_short(&sc->compl_queues[idx].head, (uint16_t)value); } } static void pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite) { const char *s = iswrite ? "WRITE" : "READ"; switch (offset) { case NVME_CR_CAP_LOW: DPRINTF("%s %s NVME_CR_CAP_LOW", func, s); break; case NVME_CR_CAP_HI: DPRINTF("%s %s NVME_CR_CAP_HI", func, s); break; case NVME_CR_VS: DPRINTF("%s %s NVME_CR_VS", func, s); break; case NVME_CR_INTMS: DPRINTF("%s %s NVME_CR_INTMS", func, s); break; case NVME_CR_INTMC: DPRINTF("%s %s NVME_CR_INTMC", func, s); break; case NVME_CR_CC: DPRINTF("%s %s NVME_CR_CC", func, s); break; case NVME_CR_CSTS: DPRINTF("%s %s NVME_CR_CSTS", func, s); break; case NVME_CR_NSSR: DPRINTF("%s %s NVME_CR_NSSR", func, s); break; case NVME_CR_AQA: DPRINTF("%s %s NVME_CR_AQA", func, s); break; case NVME_CR_ASQ_LOW: DPRINTF("%s %s NVME_CR_ASQ_LOW", func, s); break; case NVME_CR_ASQ_HI: DPRINTF("%s %s NVME_CR_ASQ_HI", func, s); break; case NVME_CR_ACQ_LOW: DPRINTF("%s %s NVME_CR_ACQ_LOW", func, s); break; case NVME_CR_ACQ_HI: DPRINTF("%s %s NVME_CR_ACQ_HI", func, s); break; default: DPRINTF("unknown nvme bar-0 offset 0x%lx", offset); } } static void pci_nvme_write_bar_0(struct pci_nvme_softc *sc, uint64_t offset, int size, uint64_t value) { uint32_t ccreg; if (offset >= NVME_DOORBELL_OFFSET) { uint64_t belloffset = offset - NVME_DOORBELL_OFFSET; uint64_t idx = belloffset / 8; /* door bell size = 2*int */ int is_sq = (belloffset % 8) < 4; if ((sc->regs.csts & NVME_CSTS_RDY) == 0) { WPRINTF("doorbell write prior to RDY (offset=%#lx)\n", offset); return; } if (belloffset > ((sc->max_queues+1) * 8 - 4)) { WPRINTF("guest attempted an overflow write offset " "0x%lx, val 0x%lx in %s", offset, value, __func__); return; } if (is_sq) { if (sc->submit_queues[idx].qbase == NULL) return; } else if (sc->compl_queues[idx].qbase == NULL) return; pci_nvme_handle_doorbell(sc, idx, is_sq, value); return; } DPRINTF("nvme-write offset 0x%lx, size %d, value 0x%lx", offset, size, value); if (size != 4) { WPRINTF("guest wrote invalid size %d (offset 0x%lx, " "val 0x%lx) to bar0 in %s", size, offset, value, __func__); /* TODO: shutdown device */ return; } pci_nvme_bar0_reg_dumps(__func__, offset, 1); pthread_mutex_lock(&sc->mtx); switch (offset) { case NVME_CR_CAP_LOW: case NVME_CR_CAP_HI: /* readonly */ break; case NVME_CR_VS: /* readonly */ break; case NVME_CR_INTMS: /* MSI-X, so ignore */ break; case NVME_CR_INTMC: /* MSI-X, so ignore */ break; case NVME_CR_CC: ccreg = (uint32_t)value; DPRINTF("%s NVME_CR_CC en %x css %x shn %x iosqes %u " "iocqes %u", __func__, NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg), NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg), NVME_CC_GET_IOCQES(ccreg)); if (NVME_CC_GET_SHN(ccreg)) { /* perform shutdown - flush out data to backend */ sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK << NVME_CSTS_REG_SHST_SHIFT); 
sc->regs.csts |= NVME_SHST_COMPLETE << NVME_CSTS_REG_SHST_SHIFT; } if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) { if (NVME_CC_GET_EN(ccreg) == 0) /* transition 1-> causes controller reset */ pci_nvme_reset_locked(sc); else pci_nvme_init_controller(sc); } /* Insert the iocqes, iosqes and en bits from the write */ sc->regs.cc &= ~NVME_CC_WRITE_MASK; sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK; if (NVME_CC_GET_EN(ccreg) == 0) { /* Insert the ams, mps and css bit fields */ sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK; sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK; sc->regs.csts &= ~NVME_CSTS_RDY; } else if ((sc->pending_ios == 0) && !(sc->regs.csts & NVME_CSTS_CFS)) { sc->regs.csts |= NVME_CSTS_RDY; } break; case NVME_CR_CSTS: break; case NVME_CR_NSSR: /* ignore writes; don't support subsystem reset */ break; case NVME_CR_AQA: sc->regs.aqa = (uint32_t)value; break; case NVME_CR_ASQ_LOW: sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) | (0xFFFFF000 & value); break; case NVME_CR_ASQ_HI: sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) | (value << 32); break; case NVME_CR_ACQ_LOW: sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) | (0xFFFFF000 & value); break; case NVME_CR_ACQ_HI: sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) | (value << 32); break; default: DPRINTF("%s unknown offset 0x%lx, value 0x%lx size %d", __func__, offset, value, size); } pthread_mutex_unlock(&sc->mtx); } static void pci_nvme_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_nvme_softc* sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { DPRINTF("nvme-write baridx %d, msix: off 0x%lx, size %d, " " value 0x%lx", baridx, offset, size, value); pci_emul_msix_twrite(pi, offset, size, value); return; } switch (baridx) { case 0: pci_nvme_write_bar_0(sc, offset, size, value); break; default: DPRINTF("%s unknown baridx %d, val 0x%lx", __func__, baridx, value); } } static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, uint64_t offset, int size) { uint64_t value; pci_nvme_bar0_reg_dumps(__func__, offset, 0); if (offset < NVME_DOORBELL_OFFSET) { void *p = &(sc->regs); pthread_mutex_lock(&sc->mtx); memcpy(&value, (void *)((uintptr_t)p + offset), size); pthread_mutex_unlock(&sc->mtx); } else { value = 0; WPRINTF("pci_nvme: read invalid offset %ld", offset); } switch (size) { case 1: value &= 0xFF; break; case 2: value &= 0xFFFF; break; case 4: value &= 0xFFFFFFFF; break; } DPRINTF(" nvme-read offset 0x%lx, size %d -> value 0x%x", offset, size, (uint32_t)value); return (value); } static uint64_t pci_nvme_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_nvme_softc* sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { DPRINTF("nvme-read bar: %d, msix: regoff 0x%lx, size %d", baridx, offset, size); return pci_emul_msix_tread(pi, offset, size); } switch (baridx) { case 0: return pci_nvme_read_bar_0(sc, offset, size); default: DPRINTF("unknown bar %d, 0x%lx", baridx, offset); } return (0); } static int pci_nvme_parse_config(struct pci_nvme_softc *sc, nvlist_t *nvl) { char bident[sizeof("XXX:XXX")]; const char *value; uint32_t sectsz; sc->max_queues = NVME_QUEUES; sc->max_qentries = NVME_MAX_QENTRIES; sc->ioslots = NVME_IOSLOTS; sc->num_squeues = sc->max_queues; sc->num_cqueues = sc->max_queues; sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO; sectsz = 0; snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn), "NVME-%d-%d", sc->nsc_pi->pi_slot, 
sc->nsc_pi->pi_func); value = get_config_value_node(nvl, "maxq"); if (value != NULL) sc->max_queues = atoi(value); value = get_config_value_node(nvl, "qsz"); if (value != NULL) { sc->max_qentries = atoi(value); if (sc->max_qentries <= 0) { EPRINTLN("nvme: Invalid qsz option %d", sc->max_qentries); return (-1); } } value = get_config_value_node(nvl, "ioslots"); if (value != NULL) { sc->ioslots = atoi(value); if (sc->ioslots <= 0) { EPRINTLN("Invalid ioslots option %d", sc->ioslots); return (-1); } } value = get_config_value_node(nvl, "sectsz"); if (value != NULL) sectsz = atoi(value); value = get_config_value_node(nvl, "ser"); if (value != NULL) { /* * This field indicates the Product Serial Number in * 7-bit ASCII, unused bytes should be space characters. * Ref: NVMe v1.3c. */ cpywithpad((char *)sc->ctrldata.sn, sizeof(sc->ctrldata.sn), value, ' '); } value = get_config_value_node(nvl, "eui64"); if (value != NULL) sc->nvstore.eui64 = htobe64(strtoull(value, NULL, 0)); value = get_config_value_node(nvl, "dsm"); if (value != NULL) { if (strcmp(value, "auto") == 0) sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO; else if (strcmp(value, "enable") == 0) sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE; else if (strcmp(value, "disable") == 0) sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE; } value = get_config_value_node(nvl, "ram"); if (value != NULL) { uint64_t sz = strtoull(value, NULL, 10); sc->nvstore.type = NVME_STOR_RAM; sc->nvstore.size = sz * 1024 * 1024; sc->nvstore.ctx = calloc(1, sc->nvstore.size); sc->nvstore.sectsz = 4096; sc->nvstore.sectsz_bits = 12; if (sc->nvstore.ctx == NULL) { EPRINTLN("nvme: Unable to allocate RAM"); return (-1); } } else { snprintf(bident, sizeof(bident), "%u:%u", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); sc->nvstore.ctx = blockif_open(nvl, bident); if (sc->nvstore.ctx == NULL) { EPRINTLN("nvme: Could not open backing file: %s", strerror(errno)); return (-1); } sc->nvstore.type = NVME_STOR_BLOCKIF; sc->nvstore.size = blockif_size(sc->nvstore.ctx); } if (sectsz == 512 || sectsz == 4096 || sectsz == 8192) sc->nvstore.sectsz = sectsz; else if (sc->nvstore.type != NVME_STOR_RAM) sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx); for (sc->nvstore.sectsz_bits = 9; (1U << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz; sc->nvstore.sectsz_bits++); if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES) sc->max_queues = NVME_QUEUES; return (0); } static void pci_nvme_resized(struct blockif_ctxt *bctxt __unused, void *arg, size_t new_size) { struct pci_nvme_softc *sc; struct pci_nvme_blockstore *nvstore; struct nvme_namespace_data *nd; sc = arg; nvstore = &sc->nvstore; nd = &sc->nsdata; nvstore->size = new_size; pci_nvme_init_nsdata_size(nvstore, nd); /* Add changed NSID to list */ sc->ns_log.ns[0] = 1; sc->ns_log.ns[1] = 0; pci_nvme_aen_post(sc, PCI_NVME_AE_TYPE_NOTICE, PCI_NVME_AEI_NOTICE_NS_ATTR_CHANGED); } static int pci_nvme_init(struct pci_devinst *pi, nvlist_t *nvl) { struct pci_nvme_softc *sc; uint32_t pci_membar_sz; int error; error = 0; sc = calloc(1, sizeof(struct pci_nvme_softc)); pi->pi_arg = sc; sc->nsc_pi = pi; error = pci_nvme_parse_config(sc, nvl); if (error < 0) goto done; else error = 0; STAILQ_INIT(&sc->ioreqs_free); sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq)); for (uint32_t i = 0; i < sc->ioslots; i++) { STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link); } pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A); pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM); pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0); /* * Allocate size of NVMe registers + doorbell space for all queues. * * The specification requires a minimum memory I/O window size of 16K. * The Windows driver will refuse to start a device with a smaller * window. */ pci_membar_sz = sizeof(struct nvme_registers) + 2 * sizeof(uint32_t) * (sc->max_queues + 1); pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN); DPRINTF("nvme membar size: %u", pci_membar_sz); error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz); if (error) { WPRINTF("%s pci alloc mem bar failed", __func__); goto done; } error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR); if (error) { WPRINTF("%s pci add msixcap failed", __func__); goto done; } error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP); if (error) { WPRINTF("%s pci add Express capability failed", __func__); goto done; } pthread_mutex_init(&sc->mtx, NULL); sem_init(&sc->iosemlock, 0, sc->ioslots); blockif_register_resize_callback(sc->nvstore.ctx, pci_nvme_resized, sc); pci_nvme_init_queues(sc, sc->max_queues, sc->max_queues); /* * Controller data depends on Namespace data so initialize Namespace * data first. */ pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore); pci_nvme_init_ctrldata(sc); pci_nvme_init_logpages(sc); pci_nvme_init_features(sc); pci_nvme_aer_init(sc); pci_nvme_aen_init(sc); pci_nvme_reset(sc); pci_lintr_request(pi); done: return (error); } static int pci_nvme_legacy_config(nvlist_t *nvl, const char *opts) { char *cp, *ram; if (opts == NULL) return (0); if (strncmp(opts, "ram=", 4) == 0) { cp = strchr(opts, ','); if (cp == NULL) { set_config_value_node(nvl, "ram", opts + 4); return (0); } ram = strndup(opts + 4, cp - opts - 4); set_config_value_node(nvl, "ram", ram); free(ram); return (pci_parse_legacy_config(nvl, cp + 1)); } else return (blockif_legacy_config(nvl, opts)); } static const struct pci_devemu pci_de_nvme = { .pe_emu = "nvme", .pe_init = pci_nvme_init, .pe_legacy_config = pci_nvme_legacy_config, .pe_barwrite = pci_nvme_write, .pe_barread = pci_nvme_read }; PCI_EMUL_SET(pci_de_nvme); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index 9af5854037fe..dd60c5a89523 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -1,820 +1,820 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* IFNAMSIZ */ #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "mevent.h" #include "virtio.h" #include "net_utils.h" #include "net_backends.h" #include "iov.h" #define VTNET_RINGSZ 1024 #define VTNET_MAXSEGS 256 #define VTNET_MAX_PKT_LEN (65536 + 64) #define VTNET_MIN_MTU ETHERMIN #define VTNET_MAX_MTU 65535 #define VTNET_S_HOSTCAPS \ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) /* * PCI config-space "registers" */ struct virtio_net_config { uint8_t mac[6]; uint16_t status; uint16_t max_virtqueue_pairs; uint16_t mtu; } __packed; /* * Queue definitions. */ #define VTNET_RXQ 0 #define VTNET_TXQ 1 #define VTNET_CTLQ 2 /* NB: not yet supported */ #define VTNET_MAXQ 3 /* * Debug printf */ static int pci_vtnet_debug; #define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params #define WPRINTF(params) PRINTLN params /* * Per-device softc */ struct pci_vtnet_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; pthread_mutex_t vsc_mtx; net_backend_t *vsc_be; bool features_negotiated; /* protected by rx_mtx */ int resetting; /* protected by tx_mtx */ uint64_t vsc_features; /* negotiated features */ pthread_mutex_t rx_mtx; int rx_merge; /* merged rx bufs in use */ pthread_t tx_tid; pthread_mutex_t tx_mtx; pthread_cond_t tx_cond; int tx_in_progress; size_t vhdrlen; size_t be_vhdrlen; struct virtio_net_config vsc_config; struct virtio_consts vsc_consts; }; static void pci_vtnet_reset(void *); /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ static int pci_vtnet_cfgread(void *, int, int, uint32_t *); static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); static void pci_vtnet_neg_features(void *, uint64_t); #ifdef BHYVE_SNAPSHOT static void pci_vtnet_pause(void *); static void pci_vtnet_resume(void *); static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *); #endif static struct virtio_consts vtnet_vi_consts = { .vc_name = "vtnet", .vc_nvq = VTNET_MAXQ - 1, .vc_cfgsize = sizeof(struct virtio_net_config), .vc_reset = pci_vtnet_reset, .vc_cfgread = pci_vtnet_cfgread, .vc_cfgwrite = pci_vtnet_cfgwrite, .vc_apply_features = pci_vtnet_neg_features, .vc_hv_caps = VTNET_S_HOSTCAPS, #ifdef BHYVE_SNAPSHOT .vc_pause = pci_vtnet_pause, .vc_resume = pci_vtnet_resume, .vc_snapshot = pci_vtnet_snapshot, #endif }; static void pci_vtnet_reset(void *vsc) { struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device reset requested !")); /* Acquire the RX lock to block RX processing. 
*/ pthread_mutex_lock(&sc->rx_mtx); /* * Make sure receive operation is disabled at least until we * re-negotiate the features, since receive operation depends * on the value of sc->rx_merge and the header length, which * are both set in pci_vtnet_neg_features(). * Receive operation will be enabled again once the guest adds * the first receive buffers and kicks us. */ sc->features_negotiated = false; netbe_rx_disable(sc->vsc_be); /* Set sc->resetting and give a chance to the TX thread to stop. */ pthread_mutex_lock(&sc->tx_mtx); sc->resetting = 1; while (sc->tx_in_progress) { pthread_mutex_unlock(&sc->tx_mtx); usleep(10000); pthread_mutex_lock(&sc->tx_mtx); } /* * Now reset rings, MSI-X vectors, and negotiated capabilities. * Do that with the TX lock held, since we need to reset * sc->resetting. */ vi_reset_dev(&sc->vsc_vs); sc->resetting = 0; pthread_mutex_unlock(&sc->tx_mtx); pthread_mutex_unlock(&sc->rx_mtx); } static __inline struct iovec * iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen) { struct iovec *riov; if (iov[0].iov_len < hlen) { /* * Not enough header space in the first fragment. * That's not ok for us. */ return NULL; } iov[0].iov_len -= hlen; if (iov[0].iov_len == 0) { *iovcnt -= 1; if (*iovcnt == 0) { /* * Only space for the header. That's not * enough for us. */ return NULL; } riov = &iov[1]; } else { iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen); riov = &iov[0]; } return (riov); } struct virtio_mrg_rxbuf_info { uint16_t idx; uint16_t pad; uint32_t len; }; static void pci_vtnet_rx(struct pci_vtnet_softc *sc) { int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen; struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS]; struct iovec iov[VTNET_MAXSEGS + 1]; struct vqueue_info *vq; struct vi_req req; vq = &sc->vsc_queues[VTNET_RXQ]; /* Features must be negotiated */ if (!sc->features_negotiated) { return; } for (;;) { struct virtio_net_rxhdr *hdr; uint32_t riov_bytes; struct iovec *riov; uint32_t ulen; int riov_len; int n_chains; ssize_t rlen; ssize_t plen; plen = netbe_peek_recvlen(sc->vsc_be); if (plen <= 0) { /* * No more packets (plen == 0), or backend errored * (plen < 0). Interrupt if needed and stop. */ vq_endchains(vq, /*used_all_avail=*/0); return; } plen += prepend_hdr_len; /* * Get a descriptor chain to store the next ingress * packet. In case of mergeable rx buffers, get as * many chains as necessary in order to make room * for plen bytes. */ riov_bytes = 0; riov_len = 0; riov = iov; n_chains = 0; do { int n = vq_getchain(vq, riov, VTNET_MAXSEGS - riov_len, &req); info[n_chains].idx = req.idx; if (n == 0) { /* * No rx buffers. Enable RX kicks and double * check. */ vq_kick_enable(vq); if (!vq_has_descs(vq)) { /* * Still no buffers. Return the unused * chains (if any), interrupt if needed * (including for NOTIFY_ON_EMPTY), and * disable the backend until the next * kick. */ vq_retchains(vq, n_chains); vq_endchains(vq, /*used_all_avail=*/1); netbe_rx_disable(sc->vsc_be); return; } /* More rx buffers found, so keep going. */ vq_kick_disable(vq); continue; } assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS); riov_len += n; if (!sc->rx_merge) { n_chains = 1; break; } info[n_chains].len = (uint32_t)count_iov(riov, n); riov_bytes += info[n_chains].len; riov += n; n_chains++; } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS); riov = iov; hdr = riov[0].iov_base; if (prepend_hdr_len > 0) { /* * The frontend uses a virtio-net header, but the * backend does not. We need to prepend a zeroed * header. 
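* The prepended length is vhdrlen minus be_vhdrlen; iov_trim_hdr() below skips that space at the front of the first guest buffer and the saved hdr pointer is then used to zero it.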
*/ riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len); if (riov == NULL) { /* * The first collected chain is nonsensical, * as it is not even enough to store the * virtio-net header. Just drop it. */ vq_relchain(vq, info[0].idx, 0); vq_retchains(vq, n_chains - 1); continue; } memset(hdr, 0, prepend_hdr_len); } rlen = netbe_recv(sc->vsc_be, riov, riov_len); if (rlen != plen - prepend_hdr_len) { /* * If this happens it means there is something * wrong with the backend (e.g., some other * process is stealing our packets). */ WPRINTF(("netbe_recv: expected %zd bytes, " "got %zd", plen - prepend_hdr_len, rlen)); vq_retchains(vq, n_chains); continue; } ulen = (uint32_t)plen; /* * Publish the used buffers to the guest, reporting the * number of bytes that we wrote. */ if (!sc->rx_merge) { vq_relchain(vq, info[0].idx, ulen); } else { uint32_t iolen; int i = 0; do { iolen = info[i].len; if (iolen > ulen) { iolen = ulen; } vq_relchain_prepare(vq, info[i].idx, iolen); ulen -= iolen; i++; } while (ulen > 0); hdr->vrh_bufs = i; vq_relchain_publish(vq); assert(i == n_chains); } } } /* * Called when there is read activity on the backend file descriptor. * Each buffer posted by the guest is assumed to be able to contain * an entire ethernet frame + rx header. */ static void pci_vtnet_rx_callback(int fd __unused, enum ev_type type __unused, void *param) { struct pci_vtnet_softc *sc = param; pthread_mutex_lock(&sc->rx_mtx); pci_vtnet_rx(sc); pthread_mutex_unlock(&sc->rx_mtx); } /* Called on RX kick. */ static void pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * A qnotify means that the rx process can now begin. * Enable RX only if features are negotiated. */ pthread_mutex_lock(&sc->rx_mtx); if (!sc->features_negotiated) { pthread_mutex_unlock(&sc->rx_mtx); return; } vq_kick_disable(vq); netbe_rx_enable(sc->vsc_be); pthread_mutex_unlock(&sc->rx_mtx); } /* TX virtqueue processing, called by the TX thread. */ static void pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) { struct iovec iov[VTNET_MAXSEGS + 1]; struct iovec *siov = iov; struct vi_req req; ssize_t len; int n; /* * Obtain chain of descriptors. The first descriptor also * contains the virtio-net header. */ n = vq_getchain(vq, iov, VTNET_MAXSEGS, &req); assert(n >= 1 && n <= VTNET_MAXSEGS); if (sc->vhdrlen != sc->be_vhdrlen) { /* * The frontend uses a virtio-net header, but the backend * does not. We simply strip the header and ignore it, as * it should be zero-filled. */ siov = iov_trim_hdr(siov, &n, sc->vhdrlen); } if (siov == NULL) { /* The chain is nonsensical. Just drop it. */ len = 0; } else { len = netbe_send(sc->vsc_be, siov, n); if (len < 0) { /* * If send failed, report that 0 bytes * were read. */ len = 0; } } /* * Return the processed chain to the guest, reporting * the number of bytes that we read. */ vq_relchain(vq, req.idx, len); } /* Called on TX kick. */ static void pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * Any ring entries to process? 
*/ if (!vq_has_descs(vq)) return; /* Signal the tx thread for processing */ pthread_mutex_lock(&sc->tx_mtx); vq_kick_disable(vq); if (sc->tx_in_progress == 0) pthread_cond_signal(&sc->tx_cond); pthread_mutex_unlock(&sc->tx_mtx); } /* * Thread which will handle processing of TX desc */ static void * pci_vtnet_tx_thread(void *param) { struct pci_vtnet_softc *sc = param; struct vqueue_info *vq; int error; vq = &sc->vsc_queues[VTNET_TXQ]; /* * Let us wait till the tx queue pointers get initialised & * first tx signaled */ pthread_mutex_lock(&sc->tx_mtx); error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); for (;;) { /* note - tx mutex is locked here */ while (sc->resetting || !vq_has_descs(vq)) { vq_kick_enable(vq); if (!sc->resetting && vq_has_descs(vq)) break; sc->tx_in_progress = 0; error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); } vq_kick_disable(vq); sc->tx_in_progress = 1; pthread_mutex_unlock(&sc->tx_mtx); do { /* * Run through entries, placing them into * iovecs and sending when an end-of-packet * is found */ pci_vtnet_proctx(sc, vq); } while (vq_has_descs(vq)); /* * Generate an interrupt if needed. */ vq_endchains(vq, /*used_all_avail=*/1); pthread_mutex_lock(&sc->tx_mtx); } } #ifdef notyet static void pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) { DPRINTF(("vtnet: control qnotify!")); } #endif static int pci_vtnet_init(struct pci_devinst *pi, nvlist_t *nvl) { struct pci_vtnet_softc *sc; const char *value; char tname[MAXCOMLEN + 1]; unsigned long mtu = ETHERMTU; int err; /* * Allocate data structures for further virtio initializations. * sc also contains a copy of vtnet_vi_consts, since capabilities * change depending on the backend. */ sc = calloc(1, sizeof(struct pci_vtnet_softc)); sc->vsc_consts = vtnet_vi_consts; pthread_mutex_init(&sc->vsc_mtx, NULL); sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; #ifdef notyet sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; #endif value = get_config_value_node(nvl, "mac"); if (value != NULL) { err = net_parsemac(value, sc->vsc_config.mac); if (err) { free(sc); return (err); } } else net_genmac(pi, sc->vsc_config.mac); value = get_config_value_node(nvl, "mtu"); if (value != NULL) { err = net_parsemtu(value, &mtu); if (err) { free(sc); return (err); } if (mtu < VTNET_MIN_MTU || mtu > VTNET_MAX_MTU) { err = EINVAL; errno = EINVAL; free(sc); return (err); } sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MTU; } sc->vsc_config.mtu = mtu; /* Permit interfaces without a configured backend. */ if (get_config_value_node(nvl, "backend") != NULL) { err = netbe_init(&sc->vsc_be, nvl, pci_vtnet_rx_callback, sc); if (err) { free(sc); return (err); } } sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF | netbe_get_cap(sc->vsc_be); /* * Since we do not actually support multiqueue, * set the maximum virtqueue pairs to 1. */ sc->vsc_config.max_virtqueue_pairs = 1; /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); /* Link is always up. 
*/ sc->vsc_config.status = 1; vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); sc->vsc_vs.vs_mtx = &sc->vsc_mtx; /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { free(sc); return (1); } /* use BAR 0 to map config regs in IO space */ vi_set_io_bar(&sc->vsc_vs, 0); sc->resetting = 0; sc->rx_merge = 0; sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; pthread_mutex_init(&sc->rx_mtx, NULL); /* * Initialize tx semaphore & spawn TX processing thread. * As of now, only one thread for TX desc processing is * spawned. */ sc->tx_in_progress = 0; pthread_mutex_init(&sc->tx_mtx, NULL); pthread_cond_init(&sc->tx_cond, NULL); pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, pi->pi_func); pthread_set_name_np(sc->tx_tid, tname); return (0); } static int pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) { struct pci_vtnet_softc *sc = vsc; void *ptr; if (offset < (int)sizeof(sc->vsc_config.mac)) { assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); /* * The driver is allowed to change the MAC address */ ptr = &sc->vsc_config.mac[offset]; memcpy(ptr, &value, size); } else { /* silently ignore other writes */ DPRINTF(("vtnet: write to readonly reg %d", offset)); } return (0); } static int pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtnet_softc *sc = vsc; void *ptr; ptr = (uint8_t *)&sc->vsc_config + offset; memcpy(retval, ptr, size); return (0); } static void pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtnet_softc *sc = vsc; sc->vsc_features = negotiated_features; if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) { sc->vhdrlen = sizeof(struct virtio_net_rxhdr); sc->rx_merge = 1; } else { /* * Without mergeable rx buffers, virtio-net header is 2 * bytes shorter than sizeof(struct virtio_net_rxhdr). */ sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; sc->rx_merge = 0; } /* Tell the backend to enable some capabilities it has advertised. */ netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen); sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be); assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen); pthread_mutex_lock(&sc->rx_mtx); sc->features_negotiated = true; pthread_mutex_unlock(&sc->rx_mtx); } #ifdef BHYVE_SNAPSHOT static void pci_vtnet_pause(void *vsc) { struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device pause requested !\n")); /* Acquire the RX lock to block RX processing. */ pthread_mutex_lock(&sc->rx_mtx); /* Wait for the transmit thread to finish its processing. */ pthread_mutex_lock(&sc->tx_mtx); while (sc->tx_in_progress) { pthread_mutex_unlock(&sc->tx_mtx); usleep(10000); pthread_mutex_lock(&sc->tx_mtx); } } static void pci_vtnet_resume(void *vsc) { struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device resume requested !\n")); pthread_mutex_unlock(&sc->tx_mtx); /* The RX lock should have been acquired in vtnet_pause. */ pthread_mutex_unlock(&sc->rx_mtx); } static int pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta) { int ret; struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device snapshot requested !\n")); /* * Queues and consts should have been saved by the more generic * vi_pci_snapshot function. We need to save only our features and * config. 
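* On restore, the saved features are re-applied below so that rx_merge and the virtio-net header lengths are recomputed to match the snapshotted state.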
*/ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->features_negotiated, meta, ret, done); - /* Force reapply negociated features at restore time */ + /* Force reapply negotiated features at restore time */ if (meta->op == VM_SNAPSHOT_RESTORE && sc->features_negotiated) { pci_vtnet_neg_features(sc, sc->vsc_features); netbe_rx_enable(sc->vsc_be); } SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->vhdrlen, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->be_vhdrlen, meta, ret, done); done: return (ret); } #endif static const struct pci_devemu pci_de_vnet = { .pe_emu = "virtio-net", .pe_init = pci_vtnet_init, .pe_legacy_config = netbe_legacy_config, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read, #ifdef BHYVE_SNAPSHOT .pe_snapshot = vi_pci_snapshot, .pe_pause = vi_pci_pause, .pe_resume = vi_pci_resume, #endif }; PCI_EMUL_SET(pci_de_vnet); diff --git a/usr.sbin/bhyve/usb_emul.h b/usr.sbin/bhyve/usb_emul.h index 1d713bc55cdc..b2122d6b7e13 100644 --- a/usr.sbin/bhyve/usb_emul.h +++ b/usr.sbin/bhyve/usb_emul.h @@ -1,159 +1,159 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014 Leon Dang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _USB_EMUL_H_ #define _USB_EMUL_H_ #include #include #include #include #define USB_MAX_XFER_BLOCKS 8 #define USB_XFER_OUT 0 #define USB_XFER_IN 1 struct usb_hci; struct usb_device_request; struct usb_data_xfer; struct vm_snapshot_meta; /* Device emulation handlers */ struct usb_devemu { const char *ue_emu; /* name of device emulation */ int ue_usbver; /* usb version: 2 or 3 */ int ue_usbspeed; /* usb device speed */ /* instance creation */ void *(*ue_init)(struct usb_hci *hci, nvlist_t *nvl); /* handlers */ int (*ue_request)(void *sc, struct usb_data_xfer *xfer); int (*ue_data)(void *sc, struct usb_data_xfer *xfer, int dir, int epctx); int (*ue_reset)(void *sc); int (*ue_remove)(void *sc); int (*ue_stop)(void *sc); int (*ue_snapshot)(void *scarg, struct vm_snapshot_meta *meta); }; #define USB_EMUL_SET(x) DATA_SET(usb_emu_set, x) /* * USB device events to notify HCI when state changes */ enum hci_usbev { USBDEV_ATTACH, USBDEV_RESET, USBDEV_STOP, USBDEV_REMOVE, }; /* usb controller, ie xhci, ehci */ struct usb_hci { int (*hci_intr)(struct usb_hci *hci, int epctx); int (*hci_event)(struct usb_hci *hci, enum hci_usbev evid, void *param); void *hci_sc; /* private softc for hci */ /* controller managed fields */ int hci_address; int hci_port; }; /* * Each xfer block is mapped to the hci transfer block. - * On input into the device handler, blen is set to the lenght of buf. + * On input into the device handler, blen is set to the length of buf. * The device handler is to update blen to reflect on the residual size * of the buffer, i.e. len(buf) - len(consumed). */ struct usb_data_xfer_block { void *buf; /* IN or OUT pointer */ int blen; /* in:len(buf), out:len(remaining) */ int bdone; /* bytes transferred */ uint32_t processed; /* device processed this + errcode */ void *hci_data; /* HCI private reference */ int ccs; uint32_t streamid; uint64_t trbnext; /* next TRB guest address */ }; struct usb_data_xfer { struct usb_data_xfer_block data[USB_MAX_XFER_BLOCKS]; struct usb_device_request *ureq; /* setup ctl request */ int ndata; /* # of data items */ int head; int tail; pthread_mutex_t mtx; }; enum USB_ERRCODE { USB_ACK, USB_NAK, USB_STALL, USB_NYET, USB_ERR, USB_SHORT }; #define USB_DATA_GET_ERRCODE(x) (x)->processed >> 8 #define USB_DATA_SET_ERRCODE(x,e) do { \ (x)->processed = ((x)->processed & 0xFF) | (e << 8); \ } while (0) #define USB_DATA_OK(x,i) ((x)->data[(i)].buf != NULL) #define USB_DATA_XFER_INIT(x) do { \ memset((x), 0, sizeof(*(x))); \ pthread_mutex_init(&((x)->mtx), NULL); \ } while (0) #define USB_DATA_XFER_RESET(x) do { \ memset((x)->data, 0, sizeof((x)->data)); \ (x)->ndata = 0; \ (x)->head = (x)->tail = 0; \ } while (0) #define USB_DATA_XFER_LOCK(x) do { \ pthread_mutex_lock(&((x)->mtx)); \ } while (0) #define USB_DATA_XFER_UNLOCK(x) do { \ pthread_mutex_unlock(&((x)->mtx)); \ } while (0) struct usb_devemu *usb_emu_finddev(const char *name); struct usb_data_xfer_block *usb_data_xfer_append(struct usb_data_xfer *xfer, void *buf, int blen, void *hci_data, int ccs); #endif /* _USB_EMUL_H_ */