Index: head/lib/libvmmapi/vmmapi.c
===================================================================
--- head/lib/libvmmapi/vmmapi.c	(revision 361081)
+++ head/lib/libvmmapi/vmmapi.c	(revision 361082)
@@ -1,1643 +1,1663 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <sys/ioctl.h>
 #include <sys/linker.h>
 #include <sys/mman.h>
 #include <sys/module.h>
 #include <sys/_iovec.h>
 #include <sys/cpuset.h>
 
 #include <x86/segments.h>
 #include <machine/specialreg.h>
 
 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
 #include <fcntl.h>
 #include <unistd.h>
 
 #include <libutil.h>
 
 #include <vm/vm.h>
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 #include <machine/vmm_snapshot.h>
 
 #include "vmmapi.h"
 
 /* Byte multipliers; the UL suffix makes each product an unsigned long. */
 #define	MB	(1024 * 1024UL)
 #define	GB	(1024 * 1024 * 1024UL)
 
 /*
  * Size of the guard region before and after the virtual address space
  * mapping the guest physical memory. This must be a multiple of the
  * superpage size for performance reasons.
  *
  * The same guard size is also placed around device memory mappings.
  */
 #define	VM_MMAP_GUARD_SIZE	(4 * MB)
 
 /* Shorthand protection masks built from the mmap(2) PROT_* bits. */
 #define	PROT_RW		(PROT_READ | PROT_WRITE)
 #define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)
 
 /* Per-VM handle returned by vm_open() and passed to every other call. */
 struct vmctx {
 	int	fd;		/* descriptor for /dev/vmm/<name> */
 	uint32_t lowmem_limit;	/* largest size of the lowmem segment */
 	int	memflags;	/* VM_MEM_F_* flags */
 	size_t	lowmem;		/* bytes of guest memory starting at gpa 0 */
 	size_t	highmem;	/* bytes of guest memory starting at 4GB */
 	char	*baseaddr;	/* host address that corresponds to gpa 0 */
 	char	*name;		/* VM name, stored right after this struct */
 };
 
 /*
  * Create or destroy a VM by writing its name to the matching hw.vmm
  * sysctl node. Note that 'x' is expanded twice by each macro.
  */
 #define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
 #define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
 
 /*
  * Open the device node "/dev/vmm/<name>" for reading and writing.
  *
  * Returns the result of open(2): a descriptor, or -1 on failure.
  */
 static int
 vm_device_open(const char *name)
 {
 	char *path;
 	int pathlen, vmfd;
 
 	/* Directory prefix + VM name + terminating NUL. */
 	pathlen = strlen("/dev/vmm/") + strlen(name) + 1;
 	path = malloc(pathlen);
 	assert(path != NULL);
 	snprintf(path, pathlen, "/dev/vmm/%s", name);
 
 	vmfd = open(path, O_RDWR, 0);
 	free(path);
 
 	return (vmfd);
 }
 
 /*
  * Create a VM called 'name' through the hw.vmm.create sysctl.
  *
  * If the "vmm" module is not found, a load is attempted first; the result
  * of that attempt is not checked. Returns the sysctlbyname() result.
  */
 int
 vm_create(const char *name)
 {
 	int modid;
 
 	modid = modfind("vmm");
 	if (modid < 0)
 		kldload("vmm");
 
 	return (CREATE((char *)name));
 }
 
 /*
  * Open the existing VM called 'name' and return a context for it.
  *
  * The context and a private copy of the name live in one allocation, so a
  * single free() releases both.
  *
  * Returns NULL if the device node cannot be opened, with errno left as set
  * by the failed open. A failed open no longer destroys the VM: this
  * function did not create it, so it only releases its own allocation.
  */
 struct vmctx *
 vm_open(const char *name)
 {
 	struct vmctx *vm;
 	int saved_errno;
 
 	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
 	assert(vm != NULL);
 
 	vm->fd = -1;
 	vm->memflags = 0;
 	vm->lowmem_limit = 3 * GB;
 	/* No guest memory is mapped until vm_setup_memory() runs. */
 	vm->lowmem = 0;
 	vm->highmem = 0;
 	vm->baseaddr = NULL;
 	vm->name = (char *)(vm + 1);
 	strcpy(vm->name, name);
 
 	vm->fd = vm_device_open(vm->name);
 	if (vm->fd < 0) {
 		/* free() may change errno; keep the open(2) error. */
 		saved_errno = errno;
 		free(vm);
 		errno = saved_errno;
 		return (NULL);
 	}
 
 	return (vm);
 }
 
 /*
  * Close the VM device (if open), ask the kernel to destroy the VM by name
  * and release the context. 'vm' must not be used afterwards.
  */
 void
 vm_destroy(struct vmctx *vm)
 {
 	assert(vm != NULL);
 
 	if (!(vm->fd < 0))
 		close(vm->fd);
 
 	/* The name is stored inside 'vm', so use it before the free(). */
 	DESTROY(vm->name);
 	free(vm);
 }
 
 /*
  * Parse a memory size from 'optarg' into '*ret_memsize'.
  *
  * A plain number is parsed with strtoul() (base auto-detected). Anything
  * else, including a number that overflows strtoul(), is handed to
  * expand_number(). Returns 0 on success, otherwise the expand_number()
  * result.
  */
 int
 vm_parse_memsize(const char *optarg, size_t *ret_memsize)
 {
 	char *endptr;
 	size_t optval;
 	int error;
 
 	/* errno must be cleared first so that ERANGE can be detected. */
 	errno = 0;
 	optval = strtoul(optarg, &endptr, 0);
 	if (*optarg != '\0' && *endptr == '\0' && errno != ERANGE) {
 		/*
 		 * For the sake of backward compatibility if the memory size
 		 * specified on the command line is less than a megabyte then
 		 * it is interpreted as being in units of MB.
 		 */
 		if (optval < MB)
 			optval *= MB;
 		*ret_memsize = optval;
 		error = 0;
 	} else
 		error = expand_number(optarg, ret_memsize);
 
 	return (error);
 }
 
 /* Return the current upper bound on the size of the lowmem segment. */
 uint32_t
 vm_get_lowmem_limit(struct vmctx *ctx)
 {
 	uint32_t limit;
 
 	limit = ctx->lowmem_limit;
 	return (limit);
 }
 
 /*
  * Set the upper bound on the size of the lowmem segment. The value is
  * consulted by vm_setup_memory().
  */
 void
 vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
 {
 	struct vmctx *vm = ctx;
 
 	vm->lowmem_limit = limit;
 }
 
 /* Replace the context's VM_MEM_F_* memory flags with 'flags'. */
 void
 vm_set_memflags(struct vmctx *ctx, int flags)
 {
 	struct vmctx *vm = ctx;
 
 	vm->memflags = flags;
 }
 
 /* Return the context's VM_MEM_F_* memory flags. */
 int
 vm_get_memflags(struct vmctx *ctx)
 {
 	int flags;
 
 	flags = ctx->memflags;
 	return (flags);
 }
 
 /*
  * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
  *
  * Returns 0 if the mapping was created or an identical one already starts
  * at 'gpa'. Returns -1 with errno set to EEXIST if a mapping at 'gpa'
  * differs in segment, offset, protection or flags; otherwise returns the
  * ioctl result.
  */
 int
 vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
     size_t len, int prot)
 {
 	struct vm_memmap memmap;
 	int error, flags;
 
 	/* Record the request before the lookup overwrites the arguments. */
 	memmap.gpa = gpa;
 	memmap.segid = segid;
 	memmap.segoff = off;
 	memmap.len = len;
 	memmap.prot = prot;
 	memmap.flags = 0;
 	if ((ctx->memflags & VM_MEM_F_WIRED) != 0)
 		memmap.flags |= VM_MEMMAP_F_WIRED;
 
 	/*
 	 * If this mapping already exists then don't create it again. This
 	 * is the common case for SYSMEM mappings created by bhyveload(8).
 	 */
 	error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
 	if (error != 0 || gpa != memmap.gpa)
 		return (ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap));
 
 	if (segid == memmap.segid && off == memmap.segoff &&
 	    prot == memmap.prot && flags == memmap.flags)
 		return (0);
 
 	errno = EEXIST;
 	return (-1);
 }
 
 int
 vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
     size_t *lowmem_size, size_t *highmem_size)
 {
 
 	*guest_baseaddr = ctx->baseaddr;
 	*lowmem_size = ctx->lowmem;
 	*highmem_size = ctx->highmem;
 	return (0);
 }
 
 /*
  * Query the kernel with VM_MMAP_GETNEXT, seeded with '*gpa'.
  *
  * On success every output parameter is filled from the kernel's reply and
  * 0 is returned. On failure the outputs are untouched and the ioctl result
  * is returned.
  */
 int
 vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
 {
 	struct vm_memmap mm;
 	int rc;
 
 	bzero(&mm, sizeof(mm));
 	mm.gpa = *gpa;
 
 	rc = ioctl(ctx->fd, VM_MMAP_GETNEXT, &mm);
 	if (rc != 0)
 		return (rc);
 
 	*gpa = mm.gpa;
 	*segid = mm.segid;
 	*segoff = mm.segoff;
 	*len = mm.len;
 	*prot = mm.prot;
 	*flags = mm.flags;
 
 	return (rc);
 }
 
 /*
  * Compare two memory segments given as (length, name) pairs.
  *
  * Returns 0 when the lengths are equal and the names are either both NULL
  * or equal strings, and -1 otherwise. A NULL name never matches a non-NULL
  * one; only device memory segments are named.
  */
 static int
 cmpseg(size_t len, const char *str, size_t len2, const char *str2)
 {
 
 	if (len != len2)
 		return (-1);
 
 	if (str == NULL && str2 == NULL)
 		return (0);
 	if (str == NULL || str2 == NULL)
 		return (-1);
 
 	return (strcmp(str, str2) == 0 ? 0 : -1);
 }
 
 /*
  * Create memory segment 'segid' of 'len' bytes, optionally named 'name'.
  *
  * If the segment already exists with the same length and name this returns
  * 0. A conflicting existing segment yields -1/EINVAL, and a name that does
  * not fit in the request yields -1/ENAMETOOLONG. Otherwise the ioctl
  * result is returned.
  */
 static int
 vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
 {
 	struct vm_memseg memseg;
 	size_t copied;
 	int error;
 
 	/*
 	 * If the memory segment has already been created then just return.
 	 * This is the usual case for the SYSMEM segment created by userspace
 	 * loaders like bhyveload(8).
 	 */
 	error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
 	    sizeof(memseg.name));
 	if (error)
 		return (error);
 
 	if (memseg.len != 0) {
 		if (cmpseg(len, name, memseg.len,
 		    VM_MEMSEG_NAME(&memseg)) == 0)
 			return (0);
 		errno = EINVAL;
 		return (-1);
 	}
 
 	/* Not present yet: build a fresh request. */
 	bzero(&memseg, sizeof(memseg));
 	memseg.segid = segid;
 	memseg.len = len;
 	if (name != NULL) {
 		copied = strlcpy(memseg.name, name, sizeof(memseg.name));
 		if (copied >= sizeof(memseg.name)) {
 			errno = ENAMETOOLONG;
 			return (-1);
 		}
 	}
 
 	return (ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg));
 }
 
 /*
  * Fetch the length and name of memory segment 'segid'.
  *
  * On success '*lenp' receives the length and the name is copied into
  * 'namebuf' (at most 'bufsize' bytes). Returns the ioctl result, or -1
  * with errno set to ENAMETOOLONG if the name did not fit in 'namebuf'.
  */
 int
 vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
     size_t bufsize)
 {
 	struct vm_memseg memseg;
 	size_t n;
 	int error;
 
 	/*
 	 * Zero the request like the other ioctl wrappers in this file do, so
 	 * no uninitialized stack bytes are handed to the kernel.
 	 */
 	bzero(&memseg, sizeof(memseg));
 	memseg.segid = segid;
 	error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
 	if (error == 0) {
 		*lenp = memseg.len;
 		n = strlcpy(namebuf, memseg.name, bufsize);
 		if (n >= bufsize) {
 			errno = ENAMETOOLONG;
 			error = -1;
 		}
 	}
 	return (error);
 }
 
 /*
  * Map 'len' bytes of the system memory segment at guest physical address
  * 'gpa', then map the same range into this process at 'base + gpa'.
  *
  * Returns 0 on success, the vm_mmap_memseg() error if the guest mapping
  * fails, or -1 if the host mmap fails.
  */
 static int
 setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
 {
 	char *hostva;
 	int mapflags, rc;
 
 	/* Map 'len' bytes starting at 'gpa' in the guest address space */
 	rc = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
 	if (rc)
 		return (rc);
 
 	/* Keep guest memory out of core dumps unless explicitly requested. */
 	mapflags = MAP_SHARED | MAP_FIXED;
 	if (!(ctx->memflags & VM_MEM_F_INCORE))
 		mapflags |= MAP_NOCORE;
 
 	/* mmap into the process address space on the host */
 	hostva = mmap(base + gpa, len, PROT_RW, mapflags, ctx->fd, gpa);
 
 	return (hostva == MAP_FAILED ? -1 : 0);
 }
 
 int
 vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
 {
 	size_t objsize, len;
 	vm_paddr_t gpa;
 	char *baseaddr, *ptr;
 	int error;
 
 	assert(vms == VM_MMAP_ALL);
 
 	/*
 	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
 	 * create another 'highmem' segment above 4GB for the remainder.
 	 */
 	if (memsize > ctx->lowmem_limit) {
 		ctx->lowmem = ctx->lowmem_limit;
 		ctx->highmem = memsize - ctx->lowmem_limit;
 		objsize = 4*GB + ctx->highmem;
 	} else {
 		ctx->lowmem = memsize;
 		ctx->highmem = 0;
 		objsize = ctx->lowmem;
 	}
 
 	error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
 	if (error)
 		return (error);
 
 	/*
 	 * Stake out a contiguous region covering the guest physical memory
 	 * and the adjoining guard regions.
 	 */
 	len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
 	ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
 	if (ptr == MAP_FAILED)
 		return (-1);
 
 	baseaddr = ptr + VM_MMAP_GUARD_SIZE;
 	if (ctx->highmem > 0) {
 		gpa = 4*GB;
 		len = ctx->highmem;
 		error = setup_memory_segment(ctx, gpa, len, baseaddr);
 		if (error)
 			return (error);
 	}
 
 	if (ctx->lowmem > 0) {
 		gpa = 0;
 		len = ctx->lowmem;
 		error = setup_memory_segment(ctx, gpa, len, baseaddr);
 		if (error)
 			return (error);
 	}
 
 	ctx->baseaddr = baseaddr;
 
 	return (0);
 }
 
 /*
  * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
  * the lowmem or highmem regions.
  *
  * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
  * The instruction emulation code depends on this behavior.
  */
 void *
 vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
 {
 
 	/* Range inside [0, lowmem)? */
 	if (ctx->lowmem > 0 && gaddr < ctx->lowmem &&
 	    len <= ctx->lowmem && gaddr + len <= ctx->lowmem)
 		return (ctx->baseaddr + gaddr);
 
 	/* Range inside [4GB, 4GB + highmem)? */
 	if (ctx->highmem > 0 && gaddr >= 4*GB &&
 	    gaddr < 4*GB + ctx->highmem && len <= ctx->highmem &&
 	    gaddr + len <= 4*GB + ctx->highmem)
 		return (ctx->baseaddr + gaddr);
 
 	return (NULL);
 }
 
 /*
  * Translate a host pointer into guest memory back to a guest physical
  * address. Returns (vm_paddr_t)-1 if 'addr' is in neither region.
  *
  * NOTE(review): the lowmem test accepts an offset equal to ctx->lowmem
  * (one past the end) while the highmem test uses a strict '<'; confirm
  * whether this asymmetry is intended.
  */
 vm_paddr_t
 vm_rev_map_gpa(struct vmctx *ctx, void *addr)
 {
 	vm_paddr_t offaddr;
 
 	offaddr = (char *)addr - ctx->baseaddr;
 
 	if (ctx->lowmem > 0 && offaddr >= 0 && offaddr <= ctx->lowmem)
 		return (offaddr);
 
 	if (ctx->highmem > 0 && offaddr >= 4*GB &&
 	    offaddr < 4*GB + ctx->highmem)
 		return (offaddr);
 
 	return ((vm_paddr_t)-1);
 }
 
 /* TODO: maximum size for vmname */
 /*
  * Copy the VM name into 'buf' (capacity 'max_len'). Returns 0 on success
  * or EINVAL if the name did not fit.
  */
 int
 vm_get_name(struct vmctx *ctx, char *buf, size_t max_len)
 {
 	size_t needed;
 
 	needed = strlcpy(buf, ctx->name, max_len);
 	return (needed >= max_len ? EINVAL : 0);
 }
 
 /* Return the size in bytes of the lowmem region recorded in the context. */
 size_t
 vm_get_lowmem_size(struct vmctx *ctx)
 {
 	size_t sz;
 
 	sz = ctx->lowmem;
 	return (sz);
 }
 
 /* Return the size in bytes of the highmem region recorded in the context. */
 size_t
 vm_get_highmem_size(struct vmctx *ctx)
 {
 	size_t sz;
 
 	sz = ctx->highmem;
 	return (sz);
 }
 
 /*
  * Create device memory segment 'segid' named 'name' with 'len' bytes and
  * map it into this process between two guard regions.
  *
  * Returns the host address of the mapping, or MAP_FAILED with errno set.
  * On failure the guard reservation is released, and errno is preserved
  * across the cleanup calls.
  */
 void *
 vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
 {
 	char pathname[MAXPATHLEN];
 	size_t len2;
 	char *base, *ptr;
 	int fd, error, flags, saved_errno;
 
 	fd = -1;
 	ptr = MAP_FAILED;
 	if (name == NULL || strlen(name) == 0) {
 		errno = EINVAL;
 		goto done;
 	}
 
 	error = vm_alloc_memseg(ctx, segid, len, name);
 	if (error)
 		goto done;
 
 	/* The devmem node is "/dev/vmm.io/<vmname>.<segname>". */
 	strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
 	strlcat(pathname, ctx->name, sizeof(pathname));
 	strlcat(pathname, ".", sizeof(pathname));
 	strlcat(pathname, name, sizeof(pathname));
 
 	fd = open(pathname, O_RDWR);
 	if (fd < 0)
 		goto done;
 
 	/*
 	 * Stake out a contiguous region covering the device memory and the
 	 * adjoining guard regions.
 	 */
 	len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
 	base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
 	    0);
 	if (base == MAP_FAILED)
 		goto done;
 
 	flags = MAP_SHARED | MAP_FIXED;
 	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
 		flags |= MAP_NOCORE;
 
 	/* mmap the devmem region in the host address space */
 	ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
 	if (ptr == MAP_FAILED) {
 		/* Do not leak the guard reservation made above. */
 		saved_errno = errno;
 		munmap(base, len2);
 		errno = saved_errno;
 	}
 done:
 	if (fd >= 0) {
 		/* The mapping stays valid after the descriptor is closed. */
 		saved_errno = errno;
 		close(fd);
 		errno = saved_errno;
 	}
 	return (ptr);
 }
 
 int
 vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 	    uint64_t base, uint32_t limit, uint32_t access)
 {
 	int error;
 	struct vm_seg_desc vmsegdesc;
 
 	bzero(&vmsegdesc, sizeof(vmsegdesc));
 	vmsegdesc.cpuid = vcpu;
 	vmsegdesc.regnum = reg;
 	vmsegdesc.desc.base = base;
 	vmsegdesc.desc.limit = limit;
 	vmsegdesc.desc.access = access;
 
 	error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 	return (error);
 }
 
 int
 vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 	    uint64_t *base, uint32_t *limit, uint32_t *access)
 {
 	int error;
 	struct vm_seg_desc vmsegdesc;
 
 	bzero(&vmsegdesc, sizeof(vmsegdesc));
 	vmsegdesc.cpuid = vcpu;
 	vmsegdesc.regnum = reg;
 
 	error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 	if (error == 0) {
 		*base = vmsegdesc.desc.base;
 		*limit = vmsegdesc.desc.limit;
 		*access = vmsegdesc.desc.access;
 	}
 	return (error);
 }
 
 /*
  * Variant of vm_get_desc() that stores the result in a 'struct seg_desc'.
  */
 int
 vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
 {
 
 	return (vm_get_desc(ctx, vcpu, reg, &seg_desc->base,
 	    &seg_desc->limit, &seg_desc->access));
 }
 
 int
 vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
 {
 	int error;
 	struct vm_register vmreg;
 
 	bzero(&vmreg, sizeof(vmreg));
 	vmreg.cpuid = vcpu;
 	vmreg.regnum = reg;
 	vmreg.regval = val;
 
 	error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
 	return (error);
 }
 
 int
 vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
 {
 	int error;
 	struct vm_register vmreg;
 
 	bzero(&vmreg, sizeof(vmreg));
 	vmreg.cpuid = vcpu;
 	vmreg.regnum = reg;
 
 	error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
 	*ret_val = vmreg.regval;
 	return (error);
 }
 
 int
 vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals)
 {
 	int error;
 	struct vm_register_set vmregset;
 
 	bzero(&vmregset, sizeof(vmregset));
 	vmregset.cpuid = vcpu;
 	vmregset.count = count;
 	vmregset.regnums = regnums;
 	vmregset.regvals = regvals;
 
 	error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset);
 	return (error);
 }
 
 int
 vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals)
 {
 	int error;
 	struct vm_register_set vmregset;
 
 	bzero(&vmregset, sizeof(vmregset));
 	vmregset.cpuid = vcpu;
 	vmregset.count = count;
 	vmregset.regnums = regnums;
 	vmregset.regvals = regvals;
 
 	error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset);
 	return (error);
 }
 
 int
 vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
 {
 	int error;
 	struct vm_run vmrun;
 
 	bzero(&vmrun, sizeof(vmrun));
 	vmrun.cpuid = vcpu;
 
 	error = ioctl(ctx->fd, VM_RUN, &vmrun);
 	bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
 	return (error);
 }
 
 int
 vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
 {
 	struct vm_suspend vmsuspend;
 
 	bzero(&vmsuspend, sizeof(vmsuspend));
 	vmsuspend.how = how;
 	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
 }
 
 /* Issue the VM_REINIT ioctl. Returns the ioctl result. */
 int
 vm_reinit(struct vmctx *ctx)
 {
 	int rc;
 
 	rc = ioctl(ctx->fd, VM_REINIT, 0);
 	return (rc);
 }
 
 int
 vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
     uint32_t errcode, int restart_instruction)
 {
 	struct vm_exception exc;
 
 	exc.cpuid = vcpu;
 	exc.vector = vector;
 	exc.error_code = errcode;
 	exc.error_code_valid = errcode_valid;
 	exc.restart_instruction = restart_instruction;
 
 	return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
 }
 
 /*
  * Translate an APIC id to a vcpu id. 'ctx' is unused.
  */
 int
 vm_apicid2vcpu(struct vmctx *ctx, int apicid)
 {
 	int vcpu;
 
 	/*
 	 * The apic id associated with the 'vcpu' has the same numerical value
 	 * as the 'vcpu' itself.
 	 */
 	vcpu = apicid;
 	return (vcpu);
 }
 
 int
 vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
 {
 	struct vm_lapic_irq vmirq;
 
 	bzero(&vmirq, sizeof(vmirq));
 	vmirq.cpuid = vcpu;
 	vmirq.vector = vector;
 
 	return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
 }
 
 int
 vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
 {
 	struct vm_lapic_irq vmirq;
 
 	bzero(&vmirq, sizeof(vmirq));
 	vmirq.cpuid = vcpu;
 	vmirq.vector = vector;
 
 	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
 }
 
 int
 vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
 {
 	struct vm_lapic_msi vmmsi;
 
 	bzero(&vmmsi, sizeof(vmmsi));
 	vmmsi.addr = addr;
 	vmmsi.msg = msg;
 
 	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
 }
 
 int
 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
 }
 
 int
 vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
 }
 
 int
 vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
 }
 
 /*
  * Issue VM_IOAPIC_PINCOUNT, passing 'pincount' as the ioctl argument.
  * Returns the ioctl result.
  */
 int
 vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
 {
 	int rc;
 
 	rc = ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount);
 	return (rc);
 }
 
 /*
  * Read or write 'size' bytes of a kernel-emulated device register at guest
  * physical address 'gpa' on behalf of 'vcpu'.
  *
  * When 'write' is true '*value' is sent to the kernel; otherwise '*value'
  * receives the result, and only if the ioctl succeeded. Returns the ioctl
  * result.
  */
 int
+vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, vm_paddr_t gpa,
+    bool write, int size, uint64_t *value)
+{
+	struct vm_readwrite_kernemu_device irp = {
+		.vcpuid = vcpu,
+		.access_width = fls(size) - 1,	/* log2(size) for a power of 2 */
+		.gpa = gpa,
+		.value = write ? *value : ~0ul,	/* all-ones placeholder on reads */
+	};
+	long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
+	int rc;
+
+	rc = ioctl(ctx->fd, cmd, &irp);
+	if (rc == 0 && !write)
+		*value = irp.value;
+	return (rc);
+}
+
 /*
  * Issue VM_ISA_ASSERT_IRQ with the given ATPIC and I/O APIC pin numbers.
  * Returns the ioctl result.
  */
+int
 vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	/* Start from a zeroed request, then fill in both pin numbers. */
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
 }
 
 int
 vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
 }
 
 int
 vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
 }
 
 int
 vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
     enum vm_intr_trigger trigger)
 {
 	struct vm_isa_irq_trigger isa_irq_trigger;
 
 	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
 	isa_irq_trigger.atpic_irq = atpic_irq;
 	isa_irq_trigger.trigger = trigger;
 
 	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
 }
 
 int
 vm_inject_nmi(struct vmctx *ctx, int vcpu)
 {
 	struct vm_nmi vmnmi;
 
 	bzero(&vmnmi, sizeof(vmnmi));
 	vmnmi.cpuid = vcpu;
 
 	return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
 }
 
 /* Capability names, indexed by the VM_CAP_* capability type. */
 static const char *capstrmap[] = {
 	[VM_CAP_HALT_EXIT]		= "hlt_exit",
 	[VM_CAP_MTRAP_EXIT]		= "mtrap_exit",
 	[VM_CAP_PAUSE_EXIT]		= "pause_exit",
 	[VM_CAP_UNRESTRICTED_GUEST]	= "unrestricted_guest",
 	[VM_CAP_ENABLE_INVPCID]		= "enable_invpcid",
 	[VM_CAP_BPT_EXIT]		= "bpt_exit",
 };
 
 /*
  * Look up a capability by name. Returns its index in capstrmap (the
  * capability type) or -1 if no entry matches.
  *
  * capstrmap is built with designated initializers, so any index without
  * an entry is NULL; such slots are skipped rather than passed to strcmp().
  */
 int
 vm_capability_name2type(const char *capname)
 {
 	size_t i;
 
 	for (i = 0; i < nitems(capstrmap); i++) {
 		if (capstrmap[i] != NULL &&
 		    strcmp(capstrmap[i], capname) == 0)
 			return ((int)i);
 	}
 
 	return (-1);
 }
 
 /*
  * Return the name stored in capstrmap for capability 'type', or NULL when
  * 'type' is outside the table.
  */
 const char *
 vm_capability_type2name(int type)
 {
 	if (type < 0 || type >= nitems(capstrmap))
 		return (NULL);
 
 	return (capstrmap[type]);
 }
 
 int
 vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 		  int *retval)
 {
 	int error;
 	struct vm_capability vmcap;
 
 	bzero(&vmcap, sizeof(vmcap));
 	vmcap.cpuid = vcpu;
 	vmcap.captype = cap;
 
 	error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
 	*retval = vmcap.capval;
 	return (error);
 }
 
 int
 vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
 {
 	struct vm_capability vmcap;
 
 	bzero(&vmcap, sizeof(vmcap));
 	vmcap.cpuid = vcpu;
 	vmcap.captype = cap;
 	vmcap.capval = val;
 
 	return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
 }
 
 int
 vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
 {
 	struct vm_pptdev pptdev;
 
 	bzero(&pptdev, sizeof(pptdev));
 	pptdev.bus = bus;
 	pptdev.slot = slot;
 	pptdev.func = func;
 
 	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
 }
 
 int
 vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
 {
 	struct vm_pptdev pptdev;
 
 	bzero(&pptdev, sizeof(pptdev));
 	pptdev.bus = bus;
 	pptdev.slot = slot;
 	pptdev.func = func;
 
 	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
 }
 
 /*
  * Issue VM_MAP_PPTDEV_MMIO for the device at bus/slot/func, describing
  * 'len' bytes with guest address 'gpa' and host physical address 'hpa'.
  * Returns the ioctl result.
  */
 int
 vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 		   vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	struct vm_pptdev_mmio req;
 	int rc;
 
 	bzero(&req, sizeof(req));
 	req.bus = bus;
 	req.slot = slot;
 	req.func = func;
 	req.gpa = gpa;
 	req.len = len;
 	req.hpa = hpa;
 
 	rc = ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &req);
 	return (rc);
 }
 
 int
 vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
     uint64_t addr, uint64_t msg, int numvec)
 {
 	struct vm_pptdev_msi pptmsi;
 
 	bzero(&pptmsi, sizeof(pptmsi));
 	pptmsi.vcpu = vcpu;
 	pptmsi.bus = bus;
 	pptmsi.slot = slot;
 	pptmsi.func = func;
 	pptmsi.msg = msg;
 	pptmsi.addr = addr;
 	pptmsi.numvec = numvec;
 
 	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
 }
 
 int	
 vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
     int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct vm_pptdev_msix pptmsix;
 
 	bzero(&pptmsix, sizeof(pptmsix));
 	pptmsix.vcpu = vcpu;
 	pptmsix.bus = bus;
 	pptmsix.slot = slot;
 	pptmsix.func = func;
 	pptmsix.idx = idx;
 	pptmsix.msg = msg;
 	pptmsix.addr = addr;
 	pptmsix.vector_control = vector_control;
 
 	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
 }
 
 /*
  * Fetch the statistics for 'vcpu'.
  *
  * Returns a pointer to the statistics buffer, or NULL if the ioctl fails.
  * The buffer is a function-local static, so each call overwrites the data
  * returned by the previous one. 'ret_tv' and 'ret_entries' may be NULL.
  */
 uint64_t *
 vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
 	     int *ret_entries)
 {
 	static struct vm_stats vmstats;
 
 	vmstats.cpuid = vcpu;
 	if (ioctl(ctx->fd, VM_STATS, &vmstats) != 0)
 		return (NULL);
 
 	if (ret_entries)
 		*ret_entries = vmstats.num_entries;
 	if (ret_tv)
 		*ret_tv = vmstats.tv;
 
 	return (vmstats.statbuf);
 }
 
 /*
  * Return the description of statistic 'index', or NULL if the ioctl
  * fails. The string lives in a function-local static that is overwritten
  * by the next call.
  */
 const char *
 vm_get_stat_desc(struct vmctx *ctx, int index)
 {
 	static struct vm_stat_desc statdesc;
 
 	statdesc.index = index;
 	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) != 0)
 		return (NULL);
 
 	return (statdesc.desc);
 }
 
 int
 vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
 {
 	int error;
 	struct vm_x2apic x2apic;
 
 	bzero(&x2apic, sizeof(x2apic));
 	x2apic.cpuid = vcpu;
 
 	error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
 	*state = x2apic.state;
 	return (error);
 }
 
 int
 vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
 {
 	int error;
 	struct vm_x2apic x2apic;
 
 	bzero(&x2apic, sizeof(x2apic));
 	x2apic.cpuid = vcpu;
 	x2apic.state = state;
 
 	error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);
 
 	return (error);
 }
 
 /*
  * From Intel Vol 3a:
  * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
  */
 int
 vcpu_reset(struct vmctx *vmctx, int vcpu)
 {
 	int error;
 	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
 	uint32_t desc_access, desc_limit;
 	uint16_t sel;
 
 	zero = 0;
 
 	rflags = 0x2;
 	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
 	if (error)
 		goto done;
 
 	rip = 0xfff0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
 		goto done;
 
 	cr0 = CR0_NE;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
 		goto done;
 
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
 		goto done;
 	
 	cr4 = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
 		goto done;
 
 	/*
 	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
 	 */
 	desc_base = 0xffff0000;
 	desc_limit = 0xffff;
 	desc_access = 0x0093;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0xf000;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
 		goto done;
 
 	/*
 	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
 	 */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x0093;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
 		goto done;
 
 	/* General purpose registers */
 	rdx = 0xf00;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
 		goto done;
 
 	/* GDTR, IDTR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
 			    desc_base, desc_limit, desc_access);
 	if (error != 0)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
 			    desc_base, desc_limit, desc_access);
 	if (error != 0)
 		goto done;
 
 	/* TR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x0000008b;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
 		goto done;
 
 	/* LDTR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x00000082;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
 			    desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
 		goto done;
 
 	/* XXX cr2, debug registers */
 
 	error = 0;
 done:
 	return (error);
 }
 
 int
 vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
 {
 	int error, i;
 	struct vm_gpa_pte gpapte;
 
 	bzero(&gpapte, sizeof(gpapte));
 	gpapte.gpa = gpa;
 
 	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);
 
 	if (error == 0) {
 		*num = gpapte.ptenum;
 		for (i = 0; i < gpapte.ptenum; i++)
 			pte[i] = gpapte.pte[i];
 	}
 
 	return (error);
 }
 
 int
 vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
 {
 	int error;
 	struct vm_hpet_cap cap;
 
 	bzero(&cap, sizeof(struct vm_hpet_cap));
 	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
 	if (capabilities != NULL)
 		*capabilities = cap.capabilities;
 	return (error);
 }
 
 int
 vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa, int *fault)
 {
 	struct vm_gla2gpa gg;
 	int error;
 
 	bzero(&gg, sizeof(struct vm_gla2gpa));
 	gg.vcpuid = vcpu;
 	gg.prot = prot;
 	gg.gla = gla;
 	gg.paging = *paging;
 
 	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
 	if (error == 0) {
 		*fault = gg.fault;
 		*gpa = gg.gpa;
 	}
 	return (error);
 }
 
 int
 vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa, int *fault)
 {
 	struct vm_gla2gpa gg;
 	int error;
 
 	bzero(&gg, sizeof(struct vm_gla2gpa));
 	gg.vcpuid = vcpu;
 	gg.prot = prot;
 	gg.gla = gla;
 	gg.paging = *paging;
 
 	error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg);
 	if (error == 0) {
 		*fault = gg.fault;
 		*gpa = gg.gpa;
 	}
 	return (error);
 }
 
 /*
  * Fallback min() for when no other definition is in scope. Each argument
  * may be evaluated twice, so do not pass expressions with side effects.
  */
 #ifndef min
 #define	min(a,b)	(((a) < (b)) ? (a) : (b))
 #endif
 
 /*
  * Build an iovec array covering 'len' bytes of guest memory starting at
  * guest linear address 'gla'.
  *
  * All 'iovcnt' entries are cleared first. The range is then translated
  * piece by piece, never crossing a page boundary, with one entry filled
  * per piece. The caller must supply enough entries (asserted).
  *
  * Returns 0 on success, EFAULT if a translated piece is not backed by
  * guest memory, or the vm_gla2gpa() result if translation fails. If the
  * translation reports a fault, '*fault' is set and the value returned is
  * the (zero) translation result.
  */
 int
 vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
     int *fault)
 {
 	struct iovec *cur;
 	void *hostva;
 	uint64_t gpa;
 	int chunk, error, idx, pgoff;
 
 	for (idx = 0; idx < iovcnt; idx++) {
 		iov[idx].iov_base = 0;
 		iov[idx].iov_len = 0;
 	}
 
 	cur = iov;
 	while (len != 0) {
 		assert(iovcnt > 0);
 		error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
 		if (error || *fault)
 			return (error);
 
 		/* Stop this piece at the end of the current page. */
 		pgoff = gpa & PAGE_MASK;
 		chunk = min(len, PAGE_SIZE - pgoff);
 
 		hostva = vm_map_gpa(ctx, gpa, chunk);
 		if (hostva == NULL)
 			return (EFAULT);
 
 		cur->iov_base = hostva;
 		cur->iov_len = chunk;
 		cur++;
 		iovcnt--;
 
 		gla += chunk;
 		len -= chunk;
 	}
 	return (0);
 }
 
 /*
  * Counterpart of vm_copy_setup(). It currently does nothing; all
  * arguments are ignored.
  */
 void
 vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
 {
 
 	/* Nothing to release. */
 }
 
 /*
  * Gather 'len' bytes from the memory described by 'iov' into the buffer
  * at 'vp'. Entries are consumed in order, and each one used must have a
  * non-zero length (asserted).
  */
 void
 vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
 {
 	char *out;
 	size_t chunk;
 
 	for (out = vp; len != 0; iov++, out += chunk, len -= chunk) {
 		assert(iov->iov_len);
 		chunk = min(len, iov->iov_len);
 		bcopy((const char *)iov->iov_base, out, chunk);
 	}
 }
 
 /*
  * Scatter 'len' bytes from the buffer at 'vp' into the memory described
  * by 'iov'. Entries are consumed in order, and each one used must have a
  * non-zero length (asserted).
  */
 void
 vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
     size_t len)
 {
 	const char *in;
 	size_t chunk;
 
 	for (in = vp; len != 0; iov++, in += chunk, len -= chunk) {
 		assert(iov->iov_len);
 		chunk = min(len, iov->iov_len);
 		bcopy(in, (char *)iov->iov_base, chunk);
 	}
 }
 
 /*
  * Issue VM_GET_CPUS for the set selected by 'which', with '*cpus' as the
  * destination cpuset. Returns the ioctl result.
  */
 static int
 vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
 {
 	struct vm_cpuset req;
 
 	bzero(&req, sizeof(req));
 	req.which = which;
 	req.cpusetsize = sizeof(cpuset_t);
 	req.cpus = cpus;
 
 	return (ioctl(ctx->fd, VM_GET_CPUS, &req));
 }
 
 /*
  * Query the VM_ACTIVE_CPUS set via vm_get_cpus(), passing 'cpus' as the
  * buffer.  Returns the vm_get_cpus() result.
  */
 int
 vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 
 	return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
 }
 
 /*
  * Query the VM_SUSPENDED_CPUS set via vm_get_cpus(), passing 'cpus' as the
  * buffer.  Returns the vm_get_cpus() result.
  */
 int
 vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 
 	return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
 }
 
 /*
  * Query the VM_DEBUG_CPUS set via vm_get_cpus(), passing 'cpus' as the
  * buffer.  Returns the vm_get_cpus() result.
  */
 int
 vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 
 	return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
 }
 
 int
 vm_activate_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
 	int error;
 
 	bzero(&ac, sizeof(struct vm_activate_cpu));
 	ac.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
 	return (error);
 }
 
 int
 vm_suspend_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
 	int error;
 
 	bzero(&ac, sizeof(struct vm_activate_cpu));
 	ac.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
 	return (error);
 }
 
 int
 vm_resume_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
 	int error;
 
 	bzero(&ac, sizeof(struct vm_activate_cpu));
 	ac.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
 	return (error);
 }
 
 int
 vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
 {
 	struct vm_intinfo vmii;
 	int error;
 
 	bzero(&vmii, sizeof(struct vm_intinfo));
 	vmii.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
 	if (error == 0) {
 		*info1 = vmii.info1;
 		*info2 = vmii.info2;
 	}
 	return (error);
 }
 
 int
 vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
 {
 	struct vm_intinfo vmii;
 	int error;
 
 	bzero(&vmii, sizeof(struct vm_intinfo));
 	vmii.vcpuid = vcpu;
 	vmii.info1 = info1;
 	error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
 	return (error);
 }
 
 int
 vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
 {
 	struct vm_rtc_data rtcdata;
 	int error;
 
 	bzero(&rtcdata, sizeof(struct vm_rtc_data));
 	rtcdata.offset = offset;
 	rtcdata.value = value;
 	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
 	return (error);
 }
 
 int
 vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
 {
 	struct vm_rtc_data rtcdata;
 	int error;
 
 	bzero(&rtcdata, sizeof(struct vm_rtc_data));
 	rtcdata.offset = offset;
 	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
 	if (error == 0)
 		*retval = rtcdata.value;
 	return (error);
 }
 
 int
 vm_rtc_settime(struct vmctx *ctx, time_t secs)
 {
 	struct vm_rtc_time rtctime;
 	int error;
 
 	bzero(&rtctime, sizeof(struct vm_rtc_time));
 	rtctime.secs = secs;
 	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
 	return (error);
 }
 
 int
 vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
 {
 	struct vm_rtc_time rtctime;
 	int error;
 
 	bzero(&rtctime, sizeof(struct vm_rtc_time));
 	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
 	if (error == 0)
 		*secs = rtctime.secs;
 	return (error);
 }
 
 /*
  * Issue the VM_RESTART_INSTRUCTION ioctl for 'vcpu'.  'arg' is an opaque
  * pointer that is used as a struct vmctx *.  Returns the ioctl() result.
  */
 int
 vm_restart_instruction(void *arg, int vcpu)
 {
 	struct vmctx *ctx = arg;
 
 	/* The ioctl takes a pointer to the vcpu id itself. */
 	return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
 }
 
 /*
  * Issue the VM_SNAPSHOT_REQ ioctl described by 'meta' on the VM referenced
  * by meta->ctx.
  *
  * Returns -1 if ioctl() returns -1 (printing a diagnostic to stderr first
  * when SNAPSHOT_DEBUG is defined) and 0 otherwise.
  */
 int
 vm_snapshot_req(struct vm_snapshot_meta *meta)
 {
 	int rc;
 
 	rc = ioctl(meta->ctx->fd, VM_SNAPSHOT_REQ, meta);
 	if (rc != -1)
 		return (0);
 
 #ifdef SNAPSHOT_DEBUG
 	fprintf(stderr, "%s: snapshot failed for %s: %d\r\n",
 	    __func__, meta->dev_name, errno);
 #endif
 	return (-1);
 }
 
 /*
  * Issue the VM_RESTORE_TIME ioctl.  The command is declared with an int
  * argument, so a zero-valued placeholder is passed.  Returns the ioctl()
  * result.
  */
 int
 vm_restore_time(struct vmctx *ctx)
 {
 	int placeholder = 0;
 
 	return (ioctl(ctx->fd, VM_RESTORE_TIME, &placeholder));
 }
 
 int
 vm_set_topology(struct vmctx *ctx,
     uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
 {
 	struct vm_cpu_topology topology;
 
 	bzero(&topology, sizeof (struct vm_cpu_topology));
 	topology.sockets = sockets;
 	topology.cores = cores;
 	topology.threads = threads;
 	topology.maxcpus = maxcpus;
 	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
 }
 
 int
 vm_get_topology(struct vmctx *ctx,
     uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
 {
 	struct vm_cpu_topology topology;
 	int error;
 
 	bzero(&topology, sizeof (struct vm_cpu_topology));
 	error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
 	if (error == 0) {
 		*sockets = topology.sockets;
 		*cores = topology.cores;
 		*threads = topology.threads;
 		*maxcpus = topology.maxcpus;
 	}
 	return (error);
 }
 
 /*
  * Return the file descriptor stored in 'ctx' (the same descriptor the other
  * functions in this file pass to ioctl()).  The descriptor is not
  * duplicated.
  */
 int
 vm_get_device_fd(struct vmctx *ctx)
 {
 
 	return (ctx->fd);
 }
 
 /*
  * Report the list of VM ioctl commands kept in vm_ioctl_cmds[].
  *
  * The function has two modes, selected by 'len':
  *  - len != NULL: store the number of commands in '*len' and return NULL.
  *  - len == NULL: return a malloc(3)ed copy of the command array, or NULL if
  *    the allocation fails.  The copy belongs to the caller, who must free it.
  *
  * Callers therefore need two calls to obtain both the array and its length.
  * Each command appears in the list exactly once.
  */
 const cap_ioctl_t *
 vm_get_ioctls(size_t *len)
 {
 	cap_ioctl_t *cmds;
 	/* keep in sync with machine/vmm_dev.h */
 	static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
 	    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG,
 	    VM_MMAP_GETNEXT, VM_SET_REGISTER, VM_GET_REGISTER,
 	    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
 	    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
+	    VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
 	    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
 	    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
 	    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
 	    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
 	    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
 	    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
 	    VM_PPTDEV_MSIX, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
 	    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
 	    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
 	    VM_GLA2GPA_NOFAULT,
 	    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
 	    VM_SET_INTINFO, VM_GET_INTINFO,
 	    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
 	    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
 
 	/* No length pointer: hand back a private, heap-allocated copy. */
 	if (len == NULL) {
 		cmds = malloc(sizeof(vm_ioctl_cmds));
 		if (cmds == NULL)
 			return (NULL);
 		bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
 		return (cmds);
 	}
 
 	/* Length query: only the element count is reported. */
 	*len = nitems(vm_ioctl_cmds);
 	return (NULL);
 }
Index: head/lib/libvmmapi/vmmapi.h
===================================================================
--- head/lib/libvmmapi/vmmapi.h	(revision 361081)
+++ head/lib/libvmmapi/vmmapi.h	(revision 361082)
@@ -1,269 +1,273 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _VMMAPI_H_
 #define	_VMMAPI_H_
 
 #include <sys/param.h>
 #include <sys/cpuset.h>
 #include <machine/vmm_dev.h>
 
+#include <stdbool.h>
+
 /*
  * API version for out-of-tree consumers like grub-bhyve for making compile
  * time decisions.
  */
 #define	VMMAPI_VERSION	0103	/* 2 digit major followed by 2 digit minor */
 
 struct iovec;
 struct vmctx;
 struct vm_snapshot_meta;
 enum x2apic_state;
 
 /*
  * Different styles of mapping the memory assigned to a VM into the address
  * space of the controlling process.
  */
 enum vm_mmap_style {
 	VM_MMAP_NONE,		/* no mapping */
 	VM_MMAP_ALL,		/* fully and statically mapped */
 	VM_MMAP_SPARSE,		/* mappings created on-demand */
 };
 
 /*
  * 'flags' value passed to 'vm_set_memflags()'.
  */
 #define	VM_MEM_F_INCORE	0x01	/* include guest memory in core file */
 #define	VM_MEM_F_WIRED	0x02	/* guest memory is wired */
 
 /*
  * Identifiers for memory segments:
  * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
  * - the remaining identifiers can be used to create devmem segments.
  */
 enum {
 	VM_SYSMEM,
 	VM_BOOTROM,
 	VM_FRAMEBUFFER,
 };
 
 /*
  * Get the length and name of the memory segment identified by 'ident'.
  * Note that system memory segments are identified with a nul name.
  *
  * Returns 0 on success and non-zero otherwise.
  */
 int	vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
 	    size_t namesiz);
 
 /*
  * Iterate over the guest address space. This function finds an address range
  * that starts at an address >= *gpa.
  *
  * Returns 0 if the next address range was found and non-zero otherwise.
  */
 int	vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
 	    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
 
 int	vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
 				 size_t *lowmem_size, size_t *highmem_size);
 
 /*
  * Create a device memory segment identified by 'segid'.
  *
  * Returns a pointer to the memory segment on success and MAP_FAILED otherwise.
  */
 void	*vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
 	    size_t len);
 
 /*
  * Map the memory segment identified by 'segid' into the guest address space
  * at [gpa,gpa+len) with protection 'prot'.
  */
 int	vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
 	    vm_ooffset_t segoff, size_t len, int prot);
 
 int	vm_create(const char *name);
 int	vm_get_device_fd(struct vmctx *ctx);
 struct vmctx *vm_open(const char *name);
 void	vm_destroy(struct vmctx *ctx);
 int	vm_parse_memsize(const char *optarg, size_t *memsize);
 int	vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
 void	*vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
 /* inverse operation to vm_map_gpa - extract guest address from host pointer */
 vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
 int	vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
 int	vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
 		   uint64_t gla, int prot, uint64_t *gpa, int *fault);
 int	vm_gla2gpa_nofault(struct vmctx *, int vcpuid,
 		   struct vm_guest_paging *paging, uint64_t gla, int prot,
 		   uint64_t *gpa, int *fault);
 uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
 void	vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
 void	vm_set_memflags(struct vmctx *ctx, int flags);
 int	vm_get_memflags(struct vmctx *ctx);
 int	vm_get_name(struct vmctx *ctx, char *buffer, size_t max_len);
 size_t	vm_get_lowmem_size(struct vmctx *ctx);
 size_t	vm_get_highmem_size(struct vmctx *ctx);
 int	vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 		    uint64_t base, uint32_t limit, uint32_t access);
 int	vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 		    uint64_t *base, uint32_t *limit, uint32_t *access);
 int	vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
 			struct seg_desc *seg_desc);
 int	vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
 int	vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
 int	vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals);
 int	vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals);
 int	vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
 int	vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
 int	vm_reinit(struct vmctx *ctx);
 int	vm_apicid2vcpu(struct vmctx *ctx, int apicid);
 int	vm_inject_exception(struct vmctx *ctx, int vcpu, int vector,
     int errcode_valid, uint32_t errcode, int restart_instruction);
 int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
 int	vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
 int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
 int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pincount(struct vmctx *ctx, int *pincount);
+int	vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu,
+	    vm_paddr_t gpa, bool write, int size, uint64_t *value);
 int	vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
 	    enum vm_intr_trigger trigger);
 int	vm_inject_nmi(struct vmctx *ctx, int vcpu);
 int	vm_capability_name2type(const char *capname);
 const char *vm_capability_type2name(int type);
 int	vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 			  int *retval);
 int	vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 			  int val);
 int	vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
 	    int func, uint64_t addr, uint64_t msg, int numvec);
 int	vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
 	    int func, int idx, uint64_t addr, uint64_t msg,
 	    uint32_t vector_control);
 
 int	vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
 int	vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
 
 const cap_ioctl_t *vm_get_ioctls(size_t *len);
 
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.
  */
 uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
 		       int *ret_entries);
 const char *vm_get_stat_desc(struct vmctx *ctx, int index);
 
 int	vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s);
 int	vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s);
 
 int	vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities);
 
 /*
  * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'.
  * The 'iovcnt' should be big enough to accommodate all GPA segments.
  *
  * retval	fault		Interpretation
  *   0		  0		Success
  *   0		  1		An exception was injected into the guest
  * EFAULT	 N/A		Error
  */
 int	vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg,
 	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
 	    int *fault);
 void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
 	    void *host_dst, size_t len);
 void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
 	    struct iovec *guest_iov, size_t len);
 void	vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov,
 	    int iovcnt);
 
 /* RTC */
 int	vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value);
 int	vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval);
 int	vm_rtc_settime(struct vmctx *ctx, time_t secs);
 int	vm_rtc_gettime(struct vmctx *ctx, time_t *secs);
 
 /* Reset vcpu register state */
 int	vcpu_reset(struct vmctx *ctx, int vcpu);
 
 int	vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_activate_cpu(struct vmctx *ctx, int vcpu);
 int	vm_suspend_cpu(struct vmctx *ctx, int vcpu);
 int	vm_resume_cpu(struct vmctx *ctx, int vcpu);
 
 /* CPU topology */
 int	vm_set_topology(struct vmctx *ctx, uint16_t sockets, uint16_t cores,
 	    uint16_t threads, uint16_t maxcpus);
 int	vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
 	    uint16_t *threads, uint16_t *maxcpus);
 
 /*
  * FreeBSD specific APIs
  */
 int	vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
 				uint64_t rip, uint64_t cr3, uint64_t gdtbase,
 				uint64_t rsp);
 int	vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu,
 					uint32_t eip, uint32_t gdtbase,
 					uint32_t esp);
 void	vm_setup_freebsd_gdt(uint64_t *gdtr);
 
 /*
  * Save and restore
  */
 
 #define MAX_SNAPSHOT_VMNAME 100
 
 /*
  * Checkpoint operation codes.  Presumably these are the values carried in
  * checkpoint_op.op -- TODO confirm against the snapshot code.
  */
 enum checkpoint_opcodes {
 	START_CHECKPOINT = 0,
 	START_SUSPEND = 1,
 };
 
 /*
  * A checkpoint request: an operation code plus a file name buffer of
  * MAX_SNAPSHOT_VMNAME bytes.  How the request is transported is not visible
  * in this header.
  */
 struct checkpoint_op {
 	unsigned int op;	/* presumably an enum checkpoint_opcodes value */
 	char snapshot_filename[MAX_SNAPSHOT_VMNAME];
 };
 
 int	vm_snapshot_req(struct vm_snapshot_meta *meta);
 int	vm_restore_time(struct vmctx *ctx);
 
 #endif	/* _VMMAPI_H_ */
Index: head/sys/amd64/include/vmm_dev.h
===================================================================
--- head/sys/amd64/include/vmm_dev.h	(revision 361081)
+++ head/sys/amd64/include/vmm_dev.h	(revision 361082)
@@ -1,436 +1,453 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_VMM_DEV_H_
 #define	_VMM_DEV_H_
 
 struct vm_snapshot_meta;
 
 #ifdef _KERNEL
 void	vmmdev_init(void);
 int	vmmdev_cleanup(void);
 #endif
 
 struct vm_memmap {
 	vm_paddr_t	gpa;
 	int		segid;		/* memory segment */
 	vm_ooffset_t	segoff;		/* offset into memory segment */
 	size_t		len;		/* mmap length */
 	int		prot;		/* RWX */
 	int		flags;
 };
 #define	VM_MEMMAP_F_WIRED	0x01
 #define	VM_MEMMAP_F_IOMMU	0x02
 
 #define	VM_MEMSEG_NAME(m)	((m)->name[0] != '\0' ? (m)->name : NULL)
 struct vm_memseg {
 	int		segid;
 	size_t		len;
 	char		name[VM_MAX_SUFFIXLEN + 1];
 };
 
 struct vm_register {
 	int		cpuid;
 	int		regnum;		/* enum vm_reg_name */
 	uint64_t	regval;
 };
 
 struct vm_seg_desc {			/* data or code segment */
 	int		cpuid;
 	int		regnum;		/* enum vm_reg_name */
 	struct seg_desc desc;
 };
 
 struct vm_register_set {
 	int		cpuid;
 	unsigned int	count;
 	const int	*regnums;	/* enum vm_reg_name */
 	uint64_t	*regvals;
 };
 
 struct vm_run {
 	int		cpuid;
 	struct vm_exit	vm_exit;
 };
 
 struct vm_exception {
 	int		cpuid;
 	int		vector;
 	uint32_t	error_code;
 	int		error_code_valid;
 	int		restart_instruction;
 };
 
 struct vm_lapic_msi {
 	uint64_t	msg;
 	uint64_t	addr;
 };
 
 struct vm_lapic_irq {
 	int		cpuid;
 	int		vector;
 };
 
 struct vm_ioapic_irq {
 	int		irq;
 };
 
 struct vm_isa_irq {
 	int		atpic_irq;
 	int		ioapic_irq;
 };
 
 struct vm_isa_irq_trigger {
 	int		atpic_irq;
 	enum vm_intr_trigger trigger;
 };
 
 struct vm_capability {
 	int		cpuid;
 	enum vm_cap_type captype;
 	int		capval;
 	int		allcpus;
 };
 
 struct vm_pptdev {
 	int		bus;
 	int		slot;
 	int		func;
 };
 
 struct vm_pptdev_mmio {
 	int		bus;
 	int		slot;
 	int		func;
 	vm_paddr_t	gpa;
 	vm_paddr_t	hpa;
 	size_t		len;
 };
 
 struct vm_pptdev_msi {
 	int		vcpu;
 	int		bus;
 	int		slot;
 	int		func;
 	int		numvec;		/* 0 means disabled */
 	uint64_t	msg;
 	uint64_t	addr;
 };
 
 struct vm_pptdev_msix {
 	int		vcpu;
 	int		bus;
 	int		slot;
 	int		func;
 	int		idx;
 	uint64_t	msg;
 	uint32_t	vector_control;
 	uint64_t	addr;
 };
 
 struct vm_nmi {
 	int		cpuid;
 };
 
 #define	MAX_VM_STATS	64
 struct vm_stats {
 	int		cpuid;				/* in */
 	int		num_entries;			/* out */
 	struct timeval	tv;
 	uint64_t	statbuf[MAX_VM_STATS];
 };
 
 struct vm_stat_desc {
 	int		index;				/* in */
 	char		desc[128];			/* out */
 };
 
 struct vm_x2apic {
 	int			cpuid;
 	enum x2apic_state	state;
 };
 
 struct vm_gpa_pte {
 	uint64_t	gpa;				/* in */
 	uint64_t	pte[4];				/* out */
 	int		ptenum;
 };
 
 struct vm_hpet_cap {
 	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
 };
 
 struct vm_suspend {
 	enum vm_suspend_how how;
 };
 
 struct vm_gla2gpa {
 	int		vcpuid;		/* inputs */
 	int 		prot;		/* PROT_READ or PROT_WRITE */
 	uint64_t	gla;
 	struct vm_guest_paging paging;
 	int		fault;		/* outputs */
 	uint64_t	gpa;
 };
 
 /*
  * Argument for the VM_ACTIVATE_CPU, VM_SUSPEND_CPU and VM_RESUME_CPU
  * ioctls, all of which are declared with this structure.
  */
 struct vm_activate_cpu {
 	int		vcpuid;
 };
 
 /*
  * Argument for the VM_GET_CPUS ioctl.  'which' takes one of the VM_*_CPUS
  * selectors defined below; 'cpus' points at the caller's cpuset buffer and
  * 'cpusetsize' gives that buffer's size in bytes (libvmmapi passes
  * sizeof(cpuset_t)).
  */
 struct vm_cpuset {
 	int		which;
 	int		cpusetsize;
 	cpuset_t	*cpus;
 };
 #define	VM_ACTIVE_CPUS		0
 #define	VM_SUSPENDED_CPUS	1
 #define	VM_DEBUG_CPUS		2
 
 /*
  * Argument for the VM_SET_INTINFO and VM_GET_INTINFO ioctls.  libvmmapi
  * fills in 'vcpuid' (and 'info1' when setting) and reads back both 'info1'
  * and 'info2' when getting.
  */
 struct vm_intinfo {
 	int		vcpuid;
 	uint64_t	info1;
 	uint64_t	info2;
 };
 
 /* Argument for the VM_RTC_SETTIME and VM_RTC_GETTIME ioctls. */
 struct vm_rtc_time {
 	time_t		secs;
 };
 
 /*
  * Argument for the VM_RTC_WRITE and VM_RTC_READ ioctls: one byte 'value'
  * at RTC offset 'offset'.
  */
 struct vm_rtc_data {
 	int		offset;
 	uint8_t		value;
 };
 
 /* Argument for the VM_SET_TOPOLOGY and VM_GET_TOPOLOGY ioctls. */
 struct vm_cpu_topology {
 	uint16_t	sockets;
 	uint16_t	cores;
 	uint16_t	threads;
 	uint16_t	maxcpus;
 };
 
 /*
  * Argument for the VM_SET_KERNEMU_DEV and VM_GET_KERNEMU_DEV ioctls.
  * 'access_width' is a 3-bit field whose encoding is not visible in this
  * header (TODO confirm against the ioctl handler); the remaining 29 bits of
  * that word are explicitly reserved.  The static assertion pins the
  * structure size at 24 bytes so the ioctl ABI cannot change silently.
  */
+struct vm_readwrite_kernemu_device {
+	int		vcpuid;
+	unsigned	access_width : 3;
+	unsigned	_unused : 29;
+	uint64_t	gpa;
+	uint64_t	value;
+};
+_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
 	IOCNUM_RUN = 1,
 	IOCNUM_SET_CAPABILITY = 2,
 	IOCNUM_GET_CAPABILITY = 3,
 	IOCNUM_SUSPEND = 4,
 	IOCNUM_REINIT = 5,
 
 	/* memory apis */
 	IOCNUM_MAP_MEMORY = 10,			/* deprecated */
 	IOCNUM_GET_MEMORY_SEG = 11,		/* deprecated */
 	IOCNUM_GET_GPA_PMAP = 12,
 	IOCNUM_GLA2GPA = 13,
 	IOCNUM_ALLOC_MEMSEG = 14,
 	IOCNUM_GET_MEMSEG = 15,
 	IOCNUM_MMAP_MEMSEG = 16,
 	IOCNUM_MMAP_GETNEXT = 17,
 	IOCNUM_GLA2GPA_NOFAULT = 18,
 
 	/* register/state accessors */
 	IOCNUM_SET_REGISTER = 20,
 	IOCNUM_GET_REGISTER = 21,
 	IOCNUM_SET_SEGMENT_DESCRIPTOR = 22,
 	IOCNUM_GET_SEGMENT_DESCRIPTOR = 23,
 	IOCNUM_SET_REGISTER_SET = 24,
 	IOCNUM_GET_REGISTER_SET = 25,
+	IOCNUM_GET_KERNEMU_DEV = 26,
+	IOCNUM_SET_KERNEMU_DEV = 27,
 
 	/* interrupt injection */
 	IOCNUM_GET_INTINFO = 28,
 	IOCNUM_SET_INTINFO = 29,
 	IOCNUM_INJECT_EXCEPTION = 30,
 	IOCNUM_LAPIC_IRQ = 31,
 	IOCNUM_INJECT_NMI = 32,
 	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
 	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
 	IOCNUM_IOAPIC_PULSE_IRQ = 35,
 	IOCNUM_LAPIC_MSI = 36,
 	IOCNUM_LAPIC_LOCAL_IRQ = 37,
 	IOCNUM_IOAPIC_PINCOUNT = 38,
 	IOCNUM_RESTART_INSTRUCTION = 39,
 
 	/* PCI pass-thru */
 	IOCNUM_BIND_PPTDEV = 40,
 	IOCNUM_UNBIND_PPTDEV = 41,
 	IOCNUM_MAP_PPTDEV_MMIO = 42,
 	IOCNUM_PPTDEV_MSI = 43,
 	IOCNUM_PPTDEV_MSIX = 44,
 
 	/* statistics */
 	IOCNUM_VM_STATS = 50, 
 	IOCNUM_VM_STAT_DESC = 51,
 
 	/* kernel device state */
 	IOCNUM_SET_X2APIC_STATE = 60,
 	IOCNUM_GET_X2APIC_STATE = 61,
 	IOCNUM_GET_HPET_CAPABILITIES = 62,
 
 	/* CPU Topology */
 	IOCNUM_SET_TOPOLOGY = 63,
 	IOCNUM_GET_TOPOLOGY = 64,
 
 	/* legacy interrupt injection */
 	IOCNUM_ISA_ASSERT_IRQ = 80,
 	IOCNUM_ISA_DEASSERT_IRQ = 81,
 	IOCNUM_ISA_PULSE_IRQ = 82,
 	IOCNUM_ISA_SET_IRQ_TRIGGER = 83,
 
 	/* vm_cpuset */
 	IOCNUM_ACTIVATE_CPU = 90,
 	IOCNUM_GET_CPUSET = 91,
 	IOCNUM_SUSPEND_CPU = 92,
 	IOCNUM_RESUME_CPU = 93,
 
 	/* RTC */
 	IOCNUM_RTC_READ = 100,
 	IOCNUM_RTC_WRITE = 101,
 	IOCNUM_RTC_SETTIME = 102,
 	IOCNUM_RTC_GETTIME = 103,
 
 	/* checkpoint */
 	IOCNUM_SNAPSHOT_REQ = 113,
 
 	IOCNUM_RESTORE_TIME = 115
 };
 
 #define	VM_RUN		\
 	_IOWR('v', IOCNUM_RUN, struct vm_run)
 #define	VM_SUSPEND	\
 	_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
 #define	VM_REINIT	\
 	_IO('v', IOCNUM_REINIT)
 #define	VM_ALLOC_MEMSEG	\
 	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
 #define	VM_GET_MEMSEG	\
 	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
 #define	VM_MMAP_MEMSEG	\
 	_IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
 #define	VM_MMAP_GETNEXT	\
 	_IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
 #define	VM_SET_REGISTER \
 	_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
 #define	VM_GET_REGISTER \
 	_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
 #define	VM_SET_SEGMENT_DESCRIPTOR \
 	_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
 #define	VM_GET_SEGMENT_DESCRIPTOR \
 	_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
 #define	VM_SET_REGISTER_SET \
 	_IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
 #define	VM_GET_REGISTER_SET \
 	_IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define	VM_SET_KERNEMU_DEV \
+	_IOW('v', IOCNUM_SET_KERNEMU_DEV, \
+	    struct vm_readwrite_kernemu_device)
+#define	VM_GET_KERNEMU_DEV \
+	_IOWR('v', IOCNUM_GET_KERNEMU_DEV, \
+	    struct vm_readwrite_kernemu_device)
 #define	VM_INJECT_EXCEPTION	\
 	_IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
 #define	VM_LAPIC_IRQ 		\
 	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
 #define	VM_LAPIC_LOCAL_IRQ 	\
 	_IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq)
 #define	VM_LAPIC_MSI		\
 	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
 #define	VM_IOAPIC_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_DEASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_PULSE_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_PINCOUNT	\
 	_IOR('v', IOCNUM_IOAPIC_PINCOUNT, int)
 #define	VM_ISA_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq)
 #define	VM_ISA_DEASSERT_IRQ	\
 	_IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq)
 #define	VM_ISA_PULSE_IRQ	\
 	_IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq)
 #define	VM_ISA_SET_IRQ_TRIGGER	\
 	_IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger)
 #define	VM_SET_CAPABILITY \
 	_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
 #define	VM_GET_CAPABILITY \
 	_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
 #define	VM_BIND_PPTDEV \
 	_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
 #define	VM_UNBIND_PPTDEV \
 	_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
 #define	VM_MAP_PPTDEV_MMIO \
 	_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
 #define	VM_PPTDEV_MSI \
 	_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
 #define	VM_PPTDEV_MSIX \
 	_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
 #define VM_INJECT_NMI \
 	_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
 #define	VM_STATS \
 	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
 #define	VM_STAT_DESC \
 	_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
 #define	VM_SET_X2APIC_STATE \
 	_IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
 #define	VM_GET_X2APIC_STATE \
 	_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
 #define	VM_GET_HPET_CAPABILITIES \
 	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
 #define VM_SET_TOPOLOGY \
 	_IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
 #define VM_GET_TOPOLOGY \
 	_IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
 #define	VM_GET_GPA_PMAP \
 	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
 #define	VM_GLA2GPA	\
 	_IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
 #define	VM_GLA2GPA_NOFAULT \
 	_IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
 #define	VM_ACTIVATE_CPU	\
 	_IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
 #define	VM_GET_CPUS	\
 	_IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
 #define	VM_SUSPEND_CPU \
 	_IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
 #define	VM_RESUME_CPU \
 	_IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
 #define	VM_SET_INTINFO	\
 	_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
 #define	VM_GET_INTINFO	\
 	_IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
 #define VM_RTC_WRITE \
 	_IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data)
 #define VM_RTC_READ \
 	_IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data)
 #define VM_RTC_SETTIME	\
 	_IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time)
 #define VM_RTC_GETTIME	\
 	_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
 #define	VM_RESTART_INSTRUCTION \
 	_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
 #define VM_SNAPSHOT_REQ \
 	_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta)
 #define VM_RESTORE_TIME \
 	_IOWR('v', IOCNUM_RESTORE_TIME, int)
 #endif
Index: head/sys/amd64/vmm/vmm_dev.c
===================================================================
--- head/sys/amd64/vmm/vmm_dev.c	(revision 361081)
+++ head/sys/amd64/vmm/vmm_dev.c	(revision 361082)
@@ -1,1182 +1,1219 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bhyve_snapshot.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/jail.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/conf.h>
 #include <sys/sysctl.h>
 #include <sys/libkern.h>
 #include <sys/ioccom.h>
 #include <sys/mman.h>
 #include <sys/uio.h>
 #include <sys/proc.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 
 #include <machine/vmparam.h>
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 #include <machine/vmm_instruction_emul.h>
 #include <machine/vmm_snapshot.h>
+#include <x86/apicreg.h>
 
 #include "vmm_lapic.h"
 #include "vmm_stat.h"
 #include "vmm_mem.h"
 #include "io/ppt.h"
 #include "io/vatpic.h"
 #include "io/vioapic.h"
 #include "io/vhpet.h"
 #include "io/vrtc.h"
 
 struct devmem_softc {
 	int	segid;
 	char	*name;
 	struct cdev *cdev;
 	struct vmmdev_softc *sc;
 	SLIST_ENTRY(devmem_softc) link;
 };
 
 struct vmmdev_softc {
 	struct vm	*vm;		/* vm instance cookie */
 	struct cdev	*cdev;
 	SLIST_ENTRY(vmmdev_softc) link;
 	SLIST_HEAD(, devmem_softc) devmem;
 	int		flags;
 };
 #define	VSC_LINKED		0x01
 
 static SLIST_HEAD(, vmmdev_softc) head;
 
 static unsigned pr_allow_flag;
 static struct mtx vmmdev_mtx;
 
 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
 
 SYSCTL_DECL(_hw_vmm);
 
 static int vmm_priv_check(struct ucred *ucred);
 static int devmem_create_cdev(const char *vmname, int id, char *devmem);
 static void devmem_destroy(void *arg);
 
 static int
 vmm_priv_check(struct ucred *ucred)
 {
 
 	if (jailed(ucred) &&
 	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
 		return (EPERM);
 
 	return (0);
 }
 
 static int
 vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
 {
 	int error;
 
 	if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm))
 		return (EINVAL);
 
 	error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
 	return (error);
 }
 
 static void
 vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
 {
 	enum vcpu_state state;
 
 	state = vcpu_get_state(sc->vm, vcpu, NULL);
 	if (state != VCPU_FROZEN) {
 		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
 		    vcpu, state);
 	}
 
 	vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
 }
 
 static int
 vcpu_lock_all(struct vmmdev_softc *sc)
 {
 	int error, vcpu;
 	uint16_t maxcpus;
 
 	maxcpus = vm_get_maxcpus(sc->vm);
 	for (vcpu = 0; vcpu < maxcpus; vcpu++) {
 		error = vcpu_lock_one(sc, vcpu);
 		if (error)
 			break;
 	}
 
 	if (error) {
 		while (--vcpu >= 0)
 			vcpu_unlock_one(sc, vcpu);
 	}
 
 	return (error);
 }
 
 static void
 vcpu_unlock_all(struct vmmdev_softc *sc)
 {
 	int vcpu;
 	uint16_t maxcpus;
 
 	maxcpus = vm_get_maxcpus(sc->vm);
 	for (vcpu = 0; vcpu < maxcpus; vcpu++)
 		vcpu_unlock_one(sc, vcpu);
 }
 
 static struct vmmdev_softc *
 vmmdev_lookup(const char *name)
 {
 	struct vmmdev_softc *sc;
 
 #ifdef notyet	/* XXX kernel is not compiled with invariants */
 	mtx_assert(&vmmdev_mtx, MA_OWNED);
 #endif
 
 	SLIST_FOREACH(sc, &head, link) {
 		if (strcmp(name, vm_name(sc->vm)) == 0)
 			break;
 	}
 
 	return (sc);
 }
 
 static struct vmmdev_softc *
 vmmdev_lookup2(struct cdev *cdev)
 {
 
 	return (cdev->si_drv1);
 }
 
 static int
 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
 {
 	int error, off, c, prot;
 	vm_paddr_t gpa, maxaddr;
 	void *hpa, *cookie;
 	struct vmmdev_softc *sc;
 	uint16_t lastcpu;
 
 	error = vmm_priv_check(curthread->td_ucred);
 	if (error)
 		return (error);
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
 		return (ENXIO);
 
 	/*
 	 * Get a read lock on the guest memory map by freezing any vcpu.
 	 */
 	lastcpu = vm_get_maxcpus(sc->vm) - 1;
 	error = vcpu_lock_one(sc, lastcpu);
 	if (error)
 		return (error);
 
 	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
 	maxaddr = vmm_sysmem_maxaddr(sc->vm);
 	while (uio->uio_resid > 0 && error == 0) {
 		gpa = uio->uio_offset;
 		off = gpa & PAGE_MASK;
 		c = min(uio->uio_resid, PAGE_SIZE - off);
 
 		/*
 		 * The VM has a hole in its physical memory map. If we want to
 		 * use 'dd' to inspect memory beyond the hole we need to
 		 * provide bogus data for memory that lies in the hole.
 		 *
 		 * Since this device does not support lseek(2), dd(1) will
 		 * read(2) blocks of data to simulate the lseek(2).
 		 */
 		hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c,
 		    prot, &cookie);
 		if (hpa == NULL) {
 			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
 				error = uiomove(__DECONST(void *, zero_region),
 				    c, uio);
 			else
 				error = EFAULT;
 		} else {
 			error = uiomove(hpa, c, uio);
 			vm_gpa_release(cookie);
 		}
 	}
 	vcpu_unlock_one(sc, lastcpu);
 	return (error);
 }
 
 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
 
 static int
 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
 {
 	struct devmem_softc *dsc;
 	int error;
 	bool sysmem;
 
 	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
 	if (error || mseg->len == 0)
 		return (error);
 
 	if (!sysmem) {
 		SLIST_FOREACH(dsc, &sc->devmem, link) {
 			if (dsc->segid == mseg->segid)
 				break;
 		}
 		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
 		    __func__, mseg->segid));
 		error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
 		    NULL);
 	} else {
 		bzero(mseg->name, sizeof(mseg->name));
 	}
 
 	return (error);
 }
 
 static int
 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
 {
 	char *name;
 	int error;
 	bool sysmem;
 
 	error = 0;
 	name = NULL;
 	sysmem = true;
 
 	/*
 	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
 	 * by stripped off when devfs processes the full string.
 	 */
 	if (VM_MEMSEG_NAME(mseg)) {
 		sysmem = false;
 		name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
 		error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
 		if (error)
 			goto done;
 	}
 
 	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
 	if (error)
 		goto done;
 
 	if (VM_MEMSEG_NAME(mseg)) {
 		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
 		if (error)
 			vm_free_memseg(sc->vm, mseg->segid);
 		else
 			name = NULL;	/* freed when 'cdev' is destroyed */
 	}
 done:
 	free(name, M_VMMDEV);
 	return (error);
 }
 
 static int
 vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
     uint64_t *regval)
 {
 	int error, i;
 
 	error = 0;
 	for (i = 0; i < count; i++) {
 		error = vm_get_register(vm, vcpu, regnum[i], &regval[i]);
 		if (error)
 			break;
 	}
 	return (error);
 }
 
 static int
 vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
     uint64_t *regval)
 {
 	int error, i;
 
 	error = 0;
 	for (i = 0; i < count; i++) {
 		error = vm_set_register(vm, vcpu, regnum[i], regval[i]);
 		if (error)
 			break;
 	}
 	return (error);
 }
 
 static int
 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	     struct thread *td)
 {
 	int error, vcpu, state_changed, size;
 	cpuset_t *cpuset;
 	struct vmmdev_softc *sc;
 	struct vm_register *vmreg;
 	struct vm_seg_desc *vmsegdesc;
 	struct vm_register_set *vmregset;
 	struct vm_run *vmrun;
 	struct vm_exception *vmexc;
 	struct vm_lapic_irq *vmirq;
 	struct vm_lapic_msi *vmmsi;
 	struct vm_ioapic_irq *ioapic_irq;
 	struct vm_isa_irq *isa_irq;
 	struct vm_isa_irq_trigger *isa_irq_trigger;
 	struct vm_capability *vmcap;
 	struct vm_pptdev *pptdev;
 	struct vm_pptdev_mmio *pptmmio;
 	struct vm_pptdev_msi *pptmsi;
 	struct vm_pptdev_msix *pptmsix;
 	struct vm_nmi *vmnmi;
 	struct vm_stats *vmstats;
 	struct vm_stat_desc *statdesc;
 	struct vm_x2apic *x2apic;
 	struct vm_gpa_pte *gpapte;
 	struct vm_suspend *vmsuspend;
 	struct vm_gla2gpa *gg;
 	struct vm_activate_cpu *vac;
 	struct vm_cpuset *vm_cpuset;
 	struct vm_intinfo *vmii;
 	struct vm_rtc_time *rtctime;
 	struct vm_rtc_data *rtcdata;
 	struct vm_memmap *mm;
 	struct vm_cpu_topology *topology;
+	struct vm_readwrite_kernemu_device *kernemu;
 	uint64_t *regvals;
 	int *regnums;
 #ifdef BHYVE_SNAPSHOT
 	struct vm_snapshot_meta *snapshot_meta;
 #endif
 
 	error = vmm_priv_check(curthread->td_ucred);
 	if (error)
 		return (error);
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
 		return (ENXIO);
 
 	vcpu = -1;
 	state_changed = 0;
 
 	/*
 	 * Some VMM ioctls can operate only on vcpus that are not running.
 	 */
 	switch (cmd) {
 	case VM_RUN:
 	case VM_GET_REGISTER:
 	case VM_SET_REGISTER:
 	case VM_GET_SEGMENT_DESCRIPTOR:
 	case VM_SET_SEGMENT_DESCRIPTOR:
 	case VM_GET_REGISTER_SET:
 	case VM_SET_REGISTER_SET:
 	case VM_INJECT_EXCEPTION:
 	case VM_GET_CAPABILITY:
 	case VM_SET_CAPABILITY:
 	case VM_PPTDEV_MSI:
 	case VM_PPTDEV_MSIX:
 	case VM_SET_X2APIC_STATE:
 	case VM_GLA2GPA:
 	case VM_GLA2GPA_NOFAULT:
 	case VM_ACTIVATE_CPU:
 	case VM_SET_INTINFO:
 	case VM_GET_INTINFO:
 	case VM_RESTART_INSTRUCTION:
 		/*
 		 * XXX fragile, handle with care
 		 * Assumes that the first field of the ioctl data is the vcpu.
 		 */
 		vcpu = *(int *)data;
 		error = vcpu_lock_one(sc, vcpu);
 		if (error)
 			goto done;
 		state_changed = 1;
 		break;
 
 	case VM_MAP_PPTDEV_MMIO:
 	case VM_BIND_PPTDEV:
 	case VM_UNBIND_PPTDEV:
 	case VM_ALLOC_MEMSEG:
 	case VM_MMAP_MEMSEG:
 	case VM_REINIT:
 		/*
 		 * ioctls that operate on the entire virtual machine must
 		 * prevent all vcpus from running.
 		 */
 		error = vcpu_lock_all(sc);
 		if (error)
 			goto done;
 		state_changed = 2;
 		break;
 
 	case VM_GET_MEMSEG:
 	case VM_MMAP_GETNEXT:
 		/*
 		 * Lock a vcpu to make sure that the memory map cannot be
 		 * modified while it is being inspected.
 		 */
 		vcpu = vm_get_maxcpus(sc->vm) - 1;
 		error = vcpu_lock_one(sc, vcpu);
 		if (error)
 			goto done;
 		state_changed = 1;
 		break;
 
 	default:
 		break;
 	}
 
 	switch(cmd) {
 	case VM_RUN:
 		vmrun = (struct vm_run *)data;
 		error = vm_run(sc->vm, vmrun);
 		break;
 	case VM_SUSPEND:
 		vmsuspend = (struct vm_suspend *)data;
 		error = vm_suspend(sc->vm, vmsuspend->how);
 		break;
 	case VM_REINIT:
 		error = vm_reinit(sc->vm);
 		break;
 	case VM_STAT_DESC: {
 		statdesc = (struct vm_stat_desc *)data;
 		error = vmm_stat_desc_copy(statdesc->index,
 					statdesc->desc, sizeof(statdesc->desc));
 		break;
 	}
 	case VM_STATS: {
 		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
 		vmstats = (struct vm_stats *)data;
 		getmicrotime(&vmstats->tv);
 		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
 				      &vmstats->num_entries, vmstats->statbuf);
 		break;
 	}
 	case VM_PPTDEV_MSI:
 		pptmsi = (struct vm_pptdev_msi *)data;
 		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
 				      pptmsi->bus, pptmsi->slot, pptmsi->func,
 				      pptmsi->addr, pptmsi->msg,
 				      pptmsi->numvec);
 		break;
 	case VM_PPTDEV_MSIX:
 		pptmsix = (struct vm_pptdev_msix *)data;
 		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
 				       pptmsix->bus, pptmsix->slot, 
 				       pptmsix->func, pptmsix->idx,
 				       pptmsix->addr, pptmsix->msg,
 				       pptmsix->vector_control);
 		break;
 	case VM_MAP_PPTDEV_MMIO:
 		pptmmio = (struct vm_pptdev_mmio *)data;
 		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
 				     pptmmio->func, pptmmio->gpa, pptmmio->len,
 				     pptmmio->hpa);
 		break;
 	case VM_BIND_PPTDEV:
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
 					 pptdev->func);
 		break;
 	case VM_UNBIND_PPTDEV:
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
 					   pptdev->func);
 		break;
 	case VM_INJECT_EXCEPTION:
 		vmexc = (struct vm_exception *)data;
 		error = vm_inject_exception(sc->vm, vmexc->cpuid,
 		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
 		    vmexc->restart_instruction);
 		break;
 	case VM_INJECT_NMI:
 		vmnmi = (struct vm_nmi *)data;
 		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
 		break;
 	case VM_LAPIC_IRQ:
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
 	case VM_LAPIC_LOCAL_IRQ:
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
 		    vmirq->vector);
 		break;
 	case VM_LAPIC_MSI:
 		vmmsi = (struct vm_lapic_msi *)data;
 		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
 		break;
 	case VM_IOAPIC_ASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_DEASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_PULSE_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_PINCOUNT:
 		*(int *)data = vioapic_pincount(sc->vm);
 		break;
+	case VM_SET_KERNEMU_DEV:
+	case VM_GET_KERNEMU_DEV: {	/* in-kernel device MMIO */
+		mem_region_write_t mwrite;
+		mem_region_read_t mread;
+		bool arg;
+
+		kernemu = (void *)data;
+		error = EINVAL;		/* unless a handler runs below */
+		if (kernemu->vcpuid < 0 || kernemu->access_width > 3 ||
+		    kernemu->vcpuid >= vm_get_maxcpus(sc->vm))
+			break;		/* no vcpu_lock_one() vetted these */
+		size = 1 << kernemu->access_width;	/* 1, 2, 4 or 8 */
+		if (kernemu->gpa >= DEFAULT_APIC_BASE &&
+		    kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
+			mread = lapic_mmio_read;
+			mwrite = lapic_mmio_write;
+		} else if (kernemu->gpa >= VIOAPIC_BASE &&
+		    kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
+			mread = vioapic_mmio_read;
+			mwrite = vioapic_mmio_write;
+		} else if (kernemu->gpa >= VHPET_BASE &&
+		    kernemu->gpa < VHPET_BASE + VHPET_SIZE) {
+			mread = vhpet_mmio_read;
+			mwrite = vhpet_mmio_write;
+		} else {
+			break;		/* gpa is not an in-kernel device */
+		}
+		if (cmd == VM_SET_KERNEMU_DEV)
+			error = mwrite(sc->vm, kernemu->vcpuid, kernemu->gpa,
+			    kernemu->value, size, &arg);
+		else
+			error = mread(sc->vm, kernemu->vcpuid, kernemu->gpa,
+			    &kernemu->value, size, &arg);
+		break;
+	}
 	case VM_ISA_ASSERT_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_assert_irq(sc->vm,
 			    isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_DEASSERT_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_deassert_irq(sc->vm,
 			    isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_PULSE_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_SET_IRQ_TRIGGER:
 		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
 		error = vatpic_set_irq_trigger(sc->vm,
 		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
 		break;
 	case VM_MMAP_GETNEXT:
 		mm = (struct vm_memmap *)data;
 		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
 		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
 		break;
 	case VM_MMAP_MEMSEG:
 		mm = (struct vm_memmap *)data;
 		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
 		    mm->len, mm->prot, mm->flags);
 		break;
 	case VM_ALLOC_MEMSEG:
 		error = alloc_memseg(sc, (struct vm_memseg *)data);
 		break;
 	case VM_GET_MEMSEG:
 		error = get_memseg(sc, (struct vm_memseg *)data);
 		break;
 	case VM_GET_REGISTER:
 		vmreg = (struct vm_register *)data;
 		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
 					&vmreg->regval);
 		break;
 	case VM_SET_REGISTER:
 		vmreg = (struct vm_register *)data;
 		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
 					vmreg->regval);
 		break;
 	case VM_SET_SEGMENT_DESCRIPTOR:
 		vmsegdesc = (struct vm_seg_desc *)data;
 		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
 					vmsegdesc->regnum,
 					&vmsegdesc->desc);
 		break;
 	case VM_GET_SEGMENT_DESCRIPTOR:
 		vmsegdesc = (struct vm_seg_desc *)data;
 		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
 					vmsegdesc->regnum,
 					&vmsegdesc->desc);
 		break;
 	case VM_GET_REGISTER_SET:
 		vmregset = (struct vm_register_set *)data;
 		if (vmregset->count > VM_REG_LAST) {
 			error = EINVAL;
 			break;
 		}
 		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
 		    vmregset->count);
 		if (error == 0)
 			error = vm_get_register_set(sc->vm, vmregset->cpuid,
 			    vmregset->count, regnums, regvals);
 		if (error == 0)
 			error = copyout(regvals, vmregset->regvals,
 			    sizeof(regvals[0]) * vmregset->count);
 		free(regvals, M_VMMDEV);
 		free(regnums, M_VMMDEV);
 		break;
 	case VM_SET_REGISTER_SET:
 		vmregset = (struct vm_register_set *)data;
 		if (vmregset->count > VM_REG_LAST) {
 			error = EINVAL;
 			break;
 		}
 		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
 		    vmregset->count);
 		if (error == 0)
 			error = copyin(vmregset->regvals, regvals,
 			    sizeof(regvals[0]) * vmregset->count);
 		if (error == 0)
 			error = vm_set_register_set(sc->vm, vmregset->cpuid,
 			    vmregset->count, regnums, regvals);
 		free(regvals, M_VMMDEV);
 		free(regnums, M_VMMDEV);
 		break;
 	case VM_GET_CAPABILITY:
 		vmcap = (struct vm_capability *)data;
 		error = vm_get_capability(sc->vm, vmcap->cpuid,
 					  vmcap->captype,
 					  &vmcap->capval);
 		break;
 	case VM_SET_CAPABILITY:
 		vmcap = (struct vm_capability *)data;
 		error = vm_set_capability(sc->vm, vmcap->cpuid,
 					  vmcap->captype,
 					  vmcap->capval);
 		break;
 	case VM_SET_X2APIC_STATE:
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_set_x2apic_state(sc->vm,
 					    x2apic->cpuid, x2apic->state);
 		break;
 	case VM_GET_X2APIC_STATE:
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_get_x2apic_state(sc->vm,
 					    x2apic->cpuid, &x2apic->state);
 		break;
 	case VM_GET_GPA_PMAP:
 		gpapte = (struct vm_gpa_pte *)data;
 		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
 				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
 		error = 0;
 		break;
 	case VM_GET_HPET_CAPABILITIES:
 		error = vhpet_getcap((struct vm_hpet_cap *)data);
 		break;
 	case VM_GLA2GPA: {
 		CTASSERT(PROT_READ == VM_PROT_READ);
 		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
 		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
 		gg = (struct vm_gla2gpa *)data;
 		error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
 		    gg->prot, &gg->gpa, &gg->fault);
 		KASSERT(error == 0 || error == EFAULT,
 		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
 		break;
 	}
 	case VM_GLA2GPA_NOFAULT:
 		gg = (struct vm_gla2gpa *)data;
 		error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging,
 		    gg->gla, gg->prot, &gg->gpa, &gg->fault);
 		KASSERT(error == 0 || error == EFAULT,
 		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
 		break;
 	case VM_ACTIVATE_CPU:
 		vac = (struct vm_activate_cpu *)data;
 		error = vm_activate_cpu(sc->vm, vac->vcpuid);
 		break;
 	case VM_GET_CPUS:
 		error = 0;
 		vm_cpuset = (struct vm_cpuset *)data;
 		size = vm_cpuset->cpusetsize;
 		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
 			error = ERANGE;
 			break;
 		}
 		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
 		if (vm_cpuset->which == VM_ACTIVE_CPUS)
 			*cpuset = vm_active_cpus(sc->vm);
 		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
 			*cpuset = vm_suspended_cpus(sc->vm);
 		else if (vm_cpuset->which == VM_DEBUG_CPUS)
 			*cpuset = vm_debug_cpus(sc->vm);
 		else
 			error = EINVAL;
 		if (error == 0)
 			error = copyout(cpuset, vm_cpuset->cpus, size);
 		free(cpuset, M_TEMP);
 		break;
 	case VM_SUSPEND_CPU:
 		vac = (struct vm_activate_cpu *)data;
 		error = vm_suspend_cpu(sc->vm, vac->vcpuid);
 		break;
 	case VM_RESUME_CPU:
 		vac = (struct vm_activate_cpu *)data;
 		error = vm_resume_cpu(sc->vm, vac->vcpuid);
 		break;
 	case VM_SET_INTINFO:
 		vmii = (struct vm_intinfo *)data;
 		error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
 		break;
 	case VM_GET_INTINFO:
 		vmii = (struct vm_intinfo *)data;
 		error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
 		    &vmii->info2);
 		break;
 	case VM_RTC_WRITE:
 		rtcdata = (struct vm_rtc_data *)data;
 		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
 		    rtcdata->value);
 		break;
 	case VM_RTC_READ:
 		rtcdata = (struct vm_rtc_data *)data;
 		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
 		    &rtcdata->value);
 		break;
 	case VM_RTC_SETTIME:
 		rtctime = (struct vm_rtc_time *)data;
 		error = vrtc_set_time(sc->vm, rtctime->secs);
 		break;
 	case VM_RTC_GETTIME:
 		error = 0;
 		rtctime = (struct vm_rtc_time *)data;
 		rtctime->secs = vrtc_get_time(sc->vm);
 		break;
 	case VM_RESTART_INSTRUCTION:
 		error = vm_restart_instruction(sc->vm, vcpu);
 		break;
 	case VM_SET_TOPOLOGY:
 		topology = (struct vm_cpu_topology *)data;
 		error = vm_set_topology(sc->vm, topology->sockets,
 		    topology->cores, topology->threads, topology->maxcpus);
 		break;
 	case VM_GET_TOPOLOGY:
 		topology = (struct vm_cpu_topology *)data;
 		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
 		    &topology->threads, &topology->maxcpus);
 		error = 0;
 		break;
 #ifdef BHYVE_SNAPSHOT
 	case VM_SNAPSHOT_REQ:
 		snapshot_meta = (struct vm_snapshot_meta *)data;
 		error = vm_snapshot_req(sc->vm, snapshot_meta);
 		break;
 	case VM_RESTORE_TIME:
 		error = vm_restore_time(sc->vm);
 		break;
 #endif
 	default:
 		error = ENOTTY;
 		break;
 	}
 
 	if (state_changed == 1)
 		vcpu_unlock_one(sc, vcpu);
 	else if (state_changed == 2)
 		vcpu_unlock_all(sc);
 
 done:
 	/*
 	 * Make sure that no handler returns a kernel-internal
 	 * error value to userspace.
 	 */
 	KASSERT(error == ERESTART || error >= 0,
 	    ("vmmdev_ioctl: invalid error return %d", error));
 	return (error);
 }
 
 static int
 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
     struct vm_object **objp, int nprot)
 {
 	struct vmmdev_softc *sc;
 	vm_paddr_t gpa;
 	size_t len;
 	vm_ooffset_t segoff, first, last;
 	int error, found, segid;
 	uint16_t lastcpu;
 	bool sysmem;
 
 	error = vmm_priv_check(curthread->td_ucred);
 	if (error)
 		return (error);
 
 	first = *offset;
 	last = first + mapsize;
 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
 		return (EINVAL);
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL) {
 		/* virtual machine is in the process of being created */
 		return (EINVAL);
 	}
 
 	/*
 	 * Get a read lock on the guest memory map by freezing any vcpu.
 	 */
 	lastcpu = vm_get_maxcpus(sc->vm) - 1;
 	error = vcpu_lock_one(sc, lastcpu);
 	if (error)
 		return (error);
 
 	gpa = 0;
 	found = 0;
 	while (!found) {
 		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
 		    NULL, NULL);
 		if (error)
 			break;
 
 		if (first >= gpa && last <= gpa + len)
 			found = 1;
 		else
 			gpa += len;
 	}
 
 	if (found) {
 		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
 		KASSERT(error == 0 && *objp != NULL,
 		    ("%s: invalid memory segment %d", __func__, segid));
 		if (sysmem) {
 			vm_object_reference(*objp);
 			*offset = segoff + (first - gpa);
 		} else {
 			error = EINVAL;
 		}
 	}
 	vcpu_unlock_one(sc, lastcpu);
 	return (error);
 }
 
 static void
 vmmdev_destroy(void *arg)
 {
 	struct vmmdev_softc *sc = arg;
 	struct devmem_softc *dsc;
 	int error;
 
 	error = vcpu_lock_all(sc);
 	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
 
 	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
 		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
 		SLIST_REMOVE_HEAD(&sc->devmem, link);
 		free(dsc->name, M_VMMDEV);
 		free(dsc, M_VMMDEV);
 	}
 
 	if (sc->cdev != NULL)
 		destroy_dev(sc->cdev);
 
 	if (sc->vm != NULL)
 		vm_destroy(sc->vm);
 
 	if ((sc->flags & VSC_LINKED) != 0) {
 		mtx_lock(&vmmdev_mtx);
 		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
 		mtx_unlock(&vmmdev_mtx);
 	}
 
 	free(sc, M_VMMDEV);
 }
 
 static int
 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
 {
 	struct devmem_softc *dsc;
 	struct vmmdev_softc *sc;
 	struct cdev *cdev;
 	char *buf;
 	int error, buflen;
 
 	error = vmm_priv_check(req->td->td_ucred);
 	if (error)
 		return (error);
 
 	buflen = VM_MAX_NAMELEN + 1;
 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
 	strlcpy(buf, "beavis", buflen);
 	error = sysctl_handle_string(oidp, buf, buflen, req);
 	if (error != 0 || req->newptr == NULL)
 		goto out;
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	if (sc == NULL || sc->cdev == NULL) {
 		mtx_unlock(&vmmdev_mtx);
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
 	 * goes down to 0 so we should not do it again in the callback.
 	 *
 	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
 	 * is scheduled for destruction.
 	 */
 	cdev = sc->cdev;
 	sc->cdev = NULL;		
 	mtx_unlock(&vmmdev_mtx);
 
 	/*
 	 * Schedule all cdevs to be destroyed:
 	 *
 	 * - any new operations on the 'cdev' will return an error (ENXIO).
 	 *
 	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
 	 *   be destroyed and the callback will be invoked in a taskqueue
 	 *   context.
 	 *
 	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
 	 */
 	SLIST_FOREACH(dsc, &sc->devmem, link) {
 		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
 		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
 	}
 	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
 	error = 0;
 
 out:
 	free(buf, M_VMMDEV);
 	return (error);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_vmm_destroy, "A",
     NULL);
 
 static struct cdevsw vmmdevsw = {
 	.d_name		= "vmmdev",
 	.d_version	= D_VERSION,
 	.d_ioctl	= vmmdev_ioctl,
 	.d_mmap_single	= vmmdev_mmap_single,
 	.d_read		= vmmdev_rw,
 	.d_write	= vmmdev_rw,
 };
 
 static int
 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
 {
 	struct vm *vm;
 	struct cdev *cdev;
 	struct vmmdev_softc *sc, *sc2;
 	char *buf;
 	int error, buflen;
 
 	error = vmm_priv_check(req->td->td_ucred);
 	if (error)
 		return (error);
 
 	buflen = VM_MAX_NAMELEN + 1;
 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
 	strlcpy(buf, "beavis", buflen);
 	error = sysctl_handle_string(oidp, buf, buflen, req);
 	if (error != 0 || req->newptr == NULL)
 		goto out;
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	mtx_unlock(&vmmdev_mtx);
 	if (sc != NULL) {
 		error = EEXIST;
 		goto out;
 	}
 
 	error = vm_create(buf, &vm);
 	if (error != 0)
 		goto out;
 
 	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
 	sc->vm = vm;
 	SLIST_INIT(&sc->devmem);
 
 	/*
 	 * Lookup the name again just in case somebody sneaked in when we
 	 * dropped the lock.
 	 */
 	mtx_lock(&vmmdev_mtx);
 	sc2 = vmmdev_lookup(buf);
 	if (sc2 == NULL) {
 		SLIST_INSERT_HEAD(&head, sc, link);
 		sc->flags |= VSC_LINKED;
 	}
 	mtx_unlock(&vmmdev_mtx);
 
 	if (sc2 != NULL) {
 		vmmdev_destroy(sc);
 		error = EEXIST;
 		goto out;
 	}
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
 			   UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
 	if (error != 0) {
 		vmmdev_destroy(sc);
 		goto out;
 	}
 
 	mtx_lock(&vmmdev_mtx);
 	sc->cdev = cdev;
 	sc->cdev->si_drv1 = sc;
 	mtx_unlock(&vmmdev_mtx);
 
 out:
 	free(buf, M_VMMDEV);
 	return (error);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_vmm_create, "A",
     NULL);
 
 void
 vmmdev_init(void)
 {
 	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
 	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
 	    "Allow use of vmm in a jail.");
 }
 
 int
 vmmdev_cleanup(void)
 {
 	int error;
 
 	if (SLIST_EMPTY(&head))
 		error = 0;
 	else
 		error = EBUSY;
 
 	return (error);
 }
 
 static int
 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
     struct vm_object **objp, int nprot)
 {
 	struct devmem_softc *dsc;
 	vm_ooffset_t first, last;
 	size_t seglen;
 	int error;
 	uint16_t lastcpu;
 	bool sysmem;
 
 	dsc = cdev->si_drv1;
 	if (dsc == NULL) {
 		/* 'cdev' has been created but is not ready for use */
 		return (ENXIO);
 	}
 
 	first = *offset;
 	last = *offset + len;
 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
 		return (EINVAL);
 
 	lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1;
 	error = vcpu_lock_one(dsc->sc, lastcpu);
 	if (error)
 		return (error);
 
 	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
 	KASSERT(error == 0 && !sysmem && *objp != NULL,
 	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
 
 	vcpu_unlock_one(dsc->sc, lastcpu);
 
 	if (seglen >= last) {
 		vm_object_reference(*objp);
 		return (0);
 	} else {
 		return (EINVAL);
 	}
 }
 
 static struct cdevsw devmemsw = {
 	.d_name		= "devmem",
 	.d_version	= D_VERSION,
 	.d_mmap_single	= devmem_mmap_single,
 };
 
 static int
 devmem_create_cdev(const char *vmname, int segid, char *devname)
 {
 	struct devmem_softc *dsc;
 	struct vmmdev_softc *sc;
 	struct cdev *cdev;
 	int error;
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
 	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
 	if (error)
 		return (error);
 
 	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(vmname);
 	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
 	if (sc->cdev == NULL) {
 		/* virtual machine is being created or destroyed */
 		mtx_unlock(&vmmdev_mtx);
 		free(dsc, M_VMMDEV);
 		destroy_dev_sched_cb(cdev, NULL, 0);
 		return (ENODEV);
 	}
 
 	dsc->segid = segid;
 	dsc->name = devname;
 	dsc->cdev = cdev;
 	dsc->sc = sc;
 	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
 	mtx_unlock(&vmmdev_mtx);
 
 	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
 	cdev->si_drv1 = dsc;
 	return (0);
 }
 
 static void
 devmem_destroy(void *arg)
 {
 	struct devmem_softc *dsc = arg;
 
 	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
 	dsc->cdev = NULL;
 	dsc->sc = NULL;
 }
Index: head/usr.sbin/bhyve/Makefile
===================================================================
--- head/usr.sbin/bhyve/Makefile	(revision 361081)
+++ head/usr.sbin/bhyve/Makefile	(revision 361082)
@@ -1,121 +1,124 @@
 #
 # $FreeBSD$
 #
 
 .include <src.opts.mk>
 CFLAGS+=-I${SRCTOP}/sys
 .PATH:  ${SRCTOP}/sys/cam/ctl
 
 PROG=	bhyve
 PACKAGE=	bhyve
 
 MAN=	bhyve.8
 
 BHYVE_SYSDIR?=${SRCTOP}
 
 SRCS=	\
 	atkbdc.c		\
 	acpi.c			\
 	audio.c			\
 	bhyvegc.c		\
 	bhyverun.c		\
 	block_if.c		\
 	bootrom.c		\
 	console.c		\
 	consport.c		\
 	ctl_util.c		\
 	ctl_scsi_all.c		\
 	dbgport.c		\
 	fwctl.c			\
 	gdb.c			\
 	hda_codec.c		\
 	inout.c			\
 	ioapic.c		\
+	kernemu_dev.c		\
 	mem.c			\
 	mevent.c		\
 	mptbl.c			\
 	net_backends.c		\
 	net_utils.c		\
 	pci_ahci.c		\
 	pci_e82545.c		\
 	pci_emul.c		\
 	pci_hda.c		\
 	pci_fbuf.c		\
 	pci_hostbridge.c	\
 	pci_irq.c		\
 	pci_lpc.c		\
 	pci_nvme.c		\
 	pci_passthru.c		\
 	pci_virtio_block.c	\
 	pci_virtio_console.c	\
 	pci_virtio_net.c	\
 	pci_virtio_rnd.c	\
 	pci_virtio_scsi.c	\
 	pci_uart.c		\
 	pci_xhci.c		\
 	pm.c			\
 	post.c			\
 	ps2kbd.c		\
 	ps2mouse.c		\
 	rfb.c			\
 	rtc.c			\
 	smbiostbl.c		\
 	sockstream.c		\
 	task_switch.c		\
 	uart_emul.c		\
 	usb_emul.c		\
 	usb_mouse.c		\
 	virtio.c		\
 	vga.c			\
 	vmgenc.c		\
 	xmsr.c			\
 	spinup_ap.c		\
 	iov.c
 
 .if ${MK_BHYVE_SNAPSHOT} != "no"
 SRCS+=	snapshot.c
 .endif
+
+CFLAGS.kernemu_dev.c+=	-I${SRCTOP}/sys/amd64
 
 .PATH:  ${BHYVE_SYSDIR}/sys/amd64/vmm
 SRCS+=	vmm_instruction_emul.c
 
 LIBADD=	vmmapi md pthread z util sbuf cam
 .if ${MK_BHYVE_SNAPSHOT} != "no"
 LIBADD+= ucl xo
 .endif
 
 .if ${MK_INET_SUPPORT} != "no"
 CFLAGS+=-DINET
 .endif
 .if ${MK_INET6_SUPPORT} != "no"
 CFLAGS+=-DINET6
 .endif
 .if ${MK_NETGRAPH_SUPPORT} != "no"
 CFLAGS+=-DNETGRAPH
 LIBADD+=    netgraph
 .endif
 .if ${MK_OPENSSL} == "no"
 CFLAGS+=-DNO_OPENSSL
 .else
 LIBADD+=	crypto
 .endif
 
 CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
 CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
 CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
 .if ${MK_BHYVE_SNAPSHOT} != "no"
 CFLAGS+= -I${SRCTOP}/contrib/libucl/include
 
 # Temporarily disable capsicum until the checkpoint code is integrated with it.
 CFLAGS+= -DWITHOUT_CAPSICUM
 
 CFLAGS+= -DBHYVE_SNAPSHOT
 .endif
 
 .ifdef GDB_LOG
 CFLAGS+=-DGDB_LOG
 .endif
 
 WARNS?=	2
 
 .include <bsd.prog.mk>
Index: head/usr.sbin/bhyve/bhyverun.c
===================================================================
--- head/usr.sbin/bhyve/bhyverun.c	(revision 361081)
+++ head/usr.sbin/bhyve/bhyverun.c	(revision 361082)
@@ -1,1428 +1,1430 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #ifndef WITHOUT_CAPSICUM
 #include <sys/capsicum.h>
 #endif
 #include <sys/mman.h>
 #ifdef BHYVE_SNAPSHOT
 #include <sys/socket.h>
 #include <sys/stat.h>
 #endif
 #include <sys/time.h>
 #ifdef BHYVE_SNAPSHOT
 #include <sys/un.h>
 #endif
 
 #include <amd64/vmm/intel/vmcs.h>
 
 #include <machine/atomic.h>
 #include <machine/segments.h>
 
 #ifndef WITHOUT_CAPSICUM
 #include <capsicum_helpers.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <err.h>
 #include <errno.h>
 #ifdef BHYVE_SNAPSHOT
 #include <fcntl.h>
 #endif
 #include <libgen.h>
 #include <unistd.h>
 #include <assert.h>
 #include <pthread.h>
 #include <pthread_np.h>
 #include <sysexits.h>
 #include <stdbool.h>
 #include <stdint.h>
 #ifdef BHYVE_SNAPSHOT
 #include <ucl.h>
 #include <unistd.h>
 
 #include <libxo/xo.h>
 #endif
 
 #include <machine/vmm.h>
 #ifndef WITHOUT_CAPSICUM
 #include <machine/vmm_dev.h>
 #endif
 #include <vmmapi.h>
 
 #include "bhyverun.h"
 #include "acpi.h"
 #include "atkbdc.h"
 #include "bootrom.h"
 #include "inout.h"
 #include "dbgport.h"
 #include "debug.h"
 #include "fwctl.h"
 #include "gdb.h"
 #include "ioapic.h"
+#include "kernemu_dev.h"
 #include "mem.h"
 #include "mevent.h"
 #include "mptbl.h"
 #include "pci_emul.h"
 #include "pci_irq.h"
 #include "pci_lpc.h"
 #include "smbiostbl.h"
 #ifdef BHYVE_SNAPSHOT
 #include "snapshot.h"
 #endif
 #include "xmsr.h"
 #include "spinup_ap.h"
 #include "rtc.h"
 #include "vmgenc.h"
 
 #define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */
 
 #define MB		(1024UL * 1024)
 #define GB		(1024UL * MB)
 
 /*
  * Human-readable descriptions of VMX exit reasons, indexed by the
  * EXIT_REASON_* value.  Indices without a designated initializer are NULL;
  * vmexit_vmx_desc() maps those, and out-of-range values, to "Unknown".
  */
 static const char * const vmx_exit_reason_desc[] = {
 	[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
 	[EXIT_REASON_EXT_INTR] = "External interrupt",
 	[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
 	[EXIT_REASON_INIT] = "INIT signal",
 	[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
 	[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
 	[EXIT_REASON_SMI] = "Other SMI",
 	[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
 	[EXIT_REASON_NMI_WINDOW] = "NMI window",
 	[EXIT_REASON_TASK_SWITCH] = "Task switch",
 	[EXIT_REASON_CPUID] = "CPUID",
 	[EXIT_REASON_GETSEC] = "GETSEC",
 	[EXIT_REASON_HLT] = "HLT",
 	[EXIT_REASON_INVD] = "INVD",
 	[EXIT_REASON_INVLPG] = "INVLPG",
 	[EXIT_REASON_RDPMC] = "RDPMC",
 	[EXIT_REASON_RDTSC] = "RDTSC",
 	[EXIT_REASON_RSM] = "RSM",
 	[EXIT_REASON_VMCALL] = "VMCALL",
 	[EXIT_REASON_VMCLEAR] = "VMCLEAR",
 	[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
 	[EXIT_REASON_VMPTRLD] = "VMPTRLD",
 	[EXIT_REASON_VMPTRST] = "VMPTRST",
 	[EXIT_REASON_VMREAD] = "VMREAD",
 	[EXIT_REASON_VMRESUME] = "VMRESUME",
 	[EXIT_REASON_VMWRITE] = "VMWRITE",
 	[EXIT_REASON_VMXOFF] = "VMXOFF",
 	[EXIT_REASON_VMXON] = "VMXON",
 	[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
 	[EXIT_REASON_DR_ACCESS] = "MOV DR",
 	[EXIT_REASON_INOUT] = "I/O instruction",
 	[EXIT_REASON_RDMSR] = "RDMSR",
 	[EXIT_REASON_WRMSR] = "WRMSR",
 	[EXIT_REASON_INVAL_VMCS] =
 	    "VM-entry failure due to invalid guest state",
 	[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
 	[EXIT_REASON_MWAIT] = "MWAIT",
 	[EXIT_REASON_MTF] = "Monitor trap flag",
 	[EXIT_REASON_MONITOR] = "MONITOR",
 	[EXIT_REASON_PAUSE] = "PAUSE",
 	[EXIT_REASON_MCE_DURING_ENTRY] =
 	    "VM-entry failure due to machine-check event",
 	[EXIT_REASON_TPR] = "TPR below threshold",
 	[EXIT_REASON_APIC_ACCESS] = "APIC access",
 	[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
 	[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
 	[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
 	[EXIT_REASON_EPT_FAULT] = "EPT violation",
 	[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
 	[EXIT_REASON_INVEPT] = "INVEPT",
 	[EXIT_REASON_RDTSCP] = "RDTSCP",
 	[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
 	[EXIT_REASON_INVVPID] = "INVVPID",
 	[EXIT_REASON_WBINVD] = "WBINVD",
 	[EXIT_REASON_XSETBV] = "XSETBV",
 	[EXIT_REASON_APIC_WRITE] = "APIC write",
 	[EXIT_REASON_RDRAND] = "RDRAND",
 	[EXIT_REASON_INVPCID] = "INVPCID",
 	[EXIT_REASON_VMFUNC] = "VMFUNC",
 	[EXIT_REASON_ENCLS] = "ENCLS",
 	[EXIT_REASON_RDSEED] = "RDSEED",
 	[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
 	[EXIT_REASON_XSAVES] = "XSAVES",
 	[EXIT_REASON_XRSTORS] = "XRSTORS"
 };
 
 /*
  * Signature shared by all VM-exit handlers in the handler[] table.  The
  * return value is checked by vm_loop() against VMEXIT_CONTINUE/VMEXIT_ABORT.
  */
 typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
 extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
 
 /* Name of the virtual machine; set in main() before do_open() is called. */
 const char *vmname;
 
 /* vCPU count and topology; written by topology_parse() and main(). */
 int guest_ncpus;
 uint16_t cores, maxcpus, sockets, threads;
 
 /* Argument of the -U option, stored as given. */
 char *guest_uuid_str;
 
 int raw_stdio = 0;
 
 /* Option state: -G, -H/-P, -W and -a/-x respectively. */
 static int gdb_port = 0;
 static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
 static int virtio_msix = 1;
 static int x2apic_mode = 0;	/* default is xAPIC */
 
 /* -e sets strictio; -w clears strictmsr. */
 static int strictio;
 static int strictmsr = 1;
 
 /* Set by -A. */
 static int acpi;
 
 static char *progname;
 /* vCPU id of the bootstrap processor. */
 static const int BSP = 0;
 
 /*
  * vCPUs currently tracked by this process: bits are set in fbsdrun_addcpu()
  * and cleared in fbsdrun_deletecpu().
  */
 static cpuset_t cpumask;
 
 static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
 
 /* Per-vCPU exit record; vm_loop() passes &vmexit[vcpu] to vm_run(). */
 static struct vm_exit vmexit[VM_MAXCPU];
 
 /* Event counters bumped by the individual vmexit_*() handlers. */
 struct bhyvestats {
 	uint64_t	vmexit_bogus;
 	uint64_t	vmexit_reqidle;
 	uint64_t	vmexit_hlt;
 	uint64_t	vmexit_pause;
 	uint64_t	vmexit_mtrap;
 	uint64_t	vmexit_inst_emul;
 	uint64_t	cpu_switch_rotate;
 	uint64_t	cpu_switch_direct;
 } stats;
 
 /*
  * Per-vCPU thread bookkeeping; filled in by fbsdrun_addcpu() and handed to
  * fbsdrun_start_thread() as the pthread argument.
  */
 struct mt_vmm_info {
 	pthread_t	mt_thr;
 	struct vmctx	*mt_ctx;
 	int		mt_vcpu;	
 } mt_vmm_info[VM_MAXCPU];
 
 /*
  * Per-vCPU host CPU sets requested with -p (see pincpu_parse()); a NULL
  * entry means no pinning was requested and vm_loop() leaves affinity alone.
  */
 static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };
 
 /*
  * Print the command-line synopsis and option summary to stderr, then
  * terminate the process with exit status 'code'.  Does not return.
  */
 static void
 usage(int code)
 {
 
         fprintf(stderr,
 		"Usage: %s [-abehuwxACHPSWY]\n"
 		"       %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
 		"       %*s [-g <gdb port>] [-l <lpc>]\n"
 		"       %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
 		"       -a: local apic is in xAPIC mode (deprecated)\n"
 		"       -A: create ACPI tables\n"
 		"       -c: number of cpus and/or topology specification\n"
 		"       -C: include guest memory in core file\n"
 		"       -e: exit on unhandled I/O access\n"
 		"       -g: gdb port\n"
 		"       -h: help\n"
 		"       -H: vmexit from the guest on hlt\n"
 		"       -l: LPC device configuration\n"
 		"       -m: memory size in MB\n"
 #ifdef BHYVE_SNAPSHOT
 		"       -r: path to checkpoint file\n"
 #endif
 		"       -p: pin 'vcpu' to 'hostcpu'\n"
 		"       -P: vmexit from the guest on pause\n"
 		"       -s: <slot,driver,configinfo> PCI slot config\n"
 		"       -S: guest memory cannot be swapped\n"
 		"       -u: RTC keeps UTC time\n"
 		"       -U: uuid\n"
 		"       -w: ignore unimplemented MSRs\n"
 		"       -W: force virtio to use single-vector MSI\n"
 		"       -x: local apic is in x2APIC mode\n"
 		"       -Y: disable MPtable generation\n",
 		/*
 		 * Each "%*s" prints an empty string padded to the width of
 		 * the program name so the continuation lines line up under
 		 * the first one.
 		 */
 		progname, (int)strlen(progname), "", (int)strlen(progname), "",
 		(int)strlen(progname), "");
 
 	exit(code);
 }
 
 /*
  * XXX This parser is known to have the following issues:
  * 1.  It accepts null key=value tokens ",,".
  * 2.  It accepts whitespace after = and before value.
  * 3.  Values out of range of INT are silently wrapped.
  * 4.  It doesn't check non-final values.
  * 5.  The apparently bogus limits of UINT16_MAX are for future expansion.
  *
  * The acceptance of a null specification ('-c ""') is by design to match the
  * manual page syntax specification, this results in a topology of 1 vCPU.
  */
 /*
  * Parse the argument of the -c option: a comma-separated list made of a bare
  * cpu count and/or "cpus=", "sockets=", "cores=" and "threads=" assignments.
  *
  * On success stores the result in guest_ncpus, sockets, cores and threads
  * and returns 0.  Returns -1 on allocation failure, on a malformed token, on
  * a value outside 1..UINT16_MAX, or when an explicit cpu count disagrees
  * with sockets * cores * threads.
  */
 static int
 topology_parse(const char *opt)
 {
 	uint64_t ncpus;
 	int c, chk, n, s, t, tmp;
 	char *cp, *str, *tofree;
 	bool ns, scts;
 
 	c = 1, n = 1, s = 1, t = 1;
 	ns = false, scts = false;
 
 	/*
 	 * strsep() advances 'str' (and leaves it NULL once the input is
 	 * exhausted), so the pointer returned by strdup() is kept separately
 	 * in 'tofree'; that is the only pointer that may be passed to free().
 	 */
 	tofree = str = strdup(opt);
 	if (str == NULL)
 		return (-1);
 
 	while ((cp = strsep(&str, ",")) != NULL) {
 		if (sscanf(cp, "%i%n", &tmp, &chk) == 1) {
 			n = tmp;
 			ns = true;
 		} else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) {
 			n = tmp;
 			ns = true;
 		} else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) {
 			s = tmp;
 			scts = true;
 		} else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) {
 			c = tmp;
 			scts = true;
 		} else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) {
 			t = tmp;
 			scts = true;
 #ifdef notyet  /* Do not expose this until vmm.ko implements it */
 		} else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) {
 			m = tmp;
 #endif
 		/* Skip the empty argument case from -c "" */
 		} else if (cp[0] == '\0')
 			continue;
 		else
 			goto out;
 		/* Any trailing garbage causes an error */
 		if (cp[chk] != '\0')
 			goto out;
 	}
 	free(tofree);
 	tofree = NULL;
 
 	/*
 	 * Range check 1 <= n <= UINT16_MAX all values
 	 */
 	if (n < 1 || s < 1 || c < 1 || t < 1 ||
 	    n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX  ||
 	    t > UINT16_MAX)
 		return (-1);
 
 	/* If only the cpus was specified, use that as sockets */
 	if (!scts)
 		s = n;
 	/*
 	 * Compute sockets * cores * threads avoiding overflow
 	 * The range check above ensures these are 16 bit values
 	 * If n was specified check it against computed ncpus
 	 */
 	ncpus = (uint64_t)s * c * t;
 	if (ncpus > UINT16_MAX || (ns && n != ncpus))
 		return (-1);
 
 	guest_ncpus = ncpus;
 	sockets = s;
 	cores = c;
 	threads = t;
 	return(0);
 
 out:
 	free(tofree);
 	return (-1);
 }
 
 static int
 pincpu_parse(const char *opt)
 {
 	int vcpu, pcpu;
 
 	if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
 		fprintf(stderr, "invalid format: %s\n", opt);
 		return (-1);
 	}
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU) {
 		fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
 		    vcpu, VM_MAXCPU - 1);
 		return (-1);
 	}
 
 	if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
 		fprintf(stderr, "hostcpu '%d' outside valid range from "
 		    "0 to %d\n", pcpu, CPU_SETSIZE - 1);
 		return (-1);
 	}
 
 	if (vcpumap[vcpu] == NULL) {
 		if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
 			perror("malloc");
 			return (-1);
 		}
 		CPU_ZERO(vcpumap[vcpu]);
 	}
 	CPU_SET(pcpu, vcpumap[vcpu]);
 	return (0);
 }
 
 /*
  * Inject exception 'vector' into 'vcpu' through vm_inject_exception(),
  * always asking for the faulting instruction to be restarted.  'arg' is the
  * struct vmctx of the VM.  A failure of the injection trips an assertion.
  */
 void
 vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
     int errcode)
 {
 	struct vmctx *ctx = arg;
 	int restart_instruction = 1;
 	int error;
 
 	error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
 	    restart_instruction);
 	assert(error == 0);
 }
 
 /*
  * Thin wrapper around vm_map_gpa(): returns whatever that call yields for
  * the guest address 'gaddr' and length 'len'.
  */
 void *
 paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
 {
 	void *hostaddr;
 
 	hostaddr = vm_map_gpa(ctx, gaddr, len);
 	return (hostaddr);
 }
 
 #ifdef BHYVE_SNAPSHOT
 /*
  * Thin wrapper around vm_rev_map_gpa(): returns whatever that call yields
  * for the host pointer 'addr'.  Only built with BHYVE_SNAPSHOT.
  */
 uintptr_t
 paddr_host2guest(struct vmctx *ctx, void *addr)
 {
 	uintptr_t gaddr;
 
 	gaddr = vm_rev_map_gpa(ctx, addr);
 	return (gaddr);
 }
 #endif
 
 /* Report the current value of the "exit on PAUSE" flag (set by -P). */
 int
 fbsdrun_vmexit_on_pause(void)
 {
 	int enabled;
 
 	enabled = guest_vmexit_on_pause;
 	return (enabled);
 }
 
 /* Report the current value of the "exit on HLT" flag (set by -H). */
 int
 fbsdrun_vmexit_on_hlt(void)
 {
 	int enabled;
 
 	enabled = guest_vmexit_on_hlt;
 	return (enabled);
 }
 
 /* Report the virtio MSI-X flag: 1 by default, cleared by -W. */
 int
 fbsdrun_virtio_msix(void)
 {
 	int enabled;
 
 	enabled = virtio_msix;
 	return (enabled);
 }
 
 /*
  * Start routine of a vCPU thread; fbsdrun_addcpu() passes it to
  * pthread_create() with the vCPU's mt_vmm_info entry as 'param'.
  *
  * Names the thread "vcpu <n>", registers the vCPU with the checkpoint code
  * (BHYVE_SNAPSHOT builds) and with gdb (when a gdb port is configured), then
  * enters vm_loop() at the RIP recorded in vmexit[vcpu].  If vm_loop() ever
  * returns, the whole process exits with status 1.
  */
 static void *
 fbsdrun_start_thread(void *param)
 {
 	char tname[MAXCOMLEN + 1];
 	struct mt_vmm_info *mtp;
 	int vcpu;
 
 	mtp = param;
 	vcpu = mtp->mt_vcpu;
 
 	snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
 	pthread_set_name_np(mtp->mt_thr, tname);
 
 #ifdef BHYVE_SNAPSHOT
 	checkpoint_cpu_add(vcpu);
 #endif
 	if (gdb_port != 0)
 		gdb_cpu_add(vcpu);
 
 	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
 
 	/* not reached */
 	exit(1);
 	return (NULL);
 }
 
 /*
  * Activate vCPU 'newcpu' and start a thread that runs it from 'rip'.
  *
  * 'fromcpu' must be the BSP (asserted).  Failure to activate the vCPU is
  * fatal (err(EX_OSERR, ...)); failure to create the thread trips an
  * assertion.
  */
 void
 fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
 {
 	int error;
 
 	assert(fromcpu == BSP);
 
 	/*
 	 * The 'newcpu' must be activated in the context of 'fromcpu'. If
 	 * vm_activate_cpu() is delayed until newcpu's pthread starts running
 	 * then vmm.ko is out-of-sync with bhyve and this can create a race
 	 * with vm_suspend().
 	 */
 	error = vm_activate_cpu(ctx, newcpu);
 	if (error != 0)
 		err(EX_OSERR, "could not activate CPU %d", newcpu);
 
 	CPU_SET_ATOMIC(newcpu, &cpumask);
 
 	/*
 	 * Set up the vmexit struct to allow execution to start
 	 * at the given RIP
 	 */
 	vmexit[newcpu].rip = rip;
 	vmexit[newcpu].inst_length = 0;
 
 	/* Thread argument; read back by fbsdrun_start_thread(). */
 	mt_vmm_info[newcpu].mt_ctx = ctx;
 	mt_vmm_info[newcpu].mt_vcpu = newcpu;
 
 	error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
 	    fbsdrun_start_thread, &mt_vmm_info[newcpu]);
 	assert(error == 0);
 }
 
 /*
  * Remove 'vcpu' from cpumask.  Returns non-zero when the mask is empty
  * afterwards.  Asking to remove a vCPU that is not in the mask prints a
  * diagnostic and exits the process with status 4.
  */
 static int
 fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
 {
 
 	if (CPU_ISSET(vcpu, &cpumask)) {
 		CPU_CLR_ATOMIC(vcpu, &cpumask);
 		return (CPU_EMPTY(&cpumask));
 	}
 
 	fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
 	exit(4);
 }
 
 /*
  * Hook for writes to GUEST_NIO_PORT (called from vmexit_inout()).  It
  * currently does nothing and always returns VMEXIT_CONTINUE.
  */
 static int
 vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
 		     uint32_t eax)
 {
 	int rc = VMEXIT_CONTINUE;
 
 #if BHYVE_DEBUG
 	/*
 	 * put guest-driven debug here
 	 */
 #endif
 	return (rc);
 }
 
 /*
  * Handle an I/O-port exit.  Writes to GUEST_NIO_PORT are routed to
  * vmexit_handle_notify(); everything else goes through emulate_inout().
  *
  * Returns VMEXIT_CONTINUE on success.  When emulation fails, a diagnostic
  * naming the access and the exiting vCPU's RIP is printed and VMEXIT_ABORT
  * is returned.
  */
 static int
 vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	int error;
 	int bytes, port, in, out;
 	int vcpu;
 
 	vcpu = *pvcpu;
 
 	port = vme->u.inout.port;
 	bytes = vme->u.inout.bytes;
 	in = vme->u.inout.in;
 	out = !in;
 
 	/* Extra-special case of host notifications */
 	if (out && port == GUEST_NIO_PORT) {
 		error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
 		return (error);
 	}
 
 	error = emulate_inout(ctx, vcpu, vme, strictio);
 	if (error) {
 		/*
 		 * Report the RIP from this exit ('vme').  The file-scope
 		 * array 'vmexit' would decay to element 0 and always show
 		 * vCPU 0's RIP.
 		 */
 		fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
 		    in ? "in" : "out",
 		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
 		    port, vme->rip);
 		return (VMEXIT_ABORT);
 	} else {
 		return (VMEXIT_CONTINUE);
 	}
 }
 
 static int
 vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	uint64_t val;
 	uint32_t eax, edx;
 	int error;
 
 	val = 0;
 	error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val);
 	if (error != 0) {
 		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
 		    vme->u.msr.code, *pvcpu);
 		if (strictmsr) {
 			vm_inject_gp(ctx, *pvcpu);
 			return (VMEXIT_CONTINUE);
 		}
 	}
 
 	eax = val;
 	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax);
 	assert(error == 0);
 
 	edx = val >> 32;
 	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx);
 	assert(error == 0);
 
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handle a WRMSR exit through emulate_wrmsr().  On failure a message is
  * printed and, when strictmsr is set, a #GP is injected into the guest.
  * Always returns VMEXIT_CONTINUE.
  */
 static int
 vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	int error;
 
 	error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
 	if (error == 0)
 		return (VMEXIT_CONTINUE);
 
 	fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
 	    vme->u.msr.code, vme->u.msr.wval, *pvcpu);
 	if (strictmsr)
 		vm_inject_gp(ctx, *pvcpu);
 
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handle a SPINUP_AP exit by forwarding the requested vCPU id and start RIP
  * to spinup_ap().  The result of spinup_ap() is deliberately discarded and
  * VMEXIT_CONTINUE is always returned.
  */
 static int
 vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 
 	(void)spinup_ap(ctx, *pvcpu,
 		    vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
 
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Extra diagnostics for EPT-misconfiguration exits; enabled unconditionally
  * by the define below.  The statics are scratch storage that vmexit_vmx()
  * fills in and prints.
  */
 #define	DEBUG_EPT_MISCONFIG
 #ifdef DEBUG_EPT_MISCONFIG
 /* Passed through VMCS_IDENT() to vm_get_register() in vmexit_vmx(). */
 #define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
 
 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
 static int ept_misconfig_ptenum;
 #endif
 
 /*
  * Look up the description of a VMX exit reason in vmx_exit_reason_desc[].
  * Returns "Unknown" when the reason is beyond the table or has no entry.
  */
 static const char *
 vmexit_vmx_desc(uint32_t exit_reason)
 {
 	const char *desc;
 
 	desc = NULL;
 	if (exit_reason < nitems(vmx_exit_reason_desc))
 		desc = vmx_exit_reason_desc[exit_reason];
 
 	return (desc != NULL ? desc : "Unknown");
 }
 
 /*
  * Handler for VM_EXITCODE_VMX: dump the exit record to stderr and return
  * VMEXIT_ABORT.  With DEBUG_EPT_MISCONFIG, an EPT-misconfiguration exit also
  * prints the guest-physical address and the values returned by
  * vm_get_gpa_pmap() for it.
  */
 static int
 vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
 	fprintf(stderr, "\treason\t\tVMX\n");
 	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
 	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
 	fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
 	fprintf(stderr, "\texit_reason\t%u (%s)\n", vmexit->u.vmx.exit_reason,
 	    vmexit_vmx_desc(vmexit->u.vmx.exit_reason));
 	fprintf(stderr, "\tqualification\t0x%016lx\n",
 	    vmexit->u.vmx.exit_qualification);
 	fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
 	fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
 #ifdef DEBUG_EPT_MISCONFIG
 	if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
 		/* The return values of both lookups are not checked. */
 		vm_get_register(ctx, *pvcpu,
 		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
 		    &ept_misconfig_gpa);
 		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
 		    &ept_misconfig_ptenum);
 		fprintf(stderr, "\tEPT misconfiguration:\n");
 		fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
 		fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
 		    ept_misconfig_ptenum, ept_misconfig_pte[0],
 		    ept_misconfig_pte[1], ept_misconfig_pte[2],
 		    ept_misconfig_pte[3]);
 	}
 #endif	/* DEBUG_EPT_MISCONFIG */
 	return (VMEXIT_ABORT);
 }
 
 /*
  * Handler for VM_EXITCODE_SVM: dump the exit record (RIP, instruction
  * length, exit code and both exitinfo words) to stderr and return
  * VMEXIT_ABORT.
  */
 static int
 vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
 	fprintf(stderr, "\treason\t\tSVM\n");
 	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
 	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
 	fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
 	fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
 	fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
 	return (VMEXIT_ABORT);
 }
 
 /*
  * Handler for VM_EXITCODE_BOGUS: count the event and resume the guest.
  * The exit must not carry an instruction length (asserted).
  */
 static int
 vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	assert(vmexit->inst_length == 0);
 	stats.vmexit_bogus += 1;
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handler for VM_EXITCODE_REQIDLE: count the event and resume the guest.
  * The exit must not carry an instruction length (asserted).
  */
 static int
 vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	assert(vmexit->inst_length == 0);
 	stats.vmexit_reqidle += 1;
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handler for VM_EXITCODE_HLT: count the event and resume the guest at the
  * next instruction.  The HLT exit is only used as a way to be friendly to
  * the host scheduler.
  */
 static int
 vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	stats.vmexit_hlt += 1;
 	return (VMEXIT_CONTINUE);
 }
 
 /* Handler for VM_EXITCODE_PAUSE: count the event and resume the guest. */
 static int
 vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	stats.vmexit_pause += 1;
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handler for VM_EXITCODE_MTRAP: count the event and, when a gdb port is
  * configured, hand the vCPU to gdb_cpu_mtrap().  In BHYVE_SNAPSHOT builds
  * the gdb call is bracketed by checkpoint_cpu_suspend()/resume().  The exit
  * must not carry an instruction length (asserted).  Always returns
  * VMEXIT_CONTINUE.
  */
 static int
 vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	assert(vmexit->inst_length == 0);
 
 	stats.vmexit_mtrap++;
 
 #ifdef BHYVE_SNAPSHOT
 	checkpoint_cpu_suspend(*pvcpu);
 #endif
 	if (gdb_port != 0)
 		gdb_cpu_mtrap(*pvcpu);
 #ifdef BHYVE_SNAPSHOT
 	checkpoint_cpu_resume(*pvcpu);
 #endif
 
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handler for VM_EXITCODE_INST_EMUL: count the event and emulate the
  * faulting memory access through emulate_mem().
  *
  * Returns VMEXIT_CONTINUE on success.  On failure the decoded instruction
  * bytes and the RIP are printed and VMEXIT_ABORT is returned; an ESRCH
  * result additionally reports the unhandled guest-physical address.
  */
 static int
 vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	int err, i;
 	struct vie *vie;
 
 	stats.vmexit_inst_emul++;
 
 	vie = &vmexit->u.inst_emul.vie;
 	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
 	    vie, &vmexit->u.inst_emul.paging);
 
 	if (err) {
 		if (err == ESRCH) {
 			/*
 			 * NOTE(review): EPRINTLN presumably appends its own
 			 * line ending, which would make the "\n" here
 			 * redundant -- confirm against debug.h.
 			 */
 			EPRINTLN("Unhandled memory access to 0x%lx\n",
 			    vmexit->u.inst_emul.gpa);
 		}
 
 		fprintf(stderr, "Failed to emulate instruction sequence [ ");
 		for (i = 0; i < vie->num_valid; i++)
 			fprintf(stderr, "%02x", vie->inst[i]);
 		FPRINTLN(stderr, " ] at 0x%lx", vmexit->rip);
 		return (VMEXIT_ABORT);
 	}
 
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Mutex/condition pair used by vmexit_suspend(): exiting non-BSP vCPU
  * threads signal the condition, and the BSP thread waits on it until
  * cpumask is empty.
  */
 static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;
 
 /*
  * Handler for VM_EXITCODE_SUSPENDED.  Does not return to the caller.
  *
  * The calling vCPU is removed from cpumask.  A non-BSP thread then signals
  * resetcpu_cond and terminates itself.  The BSP thread waits until cpumask
  * is empty and exits the process with a status derived from the suspend
  * reason: 0 for reset, 1 for poweroff, 2 for halt, 3 for triple fault and
  * 100 for anything else.
  */
 static int
 vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	enum vm_suspend_how how;
 
 	how = vmexit->u.suspended.how;
 
 	fbsdrun_deletecpu(ctx, *pvcpu);
 
 	if (*pvcpu != BSP) {
 		/* Wake the BSP so it can re-check cpumask, then go away. */
 		pthread_mutex_lock(&resetcpu_mtx);
 		pthread_cond_signal(&resetcpu_cond);
 		pthread_mutex_unlock(&resetcpu_mtx);
 		pthread_exit(NULL);
 	}
 
 	/* BSP only: wait for every other vCPU thread to drop out. */
 	pthread_mutex_lock(&resetcpu_mtx);
 	while (!CPU_EMPTY(&cpumask)) {
 		pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
 	}
 	pthread_mutex_unlock(&resetcpu_mtx);
 
 	switch (how) {
 	case VM_SUSPEND_RESET:
 		exit(0);
 	case VM_SUSPEND_POWEROFF:
 		exit(1);
 	case VM_SUSPEND_HALT:
 		exit(2);
 	case VM_SUSPEND_TRIPLEFAULT:
 		exit(3);
 	default:
 		fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
 		exit(100);
 	}
 	return (0);	/* NOTREACHED */
 }
 
 /*
  * Handler for VM_EXITCODE_DEBUG: when a gdb port is configured, hand the
  * vCPU to gdb_cpu_suspend().  In BHYVE_SNAPSHOT builds the gdb call is
  * bracketed by checkpoint_cpu_suspend()/resume().  Always returns
  * VMEXIT_CONTINUE.
  */
 static int
 vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 #ifdef BHYVE_SNAPSHOT
 	checkpoint_cpu_suspend(*pvcpu);
 #endif
 	if (gdb_port != 0)
 		gdb_cpu_suspend(*pvcpu);
 #ifdef BHYVE_SNAPSHOT
 	checkpoint_cpu_resume(*pvcpu);
 #endif
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handler for VM_EXITCODE_BPT: pass the breakpoint exit to
  * gdb_cpu_breakpoint() and resume the guest.  Without a configured gdb
  * port the exit is unexpected: a message is printed and the process exits
  * with status 4.
  */
 static int
 vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	if (gdb_port != 0) {
 		gdb_cpu_breakpoint(*pvcpu, vmexit);
 		return (VMEXIT_CONTINUE);
 	}
 
 	fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
 	exit(4);
 }
 
 /*
  * Dispatch table indexed by VM exit code; vm_loop() treats a NULL slot as an
  * unexpected exit.  The HLT and PAUSE slots are not set here: they are
  * filled in by fbsdrun_set_capabilities() when the corresponding option is
  * enabled.
  */
 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
 	[VM_EXITCODE_INOUT]  = vmexit_inout,
 	[VM_EXITCODE_INOUT_STR]  = vmexit_inout,
 	[VM_EXITCODE_VMX]    = vmexit_vmx,
 	[VM_EXITCODE_SVM]    = vmexit_svm,
 	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
 	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
 	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
 	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
 	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
 	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
 	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
 	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
 	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
 	[VM_EXITCODE_DEBUG] = vmexit_debug,
 	[VM_EXITCODE_BPT] = vmexit_breakpoint,
 };
 
 /*
  * Run loop of one vCPU thread.
  *
  * Applies the host CPU affinity requested for 'vcpu' (if any), checks that
  * the vCPU is active, sets its RIP to 'startrip' and then repeatedly calls
  * vm_run(), dispatching every exit through handler[].  A handler result of
  * VMEXIT_CONTINUE re-enters the guest, VMEXIT_ABORT calls abort(), and any
  * other result, an unknown exit code or a missing handler exits the process
  * with status 4.  The function returns only when vm_run() itself fails,
  * after printing the error.
  */
 static void
 vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
 {
 	int error, rc;
 	enum vm_exitcode exitcode;
 	cpuset_t active_cpus;
 
 	if (vcpumap[vcpu] != NULL) {
 		error = pthread_setaffinity_np(pthread_self(),
 		    sizeof(cpuset_t), vcpumap[vcpu]);
 		assert(error == 0);
 	}
 
 	/*
 	 * Check the call itself before looking at its output; on failure
 	 * 'active_cpus' would otherwise be read uninitialized.
 	 */
 	error = vm_active_cpus(ctx, &active_cpus);
 	assert(error == 0);
 	assert(CPU_ISSET(vcpu, &active_cpus));
 
 	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
 	assert(error == 0);
 
 	while (1) {
 		error = vm_run(ctx, vcpu, &vmexit[vcpu]);
 		if (error != 0)
 			break;
 
 		exitcode = vmexit[vcpu].exitcode;
 		if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
 			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
 			    exitcode);
 			exit(4);
 		}
 
 		rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
 
 		switch (rc) {
 		case VMEXIT_CONTINUE:
 			break;
 		case VMEXIT_ABORT:
 			abort();
 		default:
 			exit(4);
 		}
 	}
 	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
 }
 
 static int
 num_vcpus_allowed(struct vmctx *ctx)
 {
 	int tmp, error;
 
 	error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);
 
 	/*
 	 * The guest is allowed to spinup more than one processor only if the
 	 * UNRESTRICTED_GUEST capability is available.
 	 */
 	if (error == 0)
 		return (VM_MAXCPU);
 	else
 		return (1);
 }
 
 /*
  * Configure per-vCPU capabilities for 'cpu' according to the command-line
  * options.
  *
  * HLT and PAUSE exiting are enabled when requested; if the capability query
  * fails the process exits with status 4.  When 'cpu' is the BSP the matching
  * handler[] slot is installed as well.  The x2APIC state is then set from
  * x2apic_mode (failure is fatal), and INVPCID is enabled without checking
  * the result.
  */
 void
 fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
 {
 	int err, tmp;
 
 	if (fbsdrun_vmexit_on_hlt()) {
 		err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
 		if (err < 0) {
 			fprintf(stderr, "VM exit on HLT not supported\n");
 			exit(4);
 		}
 		vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
 		/* The handler table is shared, so install it only once. */
 		if (cpu == BSP)
 			handler[VM_EXITCODE_HLT] = vmexit_hlt;
 	}
 
         if (fbsdrun_vmexit_on_pause()) {
 		/*
 		 * pause exit support required for this mode
 		 */
 		err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
 		if (err < 0) {
 			fprintf(stderr,
 			    "SMP mux requested, no pause support\n");
 			exit(4);
 		}
 		vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
 		if (cpu == BSP)
 			handler[VM_EXITCODE_PAUSE] = vmexit_pause;
         }
 
 	if (x2apic_mode)
 		err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
 	else
 		err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
 
 	if (err) {
 		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
 		exit(4);
 	}
 
 	/* Result intentionally not checked. */
 	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
 }
 
 /*
  * Create (or attach to) the virtual machine 'vmname' and return its context.
  *
  * - If the VM already exists and a bootrom is configured, the VM is
  *   reinitialized after being opened.
  * - If the VM already exists and no bootrom is configured, it is used as is.
  * - If the VM was newly created but no bootrom is configured, it cannot be
  *   booted and the process exits.
  *
  * Unless built WITHOUT_CAPSICUM, the device descriptor is restricted to
  * ioctl and read/write mmap rights and to the ioctl list reported by
  * vm_get_ioctls().  Finally the CPU topology is applied.  Every failure is
  * fatal; the function only returns on success.
  */
 static struct vmctx *
 do_open(const char *vmname)
 {
 	struct vmctx *ctx;
 	int error;
 	bool reinit, romboot;
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_t rights;
 	const cap_ioctl_t *cmds;	
 	size_t ncmds;
 #endif
 
 	reinit = romboot = false;
 
 	if (lpc_bootrom())
 		romboot = true;
 
 	error = vm_create(vmname);
 	if (error) {
 		if (errno == EEXIST) {
 			if (romboot) {
 				reinit = true;
 			} else {
 				/*
 				 * The virtual machine has been setup by the
 				 * userspace bootloader.
 				 */
 			}
 		} else {
 			perror("vm_create");
 			exit(4);
 		}
 	} else {
 		if (!romboot) {
 			/*
 			 * If the virtual machine was just created then a
 			 * bootrom must be configured to boot it.
 			 */
 			fprintf(stderr, "virtual machine cannot be booted\n");
 			exit(4);
 		}
 	}
 
 	ctx = vm_open(vmname);
 	if (ctx == NULL) {
 		perror("vm_open");
 		exit(4);
 	}
 
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
 	if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) 
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 	/*
 	 * Two calls: the first is used for the command count, the second
 	 * for the command list, which is released with free() below.
 	 */
 	vm_get_ioctls(&ncmds);
 	cmds = vm_get_ioctls(NULL);
 	if (cmds == NULL)
 		errx(EX_OSERR, "out of memory");
 	if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 	free((cap_ioctl_t *)cmds);
 #endif
  
 	if (reinit) {
 		error = vm_reinit(ctx);
 		if (error) {
 			perror("vm_reinit");
 			exit(4);
 		}
 	}
 	error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
 	if (error)
 		errx(EX_OSERR, "vm_set_topology");
 	return (ctx);
 }
 
 /*
  * Bring up 'vcpu': read its current RIP, apply the common capability
  * settings, enable the UNRESTRICTED_GUEST capability and start its thread
  * at that RIP through fbsdrun_addcpu().  Failing to read the RIP or to set
  * the capability trips an assertion.
  */
 void
 spinup_vcpu(struct vmctx *ctx, int vcpu)
 {
 	uint64_t rip;
 	int error;
 
 	error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
 	assert(error == 0);
 
 	fbsdrun_set_capabilities(ctx, vcpu);
 
 	error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
 	assert(error == 0);
 
 	fbsdrun_addcpu(ctx, BSP, vcpu, rip);
 }
 
 int
 main(int argc, char *argv[])
 {
 	int c, error, dbg_port, err, bvmcons;
 	int max_vcpus, mptgen, memflags;
 	int rtc_localtime;
 	bool gdb_stop;
 	struct vmctx *ctx;
 	uint64_t rip;
 	size_t memsize;
 	char *optstr;
 #ifdef BHYVE_SNAPSHOT
 	char *restore_file;
 	struct restore_state rstate;
 	int vcpu;
 
 	restore_file = NULL;
 #endif
 
 	bvmcons = 0;
 	progname = basename(argv[0]);
 	dbg_port = 0;
 	gdb_stop = false;
 	guest_ncpus = 1;
 	sockets = cores = threads = 1;
 	maxcpus = 0;
 	memsize = 256 * MB;
 	mptgen = 1;
 	rtc_localtime = 1;
 	memflags = 0;
 
 #ifdef BHYVE_SNAPSHOT
 	optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:r:";
 #else
 	optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:";
 #endif
 	while ((c = getopt(argc, argv, optstr)) != -1) {
 		switch (c) {
 		case 'a':
 			x2apic_mode = 0;
 			break;
 		case 'A':
 			acpi = 1;
 			break;
 		case 'b':
 			bvmcons = 1;
 			break;
 		case 'p':
                         if (pincpu_parse(optarg) != 0) {
                             errx(EX_USAGE, "invalid vcpu pinning "
                                  "configuration '%s'", optarg);
                         }
 			break;
                 case 'c':
 			if (topology_parse(optarg) != 0) {
 			    errx(EX_USAGE, "invalid cpu topology "
 				"'%s'", optarg);
 			}
 			break;
 		case 'C':
 			memflags |= VM_MEM_F_INCORE;
 			break;
 		case 'g':
 			dbg_port = atoi(optarg);
 			break;
 		case 'G':
 			if (optarg[0] == 'w') {
 				gdb_stop = true;
 				optarg++;
 			}
 			gdb_port = atoi(optarg);
 			break;
 		case 'l':
 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
 				lpc_print_supported_devices();
 				exit(0);
 			} else if (lpc_device_parse(optarg) != 0) {
 				errx(EX_USAGE, "invalid lpc device "
 				    "configuration '%s'", optarg);
 			}
 			break;
 #ifdef BHYVE_SNAPSHOT
 		case 'r':
 			restore_file = optarg;
 			break;
 #endif
 		case 's':
 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
 				pci_print_supported_devices();
 				exit(0);
 			} else if (pci_parse_slot(optarg) != 0)
 				exit(4);
 			else
 				break;
 		case 'S':
 			memflags |= VM_MEM_F_WIRED;
 			break;
                 case 'm':
 			error = vm_parse_memsize(optarg, &memsize);
 			if (error)
 				errx(EX_USAGE, "invalid memsize '%s'", optarg);
 			break;
 		case 'H':
 			guest_vmexit_on_hlt = 1;
 			break;
 		case 'I':
 			/*
 			 * The "-I" option was used to add an ioapic to the
 			 * virtual machine.
 			 *
 			 * An ioapic is now provided unconditionally for each
 			 * virtual machine and this option is now deprecated.
 			 */
 			break;
 		case 'P':
 			guest_vmexit_on_pause = 1;
 			break;
 		case 'e':
 			strictio = 1;
 			break;
 		case 'u':
 			rtc_localtime = 0;
 			break;
 		case 'U':
 			guest_uuid_str = optarg;
 			break;
 		case 'w':
 			strictmsr = 0;
 			break;
 		case 'W':
 			virtio_msix = 0;
 			break;
 		case 'x':
 			x2apic_mode = 1;
 			break;
 		case 'Y':
 			mptgen = 0;
 			break;
 		case 'h':
 			usage(0);			
 		default:
 			usage(1);
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 #ifdef BHYVE_SNAPSHOT
 	if (argc > 1 || (argc == 0 && restore_file == NULL))
 		usage(1);
 
 	if (restore_file != NULL) {
 		error = load_restore_file(restore_file, &rstate);
 		if (error) {
 			fprintf(stderr, "Failed to read checkpoint info from "
 					"file: '%s'.\n", restore_file);
 			exit(1);
 		}
 	}
 
 	if (argc == 1) {
 		vmname = argv[0];
 	} else {
 		vmname = lookup_vmname(&rstate);
 		if (vmname == NULL) {
 			fprintf(stderr, "Cannot find VM name in restore file. "
 					"Please specify one.\n");
 			exit(1);
 		}
 	}
 #else
 	if (argc != 1)
 		usage(1);
 
 	vmname = argv[0];
 #endif
 	ctx = do_open(vmname);
 
 #ifdef BHYVE_SNAPSHOT
 	if (restore_file != NULL) {
 		guest_ncpus = lookup_guest_ncpus(&rstate);
 		memflags = lookup_memflags(&rstate);
 		memsize = lookup_memsize(&rstate);
 	}
 
 	if (guest_ncpus < 1) {
 		fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
 		exit(1);
 	}
 #endif
 
 	max_vcpus = num_vcpus_allowed(ctx);
 	if (guest_ncpus > max_vcpus) {
 		fprintf(stderr, "%d vCPUs requested but only %d available\n",
 			guest_ncpus, max_vcpus);
 		exit(4);
 	}
 
 	fbsdrun_set_capabilities(ctx, BSP);
 
 	vm_set_memflags(ctx, memflags);
 	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
 	if (err) {
 		fprintf(stderr, "Unable to setup memory (%d)\n", errno);
 		exit(4);
 	}
 
 	error = init_msr();
 	if (error) {
 		fprintf(stderr, "init_msr error %d", error);
 		exit(4);
 	}
 
 	init_mem();
 	init_inout();
+	kernemu_dev_init();
 	init_bootrom(ctx);
 	atkbdc_init(ctx);
 	pci_irq_init(ctx);
 	ioapic_init(ctx);
 
 	rtc_init(ctx, rtc_localtime);
 	sci_init(ctx);
 
 	/*
 	 * Exit if a device emulation finds an error in its initilization
 	 */
 	if (init_pci(ctx) != 0) {
 		perror("device emulation initialization error");
 		exit(4);
 	}
 
 	/*
 	 * Initialize after PCI, to allow a bootrom file to reserve the high
 	 * region.
 	 */
 	if (acpi)
 		vmgenc_init(ctx);
 
 	if (dbg_port != 0)
 		init_dbgport(dbg_port);
 
 	if (gdb_port != 0)
 		init_gdb(ctx, gdb_port, gdb_stop);
 
 	if (bvmcons)
 		init_bvmcons();
 
 	if (lpc_bootrom()) {
 		if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
 			fprintf(stderr, "ROM boot failed: unrestricted guest "
 			    "capability not available\n");
 			exit(4);
 		}
 		error = vcpu_reset(ctx, BSP);
 		assert(error == 0);
 	}
 
 #ifdef BHYVE_SNAPSHOT
 	if (restore_file != NULL) {
 		fprintf(stdout, "Pausing pci devs...\r\n");
 		if (vm_pause_user_devs(ctx) != 0) {
 			fprintf(stderr, "Failed to pause PCI device state.\n");
 			exit(1);
 		}
 
 		fprintf(stdout, "Restoring vm mem...\r\n");
 		if (restore_vm_mem(ctx, &rstate) != 0) {
 			fprintf(stderr, "Failed to restore VM memory.\n");
 			exit(1);
 		}
 
 		fprintf(stdout, "Restoring pci devs...\r\n");
 		if (vm_restore_user_devs(ctx, &rstate) != 0) {
 			fprintf(stderr, "Failed to restore PCI device state.\n");
 			exit(1);
 		}
 
 		fprintf(stdout, "Restoring kernel structs...\r\n");
 		if (vm_restore_kern_structs(ctx, &rstate) != 0) {
 			fprintf(stderr, "Failed to restore kernel structs.\n");
 			exit(1);
 		}
 
 		fprintf(stdout, "Resuming pci devs...\r\n");
 		if (vm_resume_user_devs(ctx) != 0) {
 			fprintf(stderr, "Failed to resume PCI device state.\n");
 			exit(1);
 		}
 	}
 #endif
 
 	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
 	assert(error == 0);
 
 	/*
 	 * build the guest tables, MP etc.
 	 */
 	if (mptgen) {
 		error = mptable_build(ctx, guest_ncpus);
 		if (error) {
 			perror("error to build the guest tables");
 			exit(4);
 		}
 	}
 
 	error = smbios_build(ctx);
 	assert(error == 0);
 
 	if (acpi) {
 		error = acpi_build(ctx, guest_ncpus);
 		assert(error == 0);
 	}
 
 	if (lpc_bootrom())
 		fwctl_init();
 
 	/*
 	 * Change the proc title to include the VM name.
 	 */
 	setproctitle("%s", vmname);
 
 #ifndef WITHOUT_CAPSICUM
 	caph_cache_catpages();
 
 	if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 
 	if (caph_enter() == -1)
 		errx(EX_OSERR, "cap_enter() failed");
 #endif
 
 #ifdef BHYVE_SNAPSHOT
 	if (restore_file != NULL)
 		destroy_restore_state(&rstate);
 
 	/*
 	 * checkpointing thread for communication with bhyvectl
 	 */
 	if (init_checkpoint_thread(ctx) < 0)
 		printf("Failed to start checkpoint thread!\r\n");
 
 	if (restore_file != NULL)
 		vm_restore_time(ctx);
 #endif
 
 	/*
 	 * Add CPU 0
 	 */
 	fbsdrun_addcpu(ctx, BSP, BSP, rip);
 
 #ifdef BHYVE_SNAPSHOT
 	/*
 	 * If we restore a VM, start all vCPUs now (including APs), otherwise,
 	 * let the guest OS to spin them up later via vmexits.
 	 */
 	if (restore_file != NULL) {
 		for (vcpu = 0; vcpu < guest_ncpus; vcpu++) {
 			if (vcpu == BSP)
 				continue;
 
 			fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu);
 			spinup_vcpu(ctx, vcpu);
 		}
 	}
 #endif
 
 	/*
 	 * Head off to the main event dispatch loop
 	 */
 	mevent_dispatch();
 
 	exit(4);
 }
Index: head/usr.sbin/bhyve/kernemu_dev.c
===================================================================
--- head/usr.sbin/bhyve/kernemu_dev.c	(nonexistent)
+++ head/usr.sbin/bhyve/kernemu_dev.c	(revision 361082)
@@ -0,0 +1,98 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright 2020 Conrad Meyer <cem@FreeBSD.org>.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/tree.h>
+
+#include <amd64/include/vmm.h>
+#include <x86/include/apicreg.h>
+struct vm;
+struct vm_hpet_cap;
+#include <vmm/io/vioapic.h>
+#include <vmm/io/vhpet.h>
+
+#include <err.h>
+#include <errno.h>
+#include <vmmapi.h>
+
+#include "kernemu_dev.h"
+#include "mem.h"
+
+static int
+apic_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size,
+    uint64_t *val, void *arg1 __unused, long arg2 __unused)
+{
+	/* Relay to vm_readwrite_kernemu_device(); errno on failure. */
+	const int wr = (dir == MEM_F_WRITE);
+	return (vm_readwrite_kernemu_device(ctx, vcpu, addr, wr, size,
+	    val) != 0 ? errno : 0);
+}
+
+/* One page of MMIO at DEFAULT_APIC_BASE, dispatched to apic_handler. */
+static struct mem_range lapic_mmio = {
+	.name = "kern-lapic-mmio",
+	.handler = apic_handler,
+	.flags = MEM_F_RW | MEM_F_IMMUTABLE,
+	.base = DEFAULT_APIC_BASE,
+	.size = PAGE_SIZE,
+};
+static struct mem_range ioapic_mmio = {
+	.name = "kern-ioapic-mmio",
+	.handler = apic_handler,	/* same handler as the LAPIC range */
+	.flags = MEM_F_RW | MEM_F_IMMUTABLE,
+	.base = VIOAPIC_BASE,
+	.size = VIOAPIC_SIZE,
+};
+static struct mem_range hpet_mmio = {
+	.name = "kern-hpet-mmio",
+	.handler = apic_handler,	/* same handler as the LAPIC range */
+	.flags = MEM_F_RW | MEM_F_IMMUTABLE,
+	.base = VHPET_BASE,
+	.size = VHPET_SIZE,
+};
+
+void
+kernemu_dev_init(void)
+{
+	int error;
+
+	/*
+	 * Register the LAPIC, IOAPIC and HPET ranges; errc() exits on failure.
+	 */
+	if ((error = register_mem(&lapic_mmio)) != 0)
+		errc(4, error, "register_mem: LAPIC (0x%08x)",
+		    (unsigned)lapic_mmio.base);
+	if ((error = register_mem(&ioapic_mmio)) != 0)
+		errc(4, error, "register_mem: IOAPIC (0x%08x)",
+		    (unsigned)ioapic_mmio.base);
+	if ((error = register_mem(&hpet_mmio)) != 0)
+		errc(4, error, "register_mem: HPET (0x%08x)",
+		    (unsigned)hpet_mmio.base);
+}

Property changes on: head/usr.sbin/bhyve/kernemu_dev.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/usr.sbin/bhyve/kernemu_dev.h
===================================================================
--- head/usr.sbin/bhyve/kernemu_dev.h	(nonexistent)
+++ head/usr.sbin/bhyve/kernemu_dev.h	(revision 361082)
@@ -0,0 +1,32 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright 2020 Conrad Meyer <cem@FreeBSD.org>.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+void kernemu_dev_init(void);

Property changes on: head/usr.sbin/bhyve/kernemu_dev.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property