Index: lib/Makefile =================================================================== --- lib/Makefile +++ lib/Makefile @@ -205,6 +205,9 @@ .if ${MACHINE_CPUARCH} == "amd64" SUBDIR.${MK_PMC}+= libipt +.endif + +.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "aarch64" SUBDIR.${MK_BHYVE}+= libvmmapi .endif Index: lib/libvmmapi/Makefile =================================================================== --- lib/libvmmapi/Makefile +++ lib/libvmmapi/Makefile @@ -1,12 +1,21 @@ # $FreeBSD$ -PACKAGE=lib${LIB} -LIB= vmmapi -SRCS= vmmapi.c vmmapi_freebsd.c -INCS= vmmapi.h +PACKAGE= lib${LIB} +SHLIBDIR?= /lib +LIB_SRCTOP?= ${.CURDIR} -LIBADD= util +LIB= vmmapi +WARNS?= 2 -CFLAGS+= -I${.CURDIR} +.if exists(${LIB_SRCTOP}/${MACHINE}) +LIB_ARCH= ${MACHINE} +.elif exists(${LIB_SRCTOP}/${MACHINE_ARCH}) +LIB_ARCH= ${MACHINE_ARCH} +.else +LIB_ARCH= ${MACHINE_CPUARCH} +.endif + +CFLAGS+= -I${LIB_SRCTOP}/${LIB_ARCH} +.include "${LIB_SRCTOP}/${LIB_ARCH}/Makefile.inc" .include Index: lib/libvmmapi/amd64/Makefile.inc =================================================================== --- /dev/null +++ lib/libvmmapi/amd64/Makefile.inc @@ -0,0 +1,7 @@ +# $FreeBSD$ +.PATH: ${LIB_SRCTOP}/amd64/ + +SRCS= vmmapi.c vmmapi_freebsd.c +INCS= vmmapi.h + +LIBADD= util Index: lib/libvmmapi/arm64/Makefile.inc =================================================================== --- /dev/null +++ lib/libvmmapi/arm64/Makefile.inc @@ -0,0 +1,7 @@ +# $FreeBSD$ +.PATH: ${LIB_SRCTOP}/arm64/ + +SRCS= vmmapi.c +INCS= vmmapi.h + +LIBADD= util Index: lib/libvmmapi/arm64/vmmapi.h =================================================================== --- /dev/null +++ lib/libvmmapi/arm64/vmmapi.h @@ -0,0 +1,80 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMMAPI_H_ +#define _VMMAPI_H_ + +struct vmctx; +struct vm_exit; +enum vm_cap_type; + +/* + * Different styles of mapping the memory assigned to a VM into the address + * space of the controlling process. 
+ */ +enum vm_mmap_style { + VM_MMAP_NONE, /* no mapping */ + VM_MMAP_ALL, /* fully and statically mapped */ + VM_MMAP_SPARSE, /* mappings created on-demand */ +}; + +int vm_create(const char *name); +struct vmctx *vm_open(const char *name); +void vm_destroy(struct vmctx *ctx); +int vm_parse_memsize(const char *optarg, size_t *memsize); +int vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len); +int vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t len, enum vm_mmap_style s); +void *vm_map_ipa(struct vmctx *ctx, uint64_t gaddr, size_t len); +uint32_t vm_get_mem_limit(struct vmctx *ctx); +void vm_set_mem_limit(struct vmctx *ctx, uint32_t limit); +int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); +int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); +int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, + struct vm_exit *ret_vmexit); +const char *vm_capability_type2name(int type); +int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int *retval); +int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int val); +int vm_assert_irq(struct vmctx *ctx, uint32_t irq, uint32_t vcpuid); +int vm_deassert_irq(struct vmctx *ctx, uint32_t irq, uint32_t vcpuid); + +/* + * Return a pointer to the statistics buffer. Note that this is not MT-safe. + */ +uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, + int *ret_entries); +const char *vm_get_stat_desc(struct vmctx *ctx, int index); + +/* Reset vcpu register state */ +int vcpu_reset(struct vmctx *ctx, int vcpu); +int vm_activate_cpu(struct vmctx *ctx, int vcpu); + +int vm_attach_vgic(struct vmctx *ctx, uint64_t dist_start, size_t dist_size, + uint64_t redist_start, size_t redist_size); +#endif /* _VMMAPI_H_ */ Index: lib/libvmmapi/arm64/vmmapi.c =================================================================== --- /dev/null +++ lib/libvmmapi/arm64/vmmapi.c @@ -0,0 +1,406 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "vmmapi.h" + +#define MB (1024 * 1024UL) +#define GB (1024 * 1024 * 1024UL) + +struct vmctx { + int fd; + uint32_t mem_limit; + enum vm_mmap_style vms; + size_t mem_size; + uint64_t mem_base; + char *mem_addr; + char *name; +}; + +#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) +#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) + +static int +vm_device_open(const char *name) +{ + int fd, len; + char *vmfile; + + len = strlen("/dev/vmm/") + strlen(name) + 1; + vmfile = malloc(len); + assert(vmfile != NULL); + snprintf(vmfile, len, "/dev/vmm/%s", name); + + /* Open the device file */ + fd = open(vmfile, O_RDWR, 0); + + free(vmfile); + return (fd); +} + +int +vm_create(const char *name) +{ + + return (CREATE((char *)name)); +} + +struct vmctx * +vm_open(const char *name) +{ + struct vmctx *vm; + + vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); + assert(vm != NULL); + + vm->fd = -1; + vm->mem_limit = 2 * GB; + vm->name = (char *)(vm + 1); + strcpy(vm->name, name); + + if ((vm->fd = vm_device_open(vm->name)) < 0) + goto err; + + return (vm); +err: + vm_destroy(vm); + return (NULL); +} + +void +vm_destroy(struct vmctx *vm) +{ + assert(vm != NULL); + + if (vm->fd >= 0) + close(vm->fd); + DESTROY(vm->name); + + free(vm); +} + +int +vm_parse_memsize(const char *optarg, size_t *ret_memsize) +{ + char *endptr; + size_t 
optval; + int error; + + optval = strtoul(optarg, &endptr, 0); + if (*optarg != '\0' && *endptr == '\0') { + /* + * For the sake of backward compatibility if the memory size + * specified on the command line is less than a megabyte then + * it is interpreted as being in units of MB. + */ + if (optval < MB) + optval *= MB; + *ret_memsize = optval; + error = 0; + } else + error = expand_number(optarg, ret_memsize); + + return (error); +} + +int +vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len) +{ + int error; + struct vm_memory_segment seg; + + bzero(&seg, sizeof(seg)); + seg.gpa = gpa; + error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); + *ret_len = seg.len; + return (error); +} + +uint32_t +vm_get_mem_limit(struct vmctx *ctx) +{ + + return (ctx->mem_limit); +} + +void +vm_set_mem_limit(struct vmctx *ctx, uint32_t limit) +{ + + ctx->mem_limit = limit; +} + +static int +setup_memory_segment(struct vmctx *ctx, uint64_t gpa, size_t len, char **addr) +{ + int error; + struct vm_memory_segment seg; + + /* + * Create and optionally map 'len' bytes of memory at guest + * physical address 'gpa' + */ + bzero(&seg, sizeof(seg)); + seg.gpa = gpa; + seg.len = len; + error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); + if (error == 0 && addr != NULL) { + *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, gpa); + } + return (error); +} + +int +vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t memsize, enum vm_mmap_style vms) +{ + int error; + + /* XXX VM_MMAP_SPARSE not implemented yet */ + assert(vms == VM_MMAP_ALL); + + ctx->vms = vms; + ctx->mem_base = membase; + + assert(memsize <= ctx->mem_limit); + ctx->mem_size = memsize; + + if (ctx->mem_size > 0) { + error = setup_memory_segment(ctx, ctx->mem_base, ctx->mem_size, + &ctx->mem_addr); + if (error) + return (error); + } + + return (0); +} + +void * +vm_map_ipa(struct vmctx *ctx, uint64_t iaddr, size_t len) +{ + /* XXX VM_MMAP_SPARSE not implemented yet */ + assert(ctx->vms == VM_MMAP_ALL); + + if (iaddr < ctx->mem_base) + return ((void *)(ctx->mem_addr + iaddr)); + else + return ((void *)(ctx->mem_addr + (iaddr - ctx->mem_base))); +} + + +int +vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) +{ + int error; + struct vm_register vmreg; + + bzero(&vmreg, sizeof(vmreg)); + vmreg.cpuid = vcpu; + vmreg.regnum = reg; + vmreg.regval = val; + + error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); + return (error); +} + +int +vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) +{ + int error; + struct vm_register vmreg; + + bzero(&vmreg, sizeof(vmreg)); + vmreg.cpuid = vcpu; + vmreg.regnum = reg; + + error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); + *ret_val = vmreg.regval; + return (error); +} + +int +vm_run(struct vmctx *ctx, int vcpu, uint64_t pc, struct vm_exit *vmexit) +{ + int error; + struct vm_run vmrun; + + bzero(&vmrun, sizeof(vmrun)); + vmrun.cpuid = vcpu; + vmrun.pc = pc; + + error = ioctl(ctx->fd, VM_RUN, &vmrun); + bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); + return (error); +} + +static struct { + const char *name; + int type; +} capstrmap[] = { + { "hlt_exit", VM_CAP_HALT_EXIT }, + { "mtrap_exit", VM_CAP_MTRAP_EXIT }, + { "pause_exit", VM_CAP_PAUSE_EXIT }, + { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, + { 0 } +}; + +int +vm_capability_name2type(const char *capname) +{ + int i; + + for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { + if (strcmp(capstrmap[i].name, capname) == 0) + return (capstrmap[i].type); + } + + return (-1); +} 
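/*
 * Illustrative usage sketch, not part of this change: how a bhyve-style
 * consumer might drive the arm64 libvmmapi calls added in this file.
 * The function name, entry point, memory size, and the run-loop handling
 * are placeholder assumptions; the 0x80000000 base mirrors the
 * VM_GUEST_BASE_IPA constant from the new sys/arm64/include/vmm.h, and
 * the headers follow the usual bhyve include pattern.
 */
#include <sys/types.h>
#include <stdint.h>

#include <machine/vmm.h>	/* struct vm_exit (new arm64 vmm.h) */

#include <err.h>

#include <vmmapi.h>

static int
run_guest_example(const char *name, size_t memsize, uint64_t entry)
{
	struct vmctx *ctx;
	struct vm_exit vmexit;
	int error;

	if (vm_create(name) != 0)
		err(1, "vm_create");
	if ((ctx = vm_open(name)) == NULL)
		errx(1, "vm_open");

	/* Back the guest IPA space starting at the platform base address. */
	if (vm_setup_memory(ctx, 0x80000000UL, memsize, VM_MMAP_ALL) != 0)
		errx(1, "vm_setup_memory");

	if (vm_activate_cpu(ctx, 0) != 0)
		errx(1, "vm_activate_cpu");

	/* Run vcpu 0; a real loop would dispatch on the exit reason. */
	error = vm_run(ctx, 0, entry, &vmexit);

	vm_destroy(ctx);
	return (error);
}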
+ +const char * +vm_capability_type2name(int type) +{ + int i; + + for (i = 0; capstrmap[i].name != NULL; i++) { + if (capstrmap[i].type == type) + return (capstrmap[i].name); + } + + return (NULL); +} + +int +vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int *retval) +{ + int error; + struct vm_capability vmcap; + + bzero(&vmcap, sizeof(vmcap)); + vmcap.cpuid = vcpu; + vmcap.captype = cap; + + error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); + *retval = vmcap.capval; + return (error); +} + +int +vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) +{ + struct vm_capability vmcap; + + bzero(&vmcap, sizeof(vmcap)); + vmcap.cpuid = vcpu; + vmcap.captype = cap; + vmcap.capval = val; + + return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); +} + +uint64_t * +vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, + int *ret_entries) +{ + int error; + + static struct vm_stats vmstats; + + vmstats.cpuid = vcpu; + + error = ioctl(ctx->fd, VM_STATS, &vmstats); + if (error == 0) { + if (ret_entries) + *ret_entries = vmstats.num_entries; + if (ret_tv) + *ret_tv = vmstats.tv; + return (vmstats.statbuf); + } else + return (NULL); +} + +const char * +vm_get_stat_desc(struct vmctx *ctx, int index) +{ + static struct vm_stat_desc statdesc; + + statdesc.index = index; + if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) + return (statdesc.desc); + else + return (NULL); +} + +int +vcpu_reset(struct vmctx *vmctx, int vcpu) +{ + return (ENXIO); +} + +int +vm_attach_vgic(struct vmctx *ctx, uint64_t dist_start, size_t dist_size, + uint64_t redist_start, size_t redist_size) +{ + struct vm_attach_vgic vav; + + bzero(&vav, sizeof(vav)); + vav.dist_start = dist_start; + vav.dist_size = dist_size; + vav.redist_start = redist_start; + vav.redist_size = redist_size; + + return (ioctl(ctx->fd, VM_ATTACH_VGIC, &vav)); +} + +int +vm_assert_irq(struct vmctx *ctx, uint32_t irq, uint32_t vcpuid) +{ + struct vm_irq vi; + + bzero(&vi, sizeof(vi)); + vi.irq = irq; + vi.vcpuid = vcpuid; + + return (ioctl(ctx->fd, VM_ASSERT_IRQ, &vi)); +} + +int +vm_deassert_irq(struct vmctx *ctx, uint32_t irq, uint32_t vcpuid) +{ + struct vm_irq vi; + + bzero(&vi, sizeof(vi)); + vi.irq = irq; + vi.vcpuid = vcpuid; + + return (ioctl(ctx->fd, VM_DEASSERT_IRQ, &vi)); +} + +int +vm_activate_cpu(struct vmctx *ctx, int vcpu) +{ + struct vm_activate_cpu ac; + int error; + + bzero(&ac, sizeof(struct vm_activate_cpu)); + ac.vcpuid = vcpu; + error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac); + return (error); +} Index: sys/arm/arm/generic_timer.h =================================================================== --- /dev/null +++ sys/arm/arm/generic_timer.h @@ -0,0 +1,44 @@ +/*- + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _ARM_GENERIC_TIMER_H_ +#define _ARM_GENERIC_TIMER_H_ + +#define GT_PHYS_SECURE 0 +#define GT_PHYS_NONSECURE 1 +#define GT_VIRT 2 +#define GT_HYP 3 + +int arm_tmr_setup_intr(int gt_type, driver_filter_t filter, + driver_intr_t handler, void *arg); +int arm_tmr_teardown_intr(int gt_type); + +#endif Index: sys/arm/arm/generic_timer.c =================================================================== --- sys/arm/arm/generic_timer.c +++ sys/arm/arm/generic_timer.c @@ -63,6 +63,10 @@ #include /* For arm_set_delay */ #endif +#if defined(__aarch64__) +#include /* For virt_enabled() */ +#endif + #ifdef FDT #include #include @@ -74,6 +78,8 @@ #include #endif +#include "generic_timer.h" + #define GT_CTRL_ENABLE (1 << 0) #define GT_CTRL_INT_MASK (1 << 1) #define GT_CTRL_INT_STAT (1 << 2) @@ -123,6 +129,8 @@ .tc_fill_vdso_timehands = arm_tmr_fill_vdso_timehands, }; +static device_t arm_tmr_dev; + #ifdef __arm__ #define get_el0(x) cp15_## x ##_get() #define get_el1(x) cp15_## x ##_get() @@ -314,6 +322,39 @@ return (FILTER_HANDLED); } +int +arm_tmr_setup_intr(int gt_type, driver_filter_t filter, driver_intr_t handler, + void *arg) +{ + if (gt_type != GT_PHYS_SECURE && + gt_type != GT_PHYS_NONSECURE && + gt_type != GT_VIRT && + gt_type != GT_HYP) + return (ENXIO); + + if (arm_tmr_sc->res[gt_type] == NULL) + return (ENXIO); + + return (bus_setup_intr(arm_tmr_dev, arm_tmr_sc->res[gt_type], + INTR_TYPE_CLK, filter, handler, arg, &arm_tmr_sc->ihl[gt_type])); +} + +int +arm_tmr_teardown_intr(int gt_type) +{ + if (gt_type != GT_PHYS_SECURE && + gt_type != GT_PHYS_NONSECURE && + gt_type != GT_VIRT && + gt_type != GT_HYP) + return (ENXIO); + + if (arm_tmr_sc->res[gt_type] == NULL) + return (ENXIO); + + return (bus_teardown_intr(arm_tmr_dev, arm_tmr_sc->res[gt_type], + arm_tmr_sc->ihl[gt_type])); +} + #ifdef FDT static int arm_tmr_fdt_probe(device_t dev) @@ -447,13 +488,26 @@ last_timer = 1; } +#ifdef __aarch64__ + sc->physical |= virt_enabled(); +#endif + arm_tmr_sc = sc; /* Setup secure, non-secure and virtual IRQs handler */ - for (i = first_timer; i <= last_timer; i++) { + for (i = GT_PHYS_SECURE; i <= GT_VIRT; i++) { /* If we do not have the interrupt, skip it. */ if (sc->res[i] == NULL) continue; +#if defined(__aarch64__) + if (i == 2 && virt_enabled()) { + /* + * Do not install an interrupt handler for the virtual + * timer. This will be used by the VM. 
+ */ + continue; + } +#endif error = bus_setup_intr(dev, sc->res[i], INTR_TYPE_CLK, arm_tmr_intr, NULL, sc, &sc->ihl[i]); if (error) { @@ -461,7 +515,6 @@ return (ENXIO); } } - /* Disable the virtual timer until we are ready */ if (sc->res[2] != NULL) arm_tmr_disable(false); @@ -488,6 +541,8 @@ arm_set_delay(arm_tmr_do_delay, sc); #endif + arm_tmr_dev = dev; + return (0); } Index: sys/arm/arm/gic.h =================================================================== --- sys/arm/arm/gic.h +++ sys/arm/arm/gic.h @@ -47,11 +47,18 @@ struct arm_gic_softc { device_t gic_dev; + bool is_root; void * gic_intrhand; struct gic_irqsrc * gic_irqs; -#define GIC_RES_DIST 0 -#define GIC_RES_CPU 1 - struct resource * gic_res[3]; +#define GIC_RES_DIST 0 +#define GIC_RES_CPU 1 + struct resource * gic_res[6]; + bus_space_tag_t gic_c_bst; + bus_space_tag_t gic_d_bst; + bus_space_handle_t gic_c_bsh; + bus_space_handle_t gic_d_bsh; + bus_space_tag_t gic_h_bst; + bus_space_handle_t gic_h_bsh; uint8_t ver; struct mtx mutex; uint32_t nirqs; Index: sys/arm/arm/gic.c =================================================================== --- sys/arm/arm/gic.c +++ sys/arm/arm/gic.c @@ -128,10 +128,14 @@ static struct resource_spec arm_gic_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* Distributor registers */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* CPU Interrupt Intf. registers */ - { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* Parent interrupt */ + { SYS_RES_MEMORY, 2, RF_ACTIVE | RF_OPTIONAL }, /* Virtual Interface Control */ + { SYS_RES_MEMORY, 3, RF_ACTIVE | RF_OPTIONAL }, /* Virtual CPU interface */ + { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* vGIC maintenance interrupt or parent interrupt */ { -1, 0 } }; +extern char hypmode_enabled[]; + #if defined(__arm__) && defined(INVARIANTS) static int gic_debug_spurious = 1; #else @@ -156,6 +160,22 @@ #define gic_d_write_4(_sc, _reg, _val) \ bus_write_4((_sc)->gic_res[GIC_RES_DIST], (_reg), (_val)) +#define gic_h_read_4(_sc, _reg) \ + bus_space_read_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg)) +#define gic_h_write_4(_sc, _reg, _val) \ + bus_space_write_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg), (_val)) + +struct arm_gic_softc * +arm_gic_get_sc(void) +{ + return gic_sc; +} +uint32_t +arm_gic_get_lr_num(void) +{ + return (gic_h_read_4(gic_sc, GICH_VTR) & 0x3f) + 1; +} + static inline void gic_irq_unmask(struct arm_gic_softc *sc, u_int irq) { @@ -323,6 +343,27 @@ /* Initialize mutex */ mtx_init(&sc->mutex, "GIC lock", NULL, MTX_SPIN); + /* Distributor Interface */ + sc->gic_d_bst = rman_get_bustag(sc->gic_res[DISTRIBUTOR_RES_IDX]); + sc->gic_d_bsh = rman_get_bushandle(sc->gic_res[DISTRIBUTOR_RES_IDX]); + + /* CPU Interface */ + sc->gic_c_bst = rman_get_bustag(sc->gic_res[CPU_INTERFACE_RES_IDX]); + sc->gic_c_bsh = rman_get_bushandle(sc->gic_res[CPU_INTERFACE_RES_IDX]); + + /* Virtual Interface Control */ + if (sc->is_root) { + if (sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX] == NULL) { + device_printf(dev, "Cannot find Virtual Interface Control Registers. 
Disabling Hyp-Mode...\n"); + hypmode_enabled[0] = -1; + } else { + sc->gic_h_bst = rman_get_bustag(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]); + sc->gic_h_bsh = rman_get_bushandle(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]); + } + } else { + hypmode_enabled[0] = -1; + } + /* Disable interrupt forwarding to the CPU interface */ gic_d_write_4(sc, GICD_CTLR, 0x00); @@ -501,6 +542,33 @@ ("arm_gic_read_ivar: Invalid bus type %u", sc->gic_bus)); *result = sc->gic_bus; return (0); + case GIC_IVAR_VIRTUAL_INT_CTRL_RES: + *result = (uintptr_t)sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]; + return (0); + case GIC_IVAR_VIRTUAL_INT_CTRL_VADDR: + *result = (uintptr_t)rman_get_virtual(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]); + return (0); + case GIC_IVAR_VIRTUAL_INT_CTRL_PADDR: + *result = (uintptr_t)rman_get_start(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]); + return (0); + case GIC_IVAR_VIRTUAL_INT_CTRL_SIZE: + *result = rman_get_size(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]); + return (0); + case GIC_IVAR_VIRTUAL_CPU_INT_PADDR: + *result = rman_get_start(sc->gic_res[VIRT_CPU_INTERFACE_RES_IDX]); + return (0); + case GIC_IVAR_VIRTUAL_CPU_INT_SIZE: + *result = rman_get_size(sc->gic_res[VIRT_CPU_INTERFACE_RES_IDX]); + return (0); + case GIC_IVAR_LR_NUM: + *result = (gic_h_read_4(gic_sc, GICH_VTR) & 0x3f) + 1; + return (0); + case GIC_IVAR_MAINTENANCE_INTR_RES: + if (sc->is_root) + *result = (uintptr_t)sc->gic_res[MAINTENANCE_INTR_RES_IDX]; + else + result = NULL; + return (0); } return (ENOENT); @@ -973,7 +1041,7 @@ if (CPU_ISSET(i, &cpus)) val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT; - gic_d_write_4(sc, GICD_SGIR, val | gi->gi_irq); + gic_d_write_4(sc, GICD_SGIR(0), val | gi->gi_irq); } static int Index: sys/arm/arm/gic_common.h =================================================================== --- sys/arm/arm/gic_common.h +++ sys/arm/arm/gic_common.h @@ -31,8 +31,25 @@ #ifndef _GIC_COMMON_H_ #define _GIC_COMMON_H_ -#define GIC_IVAR_HW_REV 500 -#define GIC_IVAR_BUS 501 +#ifndef __ASSEMBLER__ + +#define DISTRIBUTOR_RES_IDX 0 +#define CPU_INTERFACE_RES_IDX 1 +#define VIRT_INTERFACE_CONTROL_RES_IDX 2 +#define VIRT_CPU_INTERFACE_RES_IDX 3 +#define MAINTENANCE_INTR_RES_IDX 4 +#define INTRNG_RES_IDX 5 + +#define GIC_IVAR_HW_REV 500 +#define GIC_IVAR_BUS 501 +#define GIC_IVAR_VIRTUAL_INT_CTRL_RES 502 +#define GIC_IVAR_VIRTUAL_INT_CTRL_VADDR 503 +#define GIC_IVAR_VIRTUAL_INT_CTRL_PADDR 505 +#define GIC_IVAR_VIRTUAL_INT_CTRL_SIZE 504 +#define GIC_IVAR_VIRTUAL_CPU_INT_PADDR 506 +#define GIC_IVAR_VIRTUAL_CPU_INT_SIZE 507 +#define GIC_IVAR_LR_NUM 508 +#define GIC_IVAR_MAINTENANCE_INTR_RES 509 /* GIC_IVAR_BUS values */ #define GIC_BUS_UNKNOWN 0 @@ -42,6 +59,19 @@ __BUS_ACCESSOR(gic, hw_rev, GIC, HW_REV, u_int); __BUS_ACCESSOR(gic, bus, GIC, BUS, u_int); +__BUS_ACCESSOR(gic, virtual_int_ctrl_res, GIC, VIRTUAL_INT_CTRL_RES, struct resource *); +__BUS_ACCESSOR(gic, virtual_int_ctrl_vaddr, GIC, VIRTUAL_INT_CTRL_VADDR, uint64_t); +__BUS_ACCESSOR(gic, virtual_int_ctrl_paddr, GIC, VIRTUAL_INT_CTRL_PADDR, uint64_t); +__BUS_ACCESSOR(gic, virtual_int_ctrl_size, GIC, VIRTUAL_INT_CTRL_SIZE, uint32_t); +__BUS_ACCESSOR(gic, virtual_cpu_int_paddr, GIC, VIRTUAL_CPU_INT_PADDR, uint32_t); +__BUS_ACCESSOR(gic, virtual_cpu_int_size, GIC, VIRTUAL_CPU_INT_SIZE, uint32_t); +__BUS_ACCESSOR(gic, lr_num, GIC, LR_NUM, uint32_t); +__BUS_ACCESSOR(gic, maintenance_intr_res, GIC, MAINTENANCE_INTR_RES, struct resource *); + +struct arm_gic_softc *arm_gic_get_sc(void); +uint32_t arm_gic_get_lr_num(void); + +#endif /*__ASSEMBLER__ */ 
/* Software Generated Interrupts */ #define GIC_FIRST_SGI 0 /* Irqs 0-15 are SGIs/IPIs. */ @@ -55,8 +85,9 @@ /* Common register values */ #define GICD_CTLR 0x0000 /* v1 ICDDCR */ #define GICD_TYPER 0x0004 /* v1 ICDICTR */ -#define GICD_TYPER_ITLINESNUM_MASK 0x1f -#define GICD_TYPER_I_NUM(n) ((((n) & 0x1F) + 1) * 32) +#define GICD_TYPER_ITLINESNUM_MASK (0x1f) +#define GICD_TYPER_I_NUM(n) \ + ((((n) & GICD_TYPER_ITLINESNUM_MASK) + 1) * 32) #define GICD_IIDR 0x0008 /* v1 ICDIIDR */ #define GICD_IIDR_PROD_SHIFT 24 #define GICD_IIDR_PROD_MASK 0xff000000 @@ -74,20 +105,33 @@ #define GICD_IIDR_IMPL_MASK 0x00000fff #define GICD_IIDR_IMPL(x) \ (((x) & GICD_IIDR_IMPL_MASK) >> GICD_IIDR_IMPL_SHIFT) -#define GICD_IGROUPR(n) (0x0080 + (((n) >> 5) * 4)) /* v1 ICDISER */ +#define GICD_TYPER2 0x000c +#define GICD_IGROUPR_BASE (0x0080) +#define GICD_IGROUPR(n) \ + (GICD_IGROUPR_BASE + (((n) >> 5) * 4)) /* v1 ICDISER */ #define GICD_I_PER_IGROUPRn 32 -#define GICD_ISENABLER(n) (0x0100 + (((n) >> 5) * 4)) /* v1 ICDISER */ +#define GICD_ISENABLER_BASE (0x0100) +#define GICD_ISENABLER(n) \ + (GICD_ISENABLER_BASE + (((n) >> 5) * 4)) /* v1 ICDISER */ #define GICD_I_MASK(n) (1ul << ((n) & 0x1f)) #define GICD_I_PER_ISENABLERn 32 -#define GICD_ICENABLER(n) (0x0180 + (((n) >> 5) * 4)) /* v1 ICDICER */ +#define GICD_ICENABLER_BASE (0x0180) +#define GICD_ICENABLER(n) \ + (GICD_ICENABLER_BASE + (((n) >> 5) * 4)) /* v1 ICDICER */ #define GICD_ISPENDR(n) (0x0200 + (((n) >> 5) * 4)) /* v1 ICDISPR */ #define GICD_ICPENDR(n) (0x0280 + (((n) >> 5) * 4)) /* v1 ICDICPR */ -#define GICD_ISACTIVER(n) (0x0300 + (((n) >> 5) * 4)) /* v1 ICDABR */ -#define GICD_ICACTIVER(n) (0x0380 + (((n) >> 5) * 4)) -#define GICD_IPRIORITYR(n) (0x0400 + (((n) >> 2) * 4)) /* v1 ICDIPR */ +#define GICD_ISACTIVER_BASE (0x0300) +#define GICD_ISACTIVER(n) (GICD_ISACTIVER_BASE + (((n) >> 5) * 4)) /* v1 ICDABR */ +#define GICD_ICACTIVER_BASE (0x0380) +#define GICD_ICACTIVER(n) (GICD_ICACTIVER_BASE + (((n) >> 5) * 4)) /* v1 ICDABR */ +#define GICD_IPRIORITYR_BASE (0x0400) +#define GICD_IPRIORITYR(n) \ + (GICD_IPRIORITYR_BASE + (((n) >> 2) * 4)) /* v1 ICDIPR */ #define GICD_I_PER_IPRIORITYn 4 #define GICD_ITARGETSR(n) (0x0800 + (((n) >> 2) * 4)) /* v1 ICDIPTR */ -#define GICD_ICFGR(n) (0x0C00 + (((n) >> 4) * 4)) /* v1 ICDICFR */ +#define GICD_ICFGR_BASE (0x0C00) +#define GICD_ICFGR(n) \ + (GICD_ICFGR_BASE + (((n) >> 4) * 4)) /* v1 ICDICFR */ #define GICD_I_PER_ICFGRn 16 /* First bit is a polarity bit (0 - low, 1 - high) */ #define GICD_ICFGR_POL_LOW (0 << 0) @@ -97,7 +141,34 @@ #define GICD_ICFGR_TRIG_LVL (0 << 1) #define GICD_ICFGR_TRIG_EDGE (1 << 1) #define GICD_ICFGR_TRIG_MASK 0x2 -#define GICD_SGIR 0x0F00 /* v1 ICDSGIR */ +#define GICD_SGIR(n) (0x0F00 + ((n) * 4)) /* v1 ICDSGIR */ #define GICD_SGI_TARGET_SHIFT 16 +/* GIC Hypervisor specific registers */ +#define GICH_HCR 0x0 +#define GICH_VTR 0x4 +#define GICH_VMCR 0x8 +#define GICH_VMCR_VMGRP1EN (1 << 1) +#define GICH_MISR 0x10 +#define GICH_EISR0 0x20 +#define GICH_EISR1 0x24 +#define GICH_ELSR0 0x30 +#define GICH_ELSR1 0x34 +#define GICH_APR 0xF0 +#define GICH_LR0 0x100 + +#define GICH_HCR_EN (1 << 0) +#define GICH_HCR_UIE (1 << 1) + +#define GICH_LR_VIRTID (0x3FF << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT 10 +#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT) +#define GICH_LR_STATE (3 << 28) +#define GICH_LR_PENDING (1 << 28) +#define GICH_LR_ACTIVE (1 << 29) +#define GICH_LR_EOI (1 << 19) + +#define GICH_MISR_EOI (1 << 0) +#define GICH_MISR_U (1 << 1) + #endif /* _GIC_COMMON_H_ */ 
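The GICH_LR_* field definitions above describe how a GICv2 hypervisor encodes one virtual interrupt per list register. As a hedged illustration (the helper below is hypothetical; only the macro names come from this patch's gic_common.h), a pending virtual interrupt could be composed like this:

static inline uint32_t
gich_lr_pending(uint32_t virtid, uint32_t src_cpuid)
{
	uint32_t lr;

	lr = virtid & GICH_LR_VIRTID;			/* virtual INTID */
	lr |= src_cpuid << GICH_LR_PHYSID_CPUID_SHIFT;	/* SGI source CPU */
	lr |= GICH_LR_PENDING;				/* state: pending */
	return (lr);
}

The value would then be written through the new gic_h_write_4() accessor in gic.c at an offset derived from GICH_LR0, with the virtual interface enabled by setting GICH_HCR_EN in GICH_HCR.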
Index: sys/arm/arm/gic_fdt.c =================================================================== --- sys/arm/arm/gic_fdt.c +++ sys/arm/arm/gic_fdt.c @@ -129,18 +129,25 @@ gic_fdt_attach(device_t dev) { struct arm_gic_fdt_softc *sc = device_get_softc(dev); - phandle_t pxref; - intptr_t xref; + phandle_t pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev)); + intptr_t xref = OF_xref_from_node(ofw_bus_get_node(dev)); int err; + sc->base.is_root = false; + /* + * Controller is root if: + * - doesn't have interrupt parent + * - his interrupt parent is this controller + */ + if (pxref == 0 || xref == pxref) + sc->base.is_root = true; + sc->base.gic_bus = GIC_BUS_FDT; err = arm_gic_attach(dev); if (err != 0) return (err); - xref = OF_xref_from_node(ofw_bus_get_node(dev)); - /* * Now, when everything is initialized, it's right time to * register interrupt controller to interrupt framefork. @@ -150,13 +157,7 @@ goto cleanup; } - /* - * Controller is root if: - * - doesn't have interrupt parent - * - his interrupt parent is this controller - */ - pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev)); - if (pxref == 0 || xref == pxref) { + if (sc->base.is_root) { if (intr_pic_claim_root(dev, xref, arm_gic_intr, sc, GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) { device_printf(dev, "could not set PIC as a root\n"); Index: sys/arm64/arm64/gic_v3.c =================================================================== --- sys/arm64/arm64/gic_v3.c +++ sys/arm64/arm64/gic_v3.c @@ -106,6 +106,11 @@ static u_int sgi_first_unused = GIC_FIRST_SGI; #endif +static struct resource *maint_res; +static device_t gic_dev; +static int maint_rid; +static void *maint_cookie; + static device_method_t gic_v3_methods[] = { /* Device interface */ DEVMETHOD(device_detach, gic_v3_detach), @@ -401,12 +406,49 @@ return (0); } +void +gic_v3_alloc_maint_res(device_t dev) +{ + gic_dev = dev; + maint_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &maint_rid, + RF_ACTIVE); + if (!maint_res) + device_printf(dev, + "Could not allocate resource for maintenance interrupt\n"); +} + +int +gic_v3_setup_maint_intr(driver_filter_t filter, driver_intr_t handler, + void *arg) +{ + int flags; + + if (!maint_res) + return (EINVAL); + + flags = INTR_TYPE_MISC | INTR_MPSAFE; + return (bus_setup_intr(gic_dev, maint_res, flags, filter, handler, + arg, &maint_cookie)); +} + +int +gic_v3_teardown_maint_intr(void) +{ + if (!maint_res) + return (EINVAL); + + return (bus_teardown_intr(gic_dev, maint_res, maint_cookie)); +} + static int gic_v3_get_domain(device_t dev, device_t child, int *domain) { struct gic_v3_devinfo *di; di = device_get_ivars(child); + if (di == NULL) + return (0); + if (di->gic_domain < 0) return (ENOENT); @@ -1016,22 +1058,25 @@ struct resource *res; u_int cpuid; size_t us_left = 1000000; + uint32_t rwp; cpuid = PCPU_GET(cpuid); switch (xdist) { case DIST: res = sc->gic_dist; + rwp = GICD_CTLR_RWP; break; case REDIST: res = &sc->gic_redists.pcpu[cpuid]->res; + rwp = GICR_CTLR_RWP; break; default: KASSERT(0, ("%s: Attempt to wait for unknown RWP", __func__)); return; } - while ((bus_read_4(res, GICD_CTLR) & GICD_CTLR_RWP) != 0) { + while ((bus_read_4(res, GICD_CTLR) & rwp) != 0) { DELAY(1); if (us_left-- == 0) panic("GICD Register write pending for too long"); Index: sys/arm64/arm64/gic_v3_acpi.c =================================================================== --- sys/arm64/arm64/gic_v3_acpi.c +++ sys/arm64/arm64/gic_v3_acpi.c @@ -337,6 +337,8 @@ if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) !=0) sc->gic_nchildren = 0; 
+ gic_v3_alloc_maint_res(dev); + return (0); error: Index: sys/arm64/arm64/gic_v3_fdt.c =================================================================== --- sys/arm64/arm64/gic_v3_fdt.c +++ sys/arm64/arm64/gic_v3_fdt.c @@ -190,6 +190,8 @@ if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) != 0) sc->gic_nchildren = 0; + gic_v3_alloc_maint_res(dev); + return (err); error: @@ -213,12 +215,19 @@ static int gic_v3_fdt_print_child(device_t bus, device_t child) { - struct gic_v3_ofw_devinfo *di = device_get_ivars(child); - struct resource_list *rl = &di->di_rl; + struct gic_v3_ofw_devinfo *di; + struct resource_list *rl; int retval = 0; retval += bus_print_child_header(bus, child); + + di = device_get_ivars(child); + if (di == NULL) + goto footer; + rl = &di->di_rl; + retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx"); +footer: retval += bus_print_child_footer(bus, child); return (retval); @@ -299,6 +308,7 @@ size_cells = 2; OF_getencprop(parent, "#size-cells", &size_cells, sizeof(size_cells)); + /* Iterate through all GIC subordinates */ for (node = OF_child(parent); node > 0; node = OF_peer(node)) { /* Allocate and populate devinfo. */ Index: sys/arm64/arm64/gic_v3_reg.h =================================================================== --- sys/arm64/arm64/gic_v3_reg.h +++ sys/arm64/arm64/gic_v3_reg.h @@ -55,8 +55,9 @@ #define GICD_CTLR_G1 (1 << 0) #define GICD_CTLR_G1A (1 << 1) #define GICD_CTLR_ARE_NS (1 << 4) +#define GICD_CTLR_DS (1 << 6) +#define GICD_CTLR_E1NWF (1 << 7) #define GICD_CTLR_RWP (1 << 31) - /* GICD_TYPER */ #define GICD_TYPER_SECURITYEXTN (1 << 10) #define GICD_TYPER_MBIS (1 << 16) @@ -64,6 +65,10 @@ #define GICD_TYPER_DVIS (1 << 18) #define GICD_TYPER_IDBITS_SHIFT 19 #define GICD_TYPER_IDBITS(n) ((((n) >> 19) & 0x1F) + 1) +#define GICD_TYPER_SECURITYEXTN \ + (1 << 10) +#define GICD_TYPER_DVIS (1 << 18) +#define GICD_TYPER_LPIS (1 << 17) /* * Registers (v3) @@ -103,8 +108,8 @@ #define GICD_PIDR3 0xFFEC -/* - * Redistributor registers +/* + * Redistributor registers */ /* RD_base registers */ @@ -130,6 +135,10 @@ #define GICR_TYPER_AFF_MASK (0xfffffffful << GICR_TYPER_AFF_SHIFT) #define GICR_TYPER_AFF(x) \ (((x) & GICR_TYPER_AFF_MASK) >> GICR_TYPER_AFF_SHIFT) +#define GICR_TYPER_AFF0(x) ((x >> GICR_TYPER_AFF_SHIFT) & 0xff) +#define GICR_TYPER_AFF1(x) ((x >> (GICR_TYPER_AFF_SHIFT + 8)) & 0xff) +#define GICR_TYPER_AFF2(x) ((x >> (GICR_TYPER_AFF_SHIFT + 16)) & 0xff) +#define GICR_TYPER_AFF3(x) ((x >> (GICR_TYPER_AFF_SHIFT + 24)) & 0xff) #define GICR_STATUSR 0x0010 @@ -240,6 +249,7 @@ #define GICR_I_ENABLER_SGI_MASK (0x0000FFFF) #define GICR_I_ENABLER_PPI_MASK (0xFFFF0000) +#define GICR_IPRIORITYR_BASE (0x0400) #define GICR_I_PER_IPRIORITYn (GICD_I_PER_IPRIORITYn) #define GICR_ISPENDR0 0x0200 Index: sys/arm64/arm64/gic_v3_var.h =================================================================== --- sys/arm64/arm64/gic_v3_var.h +++ sys/arm64/arm64/gic_v3_var.h @@ -113,6 +113,10 @@ void gic_r_write_4(device_t, bus_size_t, uint32_t var); void gic_r_write_8(device_t, bus_size_t, uint64_t var); +void gic_v3_alloc_maint_res(device_t); +int gic_v3_setup_maint_intr(driver_filter_t, driver_intr_t, void *); +int gic_v3_teardown_maint_intr(void); + /* * GIC Distributor accessors. * Notice that only GIC sofc can be passed. 
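The three gic_v3_*maint_intr() helpers declared above let a stage-2 interrupt-controller emulation hook the GICv3 maintenance interrupt without knowing which resource the GIC driver allocated. A minimal usage sketch follows; vgic_v3_maint_intr() and its callers are assumptions for illustration only, not code from this patch.

static int
vgic_v3_maint_intr(void *arg)
{
	/* Fold completed list registers back into the emulated state here. */
	return (FILTER_HANDLED);
}

static int
vgic_v3_register_maint(void *arg)
{
	/* Fails with EINVAL when no maintenance IRQ resource was found. */
	return (gic_v3_setup_maint_intr(vgic_v3_maint_intr, NULL, arg));
}

static void
vgic_v3_unregister_maint(void)
{
	(void)gic_v3_teardown_maint_intr();
}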
Index: sys/arm64/arm64/hyp_stub.S =================================================================== --- /dev/null +++ sys/arm64/arm64/hyp_stub.S @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +__FBSDID("$FreeBSD$"); + + .text + +/* + * Install a new exception vector table with the base address supplied by the + * parameter in register x0. + */ +ENTRY(handle_stub_el1h_sync) + msr vbar_el2, x0 + eret +END(handle_hyp_stub) + +.macro vempty + .align 7 + 1: b 1b +.endm + +.macro vector name + .align 7 + b handle_\name +.endm + + .align 11 + .globl hyp_stub_vectors +hyp_stub_vectors: + vempty /* Synchronous EL2t */ + vempty /* IRQ EL2t */ + vempty /* FIQ EL2t */ + vempty /* SError EL2t */ + + vempty /* Synchronous EL2h */ + vempty /* IRQ EL2h */ + vempty /* FIQ EL2h */ + vempty /* SError EL2h */ + + vector stub_el1h_sync /* Synchronous 64-bit EL1 */ + vempty /* IRQ 64-bit EL1 */ + vempty /* FIQ 64-bit EL1 */ + vempty /* SError 64-bit EL1 */ + + vempty /* Synchronous 32-bit EL1 */ + vempty /* IRQ 32-bit EL1 */ + vempty /* FIQ 32-bit EL1 */ + vempty /* SError 32-bit EL1 */ Index: sys/arm64/arm64/locore.S =================================================================== --- sys/arm64/arm64/locore.S +++ sys/arm64/arm64/locore.S @@ -213,6 +213,11 @@ END(mpentry) #endif + .align 3 + .globl _C_LABEL(hypmode_enabled) +_C_LABEL(hypmode_enabled): + .zero 8 + /* * If we are started in EL2, configure the required hypervisor * registers and drop to EL1. @@ -224,8 +229,22 @@ b.eq 1f ret 1: + /* + * If the MMU is active, then it is using a page table where VA == PA. + * But the page table won't have entries for the hypervisor EL2 + * initialization code which is loaded into memory with the vmm module. + * + * So we disable the MMU in EL2 to make the vmm hypervisor code run + * successfully. 
+ */ + dsb sy + mrs x2, sctlr_el2 + bic x2, x2, SCTLR_M + msr sctlr_el2, x2 + isb + /* Configure the Hypervisor */ - mov x2, #(HCR_RW) + mov x2, #(HCR_RW & ~HCR_HCD) msr hcr_el2, x2 /* Load the Virtualization Process ID Register */ @@ -256,10 +275,18 @@ msr cntvoff_el2, xzr /* Hypervisor trap functions */ - adrp x2, hyp_vectors - add x2, x2, :lo12:hyp_vectors + adrp x2, hyp_stub_vectors msr vbar_el2, x2 + /* Use the host VTTBR_EL2 to tell the host and the guests apart */ + mov x2, #VTTBR_HOST + msr vttbr_el2, x2 + + /* Mark hypervisor mode as enabled */ + mov x1, #1 + adr x2, hypmode_enabled + str x1, [x2] + mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h) msr spsr_el2, x2 @@ -288,6 +315,10 @@ .quad SCTLR_RES1 LEND(drop_to_el1) +hcr: + /* Make sure the HVC instruction is not disabled */ + .quad (HCR_RW & ~HCR_HCD) + #define VECT_EMPTY \ .align 7; \ 1: b 1b @@ -733,6 +764,8 @@ ENTRY(abort) b abort + + .align 12 /* 4KiB aligned */ END(abort) .section .init_pagetable, "aw", %nobits Index: sys/arm64/arm64/pmap.c =================================================================== --- sys/arm64/arm64/pmap.c +++ sys/arm64/arm64/pmap.c @@ -407,6 +407,8 @@ static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); +static uint64_t pa_range_bits = 0; + /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at @@ -431,9 +433,19 @@ memcpy(d, s, PAGE_SIZE); } +#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) +#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) +#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) +#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) + +#define STAGE2_L1_ADDR_MASK ((1UL << (pa_range_bits - L1_SHIFT)) - 1) +#define pmap_stage2_l1_index(va) (((va) >> L1_SHIFT) & STAGE2_L1_ADDR_MASK) + static __inline pd_entry_t * pmap_l0(pmap_t pmap, vm_offset_t va) { + KASSERT(pmap->pm_stage != PM_STAGE2, + ("Level 0 table is invalid for PM_STAGE2 pmap")); return (&pmap->pm_l0[pmap_l0_index(va)]); } @@ -450,6 +462,9 @@ static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { + if (pmap->pm_stage == PM_STAGE2) + return (&pmap->pm_l0[pmap_stage2_l1_index(va)]); + pd_entry_t *l0; l0 = pmap_l0(pmap, va); @@ -459,6 +474,32 @@ return (pmap_l0_to_l1(l0, va)); } +static __inline vm_page_t +pmap_l1pg(pmap_t pmap, vm_offset_t va) +{ + if (pmap->pm_stage == PM_STAGE1) { + pd_entry_t *l0, tl0; + + l0 = pmap_l0(pmap, va); + tl0 = pmap_load(l0); + + return (PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK)); + } else { + vm_paddr_t pa, pa_offset; + + /* + * The offset will be the bits + * [pa_range_bits-1:L0_SHIFT] + */ + va = va & ((1 << pa_range_bits) - 1); + pa_offset = va >> L0_SHIFT; + pa = DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0) + \ + (pa_offset << PAGE_SHIFT); + + return (PHYS_TO_VM_PAGE(pa)); + } +} + static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1p, vm_offset_t va) { @@ -523,18 +564,28 @@ { pd_entry_t *l0, *l1, *l2, desc; - l0 = pmap_l0(pmap, va); - desc = pmap_load(l0) & ATTR_DESCR_MASK; - if (desc != L0_TABLE) { - *level = -1; - return (NULL); - } + if (pmap->pm_stage == PM_STAGE1) { + l0 = pmap_l0(pmap, va); + desc = pmap_load(l0) & ATTR_DESCR_MASK; + if (desc != L0_TABLE) { + *level = -1; + return (NULL); + } - l1 = pmap_l0_to_l1(l0, va); - desc = pmap_load(l1) & ATTR_DESCR_MASK; - if (desc != L1_TABLE) { - *level = 0; - return (l0); + l1 = pmap_l0_to_l1(l0, va); + desc = 
pmap_load(l1) & ATTR_DESCR_MASK; + if (desc != L1_TABLE) { + *level = 0; + return (l0); + } + } else { + l1 = pmap_l1(pmap, va); + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc != L1_TABLE) { + /* For PM_STAGE2 mappings the first level is level 1 */ + *level = -1; + return (NULL); + } } l2 = pmap_l1_to_l2(l1, va); @@ -611,13 +662,18 @@ if (pmap->pm_l0 == NULL) return (false); - l0p = pmap_l0(pmap, va); - *l0 = l0p; + if (pmap->pm_stage == PM_STAGE1) { + l0p = pmap_l0(pmap, va); + *l0 = l0p; - if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) - return (false); + if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) + return (false); - l1p = pmap_l0_to_l1(l0p, va); + l1p = pmap_l0_to_l1(l0p, va); + } else { + *l0 = NULL; + l1p = pmap_l1(pmap, va); + } *l1 = l1p; if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { @@ -930,6 +986,7 @@ pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { + uint64_t id_aa64mmfr0_el1; vm_offset_t freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t start_pa, pa, min_pa; @@ -1018,6 +1075,35 @@ physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC); + id_aa64mmfr0_el1 = READ_SPECIALREG(id_aa64mmfr0_el1); + switch (ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1)) { + case ID_AA64MMFR0_PARange_4G: + pa_range_bits = 32; + break; + case ID_AA64MMFR0_PARange_64G: + pa_range_bits = 36; + break; + case ID_AA64MMFR0_PARange_1T: + pa_range_bits = 40; + break; + case ID_AA64MMFR0_PARange_4T: + pa_range_bits = 42; + break; + case ID_AA64MMFR0_PARange_16T: + pa_range_bits = 44; + break; + case ID_AA64MMFR0_PARange_256T: + pa_range_bits = 48; + break; + default: + /* + * Unknown PA range bits, will lead to a panic if a stage 2 + * pmap starting at level 1 is created. + */ + pa_range_bits = 0; + break; + } + cpu_tlb_flushID(); } @@ -1181,8 +1267,6 @@ { uint64_t r; - PMAP_ASSERT_STAGE1(pmap); - dsb(ishst); if (pmap == kernel_pmap) { r = atop(va); @@ -1200,8 +1284,6 @@ { uint64_t end, r, start; - PMAP_ASSERT_STAGE1(pmap); - dsb(ishst); if (pmap == kernel_pmap) { start = atop(sva); @@ -1650,10 +1732,12 @@ */ if (m->pindex >= (NUL2E + NUL1E)) { /* l1 page */ - pd_entry_t *l0; + if (pmap->pm_stage == PM_STAGE1) { + pd_entry_t *l0; - l0 = pmap_l0(pmap, va); - pmap_clear(l0); + l0 = pmap_l0(pmap, va); + pmap_clear(l0); + } } else if (m->pindex >= NUL2E) { /* l2 page */ pd_entry_t *l1; @@ -1679,12 +1763,16 @@ pmap_unwire_l3(pmap, va, l2pg, free); } else if (m->pindex < (NUL2E + NUL1E)) { /* We just released an l2, unhold the matching l1 */ - pd_entry_t *l0, tl0; vm_page_t l1pg; + pd_entry_t *l0, tl0; - l0 = pmap_l0(pmap, va); - tl0 = pmap_load(l0); - l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + if (pmap->pm_stage == PM_STAGE1) { + l0 = pmap_l0(pmap, va); + tl0 = pmap_load(l0); + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + } else { + l1pg = pmap_l1pg(pmap, va); + } pmap_unwire_l3(pmap, va, l1pg, free); } pmap_invalidate_page(pmap, va); @@ -1761,12 +1849,48 @@ { vm_page_t m; + KASSERT((stage == PM_STAGE1 || stage == PM_STAGE2), + ("Invalid pmap stage %d", stage)); + KASSERT(!((stage == PM_STAGE2) && (pa_range_bits == 0)), + ("Unknown PARange bits")); + /* * allocate the l0 page */ - while ((m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) - vm_wait(NULL); + if (stage == PM_STAGE1) { + while ((m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) + vm_wait(NULL); + } else { + uint64_t npages; + uint64_t alignment; + + if 
(pa_range_bits <= L0_SHIFT) { + /* + * The level 1 translation table is not larger than a + * PM_STAGE1 level 1 table, use only one page. + */ + npages = 1; + alignment = PAGE_SIZE; + } else { + /* + * The level 1 translation table is larger than a + * regular PM_STAGE1 level 1 table, for every x bits + * that is larger we need 2^x pages and the table must + * be aligned at a 2^(x + 12) boundary. + * + * See Table D5-25 and Example D4-5 from the DDI0487B + * ARMv8 Architecture Manual for more information. + */ + npages = 1 << (pa_range_bits - L0_SHIFT); + alignment = 1 << (PAGE_SHIFT + pa_range_bits - L0_SHIFT); + } + while ((m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO, + npages, DMAP_MIN_PHYSADDR, DMAP_MAX_PHYSADDR, + alignment, 0, VM_MEMATTR_DEFAULT)) == NULL) + vm_wait(NULL); + } pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m); pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr); @@ -1775,6 +1899,7 @@ pagezero(pmap->pm_l0); pmap->pm_root.rt_root = 0; + pmap->pm_stage = stage; bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX); @@ -1887,27 +2012,34 @@ pd_entry_t tl0; l1index = ptepindex - NUL2E; - l0index = l1index >> L0_ENTRIES_SHIFT; - - l0 = &pmap->pm_l0[l0index]; - tl0 = pmap_load(l0); - if (tl0 == 0) { - /* recurse for allocating page dir */ - if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, - lockp) == NULL) { - vm_page_unwire_noq(m); - vm_page_free_zero(m); - return (NULL); + if (pmap->pm_stage == PM_STAGE1) { + l0index = l1index >> L0_ENTRIES_SHIFT; + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, + lockp) == NULL) { + vm_page_unwire_noq(m); + vm_page_free_zero(m); + return (NULL); + } + } else { + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + l1pg->ref_count++; } + + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + l1 = &l1[ptepindex & Ln_ADDR_MASK]; + KASSERT((pmap_load(l1) & ATTR_DESCR_VALID) == 0, + ("%s: L1 entry %#lx is valid", __func__, pmap_load(l1))); } else { - l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + l1pg = pmap_l1pg(pmap, l1index); l1pg->ref_count++; + l1 = &pmap->pm_l0[l1index & STAGE2_L1_ADDR_MASK]; } - l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); - l1 = &l1[ptepindex & Ln_ADDR_MASK]; - KASSERT((pmap_load(l1) & ATTR_DESCR_VALID) == 0, - ("%s: L1 entry %#lx is valid", __func__, pmap_load(l1))); + pmap_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); } else { vm_pindex_t l0index, l1index; @@ -1915,24 +2047,40 @@ pd_entry_t tl0, tl1; l1index = ptepindex >> Ln_ENTRIES_SHIFT; - l0index = l1index >> L0_ENTRIES_SHIFT; - - l0 = &pmap->pm_l0[l0index]; - tl0 = pmap_load(l0); - if (tl0 == 0) { - /* recurse for allocating page dir */ - if (_pmap_alloc_l3(pmap, NUL2E + l1index, - lockp) == NULL) { - vm_page_unwire_noq(m); - vm_page_free_zero(m); - return (NULL); - } + if (pmap->pm_stage == PM_STAGE1) { + l0index = l1index >> L0_ENTRIES_SHIFT; + l0 = &pmap->pm_l0[l0index]; tl0 = pmap_load(l0); - l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); - l1 = &l1[l1index & Ln_ADDR_MASK]; + if (tl0 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + l1index, + lockp) == NULL) { + vm_page_unwire_noq(m); + vm_page_free_zero(m); + return (NULL); + } + tl0 = pmap_load(l0); + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; + } else { + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = 
&l1[l1index & Ln_ADDR_MASK]; + tl1 = pmap_load(l1); + if (tl1 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + l1index, + lockp) == NULL) { + vm_page_unwire_noq(m); + vm_page_free_zero(m); + return (NULL); + } + } else { + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + l2pg->ref_count++; + } + } } else { - l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); - l1 = &l1[l1index & Ln_ADDR_MASK]; + l1 = &pmap->pm_l0[l1index & STAGE2_L1_ADDR_MASK]; tl1 = pmap_load(l1); if (tl1 == 0) { /* recurse for allocating page dir */ @@ -2127,9 +2275,27 @@ mtx_unlock_spin(&set->asid_set_mutex); } - m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr); - vm_page_unwire_noq(m); - vm_page_free_zero(m); + if (pmap->pm_stage == PM_STAGE1) { + m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr); + vm_page_unwire_noq(m); + vm_page_free_zero(m); + } else { + uint64_t i, page_cnt; + vm_paddr_t pa; + + if (pa_range_bits < L0_SHIFT) + page_cnt = 1; + else + page_cnt = 1 << (pa_range_bits - L0_SHIFT); + + pa = DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0); + for (i = 0; i < page_cnt; i++) { + m = PHYS_TO_VM_PAGE(pa); + vm_page_unwire_noq(m); + vm_page_free_zero(m); + pa += PAGE_SIZE; + } + } } static int @@ -2496,7 +2662,7 @@ vm_page_t m; mtx_lock(&pv_chunks_mutex); - TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); @@ -2984,7 +3150,7 @@ * released. Otherwise, a concurrent * pmap_remove_all() on a physical page * could return while a stale TLB entry - * still provides access to that page. + * still provides access to that page. */ if (va != eva) { pmap_invalidate_range(pmap, va, @@ -3045,18 +3211,23 @@ if (pmap->pm_stats.resident_count == 0) break; - l0 = pmap_l0(pmap, sva); - if (pmap_load(l0) == 0) { - va_next = (sva + L0_SIZE) & ~L0_OFFSET; - if (va_next < sva) - va_next = eva; - continue; + if (pmap->pm_stage == PM_STAGE1) { + l0 = pmap_l0(pmap, sva); + if (pmap_load(l0) == 0) { + va_next = (sva + L0_SIZE) & ~L0_OFFSET; + if (va_next < sva) + va_next = eva; + continue; + } + + l1 = pmap_l0_to_l1(l0, sva); + } else { + l1 = pmap_l1(pmap, sva); } va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; - l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) continue; if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) { @@ -3736,33 +3907,19 @@ new_l3 |= ATTR_S1_UXN; if (pmap != kernel_pmap) new_l3 |= ATTR_S1_nG; - } else { - /* - * Clear the access flag on executable mappings, this will be - * set later when the page is accessed. The fault handler is - * required to invalidate the I-cache. - * - * TODO: Switch to the valid flag to allow hardware management - * of the access flag. Much of the pmap code assumes the - * valid flag is set and fails to destroy the old page tables - * correctly if it is clear. 
- */ - if (prot & VM_PROT_EXECUTE) - new_l3 &= ~ATTR_AF; - } - if ((m->oflags & VPO_UNMANAGED) == 0) { - new_l3 |= ATTR_SW_MANAGED; - if ((prot & VM_PROT_WRITE) != 0) { - new_l3 |= ATTR_SW_DBM; - if ((flags & VM_PROT_WRITE) == 0) { - if (pmap->pm_stage == PM_STAGE1) - new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO); - else - new_l3 &= - ~ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE); + if ((m->oflags & VPO_UNMANAGED) == 0) { + new_l3 |= ATTR_SW_MANAGED; + if ((prot & VM_PROT_WRITE) != 0) { + new_l3 |= ATTR_SW_DBM; + if ((flags & VM_PROT_WRITE) == 0) + new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO); } } + } else { + new_l3 = (pd_entry_t)(pa | ATTR_ST2_DEFAULT | L3_PAGE); } + if ((flags & PMAP_ENTER_WIRED) != 0) + new_l3 |= ATTR_SW_WIRED; CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); @@ -4676,7 +4833,7 @@ pmap_abort_ptp(dst_pmap, addr, dstmpte); goto out; } - /* Have we copied all of the valid mappings? */ + /* Have we copied all of the valid mappings? */ if (dstmpte->ref_count >= srcmpte->ref_count) break; } @@ -4976,7 +5133,7 @@ switch(lvl) { case 1: pte = pmap_l1_to_l2(pde, pv->pv_va); - tpte = pmap_load(pte); + tpte = pmap_load(pte); KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, ("Attempting to remove an invalid " Index: sys/arm64/include/armreg.h =================================================================== --- sys/arm64/include/armreg.h +++ sys/arm64/include/armreg.h @@ -215,7 +215,7 @@ #define ISS_DATA_DFSC_TLB_CONFLICT (0x30 << 0) #define ESR_ELx_IL (0x01 << 25) #define ESR_ELx_EC_SHIFT 26 -#define ESR_ELx_EC_MASK (0x3f << 26) +#define ESR_ELx_EC_MASK (0x3f << ESR_ELx_EC_SHIFT) #define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define EXCP_UNKNOWN 0x00 /* Unkwn exception */ #define EXCP_TRAP_WFI_WFE 0x01 /* Trapped WFI or WFE */ @@ -226,10 +226,10 @@ #define EXCP_HVC 0x16 /* HVC trap */ #define EXCP_MSR 0x18 /* MSR/MRS trap */ #define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */ -#define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */ +#define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */ #define EXCP_PC_ALIGN 0x22 /* PC alignment fault */ #define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */ -#define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */ +#define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */ #define EXCP_SP_ALIGN 0x26 /* SP slignment fault */ #define EXCP_TRAP_FP 0x2c /* Trapped FP exception */ #define EXCP_SERROR 0x2f /* SError interrupt */ Index: sys/arm64/include/bitops.h =================================================================== --- /dev/null +++ sys/arm64/include/bitops.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) TODO + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _ARM_BITOPS_H_ +#define _ARM_BITOPS_H_ + +#include + +#define for_each_set_bit(bit, addr, size) \ + for (bit_ffs((bitstr_t *)(addr), (size), (int *)&(bit)); \ + (bit) != -1; \ + bit_ffs_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit))) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for (bit_ffs_at((bitstr_t *)(addr), (bit), (size), (int *)&(bit)); \ + (bit) != -1; \ + bit_ffs_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit))) + +#define for_each_clear_bit(bit, addr, size) \ + for (bit_ffc((bitstr_t *)(addr), (size), (int *)&(bit)); \ + (bit) != -1; \ + bit_ffc_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit))) + +/* same as for_each_clear_bit() but use bit as value to start with */ +#define for_each_clear_bit_from(bit, addr, size) \ + for (bit_ffc_at((bitstr_t *)(addr), (bit), (size), (int *)&(bit)); \ + (bit) != -1; \ + bit_ffc_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit))) + +#endif /* _ARM_BITOPS_H_ */ Index: sys/arm64/include/cpu.h =================================================================== --- sys/arm64/include/cpu.h +++ sys/arm64/include/cpu.h @@ -117,6 +117,7 @@ #define CPU_IMPL_TO_MIDR(val) (((val) & 0xff) << 24) #define CPU_PART_TO_MIDR(val) (((val) & 0xfff) << 4) +#define CPU_ARCH_TO_MIDR(val) (((val) & 0xf) << 16) #define CPU_VAR_TO_MIDR(val) (((val) & 0xf) << 20) #define CPU_REV_TO_MIDR(val) (((val) & 0xf) << 0) Index: sys/arm64/include/hypervisor.h =================================================================== --- sys/arm64/include/hypervisor.h +++ sys/arm64/include/hypervisor.h @@ -201,4 +201,35 @@ #define VTTBR_VMID_SHIFT 48 #define VTTBR_HOST 0x0000000000000000 +/* VTCR_EL2 - Virtualization Translation Control Register */ +#define VTCR_EL2_RES1 (0x1 << 31) +#define VTCR_EL2_T0SZ_MASK 0x3f +#define VTCR_EL2_SL0_SHIFT 6 +#define VTCR_EL2_SL0_4K_LVL2 (0x0 << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_SL0_4K_LVL1 (0x1 << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_SL0_4K_LVL0 (0x2 << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_IRGN0_SHIFT 8 +#define VTCR_EL2_IRGN0_WBWA (0x1 << VTCR_EL2_IRGN0_SHIFT) +#define VTCR_EL2_ORGN0_SHIFT 10 +#define VTCR_EL2_ORGN0_WBWA (0x1 << VTCR_EL2_ORGN0_SHIFT) +#define VTCR_EL2_SH0_SHIFT 12 +#define VTCR_EL2_SH0_NS (0x0 << VTCR_EL2_SH0_SHIFT) +#define VTCR_EL2_SH0_OS (0x2 << VTCR_EL2_SH0_SHIFT) +#define VTCR_EL2_SH0_IS (0x3 << VTCR_EL2_SH0_SHIFT) +#define VTCR_EL2_TG0_SHIFT 14 +#define VTCR_EL2_TG0_4K (0x0 << VTCR_EL2_TG0_SHIFT) +#define VTCR_EL2_TG0_64K (0x1 << VTCR_EL2_TG0_SHIFT) +#define VTCR_EL2_TG0_16K (0x2 << VTCR_EL2_TG0_SHIFT) +#define VTCR_EL2_PS_SHIFT 16 +#define VTCR_EL2_PS_32BIT (0x0 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_36BIT (0x1 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_40BIT (0x2 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_42BIT (0x3 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_44BIT (0x4 << VTCR_EL2_PS_SHIFT) +#define VTCR_EL2_PS_48BIT (0x5 << VTCR_EL2_PS_SHIFT) + +/* HPFAR_EL2 - Hypervisor 
IPA Fault Address Register */ +#define HPFAR_EL2_FIPA_SHIFT 4 +#define HPFAR_EL2_FIPA_MASK 0xfffffffff0 + #endif /* !_MACHINE_HYPERVISOR_H_ */ Index: sys/arm64/include/pcpu.h =================================================================== --- sys/arm64/include/pcpu.h +++ sys/arm64/include/pcpu.h @@ -43,6 +43,7 @@ u_int pc_acpi_id; /* ACPI CPU id */ \ u_int pc_midr; /* stored MIDR value */ \ uint64_t pc_clock; \ + void *pc_vcpu; \ pcpu_bp_harden pc_bp_harden; \ pcpu_ssbd pc_ssbd; \ struct pmap *pc_curpmap; \ Index: sys/arm64/include/pmap.h =================================================================== --- sys/arm64/include/pmap.h +++ sys/arm64/include/pmap.h @@ -191,6 +191,7 @@ pd_entry_t **, pt_entry_t **); int pmap_fault(pmap_t, uint64_t, uint64_t); +int pmap_pinit_type(pmap_t, enum pmap_stage); struct pcb *pmap_switch(struct thread *, struct thread *); Index: sys/arm64/include/pte.h =================================================================== --- sys/arm64/include/pte.h +++ sys/arm64/include/pte.h @@ -99,6 +99,35 @@ #define ATTR_DESCR_TYPE_TABLE 2 #define ATTR_DESCR_TYPE_PAGE 2 #define ATTR_DESCR_TYPE_BLOCK 0 +/* Stage 2 translation Block and Page attributes */ +#define ATTR_ST2_AF ATTR_AF +#define ATTR_ST2_SH(x) ATTR_SH(x) +#define ATTR_ST2_SH_MASK ATTR_SH_MASK +#define ATTR_ST2_SH_NS ATTR_SH_NS /* Non-shareable */ +#define ATTR_ST2_SH_OS ATTR_SH_OS /* Outer-shareable */ +#define ATTR_ST2_SH_IS ATTR_SH_IS /* Inner-shareable */ +#define ATTR_ST2_S2AP(x) ((x) << 6) /* Data access permissions */ +#define ATTR_ST2_S2AP_NONE (0 << 1) +#define ATTR_ST2_S2AP_R0 (1 << 0) +#define ATTR_ST2_S2AP_W0 (1 << 1) +#define ATTR_ST2_S2AP_RW (3 << 0) +#define ATTR_ST2_MEMATTR(x) ((x) << 2) /* Memory attributes */ +#define ATTR_ST2_MEM_DEV (0 << 2) /* Device memory */ +#define ATTR_ST2_MEM_DEV_nGnRnE (0 << 0) +#define ATTR_ST2_MEM_DEV_nGnRE (1 << 0) +#define ATTR_ST2_MEM_DEV_nGRE (1 << 1) +#define ATTR_ST2_MEM_DEV_GRE (3 << 0) +#define ATTR_ST2_MEM_ONC (1 << 2) /* Outer Non-cacheable */ +#define ATTR_ST2_MEM_OWT (1 << 2) /* Outer Write-Through Cacheable */ +#define ATTR_ST2_MEM_OWB (3 << 2) /* Outer Write-Back Cacheable */ +#define ATTR_ST2_MEM_INC (1 << 0) /* Inner Non-cacheable */ +#define ATTR_ST2_MEM_IWT (1 << 1) /* Inner Write-Through Cacheable */ +#define ATTR_ST2_MEM_IWB (3 << 0) /* Inner Write-Back Cacheable */ + +#define ATTR_ST2_DEFAULT (ATTR_ST2_AF | ATTR_ST2_SH(ATTR_ST2_SH_IS) | \ + ATTR_ST2_S2AP(ATTR_ST2_S2AP_RW) | \ + ATTR_ST2_MEMATTR(ATTR_ST2_MEM_OWB | ATTR_ST2_MEM_IWB)) + /* Level 0 table, 512GiB per entry */ #define L0_SHIFT 39 Index: sys/arm64/include/vmm.h =================================================================== --- /dev/null +++ sys/arm64/include/vmm.h @@ -0,0 +1,430 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_H_ +#define _VMM_H_ + +#include +#include +#include + +#include "pte.h" +#include "pmap.h" + +enum vm_suspend_how { + VM_SUSPEND_NONE, + VM_SUSPEND_RESET, + VM_SUSPEND_POWEROFF, + VM_SUSPEND_HALT, + VM_SUSPEND_TRIPLEFAULT, + VM_SUSPEND_LAST +}; + +/* + * Identifiers for architecturally defined registers. + */ +enum vm_reg_name { + VM_REG_GUEST_X0, + VM_REG_GUEST_X1, + VM_REG_GUEST_X2, + VM_REG_GUEST_X3, + VM_REG_GUEST_X4, + VM_REG_GUEST_X5, + VM_REG_GUEST_X6, + VM_REG_GUEST_X7, + VM_REG_GUEST_X8, + VM_REG_GUEST_X9, + VM_REG_GUEST_X10, + VM_REG_GUEST_X11, + VM_REG_GUEST_X12, + VM_REG_GUEST_X13, + VM_REG_GUEST_X14, + VM_REG_GUEST_X15, + VM_REG_GUEST_X16, + VM_REG_GUEST_X17, + VM_REG_GUEST_X18, + VM_REG_GUEST_X19, + VM_REG_GUEST_X20, + VM_REG_GUEST_X21, + VM_REG_GUEST_X22, + VM_REG_GUEST_X23, + VM_REG_GUEST_X24, + VM_REG_GUEST_X25, + VM_REG_GUEST_X26, + VM_REG_GUEST_X27, + VM_REG_GUEST_X28, + VM_REG_GUEST_X29, + VM_REG_GUEST_LR, + VM_REG_GUEST_SP, + VM_REG_GUEST_ELR, + VM_REG_GUEST_SPSR, + VM_REG_ELR_EL2, + VM_REG_LAST +}; + +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) +#define VM_INTINFO_DEL_ERRCODE 0x800 +#define VM_INTINFO_RSVD 0x7ffff000 +#define VM_INTINFO_VALID 0x80000000 +#define VM_INTINFO_TYPE 0x700 +#define VM_INTINFO_HWINTR (0 << 8) +#define VM_INTINFO_NMI (2 << 8) +#define VM_INTINFO_HWEXCEPTION (3 << 8) +#define VM_INTINFO_SWINTR (4 << 8) + +#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */ + +#ifdef _KERNEL + +#define VM_MAX_NAMELEN 32 + +struct vm; +struct vm_exception; +struct vm_memory_segment; +struct vm_exit; +struct vm_run; +struct vm_object; +struct pmap; +struct hypctx; + +typedef int (*vmm_init_func_t)(int ipinum); +typedef int (*vmm_cleanup_func_t)(void); +typedef void (*vmm_resume_func_t)(void); +typedef void * (*vmi_init_func_t)(struct vm *vm); +typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, + struct pmap *pmap, void *rendezvous_cookie, + void *suspend_cookie); +typedef void (*vmi_cleanup_func_t)(void *vmi); +typedef void (*vmi_mmap_set_func_t)(void *arg, vm_offset_t va, + vm_offset_t pa, size_t len, + vm_prot_t prot); +typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *arg, vm_offset_t va); +typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, + uint64_t *retval); +typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num, + uint64_t val); +typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); +typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); +typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); +typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); +typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu); +typedef void (*vmi_vlapic_cleanup)(void 
*vmi, struct vlapic *vlapic); + +struct vmm_ops { + /* Module-wide functions */ + vmm_init_func_t init; + vmm_cleanup_func_t cleanup; + vmm_resume_func_t resume; + /* VM specific functions */ + vmi_init_func_t vminit; + vmi_run_func_t vmrun; + vmi_cleanup_func_t vmcleanup; + vmi_mmap_set_func_t vmmapset; + vmi_mmap_get_func_t vmmapget; + vmi_get_register_t vmgetreg; + vmi_set_register_t vmsetreg; + vmi_get_cap_t vmgetcap; + vmi_set_cap_t vmsetcap; +}; + +extern struct vmm_ops vmm_ops_arm; + +int vm_create(const char *name, struct vm **retvm); +void vm_destroy(struct vm *vm); +const char *vm_name(struct vm *vm); +int vm_malloc(struct vm *vm, uint64_t gpa, size_t len); +uint64_t vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t size); +int vm_gpabase2memseg(struct vm *vm, uint64_t gpabase, + struct vm_memory_segment *seg); +boolean_t vm_mem_allocated(struct vm *vm, uint64_t gpa); +int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); +int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val); +int vm_run(struct vm *vm, struct vm_run *vmrun); +void* vm_get_cookie(struct vm *vm); +uint16_t vm_get_maxcpus(struct vm *vm); +int vm_get_capability(struct vm *vm, int vcpu, int type, int *val); +int vm_set_capability(struct vm *vm, int vcpu, int type, int val); +int vm_activate_cpu(struct vm *vm, int vcpu); +int vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size, + uint64_t redist_start, size_t redist_size); +int vm_assert_irq(struct vm *vm, uint32_t irq, uint32_t vcpuid); +int vm_deassert_irq(struct vm *vm, uint32_t irq, uint32_t vcpuid); +struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); +void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); +void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); +void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); + +#ifdef _SYS__CPUSET_H_ +/* + * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. + * The rendezvous 'func(arg)' is not allowed to do anything that will + * cause the thread to be put to sleep. + * + * If the rendezvous is being initiated from a vcpu context then the + * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. + * + * The caller cannot hold any locks when initiating the rendezvous. + * + * The implementation of this API may cause vcpus other than those specified + * by 'dest' to be stalled. The caller should not rely on any vcpus making + * forward progress when the rendezvous is in progress. 
+ */ +typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); +void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, + vm_rendezvous_func_t func, void *arg); +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); +#endif /* _SYS__CPUSET_H_ */ + +extern uint64_t hypmode_enabled; +static __inline bool +virt_enabled() +{ + return (hypmode_enabled != 0); +} + +static __inline int +vcpu_rendezvous_pending(void *rendezvous_cookie) +{ + + return (*(uintptr_t *)rendezvous_cookie != 0); +} + +static __inline int +vcpu_suspended(void *suspend_cookie) +{ + + return (*(int *)suspend_cookie); +} + +enum vcpu_state { + VCPU_IDLE, + VCPU_FROZEN, + VCPU_RUNNING, + VCPU_SLEEPING, +}; + +int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state, + bool from_idle); +enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu); + +static int __inline +vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) +{ + return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING); +} + +#ifdef _SYS_PROC_H_ +static int __inline +vcpu_should_yield(struct vm *vm, int vcpu) +{ + + if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) + return (1); + else if (curthread->td_owepreempt) + return (1); + else + return (0); +} +#endif + +void *vcpu_stats(struct vm *vm, int vcpu); +void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr); + +/* + * This function is called after a VM-exit that occurred during exception or + * interrupt delivery through the IDT. The format of 'intinfo' is described + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. + * + * If a VM-exit handler completes the event delivery successfully then it + * should call vm_exit_intinfo() to extinguish the pending event. For e.g., + * if the task switch emulation is triggered via a task gate then it should + * call this function with 'intinfo=0' to indicate that the external event + * is not pending anymore. + * + * Return value is 0 on success and non-zero on failure. + */ +int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); + +/* + * This function is called before every VM-entry to retrieve a pending + * event that should be injected into the guest. This function combines + * nested events into a double or triple fault. + * + * Returns 0 if there are no events that need to be injected into the guest + * and non-zero otherwise. 
+ */ +int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); + +int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); + +enum vm_reg_name vm_segment_name(int seg_encoding); + +struct vm_copyinfo { + uint64_t gpa; + size_t len; + void *hva; + void *cookie; +}; + +int vcpu_trace_exceptions(struct vm *vm, int vcpuid); +#endif /* _KERNEL */ + +#define VM_MAXCPU 1 + +#define VM_DIR_READ 0 +#define VM_DIR_WRITE 1 + +struct vie { + uint8_t access_size:4, sign_extend:1, dir:1, unused:2; + enum vm_reg_name reg; +}; + +struct vre { + uint32_t inst_syndrome; + uint8_t dir:1, unused:7; + enum vm_reg_name reg; +}; + +/* + * Identifiers for optional vmm capabilities + */ +enum vm_cap_type { + VM_CAP_HALT_EXIT, + VM_CAP_MTRAP_EXIT, + VM_CAP_PAUSE_EXIT, + VM_CAP_UNRESTRICTED_GUEST, + VM_CAP_MAX +}; +enum vm_exitcode { + VM_EXITCODE_BOGUS, + VM_EXITCODE_INST_EMUL, + VM_EXITCODE_REG_EMUL, + VM_EXITCODE_HVC, + VM_EXITCODE_SUSPENDED, + VM_EXITCODE_HYP, + VM_EXITCODE_WFI, + VM_EXITCODE_SPINUP_AP, + VM_EXITCODE_MAX +}; + +enum task_switch_reason { + TSR_CALL, + TSR_IRET, + TSR_JMP, + TSR_IDT_GATE, /* task gate in IDT */ +}; + +struct vm_task_switch { + uint16_t tsssel; /* new TSS selector */ + int ext; /* task switch due to external event */ + uint32_t errcode; + int errcode_valid; /* push 'errcode' on the new stack */ + enum task_switch_reason reason; +}; + +struct vm_exit { + enum vm_exitcode exitcode; + int inst_length; + uint64_t pc; + union { + /* + * ARM specific payload. + */ + struct { + uint32_t exception_nr; + uint32_t esr_el2; /* Exception Syndrome Register */ + uint64_t far_el2; /* Fault Address Register */ + uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */ + } hyp; + struct { + struct vre vre; + } reg_emul; + struct { + uint64_t gpa; + int fault_type; + } paging; + struct { + uint64_t gpa; + struct vie vie; + } inst_emul; + + struct { + struct hypctx *hypctx; + } wfi; + /* + * VMX specific payload. Used when there is no "better" + * exitcode to represent the VM-exit. + */ + struct { + int status; /* vmx inst status */ + /* + * 'exit_reason' and 'exit_qualification' are valid + * only if 'status' is zero. + */ + uint32_t exit_reason; + uint64_t exit_qualification; + /* + * 'inst_error' and 'inst_type' are valid + * only if 'status' is non-zero. + */ + int inst_type; + int inst_error; + } vmx; + /* + * SVM specific payload. + */ + struct { + uint64_t exitcode; + uint64_t exitinfo1; + uint64_t exitinfo2; + } svm; + struct { +#ifdef __aarch64__ +#else + uint32_t code; /* ecx value */ + uint64_t wval; +#endif + } msr; + struct { + int vcpu; + uint64_t rip; + uint64_t ctx_id; + } spinup_ap; + struct { + uint64_t rflags; + } hlt; + struct { + int vector; + } ioapic_eoi; + struct { + enum vm_suspend_how how; + } suspended; + struct vm_task_switch task_switch; + } u; +}; + +#endif /* _VMM_H_ */ Index: sys/arm64/include/vmm_dev.h =================================================================== --- /dev/null +++ sys/arm64/include/vmm_dev.h @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_DEV_H_ +#define _VMM_DEV_H_ + +#ifdef _KERNEL +void vmmdev_init(void); +int vmmdev_cleanup(void); +#endif + +struct vm_memory_segment { + uint64_t gpa; /* in */ + size_t len; + int wired; +}; + +struct vm_register { + int cpuid; + int regnum; /* enum vm_reg_name */ + uint64_t regval; +}; + +struct vm_run { + int cpuid; + uint64_t pc; + struct vm_exit vm_exit; + +}; + +struct vm_exception { + int cpuid; + int vector; + uint32_t error_code; + int error_code_valid; + int restart_instruction; +}; + +struct vm_capability { + int cpuid; + enum vm_cap_type captype; + int capval; + int allcpus; +}; + +#define MAX_VM_STATS 64 +struct vm_stats { + int cpuid; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; +struct vm_stat_desc { + int index; /* in */ + char desc[128]; /* out */ +}; + + +struct vm_suspend { + enum vm_suspend_how how; +}; + +struct vm_gla2gpa { + int vcpuid; /* inputs */ + int prot; /* PROT_READ or PROT_WRITE */ + uint64_t gla; + int fault; /* outputs */ + uint64_t gpa; +}; + +struct vm_activate_cpu { + int vcpuid; +}; + +struct vm_attach_vgic { + uint64_t dist_start; + size_t dist_size; + uint64_t redist_start; + size_t redist_size; +}; + +struct vm_irq { + uint32_t irq; + uint32_t vcpuid; +}; + +#define VM_ACTIVE_CPUS 0 +#define VM_SUSPENDED_CPUS 1 + +enum { + /* general routines */ + IOCNUM_ABIVERS = 0, + IOCNUM_RUN = 1, + IOCNUM_SET_CAPABILITY = 2, + IOCNUM_GET_CAPABILITY = 3, + IOCNUM_SUSPEND = 4, + IOCNUM_REINIT = 5, + + /* memory apis */ + IOCNUM_MAP_MEMORY = 10, + IOCNUM_GET_MEMORY_SEG = 11, + IOCNUM_GET_GPA_PMAP = 12, + IOCNUM_GLA2GPA = 13, + + /* register/state accessors */ + IOCNUM_SET_REGISTER = 20, + IOCNUM_GET_REGISTER = 21, + + /* statistics */ + IOCNUM_VM_STATS = 50, + IOCNUM_VM_STAT_DESC = 51, + + /* interrupt injection */ + IOCNUM_ASSERT_IRQ = 80, + IOCNUM_DEASSERT_IRQ = 81, + + /* vm_cpuset */ + IOCNUM_ACTIVATE_CPU = 90, + IOCNUM_GET_CPUSET = 91, + + /* vm_attach_vgic */ + IOCNUM_ATTACH_VGIC = 110, +}; + +#define VM_RUN \ + _IOWR('v', IOCNUM_RUN, struct vm_run) +#define VM_SUSPEND \ + _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) +#define VM_REINIT \ + _IO('v', IOCNUM_REINIT) +#define VM_MAP_MEMORY \ + _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) +#define VM_GET_MEMORY_SEG \ + _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment) +#define VM_SET_REGISTER \ + _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) +#define VM_GET_REGISTER \ + _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) +#define VM_SET_CAPABILITY 
\ + _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) +#define VM_GET_CAPABILITY \ + _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) +#define VM_STATS \ + _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) +#define VM_STAT_DESC \ + _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) +#define VM_ASSERT_IRQ \ + _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq) +#define VM_DEASSERT_IRQ \ + _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq) +#define VM_GLA2GPA \ + _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa) +#define VM_ACTIVATE_CPU \ + _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) +#define VM_GET_CPUS \ + _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_ATTACH_VGIC \ + _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_attach_vgic) +#endif Index: sys/arm64/include/vmm_instruction_emul.h =================================================================== --- /dev/null +++ sys/arm64/include/vmm_instruction_emul.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +/* + * Callback functions to read and write memory regions. + */ +typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa, + uint64_t *rval, int rsize, void *arg); +typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa, + uint64_t wval, int wsize, void *arg); + +/* + * Callback functions to read and write registers. + */ +typedef int (*reg_read_t)(void *vm, int cpuid, uint64_t *rval, void *arg); +typedef int (*reg_write_t)(void *vm, int cpuid, uint64_t wval, void *arg); + +/* + * Emulate the decoded 'vie' instruction when it contains a memory operation. + * + * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region + * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * + */ +int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t mrr, mem_region_write_t mrw, void *mrarg); + +/* + * Emulate the decoded 'vre' instruction when it contains a register access. 
+ * + * The callbacks 'regread' and 'regwrite' emulate reads and writes to the + * register from 'vie'. 'regarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * + */ +int vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread, + reg_write_t regwrite, void *regarg); + +#endif /* _VMM_INSTRUCTION_EMUL_H_ */ Index: sys/arm64/vmm/arm64.h =================================================================== --- /dev/null +++ sys/arm64/vmm/arm64.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#ifndef _VMM_ARM64_H_ +#define _VMM_ARM64_H_ + +#include +#include +#include +#include + +#include "mmu.h" +#include "io/vgic_v3.h" +#include "io/vtimer.h" + +struct hypctx { + struct reg regs; + + /* EL1 control registers */ + uint64_t actlr_el1; /* Auxiliary Control Register */ + uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */ + uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */ + uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */ + uint64_t contextidr_el1; /* Current Process Identifier */ + uint64_t cpacr_el1; /* Arhitectural Feature Access Control Register */ + uint64_t elr_el1; /* Exception Link Register */ + uint64_t esr_el1; /* Exception Syndrome Register */ + uint64_t far_el1; /* Fault Address Register */ + uint64_t fp; /* Frame Pointer */ + uint64_t mair_el1; /* Memory Attribute Indirection Register */ + uint64_t par_el1; /* Physical Address Register */ + uint64_t sctlr_el1; /* System Control Register */ + uint64_t sp_el0; /* Stack Pointer */ + uint64_t tcr_el1; /* Translation Control Register */ + uint64_t tpidr_el0; /* EL0 Software ID Register */ + uint64_t tpidrro_el0; /* Read-only Thread ID Register */ + uint64_t tpidr_el1; /* EL1 Software ID Register */ + uint64_t ttbr0_el1; /* Translation Table Base Register 0 */ + uint64_t ttbr1_el1; /* Translation Table Base Register 1 */ + uint64_t vbar_el1; /* Vector Base Address Register */ + uint32_t spsr_el1; /* Saved Program Status Register */ + + /* EL2 control registers */ + uint64_t cptr_el2; /* Architectural Feature Trap Register */ + uint64_t elr_el2; /* Exception Link Register */ + uint64_t hcr_el2; /* Hypervisor Configuration Register */ + uint64_t vpidr_el2; /* Virtualization Processor ID Register */ + uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */ + uint32_t spsr_el2; /* Saved Program Status Register */ + + uint32_t vcpu; + struct hyp *hyp; + struct { + uint64_t esr_el2; /* Exception Syndrome Register */ + uint64_t far_el2; /* Fault Address Register */ + uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */ + } exit_info; + + struct vtimer_cpu vtimer_cpu; + struct vgic_v3_cpu_if vgic_cpu_if; + struct vgic_v3_redist vgic_redist; +#ifdef VFP + struct vfpstate vfpstate; +#endif +}; + +struct hyp { + pmap_t stage2_map; + struct hypctx ctx[VM_MAXCPU]; + struct vgic_mmio_region *vgic_mmio_regions; + size_t vgic_mmio_regions_num; + struct vgic_v3_dist vgic_dist; + struct vm *vm; + struct vtimer vtimer; + uint64_t vmid_generation; + uint64_t vttbr_el2; + bool vgic_attached; +}; + +uint64_t vmm_call_hyp(void *hyp_func_addr, ...); +void vmm_cleanup(void *hyp_stub_vectors); +uint64_t vmm_enter_guest(struct hypctx *hypctx); +uint64_t vmm_read_ich_vtr_el2(void); +uint64_t vmm_read_cnthctl_el2(void); +uint64_t vmm_read_tcr_el2(void); + +#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__) +//#define eprintf(fmt, ...) do {} while(0) + +#define VMID_GENERATION_MASK ((1UL<<8) - 1) +#define build_vttbr(vmid, ptaddr) \ + ((((vmid) & VMID_GENERATION_MASK) << VTTBR_VMID_SHIFT) | \ + (uint64_t)(ptaddr)) + +#define MPIDR_SMP_MASK (0x3 << 30) +#define MPIDR_AFF1_LEVEL(x) (((x) >> 2) << 8) +#define MPIDR_AFF0_LEVEL(x) (((x) & 0x3) << 0) + +/* + * Return true if the exception was caused by a translation fault in the stage 2 + * translation regime. The DFSC encoding for a translation fault has the format + * 0b0001LL, where LL (bits [1:0]) represents the level where the fault occured + * (page D7-2280 of the ARMv8 Architecture Manual). 
+ */ +#define ISS_DATA_DFSC_TF(esr_iss) \ + (!((esr_iss) & 0b111000) && ((esr_iss) & 0b000100)) +#define FAR_EL2_PAGE_OFFSET(x) ((x) & PAGE_MASK) + +#define DEBUG_ME 0 + +#define arm64_get_active_vcpu() ((struct hypctx *)PCPU_GET(vcpu)) + +#endif /* !_VMM_ARM64_H_ */ Index: sys/arm64/vmm/arm64.c =================================================================== --- /dev/null +++ sys/arm64/vmm/arm64.c @@ -0,0 +1,807 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mmu.h" +#include "arm64.h" +#include "hyp.h" +#include "reset.h" +#include "io/vgic_v3.h" +#include "io/vtimer.h" + +#define HANDLED 1 +#define UNHANDLED 0 + +#define UNUSED 0 + +MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP"); + +extern char hyp_init_vectors[]; +extern char hyp_vectors[]; +extern char hyp_code_start[]; +extern char hyp_code_end[]; +extern char hyp_stub_vectors[]; + +char *stack[MAXCPU]; +pmap_t hyp_pmap; + +static uint64_t vmid_generation = 0; +static struct mtx vmid_generation_mtx; + +static inline void +arm64_set_active_vcpu(struct hypctx *hypctx) +{ + PCPU_SET(vcpu, hypctx); +} + +static void arm64_set_vttbr(struct hyp *hyp) +{ + if (hyp->vmid_generation != 0 && + ((hyp->vmid_generation & ~VMID_GENERATION_MASK) != + (atomic_load_acq_64(&vmid_generation) & ~VMID_GENERATION_MASK))) + goto out; + + mtx_lock(&vmid_generation_mtx); + + /* Another VCPU has change the VMID already */ + if (hyp->vmid_generation && + ((hyp->vmid_generation & ~VMID_GENERATION_MASK) != + (vmid_generation & ~VMID_GENERATION_MASK))) { + mtx_unlock(&vmid_generation_mtx); + goto out; + } + + vmid_generation++; + if (!(vmid_generation & VMID_GENERATION_MASK)) + vmid_generation++; + + hyp->vmid_generation = vmid_generation; + mtx_unlock(&vmid_generation_mtx); +out: + hyp->vttbr_el2 = build_vttbr(hyp->vmid_generation, + vtophys(hyp->stage2_map->pm_l0)); +} + +static void +arm_init_vectors(void *arg) +{ + char *stack_top; + uint64_t tcr_el1, tcr_el2; + uint32_t sctlr_el2; + uint32_t vtcr_el2; + uint64_t id_aa64mmfr0_el1; + uint64_t pa_range_bits; + register_t daif; + + daif = intr_disable(); + + arm64_set_active_vcpu(NULL); + + /* + * Install the temporary vectors which will be responsible for + * initializing the VMM when we next trap into EL2. + * + * x0: the exception vector table responsible for hypervisor + * initialization on the next call. 
+ */ + vmm_call_hyp((void *)vtophys(hyp_init_vectors)); + + /* Create and map the hypervisor stack */ + stack_top = stack[PCPU_GET(cpuid)] + PAGE_SIZE; + + /* Configure address translation at EL2 */ + tcr_el1 = READ_SPECIALREG(tcr_el1); + tcr_el2 = TCR_EL2_RES1; + + /* Set physical address size */ + id_aa64mmfr0_el1 = READ_SPECIALREG(id_aa64mmfr0_el1); + pa_range_bits = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1); + tcr_el2 |= (pa_range_bits & 0x7) << TCR_EL2_PS_SHIFT; + + /* Use the same address translation attributes as the host */ + tcr_el2 |= tcr_el1 & TCR_T0SZ_MASK; + tcr_el2 |= tcr_el1 & (0xff << TCR_IRGN0_SHIFT); + + /* + * Configure the system control register for EL2: + * + * SCTLR_EL2_M: MMU on + * SCTLR_EL2_C: Data cacheability not affected + * SCTLR_EL2_I: Instruction cacheability not affected + * SCTLR_EL2_A: Instruction alignment check + * SCTLR_EL2_SA: Stack pointer alignment check + * SCTLR_EL2_WXN: Treat writable memory as execute never + * ~SCTLR_EL2_EE: Data accesses are little-endian + */ + sctlr_el2 = SCTLR_EL2_RES1; + sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I; + sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA; + sctlr_el2 |= SCTLR_EL2_WXN; + sctlr_el2 &= ~SCTLR_EL2_EE; + + /* + * Configure the Stage 2 translation control register: + * + * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable + * normal memory + * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable + * normal memory + * VTCR_EL2_TG0_4K: Stage 2 uses 4K pages + * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables + * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner + * shareable + */ + vtcr_el2 = VTCR_EL2_RES1; + vtcr_el2 = (pa_range_bits & 0x7) << VTCR_EL2_PS_SHIFT; + vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA; + vtcr_el2 |= VTCR_EL2_TG0_4K; + vtcr_el2 |= VTCR_EL2_SH0_IS; + if (pa_range_bits == ID_AA64MMFR0_PARange_1T) { + /* + * 40 bits of physical addresses, use concatenated level 1 + * tables + */ + vtcr_el2 |= 24 & VTCR_EL2_T0SZ_MASK; + vtcr_el2 |= VTCR_EL2_SL0_4K_LVL1; + } + + /* Special call to initialize EL2 */ + vmm_call_hyp((void *)vtophys(hyp_vectors), vtophys(hyp_pmap->pm_l0), + ktohyp(stack_top), tcr_el2, sctlr_el2, vtcr_el2); + + intr_restore(daif); +} + +static void +arm_cleanup_vectors(void *arg) +{ + register_t daif; + + /* + * vmm_cleanup() will disable the MMU. For the next few instructions, + * before the hardware disables the MMU, one of the following is + * possible: + * + * a. The instruction addresses are fetched with the MMU disabled, + * and they must represent the actual physical addresses. This will work + * because we call the vmm_cleanup() function by its physical address. + * + * b. The instruction addresses are fetched using the old translation + * tables. This will work because we have an identity mapping in place + * in the translation tables and vmm_cleanup() is called by its physical + * address. + */ + daif = intr_disable(); + vmm_call_hyp((void *)vtophys(vmm_cleanup), vtophys(hyp_stub_vectors)); + intr_restore(daif); + + arm64_set_active_vcpu(NULL); +} + +static int +arm_init(int ipinum) +{ + size_t hyp_code_len; + uint64_t ich_vtr_el2; + uint64_t cnthctl_el2; + int cpu; + register_t daif; + + if (!virt_enabled()) { + printf("arm_init: Processor doesn't have support for virtualization.\n"); + return (ENXIO); + } + + mtx_init(&vmid_generation_mtx, "vmid_generation_mtx", NULL, MTX_DEF); + + /* Create the mappings for the hypervisor translation table. 
*/ + hyp_pmap = malloc(sizeof(*hyp_pmap), M_HYP, M_WAITOK | M_ZERO); + hypmap_init(hyp_pmap, PM_STAGE1); + hyp_code_len = (size_t)hyp_code_end - (size_t)hyp_code_start; + hypmap_map(hyp_pmap, (vm_offset_t)hyp_code_start, hyp_code_len, VM_PROT_EXECUTE); + + /* We need an identity mapping for when we activate the MMU */ + hypmap_map_identity(hyp_pmap, (vm_offset_t)hyp_code_start, hyp_code_len, + VM_PROT_EXECUTE); + + /* Create a per-CPU hypervisor stack */ + CPU_FOREACH(cpu) { + stack[cpu] = malloc(PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + hypmap_map(hyp_pmap, (vm_offset_t)stack[cpu], PAGE_SIZE, + VM_PROT_READ | VM_PROT_WRITE); + } + + + smp_rendezvous(NULL, arm_init_vectors, NULL, NULL); + + daif = intr_disable(); + + ich_vtr_el2 = vmm_call_hyp((void *)ktohyp(vmm_read_ich_vtr_el2)); + vgic_v3_init(ich_vtr_el2); + + cnthctl_el2 = vmm_call_hyp((void *)ktohyp(vmm_read_cnthctl_el2)); + vtimer_init(cnthctl_el2); + + intr_restore(daif); + + return 0; +} + +static int +arm_cleanup(void) +{ + int cpu; + + smp_rendezvous(NULL, arm_cleanup_vectors, NULL, NULL); + + vtimer_cleanup(); + + hypmap_cleanup(hyp_pmap); + free(hyp_pmap, M_HYP); + for (cpu = 0; cpu < nitems(stack); cpu++) + free(stack[cpu], M_HYP); + + mtx_destroy(&vmid_generation_mtx); + + return (0); +} + +static void * +arm_vminit(struct vm *vm) +{ + struct hyp *hyp; + struct hypctx *hypctx; + bool last_vcpu; + int i; + + hyp = malloc(sizeof(struct hyp), M_HYP, M_WAITOK | M_ZERO); + hyp->vm = vm; + hyp->vgic_attached = false; + + hyp->stage2_map = malloc(sizeof(*hyp->stage2_map), + M_HYP, M_WAITOK | M_ZERO); + hypmap_init(hyp->stage2_map, PM_STAGE2); + arm64_set_vttbr(hyp); + + for (i = 0; i < VM_MAXCPU; i++) { + hypctx = &hyp->ctx[i]; + hypctx->vcpu = i; + hypctx->hyp = hyp; + + reset_vm_el01_regs(hypctx); + reset_vm_el2_regs(hypctx); + } + + vtimer_vminit(hyp); + vgic_v3_vminit(hyp); + for (i = 0; i < VM_MAXCPU; i++) { + hypctx = &hyp->ctx[i]; + vtimer_cpuinit(hypctx); + last_vcpu = (i == VM_MAXCPU - 1); + vgic_v3_cpuinit(hypctx, last_vcpu); + } + + hypmap_map(hyp_pmap, (vm_offset_t)hyp, sizeof(struct hyp), + VM_PROT_READ | VM_PROT_WRITE); + + return (hyp); +} + +static enum vm_reg_name +get_vm_reg_name(uint32_t reg_nr, uint32_t mode __attribute__((unused))) +{ + switch(reg_nr) { + case 0: + return VM_REG_GUEST_X0; + case 1: + return VM_REG_GUEST_X1; + case 2: + return VM_REG_GUEST_X2; + case 3: + return VM_REG_GUEST_X3; + case 4: + return VM_REG_GUEST_X4; + case 5: + return VM_REG_GUEST_X5; + case 6: + return VM_REG_GUEST_X6; + case 7: + return VM_REG_GUEST_X7; + case 8: + return VM_REG_GUEST_X8; + case 9: + return VM_REG_GUEST_X9; + case 10: + return VM_REG_GUEST_X10; + case 11: + return VM_REG_GUEST_X11; + case 12: + return VM_REG_GUEST_X12; + case 13: + return VM_REG_GUEST_X13; + case 14: + return VM_REG_GUEST_X14; + case 15: + return VM_REG_GUEST_X15; + case 16: + return VM_REG_GUEST_X16; + case 17: + return VM_REG_GUEST_X17; + case 18: + return VM_REG_GUEST_X18; + case 19: + return VM_REG_GUEST_X19; + case 20: + return VM_REG_GUEST_X20; + case 21: + return VM_REG_GUEST_X21; + case 22: + return VM_REG_GUEST_X22; + case 23: + return VM_REG_GUEST_X23; + case 24: + return VM_REG_GUEST_X24; + case 25: + return VM_REG_GUEST_X25; + case 26: + return VM_REG_GUEST_X26; + case 27: + return VM_REG_GUEST_X27; + case 28: + return VM_REG_GUEST_X28; + case 29: + return VM_REG_GUEST_X29; + case 30: + return VM_REG_GUEST_LR; + case 31: + return VM_REG_GUEST_SP; + case 32: + return VM_REG_GUEST_ELR; + case 33: + return VM_REG_GUEST_SPSR; + case 34: + 
return VM_REG_ELR_EL2; + default: + break; + } + + return (VM_REG_LAST); +} + +static inline void +arm64_print_hyp_regs(struct vm_exit *vme) +{ + printf("esr_el2: 0x%08x\n", vme->u.hyp.esr_el2); + printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2); + printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2); +} + +static void +arm64_gen_inst_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret) +{ + struct vie *vie; + uint32_t esr_sas, reg_num; + uint64_t page_off; + + /* + * Get bits [47:12] of the IPA from HPFAR_EL2. + * At this point the 'u.hyp' member will be replaced by 'u.inst_emul'. + */ + vme_ret->u.inst_emul.gpa = \ + (vme_ret->u.hyp.hpfar_el2) >> HPFAR_EL2_FIPA_SHIFT; + /* The IPA is the base address of a 4KB page, make bits [11:0] zero. */ + vme_ret->u.inst_emul.gpa = (vme_ret->u.inst_emul.gpa) << PAGE_SHIFT; + /* Bits [11:0] are the same as bits [11:0] from the virtual address. */ + page_off = FAR_EL2_PAGE_OFFSET(vme_ret->u.hyp.far_el2); + vme_ret->u.inst_emul.gpa = vme_ret->u.inst_emul.gpa + page_off; + + esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT; + reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT; + + vie = &vme_ret->u.inst_emul.vie; + vie->access_size = 1 << esr_sas; + vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0; + vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ; + vie->reg = get_vm_reg_name(reg_num, UNUSED); +} + +static void +arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret) +{ + uint32_t reg_num; + struct vre *vre; + + /* u.hyp member will be replaced by u.reg_emul */ + vre = &vme_ret->u.reg_emul.vre; + + vre->inst_syndrome = esr_iss; + /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */ + vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE; + reg_num = ISS_MSR_Rt(esr_iss); + vre->reg = get_vm_reg_name(reg_num, UNUSED); +} + +//static bool print_stuff = false; + +static int +handle_el1_sync_excp(struct hyp *hyp, int vcpu, struct vm_exit *vme_ret) +{ + uint32_t esr_ec, esr_iss; + + esr_ec = ESR_ELx_EXCEPTION(vme_ret->u.hyp.esr_el2); + esr_iss = vme_ret->u.hyp.esr_el2 & ESR_ELx_ISS_MASK; + + switch(esr_ec) { + case EXCP_UNKNOWN: + eprintf("Unknown exception from guest\n"); + arm64_print_hyp_regs(vme_ret); + vme_ret->exitcode = VM_EXITCODE_HYP; + break; + case EXCP_HVC: + vme_ret->exitcode = VM_EXITCODE_HVC; + break; + case EXCP_MSR: + arm64_gen_reg_emul_data(esr_iss, vme_ret); + vme_ret->exitcode = VM_EXITCODE_REG_EMUL; + break; + + case EXCP_DATA_ABORT_L: + /* Check if instruction syndrome is valid */ + if (!(esr_iss & ISS_DATA_ISV)) { + eprintf("Data abort with invalid instruction syndrome\n"); + arm64_print_hyp_regs(vme_ret); + vme_ret->exitcode = VM_EXITCODE_HYP; + break; + } + + /* + * Check if the data abort was caused by a translation fault. + * Any other type of data fault will be treated as an error. 
+ */ + if (!(ISS_DATA_DFSC_TF(esr_iss))) { + eprintf("Data abort not on a stage 2 translation\n"); + arm64_print_hyp_regs(vme_ret); + vme_ret->exitcode = VM_EXITCODE_HYP; + break; + } + + arm64_gen_inst_emul_data(esr_iss, vme_ret); + vme_ret->exitcode = VM_EXITCODE_INST_EMUL; + break; + + default: + eprintf("Unsupported synchronous exception from guest: 0x%x\n", + esr_ec); + arm64_print_hyp_regs(vme_ret); + vme_ret->exitcode = VM_EXITCODE_HYP; + break; + } + + /* We don't don't do any instruction emulation here */ + return (UNHANDLED); +} + +static int +arm64_handle_world_switch(struct hyp *hyp, int vcpu, struct vm_exit *vme) +{ + int excp_type; + int handled; + + excp_type = vme->u.hyp.exception_nr; + switch (excp_type) { + case EXCP_TYPE_EL1_SYNC: + /* The exit code will be set by handle_el1_sync_excp(). */ + handled = handle_el1_sync_excp(hyp, vcpu, vme); + break; + + case EXCP_TYPE_EL1_IRQ: + case EXCP_TYPE_EL1_FIQ: + /* The host kernel will handle IRQs and FIQs. */ + vme->exitcode = VM_EXITCODE_BOGUS; + handled = UNHANDLED; + break; + + case EXCP_TYPE_EL1_ERROR: + case EXCP_TYPE_EL2_SYNC: + case EXCP_TYPE_EL2_IRQ: + case EXCP_TYPE_EL2_FIQ: + case EXCP_TYPE_EL2_ERROR: + eprintf("Unhandled exception type: %s\n", __STRING(excp_type)); + vme->exitcode = VM_EXITCODE_BOGUS; + handled = UNHANDLED; + break; + + default: + eprintf("Unknown exception type: %d\n", excp_type); + vme->exitcode = VM_EXITCODE_BOGUS; + handled = UNHANDLED; + break; + } + + return (handled); +} + +static int +arm_vmrun(void *arg, int vcpu, register_t pc, pmap_t pmap, + void *rendezvous_cookie, void *suspend_cookie) +{ + uint64_t excp_type; + int handled; + register_t daif; + struct hyp *hyp; + struct hypctx *hypctx; + struct vm *vm; + struct vm_exit *vme; + + hyp = (struct hyp *)arg; + vm = hyp->vm; + vme = vm_exitinfo(vm, vcpu); + + hypctx = &hyp->ctx[vcpu]; + hypctx->elr_el2 = (uint64_t)pc; + + for (;;) { + daif = intr_disable(); + /* + * TODO: What happens if a timer interrupt is asserted exactly + * here, but for the previous VM? + */ + arm64_set_active_vcpu(hypctx); + vgic_v3_sync_hwstate(hypctx); + excp_type = vmm_call_hyp((void *)ktohyp(vmm_enter_guest), + ktohyp(hypctx)); + intr_restore(daif); + + if (excp_type == EXCP_TYPE_MAINT_IRQ) + continue; + + vme->pc = hypctx->elr_el2; + vme->inst_length = INSN_SIZE; + vme->u.hyp.exception_nr = excp_type; + vme->u.hyp.esr_el2 = hypctx->exit_info.esr_el2; + vme->u.hyp.far_el2 = hypctx->exit_info.far_el2; + vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2; + + handled = arm64_handle_world_switch(hyp, vcpu, vme); + if (handled == UNHANDLED) + /* Exit loop to emulate instruction. */ + break; + else + /* Resume guest execution from the next instruction. */ + hypctx->elr_el2 += vme->inst_length; + } + + return (0); +} + +static void +arm_deactivate_pcpu(void *arg) +{ + struct hyp *hyp = arg; + int maxcpu; + int i; + + maxcpu = vm_get_maxcpus(hyp->vm); + for (i = 0; i < maxcpu; i++) + if (arm64_get_active_vcpu() == &hyp->ctx[i]) + arm64_set_active_vcpu(NULL); +} + +static void +arm_vmcleanup(void *arg) +{ + struct hyp *hyp = arg; + + smp_rendezvous(NULL, arm_deactivate_pcpu, NULL, hyp); + + vtimer_vmcleanup(arg); + vgic_v3_detach_from_vm(arg); + + /* Unmap the VM hyp struct from the hyp mode translation table */ + hypmap_map(hyp_pmap, (vm_offset_t)hyp, sizeof(struct hyp), + VM_PROT_NONE); + hypmap_cleanup(hyp->stage2_map); + free(hyp->stage2_map, M_HYP); + free(hyp, M_HYP); +} + +/* + * Return register value. 
Registers have different sizes and an explicit cast + * must be made to ensure proper conversion. + */ +static void * +hypctx_regptr(struct hypctx *hypctx, int reg) +{ + switch (reg) { + case VM_REG_GUEST_X0: + return (&hypctx->regs.x[0]); + case VM_REG_GUEST_X1: + return (&hypctx->regs.x[1]); + case VM_REG_GUEST_X2: + return (&hypctx->regs.x[2]); + case VM_REG_GUEST_X3: + return (&hypctx->regs.x[3]); + case VM_REG_GUEST_X4: + return (&hypctx->regs.x[4]); + case VM_REG_GUEST_X5: + return (&hypctx->regs.x[5]); + case VM_REG_GUEST_X6: + return (&hypctx->regs.x[6]); + case VM_REG_GUEST_X7: + return (&hypctx->regs.x[7]); + case VM_REG_GUEST_X8: + return (&hypctx->regs.x[8]); + case VM_REG_GUEST_X9: + return (&hypctx->regs.x[9]); + case VM_REG_GUEST_X10: + return (&hypctx->regs.x[10]); + case VM_REG_GUEST_X11: + return (&hypctx->regs.x[11]); + case VM_REG_GUEST_X12: + return (&hypctx->regs.x[12]); + case VM_REG_GUEST_X13: + return (&hypctx->regs.x[13]); + case VM_REG_GUEST_X14: + return (&hypctx->regs.x[14]); + case VM_REG_GUEST_X15: + return (&hypctx->regs.x[15]); + case VM_REG_GUEST_X16: + return (&hypctx->regs.x[16]); + case VM_REG_GUEST_X17: + return (&hypctx->regs.x[17]); + case VM_REG_GUEST_X18: + return (&hypctx->regs.x[18]); + case VM_REG_GUEST_X19: + return (&hypctx->regs.x[19]); + case VM_REG_GUEST_X20: + return (&hypctx->regs.x[20]); + case VM_REG_GUEST_X21: + return (&hypctx->regs.x[21]); + case VM_REG_GUEST_X22: + return (&hypctx->regs.x[22]); + case VM_REG_GUEST_X23: + return (&hypctx->regs.x[23]); + case VM_REG_GUEST_X24: + return (&hypctx->regs.x[24]); + case VM_REG_GUEST_X25: + return (&hypctx->regs.x[25]); + case VM_REG_GUEST_X26: + return (&hypctx->regs.x[26]); + case VM_REG_GUEST_X27: + return (&hypctx->regs.x[27]); + case VM_REG_GUEST_X28: + return (&hypctx->regs.x[28]); + case VM_REG_GUEST_X29: + return (&hypctx->regs.x[29]); + case VM_REG_GUEST_LR: + return (&hypctx->regs.lr); + case VM_REG_GUEST_SP: + return (&hypctx->regs.sp); + case VM_REG_GUEST_ELR: + return (&hypctx->regs.elr); + case VM_REG_GUEST_SPSR: + return (&hypctx->regs.spsr); + case VM_REG_ELR_EL2: + return (&hypctx->elr_el2); + default: + break; + } + return (NULL); +} + +static int +arm_getreg(void *arg, int vcpu, int reg, uint64_t *retval) +{ + void *regp; + int running, hostcpu; + struct hyp *hyp = arg; + + running = vcpu_is_running(hyp->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("arm_getreg: %s%d is running", vm_name(hyp->vm), vcpu); + + if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) { + if (reg == VM_REG_GUEST_SPSR) + *retval = *(uint32_t *)regp; + else + *retval = *(uint64_t *)regp; + return (0); + } else { + return (EINVAL); + } +} + +static int +arm_setreg(void *arg, int vcpu, int reg, uint64_t val) +{ + void *regp; + struct hyp *hyp = arg; + int running, hostcpu; + + running = vcpu_is_running(hyp->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("hyp_setreg: %s%d is running", vm_name(hyp->vm), vcpu); + + if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) { + if (reg == VM_REG_GUEST_SPSR) + *(uint32_t *)regp = (uint32_t)val; + else + *(uint64_t *)regp = val; + return (0); + } else { + return (EINVAL); + } +} + +static +void arm_restore(void) +{ + ; +} + +struct vmm_ops vmm_ops_arm = { + arm_init, + arm_cleanup, + arm_restore, + arm_vminit, + arm_vmrun, + arm_vmcleanup, + hypmap_set, + hypmap_get, + arm_getreg, + arm_setreg, + NULL, /* vmi_get_cap_t */ + NULL /* vmi_set_cap_t */ +}; Index: sys/arm64/vmm/hyp.h 
=================================================================== --- /dev/null +++ sys/arm64/vmm/hyp.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_HYP_H_ +#define _VMM_HYP_H_ + +/* + * The translation tables for the hypervisor mode will hold mappings for kernel + * virtual addresses and an identity mapping (VA == PA) necessary when + * enabling/disabling the MMU. + * + * When in EL2 exception level the translation table base register is TTBR0_EL2 + * and the virtual addresses generated by the CPU must be at the bottom of the + * memory, with the first 16 bits all set to zero: + * + * 0x0000ffffffffffff End hyp address space + * 0x0000000000000000 Start of hyp address space + * + * To run code in hyp mode we need to convert kernel virtual addresses to + * addreses that fit into this address space. + * + * The kernel virtual address range is: + * + * 0xffff007fffffffff End of KVA + * 0xffff000000000000 Kernel base address & start of KVA + * + * (see /sys/arm64/include/vmparam.h). + * + * We could convert the kernel virtual addresses to valid EL2 addresses by + * setting the first 16 bits to zero and thus mapping the kernel addresses in + * the bottom half of the EL2 address space, but then they might clash with the + * identity mapping addresses. Instead we map the kernel addresses in the upper + * half of the EL2 address space. + * + * The hypervisor address space will look like this: + * + * 0x0000807fffffffff End of KVA mapping + * 0x0000800000000000 Start of KVA mapping + * + * 0x00007fffffffffff End of identity mapping + * 0x0000000000000000 Start of identity mapping + * + * With the scheme we have 47 bits at our disposable for the identity map and + * another 47 bits for the kernel virtual addresses. For a maximum physical + * memory size of 128TB we are guaranteed to not have any clashes between + * addresses. 
+ */ +#define HYP_VM_MIN_ADDRESS 0x0000000000000000 +#define HYP_VM_MAX_ADDRESS 0x0000ffffffffffff + +#define HYP_KVA_OFFSET 0x0000800000000000 +#define HYP_KVA_MASK 0x0000ffffffffffff + +/* + * When taking asynchronous exceptions, or interrupts, with the exception of the + * SError interrupt, the exception syndrome register is not updated with the + * exception code. We need to differentiate between the different exception + * types taken to EL2. + */ +#define EXCP_TYPE_EL1_SYNC 0 +#define EXCP_TYPE_EL1_IRQ 1 +#define EXCP_TYPE_EL1_FIQ 2 +#define EXCP_TYPE_EL1_ERROR 3 + +#define EXCP_TYPE_EL2_SYNC 4 +#define EXCP_TYPE_EL2_IRQ 5 +#define EXCP_TYPE_EL2_FIQ 6 +#define EXCP_TYPE_EL2_ERROR 7 + +#define EXCP_TYPE_MAINT_IRQ 8 + +#define HYP_GET_VECTOR_TABLE -1 + +#endif /* !_VMM_HYP_H_ */ Index: sys/arm64/vmm/hyp.S =================================================================== --- /dev/null +++ sys/arm64/vmm/hyp.S @@ -0,0 +1,387 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * This software was developed by Alexandru Elisei under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + + +#include +#include +#include +#include +#include + +#include "hyp_macros.h" +#include "hyp.h" +#include "hyp_assym.h" + + .text + + .globl hyp_code_start + .globl hyp_code_end + + .align 12 +hyp_code_start: + + +ENTRY(vmm_call_hyp) + hvc #0 + ret +END(vmm_call_hyp) + + +.macro vempty + .align 7 + 1: b 1b +.endm + +.macro vector name + .align 7 + b handle_\name +.endm + + .align 11 + .globl hyp_init_vectors +hyp_init_vectors: + vempty /* Synchronous EL2t */ + vempty /* IRQ EL2t */ + vempty /* FIQ EL2t */ + vempty /* Error EL2t */ + + vempty /* Synchronous EL2h */ + vempty /* IRQ EL2h */ + vempty /* FIQ EL2h */ + vempty /* Error EL2h */ + + vector hyp_init /* Synchronous 64-bit EL1 */ + vempty /* IRQ 64-bit EL1 */ + vempty /* FIQ 64-bit EL1 */ + vempty /* Error 64-bit EL1 */ + + vempty /* Synchronous 32-bit EL1 */ + vempty /* IRQ 32-bit EL1 */ + vempty /* FIQ 32-bit EL1 */ + vempty /* Error 32-bit EL1 */ + + +/* + * Initialize the hypervisor mode with a new exception vector table, translation + * table and stack. + * + * Expecting: + * x0 - the hypervisor exception vectors + * x1 - translation tables physical address + * x2 - stack top virtual address + * x3 - TCR_EL2 value + * x4 - SCTLR_EL2 value + * x5 - VTCR_EL2 value + */ +ENTRY(handle_hyp_init) + /* Install the new exception vectors */ + msr vbar_el2, x0 + /* Set the stack top address */ + mov sp, x2 + /* Use the host VTTBR_EL2 to tell the host and the guests apart */ + mov x9, #VTTBR_HOST + msr vttbr_el2, x9 + /* Load the base address for the translation tables */ + msr ttbr0_el2, x1 + /* Invalidate the TLB */ + tlbi alle2 + /* Use the same memory attributes as EL1 */ + mrs x9, mair_el1 + msr mair_el2, x9 + /* Configure address translation */ + msr tcr_el2, x3 + isb + /* Set the system control register for EL2 */ + msr sctlr_el2, x4 + /* Set the Stage 2 translation control register */ + msr vtcr_el2, x5 + /* Return success */ + mov x0, #0 + /* MMU is up and running */ + eret +END(handle_hyp_init) + + + .align 11 + .globl hyp_vectors +hyp_vectors: + vempty /* Synchronous EL2t */ + vempty /* IRQ EL2t */ + vempty /* FIQ EL2t */ + vempty /* Error EL2t */ + + vector el2_el2h_sync /* Synchronous EL2h */ + vector el2_el2h_irq /* IRQ EL2h */ + vector el2_el2h_fiq /* FIQ EL2h */ + vector el2_el2h_error /* Error EL2h */ + + vector el2_el1_sync64 /* Synchronous 64-bit EL1 */ + vector el2_el1_irq64 /* IRQ 64-bit EL1 */ + vector el2_el1_fiq64 /* FIQ 64-bit EL1 */ + vector el2_el1_error64 /* Error 64-bit EL1 */ + + vempty /* Synchronous 32-bit EL1 */ + vempty /* IRQ 32-bit EL1 */ + vempty /* FIQ 32-bit EL1 */ + vempty /* Error 32-bit EL1 */ + + +.macro do_world_switch_to_host + .align 7 + SAVE_GUEST_REGS() +#ifdef VFP + /* + * Saving the guest VFP registers needs to come after saving the rest of + * the registers because the process dirties the regular registers. + */ + SAVE_GUEST_VFP_REGS() + LOAD_HOST_VFP_REGS() +#endif + LOAD_HOST_REGS() + SAVE_EXIT_INFO() + + /* Restore host VTTBR */ + mov x9, #VTTBR_HOST + msr vttbr_el2, x9 +.endm + + +.macro handle_el2_excp type + .align 7 + /* Save registers before modifying so we can restore them */ + str x9, [sp, #-16]! 
+ + /* Test if the exception happened when the host was running */ + mrs x9, vttbr_el2 + cmp x9, #VTTBR_HOST + beq 1f + + /* We got the exception while the guest was running */ + ldr x9, [sp], #16 + do_world_switch_to_host + b 2f +1: + /* We got the exception while the host was running */ + ldr x9, [sp], #16 +2: + mov x0, \type + eret +.endm + + +ENTRY(handle_el2_el2h_sync) + handle_el2_excp #EXCP_TYPE_EL2_SYNC +END(handle_el2_el2h_sync) + +ENTRY(handle_el2_el2h_irq) + handle_el2_excp #EXCP_TYPE_EL2_IRQ +END(handle_el2_el2h_sync) + +ENTRY(handle_el2_el2h_fiq) + handle_el2_excp #EXCP_TYPE_EL2_FIQ +END(handle_el2_el2h_sync) + +ENTRY(handle_el2_el2h_error) + handle_el2_excp #EXCP_TYPE_EL2_ERROR +END(handle_el2_el2h_sync) + + +ENTRY(handle_el2_el1_sync64) + /* Save registers before modifying so we can restore them */ + str x9, [sp, #-16]! + + /* Check for host hypervisor call */ + mrs x9, vttbr_el2 + cmp x9, #VTTBR_HOST + beq 1f + + /* Restore register */ + ldr x9, [sp], #16 + + /* Guest exception taken to EL2 */ + do_world_switch_to_host + mov x0, #EXCP_TYPE_EL1_SYNC + b exit + +1: + /* Restore register */ + ldr x9, [sp], #16 + + cmp x0, #HYP_GET_VECTOR_TABLE + beq 2f + b call_function +2: + /* Return the vector table base address */ + mrs x0, vbar_el2 +exit: + eret +END(handle_el2_el1_sync64) + + +/* + * Call a function in EL2 context + * + * Expecting: + * x0 - function virtual address + * x1-x7 - function parameters + */ +ENTRY(call_function) + /* Save the function address before shuffling parameters */ + mov x9, x0 + + /* Shuffle function parameters */ + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + mov x4, x5 + mov x5, x6 + mov x6, x7 + + /* Call function */ + br x9 +END(call_function) + + +/* + * We only trap IRQ, FIQ and SError exceptions when a guest is running. Do a + * world switch to host to handle these exceptions. + */ + + +ENTRY(handle_el2_el1_irq64) + do_world_switch_to_host + str x9, [sp, #-16]! + mrs x9, ich_misr_el2 + cmp x9, xzr + beq 1f + mov x0, #EXCP_TYPE_MAINT_IRQ + b 2f +1: + mov x0, #EXCP_TYPE_EL1_IRQ +2: + ldr x9, [sp], #16 + eret +END(handle_el2_el1_irq) + +ENTRY(handle_el2_el1_fiq64) + do_world_switch_to_host + mov x0, #EXCP_TYPE_EL1_FIQ + eret +END(handle_el2_el1_fiq64) + +ENTRY(handle_el2_el1_error64) + do_world_switch_to_host + mov x0, #EXCP_TYPE_EL1_ERROR + eret +END(handle_el2_el1_error64) + + +/* + * Usage: + * void vmm_enter_guest(struct hypctx *hypctx) + * + * Expecting: + * x0 - hypctx address + */ +ENTRY(vmm_enter_guest) + /* Save hypctx address */ + msr tpidr_el2, x0 + + SAVE_HOST_REGS() +#ifdef VFP + SAVE_HOST_VFP_REGS() + /* + * Loading the guest VFP registers needs to come before loading the + * rest of the registers because this process dirties the regular + * registers. + */ + LOAD_GUEST_VFP_REGS() +#endif + LOAD_GUEST_REGS() + + /* Enter guest */ + eret +END(vmm_enter_guest) + + +/* + * Usage: + * void vmm_cleanup(void *hyp_stub_vectors) + * + * Expecting: + * x0 - physical address of hyp_stub_vectors + */ +ENTRY(vmm_cleanup) + /* Restore the stub vectors */ + msr vbar_el2, x0 + + /* Disable the MMU */ + dsb sy + mrs x2, sctlr_el2 + bic x2, x2, #SCTLR_EL2_M + msr sctlr_el2, x2 + + eret +END(vmm_cleanup) + +.macro read_reg name + mrs x0, \name +.endm + +/* + * Return the value of the ICH_VTR_EL2 register. + */ +ENTRY(vmm_read_ich_vtr_el2) + read_reg ich_vtr_el2 + eret +END(vmm_read_ich_vtr_el2) + +/* + * Return the value of the CNTHCTL_EL2 register. 
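Because the EL1 IRQ, FIQ and SError vectors cannot rely on ESR_EL2 to identify the exception, every handler above loads one of the EXCP_TYPE_* codes into x0 before returning to the host. A self-contained sketch of how a caller might interpret that return value is given below; the descriptions restate what the vectors above do, and the helper name is illustrative.

#include <stdint.h>

/* Exception type codes returned in x0 by the EL2 vectors (see hyp.h). */
#define EXCP_TYPE_EL1_SYNC	0
#define EXCP_TYPE_EL1_IRQ	1
#define EXCP_TYPE_EL1_FIQ	2
#define EXCP_TYPE_EL1_ERROR	3
#define EXCP_TYPE_MAINT_IRQ	8

static const char *
excp_type_name(uint64_t excp_type)
{
	switch (excp_type) {
	case EXCP_TYPE_EL1_SYNC:
		return ("synchronous exception taken from the guest");
	case EXCP_TYPE_EL1_IRQ:
		return ("host IRQ asserted while the guest was running");
	case EXCP_TYPE_EL1_FIQ:
		return ("host FIQ asserted while the guest was running");
	case EXCP_TYPE_EL1_ERROR:
		return ("SError taken while the guest was running");
	case EXCP_TYPE_MAINT_IRQ:
		return ("GIC maintenance interrupt during a guest exit");
	default:
		return ("unexpected exception type");
	}
}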
+ */ +ENTRY(vmm_read_cnthctl_el2) + read_reg cnthctl_el2 + eret +END(vmm_read_cnthctl_el2) + +/* + * Return the value of the TCR_EL2 register. + */ +ENTRY(vmm_read_tcr_el2) + read_reg tcr_el2 + eret +END(vmm_read_tcr_el2) + + + +hyp_code_end: Index: sys/arm64/vmm/hyp_genassym.c =================================================================== --- /dev/null +++ sys/arm64/vmm/hyp_genassym.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
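hyp_genassym.c below is never linked into the kernel; it exists so that the build can turn the ASSYM() entries into assembler constants (hyp_assym.h) that hyp.S uses to index struct hypctx. The HYPCTX_REGS_Xn values are simply the offset of the register array plus n * 8, which is only valid if the general-purpose registers are laid out as contiguous 64-bit slots. A small stand-alone sketch of that invariant, using a stand-in structure rather than the real struct hypctx from arm64.h:

#include <stddef.h>
#include <stdint.h>

/* Stand-in for the register block inside struct hypctx (illustrative). */
struct guest_regs {
	uint64_t x[30];		/* x0-x29 stored back to back */
	uint64_t lr;
	uint64_t sp;
	uint64_t elr;
	uint64_t spsr;
};

/* The "offset of the array plus n * 8" pattern relies on this layout. */
_Static_assert(offsetof(struct guest_regs, x[9]) ==
    offsetof(struct guest_regs, x[0]) + 9 * 8,
    "general-purpose registers must be contiguous 8-byte slots");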
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arm64.h" + +ASSYM(HYPCTX_REGS_X0, offsetof(struct hypctx, regs) + 0 * 8); +ASSYM(HYPCTX_REGS_X1, offsetof(struct hypctx, regs) + 1 * 8); +ASSYM(HYPCTX_REGS_X2, offsetof(struct hypctx, regs) + 2 * 8); +ASSYM(HYPCTX_REGS_X3, offsetof(struct hypctx, regs) + 3 * 8); +ASSYM(HYPCTX_REGS_X4, offsetof(struct hypctx, regs) + 4 * 8); +ASSYM(HYPCTX_REGS_X5, offsetof(struct hypctx, regs) + 5 * 8); +ASSYM(HYPCTX_REGS_X6, offsetof(struct hypctx, regs) + 6 * 8); +ASSYM(HYPCTX_REGS_X7, offsetof(struct hypctx, regs) + 7 * 8); +ASSYM(HYPCTX_REGS_X8, offsetof(struct hypctx, regs) + 8 * 8); +ASSYM(HYPCTX_REGS_X9, offsetof(struct hypctx, regs) + 9 * 8); +ASSYM(HYPCTX_REGS_X10, offsetof(struct hypctx, regs) + 10 * 8); +ASSYM(HYPCTX_REGS_X11, offsetof(struct hypctx, regs) + 11 * 8); +ASSYM(HYPCTX_REGS_X12, offsetof(struct hypctx, regs) + 12 * 8); +ASSYM(HYPCTX_REGS_X13, offsetof(struct hypctx, regs) + 13 * 8); +ASSYM(HYPCTX_REGS_X14, offsetof(struct hypctx, regs) + 14 * 8); +ASSYM(HYPCTX_REGS_X15, offsetof(struct hypctx, regs) + 15 * 8); +ASSYM(HYPCTX_REGS_X16, offsetof(struct hypctx, regs) + 16 * 8); +ASSYM(HYPCTX_REGS_X17, offsetof(struct hypctx, regs) + 17 * 8); +ASSYM(HYPCTX_REGS_X18, offsetof(struct hypctx, regs) + 18 * 8); +ASSYM(HYPCTX_REGS_X19, offsetof(struct hypctx, regs) + 19 * 8); +ASSYM(HYPCTX_REGS_X20, offsetof(struct hypctx, regs) + 20 * 8); +ASSYM(HYPCTX_REGS_X21, offsetof(struct hypctx, regs) + 21 * 8); +ASSYM(HYPCTX_REGS_X22, offsetof(struct hypctx, regs) + 22 * 8); +ASSYM(HYPCTX_REGS_X23, offsetof(struct hypctx, regs) + 23 * 8); +ASSYM(HYPCTX_REGS_X24, offsetof(struct hypctx, regs) + 24 * 8); +ASSYM(HYPCTX_REGS_X25, offsetof(struct hypctx, regs) + 25 * 8); +ASSYM(HYPCTX_REGS_X26, offsetof(struct hypctx, regs) + 26 * 8); +ASSYM(HYPCTX_REGS_X27, offsetof(struct hypctx, regs) + 27 * 8); +ASSYM(HYPCTX_REGS_X28, offsetof(struct hypctx, regs) + 28 * 8); +ASSYM(HYPCTX_REGS_X29, offsetof(struct hypctx, regs) + 29 * 8); +ASSYM(HYPCTX_REGS_LR, offsetof(struct hypctx, regs.lr)); +ASSYM(HYPCTX_REGS_SP, offsetof(struct hypctx, regs.sp)); +ASSYM(HYPCTX_REGS_ELR, offsetof(struct hypctx, regs.elr)); +ASSYM(HYPCTX_REGS_SPSR, offsetof(struct hypctx, regs.spsr)); + +ASSYM(HYPCTX_ACTLR_EL1, offsetof(struct hypctx, actlr_el1)); +ASSYM(HYPCTX_AMAIR_EL1, offsetof(struct hypctx, amair_el1)); +ASSYM(HYPCTX_ELR_EL1, offsetof(struct hypctx, elr_el1)); +ASSYM(HYPCTX_FAR_EL1, offsetof(struct hypctx, far_el1)); +ASSYM(HYPCTX_FP, offsetof(struct hypctx, fp)); +ASSYM(HYPCTX_MAIR_EL1, offsetof(struct hypctx, mair_el1)); +ASSYM(HYPCTX_PAR_EL1, offsetof(struct hypctx, par_el1)); +ASSYM(HYPCTX_SP_EL0, offsetof(struct hypctx, sp_el0)); +ASSYM(HYPCTX_TCR_EL1, offsetof(struct hypctx, tcr_el1)); +ASSYM(HYPCTX_TPIDR_EL0, offsetof(struct hypctx, tpidr_el0)); +ASSYM(HYPCTX_TPIDRRO_EL0, offsetof(struct hypctx, tpidrro_el0)); +ASSYM(HYPCTX_TPIDR_EL1, offsetof(struct hypctx, tpidr_el1)); +ASSYM(HYPCTX_TTBR0_EL1, offsetof(struct hypctx, ttbr0_el1)); +ASSYM(HYPCTX_TTBR1_EL1, offsetof(struct hypctx, ttbr1_el1)); +ASSYM(HYPCTX_VBAR_EL1, offsetof(struct hypctx, vbar_el1)); +ASSYM(HYPCTX_AFSR0_EL1, offsetof(struct hypctx, afsr0_el1)); +ASSYM(HYPCTX_AFSR1_EL1, offsetof(struct hypctx, afsr1_el1)); +ASSYM(HYPCTX_CONTEXTIDR_EL1, offsetof(struct hypctx, contextidr_el1)); +ASSYM(HYPCTX_CPACR_EL1, offsetof(struct hypctx, cpacr_el1)); +ASSYM(HYPCTX_ESR_EL1, offsetof(struct hypctx, esr_el1)); +ASSYM(HYPCTX_SCTLR_EL1, offsetof(struct hypctx, sctlr_el1)); 
+ASSYM(HYPCTX_SPSR_EL1, offsetof(struct hypctx, spsr_el1)); + +ASSYM(HYPCTX_ELR_EL2, offsetof(struct hypctx, elr_el2)); +ASSYM(HYPCTX_HCR_EL2, offsetof(struct hypctx, hcr_el2)); +ASSYM(HYPCTX_VPIDR_EL2, offsetof(struct hypctx, vpidr_el2)); +ASSYM(HYPCTX_VMPIDR_EL2, offsetof(struct hypctx, vmpidr_el2)); +ASSYM(HYPCTX_CPTR_EL2, offsetof(struct hypctx, cptr_el2)); +ASSYM(HYPCTX_SPSR_EL2, offsetof(struct hypctx, spsr_el2)); + +ASSYM(HYPCTX_HYP, offsetof(struct hypctx, hyp)); + +ASSYM(HYP_VTTBR_EL2, offsetof(struct hyp, vttbr_el2)); +ASSYM(HYP_VTIMER_CNTHCTL_EL2, offsetof(struct hyp, vtimer.cnthctl_el2)); +ASSYM(HYP_VTIMER_CNTVOFF_EL2, offsetof(struct hyp, vtimer.cntvoff_el2)); + +ASSYM(HYPCTX_EXIT_INFO_ESR_EL2, offsetof(struct hypctx, exit_info.esr_el2)); +ASSYM(HYPCTX_EXIT_INFO_FAR_EL2, offsetof(struct hypctx, exit_info.far_el2)); +ASSYM(HYPCTX_EXIT_INFO_HPFAR_EL2, offsetof(struct hypctx, exit_info.hpfar_el2)); + +ASSYM(HYPCTX_VGIC_ICH_LR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_lr_el2)); +ASSYM(HYPCTX_VGIC_ICH_LR_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_lr_num)); +ASSYM(HYPCTX_VGIC_ICH_AP0R_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_ap0r_el2)); +ASSYM(HYPCTX_VGIC_ICH_AP0R_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_ap0r_num)); +ASSYM(HYPCTX_VGIC_ICH_AP1R_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_ap1r_el2)); +ASSYM(HYPCTX_VGIC_ICH_AP1R_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_ap1r_num)); +ASSYM(HYPCTX_VGIC_ICH_EISR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_eisr_el2)); +ASSYM(HYPCTX_VGIC_ICH_ELRSR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_elrsr_el2)); +ASSYM(HYPCTX_VGIC_ICH_HCR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_hcr_el2)); +ASSYM(HYPCTX_VGIC_ICH_MISR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_misr_el2)); +ASSYM(HYPCTX_VGIC_ICH_VMCR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_vmcr_el2)); +ASSYM(HYPCTX_VGIC_ICH_LR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_lr_el2)); + +ASSYM(HYPCTX_VTIMER_CPU_CNTKCTL_EL1, offsetof(struct hypctx, vtimer_cpu.cntkctl_el1)); +ASSYM(HYPCTX_VTIMER_CPU_CNTV_CVAL_EL0, offsetof(struct hypctx, vtimer_cpu.cntv_cval_el0)); +ASSYM(HYPCTX_VTIMER_CPU_CNTV_CTL_EL0, offsetof(struct hypctx, vtimer_cpu.cntv_ctl_el0)); + +#ifdef VFP +ASSYM(HYPCTX_VFPSTATE_Q0, offsetof(struct hypctx, vfpstate.vfp_regs) + 0 * 16); +ASSYM(HYPCTX_VFPSTATE_Q1, offsetof(struct hypctx, vfpstate.vfp_regs) + 1 * 16); +ASSYM(HYPCTX_VFPSTATE_Q2, offsetof(struct hypctx, vfpstate.vfp_regs) + 2 * 16); +ASSYM(HYPCTX_VFPSTATE_Q3, offsetof(struct hypctx, vfpstate.vfp_regs) + 3 * 16); +ASSYM(HYPCTX_VFPSTATE_Q4, offsetof(struct hypctx, vfpstate.vfp_regs) + 4 * 16); +ASSYM(HYPCTX_VFPSTATE_Q5, offsetof(struct hypctx, vfpstate.vfp_regs) + 5 * 16); +ASSYM(HYPCTX_VFPSTATE_Q6, offsetof(struct hypctx, vfpstate.vfp_regs) + 6 * 16); +ASSYM(HYPCTX_VFPSTATE_Q7, offsetof(struct hypctx, vfpstate.vfp_regs) + 7 * 16); +ASSYM(HYPCTX_VFPSTATE_Q8, offsetof(struct hypctx, vfpstate.vfp_regs) + 8 * 16); +ASSYM(HYPCTX_VFPSTATE_Q9, offsetof(struct hypctx, vfpstate.vfp_regs) + 9 * 16); +ASSYM(HYPCTX_VFPSTATE_Q10, offsetof(struct hypctx, vfpstate.vfp_regs) + 10 * 16); +ASSYM(HYPCTX_VFPSTATE_Q11, offsetof(struct hypctx, vfpstate.vfp_regs) + 11 * 16); +ASSYM(HYPCTX_VFPSTATE_Q12, offsetof(struct hypctx, vfpstate.vfp_regs) + 12 * 16); +ASSYM(HYPCTX_VFPSTATE_Q13, offsetof(struct hypctx, vfpstate.vfp_regs) + 13 * 16); +ASSYM(HYPCTX_VFPSTATE_Q14, offsetof(struct hypctx, vfpstate.vfp_regs) + 14 * 16); +ASSYM(HYPCTX_VFPSTATE_Q15, offsetof(struct hypctx, vfpstate.vfp_regs) + 15 * 16); +ASSYM(HYPCTX_VFPSTATE_Q16, 
offsetof(struct hypctx, vfpstate.vfp_regs) + 16 * 16); +ASSYM(HYPCTX_VFPSTATE_Q17, offsetof(struct hypctx, vfpstate.vfp_regs) + 17 * 16); +ASSYM(HYPCTX_VFPSTATE_Q18, offsetof(struct hypctx, vfpstate.vfp_regs) + 18 * 16); +ASSYM(HYPCTX_VFPSTATE_Q19, offsetof(struct hypctx, vfpstate.vfp_regs) + 19 * 16); +ASSYM(HYPCTX_VFPSTATE_Q20, offsetof(struct hypctx, vfpstate.vfp_regs) + 20 * 16); +ASSYM(HYPCTX_VFPSTATE_Q21, offsetof(struct hypctx, vfpstate.vfp_regs) + 21 * 16); +ASSYM(HYPCTX_VFPSTATE_Q22, offsetof(struct hypctx, vfpstate.vfp_regs) + 22 * 16); +ASSYM(HYPCTX_VFPSTATE_Q23, offsetof(struct hypctx, vfpstate.vfp_regs) + 23 * 16); +ASSYM(HYPCTX_VFPSTATE_Q24, offsetof(struct hypctx, vfpstate.vfp_regs) + 24 * 16); +ASSYM(HYPCTX_VFPSTATE_Q25, offsetof(struct hypctx, vfpstate.vfp_regs) + 25 * 16); +ASSYM(HYPCTX_VFPSTATE_Q26, offsetof(struct hypctx, vfpstate.vfp_regs) + 26 * 16); +ASSYM(HYPCTX_VFPSTATE_Q27, offsetof(struct hypctx, vfpstate.vfp_regs) + 27 * 16); +ASSYM(HYPCTX_VFPSTATE_Q28, offsetof(struct hypctx, vfpstate.vfp_regs) + 28 * 16); +ASSYM(HYPCTX_VFPSTATE_Q29, offsetof(struct hypctx, vfpstate.vfp_regs) + 29 * 16); +ASSYM(HYPCTX_VFPSTATE_Q30, offsetof(struct hypctx, vfpstate.vfp_regs) + 30 * 16); +ASSYM(HYPCTX_VFPSTATE_Q31, offsetof(struct hypctx, vfpstate.vfp_regs) + 31 * 16); + + +ASSYM(HYPCTX_VFPSTATE_FPCR, offsetof(struct hypctx, vfpstate.vfp_fpcr)); +ASSYM(HYPCTX_VFPSTATE_FPSR, offsetof(struct hypctx, vfpstate.vfp_fpsr)); +#endif Index: sys/arm64/vmm/hyp_macros.h =================================================================== --- /dev/null +++ sys/arm64/vmm/hyp_macros.h @@ -0,0 +1,690 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_HYP_MACROS_H_ +#define _VMM_HYP_MACROS_H_ + + +#define PUSH_SYS_REG_PAIR(reg0, reg1) \ + mrs x1, reg0; \ + mrs x2, reg1; \ + stp x2, x1, [sp, #-16]!; + + +#define PUSH_SYS_REG(reg) \ + mrs x1, reg; \ + str x1, [sp, #-16]!; + + +/* + * Push all the host registers before entering the guest. 
+ */ +#define SAVE_HOST_REGS() \ + /* Save the regular registers */ \ + stp x0, x1, [sp, #-16]!; \ + stp x2, x3, [sp, #-16]!; \ + stp x4, x5, [sp, #-16]!; \ + stp x6, x7, [sp, #-16]!; \ + stp x8, x9, [sp, #-16]!; \ + stp x10, x11, [sp, #-16]!; \ + stp x12, x13, [sp, #-16]!; \ + stp x14, x15, [sp, #-16]!; \ + stp x16, x17, [sp, #-16]!; \ + stp x18, x19, [sp, #-16]!; \ + stp x20, x21, [sp, #-16]!; \ + stp x22, x23, [sp, #-16]!; \ + stp x24, x25, [sp, #-16]!; \ + stp x26, x27, [sp, #-16]!; \ + stp x28, x29, [sp, #-16]!; \ + stp lr, fp, [sp, #-16]!; \ + \ + /* Push the system registers */ \ + PUSH_SYS_REG_PAIR(SP_EL0, SP_EL1); \ + PUSH_SYS_REG_PAIR(ACTLR_EL1, AMAIR_EL1); \ + PUSH_SYS_REG_PAIR(ELR_EL1, PAR_EL1); \ + PUSH_SYS_REG_PAIR(MAIR_EL1, TCR_EL1); \ + PUSH_SYS_REG_PAIR(TPIDR_EL0, TPIDRRO_EL0); \ + PUSH_SYS_REG_PAIR(TPIDR_EL1, TTBR0_EL1); \ + PUSH_SYS_REG_PAIR(TTBR1_EL1, VBAR_EL1); \ + PUSH_SYS_REG_PAIR(AFSR0_EL1, AFSR1_EL1); \ + PUSH_SYS_REG_PAIR(CONTEXTIDR_EL1, CPACR_EL1); \ + PUSH_SYS_REG_PAIR(ESR_EL1, FAR_EL1); \ + PUSH_SYS_REG_PAIR(SCTLR_EL1, SPSR_EL1); \ + PUSH_SYS_REG_PAIR(ELR_EL2, HCR_EL2); \ + PUSH_SYS_REG_PAIR(VPIDR_EL2, VMPIDR_EL2); \ + PUSH_SYS_REG_PAIR(CPTR_EL2, SPSR_EL2); \ + PUSH_SYS_REG_PAIR(ICH_HCR_EL2, ICH_VMCR_EL2); \ + PUSH_SYS_REG_PAIR(CNTHCTL_EL2, CNTKCTL_EL1); \ + PUSH_SYS_REG(CNTVOFF_EL2); + + +#define SAVE_HOST_VFP_REGS() \ + stp q0, q1, [sp, #-16 * 2]!; \ + stp q2, q3, [sp, #-16 * 2]!; \ + stp q4, q5, [sp, #-16 * 2]!; \ + stp q6, q7, [sp, #-16 * 2]!; \ + stp q8, q9, [sp, #-16 * 2]!; \ + stp q10, q11, [sp, #-16 * 2]!; \ + stp q12, q13, [sp, #-16 * 2]!; \ + stp q14, q15, [sp, #-16 * 2]!; \ + stp q16, q17, [sp, #-16 * 2]!; \ + stp q18, q19, [sp, #-16 * 2]!; \ + stp q20, q21, [sp, #-16 * 2]!; \ + stp q22, q23, [sp, #-16 * 2]!; \ + stp q24, q25, [sp, #-16 * 2]!; \ + stp q26, q27, [sp, #-16 * 2]!; \ + stp q28, q29, [sp, #-16 * 2]!; \ + stp q30, q31, [sp, #-16 * 2]!; \ + PUSH_SYS_REG_PAIR(FPCR, FPSR); + + +#define POP_SYS_REG_PAIR(reg0, reg1) \ + ldp x2, x1, [sp], #16; \ + msr reg1, x2; \ + msr reg0, x1; + + +#define LOAD_HOST_VFP_REGS() \ + POP_SYS_REG_PAIR(FPCR, FPSR); \ + ldp q30, q31, [sp], #16 * 2; \ + ldp q28, q29, [sp], #16 * 2; \ + ldp q26, q27, [sp], #16 * 2; \ + ldp q24, q25, [sp], #16 * 2; \ + ldp q22, q23, [sp], #16 * 2; \ + ldp q20, q21, [sp], #16 * 2; \ + ldp q18, q19, [sp], #16 * 2; \ + ldp q16, q17, [sp], #16 * 2; \ + ldp q14, q15, [sp], #16 * 2; \ + ldp q12, q13, [sp], #16 * 2; \ + ldp q10, q11, [sp], #16 * 2; \ + ldp q8, q9, [sp], #16 * 2; \ + ldp q6, q7, [sp], #16 * 2; \ + ldp q4, q5, [sp], #16 * 2; \ + ldp q2, q3, [sp], #16 * 2; \ + ldp q0, q1, [sp], #16 * 2; \ + + +#define POP_SYS_REG(reg) \ + ldr x1, [sp], #16; \ + msr reg, x1; + + +/* + * Restore all the host registers before entering the host. 
+ */ +#define LOAD_HOST_REGS() \ + /* Pop the system registers first */ \ + POP_SYS_REG(CNTVOFF_EL2); \ + POP_SYS_REG_PAIR(CNTHCTL_EL2, CNTKCTL_EL1); \ + POP_SYS_REG_PAIR(ICH_HCR_EL2, ICH_VMCR_EL2); \ + POP_SYS_REG_PAIR(CPTR_EL2, SPSR_EL2); \ + POP_SYS_REG_PAIR(VPIDR_EL2, VMPIDR_EL2); \ + POP_SYS_REG_PAIR(ELR_EL2, HCR_EL2); \ + POP_SYS_REG_PAIR(SCTLR_EL1, SPSR_EL1); \ + POP_SYS_REG_PAIR(ESR_EL1, FAR_EL1); \ + POP_SYS_REG_PAIR(CONTEXTIDR_EL1, CPACR_EL1); \ + POP_SYS_REG_PAIR(AFSR0_EL1, AFSR1_EL1); \ + POP_SYS_REG_PAIR(TTBR1_EL1, VBAR_EL1); \ + POP_SYS_REG_PAIR(TPIDR_EL1, TTBR0_EL1); \ + POP_SYS_REG_PAIR(TPIDR_EL0, TPIDRRO_EL0); \ + POP_SYS_REG_PAIR(MAIR_EL1, TCR_EL1); \ + POP_SYS_REG_PAIR(ELR_EL1, PAR_EL1); \ + POP_SYS_REG_PAIR(ACTLR_EL1, AMAIR_EL1); \ + POP_SYS_REG_PAIR(SP_EL0, SP_EL1); \ + \ + /* Pop the regular registers */ \ + ldp lr, fp, [sp], #16; \ + ldp x28, x29, [sp], #16; \ + ldp x26, x27, [sp], #16; \ + ldp x24, x25, [sp], #16; \ + ldp x22, x23, [sp], #16; \ + ldp x20, x21, [sp], #16; \ + ldp x18, x19, [sp], #16; \ + ldp x16, x17, [sp], #16; \ + ldp x14, x15, [sp], #16; \ + ldp x12, x13, [sp], #16; \ + ldp x10, x11, [sp], #16; \ + ldp x8, x9, [sp], #16; \ + ldp x6, x7, [sp], #16; \ + ldp x4, x5, [sp], #16; \ + ldp x2, x3, [sp], #16; \ + ldp x0, x1, [sp], #16; \ + + +#define SAVE_ARRAY_REG64(reg, dest, remaining) \ + cmp remaining, #0; \ + beq 9f; \ + mrs x7, reg; \ + str x7, [dest]; \ + add dest, dest, #8; \ + sub remaining, remaining, #1; + + +#define SAVE_LR_REGS() \ + /* Load the number of ICH_LR_EL2 regs from memory */ \ + mov x2, #HYPCTX_VGIC_ICH_LR_NUM; \ + ldr x3, [x0, x2]; \ + /* x1 holds the destination address */ \ + mov x1, #HYPCTX_VGIC_ICH_LR_EL2; \ + add x1, x0, x1; \ + SAVE_ARRAY_REG64(ich_lr0_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr1_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr2_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr3_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr4_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr5_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr6_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr7_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr8_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr9_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr10_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr11_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr12_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr13_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr14_el2, x1, x3); \ + SAVE_ARRAY_REG64(ich_lr15_el2, x1, x3); \ +9:; \ + ; + + +#define SAVE_ARRAY_REG32(reg, dest, remaining) \ + cmp remaining, #0; \ + beq 9f; \ + mrs x7, reg; \ + str w7, [dest]; \ + add dest, dest, #4; \ + sub remaining, remaining, #1; + + +#define SAVE_AP0R_REGS() \ + /* Load the number of ICH_AP0R_EL2 regs from memory */ \ + mov x2, #HYPCTX_VGIC_ICH_AP0R_NUM; \ + ldr x3, [x0, x2]; \ + /* x1 holds the destination address */ \ + mov x1, #HYPCTX_VGIC_ICH_AP0R_EL2; \ + add x1, x0, x1; \ + SAVE_ARRAY_REG32(ich_ap0r0_el2, x1, x3); \ + SAVE_ARRAY_REG32(ich_ap0r1_el2, x1, x3); \ + SAVE_ARRAY_REG32(ich_ap0r2_el2, x1, x3); \ + SAVE_ARRAY_REG32(ich_ap0r3_el2, x1, x3); \ +9:; \ + ; + + +#define SAVE_AP1R_REGS() \ + /* Load the number of ICH_AP1R_EL2 regs from memory */ \ + mov x2, #HYPCTX_VGIC_ICH_AP1R_NUM; \ + ldr x3, [x0, x2]; \ + /* x1 holds the destination address */ \ + mov x1, #HYPCTX_VGIC_ICH_AP1R_EL2; \ + add x1, x0, x1; \ + SAVE_ARRAY_REG32(ich_ap1r0_el2, x1, x3); \ + SAVE_ARRAY_REG32(ich_ap1r1_el2, x1, x3); \ + SAVE_ARRAY_REG32(ich_ap1r2_el2, x1, x3); \ + SAVE_ARRAY_REG32(ich_ap1r3_el2, x1, x3); \ +9:; \ + ; + + +/* + * The STR and LDR instructions take an offset 
between [-256, 255], but the + * hypctx register offset can be larger than that. To get around this limitation + * we use a temporary register to hold the offset. + */ +#define SAVE_SYS_REG64(prefix, reg) \ + mrs x1, reg; \ + mov x2, prefix ##_ ##reg; \ + str x1, [x0, x2]; + + +#define SAVE_SYS_REG32(prefix, reg) \ + mrs x1, reg; \ + mov x2, prefix ##_ ##reg; \ + str w1, [x0, x2]; + + +#define SAVE_REG(prefix, reg) \ + mov x1, prefix ##_ ##reg; \ + str reg, [x0, x1]; + +/* + * The STP and LDP instructions takes an immediate in the range of [-512, 504] + * when using the post-indexed addressing mode, but the hypctx register offset + * can be larger than that. To get around this limitation we compute the address + * by adding the hypctx base address with the struct member offset. + * + * Using STP/LDP to save/load register pairs to the corresponding struct hypctx + * variables works because the registers are declared as an array and they are + * stored in contiguous memory addresses. + */ + +#define SAVE_REG_PAIR(prefix, reg0, reg1) \ + mov x1, prefix ##_ ##reg0; \ + add x1, x0, x1; \ + stp reg0, reg1, [x1]; + + +/* + * We use x0 to load the hypctx address from TPIDR_EL2 and x1 and x2 as + * temporary registers to compute the hypctx member addresses. To save the guest + * values at first we push them on the stack, use these temporary registers to + * save the rest of the registers and at the end we pop the values from the + * stack and save them. + */ +#define SAVE_GUEST_X_REGS() \ + /* Push x0 */ \ + str x0, [sp, #-16]!; \ + /* Restore hypctx address */ \ + mrs x0, tpidr_el2; \ + /* Push x1 and x2 */ \ + stp x1, x2, [sp, #-16]!; \ + \ + /* Save the other registers */ \ + SAVE_REG_PAIR(HYPCTX_REGS, X3, X4); \ + SAVE_REG_PAIR(HYPCTX_REGS, X5, X6); \ + SAVE_REG_PAIR(HYPCTX_REGS, X7, X8); \ + SAVE_REG_PAIR(HYPCTX_REGS, X9, X10); \ + SAVE_REG_PAIR(HYPCTX_REGS, X11, X12); \ + SAVE_REG_PAIR(HYPCTX_REGS, X13, X14); \ + SAVE_REG_PAIR(HYPCTX_REGS, X15, X16); \ + SAVE_REG_PAIR(HYPCTX_REGS, X17, X18); \ + SAVE_REG_PAIR(HYPCTX_REGS, X19, X20); \ + SAVE_REG_PAIR(HYPCTX_REGS, X21, X22); \ + SAVE_REG_PAIR(HYPCTX_REGS, X23, X24); \ + SAVE_REG_PAIR(HYPCTX_REGS, X25, X26); \ + SAVE_REG_PAIR(HYPCTX_REGS, X27, X28); \ + SAVE_REG(HYPCTX_REGS, X29); \ + SAVE_REG(HYPCTX_REGS, LR); \ + \ + /* Pop and save x1 and x2 */ \ + ldp x1, x2, [sp], #16; \ + mov x3, #HYPCTX_REGS_X1; \ + add x3, x0, x3; \ + stp x1, x2, [x3]; \ + /* Pop and save x0 */ \ + ldr x1, [sp], #16; \ + mov x2, #HYPCTX_REGS_X0; \ + add x2, x2, x0; \ + str x1, [x2]; + + +/* + * Save all the guest registers. Start by saving the regular registers first + * because those will be used as temporary registers for accessing the hypctx + * member addresses. + * + * Expecting: + * TPIDR_EL2 - struct hypctx address + * + * After call: + * x0 - struct hypctx address + */ +#define SAVE_GUEST_REGS() \ + SAVE_GUEST_X_REGS(); \ + \ + SAVE_REG(HYPCTX, FP); \ + \ + SAVE_SYS_REG32(HYPCTX_VTIMER_CPU, CNTKCTL_EL1); \ + SAVE_SYS_REG64(HYPCTX_VTIMER_CPU, CNTV_CVAL_EL0); \ + SAVE_SYS_REG32(HYPCTX_VTIMER_CPU, CNTV_CTL_EL0);\ + \ + /* \ + * ICH_EISR_EL2, ICH_ELRSR_EL2 and ICH_MISR_EL2 are read-only and are \ + * saved because they are modified by the hardware as part of the \ + * interrupt virtualization process and we need to inspect them in \ + * the VGIC driver. 
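ICH_EISR_EL2 and ICH_ELRSR_EL2 are status registers: a set bit in EISR marks a list register whose interrupt the guest has EOI'd, and a set bit in ELRSR marks a list register that is empty and reusable. The copies saved here are what the VGIC code can inspect after the world switch. A hedged sketch of such a consumer is shown below; it assumes the struct vgic_v3_cpu_if fields and the ICH_LR_EL2_STATE_MASK constant used elsewhere in this patch, and the function itself is illustrative, not part of the change.

/*
 * Illustrative only: clear every list register the guest has EOI'd and
 * count how many list registers are free for the next batch of buffered
 * interrupts.
 */
static int
vgic_v3_fold_lr_state(struct vgic_v3_cpu_if *cpu_if)
{
	int i, lr_free;

	lr_free = 0;
	for (i = 0; i < cpu_if->ich_lr_num; i++) {
		if (cpu_if->ich_eisr_el2 & (1U << i))
			cpu_if->ich_lr_el2[i] &= ~ICH_LR_EL2_STATE_MASK;
		if (cpu_if->ich_elrsr_el2 & (1U << i))
			lr_free++;
	}

	return (lr_free);
}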
\ + */ \ + SAVE_SYS_REG32(HYPCTX_VGIC, ICH_EISR_EL2); \ + SAVE_SYS_REG32(HYPCTX_VGIC, ICH_ELRSR_EL2); \ + SAVE_SYS_REG32(HYPCTX_VGIC, ICH_MISR_EL2); \ + SAVE_SYS_REG32(HYPCTX_VGIC, ICH_HCR_EL2); \ + SAVE_SYS_REG32(HYPCTX_VGIC, ICH_VMCR_EL2); \ + \ + SAVE_LR_REGS(); \ + SAVE_AP0R_REGS(); \ + SAVE_AP1R_REGS(); \ + \ + /* Save the stack pointer. */ \ + mrs x1, sp_el1; \ + mov x2, #HYPCTX_REGS_SP; \ + str x1, [x0, x2]; \ + \ + SAVE_SYS_REG64(HYPCTX, ACTLR_EL1); \ + SAVE_SYS_REG64(HYPCTX, AFSR0_EL1); \ + SAVE_SYS_REG64(HYPCTX, AFSR1_EL1); \ + SAVE_SYS_REG64(HYPCTX, AMAIR_EL1); \ + SAVE_SYS_REG64(HYPCTX, CONTEXTIDR_EL1); \ + SAVE_SYS_REG64(HYPCTX, CPACR_EL1); \ + SAVE_SYS_REG64(HYPCTX, ELR_EL1); \ + SAVE_SYS_REG64(HYPCTX, ESR_EL1); \ + SAVE_SYS_REG64(HYPCTX, FAR_EL1); \ + SAVE_SYS_REG64(HYPCTX, MAIR_EL1); \ + SAVE_SYS_REG64(HYPCTX, PAR_EL1); \ + SAVE_SYS_REG64(HYPCTX, SCTLR_EL1); \ + SAVE_SYS_REG64(HYPCTX, SP_EL0); \ + SAVE_SYS_REG64(HYPCTX, TCR_EL1); \ + SAVE_SYS_REG64(HYPCTX, TPIDR_EL0); \ + SAVE_SYS_REG64(HYPCTX, TPIDRRO_EL0); \ + SAVE_SYS_REG64(HYPCTX, TPIDR_EL1); \ + SAVE_SYS_REG64(HYPCTX, TTBR0_EL1); \ + SAVE_SYS_REG64(HYPCTX, TTBR1_EL1); \ + SAVE_SYS_REG64(HYPCTX, VBAR_EL1); \ + \ + SAVE_SYS_REG32(HYPCTX, SPSR_EL1); \ + \ + SAVE_SYS_REG64(HYPCTX, CPTR_EL2); \ + SAVE_SYS_REG64(HYPCTX, ELR_EL2); \ + SAVE_SYS_REG64(HYPCTX, HCR_EL2); \ + SAVE_SYS_REG64(HYPCTX, VPIDR_EL2); \ + SAVE_SYS_REG64(HYPCTX, VMPIDR_EL2); \ + SAVE_SYS_REG32(HYPCTX, SPSR_EL2); + + +#define SAVE_GUEST_VFP_REGS() \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q0, Q1); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q2, Q3); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q4, Q5); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q6, Q7); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q8, Q9); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q10, Q11); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q12, Q13); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q14, Q15); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q16, Q17); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q18, Q19); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q20, Q21); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q22, Q23); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q24, Q25); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q26, Q27); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q28, Q29); \ + SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q30, Q31); \ + \ + SAVE_SYS_REG32(HYPCTX_VFPSTATE, FPCR); \ + SAVE_SYS_REG32(HYPCTX_VFPSTATE, FPSR); + + +/* See SAVE_SYS_REG */ +#define LOAD_SYS_REG64(prefix, reg) \ + mov x1, prefix ##_ ##reg; \ + ldr x2, [x0, x1]; \ + msr reg, x2; + + +#define LOAD_SYS_REG32(prefix, reg) \ + mov x1, prefix ##_ ##reg; \ + ldr w2, [x0, x1]; \ + msr reg, x2; + + +/* See SAVE_REG_PAIR */ +#define LOAD_REG_PAIR(prefix, reg0, reg1) \ + mov x1, prefix ##_ ##reg0; \ + add x1, x0, x1; \ + ldp reg0, reg1, [x1]; + + +#define LOAD_GUEST_VFP_REGS() \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q0, Q1); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q2, Q3); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q4, Q5); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q6, Q7); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q8, Q9); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q10, Q11); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q12, Q13); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q14, Q15); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q16, Q17); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q18, Q19); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q20, Q21); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q22, Q23); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q24, Q25); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q26, Q27); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q28, Q29); \ + LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q30, Q31); \ + \ + LOAD_SYS_REG32(HYPCTX_VFPSTATE, FPCR); \ + 
LOAD_SYS_REG32(HYPCTX_VFPSTATE, FPSR); + + +#define LOAD_REG(prefix, reg) \ + mov x1, prefix ##_ ##reg; \ + ldr reg, [x0, x1]; + + +/* + * We use x1 as a temporary register to store the hypctx member offset and x0 + * to hold the hypctx address. We load the guest x0 and x1 register values in + * registers x2 and x3, push x2 and x3 on the stack and then we restore x0 and + * x1. + */ +#define LOAD_GUEST_X_REGS() \ + mov x1, #HYPCTX_REGS_X0; \ + /* x1 now holds the address of hypctx reg x0 */ \ + add x1, x1, x0; \ + /* Make x2 = guest x0 and x3 = guest x1 */ \ + ldp x2, x3, [x1]; \ + stp x2, x3, [sp, #-16]!; \ + \ + /* Load the other registers */ \ + LOAD_REG_PAIR(HYPCTX_REGS, X2, X3); \ + LOAD_REG_PAIR(HYPCTX_REGS, X4, X5); \ + LOAD_REG_PAIR(HYPCTX_REGS, X6, X7); \ + LOAD_REG_PAIR(HYPCTX_REGS, X8, X9); \ + LOAD_REG_PAIR(HYPCTX_REGS, X10, X11); \ + LOAD_REG_PAIR(HYPCTX_REGS, X12, X13); \ + LOAD_REG_PAIR(HYPCTX_REGS, X14, X15); \ + LOAD_REG_PAIR(HYPCTX_REGS, X16, X17); \ + LOAD_REG_PAIR(HYPCTX_REGS, X18, X19); \ + LOAD_REG_PAIR(HYPCTX_REGS, X20, X21); \ + LOAD_REG_PAIR(HYPCTX_REGS, X22, X23); \ + LOAD_REG_PAIR(HYPCTX_REGS, X24, X25); \ + LOAD_REG_PAIR(HYPCTX_REGS, X26, X27); \ + LOAD_REG_PAIR(HYPCTX_REGS, X28, X29); \ + LOAD_REG(HYPCTX_REGS, LR); \ + \ + /* Pop guest x0 and x1 from the stack */ \ + ldp x0, x1, [sp], #16; \ + + +#define LOAD_ARRAY_REG64(reg, src, remaining) \ + cmp remaining, #0; \ + beq 9f; \ + ldr x2, [src]; \ + msr reg, x2; \ + add src, src, #8; \ + sub remaining, remaining, #1; + + +#define LOAD_LR_REGS(); \ + /* Load the number of ICH_LR_EL2 regs from memory */ \ + mov x2, #HYPCTX_VGIC_ICH_LR_NUM; \ + ldr x3, [x0, x2]; \ + mov x1, #HYPCTX_VGIC_ICH_LR_EL2; \ + /* x1 holds the load address */ \ + add x1, x0, x1; \ + LOAD_ARRAY_REG64(ich_lr0_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr1_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr2_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr3_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr4_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr5_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr6_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr7_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr8_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr9_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr10_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr11_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr12_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr13_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr14_el2, x1, x3); \ + LOAD_ARRAY_REG64(ich_lr15_el2, x1, x3); \ +9:; \ + ; + + +#define LOAD_ARRAY_REG32(reg, src, remaining) \ + cmp remaining, #0; \ + beq 9f; \ + ldr w2, [src]; \ + msr reg, x2; \ + add src, src, #4; \ + sub remaining, remaining, #1; + + +#define LOAD_AP0R_REGS(); \ + /* Load the number of ICH_AP0R_EL2 regs from memory */ \ + mov x2, #HYPCTX_VGIC_ICH_AP0R_NUM; \ + ldr x3, [x0, x2]; \ + /* x1 holds the load address */ \ + mov x1, #HYPCTX_VGIC_ICH_AP0R_EL2; \ + add x1, x0, x1; \ + LOAD_ARRAY_REG32(ich_ap0r0_el2, x1, x3); \ + LOAD_ARRAY_REG32(ich_ap0r1_el2, x1, x3); \ + LOAD_ARRAY_REG32(ich_ap0r2_el2, x1, x3); \ + LOAD_ARRAY_REG32(ich_ap0r3_el2, x1, x3); \ +9:; \ + ; + + +#define LOAD_AP1R_REGS(); \ + /* Load the number of ICH_AP1R_EL2 regs from memory */ \ + mov x2, #HYPCTX_VGIC_ICH_AP1R_NUM; \ + ldr x3, [x0, x2]; \ + /* x1 holds the load address */ \ + mov x1, #HYPCTX_VGIC_ICH_AP1R_EL2; \ + add x1, x0, x1; \ + LOAD_ARRAY_REG32(ich_ap1r0_el2, x1, x3); \ + LOAD_ARRAY_REG32(ich_ap1r1_el2, x1, x3); \ + LOAD_ARRAY_REG32(ich_ap1r2_el2, x1, x3); \ + LOAD_ARRAY_REG32(ich_ap1r3_el2, x1, x3); \ +9:; \ + ; + + + +#define KTOHYP_REG(reg) \ 
+ mov x7, HYP_KVA_MASK; \ + and reg, reg, x7; \ + mov x7, HYP_KVA_OFFSET; \ + orr reg, reg, x7; + + +/* Load a register from struct hyp *hyp member of hypctx. */ +#define LOAD_HYP_REG(prefix, reg) \ + /* Compute VA of hyp member in x1 */ \ + mov x1, #HYPCTX_HYP; \ + add x1, x1, x0; \ + /* Get hyp address in x2 */ \ + ldr x2, [x1]; \ + /* Transform hyp kernel VA into an EL2 VA */ \ + KTOHYP_REG(x2); \ + /* Get register offset inside struct hyp */ \ + mov x1, prefix ##_ ##reg; \ + /* Compute regster address */ \ + add x2, x2, x1; \ + /* Load the register */ \ + ldr x1, [x2]; \ + msr reg, x1; \ + + +/* + * Restore all the guest registers to their original values. + * + * Expecting: + * x0 - struct hypctx address + * + * After call: + * tpidr_el2 - struct hypctx address + */ +#define LOAD_GUEST_REGS() \ + LOAD_SYS_REG64(HYPCTX, ACTLR_EL1); \ + LOAD_SYS_REG64(HYPCTX, AFSR0_EL1); \ + LOAD_SYS_REG64(HYPCTX, AFSR1_EL1); \ + LOAD_SYS_REG64(HYPCTX, AMAIR_EL1); \ + LOAD_SYS_REG64(HYPCTX, CONTEXTIDR_EL1); \ + LOAD_SYS_REG64(HYPCTX, CPACR_EL1); \ + LOAD_SYS_REG64(HYPCTX, ELR_EL1); \ + LOAD_SYS_REG64(HYPCTX, ESR_EL1); \ + LOAD_SYS_REG64(HYPCTX, FAR_EL1); \ + LOAD_SYS_REG64(HYPCTX, MAIR_EL1); \ + LOAD_SYS_REG64(HYPCTX, PAR_EL1); \ + LOAD_SYS_REG64(HYPCTX, SCTLR_EL1); \ + LOAD_SYS_REG64(HYPCTX, SP_EL0); \ + LOAD_SYS_REG64(HYPCTX, TCR_EL1); \ + LOAD_SYS_REG64(HYPCTX, TPIDR_EL0); \ + LOAD_SYS_REG64(HYPCTX, TPIDRRO_EL0); \ + LOAD_SYS_REG64(HYPCTX, TPIDR_EL1); \ + LOAD_SYS_REG64(HYPCTX, TTBR0_EL1); \ + LOAD_SYS_REG64(HYPCTX, TTBR1_EL1); \ + LOAD_SYS_REG64(HYPCTX, VBAR_EL1); \ + LOAD_SYS_REG32(HYPCTX, SPSR_EL1); \ + \ + LOAD_SYS_REG64(HYPCTX, CPTR_EL2); \ + LOAD_SYS_REG64(HYPCTX, ELR_EL2); \ + LOAD_SYS_REG64(HYPCTX, HCR_EL2); \ + LOAD_SYS_REG64(HYPCTX, VPIDR_EL2); \ + LOAD_SYS_REG64(HYPCTX, VMPIDR_EL2); \ + LOAD_SYS_REG32(HYPCTX, SPSR_EL2); \ + \ + LOAD_SYS_REG32(HYPCTX_VGIC, ICH_HCR_EL2); \ + LOAD_SYS_REG32(HYPCTX_VGIC, ICH_VMCR_EL2); \ + \ + LOAD_SYS_REG32(HYPCTX_VTIMER_CPU, CNTKCTL_EL1); \ + LOAD_SYS_REG64(HYPCTX_VTIMER_CPU, CNTV_CVAL_EL0); \ + LOAD_SYS_REG32(HYPCTX_VTIMER_CPU, CNTV_CTL_EL0); \ + \ + LOAD_REG(HYPCTX, FP); \ + \ + LOAD_HYP_REG(HYP, VTTBR_EL2); \ + LOAD_HYP_REG(HYP_VTIMER, CNTHCTL_EL2); \ + LOAD_HYP_REG(HYP_VTIMER, CNTVOFF_EL2); \ + \ + LOAD_LR_REGS(); \ + LOAD_AP0R_REGS(); \ + LOAD_AP1R_REGS(); \ + \ + /* Load the guest EL1 stack pointer */ \ + mov x1, #HYPCTX_REGS_SP; \ + add x1, x1, x0; \ + ldr x2, [x1]; \ + msr sp_el1, x2; \ + \ + LOAD_GUEST_X_REGS(); \ + + +/* + * Save exit information + * + * Expecting: + * x0 - struct hypctx address + */ +#define SAVE_EXIT_INFO() \ + SAVE_SYS_REG64(HYPCTX_EXIT_INFO, ESR_EL2); \ + SAVE_SYS_REG64(HYPCTX_EXIT_INFO, FAR_EL2); \ + SAVE_SYS_REG64(HYPCTX_EXIT_INFO, HPFAR_EL2); \ + +#endif /* !_VMM_HYP_MACROS_H_ */ Index: sys/arm64/vmm/io/vgic_v3.h =================================================================== --- /dev/null +++ sys/arm64/vmm/io/vgic_v3.h @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_VGIC_V3_H_ +#define _VMM_VGIC_V3_H_ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define VGIC_SGI_NUM (GIC_LAST_SGI - GIC_FIRST_SGI + 1) +#define VGIC_PPI_NUM (GIC_LAST_PPI - GIC_FIRST_PPI + 1) +#define VGIC_SPI_NUM (GIC_LAST_SPI - GIC_FIRST_SPI + 1) +#define VGIC_PRV_I_NUM (VGIC_SGI_NUM + VGIC_PPI_NUM) +#define VGIC_SHR_I_NUM (VGIC_SPI_NUM) + +#define VGIC_ICH_LR_NUM_MAX 16 +#define VGIC_ICH_AP0R_NUM_MAX 4 +#define VGIC_ICH_AP1R_NUM_MAX VGIC_ICH_AP0R_NUM_MAX + +#define ICC_SGI1R_EL1_OP0 0x3 +#define ICC_SGI1R_EL1_OP0_MASK (ICC_SGI1R_EL1_OP0 << ISS_MSR_OP0_SHIFT) +#define ICC_SGI1R_EL1_OP1 0x0 +#define ICC_SGI1R_EL1_OP1_MASK (ICC_SGI1R_EL1_OP1 << ISS_MSR_OP1_SHIFT) +#define ICC_SGI1R_EL1_CRn 0xc +#define ICC_SGI1R_EL1_CRn_MASK (ICC_SGI1R_EL1_CRn << ISS_MSR_CRn_SHIFT) +#define ICC_SGI1R_EL1_CRm 0xb +#define ICC_SGI1R_EL1_CRm_MASK (ICC_SGI1R_EL1_CRm << ISS_MSR_CRm_SHIFT) +#define ICC_SGI1R_EL1_OP2 0x5 +#define ICC_SGI1R_EL1_OP2_MASK (ICC_SGI1R_EL1_OP2 << ISS_MSR_OP2_SHIFT) + +#define ICC_SGI1R_EL1 \ + (ICC_SGI1R_EL1_OP0_MASK | ICC_SGI1R_EL1_OP1_MASK | \ + ICC_SGI1R_EL1_CRn_MASK | ICC_SGI1R_EL1_CRm_MASK | \ + ICC_SGI1R_EL1_OP2_MASK) + +#define ICC_SGI1R_EL1_TargetList_Bits 16 + +int vgic_v3_icc_sgi1r_el1_read(void *vm, int vcpuid, uint64_t *rval, void *arg); +int vgic_v3_icc_sgi1r_el1_write(void *vm, int vcpuid, uint64_t rval, void *arg); + +/* Order matters, a lower value means a higher precedence */ +enum vgic_v3_irqtype { + VGIC_IRQ_MAXPRIO, + VGIC_IRQ_CLK, + VGIC_IRQ_VIRTIO, + VGIC_IRQ_MISC, + VGIC_IRQ_INVALID, +}; + +struct vgic_mmio_region { + vm_offset_t start; + vm_offset_t end; + mem_region_read_t read; + mem_region_write_t write; +}; + +struct vm; +struct vm_exit; +struct hyp; + +struct vgic_v3_dist { + struct mtx dist_mtx; + + uint64_t start; + size_t end; + size_t nirqs; + + uint32_t gicd_ctlr; /* Distributor Control Register */ + uint32_t gicd_typer; /* Interrupt Controller Type Register */ + uint32_t gicd_typer2; /* Interrupt Controller Type Register */ + uint32_t gicd_iidr; /* Implementer and Revision of the Distributor */ + uint32_t gicd_pidr2; /* Distributor Peripheral ID2 Register */ + /* Interrupt Configuration Registers. */ + uint32_t *gicd_icfgr; + /* Interrupt Priority Registers. */ + uint32_t *gicd_ipriorityr; + /* Interrupt Routing Registers. */ + uint64_t *gicd_irouter; + /* Interrupt Clear-Enable and Set-Enable Registers. 
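The gicd_ipriorityr array mirrors the architectural GICD_IPRIORITYR layout: one 8-bit priority per interrupt, four interrupts per 32-bit register, so interrupt n lives in word n / 4 at byte lane n % 4 (a shift of (n % 4) * 8 bits). A short self-contained sketch of the lookup; the helper name is illustrative, the packing is the one defined by the GICv3 architecture.

#include <stdint.h>

/* Fetch the 8-bit priority of interrupt "irq" from a GICD_IPRIORITYR shadow. */
static inline uint8_t
gicd_ipriorityr_get(const uint32_t *ipriorityr, uint32_t irq)
{
	uint32_t reg = ipriorityr[irq / 4];
	uint32_t shift = (irq % 4) * 8;

	return ((reg >> shift) & 0xff);
}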
*/ + uint32_t *gicd_ixenabler; + uint32_t *gicd_ixactiver; +}; + +#define aff_routing_en(distp) (distp->gicd_ctlr & GICD_CTLR_ARE_NS) + +struct vgic_v3_redist { + uint64_t start; + uint64_t end; + + uint64_t gicr_typer; /* Redistributor Type Register */ + uint32_t gicr_ctlr; /* Redistributor Control Regiser */ + uint32_t gicr_ixenabler0; + /* Interrupt Priority Registers. */ + uint32_t gicr_ipriorityr[VGIC_PRV_I_NUM / 4]; + /* Interupt Configuration Registers */ + uint32_t gicr_icfgr0, gicr_icfgr1; + uint32_t gicr_icactiver0; +}; + +struct vgic_v3_irq; +struct vgic_v3_cpu_if { + uint32_t ich_eisr_el2; /* End of Interrupt Status Register */ + uint32_t ich_elrsr_el2; /* Empty List register Status Register (ICH_ELRSR_EL2) */ + uint32_t ich_hcr_el2; /* Hyp Control Register */ + uint32_t ich_misr_el2; /* Maintenance Interrupt State Register */ + uint32_t ich_vmcr_el2; /* Virtual Machine Control Register */ + + /* + * The List Registers are part of the VM context and are modified on a + * world switch. They need to be allocated statically so they are + * mapped in the EL2 translation tables when struct hypctx is mapped. + */ + uint64_t ich_lr_el2[VGIC_ICH_LR_NUM_MAX]; + size_t ich_lr_num; + + /* + * We need a mutex for accessing the list registers because they are + * modified asynchronously by the virtual timer. + * + * Note that the mutex *MUST* be a spin mutex because an interrupt can + * be injected by a callout callback function, thereby modifying the + * list registers from a context where sleeping is forbidden. + */ + struct mtx lr_mtx; + + /* Active Priorities Registers for Group 0 and 1 interrupts */ + uint32_t ich_ap0r_el2[VGIC_ICH_AP0R_NUM_MAX]; + size_t ich_ap0r_num; + uint32_t ich_ap1r_el2[VGIC_ICH_AP1R_NUM_MAX]; + size_t ich_ap1r_num; + + struct vgic_v3_irq *irqbuf; + size_t irqbuf_size; + size_t irqbuf_num; +}; + +int vgic_v3_attach_to_vm(void *arg, uint64_t dist_start, size_t dist_size, + uint64_t redist_start, size_t redist_size); +void vgic_v3_detach_from_vm(void *arg); +void vgic_v3_init(uint64_t ich_vtr_el2); +void vgic_v3_vminit(void *arg); +void vgic_v3_cpuinit(void *arg, bool last_vcpu); +void vgic_v3_sync_hwstate(void *arg); + +void vgic_v3_mmio_init(struct hyp *hyp); +void vgic_v3_mmio_destroy(struct hyp *hyp); + +int vgic_v3_vcpu_pending_irq(void *arg); +int vgic_v3_inject_irq(void *arg, uint32_t irq, + enum vgic_v3_irqtype irqtype); +int vgic_v3_remove_irq(void *arg, uint32_t irq, bool ignore_state); + +void vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp); +int vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled, + struct hyp *hyp, int vcpuid); + +DECLARE_CLASS(arm_vgic_driver); + +#endif /* !_VMM_VGIC_V3_H_ */ Index: sys/arm64/vmm/io/vgic_v3.c =================================================================== --- /dev/null +++ sys/arm64/vmm/io/vgic_v3.c @@ -0,0 +1,1010 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include "vgic_v3.h" +#include "vgic_v3_reg.h" + +#define VGIC_V3_DEVNAME "vgic" +#define VGIC_V3_DEVSTR "ARM Virtual Generic Interrupt Controller v3" + +#define RES0 0UL + +#define IRQBUF_SIZE_MIN 32 +#define IRQBUF_SIZE_MAX (1 << 10) + +#define IRQ_SCHEDULED (GIC_LAST_SPI + 1) + +#define lr_pending(lr) \ + (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_PENDING) +#define lr_inactive(lr) \ + (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_INACTIVE) +#define lr_active(lr) \ + (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_ACTIVE) +#define lr_pending_active(lr) \ + (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_PENDING_ACTIVE) +#define lr_not_active(lr) (!lr_active(lr) && !lr_pending_active(lr)) + +#define lr_clear_irq(lr) ((lr) &= ~ICH_LR_EL2_STATE_MASK) + +MALLOC_DEFINE(M_VGIC_V3, "ARM VMM VGIC V3", "ARM VMM VGIC V3"); + +struct vgic_v3_virt_features { + uint8_t min_prio; + size_t ich_lr_num; + size_t ich_ap0r_num; + size_t ich_ap1r_num; +}; + +struct vgic_v3_ro_regs { + uint32_t gicd_icfgr0; + uint32_t gicd_pidr2; + uint32_t gicd_typer; +}; + +struct vgic_v3_irq { + uint32_t irq; + enum vgic_v3_irqtype irqtype; + uint8_t enabled; + uint8_t priority; +}; + +#define vip_to_lr(vip, lr) \ +do { \ + lr = ICH_LR_EL2_STATE_PENDING; \ + lr |= ICH_LR_EL2_GROUP1; \ + lr |= (uint64_t)vip->priority << ICH_LR_EL2_PRIO_SHIFT; \ + lr |= vip->irq; \ +} while (0) + +#define lr_to_vip(lr, vip) \ +do { \ + (vip)->irq = ICH_LR_EL2_VINTID(lr); \ + (vip)->priority = \ + (uint8_t)(((lr) & ICH_LR_EL2_PRIO_MASK) >> ICH_LR_EL2_PRIO_SHIFT); \ +} while (0) + +static struct vgic_v3_virt_features virt_features; +static struct vgic_v3_ro_regs ro_regs; + +static struct gic_v3_softc *gic_sc; + +void +vgic_v3_cpuinit(void *arg, bool last_vcpu) +{ + struct hypctx *hypctx = arg; + struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if; + struct vgic_v3_redist *redist = &hypctx->vgic_redist; + uint64_t aff, vmpidr_el2; + int i; + + vmpidr_el2 = hypctx->vmpidr_el2; + KASSERT(vmpidr_el2 != 0, + ("Trying to init this CPU's vGIC before the vCPU")); + /* + * Get affinity for the current CPU. The guest CPU affinity is taken + * from VMPIDR_EL2. The Redistributor corresponding to this CPU is + * the Redistributor with the same affinity from GICR_TYPER. 
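The affinity value placed in GICR_TYPER here is the one the guest observes through VMPIDR_EL2, and it is later compared against GICD_IROUTER when routing SPIs. The two registers pack the four affinity bytes differently: GICR_TYPER stores Aff3..Aff0 in its upper 32 bits (Aff3 at bits 31:24 of that field), while GICD_IROUTER keeps Aff0-Aff2 in bits 23:0 and Aff3 in bits 39:32. A stand-alone sketch of both packings, assuming the architectural MPIDR field positions; the helper names are illustrative.

#include <stdint.h>

/* MPIDR_EL1 / VMPIDR_EL2 affinity fields (architectural positions). */
#define MPIDR_AFF0(x)	(((x) >> 0) & 0xff)
#define MPIDR_AFF1(x)	(((x) >> 8) & 0xff)
#define MPIDR_AFF2(x)	(((x) >> 16) & 0xff)
#define MPIDR_AFF3(x)	(((x) >> 32) & 0xff)

/* Affinity as stored in the upper word of GICR_TYPER (Aff3..Aff0). */
static inline uint32_t
mpidr_to_gicr_aff(uint64_t mpidr)
{
	return ((MPIDR_AFF3(mpidr) << 24) | (MPIDR_AFF2(mpidr) << 16) |
	    (MPIDR_AFF1(mpidr) << 8) | MPIDR_AFF0(mpidr));
}

/* The same affinity in GICD_IROUTER layout (Aff3 lives in bits 39:32). */
static inline uint64_t
mpidr_to_irouter_aff(uint64_t mpidr)
{
	return ((MPIDR_AFF3(mpidr) << 32) | (MPIDR_AFF2(mpidr) << 16) |
	    (MPIDR_AFF1(mpidr) << 8) | MPIDR_AFF0(mpidr));
}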
+ */ + aff = (CPU_AFF3(vmpidr_el2) << 24) | (CPU_AFF2(vmpidr_el2) << 16) | + (CPU_AFF1(vmpidr_el2) << 8) | CPU_AFF0(vmpidr_el2); + + /* Set up GICR_TYPER. */ + redist->gicr_typer = aff << GICR_TYPER_AFF_SHIFT; + /* Redistributor doesn't support virtual or physical LPIS. */ + redist->gicr_typer &= ~GICR_TYPER_VLPIS; + redist->gicr_typer &= ~GICR_TYPER_PLPIS; + + if (last_vcpu) + /* Mark the last Redistributor */ + redist->gicr_typer |= GICR_TYPER_LAST; + + /* + * Configure the Redistributor Control Register. + * + * ~GICR_CTLR_LPI_ENABLE: LPIs are disabled + */ + redist->gicr_ctlr = 0 & ~GICR_CTLR_LPI_ENABLE; + + mtx_init(&cpu_if->lr_mtx, "VGICv3 ICH_LR_EL2 lock", NULL, MTX_SPIN); + + /* + * Configure the Interrupt Controller Hyp Control Register. + * + * ICH_HCR_EL2_En: enable virtual CPU interface. + * + * Maintenance interrupts are disabled. + */ + cpu_if->ich_hcr_el2 = ICH_HCR_EL2_En; + + /* + * Configure the Interrupt Controller Virtual Machine Control Register. + * + * ICH_VMCR_EL2_VPMR: lowest priority mask for the VCPU interface + * ICH_VMCR_EL2_VBPR1_NO_PREEMPTION: disable interrupt preemption for + * Group 1 interrupts + * ICH_VMCR_EL2_VBPR0_NO_PREEMPTION: disable interrupt preemption for + * Group 0 interrupts + * ~ICH_VMCR_EL2_VEOIM: writes to EOI registers perform priority drop + * and interrupt deactivation. + * ICH_VMCR_EL2_VENG0: virtual Group 0 interrupts enabled. + * ICH_VMCR_EL2_VENG1: virtual Group 1 interrupts enabled. + */ + cpu_if->ich_vmcr_el2 = \ + (virt_features.min_prio << ICH_VMCR_EL2_VPMR_SHIFT) | \ + ICH_VMCR_EL2_VBPR1_NO_PREEMPTION | ICH_VMCR_EL2_VBPR0_NO_PREEMPTION; + cpu_if->ich_vmcr_el2 &= ~ICH_VMCR_EL2_VEOIM; + cpu_if->ich_vmcr_el2 |= ICH_VMCR_EL2_VENG0 | ICH_VMCR_EL2_VENG1; + + cpu_if->ich_lr_num = virt_features.ich_lr_num; + for (i = 0; i < cpu_if->ich_lr_num; i++) + cpu_if->ich_lr_el2[i] = 0UL; + + cpu_if->ich_ap0r_num = virt_features.ich_ap0r_num; + cpu_if->ich_ap1r_num = virt_features.ich_ap1r_num; + + cpu_if->irqbuf = malloc(IRQBUF_SIZE_MIN * sizeof(*cpu_if->irqbuf), + M_VGIC_V3, M_WAITOK | M_ZERO); + cpu_if->irqbuf_size = IRQBUF_SIZE_MIN; + cpu_if->irqbuf_num = 0; +} + +void +vgic_v3_vminit(void *arg) +{ + struct hyp *hyp = arg; + struct vgic_v3_dist *dist = &hyp->vgic_dist; + + /* + * Configure the Distributor control register. The register resets to an + * architecturally UNKNOWN value, so we reset to 0 to disable all + * functionality controlled by the register. + * + * The exception is GICD_CTLR.DS, which is RA0/WI when the Distributor + * supports one security state (ARM GIC Architecture Specification for + * GICv3 and GICv4, p. 4-464) + */ + dist->gicd_ctlr = GICD_CTLR_DS; + + dist->gicd_typer = ro_regs.gicd_typer; + dist->nirqs = GICD_TYPER_I_NUM(dist->gicd_typer); + dist->gicd_pidr2 = ro_regs.gicd_pidr2; + + mtx_init(&dist->dist_mtx, "VGICv3 Distributor lock", NULL, MTX_SPIN); +} + +int +vgic_v3_attach_to_vm(void *arg, uint64_t dist_start, size_t dist_size, + uint64_t redist_start, size_t redist_size) +{ + struct hyp *hyp = arg; + struct vgic_v3_dist *dist = &hyp->vgic_dist; + struct vgic_v3_redist *redist; + int i; + + /* Set the distributor address and size for trapping guest access. */ + dist->start = dist_start; + dist->end = dist_start + dist_size; + + for (i = 0; i < VM_MAXCPU; i++) { + redist = &hyp->ctx[i].vgic_redist; + /* Set the redistributor address and size. 
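vgic_v3_attach_to_vm() only records the guest-physical ranges of the Distributor and Redistributor frames so that guest accesses can be trapped and emulated; the actual addresses come from the VM configuration. A hedged usage sketch with placeholder GPAs and sizes follows; only the function signature is taken from vgic_v3.h, everything else is an example.

/* Illustrative only: example GPAs and sizes for the GICv3 frames. */
static int
example_attach_vgic(struct hyp *hyp)
{
	const uint64_t dist_start = 0x2f000000UL;	/* GICD frame base */
	const size_t dist_size = 0x10000;		/* 64KB */
	const uint64_t redist_start = 0x2f100000UL;	/* GICR frame base */
	const size_t redist_size = 0x20000;		/* 2 x 64KB per vCPU */

	return (vgic_v3_attach_to_vm(hyp, dist_start, dist_size,
	    redist_start, redist_size));
}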
*/ + redist->start = redist_start; + redist->end = redist_start + redist_size; + } + vgic_v3_mmio_init(hyp); + + hyp->vgic_attached = true; + + return (0); +} + +void +vgic_v3_detach_from_vm(void *arg) +{ + struct hyp *hyp; + struct hypctx *hypctx; + struct vgic_v3_cpu_if *cpu_if; + int i; + + hyp = arg; + + for (i = 0; i < VM_MAXCPU; i++) { + hypctx = & hyp->ctx[i]; + cpu_if = &hypctx->vgic_cpu_if; + free(cpu_if->irqbuf, M_VGIC_V3); + } + + vgic_v3_mmio_destroy(hyp); +} + +int +vgic_v3_vcpu_pending_irq(void *arg) +{ + struct hypctx *hypctx = arg; + struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if; + + return (cpu_if->irqbuf_num); +} + +/* Removes ALL instances of interrupt 'irq' */ +static int +vgic_v3_irqbuf_remove_nolock(uint32_t irq, struct vgic_v3_cpu_if *cpu_if) +{ + size_t dest = 0; + size_t from = cpu_if->irqbuf_num; + + while (dest < cpu_if->irqbuf_num) { + if (cpu_if->irqbuf[dest].irq == irq) { + for (from = dest + 1; from < cpu_if->irqbuf_num; from++) { + if (cpu_if->irqbuf[from].irq == irq) + continue; + cpu_if->irqbuf[dest++] = cpu_if->irqbuf[from]; + } + cpu_if->irqbuf_num = dest; + } else { + dest++; + } + } + + return (from - dest); +} + +int +vgic_v3_remove_irq(void *arg, uint32_t irq, bool ignore_state) +{ + struct hypctx *hypctx = arg; + struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if; + struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist; + size_t i; + + if (irq >= dist->nirqs) { + eprintf("Malformed IRQ %u.\n", irq); + return (1); + } + + mtx_lock_spin(&cpu_if->lr_mtx); + + for (i = 0; i < cpu_if->ich_lr_num; i++) { + if (ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq && + (lr_not_active(cpu_if->ich_lr_el2[i]) || ignore_state)) + lr_clear_irq(cpu_if->ich_lr_el2[i]); + } + vgic_v3_irqbuf_remove_nolock(irq, cpu_if); + + mtx_unlock_spin(&cpu_if->lr_mtx); + + return (0); +} + +static struct vgic_v3_irq * +vgic_v3_irqbuf_add_nolock(struct vgic_v3_cpu_if *cpu_if) +{ + struct vgic_v3_irq *new_irqbuf, *old_irqbuf; + size_t new_size; + + if (cpu_if->irqbuf_num == cpu_if->irqbuf_size) { + /* Double the size of the buffered interrupts list */ + new_size = cpu_if->irqbuf_size << 1; + if (new_size > IRQBUF_SIZE_MAX) + return (NULL); + + new_irqbuf = NULL; + /* TODO: malloc sleeps here and causes a panic */ + while (new_irqbuf == NULL) + new_irqbuf = malloc(new_size * sizeof(*cpu_if->irqbuf), + M_VGIC_V3, M_NOWAIT | M_ZERO); + memcpy(new_irqbuf, cpu_if->irqbuf, + cpu_if->irqbuf_size * sizeof(*cpu_if->irqbuf)); + + old_irqbuf = cpu_if->irqbuf; + cpu_if->irqbuf = new_irqbuf; + cpu_if->irqbuf_size = new_size; + free(old_irqbuf, M_VGIC_V3); + } + + cpu_if->irqbuf_num++; + + return (&cpu_if->irqbuf[cpu_if->irqbuf_num - 1]); +} + +static bool +vgic_v3_int_target(uint32_t irq, struct hypctx *hypctx) +{ + struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist; + struct vgic_v3_redist *redist = &hypctx->vgic_redist; + uint64_t irouter; + uint64_t aff; + uint32_t irq_off, irq_mask; + int n; + + if (irq <= GIC_LAST_PPI) + return (true); + + /* XXX Affinity routing disabled not implemented */ + if (!aff_routing_en(dist)) + return (true); + + irq_off = irq % 32; + irq_mask = 1 << irq_off; + n = irq / 32; + + irouter = dist->gicd_irouter[irq]; + /* Check if 1-of-N routing is active */ + if (irouter & GICD_IROUTER_IRM) + /* Check if the VCPU is participating */ + return (redist->gicr_ctlr & GICR_CTLR_DPG1NS ? 
true : false); + + aff = redist->gicr_typer >> GICR_TYPER_AFF_SHIFT; + /* Affinity in format for comparison with irouter */ + aff = GICR_TYPER_AFF0(redist->gicr_typer) | \ + (GICR_TYPER_AFF1(redist->gicr_typer) << 8) | \ + (GICR_TYPER_AFF2(redist->gicr_typer) << 16) | \ + (GICR_TYPER_AFF3(redist->gicr_typer) << 32); + if ((irouter & aff) == aff) + return (true); + else + return (false); +} + +static uint8_t +vgic_v3_get_priority(uint32_t irq, struct hypctx *hypctx) +{ + struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist; + struct vgic_v3_redist *redist = &hypctx->vgic_redist; + size_t n; + uint32_t off, mask; + uint8_t priority; + + n = irq / 4; + off = n % 4; + mask = 0xff << off; + /* + * When affinity routing is enabled, the Redistributor is used for + * SGIs and PPIs and the Distributor for SPIs. When affinity routing + * is not enabled, the Distributor registers are used for all + * interrupts. + */ + if (aff_routing_en(dist) && (n <= 7)) + priority = (redist->gicr_ipriorityr[n] & mask) >> off; + else + priority = (dist->gicd_ipriorityr[n] & mask) >> off; + + return (priority); +} + +static bool +vgic_v3_intid_enabled(uint32_t irq, struct hypctx *hypctx) +{ + struct vgic_v3_dist *dist; + struct vgic_v3_redist *redist; + uint32_t irq_off, irq_mask; + int n; + + irq_off = irq % 32; + irq_mask = 1 << irq_off; + n = irq / 32; + + if (irq <= GIC_LAST_PPI) { + redist = &hypctx->vgic_redist; + if (!(redist->gicr_ixenabler0 & irq_mask)) + return (false); + } else { + dist = &hypctx->hyp->vgic_dist; + if (!(dist->gicd_ixenabler[n] & irq_mask)) + return (false); + } + + return (true); +} + +static inline bool +dist_group_enabled(struct vgic_v3_dist *dist) +{ + return ((dist->gicd_ctlr & GICD_CTLR_G1A) != 0); +} + +int +vgic_v3_inject_irq(void *arg, uint32_t irq, enum vgic_v3_irqtype irqtype) +{ + struct hypctx *hypctx = arg; + struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist; + struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if; + struct vgic_v3_irq *new_irqbuf, *old_irqbuf; + struct vgic_v3_irq *vip; + int error; + int i; + size_t new_size; + uint8_t priority; + bool enabled; + + if (irq >= dist->nirqs || irqtype >= VGIC_IRQ_INVALID) { + eprintf("Malformed IRQ %u.\n", irq); + return (1); + } + + /* + * TODO: Not sure if this will be reached by ONE thread at a time + * It can interfere with timer interrupts. + */ + if (cpu_if->irqbuf_num == cpu_if->irqbuf_size && + irqtype != VGIC_IRQ_CLK) { + /* Double the size of the buffered interrupts list */ + new_size = cpu_if->irqbuf_size << 1; + if (new_size > IRQBUF_SIZE_MAX) { + eprintf("Error adding IRQ %u to the IRQ buffer.\n", irq); + error = 1; + goto out; + } + + new_irqbuf = NULL; + while (new_irqbuf == NULL) + new_irqbuf = malloc(new_size * sizeof(*cpu_if->irqbuf), + M_VGIC_V3, M_NOWAIT | M_ZERO); + memcpy(new_irqbuf, cpu_if->irqbuf, + cpu_if->irqbuf_size * sizeof(*cpu_if->irqbuf)); + + old_irqbuf = cpu_if->irqbuf; + cpu_if->irqbuf = new_irqbuf; + cpu_if->irqbuf_size = new_size; + free(old_irqbuf, M_VGIC_V3); + } + + error = 0; + mtx_lock_spin(&dist->dist_mtx); + + enabled = dist_group_enabled(&hypctx->hyp->vgic_dist) && + vgic_v3_intid_enabled(irq, hypctx) && + vgic_v3_int_target(irq, hypctx); + priority = vgic_v3_get_priority(irq, hypctx); + + mtx_lock_spin(&cpu_if->lr_mtx); + + /* + * If the guest is running behind timer interrupts, don't swamp it with + * one interrupt after another. 
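The buffered-interrupt list is the injection interface used by the rest of the VMM: a device model or the virtual timer asserts an interrupt with vgic_v3_inject_irq() and a later world switch moves it into a list register, while level-sensitive sources retract it again with vgic_v3_remove_irq(). A short usage sketch is given below; the function signatures and GT_VIRT_IRQ come from this patch, the wrapper names are illustrative.

/* Illustrative only: assert the guest's virtual timer PPI for this vCPU. */
static void
example_assert_vtimer(struct hypctx *hypctx)
{
	(void)vgic_v3_inject_irq(hypctx, GT_VIRT_IRQ, VGIC_IRQ_CLK);
}

/*
 * Illustrative only: deassert the level by removing buffered instances.
 * Passing false for ignore_state leaves list-register entries that are
 * already active untouched.
 */
static void
example_deassert_vtimer(struct hypctx *hypctx)
{
	(void)vgic_v3_remove_irq(hypctx, GT_VIRT_IRQ, false);
}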
However, if the timer interrupt is being + * serviced by the guest (it is in a state other than pending, either + * active or pending and active), then add it to the buffer to be + * injected later. Otherwise, the timer would stop working because we + * disable the timer in the host interrupt handler. + */ + if (irqtype == VGIC_IRQ_CLK) { + for (i = 0; i < cpu_if->ich_lr_num; i++) + if (ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq && + lr_pending(cpu_if->ich_lr_el2[i])) + goto out; + for (i = 0; i < cpu_if->irqbuf_num; i++) + if (cpu_if->irqbuf[i].irq == irq) + goto out; + } + + cpu_if->irqbuf_num++; + vip = &cpu_if->irqbuf[cpu_if->irqbuf_num - 1]; + + vip->irq = irq; + vip->irqtype = irqtype; + vip->enabled = enabled; + vip->priority = priority; + +out: + mtx_unlock_spin(&cpu_if->lr_mtx); + mtx_unlock_spin(&dist->dist_mtx); + + return (error); +} + +void +vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp) +{ + struct hypctx *hypctx; + struct vgic_v3_cpu_if *cpu_if; + struct vgic_v3_irq *vip; + int i, j; + + for (i = 0; i < VM_MAXCPU; i++) { + hypctx = &hyp->ctx[i]; + cpu_if = &hypctx->vgic_cpu_if; + + mtx_lock_spin(&cpu_if->lr_mtx); + + for (j = 0; j < cpu_if->irqbuf_num; j++) { + vip = &cpu_if->irqbuf[j]; + if (!enabled) + vip->enabled = 0; + else if (vgic_v3_intid_enabled(vip->irq, hypctx)) + vip->enabled = 1; + } + + mtx_unlock_spin(&cpu_if->lr_mtx); + } +} + +static int +vgic_v3_irq_toggle_enabled_vcpu(uint32_t irq, bool enabled, + struct vgic_v3_cpu_if *cpu_if) +{ + int i; + + mtx_lock_spin(&cpu_if->lr_mtx); + + if (enabled) { + /* + * Enable IRQs that were injected when the interrupt ID was + * disabled + */ + for (i = 0; i < cpu_if->irqbuf_num; i++) + if (cpu_if->irqbuf[i].irq == irq) + cpu_if->irqbuf[i].enabled = true; + } else { + /* Remove the disabled IRQ from the LR regs if it is pending */ + for (i = 0; i < cpu_if->ich_lr_num; i++) + if (lr_pending(cpu_if->ich_lr_el2[i]) && + ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq) + lr_clear_irq(cpu_if->ich_lr_el2[i]); + + /* Remove the IRQ from the interrupt buffer */ + vgic_v3_irqbuf_remove_nolock(irq, cpu_if); + } + + mtx_unlock_spin(&cpu_if->lr_mtx); + + return (0); +} + +int +vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled, + struct hyp *hyp, int vcpuid) +{ + struct vgic_v3_cpu_if *cpu_if; + int error; + int i; + + if (irq <= GIC_LAST_PPI) { + cpu_if = &hyp->ctx[vcpuid].vgic_cpu_if; + return (vgic_v3_irq_toggle_enabled_vcpu(irq, enabled, cpu_if)); + } else { + /* TODO: Update irqbuf for all VCPUs, not just VCPU 0 */ + for (i = 0; i < VM_MAXCPU; i++) { + cpu_if = &hyp->ctx[i].vgic_cpu_if; + error = vgic_v3_irq_toggle_enabled_vcpu(irq, enabled, cpu_if); + if (error) + return (error); + } + } + + return (0); +} + +static int +irqbuf_highest_priority(struct vgic_v3_cpu_if *cpu_if, int start, int end, + struct hypctx *hypctx) +{ + uint32_t irq; + int i, max_idx; + uint8_t priority, max_priority; + uint8_t vpmr; + + vpmr = (cpu_if->ich_vmcr_el2 & ICH_VMCR_EL2_VPMR_MASK) >> \ + ICH_VMCR_EL2_VPMR_SHIFT; + + max_idx = -1; + max_priority = 0xff; + for (i = start; i < end; i++) { + irq = cpu_if->irqbuf[i].irq; + /* Check that the interrupt hasn't been already scheduled */ + if (irq == IRQ_SCHEDULED) + continue; + + if (!dist_group_enabled(&hypctx->hyp->vgic_dist)) + continue; + if (!vgic_v3_int_target(irq, hypctx)) + continue; + + priority = cpu_if->irqbuf[i].priority; + if (priority >= vpmr) + continue; + + if (max_idx == -1) { + max_idx = i; + max_priority = priority; + } else if (priority > max_priority) { + max_idx = 
i; + max_priority = priority; + } else if (priority == max_priority && + cpu_if->irqbuf[i].irqtype < cpu_if->irqbuf[max_idx].irqtype) { + max_idx = i; + max_priority = priority; + } + } + + return (max_idx); +} + +static inline bool +cpu_if_group_enabled(struct vgic_v3_cpu_if *cpu_if) +{ + return ((cpu_if->ich_vmcr_el2 & ICH_VMCR_EL2_VENG1) != 0); +} + +static inline int +irqbuf_next_enabled(struct vgic_v3_irq *irqbuf, int start, int end, + struct hypctx *hypctx, struct vgic_v3_cpu_if *cpu_if) +{ + int i; + + if (!cpu_if_group_enabled(cpu_if)) + return (-1); + + for (i = start; i < end; i++) + if (irqbuf[i].enabled) + break; + + if (i < end) + return (i); + else + return (-1); +} + +static inline int +vgic_v3_lr_next_empty(uint32_t ich_elrsr_el2, int start, int end) +{ + int i; + + for (i = start; i < end; i++) + if (ich_elrsr_el2 & (1U << i)) + break; + + if (i < end) + return (i); + else + return (-1); +} + +/* + * There are two cases in which the virtual timer interrupt is in the list + * registers: + * + * 1. The virtual interrupt is active. The guest is executing the interrupt + * handler, and the timer fired after it programmed the new alarm time but + * before the guest had the chance to write to the EOIR1 register. + * + * 2. The virtual interrupt is pending and active. The timer interrupt is level + * sensitive. The guest wrote to the EOR1 register, but the write hasn't yet + * propagated to the timer. + * + * Injecting the interrupt in these cases would mean that another timer + * interrupt is asserted as soon as the guest writes to the EOIR1 register (or + * very shortly thereafter, in the pending and active scenario). This can lead + * to the guest servicing timer interrupts one after the other and doing + * nothing else. So do not inject a timer interrupt while one is active pending. + * The buffered timer interrupts will be injected after the next world switch in + * this case. + */ +static bool +clk_irq_in_lr(struct vgic_v3_cpu_if *cpu_if) +{ + uint64_t lr; + int i; + + for (i = 0; i < cpu_if->ich_lr_num; i++) { + lr = cpu_if->ich_lr_el2[i]; + if (ICH_LR_EL2_VINTID(lr) == GT_VIRT_IRQ && + (lr_active(lr) || lr_pending_active(lr))) + return (true); + } + + return (false); +} + +static void +vgic_v3_irqbuf_to_lr(struct hypctx *hypctx, struct vgic_v3_cpu_if *cpu_if, + bool by_priority) +{ + struct vgic_v3_irq *vip; + int irqbuf_idx; + int lr_idx; + bool clk_present; + + clk_present = clk_irq_in_lr(cpu_if); + + irqbuf_idx = 0; + lr_idx = 0; + for (;;) { + if (by_priority) + irqbuf_idx = irqbuf_highest_priority(cpu_if, + irqbuf_idx, cpu_if->irqbuf_num, hypctx); + else + irqbuf_idx = irqbuf_next_enabled(cpu_if->irqbuf, + irqbuf_idx, cpu_if->irqbuf_num, hypctx, cpu_if); + if (irqbuf_idx == -1) + break; + + lr_idx = vgic_v3_lr_next_empty(cpu_if->ich_elrsr_el2, + lr_idx, cpu_if->ich_lr_num); + if (lr_idx == -1) + break; + + vip = &cpu_if->irqbuf[irqbuf_idx]; + if (vip->irqtype == VGIC_IRQ_CLK && clk_present) { + /* Skip injecting timer interrupt. */ + irqbuf_idx++; + continue; + } + + vip_to_lr(vip, cpu_if->ich_lr_el2[lr_idx]); + vip->irq = IRQ_SCHEDULED; + irqbuf_idx++; + lr_idx++; + } + + /* Remove all interrupts that were just scheduled. 
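Each entry that was copied to a list register above had its irq field overwritten with the IRQ_SCHEDULED sentinel, so removing that sentinel value drops exactly those entries from the buffer.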
*/ + vgic_v3_irqbuf_remove_nolock(IRQ_SCHEDULED, cpu_if); +} + +void +vgic_v3_sync_hwstate(void *arg) +{ + struct hypctx *hypctx; + struct vgic_v3_cpu_if *cpu_if; + int lr_free; + int i; + bool by_priority; + bool en_underflow_intr; + + hypctx = arg; + cpu_if = &hypctx->vgic_cpu_if; + + /* + * All Distributor writes have been executed at this point, do not + * protect Distributor reads with a mutex. + * + * This is callled with all interrupts disabled, so there is no need for + * a List Register spinlock either. + */ + mtx_lock_spin(&cpu_if->lr_mtx); + + /* Exit early if there are no buffered interrupts */ + if (cpu_if->irqbuf_num == 0) { + cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE; + goto out; + } + + /* Test if all buffered interrupts can fit in the LR regs */ + lr_free = 0; + for (i = 0; i < cpu_if->ich_lr_num; i++) + if (cpu_if->ich_elrsr_el2 & (1U << i)) + lr_free++; + + by_priority = (lr_free <= cpu_if->ich_lr_num); + vgic_v3_irqbuf_to_lr(hypctx, cpu_if, by_priority); + + lr_free = 0; + for (i = 0; i < cpu_if->ich_lr_num; i++) + if (cpu_if->ich_elrsr_el2 & (1U << i)) + lr_free++; + + en_underflow_intr = false; + if (cpu_if->irqbuf_num > 0) + for (i = 0; i < cpu_if->irqbuf_num; i++) + if (cpu_if->irqbuf[i].irqtype != VGIC_IRQ_CLK) { + en_underflow_intr = true; + break; + } + if (en_underflow_intr) { + cpu_if->ich_hcr_el2 |= ICH_HCR_EL2_UIE; + } else { + cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE; + } + +out: + mtx_unlock_spin(&cpu_if->lr_mtx); +} + +static void +vgic_v3_get_ro_regs() +{ + /* GICD_ICFGR0 configures SGIs and it is read-only. */ + ro_regs.gicd_icfgr0 = gic_d_read(gic_sc, 4, GICD_ICFGR(0)); + + /* + * Configure the GIC type register for the guest. + * + * ~GICD_TYPER_SECURITYEXTN: disable security extensions. + * ~GICD_TYPER_DVIS: direct injection for virtual LPIs not supported. + * ~GICD_TYPER_LPIS: LPIs not supported. + */ + ro_regs.gicd_typer = gic_d_read(gic_sc, 4, GICD_TYPER); + ro_regs.gicd_typer &= ~GICD_TYPER_SECURITYEXTN; + ro_regs.gicd_typer &= ~GICD_TYPER_DVIS; + ro_regs.gicd_typer &= ~GICD_TYPER_LPIS; + + /* + * XXX. Guest reads of GICD_PIDR2 should return the same ArchRev as + * specified in the guest FDT. + */ + ro_regs.gicd_pidr2 = gic_d_read(gic_sc, 4, GICD_PIDR2); +} + +void +vgic_v3_init(uint64_t ich_vtr_el2) { + uint32_t pribits, prebits; + + KASSERT(gic_sc != NULL, ("GIC softc is NULL")); + + vgic_v3_get_ro_regs(); + + pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2); + switch (pribits) { + case 5: + virt_features.min_prio = 0xf8; + case 6: + virt_features.min_prio = 0xfc; + case 7: + virt_features.min_prio = 0xfe; + case 8: + virt_features.min_prio = 0xff; + } + + prebits = ICH_VTR_EL2_PREBITS(ich_vtr_el2); + switch (prebits) { + case 5: + virt_features.ich_ap0r_num = 1; + virt_features.ich_ap1r_num = 1; + case 6: + virt_features.ich_ap0r_num = 2; + virt_features.ich_ap1r_num = 2; + case 7: + virt_features.ich_ap0r_num = 4; + virt_features.ich_ap1r_num = 4; + } + + virt_features.ich_lr_num = ICH_VTR_EL2_LISTREGS(ich_vtr_el2); +} + +static int +vgic_v3_maint_intr(void *arg) +{ + printf("MAINTENANCE INTERRUPT\n"); + + return (FILTER_HANDLED); +} + +/* + * TODO: Look at how gic_v3_fdt.c adds the gic driver. + * + * 1. In probe they set the device description. + * 2. In attach they create children devices for the GIC (in + * gic_v3_ofw_bus_attach). + * 3. There is no identify function being called. + * + * On the other hand, in man 9 DEVICE_IDENTIFY it is stated that a new device + * instance is created by the identify function. 
+ */ + +static void +arm_vgic_identify(driver_t *driver, device_t parent) +{ + device_t dev; + + if (strcmp(device_get_name(parent), "gic") == 0) { + dev = device_find_child(parent, VGIC_V3_DEVNAME, -1); + if (!dev) + dev = device_add_child(parent, VGIC_V3_DEVNAME, -1); + gic_sc = device_get_softc(parent); + } +} + +static int +arm_vgic_probe(device_t dev) +{ + device_t parent; + + parent = device_get_parent(dev); + if (strcmp(device_get_name(parent), "gic") == 0) { + device_set_desc(dev, VGIC_V3_DEVSTR); + return (BUS_PROBE_DEFAULT); + } + + return (ENXIO); +} + +static int +arm_vgic_attach(device_t dev) +{ + int error; + + error = gic_v3_setup_maint_intr(vgic_v3_maint_intr, NULL, NULL); + if (error) + device_printf(dev, "Could not setup maintenance interrupt\n"); + + return (0); +} + +static int +arm_vgic_detach(device_t dev) +{ + int error; + + error = gic_v3_teardown_maint_intr(); + if (error) + device_printf(dev, "Could not teardown maintenance interrupt\n"); + + gic_sc = NULL; + + return (0); +} + +static device_method_t arm_vgic_methods[] = { + DEVMETHOD(device_identify, arm_vgic_identify), + DEVMETHOD(device_probe, arm_vgic_probe), + DEVMETHOD(device_attach, arm_vgic_attach), + DEVMETHOD(device_detach, arm_vgic_detach), + DEVMETHOD_END +}; + +DEFINE_CLASS_1(vgic, arm_vgic_driver, arm_vgic_methods, 0, gic_v3_driver); + +static devclass_t arm_vgic_devclass; +DRIVER_MODULE(vgic, gic, arm_vgic_driver, arm_vgic_devclass, 0, 0); Index: sys/arm64/vmm/io/vgic_v3_mmio.c =================================================================== --- /dev/null +++ sys/arm64/vmm/io/vgic_v3_mmio.c @@ -0,0 +1,1206 @@ +#include +#include +#include + +#include +#include +#include + +#include "vgic_v3.h" + +#define DEBUG 0 + +#define GICR_FRAME_RD 0 +#define GICR_FRAME_SGI GICR_RD_BASE_SIZE +#define GICR_FRAMES_END (GICR_FRAME_SGI + GICR_SGI_BASE_SIZE) + +#define RES0 (0UL) +#define RES1 (~0UL) + +#define redist_simple_read(src, destp, vm, vcpuid) \ +do { \ + struct hyp *hyp = vm_get_cookie(vm); \ + struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist; \ + *destp = redist->src; \ +} while (0); + +#define redist_simple_write(src, dest, vm, vcpuid) \ +do { \ + struct hyp *hyp = vm_get_cookie(vm); \ + struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist; \ + redist->dest = src; \ +} while (0); + +#define reg32_idx(ipa, region) (((ipa) - (region).start) / 4) +#define reg64_idx(ipa, region) (((ipa) - (region).start) / 8) + +#define reg_changed(new, old, mask) (((new) & (mask)) != ((old) & (mask))) + +/* The names should always be in ascending order of memory address */ +enum vgic_mmio_region_name { + /* Distributor registers */ + VGIC_GICD_CTLR, + VGIC_GICD_TYPER, + VGIC_GICD_IIDR, + VGIC_GICD_TYPER2, + VGIC_GICD_IGROUPR, + VGIC_GICD_ISENABLER, + VGIC_GICD_ICENABLER, + VGIC_GICD_ISACTIVER, + VGIC_GICD_ICACTIVER, + VGIC_GICD_IPRIORITYR, + VGIC_GICD_ICFGR, + VGIC_GICD_IROUTER, + VGIC_GICD_PIDR2, + /* Redistributor registers */ + VGIC_GICR_CTLR, + VGIC_GICR_TYPER, + VGIC_GICR_WAKER, + VGIC_GICR_PIDR2, + VGIC_GICR_IGROUPR0, + VGIC_GICR_ISENABLER0, + VGIC_GICR_ICENABLER0, + VGIC_GICR_ICACTIVER0, + VGIC_GICR_IPRIORITYR, + VGIC_GICR_ICFGR0, + VGIC_GICR_ICFGR1, + VGIC_MMIO_REGIONS_NUM, +}; +/* + * Necessary for calculating the number of Distributor and Redistributor + * regions emulated. 
+ */ +#define FIRST_REDIST_MMIO_REGION VGIC_GICR_CTLR + +MALLOC_DEFINE(M_VGIC_V3_MMIO, "ARM VMM VGIC DIST MMIO", "ARM VMM VGIC DIST MMIO"); + +static int +dist_ctlr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + mtx_lock_spin(&dist->dist_mtx); + *rval = dist->gicd_ctlr; + mtx_unlock_spin(&dist->dist_mtx); + + /* Writes are never pending */ + *rval &= ~GICD_CTLR_RWP; + + *retu = false; + return (0); +} + +static int +dist_ctlr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + /* GICD_CTLR.DS is RAO/WI when only one security state is supported. */ + wval |= GICD_CTLR_DS; + + mtx_lock_spin(&dist->dist_mtx); + + if (reg_changed(wval, dist->gicd_ctlr, GICD_CTLR_G1A)) { + if (!(wval & GICD_CTLR_G1A)) + vgic_v3_group_toggle_enabled(false, hyp); + else + vgic_v3_group_toggle_enabled(true, hyp); + } + dist->gicd_ctlr = wval; + + mtx_unlock_spin(&dist->dist_mtx); + + *retu = false; + return (0); +} + +static int +dist_typer_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + *rval = dist->gicd_typer; + + *retu = false; + return (0); +} + +static int +dist_typer_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + eprintf("Warning: Attempted write to read-only register GICD_TYPER.\n"); + + *retu = false; + return (0); +} + +static int +dist_iidr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + *rval = (0x42 << GICD_IIDR_PROD_SHIFT); + *rval |= (1 << GICD_IIDR_REV_SHIFT); + *rval |= (0x43b << GICD_IIDR_IMPL_SHIFT); + + *retu = false; + return (0); +} + +static int +dist_iidr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + return (0); +} + +static int +dist_typer2_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + *rval = RES0; + + *retu = false; + return (0); +} + +static int +dist_typer2_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + return (0); +} + +/* Only group 1 interrupts are supported. Treat IGROUPR as RA0/WI. */ +static int +dist_igroupr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + int n; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IGROUPR]); + /* + * GIC Architecture specification, p 8-477: "For SGIs and PPIs: When + * ARE is 1 for the Security state of an interrupt, the field for that + * interrupt is RES0 and an implementation is permitted to make the + * field RAZ/WI in this case". + */ + if (n == 0 && aff_routing_en(dist)) { + *rval = RES0; + } else { + *rval = RES1; + } + + *retu = false; + return (0); +} + +/* Only group 1 interrupts are supported. Treat IGROUPR as RA0/WI. 
*/ +static int +dist_igroupr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + return (0); +} + +static void +mmio_update_int_enabled(uint32_t new_ixenabler, uint32_t old_ixenabler, + uint32_t irq, struct hyp *hyp, int vcpuid) +{ + uint32_t irq_mask; + int error; + int i; + bool enabled; + + irq_mask = 0x1; + for (i = 0; i < 32; i++) { + if (reg_changed(new_ixenabler, old_ixenabler, irq_mask)) { + enabled = ((new_ixenabler & irq_mask) != 0); + error = vgic_v3_irq_toggle_enabled(irq, enabled, + hyp, vcpuid); + if (error) + eprintf("Warning: error while toggling IRQ %u\n", irq); + } + irq++; + irq_mask <<= 1; + } +} + +static int +dist_ixenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + void *arg, enum vgic_mmio_region_name name) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + size_t n; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[name]); + /* + * GIC Architecture specification, p 8-471: "When ARE is 1 for the + * Security state of an interrupt, the field for that interrupt is RES0 + * and an implementation is permitted to* make the field RAZ/WI in this + * case". + */ + if (n == 0 && aff_routing_en(dist)) { + *rval = RES0; + goto out; + } + + mtx_lock_spin(&dist->dist_mtx); + *rval = dist->gicd_ixenabler[n]; + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); +} + +static int +dist_ixenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + void *arg, enum vgic_mmio_region_name name) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + uint32_t old_ixenabler; + size_t n; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[name]); + /* See dist_ixenabler_read() */ + if (n == 0 && aff_routing_en(dist)) + /* Ignore writes */ + goto out; + + mtx_lock_spin(&dist->dist_mtx); + + old_ixenabler = dist->gicd_ixenabler[n]; + if (name == VGIC_GICD_ICENABLER) + dist->gicd_ixenabler[n] &= ~wval; + else + dist->gicd_ixenabler[n] |= wval; + mmio_update_int_enabled(dist->gicd_ixenabler[n], old_ixenabler, n * 32, + hyp, vcpuid); + + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); +} + +static int +dist_isenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + return (dist_ixenabler_read(vm, vcpuid, fault_ipa, rval, arg, + VGIC_GICD_ISENABLER)); +} + +static int +dist_isenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + return (dist_ixenabler_write(vm, vcpuid, fault_ipa, wval, arg, + VGIC_GICD_ISENABLER)); +} + +static int +dist_icenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + return (dist_ixenabler_read(vm, vcpuid, fault_ipa, rval, arg, + VGIC_GICD_ICENABLER)); +} + +static int +dist_icenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + return (dist_ixenabler_write(vm, vcpuid, fault_ipa, wval, arg, + VGIC_GICD_ICENABLER)); +} + +static int +dist_isactiver_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + + return (0); +} + +static int +dist_isactiver_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + + return (0); +} + +static int 
+dist_icactiver_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + + return (0); +} + +static int +dist_icactiver_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + + return (0); +} + +/* XXX: Registers are byte accessible. */ +static int +dist_ipriorityr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + size_t n; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IPRIORITYR]); + /* + * GIC Architecture specification, p 8-483: when affinity + * routing is enabled, GICD_IPRIORITYR is RAZ/WI for + * n = 0 to 7. + */ + if (aff_routing_en(dist) && n <= 7) { + *rval = RES0; + goto out; + } + + mtx_lock_spin(&dist->dist_mtx); + *rval = dist->gicd_ipriorityr[n]; + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); + +} + +static int +dist_ipriorityr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + size_t n; + + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IPRIORITYR]); + /* See dist_ipriorityr_read() */ + if (aff_routing_en(dist) && n <= 7) + /* Ignore writes */ + goto out; + + mtx_lock_spin(&dist->dist_mtx); + dist->gicd_ipriorityr[n] = wval; + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); +} + +static int +dist_icfgr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + size_t n; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_ICFGR]); + /* + * ARM GIC Architecture Specification, p 8-472: "For SGIs, + * Int_config fields are RO, meaning that GICD_ICFGR0 is RO." + */ + if (n == 0) { + *rval = RES0; + goto out; + } + + mtx_lock_spin(&dist->dist_mtx); + *rval = dist->gicd_icfgr[n]; + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); + +} + +static int +dist_icfgr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + size_t n; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_ICFGR]); + if (n == 0) + /* Ignore writes */ + goto out; + + mtx_lock_spin(&dist->dist_mtx); + dist->gicd_icfgr[n] = wval; + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); +} + +static int +dist_irouter_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + size_t n; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg64_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IROUTER]); + /* GIC Architecture Manual, p 8-485: registers 0 to 31 are reserved */ + if (n <= 31) { + eprintf("Warning: Read from register GICD_IROUTER%zu\n", n); + *rval = RES0; + goto out; + } + + /* + * GIC Architecture Manual, p 8-485: when affinity routing is not + * enabled, the registers are RAZ/WI. 
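+ * Until the guest enables affinity routing these reads return zero and the
+ * corresponding writes are dropped.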
+ */ + if (!aff_routing_en(dist)) { + *rval = RES0; + goto out; + } + + mtx_lock_spin(&dist->dist_mtx); + *rval = dist->gicd_irouter[n]; + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); +} + +static int +dist_irouter_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + size_t n; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + n = reg64_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IROUTER]); + if (n <= 31) { + eprintf("Warning: Write to register GICD_IROUTER%zu\n", n); + goto out; + } + + /* See dist_irouter_read() */ + if (!aff_routing_en(dist)) + /* Ignore writes */ + goto out; + + mtx_lock_spin(&dist->dist_mtx); + dist->gicd_irouter[n] = wval; + mtx_unlock_spin(&dist->dist_mtx); + +out: + *retu = false; + return (0); +} + +static int +dist_pidr2_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + *rval = dist->gicd_pidr2; + + *retu = false; + return (0); +} + +static int +dist_pidr2_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + eprintf("Warning: Attempted write to read-only register GICD_PIDR2.\n"); + + *retu = false; + return (0); +} + +static int +redist_ctlr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + redist_simple_read(gicr_ctlr, rval, vm, vcpuid); + /* Writes are never pending */ + *rval &= ~GICR_CTLR_RWP & ~GICR_CTLR_UWP; + +#if (DEBUG > 0) + eprintf("\n"); +#endif + + *retu = false; + return (0); +} + +static int +redist_ctlr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + redist_simple_write(wval, gicr_ctlr, vm, vcpuid); + +#if (DEBUG > 0) + eprintf("\n"); +#endif + + *retu = false; + return (0); +} + +static int +redist_typer_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + redist_simple_read(gicr_typer, rval, vm, vcpuid); + +#if (DEBUG > 0) + eprintf("\n"); +#endif + + *retu = false; + return (0); +} + +static int +redist_typer_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + eprintf("Warning: Attempted write to read-only register GICR_TYPER.\n"); + + *retu = false; + return (0); +} + +static int +redist_waker_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + /* Redistributor is always awake */ + *rval = 0 & ~GICR_WAKER_PS & ~GICR_WAKER_CA; + +#if (DEBUG > 0) + eprintf("\n"); +#endif + + *retu = false; + return (0); +} + +static int +redist_waker_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + /* Ignore writes */ +#if (DEBUG > 0) + eprintf("\n"); +#endif + + *retu = false; + return (0); +} + +/* Only group 1 interrupts are supported. Treat IGROUPR0 as RA0/WI. */ +static int +redist_igroupr0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + *rval = RES1; + *retu = false; + return (0); +} + +/* Only group 1 interrupts are supported. Treat IGROUPR0 as RA0/WI. 
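An all-zero write would ask for the SGIs and PPIs to become Group 0 interrupts, which is unsupported, so it is logged and otherwise ignored.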
*/ +static int +redist_igroupr0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + if (wval == 0UL) + printf("Warning: Interrupts marked as group 0, ignoring\n"); + + *retu = false; + return (0); +} + +static int +redist_ixenabler_read(void *vm, int vcpuid, uint64_t *rval, void *arg, + enum vgic_mmio_region_name reg) +{ + struct hyp *hyp; + struct vgic_v3_redist *redist; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + redist = &hyp->ctx[vcpuid].vgic_redist; + + *rval = redist->gicr_ixenabler0; + + *retu = false; + return (0); +} + +static int +redist_ixenabler_write(void *vm, int vcpuid, uint64_t wval, void *arg, + enum vgic_mmio_region_name reg) +{ + struct hyp *hyp; + struct vgic_v3_redist *redist; + uint32_t old_ixenabler0, new_ixenabler0; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + redist = &hyp->ctx[vcpuid].vgic_redist; + + old_ixenabler0 = redist->gicr_ixenabler0; + if (reg == VGIC_GICR_ICENABLER0) + new_ixenabler0 = old_ixenabler0 & ~wval; + else + new_ixenabler0 = old_ixenabler0 | wval; + mmio_update_int_enabled(new_ixenabler0, old_ixenabler0, 0, hyp, vcpuid); + redist->gicr_ixenabler0 = new_ixenabler0; + + *retu = false; + return (0); +} + + +static int +redist_isenabler0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ +#if (DEBUG > 0) + eprintf("\n"); +#endif + return (redist_ixenabler_read(vm, vcpuid, rval, arg, + VGIC_GICR_ISENABLER0)); +} + +static int +redist_isenabler0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ +#if (DEBUG > 0) + eprintf("\n"); +#endif + return (redist_ixenabler_write(vm, vcpuid, wval, arg, + VGIC_GICR_ISENABLER0)); +} + +static int +redist_icenabler0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ +#if (DEBUG > 0) + eprintf("\n"); +#endif + return (redist_ixenabler_read(vm, vcpuid, rval, arg, + VGIC_GICR_ICENABLER0)); +} + +static int +redist_icenabler0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ +#if (DEBUG > 0) + eprintf("\n"); +#endif + return (redist_ixenabler_write(vm, vcpuid, wval, arg, + VGIC_GICR_ICENABLER0)); +} + +static int +redist_icactiver0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + + return (0); +} + +static int +redist_icactiver0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + *retu = false; + + return (0); +} + +static int +redist_ipriorityr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_redist *redist; + size_t n; + bool *retu = arg; + +#if (DEBUG > 0) + eprintf("\n"); +#endif + + hyp = vm_get_cookie(vm); + redist = &hyp->ctx[vcpuid].vgic_redist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICR_IPRIORITYR]); + *rval = redist->gicr_ipriorityr[n]; + + *retu = false; + return (0); +} + +static int +redist_ipriorityr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_redist *redist; + size_t n; + bool *retu = arg; + +#if (DEBUG > 0) + eprintf("\n"); +#endif + + hyp = vm_get_cookie(vm); + redist = &hyp->ctx[vcpuid].vgic_redist; + + n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICR_IPRIORITYR]); + redist->gicr_ipriorityr[n] = wval; + + *retu = false; + return (0); +} + +static int 
+redist_pidr2_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + struct hyp *hyp; + struct vgic_v3_dist *dist; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_dist; + + /* GICR_PIDR2 has the same value as GICD_PIDR2 */ + *rval = dist->gicd_pidr2; +#if (DEBUG > 0) + eprintf("\n"); +#endif + + *retu = false; + return (0); +} + +static int +redist_pidr2_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + eprintf("Warning: Attempted write to read-only register GICR_PIDR2.\n"); + + *retu = false; + return (0); +} + +static int +redist_icfgr0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + redist_simple_read(gicr_icfgr0, rval, vm, vcpuid); + + *retu = false; + return (0); +} + +static int +redist_icfgr0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + redist_simple_write(wval, gicr_icfgr0, vm, vcpuid); + + *retu = false; + return (0); +} + +static int +redist_icfgr1_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval, + int size, void *arg) +{ + bool *retu = arg; + + redist_simple_read(gicr_icfgr0, rval, vm, vcpuid); + + *retu = false; + return (0); +} + +static int +redist_icfgr1_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval, + int size, void *arg) +{ + bool *retu = arg; + + redist_simple_write(wval, gicr_icfgr0, vm, vcpuid); + + *retu = false; + return (0); +} + +#define alloc_registers(regs, num, size) \ +do { \ + size = n * sizeof(*regs); \ + regs = malloc(size, M_VGIC_V3_MMIO, M_WAITOK | M_ZERO); \ +} while (0) + +#define div_round_up(n, div) (((n) + (div) - 1) / (div)) + +static inline void +init_mmio_region(struct hyp *hyp, size_t regidx, vm_offset_t start, + size_t size, mem_region_read_t read_fn, mem_region_write_t write_fn) +{ + static int i = 0; + + hyp->vgic_mmio_regions[i] = (struct vgic_mmio_region) { + .start = start, + .end = start + size, + .read = read_fn, + .write = write_fn, + }; + + i++; +} + +static void +dist_mmio_init_regions(struct vgic_v3_dist *dist, struct hyp *hyp) +{ + size_t n; + size_t region_size; + + init_mmio_region(hyp, VGIC_GICD_CTLR, dist->start + GICD_CTLR, + sizeof(dist->gicd_ctlr), dist_ctlr_read, dist_ctlr_write); + init_mmio_region(hyp, VGIC_GICD_TYPER, dist->start + GICD_TYPER, + sizeof(dist->gicd_typer), dist_typer_read, dist_typer_write); + init_mmio_region(hyp, VGIC_GICD_IIDR, dist->start + GICD_IIDR, + sizeof(dist->gicd_iidr), dist_iidr_read, dist_iidr_write); + init_mmio_region(hyp, VGIC_GICD_TYPER2, dist->start + GICD_TYPER2, + sizeof(dist->gicd_typer2), dist_typer2_read, dist_typer2_write); + + n = div_round_up(dist->nirqs, 32); + init_mmio_region(hyp, VGIC_GICD_IGROUPR, dist->start + GICD_IGROUPR_BASE, + n * sizeof(uint32_t), dist_igroupr_read, dist_igroupr_write); + + /* ARM GIC Architecture Specification, page 8-471. 
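GICD_TYPER.ITLinesNumber is the number of 32-bit enable registers minus one, so ITLinesNumber + 1 registers back both ISENABLER and ICENABLER.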
*/ + n = (dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1; + alloc_registers(dist->gicd_ixenabler, n , region_size); + init_mmio_region(hyp, VGIC_GICD_ISENABLER, dist->start + GICD_ISENABLER_BASE, + region_size, dist_isenabler_read, dist_isenabler_write); + init_mmio_region(hyp, VGIC_GICD_ICENABLER, dist->start + GICD_ICENABLER_BASE, + region_size, dist_icenabler_read, dist_icenabler_write); + + alloc_registers(dist->gicd_ixactiver, n , region_size); + init_mmio_region(hyp, VGIC_GICD_ISACTIVER, dist->start + GICD_ISACTIVER_BASE, + region_size, dist_isactiver_read, dist_isactiver_write); + init_mmio_region(hyp, VGIC_GICD_ICACTIVER, dist->start + GICD_ICACTIVER_BASE, + region_size, dist_icactiver_read, dist_icactiver_write); + + /* ARM GIC Architecture Specification, page 8-483. */ + n = 8 * ((dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1); + alloc_registers(dist->gicd_ipriorityr, n, region_size); + init_mmio_region(hyp, VGIC_GICD_IPRIORITYR, dist->start + GICD_IPRIORITYR_BASE, + region_size, dist_ipriorityr_read, dist_ipriorityr_write); + + n = div_round_up(dist->nirqs, 16); + alloc_registers(dist->gicd_icfgr, n, region_size); + init_mmio_region(hyp, VGIC_GICD_ICFGR, dist->start + GICD_ICFGR_BASE, + region_size, dist_icfgr_read, dist_icfgr_write); + + /* ARM GIC Architecture Specification, page 8-485. */ + n = 32 * (dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK + 1) - 1; + alloc_registers(dist->gicd_irouter, n, region_size); + init_mmio_region(hyp, VGIC_GICD_IROUTER, dist->start + GICD_IROUTER(0), + region_size, dist_irouter_read, dist_irouter_write); + + init_mmio_region(hyp, VGIC_GICD_PIDR2, dist->start + GICD_PIDR2, + sizeof(dist->gicd_pidr2), dist_pidr2_read, dist_pidr2_write); +} + +static void +redist_mmio_init_regions(struct hyp *hyp, int vcpuid) +{ + struct vgic_v3_redist *redist; + vm_offset_t start; + + redist = &hyp->ctx[vcpuid].vgic_redist; + start = redist->start + GICR_FRAME_RD + GICR_CTLR + GICR_FRAMES_END * vcpuid; + /* + hyp->vgic_mmio_regions[VGIC_GICR_CTLR] = (struct vgic_mmio_region) { + .start = start, + .end = start + sizeof(redist->gicr_ctlr), + .read = redist_ctlr_read, + .write = redist_ctlr_write, + }; + */ + init_mmio_region(hyp, VGIC_GICR_CTLR, start, sizeof(redist->gicr_ctlr), + redist_ctlr_read, redist_ctlr_write); + + start = redist->start + GICR_FRAME_RD + GICR_TYPER + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_TYPER, start, sizeof(redist->gicr_typer), + redist_typer_read, redist_typer_write); + + start = redist->start + GICR_FRAME_RD + GICR_WAKER + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_WAKER, start, 4, redist_waker_read, + redist_waker_write); + + start = redist->start + GICR_FRAME_RD + GICR_PIDR2 + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_PIDR2, start, 4, redist_pidr2_read, + redist_pidr2_write); + + start = redist->start + GICR_FRAME_SGI + GICR_IGROUPR0 + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_IGROUPR0, start, + sizeof(uint32_t), redist_igroupr0_read, redist_igroupr0_write); + + start = redist->start + GICR_FRAME_SGI + GICR_ISENABLER0 + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_ISENABLER0, start, + sizeof(redist->gicr_ixenabler0), redist_isenabler0_read, + redist_isenabler0_write); + + start = redist->start + GICR_FRAME_SGI + GICR_ICACTIVER0 + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_ICACTIVER0, start, + sizeof(redist->gicr_icactiver0), redist_icactiver0_read, + redist_icactiver0_write); + + start = redist->start + GICR_FRAME_SGI + 
GICR_ICENABLER0 + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_ICENABLER0, start, + sizeof(redist->gicr_ixenabler0), redist_icenabler0_read, + redist_icenabler0_write); + + start = redist->start + GICR_FRAME_SGI + GICR_IPRIORITYR_BASE + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_IPRIORITYR, start, + sizeof(redist->gicr_ipriorityr), redist_ipriorityr_read, + redist_ipriorityr_write); + + start = redist->start + GICR_FRAME_SGI + GICR_ICFGR0 + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_ICFGR0, start, + sizeof(redist->gicr_icfgr0), redist_icfgr0_read, redist_icfgr0_write); + + start = redist->start + GICR_FRAME_SGI + GICR_ICFGR1 + GICR_FRAMES_END * vcpuid; + init_mmio_region(hyp, VGIC_GICR_ICFGR1, start, + sizeof(redist->gicr_icfgr1), redist_icfgr1_read, redist_icfgr1_write); +} + +void +vgic_v3_mmio_init(struct hyp *hyp) +{ + struct vgic_v3_dist *dist = &hyp->vgic_dist; + int redist_region_num, dist_region_num, region_num; + int ncpus = VM_MAXCPU; + int i; + + dist_region_num = FIRST_REDIST_MMIO_REGION; + redist_region_num = \ + (VGIC_MMIO_REGIONS_NUM - FIRST_REDIST_MMIO_REGION); + region_num = dist_region_num + redist_region_num; + + hyp->vgic_mmio_regions = \ + malloc(region_num * sizeof(*hyp->vgic_mmio_regions), + M_VGIC_V3_MMIO, M_WAITOK | M_ZERO); + hyp->vgic_mmio_regions_num = region_num; + + dist_mmio_init_regions(dist, hyp); + + /* TODO: Do it for all VCPUs */ + for (i = 0; i < ncpus; i++) + redist_mmio_init_regions(hyp, i); +} + +void +vgic_v3_mmio_destroy(struct hyp *hyp) +{ + struct vgic_v3_dist *dist = &hyp->vgic_dist; + + if (!hyp->vgic_mmio_regions) + return; + free(hyp->vgic_mmio_regions, M_VGIC_V3_MMIO); + + free(dist->gicd_ixenabler, M_VGIC_V3_MMIO); + free(dist->gicd_ipriorityr, M_VGIC_V3_MMIO); + free(dist->gicd_icfgr, M_VGIC_V3_MMIO); + free(dist->gicd_irouter, M_VGIC_V3_MMIO); +} + +int +vgic_v3_icc_sgi1r_el1_read(void *vm, int vcpuid, uint64_t *rval, void *arg) +{ + bool *retu = arg; + + retu = false; + + return (0); +} + +int +vgic_v3_icc_sgi1r_el1_write(void *vm, int vcpuid, uint64_t wval, void *arg) +{ + struct hyp *hyp; + bool *retu = arg; + int intid, vcpu; + cpuset_t active_cpus; + + hyp = vm_get_cookie(vm); + active_cpus = vm_active_cpus(vm); + + if ((wval & ICC_SGI1R_EL1_IRM) == 0) { + intid = (wval >> ICC_SGI1R_EL1_SGIID_SHIFT) & + ICC_SGI1R_EL1_SGIID_MASK; + + for (vcpu = 0; vcpu < ICC_SGI1R_EL1_TargetList_Bits; vcpu++) { + if (CPU_ISSET(vcpu, &active_cpus) && vcpu != vcpuid) { + vgic_v3_inject_irq(&hyp->ctx[vcpu], intid, + VGIC_IRQ_MISC); + } + } + } else { + /* TODO Interrupts routed to all PEs, excluding "self" */ + } + + retu = false; + + return (0); +} Index: sys/arm64/vmm/io/vgic_v3_reg.h =================================================================== --- /dev/null +++ sys/arm64/vmm/io/vgic_v3_reg.h @@ -0,0 +1,97 @@ +#ifndef _VGIC_V3_REG_H_ +#define _VGIC_V3_REG_H_ + +/* Interrupt Controller End of Interrupt Status Register */ +#define ICH_EISR_EL2_STATUS_MASK 0xffff +#define ICH_EISR_EL2_EOI_NOT_HANDLED(lr) ((1 << lr) & ICH_EISR_EL2_STATUS_MASK) + +/* Interrupt Controller Empty List Register Status Register */ +#define ICH_ELRSR_EL2_STATUS_MASK 0xffff +#define ICH_ELRSR_EL2_LR_EMPTY(x) ((1 << x) & ICH_ELRSR_EL2_STATUS_MASK) + +/* Interrupt Controller Hyp Control Register */ +#define ICH_HCR_EL2_EOICOUNT_SHIFT 27 +#define ICH_HCR_EL2_EOICOUNT_MASK (0x1f << ICH_HCR_EL2_EOICOUNT_SHIFT) +#define ICH_HCR_EL2_TDIR (1 << 14) /* Trap non-secure EL1 writes to IC{C, V}_DIR_EL1 */ +#define ICH_HCR_EL2_TSEI (1 << 
14) /* Trap System Error Interupts (SEI) to EL2 */ +#define ICH_HCR_EL2_TALL1 (1 << 12) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 1 interrupts */ +#define ICH_HCR_EL2_TALL0 (1 << 11) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 0 interrupts */ +#define ICH_HCR_EL2_TC (1 << 10) /* Trap non-secure EL1 accesses to common IC{C, V}_* registers */ +#define ICH_HCR_EL2_VGRP1DIE (1 << 7) /* VM Group 1 Disabled Interrupt Enable */ +#define ICH_HCR_EL2_VGRP1EIE (1 << 6) /* VM Group 1 Enabled Interrupt Enable */ +#define ICH_HCR_EL2_VGRP0DIE (1 << 5) /* VM Group 0 Disabled Interrupt Enable */ +#define ICH_HCR_EL2_VGRP0EIE (1 << 4) /* VM Group 0 Enabled Interrupt Enable */ +#define ICH_HCR_EL2_NPIE (1 << 3) /* No Pending Interrupt Enable */ +#define ICH_HCR_EL2_LRENPIE (1 << 2) /* List Register Entry Not Present Interrupt Enable */ +#define ICH_HCR_EL2_UIE (1 << 1) /* Underflow Interrupt Enable */ +#define ICH_HCR_EL2_En (1 << 0) /* Global enable for the virtual CPU interface */ + +/* Interrupt Controller List Registers */ +#define ICH_LR_EL2_VINTID_MASK 0xffffffff +#define ICH_LR_EL2_VINTID(x) ((x) & ICH_LR_EL2_VINTID_MASK) +#define ICH_LR_EL2_PINTID_SHIFT 32 +#define ICH_LR_EL2_PINTID_MASK (0x3fUL << ICH_LR_EL2_PINTID_SHIFT) +#define ICH_LR_EL2_PRIO_SHIFT 48 +#define ICH_LR_EL2_PRIO_MASK (0xffUL << ICH_LR_EL2_PRIO_SHIFT) +#define ICH_LR_EL2_GROUP_SHIFT 60 +#define ICH_LR_EL2_GROUP1 (1UL << ICH_LR_EL2_GROUP_SHIFT) +#define ICH_LR_EL2_HW (1UL << 61) +#define ICH_LR_EL2_STATE_SHIFT 62 +#define ICH_LR_EL2_STATE_MASK (0x3UL << ICH_LR_EL2_STATE_SHIFT) +#define ICH_LR_EL2_STATE(x) ((x) & ICH_LR_EL2_STATE_MASK) +#define ICH_LR_EL2_STATE_INACTIVE (0x0UL << ICH_LR_EL2_STATE_SHIFT) +#define ICH_LR_EL2_STATE_PENDING (0x1UL << ICH_LR_EL2_STATE_SHIFT) +#define ICH_LR_EL2_STATE_ACTIVE (0x2UL << ICH_LR_EL2_STATE_SHIFT) +#define ICH_LR_EL2_STATE_PENDING_ACTIVE (0x3UL << ICH_LR_EL2_STATE_SHIFT) + +/* Interrupt Controller Maintenance Interrupt State Register */ +#define ICH_MISR_EL2_VGRP1D (1 << 7) /* vPE Group 1 Disabled */ +#define ICH_MISR_EL2_VGRP1E (1 << 6) /* vPE Group 1 Enabled */ +#define ICH_MISR_EL2_VGRP0D (1 << 5) /* vPE Group 0 Disabled */ +#define ICH_MISR_EL2_VGRP0E (1 << 4) /* vPE Group 0 Enabled */ +#define ICH_MISR_EL2_NP (1 << 3) /* No Pending */ +#define ICH_MISR_EL2_LRENP (1 << 2) /* List Register Entry Not Present */ +#define ICH_MISR_EL2_U (1 << 1) /* Underflow */ +#define ICH_MISR_EL2_EOI (1 << 0) /* End Of Interrupt */ + +/* Interrupt Controller Virtual Machine Control Register */ +#define ICH_VMCR_EL2_VPMR_SHIFT 24 +#define ICH_VMCR_EL2_VPMR_MASK (0xff << ICH_VMCR_EL2_VPMR_SHIFT) +#define ICH_VMCR_EL2_VPMR_PRIO_LOWEST (0xff << ICH_VMCR_EL2_VPMR_SHIFT) +#define ICH_VMCR_EL2_VPMR_PRIO_HIGHEST (0x00 << ICH_VMCR_EL2_VPMR_SHIFT) +#define ICH_VMCR_EL2_VBPR0_SHIFT 21 +#define ICH_VMCR_EL2_VBPR0_MASK (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT) +#define ICH_VMCR_EL2_VBPR0_NO_PREEMPTION \ + (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT) +#define ICH_VMCR_EL2_VBPR1_SHIFT 18 +#define ICH_VMCR_EL2_VBPR1_MASK (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT) +#define ICH_VMCR_EL2_VBPR1_NO_PREEMPTION \ + (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT) +#define ICH_VMCR_EL2_VEOIM (1 << 9) /* Virtual EOI mode */ +#define ICH_VMCR_EL2_VCBPR (1 << 4) /* Virtual Common binary Point Register */ +#define ICH_VMCR_EL2_VFIQEN (1 << 3) /* Virtual FIQ enable */ +#define ICH_VMCR_EL2_VACKCTL (1 << 2) /* Virtual AckCtl */ +#define ICH_VMCR_EL2_VENG1 (1 << 1) /* Virtual Group 1 Interrupt Enable */ +#define ICH_VMCR_EL2_VENG0 (1 << 0) /* Virtual 
Group 0 Interrupt Enable */ + +/* Interrupt Controller VGIC Type Register */ +#define ICH_VTR_EL2_PRIBITS_SHIFT 29 +#define ICH_VTR_EL2_PRIBITS_MASK (0x7 << ICH_VTR_EL2_PRIBITS_SHIFT) +#define ICH_VTR_EL2_PRIBITS(x) \ + ((((x) & ICH_VTR_EL2_PRIBITS_MASK) >> ICH_VTR_EL2_PRIBITS_SHIFT) + 1) +#define ICH_VTR_EL2_PREBITS_SHIFT 26 +#define ICH_VTR_EL2_PREBITS_MASK (0x7 << ICH_VTR_EL2_PREBITS_SHIFT) +#define ICH_VTR_EL2_PREBITS(x) \ + (((x) & ICH_VTR_EL2_PREBITS_MASK) >> ICH_VTR_EL2_PREBITS_SHIFT) +#define ICH_VTR_EL2_SEIS (1 << 22) /* System Error Interrupt (SEI) Support */ +#define ICH_VTR_EL2_A3V (1 << 21) /* Affinity 3 Valid */ +#define ICH_VTR_EL2_NV4 (1 << 20) /* Direct injection of virtual interrupts. RES1 for GICv3 */ +#define ICH_VTR_EL2_TDS (1 << 19) /* Implementation supports ICH_HCR_EL2.TDIR */ +#define ICH_VTR_EL2_LISTREGS_MASK 0x1f +/* + * ICH_VTR_EL2.ListRegs holds the number of list registers, minus one. Add one + * to get the actual number of list registers. + */ +#define ICH_VTR_EL2_LISTREGS(x) (((x) & ICH_VTR_EL2_LISTREGS_MASK) + 1) + +#endif /* !_VGIC_V3_REG_H_ */ Index: sys/arm64/vmm/io/vtimer.h =================================================================== --- /dev/null +++ sys/arm64/vmm/io/vtimer.h @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 2017 The FreeBSD Foundation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_VTIMER_H_ +#define _VMM_VTIMER_H_ + +#define GT_PHYS_NS_IRQ 30 +#define GT_VIRT_IRQ 27 + +#define CNTP_CTL_EL0_OP0 0b11 +#define CNTP_CTL_EL0_OP2 0b001 +#define CNTP_CTL_EL0_OP1 0b011 +#define CNTP_CTL_EL0_CRn 0b1110 +#define CNTP_CTL_EL0_CRm 0b0010 +#define ISS_CNTP_CTL_EL0 \ + (CNTP_CTL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \ + CNTP_CTL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \ + CNTP_CTL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \ + CNTP_CTL_EL0_CRn << ISS_MSR_CRn_SHIFT | \ + CNTP_CTL_EL0_CRm << ISS_MSR_CRm_SHIFT) + +#define CNTP_CVAL_EL0_OP0 0b11 +#define CNTP_CVAL_EL0_OP1 0b011 +#define CNTP_CVAL_EL0_OP2 0b010 +#define CNTP_CVAL_EL0_CRn 0b1110 +#define CNTP_CVAL_EL0_CRm 0b0010 +#define ISS_CNTP_CVAL_EL0 \ + (CNTP_CVAL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \ + CNTP_CVAL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \ + CNTP_CVAL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \ + CNTP_CVAL_EL0_CRn << ISS_MSR_CRn_SHIFT | \ + CNTP_CVAL_EL0_CRm << ISS_MSR_CRm_SHIFT) + +#define CNTP_TVAL_EL0_OP0 0b11 +#define CNTP_TVAL_EL0_OP1 0b011 +#define CNTP_TVAL_EL0_OP2 0b000 +#define CNTP_TVAL_EL0_CRn 0b1110 +#define CNTP_TVAL_EL0_CRm 0b0010 +#define ISS_CNTP_TVAL_EL0 \ + (CNTP_TVAL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \ + CNTP_TVAL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \ + CNTP_TVAL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \ + CNTP_TVAL_EL0_CRn << ISS_MSR_CRn_SHIFT | \ + CNTP_TVAL_EL0_CRm << ISS_MSR_CRm_SHIFT) + +struct vtimer +{ + uint64_t cnthctl_el2; + uint64_t cntvoff_el2; +}; + +struct vtimer_cpu +{ + struct callout callout; + uint32_t cntkctl_el1; + /* + * Emulated registers: + * + * CNTP_CTL_EL0: Counter-timer Physical Timer Control Register + * CNTP_CVAL_EL0: Counter-timer Physical Timer CompareValue Register + */ + uint64_t cntp_cval_el0; + uint32_t cntp_ctl_el0; + /* + * The virtual machine has full access to the virtual timer. The + * following registers are part of the VM context for the current CPU: + * + * CNTV_CTL_EL0: Counter-timer Virtuel Timer Control Register + * CNTV_CVAL_EL0: Counter-timer Virtual Timer CompareValue Register + */ + uint64_t cntv_cval_el0; + uint32_t cntv_ctl_el0; +}; + +int vtimer_init(uint64_t cnthctl_el2); +void vtimer_vminit(void *arg); +void vtimer_cpuinit(void *arg); +void vtimer_vmcleanup(void *arg); +void vtimer_cleanup(void); + +int vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg); +int vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg); +int vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg); +int vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg); +int vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg); +int vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg); +#endif Index: sys/arm64/vmm/io/vtimer.c =================================================================== --- /dev/null +++ sys/arm64/vmm/io/vtimer.c @@ -0,0 +1,407 @@ +/*- + * Copyright (c) 2017 The FreeBSD Foundation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "vgic_v3.h" +#include "vtimer.h" + +#define RES1 0xffffffffffffffffUL + +#define timer_enabled(ctl) \ + (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE)) + +static uint64_t cnthctl_el2_reg; +static uint32_t tmr_frq; + +#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS) + +static int +vtimer_virtual_timer_intr(void *arg) +{ + struct hypctx *hypctx; + uint32_t cntv_ctl; + + /* + * TODO everything here is very strange. The relantionship between the + * hardware value and the value in memory is not clear at all. + */ + + hypctx = arm64_get_active_vcpu(); + cntv_ctl = READ_SPECIALREG(cntv_ctl_el0); + + if (!hypctx) { + /* vm_destroy() was called. */ + eprintf("No active vcpu\n"); + cntv_ctl = READ_SPECIALREG(cntv_ctl_el0); + goto out; + } + if (!timer_enabled(cntv_ctl)) { + eprintf("Timer not enabled\n"); + goto out; + } + if (!timer_condition_met(cntv_ctl)) { + eprintf("Timer condition not met\n"); + goto out; + } + + vgic_v3_inject_irq(hypctx, GT_VIRT_IRQ, VGIC_IRQ_CLK); + + hypctx->vtimer_cpu.cntv_ctl_el0 &= ~CNTP_CTL_ENABLE; + cntv_ctl = hypctx->vtimer_cpu.cntv_ctl_el0; + +out: + /* + * Disable the timer interrupt. This will prevent the interrupt from + * being reasserted as soon as we exit the handler and getting stuck + * in an infinite loop. + * + * This is safe to do because the guest disabled the timer, and then + * enables it as part of the interrupt handling routine. + */ + cntv_ctl &= ~CNTP_CTL_ENABLE; + WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl); + + return (FILTER_HANDLED); +} + +int +vtimer_init(uint64_t cnthctl_el2) +{ + int error; + + cnthctl_el2_reg = cnthctl_el2; + /* + * The guest *MUST* use the same timer frequency as the host. The + * register CNTFRQ_EL0 is accessible to the guest and a different value + * in the guest dts file might have unforseen consequences. + */ + tmr_frq = READ_SPECIALREG(cntfrq_el0); + + error = arm_tmr_setup_intr(GT_VIRT, vtimer_virtual_timer_intr, NULL, NULL); + if (error) { + printf("WARNING: arm_tmr_setup_intr() error: %d\n", error); + printf("WARNING: Expect reduced performance\n"); + } + + return (0); +} + +void +vtimer_vminit(void *arg) +{ + struct hyp *hyp; + uint64_t now; + + hyp = (struct hyp *)arg; + /* + * Configure the Counter-timer Hypervisor Control Register for the VM. 
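+ * The physical timer is emulated by vtimer_phys_*_read/write, so guest
+ * accesses to it must trap to EL2, while reads of the physical counter are
+ * passed through.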
+ * + * ~CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 from EL1 + * CNTHCTL_EL1PCTEN: don't trap access to CNTPCT_EL0 + */ + hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN; + hyp->vtimer.cnthctl_el2 |= CNTHCTL_EL1PCTEN; + + now = READ_SPECIALREG(cntpct_el0); + hyp->vtimer.cntvoff_el2 = now; + + return; +} + +void +vtimer_cpuinit(void *arg) +{ + struct hypctx *hypctx; + struct vtimer_cpu *vtimer_cpu; + + hypctx = (struct hypctx *)arg; + vtimer_cpu = &hypctx->vtimer_cpu; + /* + * Configure physical timer interrupts for the VCPU. + * + * CNTP_CTL_IMASK: mask interrupts + * ~CNTP_CTL_ENABLE: disable the timer + */ + vtimer_cpu->cntp_ctl_el0 = CNTP_CTL_IMASK & ~CNTP_CTL_ENABLE; + /* + * Callout function is MP_SAFE because the VGIC uses a spin + * mutex when modifying the list registers. + */ + callout_init(&vtimer_cpu->callout, 1); +} + +void +vtimer_vmcleanup(void *arg) +{ + struct hyp *hyp; + struct hypctx *hypctx; + struct vtimer *vtimer; + struct vtimer_cpu *vtimer_cpu; + uint32_t cntv_ctl; + int i; + + hyp = arg; + vtimer = &hyp->vtimer; + + hypctx = arm64_get_active_vcpu(); + if (!hypctx) { + /* The active VM was destroyed, stop the timer. */ + cntv_ctl = READ_SPECIALREG(cntv_ctl_el0); + cntv_ctl &= ~CNTP_CTL_ENABLE; + WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl); + } + + for (i = 0; i < VM_MAXCPU; i++) { + vtimer_cpu = &hyp->ctx[i].vtimer_cpu; + callout_drain(&vtimer_cpu->callout); + } +} + +void +vtimer_cleanup(void) +{ + int error; + + error = arm_tmr_teardown_intr(GT_VIRT); + if (error) + printf("WARNING: arm_tmr_teardown_intr() error: %d\n", error); + +} + +static void +vtimer_inject_irq_callout_func(void *context) +{ + struct hypctx *hypctx; + + hypctx = context; + vgic_v3_inject_irq(hypctx, GT_PHYS_NS_IRQ, VGIC_IRQ_CLK); +} + + +static void +vtimer_schedule_irq(struct vtimer_cpu *vtimer_cpu, struct hypctx *hypctx) +{ + sbintime_t time; + uint64_t cntpct_el0; + uint64_t diff; + + cntpct_el0 = READ_SPECIALREG(cntpct_el0); + if (vtimer_cpu->cntp_cval_el0 < cntpct_el0) { + /* Timer set in the past, trigger interrupt */ + vgic_v3_inject_irq(hypctx, GT_PHYS_NS_IRQ, VGIC_IRQ_CLK); + } else { + diff = vtimer_cpu->cntp_cval_el0 - cntpct_el0; + time = diff * SBT_1S / tmr_frq; + callout_reset_sbt(&vtimer_cpu->callout, time, 0, + vtimer_inject_irq_callout_func, hypctx, 0); + } +} + +static void +vtimer_remove_irq(struct hypctx *hypctx) +{ + struct vtimer_cpu *vtimer_cpu; + + vtimer_cpu = &hypctx->vtimer_cpu; + + callout_drain(&vtimer_cpu->callout); + /* + * The interrupt needs to be deactivated here regardless of the callout + * function having been executed. The timer interrupt can be masked with + * the CNTP_CTL_EL0.IMASK bit instead of reading the IAR register. + * Masking the interrupt doesn't remove it from the list registers. + */ + vgic_v3_remove_irq(hypctx, GT_PHYS_NS_IRQ, true); +} + +/* + * Timer emulation functions. + * + * The guest dts is configured to use the physical timer because the Generic + * Timer can only trap physical timer accesses. This is why we always read the + * physical counter value when programming the time for the timer interrupt in + * the guest. 
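+ * vtimer_schedule_irq() compares CNTP_CVAL_EL0 with the current CNTPCT_EL0
+ * value and either injects the interrupt immediately or arms a callout for the
+ * remaining time.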
+ */ + +int +vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg) +{ + struct hyp *hyp; + struct vtimer_cpu *vtimer_cpu; + uint64_t cntpct_el0; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu; + + cntpct_el0 = READ_SPECIALREG(cntpct_el0); + if (vtimer_cpu->cntp_cval_el0 < cntpct_el0) + /* Timer condition met */ + *rval = vtimer_cpu->cntp_ctl_el0 | CNTP_CTL_ISTATUS; + else + *rval = vtimer_cpu->cntp_ctl_el0 & ~CNTP_CTL_ISTATUS; + + *retu = false; + return (0); +} + +int +vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg) +{ + struct hyp *hyp; + struct hypctx *hypctx; + struct vtimer_cpu *vtimer_cpu; + uint64_t ctl_el0; + bool timer_toggled_on; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + hypctx = &hyp->ctx[vcpuid]; + vtimer_cpu = &hypctx->vtimer_cpu; + + timer_toggled_on = false; + ctl_el0 = vtimer_cpu->cntp_ctl_el0; + + if (!timer_enabled(ctl_el0) && timer_enabled(wval)) + timer_toggled_on = true; + + vtimer_cpu->cntp_ctl_el0 = wval; + + if (timer_toggled_on) + vtimer_schedule_irq(vtimer_cpu, hypctx); + + *retu = false; + return (0); +} + +int +vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg) +{ + struct hyp *hyp; + struct vtimer_cpu *vtimer_cpu; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu; + + *rval = vtimer_cpu->cntp_cval_el0; + + *retu = false; + return (0); +} + +int +vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg) +{ + struct hyp *hyp; + struct hypctx *hypctx; + struct vtimer_cpu *vtimer_cpu; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + hypctx = &hyp->ctx[vcpuid]; + vtimer_cpu = &hypctx->vtimer_cpu; + + vtimer_cpu->cntp_cval_el0 = wval; + + if (timer_enabled(vtimer_cpu->cntp_ctl_el0)) { + vtimer_remove_irq(hypctx); + vtimer_schedule_irq(vtimer_cpu, hypctx); + } + + *retu = false; + return (0); +} + +int +vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg) +{ + struct hyp *hyp; + struct vtimer_cpu *vtimer_cpu; + uint32_t cntpct_el0; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu; + + if (!(vtimer_cpu->cntp_ctl_el0 & CNTP_CTL_ENABLE)) { + /* + * ARMv8 Architecture Manual, p. D7-2702: the result of reading + * TVAL when the timer is disabled is UNKNOWN. I have chosen to + * return the maximum value possible on 32 bits which means the + * timer will fire very far into the future. 
+ */ + *rval = (uint32_t)RES1; + } else { + cntpct_el0 = READ_SPECIALREG(cntpct_el0); + *rval = vtimer_cpu->cntp_cval_el0 - cntpct_el0; + } + + *retu = false; + return (0); +} + +int +vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg) +{ + struct hyp *hyp; + struct hypctx *hypctx; + struct vtimer_cpu *vtimer_cpu; + uint64_t cntpct_el0; + bool *retu = arg; + + hyp = vm_get_cookie(vm); + hypctx = &hyp->ctx[vcpuid]; + vtimer_cpu = &hypctx->vtimer_cpu; + + cntpct_el0 = READ_SPECIALREG(cntpct_el0); + vtimer_cpu->cntp_cval_el0 = (int32_t)wval + cntpct_el0; + + if (timer_enabled(vtimer_cpu->cntp_ctl_el0)) { + vtimer_remove_irq(hypctx); + vtimer_schedule_irq(vtimer_cpu, hypctx); + } + + *retu = false; + return (0); +} Index: sys/arm64/vmm/mmu.h =================================================================== --- /dev/null +++ sys/arm64/vmm/mmu.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * This software was developed by Alexandru Elisei under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_MMU_H_ +#define _VMM_MMU_H_ + +#include +#include + +#include "hyp.h" + +#define ktohyp(kva) (((vm_offset_t)(kva) & HYP_KVA_MASK) | \ + HYP_KVA_OFFSET) +#define ipatok(ipa, hypmap) (PHYS_TO_DMAP(pmap_extract(hypmap, (ipa)))) +#define gtoipa(gva) ((gva) - KERNBASE + VM_GUEST_BASE_IPA) + +#define page_aligned(x) (((vm_offset_t)(x) & PAGE_MASK) == 0) + +void hypmap_init(pmap_t map, enum pmap_stage pm_stage); +void hypmap_map(pmap_t map, vm_offset_t va, size_t len, + vm_prot_t prot); +void hypmap_map_identity(pmap_t map, vm_offset_t va, size_t len, + vm_prot_t prot); +void hypmap_set(void *arg, vm_offset_t va, vm_offset_t pa, + size_t len, vm_prot_t prot); +vm_paddr_t hypmap_get(void *arg, vm_offset_t va); +void hypmap_cleanup(pmap_t map); + +#endif Index: sys/arm64/vmm/mmu.c =================================================================== --- /dev/null +++ sys/arm64/vmm/mmu.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * This software was developed by Alexandru Elisei under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mmu.h" +#include "arm64.h" + +MALLOC_DECLARE(M_HYP); + +void +hypmap_init(pmap_t map, enum pmap_stage pm_stage) +{ + mtx_init(&map->pm_mtx, "hypmap_pm_mtx", NULL, MTX_DEF); + pmap_pinit_stage(map, pm_stage, 4); +} + +void +hypmap_map(pmap_t map, vm_offset_t va, size_t len, vm_prot_t prot) +{ + vm_offset_t va_end, hypva; + vm_page_t dummy_page; + + dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO); + dummy_page->oflags = VPO_UNMANAGED; + dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT; + + /* + * Add the physical pages which correspond to the specified virtual + * addresses.The virtual addresses span contiguous virtual pages, but + * they might not reside in contiguous physical pages. + */ + va_end = va + len - 1; + va = trunc_page(va); + while (va < va_end) { + dummy_page->phys_addr = vtophys(va); + hypva = (va >= VM_MIN_KERNEL_ADDRESS) ? 
ktohyp(va) : va; + pmap_enter(map, hypva, dummy_page, prot, PMAP_ENTER_WIRED, 0); + va += PAGE_SIZE; + } + + free(dummy_page, M_HYP); +} + +void +hypmap_map_identity(pmap_t map, vm_offset_t va, size_t len, + vm_prot_t prot) +{ + vm_offset_t va_end; + vm_page_t dummy_page; + + dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO); + dummy_page->oflags = VPO_UNMANAGED; + dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT; + + /* + * The virtual addresses span contiguous virtual pages, but they might + * not reside in contiguous physical pages. For each virtual page we + * get the physical page address and use that for the mapping. + */ + va_end = va + len - 1; + va = trunc_page(va); + while (va < va_end) { + dummy_page->phys_addr = vtophys(va); + pmap_enter(map, dummy_page->phys_addr, dummy_page, + prot, PMAP_ENTER_WIRED, 0); + va += PAGE_SIZE; + } + + free(dummy_page, M_HYP); +} + +/* + * Map 'len' bytes starting at virtual address 'va' to 'len' bytes + * starting at physical address 'pa' + */ +void +hypmap_set(void *arg, vm_offset_t va, vm_offset_t pa, size_t len, + vm_prot_t prot) +{ + vm_offset_t va_end, hypva; + vm_page_t dummy_page; + struct hyp *hyp; + pmap_t map; + + hyp = (struct hyp *)arg; + map = hyp->stage2_map; + + dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO); + dummy_page->oflags = VPO_UNMANAGED; + dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT; + + va_end = va + len - 1; + va = trunc_page(va); + dummy_page->phys_addr = trunc_page(pa); + while (va < va_end) { + hypva = (va >= VM_MIN_KERNEL_ADDRESS) ? ktohyp(va) : va; + pmap_enter(map, hypva, dummy_page, prot, PMAP_ENTER_WIRED, 0); + va += PAGE_SIZE; + dummy_page->phys_addr += PAGE_SIZE; + } + + free(dummy_page, M_HYP); +} + +/* + * Return the physical address associated with virtual address 'va' + */ +vm_paddr_t +hypmap_get(void *arg, vm_offset_t va) +{ + struct hyp *hyp; + pmap_t map; + + hyp = (struct hyp *)arg; + map = hyp->stage2_map; + + return pmap_extract(map, va); +} + +/* + * Remove all the mappings from the hyp translation tables + */ +void +hypmap_cleanup(pmap_t map) +{ + pmap_remove(map, HYP_VM_MIN_ADDRESS, HYP_VM_MAX_ADDRESS); + mtx_destroy(&map->pm_mtx); + pmap_release(map); +} Index: sys/arm64/vmm/psci.h =================================================================== --- /dev/null +++ sys/arm64/vmm/psci.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _PSCI_H_ +#define _PSCI_H_ + +#include "arm64.h" + +int psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme, + bool *retu); + +#endif Index: sys/arm64/vmm/psci.c =================================================================== --- /dev/null +++ sys/arm64/vmm/psci.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +#include + +#include "arm64.h" +#include "psci.h" + +#define PSCI_VERSION_0_2 0x2 + +static int +psci_version(struct hypctx *hypctx, bool *retu) +{ + + hypctx->regs.x[0] = PSCI_VERSION_0_2; + + *retu = false; + return (0); +} + +static int +psci_migrate_type(struct hypctx *hypctx, bool *retu) +{ + + hypctx->regs.x[0] = PSCI_VERSION_0_2; + + *retu = false; + return (0); +} + +static int +psci_system_off(struct vm_exit *vme, bool *retu) +{ + vme->u.suspended.how = VM_SUSPEND_POWEROFF; + vme->exitcode = VM_EXITCODE_SUSPENDED; + + *retu = true; + return (0); +} + +int +psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu) +{ + struct hyp *hyp; + struct hypctx *hypctx; + uint64_t func_id; + uint32_t esr_el2, esr_iss; + int error; + + hyp = vm_get_cookie(vm); + hypctx = &hyp->ctx[vcpuid]; + + esr_el2 = hypctx->exit_info.esr_el2; + esr_iss = esr_el2 & ESR_ELx_ISS_MASK; + + if (esr_iss != 0) { + eprintf("Malformed HVC instruction with immediate: 0x%x\n", + esr_iss); + error = 1; + goto out; + } + + func_id = hypctx->regs.x[0]; + switch (func_id) { + case PSCI_FNID_VERSION: + error = psci_version(hypctx, retu); + break; + case PSCI_FNID_SYSTEM_OFF: + error = psci_system_off(vme, retu); + break; + case PSCI_FNID_CPU_ON: + vme->exitcode = VM_EXITCODE_SPINUP_AP; + vme->u.spinup_ap.vcpu = hypctx->regs.x[1]; + vme->u.spinup_ap.rip = hypctx->regs.x[2]; + vme->u.spinup_ap.ctx_id = hypctx->regs.x[3]; + *retu = true; + error = 0; + break; + case PSCI_FNID_MIGRATE_INFO_TYPE: + error = psci_migrate_type(hypctx, retu); + break; + case PSCI_FNID_MIGRATE_INFO_UP_CPU: + hypctx->regs.x[0] = PSCI_VERSION_0_2; + error = 0; + *retu = false; + break; + default: + eprintf("Unimplemented PSCI function: 0x%016lx\n", func_id); + hypctx->regs.x[0] = PSCI_RETVAL_NOT_SUPPORTED; + error = 1; + } + +out: + return (error); +} Index: sys/arm64/vmm/reset.h =================================================================== --- /dev/null +++ sys/arm64/vmm/reset.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#ifndef _VMM_RESET_H_ +#define _VMM_RESET_H_ + +void reset_vm_el01_regs(void *vcpu); +void reset_vm_el2_regs(void *vcpu); + +#endif Index: sys/arm64/vmm/reset.c =================================================================== --- /dev/null +++ sys/arm64/vmm/reset.c @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include + +#include "arm64.h" +#include "reset.h" + +/* + * Make the architecturally UNKNOWN value 0. As a bonus, we don't have to + * manually set all those RES0 fields. 
+ */ +#define ARCH_UNKNOWN 0 +#define set_arch_unknown(reg) (memset(&(reg), ARCH_UNKNOWN, sizeof(reg))) + +void +reset_vm_el01_regs(void *vcpu) +{ + struct hypctx *el2ctx; + + el2ctx = vcpu; + + set_arch_unknown(el2ctx->regs); + + set_arch_unknown(el2ctx->actlr_el1); + set_arch_unknown(el2ctx->afsr0_el1); + set_arch_unknown(el2ctx->afsr1_el1); + set_arch_unknown(el2ctx->amair_el1); + set_arch_unknown(el2ctx->contextidr_el1); + set_arch_unknown(el2ctx->cpacr_el1); + set_arch_unknown(el2ctx->elr_el1); + set_arch_unknown(el2ctx->esr_el1); + set_arch_unknown(el2ctx->far_el1); + set_arch_unknown(el2ctx->mair_el1); + set_arch_unknown(el2ctx->par_el1); + + /* + * Guest starts with: + * ~SCTLR_M: MMU off + * ~SCTLR_C: data cache off + * SCTLR_CP15BEN: memory barrier instruction enable from EL0; RAO/WI + * ~SCTLR_I: instruction cache off + */ + el2ctx->sctlr_el1 = SCTLR_RES1; + el2ctx->sctlr_el1 &= ~SCTLR_M & ~SCTLR_C & ~SCTLR_I; + el2ctx->sctlr_el1 |= SCTLR_CP15BEN; + + set_arch_unknown(el2ctx->sp_el0); + set_arch_unknown(el2ctx->tcr_el1); + set_arch_unknown(el2ctx->tpidr_el0); + set_arch_unknown(el2ctx->tpidr_el1); + set_arch_unknown(el2ctx->tpidrro_el0); + set_arch_unknown(el2ctx->ttbr0_el1); + set_arch_unknown(el2ctx->ttbr1_el1); + set_arch_unknown(el2ctx->vbar_el1); + set_arch_unknown(el2ctx->spsr_el1); +} + +void +reset_vm_el2_regs(void *vcpu) +{ + struct hypctx *el2ctx; + uint64_t cpu_aff; + + el2ctx = vcpu; + + /* + * Set the Hypervisor Configuration Register: + * + * HCR_RW: use AArch64 for EL1 + * HCR_BSU_IS: barrier instructions apply to the inner shareable + * domain + * HCR_SWIO: turn set/way invalidate into set/way clean and + * invalidate + * HCR_FB: broadcast maintenance operations + * HCR_AMO: route physical SError interrupts to EL2 + * HCR_IMO: route physical IRQ interrupts to EL2 + * HCR_FMO: route physical FIQ interrupts to EL2 + * HCR_VM: use stage 2 translation + */ + el2ctx->hcr_el2 = HCR_RW | HCR_BSU_IS | HCR_SWIO | HCR_FB | \ + HCR_VM | HCR_AMO | HCR_IMO | HCR_FMO; + + el2ctx->vmpidr_el2 = VMPIDR_EL2_RES1; + /* The guest will detect a multi-core, single-threaded CPU */ + el2ctx->vmpidr_el2 &= ~VMPIDR_EL2_U & ~VMPIDR_EL2_MT; + /* Only 24 bits of affinity, for a grand total of 16,777,216 cores. */ + cpu_aff = el2ctx->vcpu & (CPU_AFF0_MASK | CPU_AFF1_MASK | CPU_AFF2_MASK); + el2ctx->vmpidr_el2 |= cpu_aff; + + /* Use the same CPU identification information as the host */ + el2ctx->vpidr_el2 = CPU_IMPL_TO_MIDR(CPU_IMPL_ARM); + el2ctx->vpidr_el2 |= CPU_VAR_TO_MIDR(0); + el2ctx->vpidr_el2 |= CPU_ARCH_TO_MIDR(0xf); + el2ctx->vpidr_el2 |= CPU_PART_TO_MIDR(CPU_PART_FOUNDATION); + el2ctx->vpidr_el2 |= CPU_REV_TO_MIDR(0); + + /* + * Don't trap accesses to CPACR_EL1, trace, SVE, Advanced SIMD + * and floating point functionality to EL2. + */ + el2ctx->cptr_el2 = CPTR_RES1; + /* + * Disable interrupts in the guest. The guest OS will re-enable + * them. + */ + el2ctx->spsr_el2 = PSR_D | PSR_A | PSR_I | PSR_F; + /* Use the EL1 stack when taking exceptions to EL1 */ + el2ctx->spsr_el2 |= PSR_M_EL1h; +} Index: sys/arm64/vmm/vmm.c =================================================================== --- /dev/null +++ sys/arm64/vmm/vmm.c @@ -0,0 +1,918 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmm_stat.h" +#include "vmm_mem.h" +#include "arm64.h" +#include "mmu.h" +#include "psci.h" + +#include "io/vgic_v3.h" +#include "io/vtimer.h" + +#define BSP 0 /* the boostrap processor */ + +struct vcpu { + int flags; + enum vcpu_state state; + struct mtx mtx; + int hostcpu; /* host cpuid this vcpu last ran on */ + int vcpuid; + void *stats; + struct vm_exit exitinfo; + uint64_t nextpc; /* (x) next instruction to execute */ +}; + +#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) +#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) +#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) +#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) +#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) + +struct mem_seg { + uint64_t gpa; + size_t len; + bool wired; + vm_object_t object; +}; +#define VM_MAX_MEMORY_SEGMENTS 2 + +struct vm { + void *cookie; + struct vcpu vcpu[VM_MAXCPU]; + int num_mem_segs; + struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS]; + char name[VM_MAX_NAMELEN]; + /* + * Set of active vcpus. + * An active vcpu is one that has been started implicitly (BSP) or + * explicitly (AP) by sending it a startup ipi. + */ + cpuset_t active_cpus; + uint16_t maxcpus; +}; + +static bool vmm_initialized = false; + +static struct vmm_ops *ops = NULL; + +#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) +#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) + +#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL) +#define VMRUN(vmi, vcpu, pc, pmap, rvc, sc) \ + (ops != NULL ? (*ops->vmrun)(vmi, vcpu, pc, pmap, rvc, sc) : ENXIO) +#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) +#define VMMMAP_SET(vmi, ipa, pa, len, prot) \ + (ops != NULL ? \ + (*ops->vmmapset)(vmi, ipa, pa, len, prot) : ENXIO) +#define VMMMAP_GET(vmi, gpa) \ + (ops != NULL ? 
(*ops->vmmapget)(vmi, gpa) : ENXIO) +#define VMGETREG(vmi, vcpu, num, retval) \ + (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) +#define VMSETREG(vmi, vcpu, num, val) \ + (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) +#define VMGETCAP(vmi, vcpu, num, retval) \ + (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) +#define VMSETCAP(vmi, vcpu, num, val) \ + (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) + +#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) +#define fpu_stop_emulating() clts() + +static int vm_handle_wfi(struct vm *vm, int vcpuid, + struct vm_exit *vme, bool *retu); + +static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); + +/* statistics */ +static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); + +SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); + +/* + * Halt the guest if all vcpus are executing a HLT instruction with + * interrupts disabled. + */ +static int halt_detection_enabled = 1; +SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, + &halt_detection_enabled, 0, + "Halt VM if all vcpus execute HLT with interrupts disabled"); + +static int vmm_ipinum; +SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, + "IPI vector used for vcpu notifications"); + +static int trace_guest_exceptions; +SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, + &trace_guest_exceptions, 0, + "Trap into hypervisor on all guest exceptions and reflect them back"); + +static void +vcpu_cleanup(struct vm *vm, int i, bool destroy) +{ +// struct vcpu *vcpu = &vm->vcpu[i]; +} + +static void +vcpu_init(struct vm *vm, uint32_t vcpu_id, bool create) +{ + struct vcpu *vcpu; + + KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, + ("cpus_init: invalid vcpu %d", vcpu_id)); + + vcpu = &vm->vcpu[vcpu_id]; + + if (create) { + KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " + "initialized", vcpu_id)); + vcpu_lock_init(vcpu); + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; + } +} + +struct vm_exit * +vm_exitinfo(struct vm *vm, int cpuid) +{ + struct vcpu *vcpu; + + if (cpuid < 0 || cpuid >= VM_MAXCPU) + panic("vm_exitinfo: invalid cpuid %d", cpuid); + + vcpu = &vm->vcpu[cpuid]; + + return (&vcpu->exitinfo); +} + +static int +vmm_init(void) +{ + ops = &vmm_ops_arm; + + return (VMM_INIT(0)); +} + +static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + vmmdev_init(); + error = vmm_init(); + if (error == 0) + vmm_initialized = true; + break; + case MOD_UNLOAD: + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = VMM_CLEANUP(); + if (error) + vmm_initialized = false; + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - HYP initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). + */ +DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +int +vm_create(const char *name, struct vm **retvm) +{ + struct vm *vm; + int i; + + /* + * If vmm.ko could not be successfully initialized then don't attempt + * to create the virtual machine. 
+ */ + if (!vmm_initialized) + return (ENXIO); + + if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) + return (EINVAL); + + vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + strcpy(vm->name, name); + vm->maxcpus = VM_MAXCPU; + vm->cookie = VMINIT(vm); + + CPU_ZERO(&vm->active_cpus); + for (i = 0; i < vm->maxcpus; i++) + vcpu_init(vm, i, true); + + *retvm = vm; + return (0); +} + +static void +vm_cleanup(struct vm *vm, bool destroy) +{ + VMCLEANUP(vm->cookie); +} + +void +vm_destroy(struct vm *vm) +{ + vm_cleanup(vm, true); + free(vm, M_VMM); +} + +const char * +vm_name(struct vm *vm) +{ + return (vm->name); +} + +#include +#include + +static caddr_t +search_by_type(const char *type, caddr_t preload_metadata) +{ + caddr_t curp, lname; + uint32_t *hdr; + int next; + + if (preload_metadata != NULL) { + + curp = preload_metadata; + lname = NULL; + for (;;) { + hdr = (uint32_t *)curp; + if (hdr[0] == 0 && hdr[1] == 0) + break; + + /* remember the start of each record */ + if (hdr[0] == MODINFO_NAME) + lname = curp; + + /* Search for a MODINFO_TYPE field */ + if ((hdr[0] == MODINFO_TYPE) && + !strcmp(type, curp + sizeof(uint32_t) * 2)) + return(lname); + + /* skip to next field */ + next = sizeof(uint32_t) * 2 + hdr[1]; + next = roundup(next, sizeof(u_long)); + curp += next; + } + } + return(NULL); +} + +static int +vm_handle_reg_emul(struct vm *vm, int vcpuid, bool *retu) +{ + struct hyp *hyp; + struct vm_exit *vme; + struct vre *vre; + reg_read_t rread; + reg_write_t rwrite; + uint32_t iss_reg; + int error; + + hyp = (struct hyp *)vm->cookie; + vme = vm_exitinfo(vm, vcpuid); + vre = &vme->u.reg_emul.vre; + + iss_reg = vre->inst_syndrome & ISS_MSR_REG_MASK; + switch (iss_reg) { + case ISS_CNTP_CTL_EL0: + rread = vtimer_phys_ctl_read; + rwrite = vtimer_phys_ctl_write; + break; + case ISS_CNTP_CVAL_EL0: + rread = vtimer_phys_cval_read; + rwrite = vtimer_phys_cval_write; + break; + case ISS_CNTP_TVAL_EL0: + rread = vtimer_phys_tval_read; + rwrite = vtimer_phys_tval_write; + break; + case ICC_SGI1R_EL1: + rread = vgic_v3_icc_sgi1r_el1_read; + rwrite = vgic_v3_icc_sgi1r_el1_write; + break; + default: + goto out_user; + } + + error = vmm_emulate_register(vm, vcpuid, vre, rread, rwrite, retu); + + return (error); + +out_user: + *retu = true; + return (0); +} + +static int +vm_mmio_region_match(const void *key, const void *memb) +{ + const uint64_t *addr = key; + const struct vgic_mmio_region *vmr = memb; + + if (*addr < vmr->start) + return (-1); + else if (*addr >= vmr->start && *addr < vmr->end) + return (0); + else + return (1); +} + +static int +vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) +{ + struct vm_exit *vme; + struct vie *vie; + struct hyp *hyp = vm->cookie; + uint64_t fault_ipa; + struct vgic_mmio_region *vmr; + int error, i; + + if (!hyp->vgic_attached) + goto out_user; + + vme = vm_exitinfo(vm, vcpuid); + vie = &vme->u.inst_emul.vie; + + fault_ipa = vme->u.inst_emul.gpa; + + vmr = NULL; + for (i = 0; i < hyp->vgic_mmio_regions_num; i++) + if (fault_ipa >= hyp->vgic_mmio_regions[i].start && fault_ipa <= hyp->vgic_mmio_regions[i].end) + vmr = &hyp->vgic_mmio_regions[i]; + if (!vmr) + goto out_user; + + error = vmm_emulate_instruction(vm, vcpuid, fault_ipa, vie, + vmr->read, vmr->write, retu); + + return (error); + +out_user: + *retu = true; + return (0); +} + +static int +vm_handle_poweroff(struct vm *vm, int vcpuid) +{ + return (0); +} + +static int +vm_handle_psci_call(struct vm *vm, int vcpuid, bool *retu) +{ + struct vm_exit *vme; + enum vm_suspend_how how; + 
int error; + + vme = vm_exitinfo(vm, vcpuid); + + error = psci_handle_call(vm, vcpuid, vme, retu); + if (error) + goto out; + + if (vme->exitcode == VM_EXITCODE_SUSPENDED) { + how = vme->u.suspended.how; + switch (how) { + case VM_SUSPEND_POWEROFF: + vm_handle_poweroff(vm, vcpuid); + break; + default: + /* Nothing to do */ + ; + } + } + +out: + return (error); +} + +int +vm_run(struct vm *vm, struct vm_run *vmrun) +{ + int error, vcpuid; + register_t pc; + struct vm_exit *vme; + bool retu; + void *rvc, *sc; + + vcpuid = vmrun->cpuid; + pc = vmrun->pc; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EINVAL); + + rvc = sc = NULL; +restart: + critical_enter(); + error = VMRUN(vm->cookie, vcpuid, pc, NULL, rvc, sc); + critical_exit(); + + vme = vm_exitinfo(vm, vcpuid); + if (error == 0) { + retu = false; + switch (vme->exitcode) { + case VM_EXITCODE_INST_EMUL: + pc = vme->pc + vme->inst_length; + error = vm_handle_inst_emul(vm, vcpuid, &retu); + break; + + case VM_EXITCODE_REG_EMUL: + pc = vme->pc + vme->inst_length; + error = vm_handle_reg_emul(vm, vcpuid, &retu); + break; + + case VM_EXITCODE_HVC: + /* + * The HVC instruction saves the address for the + * next instruction as the return address. + */ + pc = vme->pc; + /* + * The PSCI call can change the exit information in the + * case of suspend/reset/poweroff/cpu off/cpu on. + */ + error = psci_handle_call(vm, vcpuid, vme, &retu); + break; + + case VM_EXITCODE_WFI: + pc = vme->pc + vme->inst_length; + error = vm_handle_wfi(vm, vcpuid, vme, &retu); + break; + + default: + /* Handle in userland */ + retu = true; + break; + } + } + + if (error == 0 && retu == false) + goto restart; + + /* Copy the exit information */ + bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); + + return (error); +} + +int +vm_activate_cpu(struct vm *vm, int vcpuid) +{ + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EBUSY); + + CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); + return (0); + +} + +cpuset_t +vm_active_cpus(struct vm *vm) +{ + + return (vm->active_cpus); +} + +void * +vcpu_stats(struct vm *vm, int vcpuid) +{ + + return (vm->vcpu[vcpuid].stats); +} + +static int +vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, + bool from_idle) +{ + int error; + + vcpu_assert_locked(vcpu); + + /* + * State transitions from the vmmdev_ioctl() must always begin from + * the VCPU_IDLE state. This guarantees that there is only a single + * ioctl() operating on a vcpu at any point. 
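+ * A transition initiated from the ioctl path therefore sleeps below until + * the vcpu's current owner returns it to VCPU_IDLE.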
+ */ + if (from_idle) { + while (vcpu->state != VCPU_IDLE) + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); + } else { + KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " + "vcpu idle state")); + } + + if (vcpu->state == VCPU_RUNNING) { + KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " + "mismatch for running vcpu", curcpu, vcpu->hostcpu)); + } else { + KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " + "vcpu that is not running", vcpu->hostcpu)); + } + + /* + * The following state transitions are allowed: + * IDLE -> FROZEN -> IDLE + * FROZEN -> RUNNING -> FROZEN + * FROZEN -> SLEEPING -> FROZEN + */ + switch (vcpu->state) { + case VCPU_IDLE: + case VCPU_RUNNING: + case VCPU_SLEEPING: + error = (newstate != VCPU_FROZEN); + break; + case VCPU_FROZEN: + error = (newstate == VCPU_FROZEN); + break; + default: + error = 1; + break; + } + + if (error) + return (EBUSY); + + vcpu->state = newstate; + if (newstate == VCPU_RUNNING) + vcpu->hostcpu = curcpu; + else + vcpu->hostcpu = NOCPU; + + if (newstate == VCPU_IDLE) + wakeup(&vcpu->state); + + return (0); +} + +int +vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, + bool from_idle) +{ + int error; + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + panic("vm_set_run_state: invalid vcpuid %d", vcpuid); + + vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + error = vcpu_set_state_locked(vcpu, newstate, from_idle); + vcpu_unlock(vcpu); + + return (error); +} + +enum vcpu_state +vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) +{ + struct vcpu *vcpu; + enum vcpu_state state; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + panic("vm_get_run_state: invalid vcpuid %d", vcpuid); + + vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + state = vcpu->state; + if (hostcpu != NULL) + *hostcpu = vcpu->hostcpu; + vcpu_unlock(vcpu); + + return (state); +} + +uint64_t +vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t len) +{ + uint64_t nextpage; + + nextpage = trunc_page(gpa + PAGE_SIZE); + if (len > nextpage - gpa) + panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%zu", gpa, len); + + return (VMMMAP_GET(vm->cookie, gpa)); +} + +int +vm_gpabase2memseg(struct vm *vm, uint64_t gpabase, + struct vm_memory_segment *seg) +{ + int i; + + for (i = 0; i < vm->num_mem_segs; i++) { + if (gpabase == vm->mem_segs[i].gpa) { + *seg = vm->mem_segs[i]; + return (0); + } + } + return (-1); +} + +int +vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) +{ + + if (vcpu < 0 || vcpu >= VM_MAXCPU) + return (EINVAL); + + if (reg >= VM_REG_LAST) + return (EINVAL); + + return (VMGETREG(vm->cookie, vcpu, reg, retval)); +} + +int +vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) +{ + struct vcpu *vcpu; + int error; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (reg >= VM_REG_LAST) + return (EINVAL); + error = VMSETREG(vm->cookie, vcpuid, reg, val); + if (error) + return (error); + + vcpu = &vm->vcpu[vcpuid]; + vcpu->nextpc = val; + + return(0); +} + +void * +vm_get_cookie(struct vm *vm) +{ + return vm->cookie; +} + +uint16_t +vm_get_maxcpus(struct vm *vm) +{ + return (vm->maxcpus); +} + +static void +vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg) +{ + size_t len; + uint64_t hpa; + + len = 0; + while (len < seg->len) { + hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE); + if (hpa == (uint64_t)-1) { + panic("vm_free_mem_segs: cannot free hpa " + "associated with gpa 0x%016lx", seg->gpa + len); + } + + vmm_mem_free(hpa, PAGE_SIZE); + + len += PAGE_SIZE; 
+ } + + bzero(seg, sizeof(struct vm_memory_segment)); +} + +/* + * Return true if 'gpa' is available for allocation, false otherwise + */ +static bool +vm_ipa_available(struct vm *vm, uint64_t ipa) +{ + uint64_t ipabase, ipalimit; + int i; + + if (!page_aligned(ipa)) + panic("vm_ipa_available: ipa (0x%016lx) not page aligned", ipa); + + for (i = 0; i < vm->num_mem_segs; i++) { + ipabase = vm->mem_segs[i].gpa; + ipalimit = ipabase + vm->mem_segs[i].len; + if (ipa >= ipabase && ipa < ipalimit) + return (false); + } + + return (true); +} + +/* + * Allocate 'len' bytes for the virtual machine starting at address 'ipa' + */ +int +vm_malloc(struct vm *vm, uint64_t ipa, size_t len) +{ + struct vm_memory_segment *seg; + int error, available, allocated; + uint64_t ipa2; + vm_paddr_t pa; + + if (!page_aligned(ipa) != 0 || !page_aligned(len) || len == 0) + return (EINVAL); + + available = allocated = 0; + ipa2 = ipa; + while (ipa2 < ipa + len) { + if (vm_ipa_available(vm, ipa2)) + available++; + else + allocated++; + ipa2 += PAGE_SIZE; + } + + /* + * If there are some allocated and some available pages in the address + * range then it is an error. + */ + if (allocated != 0 && available != 0) + return (EINVAL); + + /* + * If the entire address range being requested has already been + * allocated then there isn't anything more to do. + */ + if (allocated != 0 && available == 0) + return (0); + + if (vm->num_mem_segs == VM_MAX_MEMORY_SEGMENTS) + return (E2BIG); + + seg = &vm->mem_segs[vm->num_mem_segs]; + error = 0; + seg->gpa = ipa; + seg->len = 0; + while (seg->len < len) { + pa = vmm_mem_alloc(PAGE_SIZE); + if (pa == 0) { + error = ENOMEM; + break; + } + VMMMAP_SET(vm->cookie, ipa, pa, PAGE_SIZE, VM_PROT_ALL); + + seg->len += PAGE_SIZE; + ipa += PAGE_SIZE; + } + vm->num_mem_segs++; + + return (0); +} + +int +vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size, + uint64_t redist_start, size_t redist_size) +{ + int error; + + error = vgic_v3_attach_to_vm(vm->cookie, dist_start, dist_size, + redist_start, redist_size); + + return (error); +} + +int +vm_assert_irq(struct vm *vm, uint32_t irq, uint32_t vcpuid) +{ + struct hyp *hyp = (struct hyp *)vm->cookie; + int error; + + /* TODO: this is crap, send the vcpuid as an argument to vm_assert_irq */ + error = vgic_v3_inject_irq(&hyp->ctx[vcpuid], irq, VGIC_IRQ_VIRTIO); + + return (error); +} + +int +vm_deassert_irq(struct vm *vm, uint32_t irq, uint32_t vcpuid) +{ + struct hyp *hyp = (struct hyp *)vm->cookie; + int error; + + error = vgic_v3_remove_irq(&hyp->ctx[vcpuid], irq, false); + + return (error); +} + +static int +vm_handle_wfi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu) +{ + struct vcpu *vcpu; + struct hypctx *hypctx; + bool intr_disabled; + + vcpu = &vm->vcpu[vcpuid]; + hypctx = vme->u.wfi.hypctx; + intr_disabled = !(hypctx->regs.spsr & PSR_I); + + vcpu_lock(vcpu); + while (1) { + if (!intr_disabled && vgic_v3_vcpu_pending_irq(hypctx)) + break; + + if (vcpu_should_yield(vm, vcpuid)) + break; + + vcpu_set_state_locked(vcpu, VCPU_SLEEPING, false); + msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz); + vcpu_set_state_locked(vcpu, VCPU_FROZEN, false); + } + vcpu_unlock(vcpu); + + *retu = false; + return (0); +} Index: sys/arm64/vmm/vmm_dev.c =================================================================== --- /dev/null +++ sys/arm64/vmm/vmm_dev.c @@ -0,0 +1,407 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 
Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +struct vmmdev_softc { + struct vm *vm; /* vm instance cookie */ + struct cdev *cdev; + SLIST_ENTRY(vmmdev_softc) link; + int flags; +}; +#define VSC_LINKED 0x01 + +static SLIST_HEAD(, vmmdev_softc) head; + +static struct mtx vmmdev_mtx; + +static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); + +SYSCTL_DECL(_hw_vmm); + +static struct vmmdev_softc * +vmmdev_lookup(const char *name) +{ + struct vmmdev_softc *sc; + +#ifdef notyet /* XXX kernel is not compiled with invariants */ + mtx_assert(&vmmdev_mtx, MA_OWNED); +#endif + + SLIST_FOREACH(sc, &head, link) { + if (strcmp(name, vm_name(sc->vm)) == 0) + break; + } + + return (sc); +} + +static struct vmmdev_softc * +vmmdev_lookup2(struct cdev *cdev) +{ + + return (cdev->si_drv1); +} + +static int +vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) +{ + int error = 0; + + return (error); +} + +static int +vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, + struct thread *td) +{ + int error, vcpu, state_changed; + struct vmmdev_softc *sc; + struct vm_run *vmrun; + struct vm_memory_segment *seg; + struct vm_register *vmreg; + struct vm_activate_cpu *vac; + struct vm_attach_vgic *vav; + struct vm_irq *vi; + + sc = vmmdev_lookup2(cdev); + if (sc == NULL) + return (ENXIO); + + error = 0; + vcpu = -1; + state_changed = 0; + + /* + * Some VMM ioctls can operate only on vcpus that are not running. + */ + switch (cmd) { + case VM_RUN: + case VM_GET_REGISTER: + case VM_SET_REGISTER: + /* + * XXX fragile, handle with care + * Assumes that the first field of the ioctl data is the vcpu. + */ + vcpu = *(int *)data; + if (vcpu < 0 || vcpu >= VM_MAXCPU) { + error = EINVAL; + goto done; + } + + error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); + if (error) + goto done; + + state_changed = 1; + break; + + case VM_MAP_MEMORY: + case VM_ATTACH_VGIC: + /* + * ioctls that operate on the entire virtual machine must + * prevent all vcpus from running. 
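+ * Each vcpu is frozen in turn; if one cannot be frozen, the vcpus that + * were already frozen are returned to VCPU_IDLE and the ioctl fails.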
+ */ + error = 0; + for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { + error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); + if (error) + break; + } + + if (error) { + vcpu--; + while (vcpu >= 0) { + vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); + vcpu--; + } + goto done; + } + + state_changed = 2; + break; + case VM_ASSERT_IRQ: + vi =(struct vm_irq *)data; + error = vm_assert_irq(sc->vm, vi->irq, vi->vcpuid); + break; + case VM_DEASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_deassert_irq(sc->vm, vi->irq, vi->vcpuid); + break; + default: + break; + } + + switch(cmd) { + case VM_RUN: + vmrun = (struct vm_run *)data; + error = vm_run(sc->vm, vmrun); + break; + case VM_MAP_MEMORY: + seg = (struct vm_memory_segment *)data; + error = vm_malloc(sc->vm, seg->gpa, seg->len); + break; + case VM_GET_MEMORY_SEG: + seg = (struct vm_memory_segment *)data; + seg->len = 0; + (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); + error = 0; + break; + case VM_GET_REGISTER: + vmreg = (struct vm_register *)data; + error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, + &vmreg->regval); + break; + case VM_SET_REGISTER: + vmreg = (struct vm_register *)data; + error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, + vmreg->regval); + break; + case VM_ACTIVATE_CPU: + vac = (struct vm_activate_cpu *)data; + error = vm_activate_cpu(sc->vm, vac->vcpuid); + break; + case VM_ATTACH_VGIC: + vav = (struct vm_attach_vgic *)data; + error = vm_attach_vgic(sc->vm, vav->dist_start, vav->dist_size, + vav->redist_start, vav->redist_size); + break; + default: + error = ENOTTY; + break; + } + + if (state_changed == 1) { + vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); + } else if (state_changed == 2) { + for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) + vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); + } + +done: + /* Make sure that no handler returns a bogus value like ERESTART */ + KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error)); + return (error); +} + +static int +vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, + int nprot, vm_memattr_t *memattr) +{ + int error; + struct vmmdev_softc *sc; + + error = -1; + mtx_lock(&vmmdev_mtx); + + sc = vmmdev_lookup2(cdev); + if (sc != NULL && !(nprot & PROT_EXEC)) { + *paddr = (vm_paddr_t)vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE); + if (*paddr != (vm_paddr_t)-1) + error = 0; + } + + mtx_unlock(&vmmdev_mtx); + + return (error); +} + +static void +vmmdev_destroy(void *arg) +{ + + struct vmmdev_softc *sc = arg; + + if (sc->cdev != NULL) + destroy_dev(sc->cdev); + + if (sc->vm != NULL) + vm_destroy(sc->vm); + + if ((sc->flags & VSC_LINKED) != 0) { + mtx_lock(&vmmdev_mtx); + SLIST_REMOVE(&head, sc, vmmdev_softc, link); + mtx_unlock(&vmmdev_mtx); + } + + free(sc, M_VMMDEV); +} + +static int +sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) +{ + int error; + char buf[VM_MAX_NAMELEN]; + struct vmmdev_softc *sc; + struct cdev *cdev; + + strlcpy(buf, "beavis", sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(buf); + if (sc == NULL || sc->cdev == NULL) { + mtx_unlock(&vmmdev_mtx); + return (EINVAL); + } + + /* + * The 'cdev' will be destroyed asynchronously when 'si_threadcount' + * goes down to 0 so we should not do it again in the callback. 
+ */ + cdev = sc->cdev; + sc->cdev = NULL; + mtx_unlock(&vmmdev_mtx); + + /* + * Schedule the 'cdev' to be destroyed: + * + * - any new operations on this 'cdev' will return an error (ENXIO). + * + * - when the 'si_threadcount' dwindles down to zero the 'cdev' will + * be destroyed and the callback will be invoked in a taskqueue + * context. + */ + destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); + + return (0); +} +SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, + NULL, 0, sysctl_vmm_destroy, "A", NULL); + +static struct cdevsw vmmdevsw = { + .d_name = "vmmdev", + .d_version = D_VERSION, + .d_ioctl = vmmdev_ioctl, + .d_mmap = vmmdev_mmap, + .d_read = vmmdev_rw, + .d_write = vmmdev_rw, +}; + +static int +sysctl_vmm_create(SYSCTL_HANDLER_ARGS) +{ + int error; + struct vm *vm; + struct cdev *cdev; + struct vmmdev_softc *sc, *sc2; + char buf[VM_MAX_NAMELEN]; + + strlcpy(buf, "beavis", sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(buf); + mtx_unlock(&vmmdev_mtx); + if (sc != NULL) + return (EEXIST); + + error = vm_create(buf, &vm); + if (error != 0) + return (error); + + sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); + sc->vm = vm; + + /* + * Lookup the name again just in case somebody sneaked in when we + * dropped the lock. + */ + mtx_lock(&vmmdev_mtx); + sc2 = vmmdev_lookup(buf); + if (sc2 == NULL) { + SLIST_INSERT_HEAD(&head, sc, link); + sc->flags |= VSC_LINKED; + } + mtx_unlock(&vmmdev_mtx); + + if (sc2 != NULL) { + vmmdev_destroy(sc); + return (EEXIST); + } + + error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, + UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); + if (error != 0) { + vmmdev_destroy(sc); + return (error); + } + + mtx_lock(&vmmdev_mtx); + sc->cdev = cdev; + sc->cdev->si_drv1 = sc; + mtx_unlock(&vmmdev_mtx); + + return (0); +} +SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW, + NULL, 0, sysctl_vmm_create, "A", NULL); + +void +vmmdev_init(void) +{ + mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); +} + +int +vmmdev_cleanup(void) +{ + int error; + + if (SLIST_EMPTY(&head)) + error = 0; + else + error = EBUSY; + + return (error); +} Index: sys/arm64/vmm/vmm_instruction_emul.c =================================================================== --- /dev/null +++ sys/arm64/vmm/vmm_instruction_emul.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef _KERNEL +#include +#include +#include +#include + +#include + +#include + +#else +#include +#include +#include +#include +#include + +#include + +#include +#include +#endif + +#include + +int +vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +{ + uint64_t val; + int error; + + if (vie->dir == VM_DIR_READ) { + error = memread(vm, vcpuid, gpa, &val, vie->access_size, memarg); + if (error) + goto out; + error = vm_set_register(vm, vcpuid, vie->reg, val); + } else { + error = vm_get_register(vm, vcpuid, vie->reg, &val); + if (error) + goto out; + error = memwrite(vm, vcpuid, gpa, val, vie->access_size, memarg); + } + +out: + return (error); +} + +int +vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread, + reg_write_t regwrite, void *regarg) +{ + uint64_t val; + int error; + + if (vre->dir == VM_DIR_READ) { + error = regread(vm, vcpuid, &val, regarg); + if (error) + goto out; + error = vm_set_register(vm, vcpuid, vre->reg, val); + } else { + error = vm_get_register(vm, vcpuid, vre->reg, &val); + if (error) + goto out; + error = regwrite(vm, vcpuid, val, regarg); + } + +out: + return (error); +} Index: sys/arm64/vmm/vmm_mem.h =================================================================== --- /dev/null +++ sys/arm64/vmm/vmm_mem.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_MEM_H_ +#define _VMM_MEM_H_ + +int vmm_mem_init(void); +vm_paddr_t vmm_mem_alloc(size_t size); +void vmm_mem_free(vm_paddr_t start, size_t size); +vm_paddr_t vmm_mem_maxaddr(void); + +#endif Index: sys/arm64/vmm/vmm_mem.c =================================================================== --- /dev/null +++ sys/arm64/vmm/vmm_mem.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "vmm_mem.h" + +SYSCTL_DECL(_hw_vmm); + +static u_long pages_allocated; +SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD, + &pages_allocated, 0, "4KB pages allocated"); + +static void +update_pages_allocated(int howmany) +{ + pages_allocated += howmany; /* XXX locking? 
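The increment is not atomic and no lock is held here, so concurrent allocations from different VMs can race.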
*/ +} + +int +vmm_mem_init(void) +{ + + return (0); +} + +vm_paddr_t +vmm_mem_alloc(size_t size) +{ + + int flags; + vm_page_t m; + vm_paddr_t pa; + + if (size != PAGE_SIZE) + panic("vmm_mem_alloc: invalid allocation size %zu", size); + + flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO; + + while (1) { + /* + * XXX need policy to determine when to back off the allocation + */ + m = vm_page_alloc(NULL, 0, flags); + if (m == NULL) + vm_wait(NULL); + else + break; + } + + pa = VM_PAGE_TO_PHYS(m); + + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + + m->valid = VM_PAGE_BITS_ALL; + update_pages_allocated(1); + + return (pa); +} + +void +vmm_mem_free(vm_paddr_t base, size_t length) +{ + vm_page_t m; + + if (base & PAGE_MASK) { + panic("vmm_mem_free: base 0x%0lx must be aligned on a " + "0x%0x boundary\n", base, PAGE_SIZE); + } + + if (length != PAGE_SIZE) + panic("vmm_mem_free: invalid length %zu", length); + + m = PHYS_TO_VM_PAGE(base); + vm_page_unwire_noq(m); + vm_page_free(m); + + update_pages_allocated(-1); +} + +vm_paddr_t +vmm_mem_maxaddr(void) +{ + + return (ptoa(Maxmem)); +} Index: sys/arm64/vmm/vmm_stat.h =================================================================== --- /dev/null +++ sys/arm64/vmm/vmm_stat.h @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_STAT_H_ +#define _VMM_STAT_H_ + +struct vm; + +#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */ + +enum vmm_stat_scope { + VMM_STAT_SCOPE_ANY, + VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */ + VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */ +}; + +struct vmm_stat_type; +typedef void (*vmm_stat_func_t)(struct vm *vm, int vcpu, + struct vmm_stat_type *stat); + +struct vmm_stat_type { + int index; /* position in the stats buffer */ + int nelems; /* standalone or array */ + const char *desc; /* description of statistic */ + vmm_stat_func_t func; + enum vmm_stat_scope scope; +}; + +void vmm_stat_register(void *arg); + +#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \ + struct vmm_stat_type type[1] = { \ + { -1, nelems, desc, func, scope } \ + }; \ + SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type) + +#define VMM_STAT_DEFINE(type, nelems, desc, scope) \ + VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope) + +#define VMM_STAT_DECLARE(type) \ + extern struct vmm_stat_type type[1] + +#define VMM_STAT(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY) +#define VMM_STAT_INTEL(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL) +#define VMM_STAT_AMD(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD) + +#define VMM_STAT_FUNC(type, desc, func) \ + VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY) + +#define VMM_STAT_ARRAY(type, nelems, desc) \ + VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY) + +void *vmm_stat_alloc(void); +void vmm_stat_init(void *vp); +void vmm_stat_free(void *vp); + +/* + * 'buf' should be at least fit 'MAX_VMM_STAT_TYPES' entries + */ +int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf); +int vmm_stat_desc_copy(int index, char *buf, int buflen); + +static void __inline +vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, + int statidx, uint64_t x) +{ +#ifdef VMM_KEEP_STATS + uint64_t *stats; + + stats = vcpu_stats(vm, vcpu); + + if (vst->index >= 0 && statidx < vst->nelems) + stats[vst->index + statidx] += x; +#endif +} + +static void __inline +vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, + int statidx, uint64_t val) +{ +#ifdef VMM_KEEP_STATS + uint64_t *stats; + + stats = vcpu_stats(vm, vcpu); + + if (vst->index >= 0 && statidx < vst->nelems) + stats[vst->index + statidx] = val; +#endif +} + +static void __inline +vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x) +{ + +#ifdef VMM_KEEP_STATS + vmm_stat_array_incr(vm, vcpu, vst, 0, x); +#endif +} + +static void __inline +vmm_stat_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t val) +{ + +#ifdef VMM_KEEP_STATS + vmm_stat_array_set(vm, vcpu, vst, 0, val); +#endif +} + +VMM_STAT_DECLARE(VCPU_MIGRATIONS); +VMM_STAT_DECLARE(VMEXIT_COUNT); +VMM_STAT_DECLARE(VMEXIT_EXTINT); +VMM_STAT_DECLARE(VMEXIT_HLT); +VMM_STAT_DECLARE(VMEXIT_CR_ACCESS); +VMM_STAT_DECLARE(VMEXIT_RDMSR); +VMM_STAT_DECLARE(VMEXIT_WRMSR); +VMM_STAT_DECLARE(VMEXIT_MTRAP); +VMM_STAT_DECLARE(VMEXIT_PAUSE); +VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW); +VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW); +VMM_STAT_DECLARE(VMEXIT_INOUT); +VMM_STAT_DECLARE(VMEXIT_CPUID); +VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT); +VMM_STAT_DECLARE(VMEXIT_INST_EMUL); +VMM_STAT_DECLARE(VMEXIT_UNKNOWN); +VMM_STAT_DECLARE(VMEXIT_ASTPENDING); +VMM_STAT_DECLARE(VMEXIT_USERSPACE); +VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS); +VMM_STAT_DECLARE(VMEXIT_EXCEPTION); +#endif Index: 
sys/arm64/vmm/vmm_stat.c =================================================================== --- /dev/null +++ sys/arm64/vmm/vmm_stat.c @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include + +#include +#include "vmm_stat.h" + +/* + * 'vst_num_elems' is the total number of addressable statistic elements + * 'vst_num_types' is the number of unique statistic types + * + * It is always true that 'vst_num_elems' is greater than or equal to + * 'vst_num_types'. This is because a stat type may represent more than + * one element (for e.g. VMM_STAT_ARRAY). 
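 *
 * As an illustration (the name below is made up, not a statistic this
 * file defines), a declaration such as
 *
 *	VMM_STAT_ARRAY(VMEXIT_PER_VCPU, VM_MAXCPU, "per-vcpu exit count");
 *
 * registers one stat type that occupies VM_MAXCPU consecutive elements
 * of the stats buffer, whereas each plain VMM_STAT() at the bottom of
 * this file occupies a single element.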
+ */ +static int vst_num_elems, vst_num_types; +static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS]; + +static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat"); + +#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t)) + +void +vmm_stat_register(void *arg) +{ + struct vmm_stat_type *vst = arg; + + /* We require all stats to identify themselves with a description */ + if (vst->desc == NULL) + return; + + if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) { + printf("Cannot accomodate vmm stat type \"%s\"!\n", vst->desc); + return; + } + + vst->index = vst_num_elems; + vst_num_elems += vst->nelems; + + vsttab[vst_num_types++] = vst; +} + +int +vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf) +{ + struct vmm_stat_type *vst; + uint64_t *stats; + int i; + + if (vcpu < 0 || vcpu >= VM_MAXCPU) + return (EINVAL); + + /* Let stats functions update their counters */ + for (i = 0; i < vst_num_types; i++) { + vst = vsttab[i]; + if (vst->func != NULL) + (*vst->func)(vm, vcpu, vst); + } + + /* Copy over the stats */ + stats = vcpu_stats(vm, vcpu); + for (i = 0; i < vst_num_elems; i++) + buf[i] = stats[i]; + *num_stats = vst_num_elems; + return (0); +} + +void * +vmm_stat_alloc(void) +{ + + return (malloc(vst_size, M_VMM_STAT, M_WAITOK)); +} + +void +vmm_stat_init(void *vp) +{ + + bzero(vp, vst_size); +} + +void +vmm_stat_free(void *vp) +{ + free(vp, M_VMM_STAT); +} + +int +vmm_stat_desc_copy(int index, char *buf, int bufsize) +{ + int i; + struct vmm_stat_type *vst; + + for (i = 0; i < vst_num_types; i++) { + vst = vsttab[i]; + if (index >= vst->index && index < vst->index + vst->nelems) { + if (vst->nelems > 1) { + snprintf(buf, bufsize, "%s[%d]", + vst->desc, index - vst->index); + } else { + strlcpy(buf, vst->desc, bufsize); + } + return (0); /* found it */ + } + } + + return (EINVAL); +} + +/* global statistics */ +VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus"); +VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); +VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt"); +VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted"); +VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted"); +VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted"); +VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted"); +VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits"); +VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted"); +VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening"); +VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening"); +VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted"); +VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted"); +VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault"); +VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); +VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); +VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); +VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); +VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); +VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); Index: sys/conf/files.arm64 =================================================================== --- sys/conf/files.arm64 +++ sys/conf/files.arm64 @@ -55,6 +55,7 @@ arm64/arm64/identcpu.c standard arm64/arm64/in_cksum.c optional inet | inet6 arm64/arm64/locore.S standard no-obj +arm64/arm64/hyp_stub.S standard arm64/arm64/machdep.c standard 
arm64/arm64/machdep_boot.c standard arm64/arm64/mem.c standard Index: sys/dev/psci/psci.h =================================================================== --- sys/dev/psci/psci.h +++ sys/dev/psci/psci.h @@ -32,6 +32,7 @@ #include #include +#ifdef _KERNEL typedef int (*psci_initfn_t)(device_t dev, int default_version); typedef int (*psci_callfn_t)(register_t, register_t, register_t, register_t, register_t, register_t, register_t, register_t, @@ -52,6 +53,7 @@ return (psci_callfn(a, b, c, d, 0, 0, 0, 0, NULL)); } +#endif /* * PSCI return codes. Index: sys/dts/Makefile =================================================================== --- sys/dts/Makefile +++ sys/dts/Makefile @@ -1,5 +1,5 @@ # $FreeBSD$ -SUBDIR=arm mips powerpc +SUBDIR=arm arm64 mips powerpc .include Index: sys/modules/Makefile =================================================================== --- sys/modules/Makefile +++ sys/modules/Makefile @@ -620,6 +620,9 @@ _enetc_mdio= enetc_mdio _felix= felix _rockchip= rockchip +.if ${MK_BHYVE} != "no" || defined(ALL_MODULES) +_vmm= vmm +.endif .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm" Index: sys/modules/vmm/Makefile =================================================================== --- sys/modules/vmm/Makefile +++ sys/modules/vmm/Makefile @@ -4,90 +4,10 @@ KMOD= vmm -SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h -SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h -DPSRCS+= vmx_assym.h svm_assym.h -DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc +SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h -CFLAGS+= -DVMM_KEEP_STATS -CFLAGS+= -I${SRCTOP}/sys/amd64/vmm -CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io -CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel -CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd +CFLAGS+= -DVMM_KEEP_STATS -DSMP -# generic vmm support -.PATH: ${SRCTOP}/sys/amd64/vmm -SRCS+= vmm.c \ - vmm_dev.c \ - vmm_host.c \ - vmm_instruction_emul.c \ - vmm_ioport.c \ - vmm_lapic.c \ - vmm_mem.c \ - vmm_stat.c \ - vmm_util.c \ - x86.c - -.PATH: ${SRCTOP}/sys/amd64/vmm/io -SRCS+= iommu.c \ - ppt.c \ - vatpic.c \ - vatpit.c \ - vhpet.c \ - vioapic.c \ - vlapic.c \ - vpmtmr.c \ - vrtc.c - -# intel-specific files -.PATH: ${SRCTOP}/sys/amd64/vmm/intel -SRCS+= ept.c \ - vmcs.c \ - vmx_msr.c \ - vmx_support.S \ - vmx.c \ - vtd.c - -# amd-specific files -.PATH: ${SRCTOP}/sys/amd64/vmm/amd -SRCS+= vmcb.c \ - amdiommu.c \ - ivhd_if.c \ - ivhd_if.h \ - svm.c \ - svm_support.S \ - npt.c \ - ivrs_drv.c \ - amdvi_hw.c \ - svm_msr.c - -.if ${KERN_OPTS:MBHYVE_SNAPSHOT} != "" -SRCS+= vmm_snapshot.c -.endif - -CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o - -OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h -OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h - -vmx_assym.h: vmx_genassym.o - sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET} - -svm_assym.h: svm_genassym.o - sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET} - -vmx_support.o: - ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ - ${.IMPSRC} -o ${.TARGET} - -svm_support.o: - ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ - ${.IMPSRC} -o ${.TARGET} - -vmx_genassym.o: offset.inc - ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} - -svm_genassym.o: offset.inc - ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} +.include .include Index: sys/modules/vmm/Makefile.amd64 =================================================================== --- /dev/null +++ sys/modules/vmm/Makefile.amd64 @@ -0,0 +1,89 @@ +# $FreeBSD$ + +KMOD= vmm + 
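# amd64-specific portion of the vmm(4) module build.  The source lists,
# CFLAGS and genassym rules below are carried over from the previous
# single-architecture sys/modules/vmm/Makefile (removed above) so that
# the top-level Makefile can stay architecture-neutral.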
+SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h +SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h +DPSRCS+= vmx_assym.h svm_assym.h +DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc + +CFLAGS+= -DVMM_KEEP_STATS +CFLAGS+= -I${SRCTOP}/sys/amd64/vmm +CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io +CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel +CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd + +# generic vmm support +.PATH: ${SRCTOP}/sys/amd64/vmm +SRCS+= vmm.c \ + vmm_dev.c \ + vmm_host.c \ + vmm_instruction_emul.c \ + vmm_ioport.c \ + vmm_lapic.c \ + vmm_mem.c \ + vmm_stat.c \ + vmm_util.c \ + x86.c + +.PATH: ${SRCTOP}/sys/amd64/vmm/io +SRCS+= iommu.c \ + ppt.c \ + vatpic.c \ + vatpit.c \ + vhpet.c \ + vioapic.c \ + vlapic.c \ + vpmtmr.c \ + vrtc.c + +# intel-specific files +.PATH: ${SRCTOP}/sys/amd64/vmm/intel +SRCS+= ept.c \ + vmcs.c \ + vmx_msr.c \ + vmx_support.S \ + vmx.c \ + vtd.c + +# amd-specific files +.PATH: ${SRCTOP}/sys/amd64/vmm/amd +SRCS+= vmcb.c \ + amdiommu.c \ + ivhd_if.c \ + ivhd_if.h \ + svm.c \ + svm_support.S \ + npt.c \ + ivrs_drv.c \ + amdvi_hw.c \ + svm_msr.c + +.if ${KERN_OPTS:MBHYVE_SNAPSHOT} != "" +SRCS+= vmm_snapshot.c +.endif + +CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o + +OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h +OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h + +vmx_assym.h: vmx_genassym.o + sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET} + +svm_assym.h: svm_genassym.o + sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET} + +vmx_support.o: + ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ + ${.IMPSRC} -o ${.TARGET} + +svm_support.o: + ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ + ${.IMPSRC} -o ${.TARGET} + +vmx_genassym.o: offset.inc + ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} + +svm_genassym.o: offset.inc + ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} Index: sys/modules/vmm/Makefile.arm64 =================================================================== --- /dev/null +++ sys/modules/vmm/Makefile.arm64 @@ -0,0 +1,30 @@ +DPSRCS+= hyp_assym.h +DPSRCS+= hyp_genassym.c + +CFLAGS+= -I${.CURDIR}/../../arm64/vmm -I${.CURDIR}/../../arm64/include + +# generic vmm support +.PATH: ${.CURDIR}/../../arm64/vmm +SRCS+= vmm.c \ + vmm_dev.c \ + vmm_instruction_emul.c \ + vmm_mem.c \ + mmu.c \ + vmm_stat.c \ + arm64.c \ + psci.c \ + reset.c \ + hyp.S + +.PATH: ${.CURDIR}/../../arm64/vmm/io +SRCS+= vgic_v3.c \ + vgic_v3_mmio.c \ + vtimer.c + +CLEANFILES= hyp_assym.h hyp_genassym.o + +hyp_assym.h: hyp_genassym.o + sh ${SYSDIR}/kern/genassym.sh hyp_genassym.o > ${.TARGET} + +hyp_genassym.o: + ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} Index: usr.sbin/Makefile.arm64 =================================================================== --- usr.sbin/Makefile.arm64 +++ usr.sbin/Makefile.arm64 @@ -4,3 +4,9 @@ SUBDIR+= acpi .endif SUBDIR+= ofwdump + +.if ${MK_BHYVE} != "no" +SUBDIR+= bhyve +SUBDIR+= bhyveload +SUBDIR+= bhyvectl +.endif Index: usr.sbin/bhyve/Makefile =================================================================== --- usr.sbin/bhyve/Makefile +++ usr.sbin/bhyve/Makefile @@ -3,134 +3,7 @@ # .include -CFLAGS+=-I${.CURDIR}/../../contrib/lib9p -CFLAGS+=-I${SRCTOP}/sys -.PATH: ${SRCTOP}/sys/cam/ctl -PROG= bhyve -PACKAGE= bhyve - -MAN= bhyve.8 bhyve_config.5 - -BHYVE_SYSDIR?=${SRCTOP} - -SRCS= \ - atkbdc.c \ - acpi.c \ - audio.c \ - bhyvegc.c \ - bhyverun.c \ - block_if.c \ - bootrom.c \ - config.c \ - console.c \ - ctl_util.c \ - ctl_scsi_all.c \ - fwctl.c \ - gdb.c \ - 
hda_codec.c \ - inout.c \ - ioapic.c \ - kernemu_dev.c \ - mem.c \ - mevent.c \ - mptbl.c \ - net_backends.c \ - net_utils.c \ - pci_ahci.c \ - pci_e82545.c \ - pci_emul.c \ - pci_hda.c \ - pci_fbuf.c \ - pci_hostbridge.c \ - pci_irq.c \ - pci_lpc.c \ - pci_nvme.c \ - pci_passthru.c \ - pci_virtio_9p.c \ - pci_virtio_block.c \ - pci_virtio_console.c \ - pci_virtio_input.c \ - pci_virtio_net.c \ - pci_virtio_rnd.c \ - pci_virtio_scsi.c \ - pci_uart.c \ - pci_xhci.c \ - pctestdev.c \ - pm.c \ - post.c \ - ps2kbd.c \ - ps2mouse.c \ - rfb.c \ - rtc.c \ - smbiostbl.c \ - sockstream.c \ - task_switch.c \ - uart_emul.c \ - usb_emul.c \ - usb_mouse.c \ - virtio.c \ - vga.c \ - vmgenc.c \ - xmsr.c \ - spinup_ap.c \ - iov.c - -.if ${MK_BHYVE_SNAPSHOT} != "no" -SRCS+= snapshot.c -.endif - -CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64 - -.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm -SRCS+= vmm_instruction_emul.c - -LIBADD= vmmapi md nv pthread z util sbuf cam 9p - -.if ${MK_CASPER} != "no" -LIBADD+= casper -LIBADD+= cap_pwd -LIBADD+= cap_grp -# Temporary disable capsicum, until we integrate checkpoint code with it. -#CFLAGS+=-DWITH_CASPER -.endif - -.if ${MK_BHYVE_SNAPSHOT} != "no" -LIBADD+= ucl xo -.endif - -.if ${MK_INET_SUPPORT} != "no" -CFLAGS+=-DINET -.endif -.if ${MK_INET6_SUPPORT} != "no" -CFLAGS+=-DINET6 -.endif -.if ${MK_NETGRAPH_SUPPORT} != "no" -CFLAGS+=-DNETGRAPH -LIBADD+= netgraph -.endif -.if ${MK_OPENSSL} == "no" -CFLAGS+=-DNO_OPENSSL -.else -LIBADD+= crypto -.endif - -CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000 -CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii -CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller -.if ${MK_BHYVE_SNAPSHOT} != "no" -CFLAGS+= -I${SRCTOP}/contrib/libucl/include - -# Temporary disable capsicum, until we integrate checkpoint code with it. -CFLAGS+= -DWITHOUT_CAPSICUM - -CFLAGS+= -DBHYVE_SNAPSHOT -.endif - -.ifdef GDB_LOG -CFLAGS+=-DGDB_LOG -.endif - -WARNS?= 2 +.include .include Index: usr.sbin/bhyve/Makefile.amd64 =================================================================== --- /dev/null +++ usr.sbin/bhyve/Makefile.amd64 @@ -0,0 +1,132 @@ +# +# $FreeBSD$ +# + +CFLAGS+=-I${.CURDIR}/../../contrib/lib9p +CFLAGS+=-I${SRCTOP}/sys +.PATH: ${SRCTOP}/sys/cam/ctl + +PROG= bhyve +PACKAGE= bhyve + +MAN= bhyve.8 bhyve_config.5 + +BHYVE_SYSDIR?=${SRCTOP} + +SRCS= \ + atkbdc.c \ + acpi.c \ + audio.c \ + bhyvegc.c \ + bhyverun.c \ + block_if.c \ + bootrom.c \ + config.c \ + console.c \ + ctl_util.c \ + ctl_scsi_all.c \ + fwctl.c \ + gdb.c \ + hda_codec.c \ + inout.c \ + ioapic.c \ + kernemu_dev.c \ + mem.c \ + mevent.c \ + mptbl.c \ + net_backends.c \ + net_utils.c \ + pci_ahci.c \ + pci_e82545.c \ + pci_emul.c \ + pci_hda.c \ + pci_fbuf.c \ + pci_hostbridge.c \ + pci_irq.c \ + pci_lpc.c \ + pci_nvme.c \ + pci_passthru.c \ + pci_virtio_9p.c \ + pci_virtio_block.c \ + pci_virtio_console.c \ + pci_virtio_net.c \ + pci_virtio_rnd.c \ + pci_virtio_scsi.c \ + pci_uart.c \ + pci_xhci.c \ + pctestdev.c \ + pm.c \ + post.c \ + ps2kbd.c \ + ps2mouse.c \ + rfb.c \ + rtc.c \ + smbiostbl.c \ + sockstream.c \ + task_switch.c \ + uart_emul.c \ + usb_emul.c \ + usb_mouse.c \ + virtio.c \ + vga.c \ + vmgenc.c \ + xmsr.c \ + spinup_ap.c \ + iov.c + +.if ${MK_BHYVE_SNAPSHOT} != "no" +SRCS+= snapshot.c +.endif + +CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64 + +.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm +SRCS+= vmm_instruction_emul.c + +LIBADD= vmmapi md nv pthread z util sbuf cam 9p + +.if ${MK_CASPER} != "no" +LIBADD+= casper +LIBADD+= cap_pwd +LIBADD+= cap_grp +# Temporary disable capsicum, until we integrate 
checkpoint code with it. +#CFLAGS+=-DWITH_CASPER +.endif + +.if ${MK_BHYVE_SNAPSHOT} != "no" +LIBADD+= ucl xo +.endif + +.if ${MK_INET_SUPPORT} != "no" +CFLAGS+=-DINET +.endif +.if ${MK_INET6_SUPPORT} != "no" +CFLAGS+=-DINET6 +.endif +.if ${MK_NETGRAPH_SUPPORT} != "no" +CFLAGS+=-DNETGRAPH +LIBADD+= netgraph +.endif +.if ${MK_OPENSSL} == "no" +CFLAGS+=-DNO_OPENSSL +.else +LIBADD+= crypto +.endif + +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000 +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller +.if ${MK_BHYVE_SNAPSHOT} != "no" +CFLAGS+= -I${SRCTOP}/contrib/libucl/include + +# Temporary disable capsicum, until we integrate checkpoint code with it. +CFLAGS+= -DWITHOUT_CAPSICUM + +CFLAGS+= -DBHYVE_SNAPSHOT +.endif + +.ifdef GDB_LOG +CFLAGS+=-DGDB_LOG +.endif + +WARNS?= 2 Index: usr.sbin/bhyve/Makefile.arm64 =================================================================== --- /dev/null +++ usr.sbin/bhyve/Makefile.arm64 @@ -0,0 +1,53 @@ +# +# $FreeBSD$ +# + +CFLAGS+=-I${SRCTOP}/sys +.PATH: ${SRCTOP}/sys/cam/ctl + +PROG= bhyve +PACKAGE= bhyve + +MAN= bhyve.8 + +BHYVE_SYSDIR?=${SRCTOP} +BHYVE_SRCTOP?=${.CURDIR} + +SRCS= \ + iov.c \ + mevent.c \ + sockstream.c + +CFLAGS+= -DWITHOUT_CAPSICUM +.include "${BHYVE_SRCTOP}/arm64/Makefile.inc" +.include "${BHYVE_SRCTOP}/mmio/Makefile.inc" + +LIBADD= vmmapi md pthread + +.if ${MK_INET_SUPPORT} != "no" +CFLAGS+=-DINET +.endif +.if ${MK_INET6_SUPPORT} != "no" +CFLAGS+=-DINET6 +.endif +.if ${MK_NETGRAPH_SUPPORT} != "no" +CFLAGS+=-DNETGRAPH +LIBADD+= netgraph +.endif +.if ${MK_OPENSSL} == "no" +CFLAGS+=-DNO_OPENSSL +.endif + +.PATH: ${BHYVE_SYSDIR}/sys/arm64/vmm +SRCS+= vmm_instruction_emul.c + +CFLAGS+= -I${BHYVE_SRCTOP} +CFLAGS+= -I${BHYVE_SRCTOP}/arm64 +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio/console + +.ifdef GDB_LOG +CFLAGS+=-DGDB_LOG +.endif + +WARNS?= 2 Index: usr.sbin/bhyve/arm64/Makefile.inc =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/Makefile.inc @@ -0,0 +1,17 @@ +# +# $FreeBSD$ +# +.PATH: ${BHYVE_SRCTOP}/arm64/ +SRCS+= \ + arm64/bhyverun.c \ + arm64/bootrom.c \ + arm64/mem.c \ + arm64/reset.c + +.PATH: ${BHYVE_SYSDIR}/sys/${BHYVE_ARCH}/vmm + +MK_MAN=no + +BHYVE_BUS= mmio + +CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio/mmio Index: usr.sbin/bhyve/arm64/bhyverun.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/bhyverun.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/usr.sbin/bhyve/arm/bhyverun.h 4 2017-04-18 20:28:32Z mihai.carabas $ + */ + +#ifndef _FBSDRUN_H_ +#define _FBSDRUN_H_ + +#ifndef CTASSERT /* Allow lint to override */ +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] +#endif + +struct vmctx; +extern int guest_ncpus; +extern char *vmname; + +void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len); + +void fbsdrun_addcpu(struct vmctx *ctx, int oldcpu, int cpu, uint64_t rip); +int fbsdrun_muxed(void); +int fbsdrun_vmexit_on_hlt(void); +int fbsdrun_vmexit_on_pause(void); +int fbsdrun_disable_x2apic(void); +int fbsdrun_virtio_msix(void); +#endif Index: usr.sbin/bhyve/arm64/bhyverun.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/bhyverun.c @@ -0,0 +1,541 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bhyverun.h" +#include "../mmio/mmio_emul.h" +#include "../mmio/mmio_irq.h" +#include "mem.h" +#include "mevent.h" +#include "bootrom.h" + +/* Exit codes. 
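 * These are the bhyve process exit codes and follow the convention the
 * existing bhyve(8) uses: 0 means the guest rebooted (so a supervising
 * script can simply restart it), 1 powered off, 2 halted, and 4 an
 * internal error.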
*/ +#define EXIT_REBOOT 0 +#define EXIT_POWEROFF 1 +#define EXIT_HALT 2 +#define EXIT_ERROR 4 + +#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ + +#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ +#define VMEXIT_CONTINUE 1 /* continue from next instruction */ +#define VMEXIT_RESTART 2 /* restart current instruction */ +#define VMEXIT_ABORT 3 /* abort the vm run loop */ +#define VMEXIT_RESET 4 /* guest machine has reset */ + +#define MB (1024UL * 1024) +#define GB (1024UL * MB) + +#define GIC_V3_DIST_START 0x2f000000UL +#define GIC_V3_DIST_SIZE 0x10000UL +#define GIC_V3_REDIST_START 0x2f100000UL +#define GIC_V3_REDIST_SIZE 0x200000UL + +#define FILE_LEN 256 + +typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); + +char *vmname; + +int guest_ncpus; + +int raw_stdio = 0; + +static int foundcpus; + +static char *progname; +static const int BSP = 0; + +static cpuset_t cpumask; + +static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc); + +struct vm_exit vmexit[VM_MAXCPU]; + +struct bhyvestats { + uint64_t vmexit_bogus; + uint64_t vmexit_inst_emul; +} stats; + +struct mt_vmm_info { + pthread_t mt_thr; + struct vmctx *mt_ctx; + int mt_vcpu; +} mt_vmm_info[VM_MAXCPU]; + +static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; + +static void +usage(int code) +{ + + fprintf(stderr, + "Usage: %s [-bh] [-c vcpus] [-p pincpu] [-s ] [-l bootrom]" + "\n" + " -c: # cpus (default 1)\n" + " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" + " -s: device emulation config\n" + " -l: bootrom file\n" + " -h: help\n", + progname); + + exit(code); +} + +static int +pincpu_parse(const char *opt) +{ + int vcpu, pcpu; + + if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { + fprintf(stderr, "invalid format: %s\n", opt); + return (-1); + } + + if (vcpu < 0 || vcpu >= VM_MAXCPU) { + fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", + vcpu, VM_MAXCPU - 1); + return (-1); + } + + if (pcpu < 0 || pcpu >= CPU_SETSIZE) { + fprintf(stderr, "hostcpu '%d' outside valid range from " + "0 to %d\n", pcpu, CPU_SETSIZE - 1); + return (-1); + } + + if (vcpumap[vcpu] == NULL) { + if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { + perror("malloc"); + return (-1); + } + CPU_ZERO(vcpumap[vcpu]); + } + CPU_SET(pcpu, vcpumap[vcpu]); + return (0); +} + +void * +paddr_guest2host(struct vmctx *ctx, uintptr_t iaddr, size_t len) +{ + + return (vm_map_ipa(ctx, iaddr, len)); +} + +int +fbsdrun_virtio_msix(void) +{ + + return 0; +} + +static void * +fbsdrun_start_thread(void *param) +{ + char tname[MAXCOMLEN + 1]; + struct mt_vmm_info *mtp; + int vcpu; + + mtp = param; + vcpu = mtp->mt_vcpu; + + snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); + pthread_set_name_np(mtp->mt_thr, tname); + + vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].pc); + + /* not reached */ + return (NULL); +} + +void +fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int vcpu, uint64_t pc) +{ + int error; + + assert(fromcpu == BSP); + + error = vm_activate_cpu(ctx, vcpu); + if (error != 0) + err(EX_OSERR, "could not activate CPU %d", vcpu); + + CPU_SET_ATOMIC(vcpu, &cpumask); + foundcpus++; + + /* + * Set up the vmexit struct to allow execution to start + * at the given RIP + */ + vmexit[vcpu].pc = pc; + vmexit[vcpu].inst_length = 0; + + mt_vmm_info[vcpu].mt_ctx = ctx; + mt_vmm_info[vcpu].mt_vcpu = vcpu; + + error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, + fbsdrun_start_thread, &mt_vmm_info[vcpu]); + assert(error == 0); +} + +static int +fbsdrun_get_next_cpu(int curcpu) +{ + + /* + * Get the next 
available CPU. Assumes they arrive + * in ascending order with no gaps. + */ + return ((curcpu + 1) % foundcpus); +} + +static int +vmexit_hyp(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + fprintf(stderr, "vm exit[%d]\n", *pvcpu); + fprintf(stderr, "\treason\t\tHYP\n"); + fprintf(stderr, "\tpc\t\t0x%016lx\n", vmexit->pc); + fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); + + return (VMEXIT_ABORT); +} + +static int +vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + stats.vmexit_bogus++; + + return (VMEXIT_RESTART); +} + +static int +vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + int err; + struct vie *vie; + + stats.vmexit_inst_emul++; + + vie = &vmexit->u.inst_emul.vie; + err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie); + + if (err) { + if (err == ESRCH) { + fprintf(stderr, "Unhandled memory access to 0x%lx\n", + vmexit->u.inst_emul.gpa); + } + + fprintf(stderr, "Failed to emulate instruction at 0x%lx\n", vmexit->pc); + return (VMEXIT_ABORT); + } + return (VMEXIT_CONTINUE); +} + +static int +vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + enum vm_suspend_how how; + + how = vmexit->u.suspended.how; + + switch (how) { + case VM_SUSPEND_POWEROFF: + exit(EXIT_POWEROFF); + case VM_SUSPEND_RESET: + exit(EXIT_REBOOT); + case VM_SUSPEND_HALT: + exit(EXIT_HALT); + case VM_SUSPEND_TRIPLEFAULT: + /* Not implemented yet. */ + exit(EXIT_ERROR); + default: + fprintf(stderr, "vmexit_suspend: invalid or unimplemented reason %d\n", how); + exit(100); + } + +} + +static int +vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + int error; + int newcpu = vmexit->u.spinup_ap.vcpu; + uint64_t pc = vmexit->u.spinup_ap.rip; + uint64_t ctx_id = vmexit->u.spinup_ap.ctx_id; + + assert(newcpu != 0); + if (guest_ncpus == 1 && newcpu >= guest_ncpus) { + error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_X0, + PSCI_RETVAL_DENIED); + assert(error == 0); + goto out; + } + + error = vm_set_register(ctx, newcpu, VM_REG_GUEST_X0, ctx_id); + assert(error == 0); + + error = vm_set_register(ctx, newcpu, VM_REG_ELR_EL2, pc); + assert(error == 0); + + fbsdrun_addcpu(ctx, BSP, newcpu, pc); + + error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_X0, + PSCI_RETVAL_SUCCESS); + assert(error == 0); + +out: + return (VMEXIT_CONTINUE); +} + +static vmexit_handler_t handler[VM_EXITCODE_MAX] = { + [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, + [VM_EXITCODE_REG_EMUL] = vmexit_hyp, + [VM_EXITCODE_SUSPENDED] = vmexit_suspend, + [VM_EXITCODE_HYP] = vmexit_hyp, + [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, +}; + +static void +vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc) +{ + int error, rc, prevcpu; + enum vm_exitcode exitcode; + + if (vcpumap[vcpu] != NULL) { + error = pthread_setaffinity_np(pthread_self(), + sizeof(cpuset_t), vcpumap[vcpu]); + assert(error == 0); + } + + while (1) { + + error = vm_run(ctx, vcpu, pc, &vmexit[vcpu]); + + if (error != 0) { + /* + * It is possible that 'vmmctl' or some other process + * has transitioned the vcpu to CANNOT_RUN state right + * before we tried to transition it to RUNNING. + * + * This is expected to be temporary so just retry. 
+ */ + if (errno == EBUSY) + continue; + else + break; + } + + prevcpu = vcpu; + + exitcode = vmexit[vcpu].exitcode; + if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { + fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", + exitcode); + exit(4); + } + + rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); + + switch (rc) { + case VMEXIT_CONTINUE: + pc = vmexit[vcpu].pc + vmexit[vcpu].inst_length; + break; + case VMEXIT_RESTART: + pc = vmexit[vcpu].pc; + break; + case VMEXIT_RESET: + exit(0); + default: + exit(4); + } + } + fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); +} + +static int +num_vcpus_allowed(struct vmctx *ctx) +{ + return (VM_MAXCPU); +} + +int +main(int argc, char *argv[]) +{ + int c, error; + int max_vcpus; + struct vmctx *ctx; + uint64_t pc; + uint64_t memory_base_address, mem_size; + char bootrom_file[FILE_LEN]; + bool bootrom; + + bootrom = false; + memory_base_address = VM_GUEST_BASE_IPA; + mem_size = 128 * MB; + progname = basename(argv[0]); + guest_ncpus = 1; + + while ((c = getopt(argc, argv, "bhcp:s:e:m:l:")) != -1) { + switch (c) { + case 'e': + error = vm_parse_memsize(optarg, &memory_base_address); + if (error) { + fprintf(stderr, "Invalid memaddr '%s'\n", optarg); + exit(1); + } + break; + case 'p': + if (pincpu_parse(optarg) != 0) { + errx(EX_USAGE, "invalid vcpu pinning " + "configuration '%s'", optarg); + } + break; + case 'c': + guest_ncpus = VM_MAXCPU; + break; + case 'm': + error = vm_parse_memsize(optarg, &mem_size); + if (error) { + fprintf(stderr, "Invalid memsize '%s'\n", optarg); + exit(1); + } + break; + case 's': + if (mmio_parse_opts(optarg) != 0) + exit(1); + break; + case 'l': + bootrom = true; + strncpy(bootrom_file, optarg, FILE_LEN); + break; + case 'h': + usage(0); + default: + usage(4); + } + } + argc -= optind; + argv += optind; + + if (argc != 1) + usage(4); + + vmname = argv[0]; + + if (bootrom == true) { + error = vm_create(vmname); + if (error != 0) { + fprintf(stderr, "Failed to create vm\n"); + exit(1); + } + } + + /* The VM must be created by bhyveload first. 
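 * When a bootrom was supplied with -l, the VM was instead created just
 * above via vm_create(); in either case vm_open() only attaches to the
 * already existing /dev/vmm/<name> instance.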
*/ + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + max_vcpus = num_vcpus_allowed(ctx); + if (guest_ncpus > max_vcpus) { + fprintf(stderr, "%d vCPUs requested but only %d available\n", + guest_ncpus, max_vcpus); + exit(1); + } + + error = vm_setup_memory(ctx, memory_base_address, mem_size, VM_MMAP_ALL); + if (error != 0) { + fprintf(stderr, "Unable to setup memory (%d)\n", error); + exit(1); + } + + init_mem(); + mmio_irq_init(ctx); + + if (init_mmio(ctx) != 0) { + fprintf(stderr, "Failed to initialize device emulation\n"); + exit(1); + } + + if (bootrom == true) { + pc = memory_base_address; + error = bootrom_loadrom(ctx, bootrom_file, &pc); + if (error) { + fprintf(stderr, "Error loading bootrom\n"); + exit(1); + } + + error = vm_attach_vgic(ctx, GIC_V3_DIST_START, GIC_V3_DIST_SIZE, + GIC_V3_REDIST_START, GIC_V3_REDIST_SIZE); + if (error) { + fprintf(stderr, "Error attaching VGIC to the virtual machine\n"); + exit(1); + } + + vm_set_register(ctx, BSP, VM_REG_ELR_EL2, pc); + } + + error = vm_get_register(ctx, BSP, VM_REG_ELR_EL2, &pc); + assert(error == 0); + /* + * Add CPU 0 + */ + fbsdrun_addcpu(ctx, BSP, BSP, pc); + + /* + * Head off to the main event dispatch loop + */ + mevent_dispatch(); + + exit(1); +} Index: usr.sbin/bhyve/arm64/bootrom.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/bootrom.h @@ -0,0 +1,44 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2015 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _BOOTROM_H_ +#define _BOOTROM_H_ + +#include +#include +#include +#include + +struct vmctx; + + +int bootrom_alloc(struct vmctx *ctx, uint64_t *gpa, size_t len, char **base); +int bootrom_loadrom(struct vmctx *ctx, const char *romfile, uint64_t *gpa); +#endif Index: usr.sbin/bhyve/arm64/bootrom.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/bootrom.c @@ -0,0 +1,120 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2015 Neel Natu + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "bhyverun.h" +#include "bootrom.h" +#include "debug.h" + +int +bootrom_alloc(struct vmctx *ctx, uint64_t *gpa, size_t len, char **base) +{ + if (len == 0) { + warnx("ROM size %zu is invalid", len); + return (EINVAL); + } + + len = roundup2(len, PAGE_SIZE); + *base = vm_map_ipa(ctx, *gpa, len); + + printf("%s: gpa=%#lx base=%#lx\n", __func__, *gpa, (uint64_t)*base); + + return (0); +} + +int +bootrom_loadrom(struct vmctx *ctx, const char *romfile, uint64_t *gpa) +{ + struct stat sbuf; + ssize_t rlen; + char *base; + int fd, i, rv; + + rv = -1; + fd = open(romfile, O_RDONLY); + if (fd < 0) { + EPRINTLN("Error opening bootrom \"%s\": %s", + romfile, strerror(errno)); + goto done; + } + + if (fstat(fd, &sbuf) < 0) { + EPRINTLN("Could not fstat bootrom file \"%s\": %s", + romfile, strerror(errno)); + goto done; + } + + /* Map the bootrom into the guest address space */ + if (bootrom_alloc(ctx, gpa, sbuf.st_size, &base) != 0) + goto done; + + /* Read 'romfile' into the guest address space */ + for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) { + rlen = read(fd, base + i * PAGE_SIZE, PAGE_SIZE); + if (rlen != PAGE_SIZE) { + perror("read"); + EPRINTLN("Incomplete read of page %d of bootrom " + "file %s: %ld bytes", i, romfile, rlen); + goto done; + } + } + + if (sbuf.st_size % PAGE_SIZE != 0) { + rlen = read(fd, base + i * PAGE_SIZE, sbuf.st_size % PAGE_SIZE); + if (rlen != sbuf.st_size % PAGE_SIZE) { + perror("read"); + EPRINTLN("Incomplete read of page %d of bootrom " + "file %s: %ld bytes", i, romfile, rlen); + goto done; + } + } + + printf("Finished reading bootrom\n"); + rv = 0; +done: + if (fd >= 0) + close(fd); + return (rv); +} Index: usr.sbin/bhyve/arm64/mem.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/mem.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/usr.sbin/bhyve/arm/mem.h 38 2017-06-13 13:34:14Z darius.mihai $ + */ + +#ifndef _MEM_H_ +#define _MEM_H_ + +#include + +struct vmctx; + +typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int size, uint64_t *val, void *arg1, long arg2); + +struct mem_range { + const char *name; + int flags; + mem_func_t handler; + void *arg1; + long arg2; + uint64_t base; + uint64_t size; +}; +#define MEM_F_READ 0x1 +#define MEM_F_WRITE 0x2 +#define MEM_F_RW 0x3 + +void init_mem(void); +int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, void *vie); +int register_mem(struct mem_range *memp); +int register_mem_fallback(struct mem_range *memp); +int unregister_mem(struct mem_range *memp); + +#endif /* _MEM_H_ */ Index: usr.sbin/bhyve/arm64/mem.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/mem.c @@ -0,0 +1,271 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/usr.sbin/bhyve/arm/mem.c 4 2017-04-18 20:28:32Z mihai.carabas $ + */ + +/* + * Memory ranges are represented with an RB tree. 
On insertion, the range + * is checked for overlaps. On lookup, the key has the same base and limit + * so it can be searched within the range. + */ + +#include +__FBSDID("$FreeBSD: src/usr.sbin/bhyve/arm/mem.c 4 2017-04-18 20:28:32Z mihai.carabas $"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mem.h" + +struct mmio_rb_range { + RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ + struct mem_range mr_param; + uint64_t mr_base; + uint64_t mr_end; +}; + +struct mmio_rb_tree; +RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; + +/* + * Per-vCPU cache. Since most accesses from a vCPU will be to + * consecutive addresses in a range, it makes sense to cache the + * result of a lookup. + */ +static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; + +static pthread_rwlock_t mmio_rwlock; + +static int +mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) +{ + if (a->mr_end < b->mr_base) + return (-1); + else if (a->mr_base > b->mr_end) + return (1); + return (0); +} + +static int +mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr, + struct mmio_rb_range **entry) +{ + struct mmio_rb_range find, *res; + + find.mr_base = find.mr_end = addr; + + res = RB_FIND(mmio_rb_tree, rbt, &find); + + if (res != NULL) { + *entry = res; + return (0); + } + + return (ENOENT); +} + +static int +mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new) +{ + struct mmio_rb_range *overlap; + + overlap = RB_INSERT(mmio_rb_tree, rbt, new); + + if (overlap != NULL) { +#ifdef RB_DEBUG + printf("overlap detected: new %lx:%lx, tree %lx:%lx\n", + new->mr_base, new->mr_end, + overlap->mr_base, overlap->mr_end); +#endif + + return (EEXIST); + } + + return (0); +} + +#if 0 +static void +mmio_rb_dump(struct mmio_rb_tree *rbt) +{ + struct mmio_rb_range *np; + + pthread_rwlock_rdlock(&mmio_rwlock); + RB_FOREACH(np, mmio_rb_tree, rbt) { + printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, + np->mr_param.name); + } + pthread_rwlock_unlock(&mmio_rwlock); +} +#endif + +RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +static int +mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) +{ + int error; + struct mem_range *mr = arg; + + error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size, + rval, mr->arg1, mr->arg2); + return (error); +} + +static int +mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) +{ + int error; + struct mem_range *mr = arg; + + error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size, + &wval, mr->arg1, mr->arg2); + return (error); +} + +int +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, void *vie) +{ + struct mmio_rb_range *entry; + int err; + + pthread_rwlock_rdlock(&mmio_rwlock); + /* + * First check the per-vCPU cache + */ + if (mmio_hint[vcpu] && + paddr >= mmio_hint[vcpu]->mr_base && + paddr <= mmio_hint[vcpu]->mr_end) { + entry = mmio_hint[vcpu]; + } else + entry = NULL; + + if (entry == NULL) { + if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { + /* Update the per-vCPU cache */ + mmio_hint[vcpu] = entry; + } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { + pthread_rwlock_unlock(&mmio_rwlock); + return (ESRCH); + } + } + + assert(entry != NULL); + assert(NULL == NULL); + err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, + mem_read, mem_write, &entry->mr_param); + + pthread_rwlock_unlock(&mmio_rwlock); + + 
return (err); +} + +static int +register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) +{ + struct mmio_rb_range *entry, *mrp; + int err; + + err = 0; + + mrp = malloc(sizeof(struct mmio_rb_range)); + + if (mrp != NULL) { + mrp->mr_param = *memp; + mrp->mr_base = memp->base; + mrp->mr_end = memp->base + memp->size - 1; + pthread_rwlock_wrlock(&mmio_rwlock); + if (mmio_rb_lookup(rbt, memp->base, &entry) != 0) + err = mmio_rb_add(rbt, mrp); + pthread_rwlock_unlock(&mmio_rwlock); + if (err) + free(mrp); + } else + err = ENOMEM; + + return (err); +} + +int +register_mem(struct mem_range *memp) +{ + + return (register_mem_int(&mmio_rb_root, memp)); +} + +int +register_mem_fallback(struct mem_range *memp) +{ + + return (register_mem_int(&mmio_rb_fallback, memp)); +} + +int +unregister_mem(struct mem_range *memp) +{ + struct mem_range *mr; + struct mmio_rb_range *entry = NULL; + int err, i; + + pthread_rwlock_wrlock(&mmio_rwlock); + err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); + if (err == 0) { + mr = &entry->mr_param; + assert(mr->name == memp->name); + assert(mr->base == memp->base && mr->size == memp->size); + RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); + + /* flush Per-vCPU cache */ + for (i=0; i < VM_MAXCPU; i++) { + if (mmio_hint[i] == entry) + mmio_hint[i] = NULL; + } + } + pthread_rwlock_unlock(&mmio_rwlock); + + if (entry) + free(entry); + + return (err); +} + +void +init_mem(void) +{ + RB_INIT(&mmio_rb_root); + RB_INIT(&mmio_rb_fallback); + pthread_rwlock_init(&mmio_rwlock, NULL); +} Index: usr.sbin/bhyve/arm64/mevent_test.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/mevent_test.c @@ -0,0 +1,256 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/usr.sbin/bhyve/arm/mevent_test.c 4 2017-04-18 20:28:32Z mihai.carabas $ + */ + +/* + * Test program for the micro event library. Set up a simple TCP echo + * service. 
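 * The service listens on TEST_PORT (4321, see below) and can be poked
 * with e.g. "nc localhost 4321"; the first accepted connection also
 * starts the timer whose statistics are printed once TEVSZ samples have
 * been collected.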
+ * + * cc mevent_test.c mevent.c -lpthread + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mevent.h" + +#define TEST_PORT 4321 + +static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER; + +static struct mevent *tevp; + +char *vmname = "test vm"; + + +#define MEVENT_ECHO + +/* Number of timer events to capture */ +#define TEVSZ 4096 +uint64_t tevbuf[TEVSZ]; + +static void +timer_print(void) +{ + uint64_t min, max, diff, sum, tsc_freq; + size_t len; + int j; + + min = UINT64_MAX; + max = 0; + sum = 0; + + len = sizeof(tsc_freq); + sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0); + + for (j = 1; j < TEVSZ; j++) { + /* Convert a tsc diff into microseconds */ + diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq; + sum += diff; + if (min > diff) + min = diff; + if (max < diff) + max = diff; + } + + printf("timers done: usecs, min %ld, max %ld, mean %ld\n", min, max, + sum/(TEVSZ - 1)); +} + +static void +timer_callback(int fd, enum ev_type type, void *param) +{ + static int i; + + if (i >= TEVSZ) + abort(); + + tevbuf[i++] = rdtsc(); + + if (i == TEVSZ) { + mevent_delete(tevp); + timer_print(); + } +} + + +#ifdef MEVENT_ECHO +struct esync { + pthread_mutex_t e_mt; + pthread_cond_t e_cond; +}; + +static void +echoer_callback(int fd, enum ev_type type, void *param) +{ + struct esync *sync = param; + + pthread_mutex_lock(&sync->e_mt); + pthread_cond_signal(&sync->e_cond); + pthread_mutex_unlock(&sync->e_mt); +} + +static void * +echoer(void *param) +{ + struct esync sync; + struct mevent *mev; + char buf[128]; + int fd = (int)(uintptr_t) param; + int len; + + pthread_mutex_init(&sync.e_mt, NULL); + pthread_cond_init(&sync.e_cond, NULL); + + pthread_mutex_lock(&sync.e_mt); + + mev = mevent_add(fd, EVF_READ, echoer_callback, &sync); + if (mev == NULL) { + printf("Could not allocate echoer event\n"); + exit(1); + } + + while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) { + len = read(fd, buf, sizeof(buf)); + if (len > 0) { + write(fd, buf, len); + write(0, buf, len); + } else { + break; + } + } + + mevent_delete_close(mev); + + pthread_mutex_unlock(&sync.e_mt); + pthread_mutex_destroy(&sync.e_mt); + pthread_cond_destroy(&sync.e_cond); + + return (NULL); +} + +#else + +static void * +echoer(void *param) +{ + char buf[128]; + int fd = (int)(uintptr_t) param; + int len; + + while ((len = read(fd, buf, sizeof(buf))) > 0) { + write(1, buf, len); + } + + return (NULL); +} +#endif /* MEVENT_ECHO */ + +static void +acceptor_callback(int fd, enum ev_type type, void *param) +{ + pthread_mutex_lock(&accept_mutex); + pthread_cond_signal(&accept_condvar); + pthread_mutex_unlock(&accept_mutex); +} + +static void * +acceptor(void *param) +{ + struct sockaddr_in sin; + pthread_t tid; + int news; + int s; + static int first; + + if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("socket"); + exit(1); + } + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(TEST_PORT); + + if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) { + perror("bind"); + exit(1); + } + + if (listen(s, 1) < 0) { + perror("listen"); + exit(1); + } + + (void) mevent_add(s, EVF_READ, acceptor_callback, NULL); + + pthread_mutex_lock(&accept_mutex); + + while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) { + news = accept(s, NULL, NULL); + if (news < 0) { + perror("accept error"); + } else { + static 
int first = 1; + + if (first) { + /* + * Start a timer + */ + first = 0; + tevp = mevent_add(1, EVF_TIMER, timer_callback, + NULL); + } + + printf("incoming connection, spawning thread\n"); + pthread_create(&tid, NULL, echoer, + (void *)(uintptr_t)news); + } + } + + return (NULL); +} + +main() +{ + pthread_t tid; + + pthread_create(&tid, NULL, acceptor, NULL); + + mevent_dispatch(); +} Index: usr.sbin/bhyve/arm64/reset.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/reset.h @@ -0,0 +1,12 @@ +#ifndef _RESET_H_ +#define _RESET_H_ + +#define RESET_MAGIC 0xDEAD9731 + +#endif /* _RESET_H_ */ +#ifndef _RESET_H_ +#define _RESET_H_ + +#define RESET_MAGIC 0xDEAD9731 + +#endif /* _RESET_H_ */ Index: usr.sbin/bhyve/arm64/reset.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/arm64/reset.c @@ -0,0 +1,32 @@ +#include +#include + +#include "mem.h" +#include "reset.h" +#include "vmmapi.h" + +#define RESET_PORT 0x1c090100 + +static int +reset_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) +{ + vm_destroy(ctx); + + return (RESET_MAGIC); +} + +struct mem_range resetport ={ + "reset", + 0, + reset_handler, + NULL, + 0, + RESET_PORT, + sizeof(int) +}; + +void +init_reset(void) +{ + register_mem(&resetport); +} Index: usr.sbin/bhyve/block_if.c =================================================================== --- usr.sbin/bhyve/block_if.c +++ usr.sbin/bhyve/block_if.c @@ -58,7 +58,10 @@ #include #include + +#ifdef BHYVE_SNAPSHOT #include +#endif #include "bhyverun.h" #include "config.h" Index: usr.sbin/bhyve/mmio/Makefile.inc =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/Makefile.inc @@ -0,0 +1,23 @@ +# +# $FreeBSD$ +# + +.PATH: ${BHYVE_SRCTOP}/mmio/ +SRCS+= \ + mmio/block_if.c \ + mmio/mmio_uart.c \ + mmio/mmio_virtio.c \ + mmio/mmio_virtio_block.c \ + mmio/mmio_virtio_console.c \ + mmio/mmio_virtio_net.c \ + mmio/mmio_virtio_rnd.c \ + mmio/mmio_emul.c \ + mmio/mmio_irq.c \ + mmio/net_backends.c \ + mmio/net_utils.c \ + mmio/pl011.c \ + mmio/uart_backend.c + + + +CFLAGS+= -I${BHYVE_SRCTOP}/mmio Index: usr.sbin/bhyve/mmio/block_if.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/block_if.h @@ -0,0 +1,89 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2013 Peter Grehan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * The block API to be used by bhyve block-device emulations. The routines + * are thread safe, with no assumptions about the context of the completion + * callback - it may occur in the caller's context, or asynchronously in + * another thread. + */ + +#ifndef _BLOCK_IF_H_ +#define _BLOCK_IF_H_ + +#include +#include + +struct vm_snapshot_meta; + + +/* + * BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in + * a single request. BLOCKIF_RING_MAX is the maxmimum number of + * pending requests that can be queued. + */ +#define BLOCKIF_IOV_MAX 128 /* not practical to be IOV_MAX */ +#define BLOCKIF_RING_MAX 128 + +struct blockif_req { + int br_iovcnt; + off_t br_offset; + ssize_t br_resid; + void (*br_callback)(struct blockif_req *req, int err); + void *br_param; + struct iovec br_iov[BLOCKIF_IOV_MAX]; +}; + +struct blockif_ctxt; +struct blockif_ctxt *blockif_open(const char *optstr, const char *ident); +off_t blockif_size(struct blockif_ctxt *bc); +void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, + uint8_t *s); +int blockif_sectsz(struct blockif_ctxt *bc); +void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off); +int blockif_queuesz(struct blockif_ctxt *bc); +int blockif_is_ro(struct blockif_ctxt *bc); +int blockif_candelete(struct blockif_ctxt *bc); +int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_close(struct blockif_ctxt *bc); +#ifdef BHYVE_SNAPSHOT +void blockif_pause(struct blockif_ctxt *bc); +void blockif_resume(struct blockif_ctxt *bc); +int blockif_snapshot_req(struct blockif_req *br, + struct vm_snapshot_meta *meta); +int blockif_snapshot(struct blockif_ctxt *bc, + struct vm_snapshot_meta *meta); +#endif + +#endif /* _BLOCK_IF_H_ */ Index: usr.sbin/bhyve/mmio/block_if.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/block_if.c @@ -0,0 +1,991 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2013 Peter Grehan + * All rights reserved. + * Copyright 2020 Joyent, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include + +#include +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef BHYVE_SNAPSHOT +#include +#endif + +#include "bhyverun.h" +#include "debug.h" +#include "mevent.h" +#include "block_if.h" + +#define BLOCKIF_SIG 0xb109b109 + +#define BLOCKIF_NUMTHR 8 +#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) + +enum blockop { + BOP_READ, + BOP_WRITE, + BOP_FLUSH, + BOP_DELETE +}; + +enum blockstat { + BST_FREE, + BST_BLOCK, + BST_PEND, + BST_BUSY, + BST_DONE +}; + +struct blockif_elem { + TAILQ_ENTRY(blockif_elem) be_link; + struct blockif_req *be_req; + enum blockop be_op; + enum blockstat be_status; + pthread_t be_tid; + off_t be_block; +}; + +struct blockif_ctxt { + int bc_magic; + int bc_fd; + int bc_ischr; + int bc_isgeom; + int bc_candelete; + int bc_rdonly; + off_t bc_size; + int bc_sectsz; + int bc_psectsz; + int bc_psectoff; + int bc_closing; + int bc_paused; + int bc_work_count; + pthread_t bc_btid[BLOCKIF_NUMTHR]; + pthread_mutex_t bc_mtx; + pthread_cond_t bc_cond; + pthread_cond_t bc_paused_cond; + pthread_cond_t bc_work_done_cond; + + /* Request elements and free/pending/busy queues */ + TAILQ_HEAD(, blockif_elem) bc_freeq; + TAILQ_HEAD(, blockif_elem) bc_pendq; + TAILQ_HEAD(, blockif_elem) bc_busyq; + struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; +}; + +static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; + +struct blockif_sig_elem { + pthread_mutex_t bse_mtx; + pthread_cond_t bse_cond; + int bse_pending; + struct blockif_sig_elem *bse_next; +}; + +static struct blockif_sig_elem *blockif_bse_head; + +static int +blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, + enum blockop op) +{ + struct blockif_elem *be, *tbe; + off_t off; + int i; + + be = TAILQ_FIRST(&bc->bc_freeq); + assert(be != NULL); + assert(be->be_status == BST_FREE); + TAILQ_REMOVE(&bc->bc_freeq, be, be_link); + be->be_req = breq; + be->be_op = op; + switch (op) { + case BOP_READ: + case BOP_WRITE: + case BOP_DELETE: + off = breq->br_offset; + for (i = 0; i < breq->br_iovcnt; i++) + off += breq->br_iov[i].iov_len; + break; + default: + off = OFF_MAX; + } + be->be_block = off; + TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { + if (tbe->be_block == breq->br_offset) + break; + } + if (tbe == NULL) { + TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { + if (tbe->be_block == breq->br_offset) + break; + } + } + if (tbe == NULL) + be->be_status = BST_PEND; + else + be->be_status = BST_BLOCK; + TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); + return (be->be_status == BST_PEND); +} + 
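+/*
+ * Illustrative usage sketch (the names below are hypothetical): a block-device
+ * emulation is expected to open its backing store once and then submit
+ * asynchronous requests, e.g.:
+ *
+ *	struct blockif_ctxt *bc = blockif_open("disk.img,ro", "vtbd0");
+ *	struct blockif_req br;
+ *
+ *	br.br_iov[0].iov_base = buf;
+ *	br.br_iov[0].iov_len = 512;
+ *	br.br_iovcnt = 1;
+ *	br.br_offset = 0;
+ *	br.br_resid = 512;
+ *	br.br_callback = my_done;	// invoked when the I/O completes
+ *	br.br_param = my_cookie;
+ *	if (blockif_read(bc, &br) == E2BIG)
+ *		// queue full: retry after an outstanding request completes
+ *
+ * blockif_enqueue() above defers a new request (BST_BLOCK) while another
+ * queued request ends at the offset where the new one starts, so
+ * back-to-back I/O to consecutive offsets completes in submission order.
+ */
+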
+static int +blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) +{ + struct blockif_elem *be; + + TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { + if (be->be_status == BST_PEND) + break; + assert(be->be_status == BST_BLOCK); + } + if (be == NULL) + return (0); + TAILQ_REMOVE(&bc->bc_pendq, be, be_link); + be->be_status = BST_BUSY; + be->be_tid = t; + TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); + *bep = be; + return (1); +} + +static void +blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) +{ + struct blockif_elem *tbe; + + if (be->be_status == BST_DONE || be->be_status == BST_BUSY) + TAILQ_REMOVE(&bc->bc_busyq, be, be_link); + else + TAILQ_REMOVE(&bc->bc_pendq, be, be_link); + TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { + if (tbe->be_req->br_offset == be->be_block) + tbe->be_status = BST_PEND; + } + be->be_tid = 0; + be->be_status = BST_FREE; + be->be_req = NULL; + TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); +} + +static int +blockif_flush_bc(struct blockif_ctxt *bc) +{ + if (bc->bc_ischr) { + if (ioctl(bc->bc_fd, DIOCGFLUSH)) + return (errno); + } else if (fsync(bc->bc_fd)) + return (errno); + + return (0); +} + +static void +blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) +{ + struct blockif_req *br; + off_t arg[2]; + ssize_t clen, len, off, boff, voff; + int i, err; + + br = be->be_req; + if (br->br_iovcnt <= 1) + buf = NULL; + err = 0; + switch (be->be_op) { + case BOP_READ: + if (buf == NULL) { + if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, + br->br_offset)) < 0) + err = errno; + else + br->br_resid -= len; + break; + } + i = 0; + off = voff = 0; + while (br->br_resid > 0) { + len = MIN(br->br_resid, MAXPHYS); + if (pread(bc->bc_fd, buf, len, br->br_offset + + off) < 0) { + err = errno; + break; + } + boff = 0; + do { + clen = MIN(len - boff, br->br_iov[i].iov_len - + voff); + memcpy(br->br_iov[i].iov_base + voff, + buf + boff, clen); + if (clen < br->br_iov[i].iov_len - voff) + voff += clen; + else { + i++; + voff = 0; + } + boff += clen; + } while (boff < len); + off += len; + br->br_resid -= len; + } + break; + case BOP_WRITE: + if (bc->bc_rdonly) { + err = EROFS; + break; + } + if (buf == NULL) { + if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, + br->br_offset)) < 0) + err = errno; + else + br->br_resid -= len; + break; + } + i = 0; + off = voff = 0; + while (br->br_resid > 0) { + len = MIN(br->br_resid, MAXPHYS); + boff = 0; + do { + clen = MIN(len - boff, br->br_iov[i].iov_len - + voff); + memcpy(buf + boff, + br->br_iov[i].iov_base + voff, clen); + if (clen < br->br_iov[i].iov_len - voff) + voff += clen; + else { + i++; + voff = 0; + } + boff += clen; + } while (boff < len); + if (pwrite(bc->bc_fd, buf, len, br->br_offset + + off) < 0) { + err = errno; + break; + } + off += len; + br->br_resid -= len; + } + break; + case BOP_FLUSH: + err = blockif_flush_bc(bc); + break; + case BOP_DELETE: + if (!bc->bc_candelete) + err = EOPNOTSUPP; + else if (bc->bc_rdonly) + err = EROFS; + else if (bc->bc_ischr) { + arg[0] = br->br_offset; + arg[1] = br->br_resid; + if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) + err = errno; + else + br->br_resid = 0; + } else + err = EOPNOTSUPP; + break; + default: + err = EINVAL; + break; + } + + be->be_status = BST_DONE; + + (*br->br_callback)(br, err); +} + +static void * +blockif_thr(void *arg) +{ + struct blockif_ctxt *bc; + struct blockif_elem *be; + pthread_t t; + uint8_t *buf; + + bc = arg; + if (bc->bc_isgeom) + buf = malloc(MAXPHYS); + else + buf = NULL; 
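+
+	/*
+	 * Note on the loop below: each worker advertises itself as busy
+	 * (bc_work_count), drains pending requests unless the interface is
+	 * paused, and wakes any pausing thread via bc_work_done_cond once no
+	 * worker is active.  GEOM-backed contexts get the private MAXPHYS
+	 * bounce buffer allocated above, which blockif_proc() uses to turn
+	 * multi-iovec requests into plain pread()/pwrite() calls of up to
+	 * MAXPHYS bytes each.
+	 */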
+ t = pthread_self(); + + pthread_mutex_lock(&bc->bc_mtx); + for (;;) { + bc->bc_work_count++; + + /* We cannot process work if the interface is paused */ + while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) { + pthread_mutex_unlock(&bc->bc_mtx); + blockif_proc(bc, be, buf); + pthread_mutex_lock(&bc->bc_mtx); + blockif_complete(bc, be); + } + + bc->bc_work_count--; + + /* If none of the workers are busy, notify the main thread */ + if (bc->bc_work_count == 0) + pthread_cond_broadcast(&bc->bc_work_done_cond); + + /* Check ctxt status here to see if exit requested */ + if (bc->bc_closing) + break; + + /* Make all worker threads wait here if the device is paused */ + while (bc->bc_paused) + pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx); + + pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); + } + pthread_mutex_unlock(&bc->bc_mtx); + + if (buf) + free(buf); + pthread_exit(NULL); + return (NULL); +} + +static void +blockif_sigcont_handler(int signal, enum ev_type type, void *arg) +{ + struct blockif_sig_elem *bse; + + for (;;) { + /* + * Process the entire list even if not intended for + * this thread. + */ + do { + bse = blockif_bse_head; + if (bse == NULL) + return; + } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + (uintptr_t)bse, + (uintptr_t)bse->bse_next)); + + pthread_mutex_lock(&bse->bse_mtx); + bse->bse_pending = 0; + pthread_cond_signal(&bse->bse_cond); + pthread_mutex_unlock(&bse->bse_mtx); + } +} + +static void +blockif_init(void) +{ + mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); + (void) signal(SIGCONT, SIG_IGN); +} + +struct blockif_ctxt * +blockif_open(const char *optstr, const char *ident) +{ + char tname[MAXCOMLEN + 1]; + char name[MAXPATHLEN]; + char *nopt, *xopts, *cp; + struct blockif_ctxt *bc; + struct stat sbuf; + struct diocgattr_arg arg; + off_t size, psectsz, psectoff; + int extra, fd, i, sectsz; + int nocache, sync, ro, candelete, geom, ssopt, pssopt; + int nodelete; + +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; + cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; +#endif + + pthread_once(&blockif_once, blockif_init); + + fd = -1; + ssopt = 0; + nocache = 0; + sync = 0; + ro = 0; + nodelete = 0; + + /* + * The first element in the optstring is always a pathname. + * Optional elements follow + */ + nopt = xopts = strdup(optstr); + while (xopts != NULL) { + cp = strsep(&xopts, ","); + if (cp == nopt) /* file or device pathname */ + continue; + else if (!strcmp(cp, "nocache")) + nocache = 1; + else if (!strcmp(cp, "nodelete")) + nodelete = 1; + else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) + sync = 1; + else if (!strcmp(cp, "ro")) + ro = 1; + else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) + ; + else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) + pssopt = ssopt; + else { + EPRINTLN("Invalid device option \"%s\"", cp); + goto err; + } + } + + extra = 0; + if (nocache) + extra |= O_DIRECT; + if (sync) + extra |= O_SYNC; + + fd = open(nopt, (ro ? 
O_RDONLY : O_RDWR) | extra); + if (fd < 0 && !ro) { + /* Attempt a r/w fail with a r/o open */ + fd = open(nopt, O_RDONLY | extra); + ro = 1; + } + + if (fd < 0) { + warn("Could not open backing file: %s", nopt); + goto err; + } + + if (fstat(fd, &sbuf) < 0) { + warn("Could not stat backing file %s", nopt); + goto err; + } + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, + CAP_WRITE); + if (ro) + cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); + + if (caph_rights_limit(fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + /* + * Deal with raw devices + */ + size = sbuf.st_size; + sectsz = DEV_BSIZE; + psectsz = psectoff = 0; + candelete = geom = 0; + if (S_ISCHR(sbuf.st_mode)) { + if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || + ioctl(fd, DIOCGSECTORSIZE, §sz)) { + perror("Could not fetch dev blk/sector size"); + goto err; + } + assert(size != 0); + assert(sectsz != 0); + if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) + ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); + strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); + arg.len = sizeof(arg.value.i); + if (nodelete == 0 && ioctl(fd, DIOCGATTR, &arg) == 0) + candelete = arg.value.i; + if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) + geom = 1; + } else + psectsz = sbuf.st_blksize; + +#ifndef WITHOUT_CAPSICUM + if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + if (ssopt != 0) { + if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || + ssopt > pssopt) { + EPRINTLN("Invalid sector size %d/%d", + ssopt, pssopt); + goto err; + } + + /* + * Some backend drivers (e.g. cd0, ada0) require that the I/O + * size be a multiple of the device's sector size. + * + * Validate that the emulated sector size complies with this + * requirement. 
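+		 * For example, sectorsize=4096 is accepted on a 512-byte
+		 * native device (4096 is a multiple of 512), while
+		 * sectorsize=512 on a 4096-byte native device is rejected.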
+ */ + if (S_ISCHR(sbuf.st_mode)) { + if (ssopt < sectsz || (ssopt % sectsz) != 0) { + EPRINTLN("Sector size %d incompatible " + "with underlying device sector size %d", + ssopt, sectsz); + goto err; + } + } + + sectsz = ssopt; + psectsz = pssopt; + psectoff = 0; + } + + bc = calloc(1, sizeof(struct blockif_ctxt)); + if (bc == NULL) { + perror("calloc"); + goto err; + } + + bc->bc_magic = BLOCKIF_SIG; + bc->bc_fd = fd; + bc->bc_ischr = S_ISCHR(sbuf.st_mode); + bc->bc_isgeom = geom; + bc->bc_candelete = candelete; + bc->bc_rdonly = ro; + bc->bc_size = size; + bc->bc_sectsz = sectsz; + bc->bc_psectsz = psectsz; + bc->bc_psectoff = psectoff; + pthread_mutex_init(&bc->bc_mtx, NULL); + pthread_cond_init(&bc->bc_cond, NULL); + bc->bc_paused = 0; + bc->bc_work_count = 0; + pthread_cond_init(&bc->bc_paused_cond, NULL); + pthread_cond_init(&bc->bc_work_done_cond, NULL); + TAILQ_INIT(&bc->bc_freeq); + TAILQ_INIT(&bc->bc_pendq); + TAILQ_INIT(&bc->bc_busyq); + for (i = 0; i < BLOCKIF_MAXREQ; i++) { + bc->bc_reqs[i].be_status = BST_FREE; + TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); + } + + for (i = 0; i < BLOCKIF_NUMTHR; i++) { + pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); + snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); + pthread_set_name_np(bc->bc_btid[i], tname); + } + + return (bc); +err: + if (fd >= 0) + close(fd); + free(nopt); + return (NULL); +} + +static int +blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, + enum blockop op) +{ + int err; + + err = 0; + + pthread_mutex_lock(&bc->bc_mtx); + if (!TAILQ_EMPTY(&bc->bc_freeq)) { + /* + * Enqueue and inform the block i/o thread + * that there is work available + */ + if (blockif_enqueue(bc, breq, op)) + pthread_cond_signal(&bc->bc_cond); + } else { + /* + * Callers are not allowed to enqueue more than + * the specified blockif queue limit. Return an + * error to indicate that the queue length has been + * exceeded. + */ + err = E2BIG; + } + pthread_mutex_unlock(&bc->bc_mtx); + + return (err); +} + +int +blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_READ)); +} + +int +blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_WRITE)); +} + +int +blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_FLUSH)); +} + +int +blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_DELETE)); +} + +int +blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + struct blockif_elem *be; + + assert(bc->bc_magic == BLOCKIF_SIG); + + pthread_mutex_lock(&bc->bc_mtx); + /* XXX: not waiting while paused */ + + /* + * Check pending requests. + */ + TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { + if (be->be_req == breq) + break; + } + if (be != NULL) { + /* + * Found it. + */ + blockif_complete(bc, be); + pthread_mutex_unlock(&bc->bc_mtx); + + return (0); + } + + /* + * Check in-flight requests. + */ + TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { + if (be->be_req == breq) + break; + } + if (be == NULL) { + /* + * Didn't find it. + */ + pthread_mutex_unlock(&bc->bc_mtx); + return (EINVAL); + } + + /* + * Interrupt the processing thread to force it return + * prematurely via it's normal callback path. 
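+	 * Mechanism: push a blockif_sig_elem onto the lock-free
+	 * blockif_bse_head list and deliver SIGCONT to the worker with
+	 * pthread_kill(); the mevent-registered blockif_sigcont_handler()
+	 * then walks the list and wakes each waiter, and the loop re-checks
+	 * be_status until the worker has finished the request through its
+	 * callback.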
+ */ + while (be->be_status == BST_BUSY) { + struct blockif_sig_elem bse, *old_head; + + pthread_mutex_init(&bse.bse_mtx, NULL); + pthread_cond_init(&bse.bse_cond, NULL); + + bse.bse_pending = 1; + + do { + old_head = blockif_bse_head; + bse.bse_next = old_head; + } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + (uintptr_t)old_head, + (uintptr_t)&bse)); + + pthread_kill(be->be_tid, SIGCONT); + + pthread_mutex_lock(&bse.bse_mtx); + while (bse.bse_pending) + pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); + pthread_mutex_unlock(&bse.bse_mtx); + } + + pthread_mutex_unlock(&bc->bc_mtx); + + /* + * The processing thread has been interrupted. Since it's not + * clear if the callback has been invoked yet, return EBUSY. + */ + return (EBUSY); +} + +int +blockif_close(struct blockif_ctxt *bc) +{ + void *jval; + int i; + + assert(bc->bc_magic == BLOCKIF_SIG); + + /* + * Stop the block i/o thread + */ + pthread_mutex_lock(&bc->bc_mtx); + bc->bc_closing = 1; + pthread_mutex_unlock(&bc->bc_mtx); + pthread_cond_broadcast(&bc->bc_cond); + for (i = 0; i < BLOCKIF_NUMTHR; i++) + pthread_join(bc->bc_btid[i], &jval); + + /* XXX Cancel queued i/o's ??? */ + + /* + * Release resources + */ + bc->bc_magic = 0; + close(bc->bc_fd); + free(bc); + + return (0); +} + +/* + * Return virtual C/H/S values for a given block. Use the algorithm + * outlined in the VHD specification to calculate values. + */ +void +blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) +{ + off_t sectors; /* total sectors of the block dev */ + off_t hcyl; /* cylinders times heads */ + uint16_t secpt; /* sectors per track */ + uint8_t heads; + + assert(bc->bc_magic == BLOCKIF_SIG); + + sectors = bc->bc_size / bc->bc_sectsz; + + /* Clamp the size to the largest possible with CHS */ + if (sectors > 65535UL*16*255) + sectors = 65535UL*16*255; + + if (sectors >= 65536UL*16*63) { + secpt = 255; + heads = 16; + hcyl = sectors / secpt; + } else { + secpt = 17; + hcyl = sectors / secpt; + heads = (hcyl + 1023) / 1024; + + if (heads < 4) + heads = 4; + + if (hcyl >= (heads * 1024) || heads > 16) { + secpt = 31; + heads = 16; + hcyl = sectors / secpt; + } + if (hcyl >= (heads * 1024)) { + secpt = 63; + heads = 16; + hcyl = sectors / secpt; + } + } + + *c = hcyl / heads; + *h = heads; + *s = secpt; +} + +/* + * Accessors + */ +off_t +blockif_size(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_size); +} + +int +blockif_sectsz(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_sectsz); +} + +void +blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + *size = bc->bc_psectsz; + *off = bc->bc_psectoff; +} + +int +blockif_queuesz(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (BLOCKIF_MAXREQ - 1); +} + +int +blockif_is_ro(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_rdonly); +} + +int +blockif_candelete(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_candelete); +} + +#ifdef BHYVE_SNAPSHOT +void +blockif_pause(struct blockif_ctxt *bc) +{ + assert(bc != NULL); + assert(bc->bc_magic == BLOCKIF_SIG); + + pthread_mutex_lock(&bc->bc_mtx); + bc->bc_paused = 1; + + /* The interface is paused. 
Wait for workers to finish their work */ + while (bc->bc_work_count) + pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx); + pthread_mutex_unlock(&bc->bc_mtx); + + if (blockif_flush_bc(bc)) + fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n", + __func__); +} + +void +blockif_resume(struct blockif_ctxt *bc) +{ + assert(bc != NULL); + assert(bc->bc_magic == BLOCKIF_SIG); + + pthread_mutex_lock(&bc->bc_mtx); + bc->bc_paused = 0; + /* resume the threads waiting for paused */ + pthread_cond_broadcast(&bc->bc_paused_cond); + /* kick the threads after restore */ + pthread_cond_broadcast(&bc->bc_cond); + pthread_mutex_unlock(&bc->bc_mtx); +} + +int +blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta) +{ + int i; + struct iovec *iov; + int ret; + + SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done); + + /* + * XXX: The callback and parameter must be filled by the virtualized + * device that uses the interface, during its init; we're not touching + * them here. + */ + + /* Snapshot the iovecs. */ + for (i = 0; i < br->br_iovcnt; i++) { + iov = &br->br_iov[i]; + + SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done); + + /* We assume the iov is a guest-mapped address. */ + SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len, + false, meta, ret, done); + } + +done: + return (ret); +} + +int +blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta) +{ + int ret; + + if (bc->bc_paused == 0) { + fprintf(stderr, "%s: Snapshot failed: " + "interface not paused.\r\n", __func__); + return (ENXIO); + } + + pthread_mutex_lock(&bc->bc_mtx); + + SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done); + +done: + pthread_mutex_unlock(&bc->bc_mtx); + return (ret); +} +#endif Index: usr.sbin/bhyve/mmio/mmio_emul.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_emul.h @@ -0,0 +1,116 @@ +#ifndef _EMUL_H_ +#define _EMUL_H_ + +#include + +#include + +struct vmctx; +struct mmio_devinst; + +// TODO suggestive naming +struct mmio_devemu { + char *de_emu; /* Device emulation name */ + + /* Instance creation */ + int (*de_init)(struct vmctx *ctx, struct mmio_devinst *di, + char *opts); + + /* Read / Write callbacks */ + void (*de_write)(struct vmctx *ctx, int vcpu, + struct mmio_devinst *di, int baridx, + uint64_t offset, int size, uint64_t val); + + uint64_t (*de_read)(struct vmctx *ctx, int vcpu, + struct mmio_devinst *di, int baridx, + uint64_t offset, int size); +}; + +#define MMIO_EMUL_SET(x) DATA_SET(mmio_set, x); +#define DI_NAMESZ 40 +#define MMIO_REGMAX 0xff +#define MMIO_REGNUM (MMIO_REGMAX + 1) + +struct devinst_addr { + uint64_t baddr; + uint64_t size; +}; + +enum lintr_stat { + IDLE, + ASSERTED, + PENDING +}; + +// TODO suggestive naming +struct mmio_devinst { + struct mmio_devemu *pi_d; /* Back ref to device */ + struct vmctx *pi_vmctx; /* Owner VM context */ + /* 
unused for mmio device emulation; may be used as uniquifiers */ + int pi_slot, di_func; + + char pi_name[DI_NAMESZ]; /* Instance name */ + + struct { + enum lintr_stat state; + int64_t irq; + pthread_mutex_t lock; + } di_lintr; + + void *pi_arg; /* Private data */ + + u_char pi_cfgregs[MMIO_REGNUM];/* Config regsters */ + + struct devinst_addr addr; /* Address info */ +}; + +int mmio_parse_opts(const char *args); +int mmio_alloc_mem(struct mmio_devinst *di); +int init_mmio(struct vmctx *ctx); +void mmio_lintr_request(struct mmio_devinst *di); +void mmio_lintr_assert(struct mmio_devinst *di); +void mmio_lintr_deassert(struct mmio_devinst *di); + +static __inline void +mmio_set_cfgreg8(struct mmio_devinst *di, size_t offset, uint32_t val) +{ + assert(offset <= MMIO_REGMAX); + *(uint32_t *)(di->pi_cfgregs + offset) = val; +} + +static __inline void +mmio_set_cfgreg16(struct mmio_devinst *di, size_t offset, uint32_t val) +{ + assert(offset <= (MMIO_REGMAX - 1) && (offset & 1) == 0); + *(uint32_t *)(di->pi_cfgregs + offset) = val; +} + +static __inline void +mmio_set_cfgreg32(struct mmio_devinst *di, size_t offset, uint32_t val) +{ + assert(offset <= (MMIO_REGMAX - 3) && (offset & 3) == 0); + *(uint32_t *)(di->pi_cfgregs + offset) = val; +} + +static __inline uint8_t +mmio_get_cfgreg8(struct mmio_devinst *di, size_t offset) +{ + assert(offset <= MMIO_REGMAX); + return (*(uint32_t *)(di->pi_cfgregs + offset)); +} + +static __inline uint16_t +mmio_get_cfgreg16(struct mmio_devinst *di, size_t offset) +{ + assert(offset <= (MMIO_REGMAX - 1) && (offset & 1) == 0); + return (*(uint32_t *)(di->pi_cfgregs + offset)); +} + +static __inline uint32_t +mmio_get_cfgreg32(struct mmio_devinst *di, size_t offset) +{ + assert(offset <= (MMIO_REGMAX - 3) && (offset & 3) == 0); + return (*(uint32_t *)(di->pi_cfgregs + offset)); +} + +#endif /* _EMUL_H_ */ Index: usr.sbin/bhyve/mmio/mmio_emul.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_emul.c @@ -0,0 +1,440 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "arm64/mem.h" +#include "mmio_emul.h" +#include "mmio_irq.h" + +#define DEVEMU_MEMLIMIT 0xFD00000000UL +#define DEVEMU_MEMBASE 0xD000000000UL +#define MEM_ROUNDUP (1 << 20) +#ifndef max +# define max(A, B) ((A) > (B) ? 
(A) : (B)) +#endif + +static uint64_t mmio_membase; + +SET_DECLARE(mmio_set, struct mmio_devemu); + +static struct mmio_devemu *mmio_finddef(const char *name); +static void mmio_lintr_route(struct mmio_devinst *di); +static void mmio_lintr_update(struct mmio_devinst *di); + +static struct mmio_emul_info { + uint64_t size; /* address size */ + uint64_t baddr; /* address */ + int64_t irq; /* device interrupt number */ + char *name; /* device name */ + char *arg; /* device arguments */ + struct mmio_emul_info *next; /* pointer for linked list */ + struct mmio_devinst *di; /* pointer to device instance */ +} *mmio_emul_info_head = NULL; + +/* + * MMIO options are in the form: + * + * @#:[,] + * + * - size is the number of bytes required for the device mmio + * - base_addr is the base address for the MMIO mapped device; + * - irq specifies the device interrupt number the value MUST be a DECIMAL + * integer; if the device does not use interrupts, use -1 + * - emul is a string describing the type of device - e.g., virtio-net; + * - config is an optional string, depending on the device, that is used + * for configuration + * + * Examples of use: + * 0x200@0x100000#25:virtio-net,tap0 + * 0x100@0x200000#-1:dummy + */ +static void +mmio_parse_opts_usage(const char *args) +{ + fprintf(stderr, "Invalid mmio arguments \"%s\"\r\n", args); +} + +/* + * checks if two memory regions overlap + * checks are not required if one of the pointers is null + */ +static int +mmio_mem_overlap(uint64_t pa, uint64_t sa, uint64_t pb, uint64_t sb) +{ +#define IN_INTERVAL(lower, value, upper) \ + (((lower) < (value)) && ((value) < (upper))) + + if ((pa == 0) || (pb == 0)) + return 0; + + if (IN_INTERVAL(pa, pb, pa + sa) && + IN_INTERVAL(pb, pa, pb + sb)) + return 1; + + return 0; + +#undef IN_INTERVAL +} + +int +mmio_parse_opts(const char *args) +{ + char *emul, *config, *str; + uint64_t size, baddr; + int64_t irq; + int error; + struct mmio_emul_info *dif; + + error = -1; + emul = config = NULL; + baddr = 0, size = 0; + str = strdup(args); + + if ((emul = strchr(str, ':')) != NULL) { + *emul++ = '\0'; + + /* @# */ + if (sscanf(str, "%jx@%jx#%jd", &size, &baddr, &irq) != 3 && + sscanf(str, "%jx@%jx#%jd", &size, &baddr, &irq) != 3) { + mmio_parse_opts_usage(str); + goto parse_error; + } + } else { + mmio_parse_opts_usage(str); + goto parse_error; + } + + if ((config = strchr(emul, ',')) != NULL) + *config++ = '\0'; + + /* + * check if the required address can be obtained; + * if an address has not been requested, ignore the checks + * (however, an address will have to be later identified) + */ + if (baddr != 0) { + for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next) + if (mmio_mem_overlap(dif->baddr, dif->size, + baddr, size)) + break; + + if (dif != NULL) { + fprintf(stderr, "The requested address 0x%jx is " + "already bound or overlapping\r\n", baddr); + error = EINVAL; + goto parse_error; + } + } + + dif = calloc(1, sizeof(struct mmio_emul_info)); + if (dif == NULL) { + error = ENOMEM; + goto parse_error; + } + + dif->next = mmio_emul_info_head; + mmio_emul_info_head = dif; + + dif->size = size; + dif->baddr = baddr; + dif->irq = irq; + if ((emul != NULL) && (strlen(emul)) > 0) + dif->name = strdup(emul); + else + dif->name = NULL; + if ((config != NULL) && (strlen(config)) > 0) + dif->arg = strdup(config); + else + dif->arg = NULL; + + error = 0; + +parse_error: + free(str); + + return error; +} + +static int +mmio_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int size, uint64_t *val, 
void *arg1, long arg2) +{ + struct mmio_devinst *di = arg1; + struct mmio_devemu *de = di->pi_d; + uint64_t offset; + int bidx = (int) arg2; + + assert(di->addr.baddr <= addr && + addr + size <= di->addr.baddr + di->addr.size); + + offset = addr - di->addr.baddr; + + if (dir == MEM_F_WRITE) { + if (size == 8) { + (*de->de_write)(ctx, vcpu, di, bidx, offset, + 4, *val & 0xffffffff); + (*de->de_write)(ctx, vcpu, di, bidx, offset + 4, + 4, *val >> 32); + } else { + (*de->de_write)(ctx, vcpu, di, bidx, offset, + size, *val); + } + } else { + if (size == 8) { + *val = (*de->de_read)(ctx, vcpu, di, bidx, + offset, 4); + *val |= (*de->de_read)(ctx, vcpu, di, bidx, + offset + 4, 4) << 32; + } else { + *val = (*de->de_read)(ctx, vcpu, di, bidx, + offset, size); + } + } + + return (0); +} + +static void +modify_mmio_registration(struct mmio_devinst *di, int registration) +{ + int error; + struct mem_range mr; + + bzero(&mr, sizeof(struct mem_range)); + mr.name = di->pi_name; + mr.base = di->addr.baddr; + mr.size = di->addr.size; + if (registration) { + mr.flags = MEM_F_RW; + mr.handler = mmio_mem_handler; + mr.arg1 = di; + mr.arg2 = 0; + error = register_mem(&mr); + } else { + error = unregister_mem(&mr); + } + + assert(error == 0); +} + +static void +register_mmio(struct mmio_devinst *di) +{ + return modify_mmio_registration(di, 1); +} + +static void +unregister_mmio(struct mmio_devinst *di) +{ + return modify_mmio_registration(di, 0); +} + +/* + * Update the MMIO address that is decoded + */ +static void +update_mem_address(struct mmio_devinst *di, uint64_t addr) +{ + /* TODO: check if the decoding is running */ + unregister_mmio(di); + + di->addr.baddr = addr; + + register_mmio(di); +} + +static int +mmio_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, + uint64_t *addr) +{ + uint64_t base; + + assert((size & (size - 1)) == 0); /* must be a power of 2 */ + + base = roundup2(*baseptr, size); + + if (base + size <= limit) { + *addr = base; + *baseptr = base + size; + return (0); + } else + return (-1); +} + +int +mmio_alloc_mem(struct mmio_devinst *di) +{ + int error; + uint64_t *baseptr, limit, addr, size; + + baseptr = &di->addr.baddr; + size = di->addr.size; + limit = DEVEMU_MEMLIMIT; + + if ((size & (size - 1)) != 0) + /* Round up to a power of 2 */ + size = 1UL << flsl(size); + + error = mmio_alloc_resource(baseptr, limit, size, &addr); + if (error != 0) + return (error); + + di->addr.baddr = addr; + + register_mmio(di); + + return (0); +} + +static struct mmio_devemu * +mmio_finddev(char *name) +{ + struct mmio_devemu **dpp, *dp; + + SET_FOREACH(dpp, mmio_set) { + dp = *dpp; + if (!strcmp(dp->de_emu, name)) + return (dp); + } + + return (NULL); +} + +static int +mmio_init(struct vmctx *ctx, struct mmio_devemu *de, struct mmio_emul_info *dif) +{ + struct mmio_devinst *di; + int error; + + di = calloc(1, sizeof(struct mmio_devinst)); + if (di == NULL) + return (ENOMEM); + + di->pi_d = de; + di->pi_vmctx = ctx; + snprintf(di->pi_name, DI_NAMESZ, "%s-mmio", de->de_emu); + di->di_lintr.state = IDLE; + di->di_lintr.irq = dif->irq; + pthread_mutex_init(&di->di_lintr.lock, NULL); + di->addr.baddr = dif->baddr; + di->addr.size = dif->size; + /* some devices (e.g., virtio-net) use these as uniquifiers; irq number + * should be unique and sufficient */ + di->pi_slot = dif->irq; + di->di_func = dif->irq; + + error = (*de->de_init)(ctx, di, dif->arg); + + if (error == 0) { + dif->di = di; + } else { + fprintf(stderr, "Device \"%s\": initialization failed\r\n", + di->pi_name); + 
fprintf(stderr, "Device arguments were: %s\r\n", dif->arg); + free(di); + } + + return (error); +} + +static void +init_mmio_error(const char *name) +{ + struct mmio_devemu **mdpp, *mdp; + + fprintf(stderr, "Device \"%s\" does not exist\r\n", name); + fprintf(stderr, "The following devices are available:\r\n"); + + SET_FOREACH(mdpp, mmio_set) { + mdp = *mdpp; + fprintf(stderr, "\t%s\r\n", mdp->de_emu); + } +} + +int init_mmio(struct vmctx *ctx) +{ + struct mmio_devemu *de; + struct mmio_emul_info *dif; + int error; + + mmio_membase = DEVEMU_MEMBASE; + + for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next) { + if (dif->name == NULL) + continue; + + de = mmio_finddev(dif->name); + if (de == NULL) { + init_mmio_error(dif->name); + return (1); + } + + error = mmio_init(ctx, de, dif); + if (error != 0) + return (error); + + /* + * as specified in the amd64 implementation, add some + * slop to the memory resources decoded, in order to + * give the guest some flexibility to reprogram the addresses + */ + mmio_membase += MEM_ROUNDUP; + mmio_membase = roundup2(mmio_membase, MEM_ROUNDUP); + } + + /* activate the interrupts */ + for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next) + if (dif->di != NULL) + mmio_lintr_route(dif->di); + + /* TODO: register fallback handlers? */ + + return (0); +} + +void +mmio_lintr_request(struct mmio_devinst *di) +{ + /* do nothing */ +} + +static void +mmio_lintr_route(struct mmio_devinst *di) +{ + /* do nothing */ +} + +void +mmio_lintr_assert(struct mmio_devinst *di) +{ + pthread_mutex_lock(&di->di_lintr.lock); + if (di->di_lintr.state == IDLE) { + di->di_lintr.state = ASSERTED; + mmio_irq_assert(di); + } + pthread_mutex_unlock(&di->di_lintr.lock); +} + +void +mmio_lintr_deassert(struct mmio_devinst *di) +{ + pthread_mutex_lock(&di->di_lintr.lock); + if (di->di_lintr.state == ASSERTED) { + mmio_irq_deassert(di); + di->di_lintr.state = IDLE; + } else if (di->di_lintr.state == PENDING) { + di->di_lintr.state = IDLE; + } + pthread_mutex_unlock(&di->di_lintr.lock); +} + +/* TODO: Add dummy? 
*/ Index: usr.sbin/bhyve/mmio/mmio_irq.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_irq.h @@ -0,0 +1,12 @@ +#ifndef __MMIO_IRQ_H__ +#define __MMIO_IRQ_H__ + +struct mmio_devinst; + +void mmio_irq_init(struct vmctx *ctx); +void mmio_irq_reserve(int irq); +void mmio_irq_use(int irq); +void mmio_irq_assert(struct mmio_devinst *di); +void mmio_irq_deassert(struct mmio_devinst *di); + +#endif Index: usr.sbin/bhyve/mmio/mmio_irq.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_irq.c @@ -0,0 +1,113 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include + +#include "mmio_emul.h" +#include "mmio_irq.h" +#include "mmio_virtio.h" + +/* IRQ count to disable IRQ */ +#define IRQ_DISABLED 0xff + +static struct mmio_irq { + uint32_t use_count; /* number of binds */ + uint32_t active_count; /* number of asserts */ + uint32_t active; /* irq active */ + pthread_mutex_t lock; +} irqs[50]; + +void +mmio_irq_reserve(int irq) +{ + assert(irq >= 0 && irq < nitems(irqs)); + assert(irqs[irq].active == 0 || irqs[irq].active == IRQ_DISABLED); + irqs[irq].active = IRQ_DISABLED; +} + +void +mmio_irq_use(int irq) { + assert(irq >= 0 && irq < nitems(irqs)); + assert(irqs[irq].active != IRQ_DISABLED); + irqs[irq].active++; +} + +void +mmio_irq_init(struct vmctx *ctx) +{ + int i; + + for (i = 0; i < nitems(irqs); ++i) { + irqs[i].use_count = 0; + irqs[i].active_count = 0; + irqs[i].active = 0; + pthread_mutex_init(&irqs[i].lock, NULL); + } +} + +void +mmio_irq_assert(struct mmio_devinst *di) +{ + struct mmio_irq *irq; + uint32_t irq_status; + + assert(di->di_lintr.irq <= nitems(irqs)); + if (di->di_lintr.irq < 0) + return; + + irq = &irqs[di->di_lintr.irq]; + + pthread_mutex_lock(&irq->lock); + irq->active_count++; + + pthread_mutex_lock(&di->di_lintr.lock); + + irq_status = mmio_get_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS); + irq_status |= VIRTIO_MMIO_INT_VRING; + mmio_set_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS, irq_status); + + if (irq->active_count == 1) + vm_assert_irq(di->pi_vmctx, di->di_lintr.irq, 0); + + pthread_mutex_unlock(&di->di_lintr.lock); + + pthread_mutex_unlock(&irq->lock); +} + +void +mmio_irq_deassert(struct mmio_devinst *di) +{ + struct mmio_irq *irq; + uint32_t irq_status; + + assert(di->di_lintr.irq <= nitems(irqs)); + if (di->di_lintr.irq < 0) + return; + + irq = &irqs[di->di_lintr.irq]; + + pthread_mutex_lock(&irq->lock); + irq->active_count--; + + pthread_mutex_lock(&di->di_lintr.lock); + + irq_status = mmio_get_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS); + irq_status &= ~VIRTIO_MMIO_INT_VRING; + mmio_set_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS, irq_status); + +#if 0 + /* MMIO devices do not require deassertions */ + if (irq->active_count == 0) + vm_deassert_irq(di->di_vmctx, di->di_lintr.irq); +#endif + + pthread_mutex_unlock(&di->di_lintr.lock); + + pthread_mutex_unlock(&irq->lock); +} Index: usr.sbin/bhyve/mmio/mmio_uart.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_uart.c @@ -0,0 +1,112 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright TODO + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include + +#include +#include + +#include "bhyverun.h" +#include "debug.h" + +#include "mmio_emul.h" +#include "mmio_virtio.h" +#include "uart_emul.h" + +static void +mmio_uart_intr_assert(void *arg, uint32_t irq) +{ + struct vmctx *ctx = arg; + + vm_assert_irq(ctx, irq, 0); +} + +static void +mmio_uart_intr_deassert(void *arg, uint32_t irq) +{ + struct vmctx *ctx = arg; + + vm_deassert_irq(ctx, irq, 0); +} + +static int +mmio_uart_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts) +{ + struct uart_softc *sc; + + sc = uart_init(mmio_uart_intr_assert, mmio_uart_intr_deassert, ctx); + if (uart_set_backend(sc, "stdio") != 0) { + EPRINTLN("Unable to initialize backend '%s' for " + "mmio_uart", "stdio"); + return (-1); + } + + pi->pi_arg = sc; + sc->irqno = pi->di_lintr.irq; + + mmio_alloc_mem(pi); + + return (0); +} + +static void +mmio_uart_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di, + int baridx, uint64_t offset, int size, uint64_t value) +{ + struct uart_softc *sc = di->pi_arg; + long reg = offset >> 2; + + uart_write(sc, reg, value); +} + +uint64_t +mmio_uart_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di, + int baridx, uint64_t offset, int size) +{ + struct uart_softc *sc = di->pi_arg; + long reg = offset >> 2; + + return uart_read(sc, reg); +} + +struct mmio_devemu mmio_uart = { + .de_emu = "mmio-uart", + .de_init = mmio_uart_init, + .de_write = mmio_uart_write, + .de_read = mmio_uart_read +}; +MMIO_EMUL_SET(mmio_uart); Index: usr.sbin/bhyve/mmio/mmio_virtio.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_virtio.h @@ -0,0 +1,484 @@ +/*- + * Copyright (c) 2013 Chris Torek + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VIRTIO_H_ +#define _VIRTIO_H_ + +#include + +/* + * These are derived from several virtio specifications. + * + * Some useful links: + * https://github.com/rustyrussell/virtio-spec + * http://people.redhat.com/pbonzini/virtio-spec.pdf + */ + +/* + * A virtual device has zero or more "virtual queues" (virtqueue). + * Each virtqueue uses at least two 4096-byte pages, laid out thus: + * + * +-----------------------------------------------+ + * | "desc": descriptors, 16 bytes each | + * | ----------------------------------------- | + * | "avail": 2 uint16; uint16; 1 uint16 | + * | ----------------------------------------- | + * | pad to 4k boundary | + * +-----------------------------------------------+ + * | "used": 2 x uint16; elems; 1 uint16 | + * | ----------------------------------------- | + * | pad to 4k boundary | + * +-----------------------------------------------+ + * + * The number that appears here is always a power of two and is + * limited to no more than 32768 (as it must fit in a 16-bit field). + * If is sufficiently large, the above will occupy more than + * two pages. In any case, all pages must be physically contiguous + * within the guest's physical address space. + * + * The 16-byte "desc" descriptors consist of a 64-bit guest + * physical address , a 32-bit length , a 16-bit + * , and a 16-bit field (all in guest byte order). + * + * There are three flags that may be set : + * NEXT descriptor is chained, so use its "next" field + * WRITE descriptor is for host to write into guest RAM + * (else host is to read from guest RAM) + * INDIRECT descriptor address field is (guest physical) + * address of a linear array of descriptors + * + * Unless INDIRECT is set, is the number of bytes that may + * be read/written from guest physical address . If + * INDIRECT is set, WRITE is ignored and provides the length + * of the indirect descriptors (and must be a multiple of + * 16). Note that NEXT may still be set in the main descriptor + * pointing to the indirect, and should be set in each indirect + * descriptor that uses the next descriptor (these should generally + * be numbered sequentially). However, INDIRECT must not be set + * in the indirect descriptors. Upon reaching an indirect descriptor + * without a NEXT bit, control returns to the direct descriptors. + * + * Except inside an indirect, each value must be in the + * range [0 .. N) (i.e., the half-open interval). (Inside an + * indirect, each must be in the range [0 .. /16).) + * + * The "avail" data structures reside in the same pages as the + * "desc" structures since both together are used by the device to + * pass information to the hypervisor's virtual driver. 
These + * begin with a 16-bit field and 16-bit index , then + * have 16-bit values, followed by one final 16-bit + * field . The entries are simply indices + * indices into the descriptor ring (and thus must meet the same + * constraints as each value). However, is counted + * up from 0 (initially) and simply wraps around after 65535; it + * is taken mod to find the next available entry. + * + * The "used" ring occupies a separate page or pages, and contains + * values written from the virtual driver back to the guest OS. + * This begins with a 16-bit and 16-bit , then there + * are "vring_used" elements, followed by a 16-bit . + * The "vring_used" elements consist of a 32-bit and a + * 32-bit (vu_tlen below). The is simply the index of + * the head of a descriptor chain the guest made available + * earlier, and the is the number of bytes actually written, + * e.g., in the case of a network driver that provided a large + * receive buffer but received only a small amount of data. + * + * The two event fields, and , in the + * avail and used rings (respectively -- note the reversal!), are + * always provided, but are used only if the virtual device + * negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature + * negotiation. Similarly, both rings provide a flag -- + * VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in + * their field, indicating that the guest does not need an + * interrupt, or that the hypervisor driver does not need a + * notify, when descriptors are added to the corresponding ring. + * (These are provided only for interrupt optimization and need + * not be implemented.) + */ + +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 +#define VIRTIO_MMIO_VERSION 0x004 +#define VIRTIO_MMIO_DEVICE_ID 0x008 +#define VIRTIO_MMIO_VENDOR_ID 0x00c +#define VIRTIO_MMIO_HOST_FEATURES 0x010 +#define VIRTIO_MMIO_HOST_FEATURES_SEL 0x014 +#define VIRTIO_MMIO_GUEST_FEATURES 0x020 +#define VIRTIO_MMIO_GUEST_FEATURES_SEL 0x024 +#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 +#define VIRTIO_MMIO_QUEUE_SEL 0x030 +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 +#define VIRTIO_MMIO_QUEUE_NUM 0x038 +#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c +#define VIRTIO_MMIO_QUEUE_PFN 0x040 +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 +#define VIRTIO_MMIO_STATUS 0x070 +#define VIRTIO_MMIO_CONFIG 0x100 +#define VIRTIO_MMIO_INT_VRING (1 << 0) +#define VIRTIO_MMIO_INT_CONFIG (1 << 1) +#define VIRTIO_MMIO_VRING_ALIGN 4096 + +#define VRING_ALIGN 4096 + +#define VRING_DESC_F_NEXT (1 << 0) +#define VRING_DESC_F_WRITE (1 << 1) +#define VRING_DESC_F_INDIRECT (1 << 2) + +struct virtio_desc { /* AKA vring_desc */ + uint64_t vd_addr; /* guest physical address */ + uint32_t vd_len; /* length of scatter/gather seg */ + uint16_t vd_flags; /* VRING_F_DESC_* */ + uint16_t vd_next; /* next desc if F_NEXT */ +} __packed; + +struct virtio_used { /* AKA vring_used_elem */ + uint32_t vu_idx; /* head of used descriptor chain */ + uint32_t vu_tlen; /* length written-to */ +} __packed; + +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +struct vring_avail { + uint16_t va_flags; /* VRING_AVAIL_F_* */ + uint16_t va_idx; /* counts to 65535, then cycles */ + uint16_t va_ring[]; /* size N, reported in QNUM value */ +/* uint16_t va_used_event; -- after N ring entries */ +} __packed; + +#define VRING_USED_F_NO_NOTIFY 1 +struct vring_used { + uint16_t vu_flags; /* VRING_USED_F_* */ + uint16_t vu_idx; /* counts to 65535, then cycles */ + struct virtio_used vu_ring[]; /* size N */ +/* 
uint16_t vu_avail_event; -- after N ring entries */ +} __packed; + +/* + * The address of any given virtual queue is determined by a single + * Page Frame Number register. The guest writes the PFN into the + * PCI config space. However, a device that has two or more + * virtqueues can have a different PFN, and size, for each queue. + * The number of queues is determinable via the PCI config space + * VTCFG_R_QSEL register. Writes to QSEL select the queue: 0 means + * queue #0, 1 means queue#1, etc. Once a queue is selected, the + * remaining PFN and QNUM registers refer to that queue. + * + * QNUM is a read-only register containing a nonzero power of two + * that indicates the (hypervisor's) queue size. Or, if reading it + * produces zero, the hypervisor does not have a corresponding + * queue. (The number of possible queues depends on the virtual + * device. The block device has just one; the network device + * provides either two -- 0 = receive, 1 = transmit -- or three, + * with 2 = control.) + * + * PFN is a read/write register giving the physical page address of + * the virtqueue in guest memory (the guest must allocate enough space + * based on the hypervisor's provided QNUM). + * + * QNOTIFY is effectively write-only: when the guest writes a queue + * number to the register, the hypervisor should scan the specified + * virtqueue. (Reading QNOTIFY currently always gets 0). + */ + +/* + * PFN register shift amount + */ +#define VRING_PFN 12 + +/* + * Virtio device types + * + * XXX Should really be merged with defines + */ +#define VIRTIO_TYPE_NET 1 +#define VIRTIO_TYPE_BLOCK 2 +#define VIRTIO_TYPE_CONSOLE 3 +#define VIRTIO_TYPE_ENTROPY 4 +#define VIRTIO_TYPE_BALLOON 5 +#define VIRTIO_TYPE_IOMEMORY 6 +#define VIRTIO_TYPE_RPMSG 7 +#define VIRTIO_TYPE_SCSI 8 +#define VIRTIO_TYPE_9P 9 + +/* experimental IDs start at 65535 and work down */ + +/* + * PCI vendor/device IDs + */ +#define VIRTIO_VENDOR 0x1AF4 +#define VIRTIO_DEV_NET 0x1000 +#define VIRTIO_DEV_BLOCK 0x1001 +#define VIRTIO_DEV_CONSOLE 0x1003 +#define VIRTIO_DEV_RANDOM 0x1005 + +#define VIRTIO_MMIO_MAGIC_NUM 0x74726976 +#define VIRTIO_MMIO_VERSION_NUM 0x1 + +/* + * Bits in VTCFG_R_STATUS. Guests need not actually set any of these, + * but a guest writing 0 to this register means "please reset". + */ +#define VTCFG_STATUS_ACK 0x01 /* guest OS has acknowledged dev */ +#define VTCFG_STATUS_DRIVER 0x02 /* guest OS driver is loaded */ +#define VTCFG_STATUS_DRIVER_OK 0x04 /* guest OS driver ready */ +#define VTCFG_STATUS_FAILED 0x80 /* guest has given up on this dev */ + +/* + * Bits in VTCFG_R_ISR. These apply only if not using MSI-X. + * + * (We don't [yet?] ever use CONF_CHANGED.) + */ +#define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */ +#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */ + +#define VIRTIO_MSI_NO_VECTOR 0xFFFF + +/* + * Feature flags. + * Note: bits 0 through 23 are reserved to each device type. 
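+ * (For instance, the net device defines VIRTIO_NET_F_MAC as bit 5 of its
+ * device-specific range, whereas the transport flags below start at bit 24.)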
+ */ +#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24) +#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28) +#define VIRTIO_RING_F_EVENT_IDX (1 << 29) + +/* From section 2.3, "Virtqueue Configuration", of the virtio specification */ +static inline size_t +vring_size(u_int qsz, uint32_t align) +{ + size_t size; + + /* constant 3 below = va_flags, va_idx, va_used_event */ + size = sizeof(struct virtio_desc) * qsz + sizeof(uint16_t) * (3 + qsz); + size = roundup2(size, align); + + /* constant 3 below = vu_flags, vu_idx, vu_avail_event */ + size += sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz; + size = roundup2(size, align); + + return (size); +} + +struct vmctx; +struct mmio_devinst; +struct vqueue_info; + +/* + * A virtual device, with some number (possibly 0) of virtual + * queues and some size (possibly 0) of configuration-space + * registers private to the device. The virtio_softc should come + * at the front of each "derived class", so that a pointer to the + * virtio_softc is also a pointer to the more specific, derived- + * from-virtio driver's softc. + * + * Note: inside each hypervisor virtio driver, changes to these + * data structures must be locked against other threads, if any. + * Except for PCI config space register read/write, we assume each + * driver does the required locking, but we need a pointer to the + * lock (if there is one) for PCI config space read/write ops. + * + * When the guest reads or writes the device's config space, the + * generic layer checks for operations on the special registers + * described above. If the offset of the register(s) being read + * or written is past the CFG area (CFG0 or CFG1), the request is + * passed on to the virtual device, after subtracting off the + * generic-layer size. (So, drivers can just use the offset as + * an offset into "struct config", for instance.) + * + * (The virtio layer also makes sure that the read or write is to/ + * from a "good" config offset, hence vc_cfgsize, and on BAR #0. + * However, the driver must verify the read or write size and offset + * and that no one is writing a readonly register.) + * + * The BROKED flag ("this thing done gone and broked") is for future + * use. + */ +#define VIRTIO_USE_MSIX 0x01 +#define VIRTIO_EVENT_IDX 0x02 /* use the event-index values */ +#define VIRTIO_BROKED 0x08 /* ??? 
*/ + +struct virtio_softc { + struct virtio_consts *vs_vc; /* constants (see below) */ + int vs_flags; /* VIRTIO_* flags from above */ + pthread_mutex_t *vs_mtx; /* POSIX mutex, if any */ + struct mmio_devinst *vs_di; /* device instance */ + uint32_t vs_negotiated_caps; /* negotiated capabilities */ + uint32_t vs_align; /* virtual queue alignment */ + struct vqueue_info *vs_queues; /* one per vc_nvq */ + int vs_curq; /* current queue */ + int irq; /* interrupt */ + uint8_t vs_status; /* value from last status write */ + uint32_t vs_guest_page_size; /* size of guest page in bytes */ +}; + +#define VS_LOCK(vs) \ +do { \ + if (vs->vs_mtx) \ + pthread_mutex_lock(vs->vs_mtx); \ +} while (0) + +#define VS_UNLOCK(vs) \ +do { \ + if (vs->vs_mtx) \ + pthread_mutex_unlock(vs->vs_mtx); \ +} while (0) + +struct virtio_consts { + const char *vc_name; /* name of driver (for diagnostics) */ + int vc_nvq; /* number of virtual queues */ + size_t vc_cfgsize; /* size of dev-specific config regs */ + void (*vc_reset)(void *); /* called on virtual device reset */ + void (*vc_qnotify)(void *, struct vqueue_info *); + /* called on QNOTIFY if no VQ notify */ + int (*vc_cfgread)(void *, int, int, uint32_t *); + /* called to read config regs */ + int (*vc_cfgwrite)(void *, int, int, uint32_t); + /* called to write config regs */ + void (*vc_apply_features)(void *, uint64_t); + /* called to apply negotiated features */ + uint64_t vc_hv_caps; /* hypervisor-provided capabilities */ +}; + +/* + * Data structure allocated (statically) per virtual queue. + * + * Drivers may change vq_qsize after a reset. When the guest OS + * requests a device reset, the hypervisor first calls + * vs->vs_vc->vc_reset(); then the data structure below is + * reinitialized (for each virtqueue: vs->vs_vc->vc_nvq). + * + * The remaining fields should only be fussed-with by the generic + * code. + * + * Note: the addresses of vq_desc, vq_avail, and vq_used are all + * computable from each other, but it's a lot simpler if we just + * keep a pointer to each one. The event indices are similarly + * (but more easily) computable, and this time we'll compute them: + * they're just XX_ring[N]. + */ +#define VQ_ALLOC 0x01 /* set once we have a pfn */ +#define VQ_BROKED 0x02 /* ??? */ +struct vqueue_info { + uint16_t vq_qsize; /* size of this queue (a power of 2) */ + void (*vq_notify)(void *, struct vqueue_info *); + /* called instead of vc_notify, if not NULL */ + + struct virtio_softc *vq_vs; /* backpointer to softc */ + uint16_t vq_num; /* we're the num'th queue in the softc */ + + uint16_t vq_flags; /* flags (see above) */ + uint16_t vq_last_avail; /* a recent value of vq_avail->va_idx */ + uint16_t vq_save_used; /* saved vq_used->vu_idx; see vq_endchains */ + + uint32_t vq_pfn; /* PFN of virt queue (not shifted!) */ + + volatile struct virtio_desc *vq_desc; /* descriptor array */ + volatile struct vring_avail *vq_avail; /* the "avail" ring */ + volatile struct vring_used *vq_used; /* the "used" ring */ +}; +/* as noted above, these are sort of backwards, name-wise */ +#define VQ_AVAIL_EVENT_IDX(vq) \ + (*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize]) +#define VQ_USED_EVENT_IDX(vq) \ + ((vq)->vq_avail->va_ring[(vq)->vq_qsize]) + +/* + * Is this ring ready for I/O? + */ +static inline int +vq_ring_ready(struct vqueue_info *vq) +{ + + return (vq->vq_flags & VQ_ALLOC); +} + +/* + * Are there "available" descriptors? (This does not count + * how many, just returns True if there are some.) 
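+ *
+ * A typical queue-notify handler (illustrative sketch only) drains the ring:
+ *
+ *	struct iovec iov[8];
+ *	uint16_t idx;
+ *	int n;
+ *
+ *	while (vq_has_descs(vq)) {
+ *		n = vq_getchain(vq, &idx, iov, 8, NULL);
+ *		// ... perform the I/O described by the n segments ...
+ *		vq_relchain(vq, idx, len);	// len: bytes written back
+ *	}
+ *	vq_endchains(vq, 1);	// used everything; interrupt if needed
+ *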
+ */ +static inline int +vq_has_descs(struct vqueue_info *vq) +{ + + return (vq_ring_ready(vq) && vq->vq_last_avail != + vq->vq_avail->va_idx); +} + +/* + * Deliver an interrupt to guest on the given virtual queue + * (if possible, or a generic MSI interrupt if not using MSI-X). + */ +static inline void +vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq) +{ + VS_LOCK(vs); + mmio_lintr_assert(vs->vs_di); + VS_UNLOCK(vs); +} + +static inline void +vq_kick_enable(struct vqueue_info *vq) +{ + + vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY; + /* + * Full memory barrier to make sure the store to vu_flags + * happens before the load from va_idx, which results from + * a subsequent call to vq_has_descs(). + */ + atomic_thread_fence_seq_cst(); +} + +static inline void +vq_kick_disable(struct vqueue_info *vq) +{ + + vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; +} + +struct iovec; +void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc, + void *dev_softc, struct mmio_devinst *di, + struct vqueue_info *queues); +int vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix); +void vi_reset_dev(struct virtio_softc *); +void vi_set_io_res(struct virtio_softc *, int); + +int vq_getchain(struct vqueue_info *vq, uint16_t *pidx, + struct iovec *iov, int n_iov, uint16_t *flags); +void vq_retchains(struct vqueue_info *vq, uint16_t n_chains); +void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen); +void vq_endchains(struct vqueue_info *vq, int used_all_avail); + +uint64_t vi_mmio_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di, + int baridx, uint64_t offset, int size); +void vi_mmio_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di, + int baridx, uint64_t offset, int size, uint64_t value); +void vi_devemu_init(struct mmio_devinst *di, uint32_t type); +#endif /* _VIRTIO_H_ */ Index: usr.sbin/bhyve/mmio/mmio_virtio.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_virtio.c @@ -0,0 +1,707 @@ +/*- + * Copyright (c) 2013 Chris Torek + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include + +#include "bhyverun.h" +#include "mmio_emul.h" +#include "mmio_virtio.h" +#include "virtio_ids.h" + +static int debug_virtio = 0; + +#define DPRINTF(fmt, ...) if (debug_virtio) printf(fmt, ##__VA_ARGS__) +#define CFG_RW_DBG(offset, value) \ + DPRINTF("{device} | %-60s | %-35s | %-30s (%jx): value = %jx\r\n", \ + __FILE__, __func__, #offset, (uintmax_t)offset, (uintmax_t)value); + +/* + * Functions for dealing with generalized "virtual devices" as + * defined by + */ + +/* + * In case we decide to relax the "virtio softc comes at the + * front of virtio-based device softc" constraint, let's use + * this to convert. + */ +#define DEV_SOFTC(vs) ((void *)(vs)) + +/* + * Link a virtio_softc to its constants, the device softc, and + * the PCI emulation. + */ +void +vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc, + void *dev_softc, struct mmio_devinst *di, + struct vqueue_info *queues) +{ + int i; + + /* vs and dev_softc addresses must match */ + assert((void *)vs == dev_softc); + vs->vs_vc = vc; + vs->vs_di = di; + di->pi_arg = vs; + + vs->vs_queues = queues; + for (i = 0; i < vc->vc_nvq; i++) { + queues[i].vq_vs = vs; + queues[i].vq_num = i; + } +} + +/* + * Reset device (device-wide). This erases all queues, i.e., + * all the queues become invalid (though we don't wipe out the + * internal pointers, we just clear the VQ_ALLOC flag). + * + * It resets negotiated features to "none". + */ +void +vi_reset_dev(struct virtio_softc *vs) +{ + struct vqueue_info *vq; + int i, nvq; + + if (vs->vs_mtx) + assert(pthread_mutex_isowned_np(vs->vs_mtx)); + + nvq = vs->vs_vc->vc_nvq; + for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) { + vq->vq_flags = 0; + vq->vq_last_avail = 0; + vq->vq_save_used = 0; + vq->vq_pfn = 0; + } + vs->vs_negotiated_caps = 0; + vs->vs_curq = 0; + /* vs->vs_status = 0; -- redundant */ + mmio_lintr_deassert(vs->vs_di); +} + +void +vi_set_io_res(struct virtio_softc *vs, int barnum) +{ + mmio_alloc_mem(vs->vs_di); +} + +/* + * Initialize interrupts for MMIO + */ +int +vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix) +{ + /* activate interrupts */ + mmio_lintr_request(vs->vs_di); + + return (0); +} + +/* + * Initialize the currently-selected virtio queue (vs->vs_curq). + * The guest just gave us a page frame number, from which we can + * calculate the addresses of the queue. + */ +void +vi_vq_init(struct virtio_softc *vs, uint32_t pfn) +{ + struct vqueue_info *vq; + uint64_t phys; + size_t size; + char *base; + + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_pfn = pfn; + phys = (uint64_t)pfn * vs->vs_guest_page_size; + size = vring_size(vq->vq_qsize, vs->vs_align); + base = paddr_guest2host(vs->vs_di->pi_vmctx, phys, size); + + /* First page(s) are descriptors... */ + vq->vq_desc = (struct virtio_desc *)base; + base += vq->vq_qsize * sizeof(struct virtio_desc); + + /* ... immediately followed by "avail" ring (entirely uint16_t's) */ + vq->vq_avail = (struct vring_avail *)base; + base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); + + /* Then it's rounded up to the next page... */ + base = (char *)roundup2((uintptr_t)base, vs->vs_align); + + /* ... and the last page(s) are the used ring. */ + vq->vq_used = (struct vring_used *)base; + + /* Mark queue as allocated, and start at 0 when we use it. 
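[Reviewer note: a worked example of the ring layout computed by vi_vq_init(), assuming the usual 16-byte struct virtio_desc and 8-byte struct virtio_used, with vq_qsize = 128, vs_align = 4096 and a 4 KB guest page size; the numbers are illustrative.]

    // guest writes QUEUE_PFN = 0x12345  -> phys = 0x12345 * 4096
    // desc  : offset 0,     128 * 16          = 2048 bytes
    // avail : offset 2048,  (2 + 128 + 1) * 2 =  262 bytes
    //         roundup2(2310, 4096)            = 4096
    // used  : offset 4096,  2 * 3 + 128 * 8   = 1030 bytes
    // total : vring_size(128, 4096)           = 8192 bytes (two pages)

So the ring occupies two guest pages, with the used ring starting exactly on the second page boundary, as vring_size() predicts.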
*/ + vq->vq_flags = VQ_ALLOC; + vq->vq_last_avail = 0; + vq->vq_save_used = 0; +} + +/* + * Helper inline for vq_getchain(): record the i'th "real" + * descriptor. + */ +static inline void +_vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx, + struct iovec *iov, int n_iov, uint16_t *flags) { + + if (i >= n_iov) + return; + iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, vd->vd_len); + iov[i].iov_len = vd->vd_len; + if (flags != NULL) + flags[i] = vd->vd_flags; +} +#define VQ_MAX_DESCRIPTORS 512 /* see below */ + +/* + * Examine the chain of descriptors starting at the "next one" to + * make sure that they describe a sensible request. If so, return + * the number of "real" descriptors that would be needed/used in + * acting on this request. This may be smaller than the number of + * available descriptors, e.g., if there are two available but + * they are two separate requests, this just returns 1. Or, it + * may be larger: if there are indirect descriptors involved, + * there may only be one descriptor available but it may be an + * indirect pointing to eight more. We return 8 in this case, + * i.e., we do not count the indirect descriptors, only the "real" + * ones. + * + * Basically, this vets the vd_flags and vd_next field of each + * descriptor and tells you how many are involved. Since some may + * be indirect, this also needs the vmctx (in the pci_devinst + * at vs->vs_di) so that it can find indirect descriptors. + * + * As we process each descriptor, we copy and adjust it (guest to + * host address wise, also using the vmtctx) into the given iov[] + * array (of the given size). If the array overflows, we stop + * placing values into the array but keep processing descriptors, + * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1. + * So you, the caller, must not assume that iov[] is as big as the + * return value (you can process the same thing twice to allocate + * a larger iov array if needed, or supply a zero length to find + * out how much space is needed). + * + * If you want to verify the WRITE flag on each descriptor, pass a + * non-NULL "flags" pointer to an array of "uint16_t" of the same size + * as n_iov and we'll copy each vd_flags field after unwinding any + * indirects. + * + * If some descriptor(s) are invalid, this prints a diagnostic message + * and returns -1. If no descriptors are ready now it simply returns 0. + * + * You are assumed to have done a vq_ring_ready() if needed (note + * that vq_has_descs() does one). + */ +int +vq_getchain(struct vqueue_info *vq, uint16_t *pidx, + struct iovec *iov, int n_iov, uint16_t *flags) +{ + int i; + u_int ndesc, n_indir; + u_int idx, next; + volatile struct virtio_desc *vdir, *vindir, *vp; + struct vmctx *ctx; + struct virtio_softc *vs; + const char *name; + + vs = vq->vq_vs; + name = vs->vs_vc->vc_name; + + /* + * Note: it's the responsibility of the guest not to + * update vq->vq_avail->va_idx until all of the descriptors + * the guest has written are valid (including all their + * vd_next fields and vd_flags). + * + * Compute (last_avail - va_idx) in integers mod 2**16. This is + * the number of descriptors the device has made available + * since the last time we updated vq->vq_last_avail. + * + * We just need to do the subtraction as an unsigned int, + * then trim off excess bits. 
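[Reviewer note: a quick numeric check of the mod-2**16 arithmetic used just below; the values are illustrative. Suppose vq_last_avail has reached 65534 and the guest has since posted three more chains, so va_idx has wrapped around to 1:]

    uint16_t ndesc = (uint16_t)((u_int)1 - 65534);   // == 3

The unsigned subtraction followed by truncation to 16 bits yields 3, the number of chains not yet consumed, even though the raw index wrapped.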
+ */ + idx = vq->vq_last_avail; + ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx); + if (ndesc == 0) + return (0); + if (ndesc > vq->vq_qsize) { + /* XXX need better way to diagnose issues */ + fprintf(stderr, + "%s: ndesc (%u) out of range, driver confused?\r\n", + name, (u_int)ndesc); + return (-1); + } + + /* + * Now count/parse "involved" descriptors starting from + * the head of the chain. + * + * To prevent loops, we could be more complicated and + * check whether we're re-visiting a previously visited + * index, but we just abort if the count gets excessive. + */ + ctx = vs->vs_di->pi_vmctx; + *pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)]; + vq->vq_last_avail++; + for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) { + if (next >= vq->vq_qsize) { + fprintf(stderr, + "%s: descriptor index %u out of range, " + "driver confused?\r\n", + name, next); + return (-1); + } + vdir = &vq->vq_desc[next]; + if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) { + _vq_record(i, vdir, ctx, iov, n_iov, flags); + i++; + } else if ((vs->vs_vc->vc_hv_caps & + VIRTIO_RING_F_INDIRECT_DESC) == 0) { + fprintf(stderr, + "%s: descriptor has forbidden INDIRECT flag, " + "driver confused?\r\n", + name); + return (-1); + } else { + n_indir = vdir->vd_len / 16; + if ((vdir->vd_len & 0xf) || n_indir == 0) { + fprintf(stderr, + "%s: invalid indir len 0x%x, " + "driver confused?\r\n", + name, (u_int)vdir->vd_len); + return (-1); + } + vindir = paddr_guest2host(ctx, + vdir->vd_addr, vdir->vd_len); + /* + * Indirects start at the 0th, then follow + * their own embedded "next"s until those run + * out. Each one's indirect flag must be off + * (we don't really have to check, could just + * ignore errors...). + */ + next = 0; + for (;;) { + vp = &vindir[next]; + if (vp->vd_flags & VRING_DESC_F_INDIRECT) { + fprintf(stderr, + "%s: indirect desc has INDIR flag," + " driver confused?\r\n", + name); + return (-1); + } + _vq_record(i, vp, ctx, iov, n_iov, flags); + if (++i > VQ_MAX_DESCRIPTORS) + goto loopy; + if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0) + break; + next = vp->vd_next; + if (next >= n_indir) { + fprintf(stderr, + "%s: invalid next %u > %u, " + "driver confused?\r\n", + name, (u_int)next, n_indir); + return (-1); + } + } + } + if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0) + return (i); + } +loopy: + fprintf(stderr, + "%s: descriptor loop? count > %d - driver confused?\r\n", + name, i); + return (-1); +} + +/* + * Return the currently-first request chain back to the available queue. + * + * (This chain is the one you handled when you called vq_getchain() + * and used its positive return value.) + */ +void +vq_retchains(struct vqueue_info *vq, uint16_t n_chains) +{ + + vq->vq_last_avail -= n_chains; +} + +/* + * Return specified request chain to the guest, setting its I/O length + * to the provided value. + * + * (This chain is the one you handled when you called vq_getchain() + * and used its positive return value.) + */ +void +vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) +{ + uint16_t uidx, mask; + volatile struct vring_used *vuh; + volatile struct virtio_used *vue; + + /* + * Notes: + * - mask is N-1 where N is a power of 2 so computes x % N + * - vuh points to the "used" data shared with guest + * - vue points to the "used" ring entry we want to update + * - head is the same value we compute in vq_iovecs(). + * + * (I apologize for the two fields named vu_idx; the + * virtio spec calls the one that vue points to, "id"...) 
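[Reviewer note: for reference, the typical device-side consumption loop built from these primitives looks roughly like the sketch below; it is not code from this patch, and my_handle_one_request is hypothetical.]

    while (vq_has_descs(vq)) {
            struct iovec iov[8];
            uint16_t idx, flags[8];
            int n = vq_getchain(vq, &idx, iov, 8, flags);
            if (n <= 0)
                    break;                   // nothing usable right now
            uint32_t iolen = my_handle_one_request(iov, n, flags);
            vq_relchain(vq, idx, iolen);     // hand the chain back to the guest
    }
    vq_endchains(vq, 1);                     // interrupt the guest if needed

This is the pattern the vtblk, vtcon and vtnet emulations below follow.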
+ */ + mask = vq->vq_qsize - 1; + vuh = vq->vq_used; + + uidx = vuh->vu_idx; + vue = &vuh->vu_ring[uidx++ & mask]; + vue->vu_idx = idx; + vue->vu_tlen = iolen; + vuh->vu_idx = uidx; +} + +/* + * Driver has finished processing "available" chains and calling + * vq_relchain on each one. If driver used all the available + * chains, used_all should be set. + * + * If the "used" index moved we may need to inform the guest, i.e., + * deliver an interrupt. Even if the used index did NOT move we + * may need to deliver an interrupt, if the avail ring is empty and + * we are supposed to interrupt on empty. + * + * Note that used_all_avail is provided by the caller because it's + * a snapshot of the ring state when he decided to finish interrupt + * processing -- it's possible that descriptors became available after + * that point. (It's also typically a constant 1/True as well.) + */ +void +vq_endchains(struct vqueue_info *vq, int used_all_avail) +{ + struct virtio_softc *vs; + uint16_t event_idx, new_idx, old_idx; + int intr; + + /* + * Interrupt generation: if we're using EVENT_IDX, + * interrupt if we've crossed the event threshold. + * Otherwise interrupt is generated if we added "used" entries, + * but suppressed by VRING_AVAIL_F_NO_INTERRUPT. + * + * In any case, though, if NOTIFY_ON_EMPTY is set and the + * entire avail was processed, we need to interrupt always. + */ + vs = vq->vq_vs; + old_idx = vq->vq_save_used; + vq->vq_save_used = new_idx = vq->vq_used->vu_idx; + if (used_all_avail && + (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) + intr = 1; + else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) { + event_idx = VQ_USED_EVENT_IDX(vq); + /* + * This calculation is per docs and the kernel + * (see src/sys/dev/virtio/virtio_ring.h). + */ + intr = (uint16_t)(new_idx - event_idx - 1) < + (uint16_t)(new_idx - old_idx); + } else { + intr = new_idx != old_idx && + !(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT); + } + if (intr) + vq_interrupt(vs, vq); +} + +/* + * Handle pci config space reads. + * If it's to the interrupt system, do that + * If it's part of the virtio standard stuff, do that. + * Otherwise dispatch to the actual driver. + */ +uint64_t +vi_mmio_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di, + int baridx, uint64_t offset, int size) +{ + struct virtio_softc *vs = di->pi_arg; + struct virtio_consts *vc; + const char *name; + uint64_t sel; + uint32_t value; + int error; + + if (vs->vs_mtx) + pthread_mutex_lock(vs->vs_mtx); + + vc = vs->vs_vc; + name = vc->vc_name; + value = size == 1 ? 0xff : size == 2 ? 
0xffff : 0xffffffff; + + if (size != 1 && size != 2 && size != 4) + goto bad; + + if (offset >= VIRTIO_MMIO_CONFIG) { + error = (*vc->vc_cfgread)(DEV_SOFTC(vs), + offset - VIRTIO_MMIO_CONFIG, + size, + &value); + if (error) + goto bad; + + CFG_RW_DBG(offset, value); + goto done; + } + + switch (offset) { + case VIRTIO_MMIO_MAGIC_VALUE: + value = mmio_get_cfgreg32(di, offset); + CFG_RW_DBG(VIRTIO_MMIO_MAGIC_VALUE, value); + break; + case VIRTIO_MMIO_VERSION: + value = mmio_get_cfgreg32(di, offset); + CFG_RW_DBG(VIRTIO_MMIO_VERSION, value); + break; + case VIRTIO_MMIO_DEVICE_ID: + value = mmio_get_cfgreg32(di, offset); + CFG_RW_DBG(VIRTIO_MMIO_DEVICE_ID, value); + break; + case VIRTIO_MMIO_VENDOR_ID: + value = mmio_get_cfgreg32(di, offset); + CFG_RW_DBG(VIRTIO_MMIO_VENDOR_ID, value); + break; + case VIRTIO_MMIO_INTERRUPT_STATUS: + value = mmio_get_cfgreg32(di, offset); + CFG_RW_DBG(VIRTIO_MMIO_INTERRUPT_STATUS, value); + break; + case VIRTIO_MMIO_STATUS: + value = mmio_get_cfgreg32(di, offset); + CFG_RW_DBG(VIRTIO_MMIO_STATUS, value); + break; + case VIRTIO_MMIO_HOST_FEATURES: + sel = mmio_get_cfgreg32(di, VIRTIO_MMIO_HOST_FEATURES_SEL); + value = (vc->vc_hv_caps >> (32 * sel)) & 0xffffffff; + CFG_RW_DBG(VIRTIO_MMIO_HOST_FEATURES, value); + break; + case VIRTIO_MMIO_QUEUE_NUM_MAX: + value = vs->vs_curq < vc->vc_nvq ? + vs->vs_queues[vs->vs_curq].vq_qsize : 0; + CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NUM_MAX, value); + break; + case VIRTIO_MMIO_QUEUE_PFN: + value = vs->vs_curq < vc->vc_nvq ? + vs->vs_queues[vs->vs_curq].vq_pfn : 0; + CFG_RW_DBG(VIRTIO_MMIO_QUEUE_PFN, value); + break; + default: + CFG_RW_DBG(offset, value); + goto bad; + break; + } + + goto done; + +bad: + fprintf(stderr, "%s: read from bad offset/size: %jd/%d\r\n", + name, (uintmax_t)offset, size); + +done: + if (vs->vs_mtx) + pthread_mutex_unlock(vs->vs_mtx); + return (value); +} + +/* + * Handle pci config space writes. + * If it's to the MSI-X info, do that. + * If it's part of the virtio standard stuff, do that. + * Otherwise dispatch to the actual driver. 
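[Reviewer note: the HOST_FEATURES/HOST_FEATURES_SEL pair handled above exposes the 64-bit vc_hv_caps word one 32-bit half at a time; a sketch of what the guest sees, with a hypothetical capability value.]

    uint64_t caps = 0x0000000110000006ULL;       // hypothetical vc_hv_caps
    uint32_t lo = (uint32_t)(caps >> (32 * 0));  // sel == 0 -> 0x10000006
    uint32_t hi = (uint32_t)(caps >> (32 * 1));  // sel == 1 -> 0x00000001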
+ */ +void +vi_mmio_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di, + int baridx, uint64_t offset, int size, uint64_t value) +{ + struct virtio_softc *vs = di->pi_arg; + struct vqueue_info *vq; + struct virtio_consts *vc; + const char *name; + int error; + + if (vs->vs_mtx) + pthread_mutex_lock(vs->vs_mtx); + + vc = vs->vs_vc; + name = vc->vc_name; + + if (size != 1 && size != 2 && size != 4) + goto bad; + + if (offset >= VIRTIO_MMIO_CONFIG) { + error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs), + offset - VIRTIO_MMIO_CONFIG, + size, value); + if (error) + goto bad; + + CFG_RW_DBG(offset, value); + goto done; + } + + switch (offset) { + case VIRTIO_MMIO_HOST_FEATURES_SEL: + CFG_RW_DBG(VIRTIO_MMIO_HOST_FEATURES_SEL, value); + mmio_set_cfgreg32(di, offset, value); + break; + case VIRTIO_MMIO_GUEST_FEATURES_SEL: + CFG_RW_DBG(VIRTIO_MMIO_GUEST_FEATURES_SEL, value); + mmio_set_cfgreg32(di, offset, value); + break; + case VIRTIO_MMIO_INTERRUPT_ACK: + CFG_RW_DBG(VIRTIO_MMIO_INTERRUPT_ACK, value); + mmio_lintr_deassert(di); + mmio_set_cfgreg32(di, offset, value); + break; + case VIRTIO_MMIO_STATUS: + CFG_RW_DBG(VIRTIO_MMIO_STATUS, value); + mmio_set_cfgreg32(di, offset, value); + vs->vs_status = value; + if (value == 0) + (*vc->vc_reset)(DEV_SOFTC(vs)); + break; + case VIRTIO_MMIO_QUEUE_NUM: + CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NUM, value); + mmio_set_cfgreg32(di, offset, value); + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_qsize = value; + break; + case VIRTIO_MMIO_GUEST_FEATURES: + CFG_RW_DBG(VIRTIO_MMIO_GUEST_FEATURES, value); + mmio_set_cfgreg32(di, offset, value); + vs->vs_negotiated_caps = value & vc->vc_hv_caps; + if (vc->vc_apply_features) + (*vc->vc_apply_features)(DEV_SOFTC(vs), + vs->vs_negotiated_caps); + break; + case VIRTIO_MMIO_GUEST_PAGE_SIZE: + mmio_set_cfgreg32(di, offset, value); + vs->vs_guest_page_size = value; + break; + case VIRTIO_MMIO_QUEUE_SEL: + CFG_RW_DBG(VIRTIO_MMIO_QUEUE_SEL, value); + mmio_set_cfgreg32(di, offset, value); + /* + * Note that the guest is allowed to select an + * invalid queue; we just need to return a QNUM + * of 0 while the bad queue is selected. 
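[Reviewer note: taken together, the cases in this switch (continuing below) implement the legacy virtio-mmio queue setup sequence a guest driver performs; roughly, with illustrative values:]

    // write GUEST_PAGE_SIZE = 4096        -> vs_guest_page_size
    // write QUEUE_SEL       = 0           -> vs_curq selects the queue
    // read  QUEUE_NUM_MAX                 <- advertised vq_qsize for that queue
    // write QUEUE_NUM       = 128         -> vq_qsize
    // write QUEUE_ALIGN     = 4096        -> vs_align
    // write QUEUE_PFN       = gpa >> 12   -> vi_vq_init() maps the ring
    // write STATUS          = driver-ok   -> stored in vs_status (a write of 0
    //                                        instead triggers vc_reset)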
+ */ + vs->vs_curq = value; + break; + case VIRTIO_MMIO_QUEUE_ALIGN: + CFG_RW_DBG(VIRTIO_MMIO_QUEUE_ALIGN, value); + mmio_set_cfgreg32(di, offset, value); + vs->vs_align = value; + break; + case VIRTIO_MMIO_QUEUE_PFN: + CFG_RW_DBG(VIRTIO_MMIO_QUEUE_PFN, value); + mmio_set_cfgreg32(di, offset, value); + if (vs->vs_curq >= vc->vc_nvq) + fprintf(stderr, "%s: curq %d >= max %d\r\n", + name, vs->vs_curq, vc->vc_nvq); + else + vi_vq_init(vs, value); + break; + case VIRTIO_MMIO_QUEUE_NOTIFY: + CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NOTIFY, value); + if (value >= vc->vc_nvq) { + fprintf(stderr, "%s: queue %d notify out of range\r\n", + name, (int)value); + break; + } + mmio_set_cfgreg32(di, offset, value); + vq = &vs->vs_queues[value]; + if (vq->vq_notify) + (*vq->vq_notify)(DEV_SOFTC(vs), vq); + else if (vc->vc_qnotify) + (*vc->vc_qnotify)(DEV_SOFTC(vs), vq); + else + fprintf(stderr, + "%s: qnotify queue %d: missing vq/vc notify\r\n", + name, (int)value); + break; + default: + CFG_RW_DBG(offset, value); + goto bad; + break; + } + + goto done; + +bad: + fprintf(stderr, "%s: write to bad offset/size %jd/%d\r\n", + name, (uintmax_t)offset, size); +done: + if (vs->vs_mtx) + pthread_mutex_unlock(vs->vs_mtx); +} + +void +vi_devemu_init(struct mmio_devinst *di, uint32_t type) +{ + uint32_t id; + + switch (type) { + case VIRTIO_TYPE_NET: + id = VIRTIO_ID_NETWORK; + break; + case VIRTIO_TYPE_BLOCK: + id = VIRTIO_ID_BLOCK; + break; + case VIRTIO_TYPE_CONSOLE: + id = VIRTIO_ID_CONSOLE; + break; + case VIRTIO_TYPE_ENTROPY: + id = VIRTIO_ID_ENTROPY; + break; + default: + return; + } + + mmio_set_cfgreg32(di, VIRTIO_MMIO_MAGIC_VALUE, VIRTIO_MMIO_MAGIC_NUM); + mmio_set_cfgreg32(di, VIRTIO_MMIO_VERSION, VIRTIO_MMIO_VERSION_NUM); + mmio_set_cfgreg32(di, VIRTIO_MMIO_DEVICE_ID, id); + mmio_set_cfgreg32(di, VIRTIO_MMIO_VENDOR_ID, VIRTIO_VENDOR); +} Index: usr.sbin/bhyve/mmio/mmio_virtio_block.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_virtio_block.c @@ -0,0 +1,424 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bhyverun.h" +#include "debug.h" + +#include "mmio_emul.h" +#include "mmio_virtio.h" + +#include "block_if.h" + +#define VTBLK_RINGSZ 128 + +_Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request"); + +#define VTBLK_S_OK 0 +#define VTBLK_S_IOERR 1 +#define VTBLK_S_UNSUPP 2 + +#define VTBLK_BLK_ID_BYTES 20 + 1 + +/* Capability bits */ +#define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ +#define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */ +#define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */ +#define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */ + +/* + * Host capabilities + */ +#define VTBLK_S_HOSTCAPS \ + ( VTBLK_F_SEG_MAX | \ + VTBLK_F_BLK_SIZE | \ + VTBLK_F_FLUSH | \ + VTBLK_F_TOPOLOGY | \ + VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ + +/* + * Config space "registers" + */ +struct vtblk_config { + uint64_t vbc_capacity; + uint32_t vbc_size_max; + uint32_t vbc_seg_max; + struct { + uint16_t cylinders; + uint8_t heads; + uint8_t sectors; + } vbc_geometry; + uint32_t vbc_blk_size; + struct { + uint8_t physical_block_exp; + uint8_t alignment_offset; + uint16_t min_io_size; + uint32_t opt_io_size; + } vbc_topology; + uint8_t vbc_writeback; +} __packed; + +/* + * Fixed-size block header + */ +struct virtio_blk_hdr { +#define VBH_OP_READ 0 +#define VBH_OP_WRITE 1 +#define VBH_OP_FLUSH 4 +#define VBH_OP_FLUSH_OUT 5 +#define VBH_OP_IDENT 8 +#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ + uint32_t vbh_type; + uint32_t vbh_ioprio; + uint64_t vbh_sector; +} __packed; + +/* + * Debug printf + */ +static int pci_vtblk_debug; +#define DPRINTF(params) if (pci_vtblk_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params + +struct pci_vtblk_ioreq { + struct blockif_req io_req; + struct pci_vtblk_softc *io_sc; + uint8_t *io_status; + uint16_t io_idx; +}; + +/* + * Per-device softc + */ +struct pci_vtblk_softc { + struct virtio_softc vbsc_vs; + pthread_mutex_t vsc_mtx; + struct vqueue_info vbsc_vq; + struct vtblk_config vbsc_cfg; + struct blockif_ctxt *bc; + char vbsc_ident[VTBLK_BLK_ID_BYTES]; + struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ]; +}; + +static void pci_vtblk_reset(void *); +static void pci_vtblk_notify(void *, struct vqueue_info *); +static int pci_vtblk_cfgread(void *, int, int, uint32_t *); +static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); + +static struct virtio_consts vtblk_vi_consts = { + "vtblk", /* our name */ + 1, /* we support 1 virtqueue */ + sizeof(struct vtblk_config), /* config reg size */ + pci_vtblk_reset, /* reset */ + pci_vtblk_notify, /* device-wide qnotify */ + pci_vtblk_cfgread, /* read PCI config */ + pci_vtblk_cfgwrite, /* write PCI config */ + NULL, /* apply negotiated features */ + VTBLK_S_HOSTCAPS, /* our capabilities */ +}; + +static void +pci_vtblk_reset(void *vsc) +{ + struct pci_vtblk_softc *sc = vsc; + + DPRINTF(("vtblk: device reset requested !")); + vi_reset_dev(&sc->vbsc_vs); +} + +static void +pci_vtblk_done(struct blockif_req *br, int err) +{ + struct pci_vtblk_ioreq *io = br->br_param; + struct pci_vtblk_softc *sc = io->io_sc; + + /* convert errno into a virtio block error return */ + if (err == EOPNOTSUPP || err == ENOSYS) + *io->io_status = VTBLK_S_UNSUPP; + else if (err != 0) + 
*io->io_status = VTBLK_S_IOERR; + else + *io->io_status = VTBLK_S_OK; + + /* + * Return the descriptor back to the host. + * We wrote 1 byte (our status) to host. + */ + pthread_mutex_lock(&sc->vsc_mtx); + vq_relchain(&sc->vbsc_vq, io->io_idx, 1); + vq_endchains(&sc->vbsc_vq, 0); + pthread_mutex_unlock(&sc->vsc_mtx); +} + +static void +pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) +{ + struct virtio_blk_hdr *vbh; + struct pci_vtblk_ioreq *io; + int i, n; + int err; + ssize_t iolen; + int writeop, type; + struct iovec iov[BLOCKIF_IOV_MAX + 2]; + uint16_t idx, flags[BLOCKIF_IOV_MAX + 2]; + + n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags); + + /* + * The first descriptor will be the read-only fixed header, + * and the last is for status (hence +2 above and below). + * The remaining iov's are the actual data I/O vectors. + * + * XXX - note - this fails on crash dump, which does a + * VIRTIO_BLK_T_FLUSH with a zero transfer length + */ + assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2); + + io = &sc->vbsc_ios[idx]; + assert((flags[0] & VRING_DESC_F_WRITE) == 0); + assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); + vbh = iov[0].iov_base; + memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2)); + io->io_req.br_iovcnt = n - 2; + io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE; + io->io_status = iov[--n].iov_base; + assert(iov[n].iov_len == 1); + assert(flags[n] & VRING_DESC_F_WRITE); + + /* + * XXX + * The guest should not be setting the BARRIER flag because + * we don't advertise the capability. + */ + type = vbh->vbh_type & ~VBH_FLAG_BARRIER; + writeop = (type == VBH_OP_WRITE); + + iolen = 0; + for (i = 1; i < n; i++) { + /* + * - write op implies read-only descriptor, + * - read/ident op implies write-only descriptor, + * therefore test the inverse of the descriptor bit + * to the op. + */ + assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); + iolen += iov[i].iov_len; + } + io->io_req.br_resid = iolen; + + DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %lld", + writeop ? "write" : "read/ident", iolen, i - 1, + (long long) io->io_req.br_offset)); + + switch (type) { + case VBH_OP_READ: + err = blockif_read(sc->bc, &io->io_req); + break; + case VBH_OP_WRITE: + err = blockif_write(sc->bc, &io->io_req); + break; + case VBH_OP_FLUSH: + case VBH_OP_FLUSH_OUT: + err = blockif_flush(sc->bc, &io->io_req); + break; + case VBH_OP_IDENT: + /* Assume a single buffer */ + /* S/n equal to buffer is not zero-terminated. 
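[Reviewer note: for reference, the descriptor chain parsed above has the canonical virtio-blk shape, with sizes taken from the structures defined earlier in this file.]

    // iov[0]      : struct virtio_blk_hdr, 16 bytes, device reads only
    //               (vbh_type, vbh_ioprio, vbh_sector)
    // iov[1..n-2] : payload buffers; read-only for writes,
    //               write-only for reads and ident
    // iov[n-1]    : 1-byte status, device writes VTBLK_S_OK / _IOERR / _UNSUPP

So the "+2" sizing of the iov[]/flags[] arrays accounts for the header and the status slot on top of BLOCKIF_IOV_MAX data segments.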
*/ + memset(iov[1].iov_base, 0, iov[1].iov_len); + strncpy(iov[1].iov_base, sc->vbsc_ident, + MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); + pci_vtblk_done(&io->io_req, 0); + return; + default: + pci_vtblk_done(&io->io_req, EOPNOTSUPP); + return; + } + assert(err == 0); +} + +static void +pci_vtblk_notify(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtblk_softc *sc = vsc; + + while (vq_has_descs(vq)) + pci_vtblk_proc(sc, vq); +} + +static int +pci_vtblk_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts) +{ + char bident[sizeof("XX:X:X")]; + struct blockif_ctxt *bctxt; + MD5_CTX mdctx; + u_char digest[16]; + struct pci_vtblk_softc *sc; + off_t size; + int i, sectsz, sts, sto; + + if (opts == NULL) { + WPRINTF(("virtio-block: backing device required")); + return (1); + } + + /* + * The supplied backing file has to exist + */ + snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->di_func); + bctxt = blockif_open(opts, bident); + if (bctxt == NULL) { + perror("Could not open backing file"); + return (1); + } + + size = blockif_size(bctxt); + sectsz = blockif_sectsz(bctxt); + blockif_psectsz(bctxt, &sts, &sto); + + sc = calloc(1, sizeof(struct pci_vtblk_softc)); + sc->bc = bctxt; + for (i = 0; i < VTBLK_RINGSZ; i++) { + struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i]; + io->io_req.br_callback = pci_vtblk_done; + io->io_req.br_param = io; + io->io_sc = sc; + io->io_idx = i; + } + + pthread_mutex_init(&sc->vsc_mtx, NULL); + + /* init virtio softc and virtqueues */ + vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); + sc->vbsc_vs.vs_mtx = &sc->vsc_mtx; + + sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; + /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ + + /* + * Create an identifier for the backing file. Use parts of the + * md5 sum of the filename + */ + MD5Init(&mdctx); + MD5Update(&mdctx, opts, strlen(opts)); + MD5Final(digest, &mdctx); + snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES, + "BHYVE-%02X%02X-%02X%02X-%02X%02X", + digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); + + /* setup virtio block config space */ + sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ + sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ + + /* + * If Linux is presented with a seg_max greater than the virtio queue + * size, it can stumble into situations where it violates its own + * invariants and panics. For safety, we keep seg_max clamped, paying + * heed to the two extra descriptors needed for the header and status + * of a request. + */ + sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX); + sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */ + sc->vbsc_cfg.vbc_geometry.heads = 0; + sc->vbsc_cfg.vbc_geometry.sectors = 0; + sc->vbsc_cfg.vbc_blk_size = sectsz; + sc->vbsc_cfg.vbc_topology.physical_block_exp = + (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0; + sc->vbsc_cfg.vbc_topology.alignment_offset = + (sto != 0) ? ((sts - sto) / sectsz) : 0; + sc->vbsc_cfg.vbc_topology.min_io_size = 0; + sc->vbsc_cfg.vbc_topology.opt_io_size = 0; + sc->vbsc_cfg.vbc_writeback = 0; + + /* + * Should we move some of this into virtio.c? Could + * have the device, class, and subdev_0 as fields in + * the virtio constants structure. 
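[Reviewer note: a quick worked example of the topology math above, with illustrative numbers: a backing device reporting a 512-byte logical sector (sectsz) on 4096-byte physical sectors (sts) with no offset (sto == 0).]

    // vbc_blk_size                    = 512
    // vbc_topology.physical_block_exp = ffsll(4096 / 512) - 1 = 3  (2^3 sectors)
    // vbc_topology.alignment_offset   = 0
    // vbc_capacity                    = size / DEV_BSIZE  (512-byte units)

The guest therefore sees a 512e-style disk and can align its I/O to 4 KB boundaries.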
+ */ + vi_devemu_init(pi, VIRTIO_TYPE_BLOCK); + + if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) { + blockif_close(sc->bc); + free(sc); + return (1); + } + vi_set_io_res(&sc->vbsc_vs, 0); + return (0); +} + +static int +pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) +{ + + DPRINTF(("vtblk: write to readonly reg %d", offset)); + return (1); +} + +static int +pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) +{ + struct pci_vtblk_softc *sc = vsc; + void *ptr; + + /* our caller has already verified offset and size */ + ptr = (uint8_t *)&sc->vbsc_cfg + offset; + memcpy(retval, ptr, size); + return (0); +} + +struct mmio_devemu pci_de_vblk = { + .de_emu = "virtio-blk", + .de_init = pci_vtblk_init, + .de_write = vi_mmio_write, + .de_read = vi_mmio_read +}; +MMIO_EMUL_SET(pci_de_vblk); Index: usr.sbin/bhyve/mmio/mmio_virtio_console.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_virtio_console.c @@ -0,0 +1,680 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2016 iXsystems Inc. + * All rights reserved. + * + * This software was developed by Jakub Klama + * under sponsorship from iXsystems Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include +#include + +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bhyverun.h" +#include "debug.h" + +#include "mmio_emul.h" +#include "mmio_virtio.h" + +#include "mevent.h" +#include "sockstream.h" + +#define VTCON_RINGSZ 64 +#define VTCON_MAXPORTS 16 +#define VTCON_MAXQ (VTCON_MAXPORTS * 2 + 2) + +#define VTCON_DEVICE_READY 0 +#define VTCON_DEVICE_ADD 1 +#define VTCON_DEVICE_REMOVE 2 +#define VTCON_PORT_READY 3 +#define VTCON_CONSOLE_PORT 4 +#define VTCON_CONSOLE_RESIZE 5 +#define VTCON_PORT_OPEN 6 +#define VTCON_PORT_NAME 7 + +#define VTCON_F_SIZE 0 +#define VTCON_F_MULTIPORT 1 +#define VTCON_F_EMERG_WRITE 2 +#define VTCON_S_HOSTCAPS \ + (VTCON_F_SIZE | VTCON_F_MULTIPORT | VTCON_F_EMERG_WRITE) + +static int pci_vtcon_debug; +#define DPRINTF(params) if (pci_vtcon_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params + +struct pci_vtcon_softc; +struct pci_vtcon_port; +struct pci_vtcon_config; +typedef void (pci_vtcon_cb_t)(struct pci_vtcon_port *, void *, struct iovec *, + int); + +struct pci_vtcon_port { + struct pci_vtcon_softc * vsp_sc; + int vsp_id; + const char * vsp_name; + bool vsp_enabled; + bool vsp_console; + bool vsp_rx_ready; + bool vsp_open; + int vsp_rxq; + int vsp_txq; + void * vsp_arg; + pci_vtcon_cb_t * vsp_cb; +}; + +struct pci_vtcon_sock +{ + struct pci_vtcon_port * vss_port; + const char * vss_path; + struct mevent * vss_server_evp; + struct mevent * vss_conn_evp; + int vss_server_fd; + int vss_conn_fd; + bool vss_open; +}; + +struct pci_vtcon_softc { + struct virtio_softc vsc_vs; + struct vqueue_info vsc_queues[VTCON_MAXQ]; + pthread_mutex_t vsc_mtx; + uint64_t vsc_cfg; + uint64_t vsc_features; + char * vsc_rootdir; + int vsc_kq; + int vsc_nports; + bool vsc_ready; + struct pci_vtcon_port vsc_control_port; + struct pci_vtcon_port vsc_ports[VTCON_MAXPORTS]; + struct pci_vtcon_config *vsc_config; +}; + +struct pci_vtcon_config { + uint16_t cols; + uint16_t rows; + uint32_t max_nr_ports; + uint32_t emerg_wr; +} __attribute__((packed)); + +struct pci_vtcon_control { + uint32_t id; + uint16_t event; + uint16_t value; +} __attribute__((packed)); + +struct pci_vtcon_console_resize { + uint16_t cols; + uint16_t rows; +} __attribute__((packed)); + +static void pci_vtcon_reset(void *); +static void pci_vtcon_notify_rx(void *, struct vqueue_info *); +static void pci_vtcon_notify_tx(void *, struct vqueue_info *); +static int pci_vtcon_cfgread(void *, int, int, uint32_t *); +static int pci_vtcon_cfgwrite(void *, int, int, uint32_t); +static void pci_vtcon_neg_features(void *, uint64_t); +static void pci_vtcon_sock_accept(int, enum ev_type, void *); +static void pci_vtcon_sock_rx(int, enum ev_type, void *); +static void pci_vtcon_sock_tx(struct pci_vtcon_port *, void *, struct iovec *, + int); +static void pci_vtcon_control_send(struct pci_vtcon_softc *, + struct pci_vtcon_control *, const void *, size_t); +static void pci_vtcon_announce_port(struct pci_vtcon_port *); +static void pci_vtcon_open_port(struct pci_vtcon_port *, bool); + +static struct virtio_consts vtcon_vi_consts = { + "vtcon", /* our name */ + VTCON_MAXQ, /* we support VTCON_MAXQ virtqueues */ + sizeof(struct pci_vtcon_config), /* config reg size */ + pci_vtcon_reset, /* reset */ + NULL, /* device-wide qnotify */ + pci_vtcon_cfgread, /* read 
virtio config */ + pci_vtcon_cfgwrite, /* write virtio config */ + pci_vtcon_neg_features, /* apply negotiated features */ + VTCON_S_HOSTCAPS, /* our capabilities */ +}; + + +static void +pci_vtcon_reset(void *vsc) +{ + struct pci_vtcon_softc *sc; + + sc = vsc; + + DPRINTF(("vtcon: device reset requested!")); + vi_reset_dev(&sc->vsc_vs); +} + +static void +pci_vtcon_neg_features(void *vsc, uint64_t negotiated_features) +{ + struct pci_vtcon_softc *sc = vsc; + + sc->vsc_features = negotiated_features; +} + +static int +pci_vtcon_cfgread(void *vsc, int offset, int size, uint32_t *retval) +{ + struct pci_vtcon_softc *sc = vsc; + void *ptr; + + ptr = (uint8_t *)sc->vsc_config + offset; + memcpy(retval, ptr, size); + return (0); +} + +static int +pci_vtcon_cfgwrite(void *vsc, int offset, int size, uint32_t val) +{ + + return (0); +} + +static inline struct pci_vtcon_port * +pci_vtcon_vq_to_port(struct pci_vtcon_softc *sc, struct vqueue_info *vq) +{ + uint16_t num = vq->vq_num; + + if (num == 0 || num == 1) + return (&sc->vsc_ports[0]); + + if (num == 2 || num == 3) + return (&sc->vsc_control_port); + + return (&sc->vsc_ports[(num / 2) - 1]); +} + +static inline struct vqueue_info * +pci_vtcon_port_to_vq(struct pci_vtcon_port *port, bool tx_queue) +{ + int qnum; + + qnum = tx_queue ? port->vsp_txq : port->vsp_rxq; + return (&port->vsp_sc->vsc_queues[qnum]); +} + +static struct pci_vtcon_port * +pci_vtcon_port_add(struct pci_vtcon_softc *sc, const char *name, + pci_vtcon_cb_t *cb, void *arg) +{ + struct pci_vtcon_port *port; + + if (sc->vsc_nports == VTCON_MAXPORTS) { + errno = EBUSY; + return (NULL); + } + + port = &sc->vsc_ports[sc->vsc_nports++]; + port->vsp_id = sc->vsc_nports - 1; + port->vsp_sc = sc; + port->vsp_name = name; + port->vsp_cb = cb; + port->vsp_arg = arg; + + if (port->vsp_id == 0) { + /* port0 */ + port->vsp_txq = 0; + port->vsp_rxq = 1; + } else { + port->vsp_txq = sc->vsc_nports * 2; + port->vsp_rxq = port->vsp_txq + 1; + } + + port->vsp_enabled = true; + return (port); +} + +static int +pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name, + const char *path) +{ + struct pci_vtcon_sock *sock; + struct sockaddr_un sun; + char *pathcopy; + int s = -1, fd = -1, error = 0; +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; +#endif + + sock = calloc(1, sizeof(struct pci_vtcon_sock)); + if (sock == NULL) { + error = -1; + goto out; + } + + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) { + error = -1; + goto out; + } + + pathcopy = strdup(path); + if (pathcopy == NULL) { + error = -1; + goto out; + } + + fd = open(dirname(pathcopy), O_RDONLY | O_DIRECTORY); + if (fd < 0) { + free(pathcopy); + error = -1; + goto out; + } + + sun.sun_family = AF_UNIX; + sun.sun_len = sizeof(struct sockaddr_un); + strcpy(pathcopy, path); + strlcpy(sun.sun_path, basename(pathcopy), sizeof(sun.sun_path)); + free(pathcopy); + + if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0) { + error = -1; + goto out; + } + + if (fcntl(s, F_SETFL, O_NONBLOCK) < 0) { + error = -1; + goto out; + } + + if (listen(s, 1) < 0) { + error = -1; + goto out; + } + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); + if (caph_rights_limit(s, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + sock->vss_port = pci_vtcon_port_add(sc, name, pci_vtcon_sock_tx, sock); + if (sock->vss_port == NULL) { + error = -1; + goto out; + } + + sock->vss_open = false; + sock->vss_conn_fd = -1; + sock->vss_server_fd = s; + sock->vss_server_evp 
= mevent_add(s, EVF_READ, pci_vtcon_sock_accept, + sock); + + if (sock->vss_server_evp == NULL) { + error = -1; + goto out; + } + +out: + if (fd != -1) + close(fd); + + if (error != 0) { + if (s != -1) + close(s); + free(sock); + } + + return (error); +} + +static void +pci_vtcon_sock_accept(int fd __unused, enum ev_type t __unused, void *arg) +{ + struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg; + int s; + + s = accept(sock->vss_server_fd, NULL, NULL); + if (s < 0) + return; + + if (sock->vss_open) { + close(s); + return; + } + + sock->vss_open = true; + sock->vss_conn_fd = s; + sock->vss_conn_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_rx, sock); + + pci_vtcon_open_port(sock->vss_port, true); +} + +static void +pci_vtcon_sock_rx(int fd __unused, enum ev_type t __unused, void *arg) +{ + struct pci_vtcon_port *port; + struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg; + struct vqueue_info *vq; + struct iovec iov; + static char dummybuf[2048]; + int len, n; + uint16_t idx; + + port = sock->vss_port; + vq = pci_vtcon_port_to_vq(port, true); + + if (!sock->vss_open || !port->vsp_rx_ready) { + len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf)); + if (len == 0) + goto close; + + return; + } + + if (!vq_has_descs(vq)) { + len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf)); + vq_endchains(vq, 1); + if (len == 0) + goto close; + + return; + } + + do { + n = vq_getchain(vq, &idx, &iov, 1, NULL); + len = readv(sock->vss_conn_fd, &iov, n); + + if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) { + vq_retchains(vq, 1); + vq_endchains(vq, 0); + if (len == 0) + goto close; + + return; + } + + vq_relchain(vq, idx, len); + } while (vq_has_descs(vq)); + + vq_endchains(vq, 1); + +close: + mevent_delete_close(sock->vss_conn_evp); + sock->vss_conn_fd = -1; + sock->vss_open = false; +} + +static void +pci_vtcon_sock_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov, + int niov) +{ + struct pci_vtcon_sock *sock; + int i, ret; + + sock = (struct pci_vtcon_sock *)arg; + + if (sock->vss_conn_fd == -1) + return; + + for (i = 0; i < niov; i++) { + ret = stream_write(sock->vss_conn_fd, iov[i].iov_base, + iov[i].iov_len); + if (ret <= 0) + break; + } + + if (ret <= 0) { + mevent_delete_close(sock->vss_conn_evp); + sock->vss_conn_fd = -1; + sock->vss_open = false; + } +} + +static void +pci_vtcon_control_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov, + int niov) +{ + struct pci_vtcon_softc *sc; + struct pci_vtcon_port *tmp; + struct pci_vtcon_control resp, *ctrl; + int i; + + assert(niov == 1); + + sc = port->vsp_sc; + ctrl = (struct pci_vtcon_control *)iov->iov_base; + + switch (ctrl->event) { + case VTCON_DEVICE_READY: + sc->vsc_ready = true; + /* set port ready events for registered ports */ + for (i = 0; i < VTCON_MAXPORTS; i++) { + tmp = &sc->vsc_ports[i]; + if (tmp->vsp_enabled) + pci_vtcon_announce_port(tmp); + + if (tmp->vsp_open) + pci_vtcon_open_port(tmp, true); + } + break; + + case VTCON_PORT_READY: + if (ctrl->id >= sc->vsc_nports) { + WPRINTF(("VTCON_PORT_READY event for unknown port %d", + ctrl->id)); + return; + } + + tmp = &sc->vsc_ports[ctrl->id]; + if (tmp->vsp_console) { + resp.event = VTCON_CONSOLE_PORT; + resp.id = ctrl->id; + resp.value = 1; + pci_vtcon_control_send(sc, &resp, NULL, 0); + } + break; + } +} + +static void +pci_vtcon_announce_port(struct pci_vtcon_port *port) +{ + struct pci_vtcon_control event; + + event.id = port->vsp_id; + event.event = VTCON_DEVICE_ADD; + event.value = 1; + pci_vtcon_control_send(port->vsp_sc, &event, 
NULL, 0); + + event.event = VTCON_PORT_NAME; + pci_vtcon_control_send(port->vsp_sc, &event, port->vsp_name, + strlen(port->vsp_name)); +} + +static void +pci_vtcon_open_port(struct pci_vtcon_port *port, bool open) +{ + struct pci_vtcon_control event; + + if (!port->vsp_sc->vsc_ready) { + port->vsp_open = true; + return; + } + + event.id = port->vsp_id; + event.event = VTCON_PORT_OPEN; + event.value = (int)open; + pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0); +} + +static void +pci_vtcon_control_send(struct pci_vtcon_softc *sc, + struct pci_vtcon_control *ctrl, const void *payload, size_t len) +{ + struct vqueue_info *vq; + struct iovec iov; + uint16_t idx; + int n; + + vq = pci_vtcon_port_to_vq(&sc->vsc_control_port, true); + + if (!vq_has_descs(vq)) + return; + + n = vq_getchain(vq, &idx, &iov, 1, NULL); + + assert(n == 1); + + memcpy(iov.iov_base, ctrl, sizeof(struct pci_vtcon_control)); + if (payload != NULL && len > 0) + memcpy(iov.iov_base + sizeof(struct pci_vtcon_control), + payload, len); + + vq_relchain(vq, idx, sizeof(struct pci_vtcon_control) + len); + vq_endchains(vq, 1); +} + +static void +pci_vtcon_notify_tx(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtcon_softc *sc; + struct pci_vtcon_port *port; + struct iovec iov[1]; + uint16_t idx, n; + uint16_t flags[8]; + + sc = vsc; + port = pci_vtcon_vq_to_port(sc, vq); + + while (vq_has_descs(vq)) { + n = vq_getchain(vq, &idx, iov, 1, flags); + assert(n >= 1); + if (port != NULL) + port->vsp_cb(port, port->vsp_arg, iov, 1); + + /* + * Release this chain and handle more + */ + vq_relchain(vq, idx, 0); + } + vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ +} + +static void +pci_vtcon_notify_rx(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtcon_softc *sc; + struct pci_vtcon_port *port; + + sc = vsc; + port = pci_vtcon_vq_to_port(sc, vq); + + if (!port->vsp_rx_ready) { + port->vsp_rx_ready = 1; + vq_kick_disable(vq); + } +} + +static int +pci_vtcon_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts) +{ + struct pci_vtcon_softc *sc; + char *portname = NULL; + char *portpath = NULL; + char *opt; + int i; + + sc = calloc(1, sizeof(struct pci_vtcon_softc)); + sc->vsc_config = calloc(1, sizeof(struct pci_vtcon_config)); + sc->vsc_config->max_nr_ports = VTCON_MAXPORTS; + sc->vsc_config->cols = 80; + sc->vsc_config->rows = 25; + + vi_softc_linkup(&sc->vsc_vs, &vtcon_vi_consts, sc, pi, sc->vsc_queues); + sc->vsc_vs.vs_mtx = &sc->vsc_mtx; + + for (i = 0; i < VTCON_MAXQ; i++) { + sc->vsc_queues[i].vq_qsize = VTCON_RINGSZ; + sc->vsc_queues[i].vq_notify = i % 2 == 0 + ? 
pci_vtcon_notify_rx + : pci_vtcon_notify_tx; + } + + /* initialize config space */ + vi_devemu_init(pi, VIRTIO_TYPE_CONSOLE); + + if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) + return (1); + vi_set_io_res(&sc->vsc_vs, 0); + + /* create control port */ + sc->vsc_control_port.vsp_sc = sc; + sc->vsc_control_port.vsp_txq = 2; + sc->vsc_control_port.vsp_rxq = 3; + sc->vsc_control_port.vsp_cb = pci_vtcon_control_tx; + sc->vsc_control_port.vsp_enabled = true; + + while ((opt = strsep(&opts, ",")) != NULL) { + portname = strsep(&opt, "="); + portpath = opt; + + /* create port */ + if (pci_vtcon_sock_add(sc, portname, portpath) < 0) { + EPRINTLN("cannot create port %s: %s", + portname, strerror(errno)); + return (1); + } + } + + return (0); +} + +struct mmio_devemu pci_de_vcon = { + .de_emu = "virtio-console", + .de_init = pci_vtcon_init, + .de_write = vi_mmio_write, + .de_read = vi_mmio_read +}; +MMIO_EMUL_SET(pci_de_vcon); Index: usr.sbin/bhyve/mmio/mmio_virtio_net.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_virtio_net.c @@ -0,0 +1,697 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include /* IFNAMSIZ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bhyverun.h" +#include "debug.h" + +#include "mmio_emul.h" +#include "mmio_virtio.h" + +#include "mevent.h" +#include "net_utils.h" +#include "net_backends.h" +#include "iov.h" + +#define VTNET_RINGSZ 1024 + +#define VTNET_MAXSEGS 256 + +#define VTNET_MAX_PKT_LEN (65536 + 64) + +#define VTNET_S_HOSTCAPS \ + ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ + VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) + +/* + * PCI config-space "registers" + */ +struct virtio_net_config { + uint8_t mac[6]; + uint16_t status; +} __packed; + +/* + * Queue definitions. 
+ */ +#define VTNET_RXQ 0 +#define VTNET_TXQ 1 +#define VTNET_CTLQ 2 /* NB: not yet supported */ + +#define VTNET_MAXQ 3 + +/* + * Debug printf + */ +static int pci_vtnet_debug; +#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params + +/* + * Per-device softc + */ +struct pci_vtnet_softc { + struct virtio_softc vsc_vs; + struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; + pthread_mutex_t vsc_mtx; + + net_backend_t *vsc_be; + + int resetting; /* protected by tx_mtx */ + + uint64_t vsc_features; /* negotiated features */ + + pthread_mutex_t rx_mtx; + int rx_merge; /* merged rx bufs in use */ + + pthread_t tx_tid; + pthread_mutex_t tx_mtx; + pthread_cond_t tx_cond; + int tx_in_progress; + + size_t vhdrlen; + size_t be_vhdrlen; + + struct virtio_net_config vsc_config; + struct virtio_consts vsc_consts; +}; + +static void pci_vtnet_reset(void *); +/* static void pci_vtnet_notify(void *, struct vqueue_info *); */ +static int pci_vtnet_cfgread(void *, int, int, uint32_t *); +static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); +static void pci_vtnet_neg_features(void *, uint64_t); + +static struct virtio_consts vtnet_vi_consts = { + "vtnet", /* our name */ + VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ + sizeof(struct virtio_net_config), /* config reg size */ + pci_vtnet_reset, /* reset */ + NULL, /* device-wide qnotify -- not used */ + pci_vtnet_cfgread, /* read PCI config */ + pci_vtnet_cfgwrite, /* write PCI config */ + pci_vtnet_neg_features, /* apply negotiated features */ + VTNET_S_HOSTCAPS, /* our capabilities */ +}; + +static void +pci_vtnet_reset(void *vsc) +{ + struct pci_vtnet_softc *sc = vsc; + + DPRINTF(("vtnet: device reset requested !")); + + /* Acquire the RX lock to block RX processing. */ + pthread_mutex_lock(&sc->rx_mtx); + + /* + * Make sure receive operation is disabled at least until we + * re-negotiate the features, since receive operation depends + * on the value of sc->rx_merge and the header length, which + * are both set in pci_vtnet_neg_features(). + * Receive operation will be enabled again once the guest adds + * the first receive buffers and kicks us. + */ + netbe_rx_disable(sc->vsc_be); + + /* Set sc->resetting and give a chance to the TX thread to stop. */ + pthread_mutex_lock(&sc->tx_mtx); + sc->resetting = 1; + while (sc->tx_in_progress) { + pthread_mutex_unlock(&sc->tx_mtx); + usleep(10000); + pthread_mutex_lock(&sc->tx_mtx); + } + + /* + * Now reset rings, MSI-X vectors, and negotiated capabilities. + * Do that with the TX lock held, since we need to reset + * sc->resetting. + */ + vi_reset_dev(&sc->vsc_vs); + + sc->resetting = 0; + pthread_mutex_unlock(&sc->tx_mtx); + pthread_mutex_unlock(&sc->rx_mtx); +} + +static __inline struct iovec * +iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen) +{ + struct iovec *riov; + + if (iov[0].iov_len < hlen) { + /* + * Not enough header space in the first fragment. + * That's not ok for us. + */ + return NULL; + } + + iov[0].iov_len -= hlen; + if (iov[0].iov_len == 0) { + *iovcnt -= 1; + if (*iovcnt == 0) { + /* + * Only space for the header. That's not + * enough for us. 
+ */ + return NULL; + } + riov = &iov[1]; + } else { + iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen); + riov = &iov[0]; + } + + return (riov); +} + +struct virtio_mrg_rxbuf_info { + uint16_t idx; + uint16_t pad; + uint32_t len; +}; + +static void +pci_vtnet_rx(struct pci_vtnet_softc *sc) +{ + int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen; + struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS]; + struct iovec iov[VTNET_MAXSEGS + 1]; + struct vqueue_info *vq; + + + + vq = &sc->vsc_queues[VTNET_RXQ]; + for (;;) { + struct virtio_net_rxhdr *hdr; + uint32_t riov_bytes; + struct iovec *riov; + uint32_t ulen; + int riov_len; + int n_chains; + ssize_t rlen; + ssize_t plen; + + plen = netbe_peek_recvlen(sc->vsc_be); + if (plen <= 0) { + /* + * No more packets (plen == 0), or backend errored + * (plen < 0). Interrupt if needed and stop. + */ + vq_endchains(vq, /*used_all_avail=*/0); + return; + } + plen += prepend_hdr_len; + + /* + * Get a descriptor chain to store the next ingress + * packet. In case of mergeable rx buffers, get as + * many chains as necessary in order to make room + * for a maximum sized LRO packet. + */ + riov_bytes = 0; + riov_len = 0; + riov = iov; + n_chains = 0; + do { + int n = vq_getchain(vq, &info[n_chains].idx, riov, + VTNET_MAXSEGS - riov_len, NULL); + + if (n == 0) { + /* + * No rx buffers. Enable RX kicks and double + * check. + */ + vq_kick_enable(vq); + if (!vq_has_descs(vq)) { + /* + * Still no buffers. Return the unused + * chains (if any), interrupt if needed + * (including for NOTIFY_ON_EMPTY), and + * disable the backend until the next + * kick. + */ + vq_retchains(vq, n_chains); + vq_endchains(vq, /*used_all_avail=*/1); + netbe_rx_disable(sc->vsc_be); + return; + } + + /* More rx buffers found, so keep going. */ + vq_kick_disable(vq); + continue; + } + assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS); + riov_len += n; + if (!sc->rx_merge) { + n_chains = 1; + break; + } + info[n_chains].len = (uint32_t)count_iov(riov, n); + riov_bytes += info[n_chains].len; + riov += n; + n_chains++; + } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS); + + riov = iov; + hdr = riov[0].iov_base; + if (prepend_hdr_len > 0) { + /* + * The frontend uses a virtio-net header, but the + * backend does not. We need to prepend a zeroed + * header. + */ + riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len); + if (riov == NULL) { + /* + * The first collected chain is nonsensical, + * as it is not even enough to store the + * virtio-net header. Just drop it. + */ + vq_relchain(vq, info[0].idx, 0); + vq_retchains(vq, n_chains - 1); + continue; + } + memset(hdr, 0, prepend_hdr_len); + } + + rlen = netbe_recv(sc->vsc_be, riov, riov_len); + + if (rlen != plen - prepend_hdr_len) { + /* + * No more packets (len == 0), or backend errored + * (err < 0). Return unused available buffers + * and stop. + */ + vq_retchains(vq, n_chains); + /* Interrupt if needed/appropriate and stop. */ + vq_endchains(vq, /*used_all_avail=*/0); + return; + } + + ulen = (uint32_t)plen; /* avoid too many casts below */ + + /* Publish the used buffers to the guest. 
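[Reviewer note: when mergeable rx buffers are in use, the publication step below spreads a single received packet across however many chains were collected; a sketch of the accounting, with illustrative values.]

    // plen = 6000-byte packet (incl. prepended header), three chains collected:
    //   info[0].len = 2048 -> vq_relchain(vq, info[0].idx, 2048)
    //   info[1].len = 2048 -> vq_relchain(vq, info[1].idx, 2048)
    //   info[2].len = 4096 -> vq_relchain(vq, info[2].idx, 1904)  // remainder
    // hdr->vrh_bufs = 3; any unused chains are handed back via vq_retchains()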
*/ + if (!sc->rx_merge) { + vq_relchain(vq, info[0].idx, ulen); + } else { + uint32_t iolen; + int i = 0; + + do { + iolen = info[i].len; + if (iolen > ulen) { + iolen = ulen; + } + vq_relchain(vq, info[i].idx, iolen); + ulen -= iolen; + i++; + } while (ulen > 0); + + hdr->vrh_bufs = i; + // TODO add publish for arm64 + //vq_relchain_publish(vq); + vq_retchains(vq, n_chains - i); + } + } + +} + +/* + * Called when there is read activity on the backend file descriptor. + * Each buffer posted by the guest is assumed to be able to contain + * an entire ethernet frame + rx header. + */ +static void +pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) +{ + struct pci_vtnet_softc *sc = param; + + pthread_mutex_lock(&sc->rx_mtx); + pci_vtnet_rx(sc); + pthread_mutex_unlock(&sc->rx_mtx); + +} + +/* Called on RX kick. */ +static void +pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtnet_softc *sc = vsc; + + /* + * A qnotify means that the rx process can now begin. + */ + pthread_mutex_lock(&sc->rx_mtx); + vq_kick_disable(vq); + netbe_rx_enable(sc->vsc_be); + pthread_mutex_unlock(&sc->rx_mtx); +} + +/* TX virtqueue processing, called by the TX thread. */ +static void +pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) +{ + struct iovec iov[VTNET_MAXSEGS + 1]; + struct iovec *siov = iov; + uint16_t idx; + ssize_t len; + int n; + + /* + * Obtain chain of descriptors. The first descriptor also + * contains the virtio-net header. + */ + n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); + assert(n >= 1 && n <= VTNET_MAXSEGS); + + if (sc->vhdrlen != sc->be_vhdrlen) { + /* + * The frontend uses a virtio-net header, but the backend + * does not. We simply strip the header and ignore it, as + * it should be zero-filled. + */ + siov = iov_trim_hdr(siov, &n, sc->vhdrlen); + } + + if (siov == NULL) { + /* The chain is nonsensical. Just drop it. */ + len = 0; + } else { + len = netbe_send(sc->vsc_be, siov, n); + if (len < 0) { + /* + * If send failed, report that 0 bytes + * were read. + */ + len = 0; + } + } + + /* + * Return the processed chain to the guest, reporting + * the number of bytes that we read. + */ + vq_relchain(vq, idx, len > 0 ? len : 0); +} + +/* Called on TX kick. */ +static void +pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtnet_softc *sc = vsc; + + /* + * Any ring entries to process? 
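+	 * If so, just wake up the TX thread; the descriptors themselves are
+	 * drained by pci_vtnet_tx_thread(), so the notifying vcpu never
+	 * blocks in the backend send path.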
+ */ + if (!vq_has_descs(vq)) + return; + + /* Signal the tx thread for processing */ + pthread_mutex_lock(&sc->tx_mtx); + vq_kick_disable(vq); + if (sc->tx_in_progress == 0) + pthread_cond_signal(&sc->tx_cond); + pthread_mutex_unlock(&sc->tx_mtx); +} + +/* + * Thread which will handle processing of TX desc + */ +static void * +pci_vtnet_tx_thread(void *param) +{ + struct pci_vtnet_softc *sc = param; + struct vqueue_info *vq; + int error; + + vq = &sc->vsc_queues[VTNET_TXQ]; + + /* + * Let us wait till the tx queue pointers get initialised & + * first tx signaled + */ + pthread_mutex_lock(&sc->tx_mtx); + error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); + assert(error == 0); + + for (;;) { + /* note - tx mutex is locked here */ + while (sc->resetting || !vq_has_descs(vq)) { + vq_kick_enable(vq); + if (!sc->resetting && vq_has_descs(vq)) + break; + + sc->tx_in_progress = 0; + error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); + assert(error == 0); + } + vq_kick_disable(vq); + sc->tx_in_progress = 1; + pthread_mutex_unlock(&sc->tx_mtx); + + do { + /* + * Run through entries, placing them into + * iovecs and sending when an end-of-packet + * is found + */ + pci_vtnet_proctx(sc, vq); + } while (vq_has_descs(vq)); + + /* + * Generate an interrupt if needed. + */ + vq_endchains(vq, /*used_all_avail=*/1); + + pthread_mutex_lock(&sc->tx_mtx); + } +} + +#ifdef notyet +static void +pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) +{ + + DPRINTF(("vtnet: control qnotify!")); +} +#endif + +static int +pci_vtnet_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts) +{ + struct pci_vtnet_softc *sc; + char tname[MAXCOMLEN + 1]; + int mac_provided; + + /* + * Allocate data structures for further virtio initializations. + * sc also contains a copy of vtnet_vi_consts, since capabilities + * change depending on the backend. + */ + sc = calloc(1, sizeof(struct pci_vtnet_softc)); + + sc->vsc_consts = vtnet_vi_consts; + pthread_mutex_init(&sc->vsc_mtx, NULL); + + sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; + sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; + sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; + sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; +#ifdef notyet + sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; + sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; +#endif + + /* + * Attempt to open the backend device and read the MAC address + * if specified. + */ + mac_provided = 0; + if (opts != NULL) { + char *devname; + char *vtopts; + int err; + + devname = vtopts = strdup(opts); + (void) strsep(&vtopts, ","); + + if (vtopts != NULL) { + err = net_parsemac(vtopts, sc->vsc_config.mac); + if (err != 0) { + free(devname); + free(sc); + return (err); + } + mac_provided = 1; + } + + err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback, + sc); + free(devname); + if (err) { + free(sc); + return (err); + } + sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be); + } + + if (!mac_provided) { + net_genmac(pi, sc->vsc_config.mac); + } + + /* initialize config space */ + vi_devemu_init(pi, VIRTIO_TYPE_NET); + + /* Link is up if we managed to open backend device. 
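+	 * The status byte is what the guest reads back through the
+	 * VIRTIO_NET_F_STATUS config field, so a value of 1 here
+	 * corresponds to VIRTIO_NET_S_LINK_UP.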
*/ + sc->vsc_config.status = (opts == NULL || sc->vsc_be); + + vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); + sc->vsc_vs.vs_mtx = &sc->vsc_mtx; + + /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ + if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { + free(sc); + return (1); + } + + /* use BAR 0 to map config regs in IO space */ + vi_set_io_res(&sc->vsc_vs, 0); + + sc->resetting = 0; + + sc->rx_merge = 0; + pthread_mutex_init(&sc->rx_mtx, NULL); + + /* + * Initialize tx semaphore & spawn TX processing thread. + * As of now, only one thread for TX desc processing is + * spawned. + */ + sc->tx_in_progress = 0; + pthread_mutex_init(&sc->tx_mtx, NULL); + pthread_cond_init(&sc->tx_cond, NULL); + pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); + snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, + pi->di_func); + pthread_set_name_np(sc->tx_tid, tname); + + return (0); +} + +static int +pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) +{ + struct pci_vtnet_softc *sc = vsc; + void *ptr; + + if (offset < (int)sizeof(sc->vsc_config.mac)) { + assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); + /* + * The driver is allowed to change the MAC address + */ + ptr = &sc->vsc_config.mac[offset]; + memcpy(ptr, &value, size); + } else { + /* silently ignore other writes */ + DPRINTF(("vtnet: write to readonly reg %d", offset)); + } + + return (0); +} + +static int +pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) +{ + struct pci_vtnet_softc *sc = vsc; + void *ptr; + + ptr = (uint8_t *)&sc->vsc_config + offset; + memcpy(retval, ptr, size); + return (0); +} + +static void +pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) +{ + struct pci_vtnet_softc *sc = vsc; + + sc->vsc_features = negotiated_features; + + if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) { + sc->vhdrlen = sizeof(struct virtio_net_rxhdr); + sc->rx_merge = 1; + } else { + /* + * Without mergeable rx buffers, virtio-net header is 2 + * bytes shorter than sizeof(struct virtio_net_rxhdr). + */ + sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; + sc->rx_merge = 0; + } + + /* Tell the backend to enable some capabilities it has advertised. */ + netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen); + sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be); +} + +static struct mmio_devemu pci_de_vnet = { + .de_emu = "virtio-net", + .de_init = pci_vtnet_init, + .de_write = vi_mmio_write, + .de_read = vi_mmio_read +}; +MMIO_EMUL_SET(pci_de_vnet); Index: usr.sbin/bhyve/mmio/mmio_virtio_rnd.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_virtio_rnd.c @@ -0,0 +1,208 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2014 Nahanni Systems Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * virtio entropy device emulation. + * Randomness is sourced from /dev/random which does not block + * once it has been seeded at bootup. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include + +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bhyverun.h" +#include "debug.h" + +#include "mmio_emul.h" +#include "mmio_virtio.h" + +#define VTRND_RINGSZ 64 + + +static int pci_vtrnd_debug; +#define DPRINTF(params) if (pci_vtrnd_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params + +/* + * Per-device softc + */ +struct pci_vtrnd_softc { + struct virtio_softc vrsc_vs; + struct vqueue_info vrsc_vq; + pthread_mutex_t vrsc_mtx; + uint64_t vrsc_cfg; + int vrsc_fd; +}; + +static void pci_vtrnd_reset(void *); +static void pci_vtrnd_notify(void *, struct vqueue_info *); + +static struct virtio_consts vtrnd_vi_consts = { + "vtrnd", /* our name */ + 1, /* we support 1 virtqueue */ + 0, /* config reg size */ + pci_vtrnd_reset, /* reset */ + pci_vtrnd_notify, /* device-wide qnotify */ + NULL, /* read virtio config */ + NULL, /* write virtio config */ + NULL, /* apply negotiated features */ + 0, /* our capabilities */ +}; + + +static void +pci_vtrnd_reset(void *vsc) +{ + struct pci_vtrnd_softc *sc; + + sc = vsc; + + DPRINTF(("vtrnd: device reset requested !")); + vi_reset_dev(&sc->vrsc_vs); +} + + +static void +pci_vtrnd_notify(void *vsc, struct vqueue_info *vq) +{ + struct iovec iov; + struct pci_vtrnd_softc *sc; + int len; + uint16_t idx; + + sc = vsc; + + if (sc->vrsc_fd < 0) { + vq_endchains(vq, 0); + return; + } + + while (vq_has_descs(vq)) { + vq_getchain(vq, &idx, &iov, 1, NULL); + + len = read(sc->vrsc_fd, iov.iov_base, iov.iov_len); + + DPRINTF(("vtrnd: vtrnd_notify(): %d", len)); + + /* Catastrophe if unable to read from /dev/random */ + assert(len > 0); + + /* + * Release this chain and handle more + */ + vq_relchain(vq, idx, len); + } + vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ +} + + +static int +pci_vtrnd_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts) +{ + struct pci_vtrnd_softc *sc; + int fd; + int len; + uint8_t v; +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; +#endif + + /* + * Should always be able to open /dev/random. + */ + fd = open("/dev/random", O_RDONLY | O_NONBLOCK); + + assert(fd >= 0); + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_READ); + if (caph_rights_limit(fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + /* + * Check that device is seeded and non-blocking. 
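+	 * The descriptor was opened with O_NONBLOCK, so a one-byte read
+	 * that returns data shows the entropy pool is already seeded; an
+	 * unseeded pool would fail with EWOULDBLOCK here instead.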
+ */ + len = read(fd, &v, sizeof(v)); + if (len <= 0) { + WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len)); + close(fd); + return (1); + } + + sc = calloc(1, sizeof(struct pci_vtrnd_softc)); + + vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq); + sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx; + + sc->vrsc_vq.vq_qsize = VTRND_RINGSZ; + + /* keep /dev/random opened while emulating */ + sc->vrsc_fd = fd; + + /* initialize config space */ + vi_devemu_init(pi, VIRTIO_TYPE_ENTROPY); + + if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix())) + return (1); + vi_set_io_res(&sc->vrsc_vs, 0); + + return (0); +} + + +struct mmio_devemu pci_de_vrnd = { + .de_emu = "virtio-rnd", + .de_init = pci_vtrnd_init, + .de_write = vi_mmio_write, + .de_read = vi_mmio_read +}; +MMIO_EMUL_SET(pci_de_vrnd); Index: usr.sbin/bhyve/mmio/mmio_virtio_scsi.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/mmio_virtio_scsi.c @@ -0,0 +1,741 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2016 Jakub Klama . + * Copyright (c) 2018 Marcelo Araujo . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bhyverun.h" +#include "debug.h" +#include "iov.h" + +#include "mmio_emul.h" +#include "mmio_virtio.h" + +#define VTSCSI_RINGSZ 64 +#define VTSCSI_REQUESTQ 1 +#define VTSCSI_THR_PER_Q 16 +#define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2) +#define VTSCSI_MAXSEG 64 + +#define VTSCSI_IN_HEADER_LEN(_sc) \ + (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size) + +#define VTSCSI_OUT_HEADER_LEN(_sc) \ + (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size) + +#define VIRTIO_SCSI_MAX_CHANNEL 0 +#define VIRTIO_SCSI_MAX_TARGET 0 +#define VIRTIO_SCSI_MAX_LUN 16383 + +#define VIRTIO_SCSI_F_INOUT (1 << 0) +#define VIRTIO_SCSI_F_HOTPLUG (1 << 1) +#define VIRTIO_SCSI_F_CHANGE (1 << 2) + +static int pci_vtscsi_debug = 0; +#define DPRINTF(params) if (pci_vtscsi_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params + +struct pci_vtscsi_config { + uint32_t num_queues; + uint32_t seg_max; + uint32_t max_sectors; + uint32_t cmd_per_lun; + uint32_t event_info_size; + uint32_t sense_size; + uint32_t cdb_size; + uint16_t max_channel; + uint16_t max_target; + uint32_t max_lun; +} __attribute__((packed)); + +struct pci_vtscsi_queue { + struct pci_vtscsi_softc * vsq_sc; + struct vqueue_info * vsq_vq; + pthread_mutex_t vsq_mtx; + pthread_mutex_t vsq_qmtx; + pthread_cond_t vsq_cv; + STAILQ_HEAD(, pci_vtscsi_request) vsq_requests; + LIST_HEAD(, pci_vtscsi_worker) vsq_workers; +}; + +struct pci_vtscsi_worker { + struct pci_vtscsi_queue * vsw_queue; + pthread_t vsw_thread; + bool vsw_exiting; + LIST_ENTRY(pci_vtscsi_worker) vsw_link; +}; + +struct pci_vtscsi_request { + struct pci_vtscsi_queue * vsr_queue; + struct iovec vsr_iov_in[VTSCSI_MAXSEG]; + int vsr_niov_in; + struct iovec vsr_iov_out[VTSCSI_MAXSEG]; + int vsr_niov_out; + uint32_t vsr_idx; + STAILQ_ENTRY(pci_vtscsi_request) vsr_link; +}; + +/* + * Per-device softc + */ +struct pci_vtscsi_softc { + struct virtio_softc vss_vs; + struct vqueue_info vss_vq[VTSCSI_MAXQ]; + struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ]; + pthread_mutex_t vss_mtx; + int vss_iid; + int vss_ctl_fd; + uint32_t vss_features; + struct pci_vtscsi_config vss_config; +}; + +#define VIRTIO_SCSI_T_TMF 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 +#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 +#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 +#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 + +/* command-specific response values */ +#define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 +#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 +#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 + +struct pci_vtscsi_ctrl_tmf { + uint32_t type; + uint32_t subtype; + uint8_t lun[8]; + uint64_t id; + uint8_t response; +} __attribute__((packed)); + +#define VIRTIO_SCSI_T_AN_QUERY 1 +#define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2 +#define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4 +#define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8 +#define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16 +#define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32 +#define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64 + +struct pci_vtscsi_ctrl_an { + uint32_t type; + uint8_t lun[8]; + uint32_t 
event_requested; + uint32_t event_actual; + uint8_t response; +} __attribute__((packed)); + +/* command-specific response values */ +#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_OVERRUN 1 +#define VIRTIO_SCSI_S_ABORTED 2 +#define VIRTIO_SCSI_S_BAD_TARGET 3 +#define VIRTIO_SCSI_S_RESET 4 +#define VIRTIO_SCSI_S_BUSY 5 +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 +#define VIRTIO_SCSI_S_TARGET_FAILURE 7 +#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 +#define VIRTIO_SCSI_S_FAILURE 9 +#define VIRTIO_SCSI_S_INCORRECT_LUN 12 + +/* task_attr */ +#define VIRTIO_SCSI_S_SIMPLE 0 +#define VIRTIO_SCSI_S_ORDERED 1 +#define VIRTIO_SCSI_S_HEAD 2 +#define VIRTIO_SCSI_S_ACA 3 + +struct pci_vtscsi_event { + uint32_t event; + uint8_t lun[8]; + uint32_t reason; +} __attribute__((packed)); + +struct pci_vtscsi_req_cmd_rd { + uint8_t lun[8]; + uint64_t id; + uint8_t task_attr; + uint8_t prio; + uint8_t crn; + uint8_t cdb[]; +} __attribute__((packed)); + +struct pci_vtscsi_req_cmd_wr { + uint32_t sense_len; + uint32_t residual; + uint16_t status_qualifier; + uint8_t status; + uint8_t response; + uint8_t sense[]; +} __attribute__((packed)); + +static void *pci_vtscsi_proc(void *); +static void pci_vtscsi_reset(void *); +static void pci_vtscsi_neg_features(void *, uint64_t); +static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); +static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); +static inline int pci_vtscsi_get_lun(uint8_t *); +static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); +static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, + struct pci_vtscsi_ctrl_tmf *); +static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, + struct pci_vtscsi_ctrl_an *); +static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, + int, struct iovec *, int); +static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); +static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *); +static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *); +static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *, + struct pci_vtscsi_queue *, int); +static int pci_vtscsi_init(struct vmctx *, struct mmio_devinst *, char *); + +static struct virtio_consts vtscsi_vi_consts = { + "vtscsi", /* our name */ + VTSCSI_MAXQ, /* we support 2+n virtqueues */ + sizeof(struct pci_vtscsi_config), /* config reg size */ + pci_vtscsi_reset, /* reset */ + NULL, /* device-wide qnotify */ + pci_vtscsi_cfgread, /* read virtio config */ + pci_vtscsi_cfgwrite, /* write virtio config */ + pci_vtscsi_neg_features, /* apply negotiated features */ + 0, /* our capabilities */ +}; + +static void * +pci_vtscsi_proc(void *arg) +{ + struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg; + struct pci_vtscsi_queue *q = worker->vsw_queue; + struct pci_vtscsi_request *req; + int iolen; + + for (;;) { + pthread_mutex_lock(&q->vsq_mtx); + + while (STAILQ_EMPTY(&q->vsq_requests) + && !worker->vsw_exiting) + pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx); + + if (worker->vsw_exiting) + break; + + req = STAILQ_FIRST(&q->vsq_requests); + STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link); + + pthread_mutex_unlock(&q->vsq_mtx); + iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in, + req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out); + + pthread_mutex_lock(&q->vsq_qmtx); + vq_relchain(q->vsq_vq, req->vsr_idx, iolen); + vq_endchains(q->vsq_vq, 0); + pthread_mutex_unlock(&q->vsq_qmtx); + + DPRINTF(("virtio-scsi: request completed", + req->vsr_idx)); + free(req); + } + + 
pthread_mutex_unlock(&q->vsq_mtx); + return (NULL); +} + +static void +pci_vtscsi_reset(void *vsc) +{ + struct pci_vtscsi_softc *sc; + + sc = vsc; + + DPRINTF(("vtscsi: device reset requested")); + vi_reset_dev(&sc->vss_vs); + + /* initialize config structure */ + sc->vss_config = (struct pci_vtscsi_config){ + .num_queues = VTSCSI_REQUESTQ, + /* Leave room for the request and the response. */ + .seg_max = VTSCSI_MAXSEG - 2, + .max_sectors = 2, + .cmd_per_lun = 1, + .event_info_size = sizeof(struct pci_vtscsi_event), + .sense_size = 96, + .cdb_size = 32, + .max_channel = VIRTIO_SCSI_MAX_CHANNEL, + .max_target = VIRTIO_SCSI_MAX_TARGET, + .max_lun = VIRTIO_SCSI_MAX_LUN + }; +} + +static void +pci_vtscsi_neg_features(void *vsc, uint64_t negotiated_features) +{ + struct pci_vtscsi_softc *sc = vsc; + + sc->vss_features = negotiated_features; +} + +static int +pci_vtscsi_cfgread(void *vsc, int offset, int size, uint32_t *retval) +{ + struct pci_vtscsi_softc *sc = vsc; + void *ptr; + + ptr = (uint8_t *)&sc->vss_config + offset; + memcpy(retval, ptr, size); + return (0); +} + +static int +pci_vtscsi_cfgwrite(void *vsc, int offset, int size, uint32_t val) +{ + + return (0); +} + +static inline int +pci_vtscsi_get_lun(uint8_t *lun) +{ + + return (((lun[2] << 8) | lun[3]) & 0x3fff); +} + +static int +pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, + size_t bufsize) +{ + struct pci_vtscsi_ctrl_tmf *tmf; + struct pci_vtscsi_ctrl_an *an; + uint32_t type; + + type = *(uint32_t *)buf; + + if (type == VIRTIO_SCSI_T_TMF) { + tmf = (struct pci_vtscsi_ctrl_tmf *)buf; + return (pci_vtscsi_tmf_handle(sc, tmf)); + } + + if (type == VIRTIO_SCSI_T_AN_QUERY) { + an = (struct pci_vtscsi_ctrl_an *)buf; + return (pci_vtscsi_an_handle(sc, an)); + } + + return (0); +} + +static int +pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, + struct pci_vtscsi_ctrl_tmf *tmf) +{ + union ctl_io *io; + int err; + + io = ctl_scsi_alloc_io(sc->vss_iid); + ctl_scsi_zero_io(io); + + io->io_hdr.io_type = CTL_IO_TASK; + io->io_hdr.nexus.initid = sc->vss_iid; + io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun); + io->taskio.tag_type = CTL_TAG_SIMPLE; + io->taskio.tag_num = (uint32_t)tmf->id; + + switch (tmf->subtype) { + case VIRTIO_SCSI_T_TMF_ABORT_TASK: + io->taskio.task_action = CTL_TASK_ABORT_TASK; + break; + + case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: + io->taskio.task_action = CTL_TASK_ABORT_TASK_SET; + break; + + case VIRTIO_SCSI_T_TMF_CLEAR_ACA: + io->taskio.task_action = CTL_TASK_CLEAR_ACA; + break; + + case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: + io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET; + break; + + case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: + io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; + break; + + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: + io->taskio.task_action = CTL_TASK_LUN_RESET; + break; + + case VIRTIO_SCSI_T_TMF_QUERY_TASK: + io->taskio.task_action = CTL_TASK_QUERY_TASK; + break; + + case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET: + io->taskio.task_action = CTL_TASK_QUERY_TASK_SET; + break; + } + + if (pci_vtscsi_debug) { + struct sbuf *sb = sbuf_new_auto(); + ctl_io_sbuf(io, sb); + sbuf_finish(sb); + DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); + sbuf_delete(sb); + } + + err = ioctl(sc->vss_ctl_fd, CTL_IO, io); + if (err != 0) + WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); + + tmf->response = io->taskio.task_status; + ctl_scsi_free_io(io); + return (1); +} + +static int +pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc, + struct pci_vtscsi_ctrl_an *an) +{ + + return (0); +} + 
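+/*
+ * The request queue addresses logical units through the 8-byte virtio-scsi
+ * LUN field. With the single-level ("flat") addressing commonly used by
+ * guest drivers, byte 0 is 1, byte 1 is the target and bytes 2-3 carry
+ * 0x4000 | lun, which is why pci_vtscsi_get_lun() masks with 0x3fff.
+ * For example, LUN 5 on target 0 arrives as
+ *
+ *	uint8_t lun[8] = { 1, 0, 0x40, 0x05, 0, 0, 0, 0 };
+ *
+ * and pci_vtscsi_get_lun(lun) yields 5, which is used as the CTL
+ * nexus targ_lun below.
+ */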
+static int +pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, + int niov_in, struct iovec *iov_out, int niov_out) +{ + struct pci_vtscsi_softc *sc = q->vsq_sc; + struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; + struct pci_vtscsi_req_cmd_wr *cmd_wr; + struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; + union ctl_io *io; + int data_niov_in, data_niov_out; + void *ext_data_ptr = NULL; + uint32_t ext_data_len = 0, ext_sg_entries = 0; + int err, nxferred; + + seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, + VTSCSI_IN_HEADER_LEN(sc)); + seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, + VTSCSI_OUT_HEADER_LEN(sc)); + + truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); + truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); + iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); + + cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc)); + io = ctl_scsi_alloc_io(sc->vss_iid); + ctl_scsi_zero_io(io); + + io->io_hdr.nexus.initid = sc->vss_iid; + io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun); + + io->io_hdr.io_type = CTL_IO_SCSI; + + if (data_niov_in > 0) { + ext_data_ptr = (void *)data_iov_in; + ext_sg_entries = data_niov_in; + ext_data_len = count_iov(data_iov_in, data_niov_in); + io->io_hdr.flags |= CTL_FLAG_DATA_OUT; + } else if (data_niov_out > 0) { + ext_data_ptr = (void *)data_iov_out; + ext_sg_entries = data_niov_out; + ext_data_len = count_iov(data_iov_out, data_niov_out); + io->io_hdr.flags |= CTL_FLAG_DATA_IN; + } + + io->scsiio.sense_len = sc->vss_config.sense_size; + io->scsiio.tag_num = (uint32_t)cmd_rd->id; + switch (cmd_rd->task_attr) { + case VIRTIO_SCSI_S_ORDERED: + io->scsiio.tag_type = CTL_TAG_ORDERED; + break; + case VIRTIO_SCSI_S_HEAD: + io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; + break; + case VIRTIO_SCSI_S_ACA: + io->scsiio.tag_type = CTL_TAG_ACA; + break; + case VIRTIO_SCSI_S_SIMPLE: + default: + io->scsiio.tag_type = CTL_TAG_SIMPLE; + break; + } + io->scsiio.ext_sg_entries = ext_sg_entries; + io->scsiio.ext_data_ptr = ext_data_ptr; + io->scsiio.ext_data_len = ext_data_len; + io->scsiio.ext_data_filled = 0; + io->scsiio.cdb_len = sc->vss_config.cdb_size; + memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size); + + if (pci_vtscsi_debug) { + struct sbuf *sb = sbuf_new_auto(); + ctl_io_sbuf(io, sb); + sbuf_finish(sb); + DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); + sbuf_delete(sb); + } + + err = ioctl(sc->vss_ctl_fd, CTL_IO, io); + if (err != 0) { + WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); + cmd_wr->response = VIRTIO_SCSI_S_FAILURE; + } else { + cmd_wr->sense_len = MIN(io->scsiio.sense_len, + sc->vss_config.sense_size); + cmd_wr->residual = io->scsiio.residual; + cmd_wr->status = io->scsiio.scsi_status; + cmd_wr->response = VIRTIO_SCSI_S_OK; + memcpy(&cmd_wr->sense, &io->scsiio.sense_data, + cmd_wr->sense_len); + } + + buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); + nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; + free(cmd_rd); + free(cmd_wr); + ctl_scsi_free_io(io); + return (nxferred); +} + +static void +pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtscsi_softc *sc; + struct iovec iov[VTSCSI_MAXSEG]; + uint16_t idx, n; + void *buf = NULL; + size_t bufsize; + int iolen; + + sc = vsc; + + while (vq_has_descs(vq)) { + n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL); + bufsize = iov_to_buf(iov, n, &buf); + iolen = pci_vtscsi_control_handle(sc, buf, bufsize); + buf_to_iov(buf + bufsize - iolen, iolen, 
iov, n, + bufsize - iolen); + + /* + * Release this chain and handle more + */ + vq_relchain(vq, idx, iolen); + } + vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ + free(buf); +} + +static void +pci_vtscsi_eventq_notify(void *vsc, struct vqueue_info *vq) +{ + + vq_kick_disable(vq); +} + +static void +pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtscsi_softc *sc; + struct pci_vtscsi_queue *q; + struct pci_vtscsi_request *req; + struct iovec iov[VTSCSI_MAXSEG]; + uint16_t flags[VTSCSI_MAXSEG]; + uint16_t idx, n, i; + int readable; + + sc = vsc; + q = &sc->vss_queues[vq->vq_num - 2]; + + while (vq_has_descs(vq)) { + readable = 0; + n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, flags); + + /* Count readable descriptors */ + for (i = 0; i < n; i++) { + if (flags[i] & VRING_DESC_F_WRITE) + break; + + readable++; + } + + req = calloc(1, sizeof(struct pci_vtscsi_request)); + req->vsr_idx = idx; + req->vsr_queue = q; + req->vsr_niov_in = readable; + req->vsr_niov_out = n - readable; + memcpy(req->vsr_iov_in, iov, + req->vsr_niov_in * sizeof(struct iovec)); + memcpy(req->vsr_iov_out, iov + readable, + req->vsr_niov_out * sizeof(struct iovec)); + + pthread_mutex_lock(&q->vsq_mtx); + STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link); + pthread_cond_signal(&q->vsq_cv); + pthread_mutex_unlock(&q->vsq_mtx); + + DPRINTF(("virtio-scsi: request enqueued", idx)); + } +} + +static int +pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, + struct pci_vtscsi_queue *queue, int num) +{ + struct pci_vtscsi_worker *worker; + char tname[MAXCOMLEN + 1]; + int i; + + queue->vsq_sc = sc; + queue->vsq_vq = &sc->vss_vq[num + 2]; + + pthread_mutex_init(&queue->vsq_mtx, NULL); + pthread_mutex_init(&queue->vsq_qmtx, NULL); + pthread_cond_init(&queue->vsq_cv, NULL); + STAILQ_INIT(&queue->vsq_requests); + LIST_INIT(&queue->vsq_workers); + + for (i = 0; i < VTSCSI_THR_PER_Q; i++) { + worker = calloc(1, sizeof(struct pci_vtscsi_worker)); + worker->vsw_queue = queue; + + pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc, + (void *)worker); + + snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i); + pthread_set_name_np(worker->vsw_thread, tname); + LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link); + } + + return (0); +} + +static int +pci_vtscsi_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts) +{ + struct pci_vtscsi_softc *sc; + char *opt, *optname; + const char *devname; + int i, optidx = 0; + + sc = calloc(1, sizeof(struct pci_vtscsi_softc)); + devname = "/dev/cam/ctl"; + while ((opt = strsep(&opts, ",")) != NULL) { + optname = strsep(&opt, "="); + if (opt == NULL && optidx == 0) { + if (optname[0] != 0) + devname = optname; + } else if (strcmp(optname, "dev") == 0 && opt != NULL) { + devname = opt; + } else if (strcmp(optname, "iid") == 0 && opt != NULL) { + sc->vss_iid = strtoul(opt, NULL, 10); + } else { + EPRINTLN("Invalid option %s", optname); + free(sc); + return (1); + } + optidx++; + } + + sc->vss_ctl_fd = open(devname, O_RDWR); + if (sc->vss_ctl_fd < 0) { + WPRINTF(("cannot open %s: %s", devname, strerror(errno))); + free(sc); + return (1); + } + + vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq); + sc->vss_vs.vs_mtx = &sc->vss_mtx; + + /* controlq */ + sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ; + sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify; + + /* eventq */ + sc->vss_vq[1].vq_qsize = VTSCSI_RINGSZ; + sc->vss_vq[1].vq_notify = pci_vtscsi_eventq_notify; + + /* request queues */ + for (i = 2; i < VTSCSI_MAXQ; i++) { + 
sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ; + sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify; + pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2); + } + + /* initialize config space */ + mmio_set_cfgreg16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI); + mmio_set_cfgreg16(pi, PCIR_VENDOR, VIRTIO_VENDOR); + mmio_set_cfgreg8(pi, PCIR_CLASS, PCIC_STORAGE); + mmio_set_cfgreg16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_SCSI); + mmio_set_cfgreg16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); + + if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix())) + return (1); + vi_set_io_res(&sc->vss_vs, 0); + + return (0); +} + + +struct mmio_devemu pci_de_vscsi = { + .de_emu = "virtio-scsi", + .de_init = pci_vtscsi_init, + .de_write = vi_mmio_write, + .de_read = vi_mmio_read +}; +MMIO_EMUL_SET(pci_de_vscsi); Index: usr.sbin/bhyve/mmio/net_backends.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/net_backends.h @@ -0,0 +1,95 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Vincenzo Maffione + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __NET_BACKENDS_H__ +#define __NET_BACKENDS_H__ + +#include + +/* Opaque type representing a network backend. */ +typedef struct net_backend net_backend_t; + +/* Interface between network frontends and the network backends. */ +typedef void (*net_be_rxeof_t)(int, enum ev_type, void *param); +int netbe_init(net_backend_t **be, const char *opts, net_be_rxeof_t cb, + void *param); +void netbe_cleanup(net_backend_t *be); +uint64_t netbe_get_cap(net_backend_t *be); +int netbe_set_cap(net_backend_t *be, uint64_t cap, + unsigned vnet_hdr_len); +size_t netbe_get_vnet_hdr_len(net_backend_t *be); +ssize_t netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt); +ssize_t netbe_peek_recvlen(net_backend_t *be); +ssize_t netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt); +ssize_t netbe_rx_discard(net_backend_t *be); +void netbe_rx_disable(net_backend_t *be); +void netbe_rx_enable(net_backend_t *be); + + +/* + * Network device capabilities taken from the VirtIO standard. + * Despite the name, these capabilities can be used by different frontents + * (virtio-net, ptnet) and supported by different backends (netmap, tap, ...). 
+ */ +#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ +#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ +#define VIRTIO_NET_F_MTU (1 << 3) /* initial MTU advice */ +#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ +#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ +#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ +#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ +#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ +#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ +#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ +#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ +#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ +#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ +#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ +#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ +#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ +#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE \ + (1 << 21) /* guest can send gratuitous pkts */ +#define VIRTIO_NET_F_MQ (1 << 22) /* host supports multiple VQ pairs */ + +/* + * Fixed network header size + */ +struct virtio_net_rxhdr { + uint8_t vrh_flags; + uint8_t vrh_gso_type; + uint16_t vrh_hdr_len; + uint16_t vrh_gso_size; + uint16_t vrh_csum_start; + uint16_t vrh_csum_offset; + uint16_t vrh_bufs; +} __packed; + +#endif /* __NET_BACKENDS_H__ */ Index: usr.sbin/bhyve/mmio/net_backends.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/net_backends.c @@ -0,0 +1,1108 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Vincenzo Maffione + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This file implements multiple network backends (tap, netmap, ...), + * to be used by network frontends such as virtio-net and e1000. + * The API to access the backend (e.g. 
send/receive packets, negotiate + * features) is exported by net_backends.h. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include /* u_short etc */ +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include + +#include +#include +#include +#define NETMAP_WITH_LIBS +#include + +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef NETGRAPH +#include +#include +#include +#endif + +#include "debug.h" +#include "iov.h" +#include "mevent.h" +#include "net_backends.h" + +#include + +/* + * Each network backend registers a set of function pointers that are + * used to implement the net backends API. + * This might need to be exposed if we implement backends in separate files. + */ +struct net_backend { + const char *prefix; /* prefix matching this backend */ + + /* + * Routines used to initialize and cleanup the resources needed + * by a backend. The cleanup function is used internally, + * and should not be called by the frontend. + */ + int (*init)(struct net_backend *be, const char *devname, + const char *opts, net_be_rxeof_t cb, void *param); + void (*cleanup)(struct net_backend *be); + + /* + * Called to serve a guest transmit request. The scatter-gather + * vector provided by the caller has 'iovcnt' elements and contains + * the packet to send. + */ + ssize_t (*send)(struct net_backend *be, const struct iovec *iov, + int iovcnt); + + /* + * Get the length of the next packet that can be received from + * the backend. If no packets are currently available, this + * function returns 0. + */ + ssize_t (*peek_recvlen)(struct net_backend *be); + + /* + * Called to receive a packet from the backend. When the function + * returns a positive value 'len', the scatter-gather vector + * provided by the caller contains a packet with such length. + * The function returns 0 if the backend doesn't have a new packet to + * receive. + */ + ssize_t (*recv)(struct net_backend *be, const struct iovec *iov, + int iovcnt); + + /* + * Ask the backend to enable or disable receive operation in the + * backend. On return from a disable operation, it is guaranteed + * that the receive callback won't be called until receive is + * enabled again. Note however that it is up to the caller to make + * sure that netbe_recv() is not currently being executed by another + * thread. + */ + void (*recv_enable)(struct net_backend *be); + void (*recv_disable)(struct net_backend *be); + + /* + * Ask the backend for the virtio-net features it is able to + * support. Possible features are TSO, UFO and checksum offloading + * in both rx and tx direction and for both IPv4 and IPv6. + */ + uint64_t (*get_cap)(struct net_backend *be); + + /* + * Tell the backend to enable/disable the specified virtio-net + * features (capabilities). + */ + int (*set_cap)(struct net_backend *be, uint64_t features, + unsigned int vnet_hdr_len); + + struct pci_vtnet_softc *sc; + int fd; + + /* + * Length of the virtio-net header used by the backend and the + * frontend, respectively. A zero value means that the header + * is not used. + */ + unsigned int be_vnet_hdr_len; + unsigned int fe_vnet_hdr_len; + + /* Size of backend-specific private data. */ + size_t priv_size; + + /* Room for backend-specific data. 
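+	 * netbe_init() allocates the backend as a single
+	 * calloc(1, sizeof(struct net_backend) + priv_size), so each
+	 * backend casts this trailing area to its private state, e.g.
+	 * (struct tap_priv *)be->opaque or (struct netmap_priv *)be->opaque.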
*/ + char opaque[0]; +}; + +SET_DECLARE(net_backend_set, struct net_backend); + +#define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) + +#define WPRINTF(params) PRINTLN params + +/* + * The tap backend + */ + +struct tap_priv { + struct mevent *mevp; + /* + * A bounce buffer that allows us to implement the peek_recvlen + * callback. In the future we may get the same information from + * the kevent data. + */ + char bbuf[1 << 16]; + ssize_t bbuflen; +}; + +static void +tap_cleanup(struct net_backend *be) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + + if (priv->mevp) { + mevent_delete(priv->mevp); + } + if (be->fd != -1) { + close(be->fd); + be->fd = -1; + } +} + +static int +tap_init(struct net_backend *be, const char *devname, + const char *opts, net_be_rxeof_t cb, void *param) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + char tbuf[80]; + int opt = 1; +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; +#endif + + if (cb == NULL) { + WPRINTF(("TAP backend requires non-NULL callback")); + return (-1); + } + + strcpy(tbuf, "/dev/"); + strlcat(tbuf, devname, sizeof(tbuf)); + + be->fd = open(tbuf, O_RDWR); + if (be->fd == -1) { + WPRINTF(("open of tap device %s failed", tbuf)); + goto error; + } + + /* + * Set non-blocking and register for read + * notifications with the event loop + */ + if (ioctl(be->fd, FIONBIO, &opt) < 0) { + WPRINTF(("tap device O_NONBLOCK failed")); + goto error; + } + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); + if (caph_rights_limit(be->fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + memset(priv->bbuf, 0, sizeof(priv->bbuf)); + priv->bbuflen = 0; + + priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); + if (priv->mevp == NULL) { + WPRINTF(("Could not register event")); + goto error; + } + + return (0); + +error: + tap_cleanup(be); + return (-1); +} + +/* + * Called to send a buffer chain out to the tap device + */ +static ssize_t +tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + return (writev(be->fd, iov, iovcnt)); +} + +static ssize_t +tap_peek_recvlen(struct net_backend *be) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + ssize_t ret; + + if (priv->bbuflen > 0) { + /* + * We already have a packet in the bounce buffer. + * Just return its length. + */ + return priv->bbuflen; + } + + /* + * Read the next packet (if any) into the bounce buffer, so + * that we get to know its length and we can return that + * to the caller. + */ + ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf)); + if (ret < 0 && errno == EWOULDBLOCK) { + return (0); + } + + if (ret > 0) + priv->bbuflen = ret; + + return (ret); +} + +static ssize_t +tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + ssize_t ret; + + if (priv->bbuflen > 0) { + /* + * A packet is available in the bounce buffer, so + * we read it from there. + */ + ret = buf_to_iov(priv->bbuf, priv->bbuflen, + iov, iovcnt, 0); + + /* Mark the bounce buffer as empty. 
*/ + priv->bbuflen = 0; + + return (ret); + } + + ret = readv(be->fd, iov, iovcnt); + if (ret < 0 && errno == EWOULDBLOCK) { + return (0); + } + + return (ret); +} + +static void +tap_recv_enable(struct net_backend *be) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + + mevent_enable(priv->mevp); +} + +static void +tap_recv_disable(struct net_backend *be) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + + mevent_disable(priv->mevp); +} + +static uint64_t +tap_get_cap(struct net_backend *be) +{ + + return (0); /* no capabilities for now */ +} + +static int +tap_set_cap(struct net_backend *be, uint64_t features, + unsigned vnet_hdr_len) +{ + + return ((features || vnet_hdr_len) ? -1 : 0); +} + +static struct net_backend tap_backend = { + .prefix = "tap", + .priv_size = sizeof(struct tap_priv), + .init = tap_init, + .cleanup = tap_cleanup, + .send = tap_send, + .peek_recvlen = tap_peek_recvlen, + .recv = tap_recv, + .recv_enable = tap_recv_enable, + .recv_disable = tap_recv_disable, + .get_cap = tap_get_cap, + .set_cap = tap_set_cap, +}; + +/* A clone of the tap backend, with a different prefix. */ +static struct net_backend vmnet_backend = { + .prefix = "vmnet", + .priv_size = sizeof(struct tap_priv), + .init = tap_init, + .cleanup = tap_cleanup, + .send = tap_send, + .peek_recvlen = tap_peek_recvlen, + .recv = tap_recv, + .recv_enable = tap_recv_enable, + .recv_disable = tap_recv_disable, + .get_cap = tap_get_cap, + .set_cap = tap_set_cap, +}; + +DATA_SET(net_backend_set, tap_backend); +DATA_SET(net_backend_set, vmnet_backend); + +#ifdef NETGRAPH + +/* + * Netgraph backend + */ + +#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024) + +static int +ng_init(struct net_backend *be, const char *devname, + const char *opts, net_be_rxeof_t cb, void *param) +{ + struct tap_priv *p = (struct tap_priv *)be->opaque; + struct ngm_connect ngc; + char *ngopts, *tofree; + char nodename[NG_NODESIZ]; + int sbsz; + int ctrl_sock; + int flags; + int path_provided; + int peerhook_provided; + int socket_provided; + unsigned long maxsbsz; + size_t msbsz; +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; +#endif + + if (cb == NULL) { + WPRINTF(("Netgraph backend requires non-NULL callback")); + return (-1); + } + + be->fd = -1; + + memset(&ngc, 0, sizeof(ngc)); + + strncpy(ngc.ourhook, "vmlink", NG_HOOKSIZ - 1); + + tofree = ngopts = strdup(opts); + + if (ngopts == NULL) { + WPRINTF(("strdup error")); + return (-1); + } + + socket_provided = 0; + path_provided = 0; + peerhook_provided = 0; + + while (ngopts != NULL) { + char *value = ngopts; + char *key; + + key = strsep(&value, "="); + if (value == NULL) + break; + ngopts = value; + (void) strsep(&ngopts, ","); + + if (strcmp(key, "socket") == 0) { + strncpy(nodename, value, NG_NODESIZ - 1); + socket_provided = 1; + } else if (strcmp(key, "path") == 0) { + strncpy(ngc.path, value, NG_PATHSIZ - 1); + path_provided = 1; + } else if (strcmp(key, "hook") == 0) { + strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1); + } else if (strcmp(key, "peerhook") == 0) { + strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1); + peerhook_provided = 1; + } + } + + free(tofree); + + if (!path_provided) { + WPRINTF(("path must be provided")); + return (-1); + } + + if (!peerhook_provided) { + WPRINTF(("peer hook must be provided")); + return (-1); + } + + if (NgMkSockNode(socket_provided ? 
nodename : NULL, + &ctrl_sock, &be->fd) < 0) { + WPRINTF(("can't get Netgraph sockets")); + return (-1); + } + + if (NgSendMsg(ctrl_sock, ".", + NGM_GENERIC_COOKIE, + NGM_CONNECT, &ngc, sizeof(ngc)) < 0) { + WPRINTF(("can't connect to node")); + close(ctrl_sock); + goto error; + } + + close(ctrl_sock); + + flags = fcntl(be->fd, F_GETFL); + + if (flags < 0) { + WPRINTF(("can't get socket flags")); + goto error; + } + + if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) { + WPRINTF(("can't set O_NONBLOCK flag")); + goto error; + } + + /* + * The default ng_socket(4) buffer's size is too low. + * Calculate the minimum value between NG_SBUF_MAX_SIZE + * and kern.ipc.maxsockbuf. + */ + msbsz = sizeof(maxsbsz); + if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz, + NULL, 0) < 0) { + WPRINTF(("can't get 'kern.ipc.maxsockbuf' value")); + goto error; + } + + /* + * We can't set the socket buffer size to kern.ipc.maxsockbuf value, + * as it takes into account the mbuf(9) overhead. + */ + maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES); + + sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz); + + if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz, + sizeof(sbsz)) < 0) { + WPRINTF(("can't set TX buffer size")); + goto error; + } + + if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz, + sizeof(sbsz)) < 0) { + WPRINTF(("can't set RX buffer size")); + goto error; + } + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); + if (caph_rights_limit(be->fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + memset(p->bbuf, 0, sizeof(p->bbuf)); + p->bbuflen = 0; + + p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); + if (p->mevp == NULL) { + WPRINTF(("Could not register event")); + goto error; + } + + return (0); + +error: + tap_cleanup(be); + return (-1); +} + +static struct net_backend ng_backend = { + .prefix = "netgraph", + .priv_size = sizeof(struct tap_priv), + .init = ng_init, + .cleanup = tap_cleanup, + .send = tap_send, + .peek_recvlen = tap_peek_recvlen, + .recv = tap_recv, + .recv_enable = tap_recv_enable, + .recv_disable = tap_recv_disable, + .get_cap = tap_get_cap, + .set_cap = tap_set_cap, +}; + +DATA_SET(net_backend_set, ng_backend); + +#endif /* NETGRAPH */ + +/* + * The netmap backend + */ + +/* The virtio-net features supported by netmap. 
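+ * They are only advertised when the netmap port accepts a virtio-net
+ * header (see netmap_get_cap() and netmap_has_vnet_hdr_len()); otherwise
+ * no offloads are offered and plain Ethernet frames are exchanged.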
*/ +#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ + VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ + VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO) + +struct netmap_priv { + char ifname[IFNAMSIZ]; + struct nm_desc *nmd; + uint16_t memid; + struct netmap_ring *rx; + struct netmap_ring *tx; + struct mevent *mevp; + net_be_rxeof_t cb; + void *cb_param; +}; + +static void +nmreq_init(struct nmreq *req, char *ifname) +{ + + memset(req, 0, sizeof(*req)); + strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); + req->nr_version = NETMAP_API; +} + +static int +netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) +{ + int err; + struct nmreq req; + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + + nmreq_init(&req, priv->ifname); + req.nr_cmd = NETMAP_BDG_VNET_HDR; + req.nr_arg1 = vnet_hdr_len; + err = ioctl(be->fd, NIOCREGIF, &req); + if (err) { + WPRINTF(("Unable to set vnet header length %d", + vnet_hdr_len)); + return (err); + } + + be->be_vnet_hdr_len = vnet_hdr_len; + + return (0); +} + +static int +netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) +{ + int prev_hdr_len = be->be_vnet_hdr_len; + int ret; + + if (vnet_hdr_len == prev_hdr_len) { + return (1); + } + + ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); + if (ret) { + return (0); + } + + netmap_set_vnet_hdr_len(be, prev_hdr_len); + + return (1); +} + +static uint64_t +netmap_get_cap(struct net_backend *be) +{ + + return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? + NETMAP_FEATURES : 0); +} + +static int +netmap_set_cap(struct net_backend *be, uint64_t features, + unsigned vnet_hdr_len) +{ + + return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); +} + +static int +netmap_init(struct net_backend *be, const char *devname, + const char *opts, net_be_rxeof_t cb, void *param) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + + strlcpy(priv->ifname, devname, sizeof(priv->ifname)); + priv->ifname[sizeof(priv->ifname) - 1] = '\0'; + + priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); + if (priv->nmd == NULL) { + WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)", + devname, strerror(errno))); + free(priv); + return (-1); + } + + priv->memid = priv->nmd->req.nr_arg2; + priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); + priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); + priv->cb = cb; + priv->cb_param = param; + be->fd = priv->nmd->fd; + + priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); + if (priv->mevp == NULL) { + WPRINTF(("Could not register event")); + return (-1); + } + + return (0); +} + +static void +netmap_cleanup(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + + if (priv->mevp) { + mevent_delete(priv->mevp); + } + if (priv->nmd) { + nm_close(priv->nmd); + } + be->fd = -1; +} + +static ssize_t +netmap_send(struct net_backend *be, const struct iovec *iov, + int iovcnt) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + struct netmap_ring *ring; + ssize_t totlen = 0; + int nm_buf_size; + int nm_buf_len; + uint32_t head; + void *nm_buf; + int j; + + ring = priv->tx; + head = ring->head; + if (head == ring->tail) { + WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); + goto txsync; + } + nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); + nm_buf_size = ring->nr_buf_size; + nm_buf_len = 0; + + for (j = 0; j < iovcnt; j++) { + int iov_frag_size = iov[j].iov_len; + void *iov_frag_buf = 
iov[j].iov_base; + + totlen += iov_frag_size; + + /* + * Split each iovec fragment over more netmap slots, if + * necessary. + */ + for (;;) { + int copylen; + + copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; + memcpy(nm_buf, iov_frag_buf, copylen); + + iov_frag_buf += copylen; + iov_frag_size -= copylen; + nm_buf += copylen; + nm_buf_size -= copylen; + nm_buf_len += copylen; + + if (iov_frag_size == 0) { + break; + } + + ring->slot[head].len = nm_buf_len; + ring->slot[head].flags = NS_MOREFRAG; + head = nm_ring_next(ring, head); + if (head == ring->tail) { + /* + * We ran out of netmap slots while + * splitting the iovec fragments. + */ + WPRINTF(("No space, drop %zu bytes", + count_iov(iov, iovcnt))); + goto txsync; + } + nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); + nm_buf_size = ring->nr_buf_size; + nm_buf_len = 0; + } + } + + /* Complete the last slot, which must not have NS_MOREFRAG set. */ + ring->slot[head].len = nm_buf_len; + ring->slot[head].flags = 0; + head = nm_ring_next(ring, head); + + /* Now update ring->head and ring->cur. */ + ring->head = ring->cur = head; +txsync: + ioctl(be->fd, NIOCTXSYNC, NULL); + + return (totlen); +} + +static ssize_t +netmap_peek_recvlen(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + struct netmap_ring *ring = priv->rx; + uint32_t head = ring->head; + ssize_t totlen = 0; + + while (head != ring->tail) { + struct netmap_slot *slot = ring->slot + head; + + totlen += slot->len; + if ((slot->flags & NS_MOREFRAG) == 0) + break; + head = nm_ring_next(ring, head); + } + + return (totlen); +} + +static ssize_t +netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + struct netmap_slot *slot = NULL; + struct netmap_ring *ring; + void *iov_frag_buf; + int iov_frag_size; + ssize_t totlen = 0; + uint32_t head; + + assert(iovcnt); + + ring = priv->rx; + head = ring->head; + iov_frag_buf = iov->iov_base; + iov_frag_size = iov->iov_len; + + do { + int nm_buf_len; + void *nm_buf; + + if (head == ring->tail) { + return (0); + } + + slot = ring->slot + head; + nm_buf = NETMAP_BUF(ring, slot->buf_idx); + nm_buf_len = slot->len; + + for (;;) { + int copylen = nm_buf_len < iov_frag_size ? + nm_buf_len : iov_frag_size; + + memcpy(iov_frag_buf, nm_buf, copylen); + nm_buf += copylen; + nm_buf_len -= copylen; + iov_frag_buf += copylen; + iov_frag_size -= copylen; + totlen += copylen; + + if (nm_buf_len == 0) { + break; + } + + iov++; + iovcnt--; + if (iovcnt == 0) { + /* No space to receive. */ + WPRINTF(("Short iov, drop %zd bytes", + totlen)); + return (-ENOSPC); + } + iov_frag_buf = iov->iov_base; + iov_frag_size = iov->iov_len; + } + + head = nm_ring_next(ring, head); + + } while (slot->flags & NS_MOREFRAG); + + /* Release slots to netmap. 
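+	 * Advancing head (and cur) marks the consumed slots as free for the
+	 * kernel to refill on the next rxsync.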
*/ + ring->head = ring->cur = head; + + return (totlen); +} + +static void +netmap_recv_enable(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + + mevent_enable(priv->mevp); +} + +static void +netmap_recv_disable(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + + mevent_disable(priv->mevp); +} + +static struct net_backend netmap_backend = { + .prefix = "netmap", + .priv_size = sizeof(struct netmap_priv), + .init = netmap_init, + .cleanup = netmap_cleanup, + .send = netmap_send, + .peek_recvlen = netmap_peek_recvlen, + .recv = netmap_recv, + .recv_enable = netmap_recv_enable, + .recv_disable = netmap_recv_disable, + .get_cap = netmap_get_cap, + .set_cap = netmap_set_cap, +}; + +/* A clone of the netmap backend, with a different prefix. */ +static struct net_backend vale_backend = { + .prefix = "vale", + .priv_size = sizeof(struct netmap_priv), + .init = netmap_init, + .cleanup = netmap_cleanup, + .send = netmap_send, + .peek_recvlen = netmap_peek_recvlen, + .recv = netmap_recv, + .recv_enable = netmap_recv_enable, + .recv_disable = netmap_recv_disable, + .get_cap = netmap_get_cap, + .set_cap = netmap_set_cap, +}; + +DATA_SET(net_backend_set, netmap_backend); +DATA_SET(net_backend_set, vale_backend); + +/* + * Initialize a backend and attach to the frontend. + * This is called during frontend initialization. + * @pbe is a pointer to the backend to be initialized + * @devname is the backend-name as supplied on the command line, + * e.g. -s 2:0,frontend-name,backend-name[,other-args] + * @cb is the receive callback supplied by the frontend, + * and it is invoked in the event loop when a receive + * event is generated in the hypervisor, + * @param is a pointer to the frontend, and normally used as + * the argument for the callback. + */ +int +netbe_init(struct net_backend **ret, const char *opts, net_be_rxeof_t cb, + void *param) +{ + struct net_backend **pbe, *nbe, *tbe = NULL; + char *devname; + char *options; + int err; + + devname = options = strdup(opts); + + if (devname == NULL) { + return (-1); + } + + devname = strsep(&options, ","); + + /* + * Find the network backend that matches the user-provided + * device name. net_backend_set is built using a linker set. + */ + SET_FOREACH(pbe, net_backend_set) { + if (strncmp(devname, (*pbe)->prefix, + strlen((*pbe)->prefix)) == 0) { + tbe = *pbe; + assert(tbe->init != NULL); + assert(tbe->cleanup != NULL); + assert(tbe->send != NULL); + assert(tbe->recv != NULL); + assert(tbe->get_cap != NULL); + assert(tbe->set_cap != NULL); + break; + } + } + + *ret = NULL; + if (tbe == NULL) { + free(devname); + return (EINVAL); + } + + nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); + *nbe = *tbe; /* copy the template */ + nbe->fd = -1; + nbe->sc = param; + nbe->be_vnet_hdr_len = 0; + nbe->fe_vnet_hdr_len = 0; + + /* Initialize the backend. */ + err = nbe->init(nbe, devname, options, cb, param); + if (err) { + free(devname); + free(nbe); + return (err); + } + + *ret = nbe; + free(devname); + + return (0); +} + +void +netbe_cleanup(struct net_backend *be) +{ + + if (be != NULL) { + be->cleanup(be); + free(be); + } +} + +uint64_t +netbe_get_cap(struct net_backend *be) +{ + + assert(be != NULL); + return (be->get_cap(be)); +} + +int +netbe_set_cap(struct net_backend *be, uint64_t features, + unsigned vnet_hdr_len) +{ + int ret; + + assert(be != NULL); + + /* There are only three valid lengths, i.e., 0, 10 and 12. 
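0 means the header is disabled, 10 is the size of the basic virtio-net header, and 12 (VNET_HDR_LEN) is the same header followed by the 16-bit num_buffers field used when merged receive buffers are negotiated.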
*/ + if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN + && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) + return (-1); + + be->fe_vnet_hdr_len = vnet_hdr_len; + + ret = be->set_cap(be, features, vnet_hdr_len); + assert(be->be_vnet_hdr_len == 0 || + be->be_vnet_hdr_len == be->fe_vnet_hdr_len); + + return (ret); +} + +ssize_t +netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + + return (be->send(be, iov, iovcnt)); +} + +ssize_t +netbe_peek_recvlen(struct net_backend *be) +{ + + return (be->peek_recvlen(be)); +} + +/* + * Try to read a packet from the backend, without blocking. + * If no packets are available, return 0. In case of success, return + * the length of the packet just read. Return -1 in case of errors. + */ +ssize_t +netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + + return (be->recv(be, iov, iovcnt)); +} + +/* + * Read a packet from the backend and discard it. + * Returns the size of the discarded packet or zero if no packet was available. + * A negative error code is returned in case of read error. + */ +ssize_t +netbe_rx_discard(struct net_backend *be) +{ + /* + * MP note: the dummybuf is only used to discard frames, + * so there is no need for it to be per-vtnet or locked. + * We only make it large enough for TSO-sized segment. + */ + static uint8_t dummybuf[65536 + 64]; + struct iovec iov; + + iov.iov_base = dummybuf; + iov.iov_len = sizeof(dummybuf); + + return netbe_recv(be, &iov, 1); +} + +void +netbe_rx_disable(struct net_backend *be) +{ + + return be->recv_disable(be); +} + +void +netbe_rx_enable(struct net_backend *be) +{ + + return be->recv_enable(be); +} + +size_t +netbe_get_vnet_hdr_len(struct net_backend *be) +{ + + return (be->be_vnet_hdr_len); +} Index: usr.sbin/bhyve/mmio/net_utils.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/net_utils.h @@ -0,0 +1,39 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Vincenzo Maffione + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NET_UTILS_H_ +#define _NET_UTILS_H_ + +#include +#include "mmio_emul.h" + +void net_genmac(struct mmio_devinst *pi, uint8_t *macaddr); +int net_parsemac(char *mac_str, uint8_t *mac_addr); + +#endif /* _NET_UTILS_H_ */ Index: usr.sbin/bhyve/mmio/net_utils.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/net_utils.c @@ -0,0 +1,90 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 NetApp, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include + +#include "../arm64/bhyverun.h" +#include "debug.h" +#include "net_utils.h" + +int +net_parsemac(char *mac_str, uint8_t *mac_addr) +{ + struct ether_addr *ea; + char *tmpstr; + char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; + + tmpstr = strsep(&mac_str,"="); + + if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) { + ea = ether_aton(mac_str); + + if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || + memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { + EPRINTLN("Invalid MAC %s", mac_str); + return (EINVAL); + } else + memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); + } + + return (0); +} + +void +net_genmac(struct mmio_devinst *pi, uint8_t *macaddr) +{ + /* + * The default MAC address is the standard NetApp OUI of 00-a0-98, + * followed by an MD5 of the PCI slot/func number and dev name + */ + MD5_CTX mdctx; + unsigned char digest[16]; + char nstr[80]; + + snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, + pi->di_func, vmname); + + MD5Init(&mdctx); + MD5Update(&mdctx, nstr, (unsigned int)strlen(nstr)); + MD5Final(digest, &mdctx); + + macaddr[0] = 0x00; + macaddr[1] = 0xa0; + macaddr[2] = 0x98; + macaddr[3] = digest[0]; + macaddr[4] = digest[1]; + macaddr[5] = digest[2]; +} Index: usr.sbin/bhyve/mmio/pl011.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/pl011.c @@ -0,0 +1,384 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2020 Andrew Turner + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include +#include + +#include "mevent.h" +#include "uart_backend.h" +#include "uart_emul.h" + +#define UART_FIFO_SIZE 16 + +#define UARTDR 0x00 +#define UARTDR_RSR_SHIFT 8 + +#define UARTRSR 0x01 +#define UARTRSR_OE (1 << 3) + +#define UARTFR 0x06 +#define UARTFR_TXFE (1 << 7) +#define UARTFR_RXFF (1 << 6) +#define UARTFR_TXFF (1 << 5) +#define UARTFR_RXFE (1 << 4) + +#define UARTRTINTR (1 << 6) +#define UARTTXINTR (1 << 5) +#define UARTRXINTR (1 << 4) + +#define UARTIBRD 0x09 + +#define UARTFBRD 0x0a +#define UARTFBRD_MASK 0x003f + +#define UARTLCR_H 0x0b +#define UARTLCR_H_MASK 0x00ff +#define UARTLCR_H_FEN (1 << 4) + +#define UARTCR 0x0c +/* TODO: Check the flags in the UARTCR register */ +#define UARTCR_MASK 0xffc7 +#define UARTCR_LBE (1 << 7) + +#define UARTIFLS 0x0d +#define UARTIFLS_MASK 0x003f +#define UARTIFLS_RXIFLSEL(x) (((x) >> 3) & 0x7) +#define UARTIFLS_TXIFLSEL(x) (((x) >> 0) & 0x7) + +#define UARTIMSC 0x0e +#define UARTIMSC_MASK 0x07ff + +#define UARTRIS 0x0f +#define UARTMIS 0x10 + +#define UARTICR 0x11 + + +#define UARTPeriphID 0x00241011 +#define UARTPeriphID0 0x3f8 +#define UARTPeriphID0_VAL (((UARTPeriphID) >> 0) & 0xff) +#define UARTPeriphID1 0x3f9 +#define UARTPeriphID1_VAL (((UARTPeriphID) >> 8) & 0xff) +#define UARTPeriphID2 0x3fa +#define UARTPeriphID2_VAL (((UARTPeriphID) >> 16) & 0xff) +#define UARTPeriphID3 0x3fb +#define UARTPeriphID3_VAL (((UARTPeriphID) >> 24) & 0xff) + +#define UARTPCellID 0xb105f00d +#define UARTPCellID0 0x3fc +#define UARTPCellID0_VAL (((UARTPCellID) >> 0) & 0xff) +#define UARTPCellID1 0x3fd +#define UARTPCellID1_VAL (((UARTPCellID) >> 8) & 0xff) +#define UARTPCellID2 0x3fe +#define UARTPCellID2_VAL (((UARTPCellID) >> 16) & 0xff) +#define UARTPCellID3 0x3ff +#define UARTPCellID3_VAL (((UARTPCellID) >> 24) & 0xff) + +static void +uart_reset(struct uart_softc *sc) +{ + + sc->ifls = 0x12; + + /* no fifo until enabled by software */ + uart_rxfifo_reset(sc->backend, 1); +} + +static int +uart_rx_trigger_level(struct uart_softc *sc) +{ + + /* If the FIFO is disabled trigger when we have any data */ + if ((sc->lcr_h & UARTLCR_H_FEN) != 0) + return (1); + + /* Trigger base on how full the 
fifo is */ + switch(UARTIFLS_RXIFLSEL(sc->ifls)) { + case 0: + return (UART_FIFO_SIZE / 8); + case 1: + return (UART_FIFO_SIZE / 4); + case 2: + return (UART_FIFO_SIZE / 2); + case 3: + return (UART_FIFO_SIZE * 3 / 4); + case 4: + return (UART_FIFO_SIZE * 7 / 8); + default: + /* TODO: Find out what happens in this case */ + return (UART_FIFO_SIZE); + } +} + +static void +uart_toggle_intr(struct uart_softc *sc) +{ + if ((sc->irq_state & sc->imsc) == 0) + (*sc->intr_deassert)(sc->arg, sc->irqno); + else + (*sc->intr_assert)(sc->arg, sc->irqno); +} + +static void +uart_drain(int fd, enum ev_type ev, void *arg) +{ + struct uart_softc *sc; + int old_size, trig_lvl; + bool loopback; + + sc = arg; + + assert(ev == EVF_READ); + + /* + * This routine is called in the context of the mevent thread + * to take out the softc lock to protect against concurrent + * access from a vCPU i/o exit + */ + pthread_mutex_lock(&sc->mtx); + + old_size = uart_rxfifo_numchars(sc->backend); + + loopback = (sc->cr & UARTCR_LBE) != 0; + uart_rxfifo_drain(sc->backend, loopback); + + /* If we cross the trigger level raise UARTRXINTR */ + trig_lvl = uart_rx_trigger_level(sc); + if (old_size < trig_lvl && + uart_rxfifo_numchars(sc->backend) >= trig_lvl) + sc->irq_state |= UARTRXINTR; + + if (uart_rxfifo_numchars(sc->backend) > 0) + sc->irq_state |= UARTRTINTR; + if (!loopback) + uart_toggle_intr(sc); + + pthread_mutex_unlock(&sc->mtx); +} + +void +uart_write(struct uart_softc *sc, int offset, uint32_t value) +{ + bool loopback; + + pthread_mutex_lock(&sc->mtx); + switch (offset) { + case UARTDR: + loopback = (sc->cr & UARTCR_LBE) != 0; + if (!uart_rxfifo_write(sc->backend, loopback, value & 0xff)) + sc->rsr |= UARTRSR_OE; + + /* We don't have a TX fifo, so trigger when we have data */ + sc->irq_state |= UARTTXINTR; + break; + case UARTRSR: + /* Any write clears this register */ + sc->rsr = 0; + break; + case UARTFR: + /* UARTFR is a read-only register */ + break; + /* TODO: UARTILPR */ + case UARTIBRD: + sc->ibrd = value; + break; + case UARTFBRD: + sc->fbrd = value & UARTFBRD_MASK; + break; + case UARTLCR_H: + /* Check if the FIFO enable bit changed */ + if (((sc->lcr_h ^ value) & UARTLCR_H_FEN) != 0) { + if ((value & UARTLCR_H_FEN) != 0) { + uart_rxfifo_reset(sc->backend, UART_FIFO_SIZE); + } else { + uart_rxfifo_reset(sc->backend, 1); + } + } + sc->lcr_h = value & UARTLCR_H_MASK; + break; + case UARTCR: + sc->cr = value & UARTCR_MASK; + break; + case UARTIFLS: + sc->ifls = value & UARTCR_MASK; + break; + case UARTIMSC: + sc->imsc = value & UARTIMSC_MASK; + break; + case UARTRIS: + case UARTMIS: + /* UARTRIS and UARTMIS are read-only registers */ + break; + case UARTICR: + sc->irq_state &= ~value; + break; + default: + /* Ignore writes to unassigned/ID registers */ + break; + } + uart_toggle_intr(sc); + pthread_mutex_unlock(&sc->mtx); +} + +uint32_t +uart_read(struct uart_softc *sc, int offset) +{ + uint32_t reg; + int fifo_sz; + + reg = 0; + pthread_mutex_lock(&sc->mtx); + switch(offset) { + case UARTDR: + reg = uart_rxfifo_getchar(sc->backend); + /* Deassert the irq if below the trigger level */ + fifo_sz = uart_rxfifo_numchars(sc->backend); + if (fifo_sz < uart_rx_trigger_level(sc)) + sc->irq_state &= ~UARTRXINTR; + if (fifo_sz == 0) + sc->irq_state &= ~UARTRTINTR; + + reg |= sc->rsr << UARTDR_RSR_SHIFT; + + /* After reading from the fifo there is now space in it */ + sc->rsr &= UARTRSR_OE; + break; + case UARTRSR: + /* Any write clears this register */ + reg = sc->rsr; + break; + case UARTFR: + /* Transmit is 
intstant, so the fifo is always empty */ + reg = UARTFR_TXFE; + + /* Set the receive fifo full/empty flags */ + fifo_sz = uart_rxfifo_numchars(sc->backend); + if (fifo_sz == UART_FIFO_SIZE) + reg |= UARTFR_RXFF; + else if (fifo_sz == 0) + reg |= UARTFR_RXFE; + break; + /* TODO: UARTILPR */ + case UARTIBRD: + reg = sc->ibrd; + break; + case UARTFBRD: + reg = sc->fbrd; + break; + case UARTLCR_H: + reg = sc->lcr_h; + break; + case UARTCR: + reg = sc->cr; + break; + case UARTIMSC: + reg = sc->imsc; + break; + case UARTRIS: + reg = sc->irq_state; + break; + case UARTMIS: + reg = sc->irq_state & sc->imsc; + break; + case UARTICR: + reg = 0; + break; + case UARTPeriphID0: + reg = UARTPeriphID0_VAL; + break; + case UARTPeriphID1: + reg =UARTPeriphID1_VAL; + break; + case UARTPeriphID2: + reg = UARTPeriphID2_VAL; + break; + case UARTPeriphID3: + reg = UARTPeriphID3_VAL; + break; + case UARTPCellID0: + reg = UARTPCellID0_VAL; + break; + case UARTPCellID1: + reg = UARTPCellID1_VAL; + break; + case UARTPCellID2: + reg = UARTPCellID2_VAL; + break; + case UARTPCellID3: + reg = UARTPCellID3_VAL; + break; + default: + /* Return 0 in reads from unasigned registers */ + reg = 0; + break; + } + uart_toggle_intr(sc); + pthread_mutex_unlock(&sc->mtx); + + return (reg); +} + +struct uart_softc * +uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert, + void *arg) +{ + struct uart_softc *sc; + + sc = calloc(1, sizeof(struct uart_softc)); + + sc->arg = arg; + sc->intr_assert = intr_assert; + sc->intr_deassert = intr_deassert; + sc->backend = uart_backend_alloc(); + + pthread_mutex_init(&sc->mtx, NULL); + + uart_reset(sc); + + return (sc); +} + +int +uart_set_backend(struct uart_softc *sc, const char *opts) +{ + int retval; + + retval = uart_backend_open(sc->backend, opts, uart_drain, sc); + return (retval); +} + Index: usr.sbin/bhyve/mmio/uart_backend.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/uart_backend.h @@ -0,0 +1,46 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2013 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _UART_BACKEND_H_ +#define _UART_BACKEND_H_ + +struct uart_backend; + +struct uart_backend *uart_backend_alloc(void); +int uart_backend_open(struct uart_backend *b, const char *opts, + void (*func)(int, enum ev_type, void *), void *arg); + +void uart_rxfifo_reset(struct uart_backend *b, int size); +int uart_rxfifo_getchar(struct uart_backend *b); +int uart_rxfifo_numchars(struct uart_backend *b); +void uart_rxfifo_drain(struct uart_backend *b, bool loopback); +bool uart_rxfifo_write(struct uart_backend *b, bool loopback, uint8_t ch); + +#endif Index: usr.sbin/bhyve/mmio/uart_backend.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/uart_backend.c @@ -0,0 +1,351 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2012 NetApp, Inc. + * Copyright (c) 2013 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#ifndef WITHOUT_CAPSICUM +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mevent.h" +#include "uart_emul.h" +#include "debug.h" + +#define FIFOSZ 16 + +static bool uart_stdio; /* stdio in use for i/o */ +static struct termios tio_stdio_orig; + +struct fifo { + uint8_t buf[FIFOSZ]; + int rindex; /* index to read from */ + int windex; /* index to write to */ + int num; /* number of characters in the fifo */ + int size; /* size of the fifo */ +}; + +struct ttyfd { + bool opened; + int rfd; /* fd for reading */ + int wfd; /* fd for writing, may be == rfd */ +}; + +struct uart_backend { + struct fifo rxfifo; + struct mevent *mev; + struct ttyfd tty; +}; + +static void +ttyclose(void) +{ + + tcsetattr(STDIN_FILENO, TCSANOW, &tio_stdio_orig); +} + +static void +ttyopen(struct ttyfd *tf) +{ + struct termios orig, new; + + tcgetattr(tf->rfd, &orig); + new = orig; + cfmakeraw(&new); + new.c_cflag |= CLOCAL; + tcsetattr(tf->rfd, TCSANOW, &new); + if (uart_stdio) { + tio_stdio_orig = orig; + atexit(ttyclose); + } + raw_stdio = 1; +} + +static int +ttyread(struct ttyfd *tf) +{ + unsigned char rb; + + if (read(tf->rfd, &rb, 1) == 1) + return (rb); + else + return (-1); +} + +static void +ttywrite(struct ttyfd *tf, unsigned char wb) +{ + (void)write(tf->wfd, &wb, 1); +} + +void +uart_rxfifo_reset(struct uart_backend *sc, int size) +{ + char flushbuf[32]; + struct fifo *fifo; + ssize_t nread; + int error; + + fifo = &sc->rxfifo; + bzero(fifo, sizeof(struct fifo)); + fifo->size = size; + + if (sc->tty.opened) { + /* + * Flush any unread input from the tty buffer. + */ + while (1) { + nread = read(sc->tty.rfd, flushbuf, sizeof(flushbuf)); + if (nread != sizeof(flushbuf)) + break; + } + + /* + * Enable mevent to trigger when new characters are available + * on the tty fd. + */ + error = mevent_enable(sc->mev); + assert(error == 0); + } +} + +static int +rxfifo_available(struct uart_backend *sc) +{ + struct fifo *fifo; + + fifo = &sc->rxfifo; + return (fifo->num < fifo->size); +} + +static int +rxfifo_putchar(struct uart_backend *sc, uint8_t ch) +{ + struct fifo *fifo; + int error; + + fifo = &sc->rxfifo; + + if (fifo->num < fifo->size) { + fifo->buf[fifo->windex] = ch; + fifo->windex = (fifo->windex + 1) % fifo->size; + fifo->num++; + if (!rxfifo_available(sc)) { + if (sc->tty.opened) { + /* + * Disable mevent callback if the FIFO is full. 
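It is re-enabled from uart_rxfifo_getchar() once a character has been drained from a previously full FIFO, and from uart_rxfifo_reset(), so the tty is only polled while there is room to store more input.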
+ */ + error = mevent_disable(sc->mev); + assert(error == 0); + } + } + return (0); + } else + return (-1); +} + +int +uart_rxfifo_getchar(struct uart_backend *sc) +{ + struct fifo *fifo; + int c, error, wasfull; + + wasfull = 0; + fifo = &sc->rxfifo; + if (fifo->num > 0) { + if (!rxfifo_available(sc)) + wasfull = 1; + c = fifo->buf[fifo->rindex]; + fifo->rindex = (fifo->rindex + 1) % fifo->size; + fifo->num--; + if (wasfull) { + if (sc->tty.opened) { + error = mevent_enable(sc->mev); + assert(error == 0); + } + } + return (c); + } else + return (-1); +} + +int +uart_rxfifo_numchars(struct uart_backend *sc) +{ + struct fifo *fifo = &sc->rxfifo; + + return (fifo->num); +} + +void +uart_rxfifo_drain(struct uart_backend *b, bool loopback) +{ + int ch; + + if (loopback) { + (void) ttyread(&b->tty); + } else { + while (rxfifo_available(b) && + ((ch = ttyread(&b->tty)) != -1)) { + rxfifo_putchar(b, ch); + } + } +} + +bool +uart_rxfifo_write(struct uart_backend *b, bool loopback, uint8_t ch) +{ + if (loopback) { + if (rxfifo_putchar(b, ch) != 0) + return (false); + } else if (b->tty.opened) { + ttywrite(&b->tty, ch); + } /* else drop on floor */ + + return (true); +} + +static void +uart_opentty(struct uart_backend *sc, void (*func)(int, enum ev_type, void *), + void *arg) +{ + ttyopen(&sc->tty); + sc->mev = mevent_add(sc->tty.rfd, EVF_READ, func, arg); + assert(sc->mev != NULL); +} + +static int +uart_stdio_backend(struct uart_backend *sc) +{ +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; + cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; +#endif + + if (uart_stdio) + return (-1); + + sc->tty.rfd = STDIN_FILENO; + sc->tty.wfd = STDOUT_FILENO; + sc->tty.opened = true; + + if (fcntl(sc->tty.rfd, F_SETFL, O_NONBLOCK) != 0) + return (-1); + if (fcntl(sc->tty.wfd, F_SETFL, O_NONBLOCK) != 0) + return (-1); + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ); + if (caph_rights_limit(sc->tty.rfd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); + if (caph_ioctls_limit(sc->tty.rfd, cmds, nitems(cmds)) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + uart_stdio = true; + + return (0); +} + +static int +uart_tty_backend(struct uart_backend *sc, const char *opts) +{ +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; + cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; +#endif + int fd; + + fd = open(opts, O_RDWR | O_NONBLOCK); + if (fd < 0) + return (-1); + + if (!isatty(fd)) { + close(fd); + return (-1); + } + + sc->tty.rfd = sc->tty.wfd = fd; + sc->tty.opened = true; + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); + if (caph_rights_limit(fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); + if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + return (0); +} + +struct uart_backend * +uart_backend_alloc(void) +{ + struct uart_backend *b; + + b = calloc(1, sizeof(struct uart_backend)); + assert(b != NULL); + + return (b); +} + +int +uart_backend_open(struct uart_backend *b, const char *opts, + void (*func)(int, enum ev_type, void *), void *arg) +{ + int retval; + + if (opts == NULL) + return (0); + + if (strcmp("stdio", opts) == 0) + retval = uart_stdio_backend(b); + else + retval = uart_tty_backend(b, opts); + if (retval == 0) + uart_opentty(b, func, arg); + + return (retval); +} Index: usr.sbin/bhyve/mmio/uart_emul.h 
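For reference, a minimal sketch of how a device model could consume this UART API. The uart_attach() wrapper, the spi argument and the use of the vmctx as the interrupt-callback argument are illustrative assumptions, not part of this change; only uart_init(), uart_set_backend(), uart_read()/uart_write() and vm_assert_irq()/vm_deassert_irq() come from the patch itself.

#include <sys/types.h>
#include <stdint.h>
#include <pthread.h>
#include <err.h>

#include <vmmapi.h>

#include "uart_emul.h"

static void
uart_intr_assert(void *arg, uint32_t irq)
{
	/* Assumption: the opaque callback argument is the guest's vmctx. */
	vm_assert_irq((struct vmctx *)arg, irq, 0);
}

static void
uart_intr_deassert(void *arg, uint32_t irq)
{
	vm_deassert_irq((struct vmctx *)arg, irq, 0);
}

/* Hypothetical helper: create a PL011 instance and hook up a backend. */
static struct uart_softc *
uart_attach(struct vmctx *ctx, uint32_t spi, const char *opts)
{
	struct uart_softc *sc;

	sc = uart_init(uart_intr_assert, uart_intr_deassert, ctx);
	sc->irqno = spi;
	if (uart_set_backend(sc, opts) != 0)
		errx(1, "cannot open UART backend '%s'", opts);
	return (sc);
}

Guest accesses to the PL011 window would then be forwarded to uart_read()/uart_write(); note that the register defines above are word indices, i.e. the byte offset divided by four.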
=================================================================== --- /dev/null +++ usr.sbin/bhyve/mmio/uart_emul.h @@ -0,0 +1,66 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2013 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _UART_EMUL_H_ +#define _UART_EMUL_H_ + +typedef void (*uart_intr_func_t)(void *arg, uint32_t irq); + +struct uart_softc { + struct uart_backend *backend; + pthread_mutex_t mtx; /* protects all softc elements */ + + uint16_t irq_state; + + uint16_t rsr; + + uint16_t cr; + uint16_t ifls; + uint16_t imsc; + uint16_t lcr_h; + + uint16_t ibrd; + uint16_t fbrd; + + void *arg; + uint32_t irqno; + uart_intr_func_t intr_assert; + uart_intr_func_t intr_deassert; +}; + + +struct uart_softc *uart_init(uart_intr_func_t intr_assert, + uart_intr_func_t intr_deassert, void *arg); + +int uart_legacy_alloc(int unit, int *ioaddr, int *irq); +uint32_t uart_read(struct uart_softc *sc, int offset); +void uart_write(struct uart_softc *sc, int offset, uint32_t value); +int uart_set_backend(struct uart_softc *sc, const char *opt); +#endif Index: usr.sbin/bhyve/pci_ahci.c =================================================================== --- usr.sbin/bhyve/pci_ahci.c +++ usr.sbin/bhyve/pci_ahci.c @@ -63,6 +63,10 @@ #include "pci_emul.h" #include "ahci.h" #include "block_if.h" +#include "bhyverun.h" +#include "pci_emul.h" +#include "ahci.h" +#include "block_if.h" #define DEF_PORTS 6 /* Intel ICH8 AHCI supports 6 ports */ #define MAX_PORTS 32 /* AHCI supports 32 ports */ Index: usr.sbin/bhyve/pci_virtio_net.c =================================================================== --- usr.sbin/bhyve/pci_virtio_net.c +++ usr.sbin/bhyve/pci_virtio_net.c @@ -117,7 +117,7 @@ int resetting; /* protected by tx_mtx */ uint64_t vsc_features; /* negotiated features */ - + pthread_mutex_t rx_mtx; int rx_merge; /* merged rx bufs in use */ Index: usr.sbin/bhyvectl/Makefile =================================================================== --- usr.sbin/bhyvectl/Makefile +++ usr.sbin/bhyvectl/Makefile @@ -5,16 +5,13 @@ .include PROG= bhyvectl -SRCS= bhyvectl.c PACKAGE= bhyve -MAN= bhyvectl.8 - LIBADD= vmmapi util WARNS?= 3 -CFLAGS+= -I${SRCTOP}/sys/amd64/vmm +CFLAGS+= 
-I${SRCTOP}/sys/${MACHINE}/vmm .if ${MK_BHYVE_SNAPSHOT} != "no" CFLAGS+= -DBHYVE_SNAPSHOT @@ -24,4 +21,5 @@ CFLAGS+= -I${SRCTOP}/usr.sbin/bhyve .endif +.include "${.CURDIR}/${MACHINE}/Makefile.inc" .include Index: usr.sbin/bhyvectl/amd64/Makefile.inc =================================================================== --- /dev/null +++ usr.sbin/bhyvectl/amd64/Makefile.inc @@ -0,0 +1,7 @@ +# +# $FreeBSD$ +# +.PATH: ${.CURDIR}/amd64 + +SRCS= bhyvectl.c +MAN= bhyvectl.8 Index: usr.sbin/bhyvectl/arm64/Makefile.inc =================================================================== --- /dev/null +++ usr.sbin/bhyvectl/arm64/Makefile.inc @@ -0,0 +1,7 @@ +# +# $FreeBSD$ +# +.PATH: ${.CURDIR}/arm64 + +SRCS= bhyvectl.c +MAN= bhyvectl.8 Index: usr.sbin/bhyvectl/arm64/bhyvectl.8 =================================================================== --- /dev/null +++ usr.sbin/bhyvectl/arm64/bhyvectl.8 @@ -0,0 +1,97 @@ +.\" Copyright (c) 2015 Christian Brueffer +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd November 13, 2016 +.Dt BHYVECTL 8 +.Os +.Sh NAME +.Nm bhyvectl +.Nd "control utility for bhyve instances" +.Sh SYNOPSIS +.Nm +.Fl -vm= Ns Ar +.Op Fl -create +.Op Fl -destroy +.Op Fl -get-stats +.Op Fl -inject-nmi +.Op Fl -force-reset +.Op Fl -force-poweroff +.Sh DESCRIPTION +The +.Nm +command is a control utility for active +.Xr bhyve 8 +virtual machine instances. +.Pp +.Em Note : +Most +.Nm +flags are intended for querying and setting the state of an active instance. +These commands are intended for development purposes, and are not documented here. +A complete list can be obtained by executing +.Nm +without any arguments. +.Pp +The user-facing options are as follows: +.Bl -tag -width ".Fl d Ar argument" +.It Fl -vm= Ns Ar +Operate on the virtual machine +.Ar . +.It Fl -create +Create the specified VM. +.It Fl -destroy +Destroy the specified VM. +.It Fl -get-stats +Retrieve statistics for the specified VM. +.It Fl -inject-nmi +Inject a non-maskable interrupt (NMI) into the VM. +.It Fl -force-reset +Force the VM to reset. +.It Fl -force-poweroff +Force the VM to power off. 
+.El +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Destroy the VM called fbsd10: +.Pp +.Dl "bhyvectl --vm=fbsd10 --destroy" +.Sh SEE ALSO +.Xr bhyve 8 , +.Xr bhyveload 8 +.Sh HISTORY +The +.Nm +command first appeared in +.Fx 10.1 . +.Sh AUTHORS +.An -nosplit +The +.Nm +utility was written by +.An Peter Grehan +and +.An Neel Natu . Index: usr.sbin/bhyvectl/arm64/bhyvectl.c =================================================================== --- /dev/null +++ usr.sbin/bhyvectl/arm64/bhyvectl.c @@ -0,0 +1,143 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define MB (1UL << 20) +#define GB (1UL << 30) + +#define REQ_ARG required_argument +#define NO_ARG no_argument +#define OPT_ARG optional_argument + +#define eprintf(fmt, ...) 
printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__) + +static const char *progname; + +static void +usage() +{ + + (void)fprintf(stderr, + "Usage: %s --vm=\n" + " %*s [--destroy]\n", + progname, (int)strlen(progname), ""); + exit(1); +} + +static int create; +static int destroy; + +enum { + VMNAME = 1000, /* avoid collision with return values from getopt */ +}; + +const struct option opts[] = { + { "vm", REQ_ARG, NULL, VMNAME }, + { "destroy", NO_ARG, &destroy, 1 }, + { NULL, 0, NULL, 1 }, +}; + +int +main(int argc, char *argv[]) +{ + char *vmname; + int error, ch; + struct vmctx *ctx; + + vmname = NULL; + progname = basename(argv[0]); + + while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) { + switch (ch) { + case 0: + break; + case VMNAME: + vmname = optarg; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (vmname == NULL) + usage(); + + error = 0; + if (!error && create) + error = vm_create(vmname); + if (!error) { + ctx = vm_open(vmname); + if (ctx == NULL) { + printf("VM:%s is not created.\n", vmname); + exit(1); + } + } + + + if (error) + printf("errno = %d\n", errno); + + if (!error && destroy) + vm_destroy(ctx); + + exit(error); +} Index: usr.sbin/bhyveload/Makefile =================================================================== --- usr.sbin/bhyveload/Makefile +++ usr.sbin/bhyveload/Makefile @@ -1,14 +1,17 @@ # $FreeBSD$ PROG= bhyveload -SRCS= bhyveload.c -MAN= bhyveload.8 PACKAGE= bhyve +BHYVELOAD_SYSDIR?=${SRCTOP} +BHYVELOAD_SRCTOP?=${.CURDIR} + LIBADD= vmmapi WARNS?= 3 CFLAGS+=-I${SRCTOP}/stand/userboot +.include "${BHYVELOAD_SRCTOP}/${MACHINE}/Makefile.inc" + .include Index: usr.sbin/bhyveload/amd64/Makefile.inc =================================================================== --- /dev/null +++ usr.sbin/bhyveload/amd64/Makefile.inc @@ -0,0 +1,7 @@ +# $FreeBSD$ +.PATH: ${BHYVELOAD_SRCTOP}/amd64/ + +SRCS= bhyveload.c +MAN= bhyveload.8 + +CFLAGS+=-I${SRCTOP}/sys/boot/userboot Index: usr.sbin/bhyveload/arm64/Makefile.inc =================================================================== --- /dev/null +++ usr.sbin/bhyveload/arm64/Makefile.inc @@ -0,0 +1,13 @@ +# $FreeBSD$ +LIBADD+= util + +.PATH: ${BHYVELOAD_SRCTOP}/arm64/ + +SRCS= bhyveload.c \ + boot.c + +.PATH: ${.CURDIR}/../../sys/arm64/vmm + +CFLAGS += -I${.CURDIR}/../../stand/common + +MK_MAN=no Index: usr.sbin/bhyveload/arm64/bhyveload.c =================================================================== --- /dev/null +++ usr.sbin/bhyveload/arm64/bhyveload.c @@ -0,0 +1,470 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "boot.h" + +#define gvatovm(addr) ((uint64_t)(addr) - KERNBASE + \ + kernel_load_address - memory_base_address) +#define overlap(x_start, x_end, y_start, y_end) \ + ((x_start) >= (y_start) && (x_start) < (y_end) || \ + (x_end) >= (y_start) && (x_end) < (y_end)) + +#define MB (1024 * 1024UL) +#define BSP 0 +#define KERNEL_IMAGE_NAME_LEN 256 + +#define GIC_V3_DIST_START 0x2f000000UL +#define GIC_V3_DIST_SIZE 0x10000UL +#define GIC_V3_REDIST_START 0x2f100000UL +#define GIC_V3_REDIST_SIZE 0x200000UL + +struct env { + const char *str; + SLIST_ENTRY(env) next; +}; +static SLIST_HEAD(envhead, env) envhead; + +static uint64_t memory_base_address, kernel_load_address; + +static char *vmname, *progname; +static struct vmctx *ctx; + +static int +env_add(const char *str) +{ + struct env *env; + + env = malloc(sizeof(*env)); + if (env == NULL) + return (ENOMEM); + env->str = str; + SLIST_INSERT_HEAD(&envhead, env, next); + + return (0); +} + +static int +env_tostr(char **envstrp, int *envlen) +{ + struct env *env; + int i; + + *envlen = 0; + SLIST_FOREACH(env, &envhead, next) + *envlen = *envlen + strlen(env->str) + 1; + /* Make room for the two terminating zeroes */ + if (*envlen == 0) + *envlen = 2; + else + (*envlen)++; + + *envstrp = malloc(*envlen * sizeof(char)); + if (*envstrp == NULL) + return (ENOMEM); + + i = 0; + SLIST_FOREACH(env, &envhead, next) { + strncpy(*envstrp + i, env->str, strlen(env->str)); + i += strlen(env->str); + (*envstrp)[i++] = 0; + } + (*envstrp)[i] = 0; + + /* + * At this point we have envstr[0] == 0 if the environment is empty. + * Add the second 0 to properly terminate the environment string. + */ + if (SLIST_EMPTY(&envhead)) + (*envstrp)[1] = 0; + + /* + for (i = 0; i < *envlen; i++) + printf("%d ", (int)(*envstrp)[i]); + printf("\n"); + */ + + return (0); +} + +/* + * Guest virtual machinee + */ +static int +guest_copyin(const void *from, uint64_t to, size_t size) +{ + char *ptr; + ptr = vm_map_ipa(ctx, to, size); + if (ptr == NULL) + return (EFAULT); + + memcpy(ptr, from, size); + return (0); +} + +static int +guest_copyout(uint64_t from, void *to, size_t size) +{ + char *ptr; + + ptr = vm_map_ipa(ctx, from, size); + if (ptr == NULL) + return (EFAULT); + + memcpy(to, ptr, size); + return (0); +} + +static void +guest_setreg(enum vm_reg_name vmreg, uint64_t v) +{ + int error; + + error = vm_set_register(ctx, BSP, vmreg, v); + if (error) + perror("vm_set_register"); +} + +#if 0 +static int +parse_memsize(const char *optarg, size_t *ret_memsize) +{ + char *endptr; + size_t optval; + int error; + + optval = strtoul(optarg, &endptr, 0); + if (*optarg != '\0' && *endptr == '\0') { + /* Memory size must be at least one megabyte. 
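A bare number smaller than one megabyte is therefore interpreted as a count of megabytes, while suffixed values such as 512m or 4g fall through to expand_number(3).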
*/ + if (optval < MB) + optval = optval * MB; + *ret_memsize = optval; + error = 0; + } else { + error = expand_number(optarg, ret_memsize); + } + + return (error); +} +#endif + +static void +usage(int code) +{ + fprintf(stderr, + "Usage: %s [-h] [-k ] [-d dtb-offset] [-t ] [-e ] [-b base-address]\n" + " %*s [-m mem-size] [-l load-address] \n" + " -k: path to guest kernel image\n" + " -d: where to load the device tree, an offset from the start of the kernel address\n" + " -t: path to guest device tree file\n" + " -e: guest boot environment\n" + " -b: memory base address\n" + " -m: memory size\n" + " -l: kernel load address in the guest physical memory\n" + " -h: help\n", + progname, (int)strlen(progname), ""); + exit(code); +} + +int +main(int argc, char** argv) +{ + struct vm_bootparams bootparams; + uint64_t mem_size; + int opt, error; + int kernel_image_fd, dtb_fd; + uint64_t periphbase; + char kernel_image_name[KERNEL_IMAGE_NAME_LEN]; + char device_tree_name[KERNEL_IMAGE_NAME_LEN]; + struct stat st, dtb_st; + void *addr, *dtb_addr; + char *envstr; + int envlen; + uint64_t dtb_address = 0x0; + bool dtb_address_is_offset = false; + bool use_dtb_file = false; + + progname = basename(argv[0]); + + mem_size = 128 * MB; + memory_base_address = VM_GUEST_BASE_IPA; + kernel_load_address = memory_base_address; + periphbase = 0x2c000000UL; + strncpy(kernel_image_name, "kernel.bin", KERNEL_IMAGE_NAME_LEN); + memset(&bootparams, 0, sizeof(struct vm_bootparams)); + + while ((opt = getopt(argc, argv, "hk:l:b:m:e:d:t:")) != -1) { + switch (opt) { + case 't': + strncpy(device_tree_name, optarg, KERNEL_IMAGE_NAME_LEN); + use_dtb_file = true; + break; + case 'd': + dtb_address = strtoul(optarg, NULL, 0); + dtb_address_is_offset = true; + break; + case 'k': + strncpy(kernel_image_name, optarg, KERNEL_IMAGE_NAME_LEN); + break; + case 'l': + kernel_load_address = strtoul(optarg, NULL, 0); + break; + case 'b': + memory_base_address = strtoul(optarg, NULL, 0); + break; + case 'm': + error = vm_parse_memsize(optarg, &mem_size); + if (error) { + fprintf(stderr, "Invalid memsize '%s'\n", optarg); + exit(1); + } + break; + case 'e': + error = env_add(optarg); + if (error) { + perror("env_add"); + exit(1); + } + break; + case 'h': + usage(0); + default: + fprintf(stderr, "Unknown argument '%c'\n", opt); + usage(1); + } + } + + argc -= optind; + argv += optind; + + if (argc != 1) { + fprintf(stderr, "Missing or unknown arguments\n"); + usage(1); + } + + if (kernel_load_address < memory_base_address) { + fprintf(stderr, "Kernel load address is below memory base address\n"); + exit(1); + } + + vmname = argv[0]; + + kernel_image_fd = open(kernel_image_name, O_RDONLY); + if (kernel_image_fd == -1) { + perror("open kernel_image_name"); + exit(1); + } + + error = vm_create(vmname); + if (error) { + perror("vm_create"); + exit(1); + } + + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + error = vm_setup_memory(ctx, memory_base_address, mem_size, VM_MMAP_ALL); + if (error) { + perror("vm_setup_memory"); + exit(1); + } + + error = fstat(kernel_image_fd, &st); + if (error) { + perror("fstat"); + exit(1); + } + + if ((uint64_t)st.st_size > mem_size) { + fprintf(stderr, "Kernel image larger than memory size\n"); + exit(1); + } + if (kernel_load_address + st.st_size >= memory_base_address + mem_size) { + fprintf(stderr, "Kernel image out of bounds of guest memory\n"); + exit(1); + } + + addr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, kernel_image_fd, 0); + if (addr == MAP_FAILED) { + 
perror("mmap kernel_image_fd"); + exit(1); + } + + if (guest_copyin(addr, kernel_load_address - memory_base_address, st.st_size) != 0) { + perror("guest_copyin"); + exit(1); + } + + error = env_tostr(&envstr, &envlen); + if (error) { + perror("parse boot environment\n"); + exit(1); + } + + bootparams.envstr = envstr; + bootparams.envlen = envlen; + error = parse_kernel(addr, st.st_size, ctx, &bootparams); + if (error) { + fprintf(stderr, "Error parsing image\n"); + exit(1); + } + + if (dtb_address == 0) + dtb_address = kernel_load_address + st.st_size; + else if (dtb_address_is_offset) + dtb_address += kernel_load_address; + + if (use_dtb_file) { + dtb_fd = open(device_tree_name, O_RDONLY); + if (dtb_fd == -1) { + perror("open device_tree_name"); + exit(1); + } + + error = fstat(dtb_fd, &dtb_st); + if (error) { + perror("fstat"); + exit(1); + } + + dtb_addr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, dtb_fd, 0); + if (dtb_addr == MAP_FAILED) { + perror("mmap dtb_fd"); + exit(1); + } + + if (guest_copyin(dtb_addr, dtb_address, dtb_st.st_size) != 0) { + perror("guest_copyin"); + exit(1); + } + + free(bootparams.modulep); + + bootparams.modulep = calloc(1, dtb_st.st_size); + if (bootparams.modulep == NULL) { + perror("calloc"); + return (ENOMEM); + } + + memcpy(bootparams.modulep, dtb_addr, dtb_st.st_size); + + bootparams.module_len = dtb_st.st_size; + } + + /* + fprintf(stderr, "bootparams.envp_gva = 0x%016lx\n", bootparams.envp_gva); + fprintf(stderr, "gvatom(bootparams.envp_gva) = 0x%016lx\n", gvatovm(bootparams.envp_gva)); + fprintf(stderr, "vm_map_ipa() = 0x%016lx\n", (uint64_t)vm_map_ipa(ctx, gvatovm(bootparams.envp_gva), PAGE_SIZE)); + fprintf(stderr, "\n"); + + fprintf(stderr, "bootparams.mudulep_gva = 0x%016lx\n", bootparams.modulep_gva); + fprintf(stderr, "gvatom(bootparams.modulep_gva) = 0x%016lx\n", gvatovm(bootparams.modulep_gva)); + fprintf(stderr, "vm_map_ipa() = 0x%016lx\n", (uint64_t)vm_map_ipa(ctx, gvatovm(bootparams.modulep_gva), PAGE_SIZE)); + fprintf(stderr, "\n"); + */ + + /* Copy the environment string in the guest memory */ + if (guest_copyin((void *)envstr, gvatovm(bootparams.envp_gva), envlen) != 0) { + perror("guest_copyin"); + exit(1); + } + + /* Copy the module data in the guest memory */ + if (guest_copyin(bootparams.modulep, gvatovm(bootparams.modulep_gva), bootparams.module_len) != 0) { + perror("guest_copyin"); + exit(1); + } + + uint64_t mem_end = memory_base_address + mem_size; + uint64_t dist_end = GIC_V3_DIST_START + GIC_V3_DIST_SIZE; + uint64_t redist_end = GIC_V3_REDIST_START + GIC_V3_REDIST_SIZE; + + if (overlap(GIC_V3_DIST_SIZE, dist_end, memory_base_address, mem_end)) { + fprintf(stderr, "Guest memory overlaps with VGIC Distributor\n"); + exit(1); + } + + if (overlap(GIC_V3_REDIST_SIZE, redist_end, memory_base_address, mem_end)) { + fprintf(stderr, "Guest memory overlaps with VGIC Redistributor\n"); + exit(1); + } + + error = vm_attach_vgic(ctx, GIC_V3_DIST_START, GIC_V3_DIST_SIZE, + GIC_V3_REDIST_START, GIC_V3_REDIST_SIZE); + if (error) { + fprintf(stderr, "Error attaching VGIC to the virtual machine\n"); + exit(1); + } + + munmap(addr, st.st_size); + if (use_dtb_file) + munmap(dtb_addr, dtb_st.st_size); + + /* TODO: If we want to boot Linux, this entry_off should be not fine + * bootparams.entry_off = 0x80000; + * Based on the Linux ARM64/boot documentation + */ + guest_setreg(VM_REG_ELR_EL2, kernel_load_address + bootparams.entry_off); + guest_setreg(VM_REG_GUEST_X0, bootparams.modulep_gva); + + return 0; +} Index: 
usr.sbin/bhyveload/arm64/boot.h =================================================================== --- /dev/null +++ usr.sbin/bhyveload/arm64/boot.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _BOOT_H_ +#define _BOOT_H_ + + + +struct vm_bootparams { + uint64_t entry_off; + uint64_t modulep_gva; /* Guest virtual address of modulep data */ + uint64_t envp_gva; /* Guest virtual address for env */ + char *envstr; + int envlen; + int module_len; + void *modulep; /* Bhyveload address of modulep data */ +}; + +int parse_kernel(void *addr, size_t img_size, struct vmctx *ctx, + struct vm_bootparams *bootparams); + +#endif Index: usr.sbin/bhyveload/arm64/boot.c =================================================================== --- /dev/null +++ usr.sbin/bhyveload/arm64/boot.c @@ -0,0 +1,622 @@ +/* + * Copyright (C) 2015-2021 Mihai Carabas + * Copyright (C) 2017-2019 Alexandru Elisei + * Copyright (C) 2017-2021 Darius Mihai + * Copyright (C) 2019-2021 Andrei Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "boot.h" + +#define gvatou(gva, addr) ((vm_offset_t)(gva) - KERNBASE + (vm_offset_t)(addr)) + +struct elf_file { + Elf_Phdr *ph; + Elf_Ehdr *ehdr; + Elf_Sym *symtab; + Elf_Hashelt *hashtab; + Elf_Hashelt nbuckets; + Elf_Hashelt nchains; + Elf_Hashelt *buckets; + Elf_Hashelt *chains; + Elf_Rel *rel; + size_t relsz; + Elf_Rela *rela; + size_t relasz; + char *strtab; + size_t strsz; + caddr_t firstpage_u; /* Userspace address of mmap'ed guest kernel */ +}; + +static uint64_t parse_image(struct preloaded_file *img, struct elf_file *ef); +static void image_addmetadata(struct preloaded_file *img, int type, + size_t size, void *addr); +static int image_addmodule(struct preloaded_file *img, char *modname, int version); +static void parse_metadata(struct preloaded_file *img, struct elf_file *ef, + Elf_Addr p_startu, Elf_Addr p_endu); +static int lookup_symbol(struct elf_file *ef, const char *name, Elf_Sym *symp); +static struct kernel_module *image_findmodule(struct preloaded_file *img, char *modname, + struct mod_depend *verinfo); +static uint64_t moddata_len(struct preloaded_file *img); +static void moddata_copy(vm_offset_t dest, struct preloaded_file *img); + +static int +load_elf_header(struct elf_file *ef) +{ + Elf_Ehdr *ehdr; + + ehdr = ef->ehdr = (Elf_Ehdr *)ef->firstpage_u; + /* Is it ELF? */ + if (!IS_ELF(*ehdr)) + return (EFTYPE); + + if (ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||/* Layout ? */ + ehdr->e_ident[EI_DATA] != ELF_TARG_DATA || + ehdr->e_ident[EI_VERSION] != EV_CURRENT || /* Version ? */ + ehdr->e_version != EV_CURRENT || + ehdr->e_machine != ELF_TARG_MACH) /* Machine ? 
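ELF_TARG_MACH is EM_AARCH64 for an arm64 build.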
*/ + return (EFTYPE); + + return (0); +} + +static caddr_t +preload_search_by_type(const char *type, caddr_t preload_metadata) +{ + caddr_t curp, lname; + uint32_t *hdr; + int next; + + if (preload_metadata != NULL) { + + curp = preload_metadata; + lname = NULL; + for (;;) { + hdr = (uint32_t *)curp; + if (hdr[0] == 0 && hdr[1] == 0) + break; + + /* remember the start of each record */ + if (hdr[0] == MODINFO_NAME) + lname = curp; + + /* Search for a MODINFO_TYPE field */ + if ((hdr[0] == MODINFO_TYPE) && + !strcmp(type, curp + sizeof(uint32_t) * 2)) + return(lname); + + /* skip to next field */ + next = sizeof(uint32_t) * 2 + hdr[1]; + next = roundup(next, sizeof(u_long)); + curp += next; + } + } + return(NULL); +} + +int +parse_kernel(void *addr, size_t img_size, struct vmctx *ctx, + struct vm_bootparams *bootparams) +{ + struct elf_file ef; + struct preloaded_file img; + Elf_Ehdr *ehdr_u; + int err; + vm_offset_t lastaddr_gva; + uint64_t kernend; + uint64_t size; + uint64_t modlen; + int boothowto; + + //fprintf(stderr, "[PARSE_KERNEL]\n\n"); + + memset(&ef, 0, sizeof(struct elf_file)); + memset(&img, 0, sizeof(struct preloaded_file)); + + ef.firstpage_u = (caddr_t)addr; + err = load_elf_header(&ef); + if (err != 0) + return (err); + + ehdr_u = ef.ehdr; + if (ehdr_u->e_type != ET_EXEC) { + fprintf(stderr, "Image not a kernel\n"); + return (EPERM); + } + img.f_name = "elf kernel"; + img.f_type = "elf kernel"; + img.f_size = img_size; + + size = parse_image(&img, &ef); + if (size == 0) + return (ENOEXEC); + bootparams->entry_off = ehdr_u->e_entry - KERNBASE; + + image_addmetadata(&img, MODINFOMD_ELFHDR, sizeof(*ehdr_u), ehdr_u); + + /* XXX: Add boothowto options? */ + boothowto = 0; + image_addmetadata(&img, MODINFOMD_HOWTO, sizeof(boothowto), &boothowto); + + lastaddr_gva = roundup(img.f_addr + img.f_size + 0x3fd000, PAGE_SIZE); + image_addmetadata(&img, MODINFOMD_ENVP, sizeof(lastaddr_gva), &lastaddr_gva); + bootparams->envp_gva = lastaddr_gva; + + lastaddr_gva = roundup(lastaddr_gva + bootparams->envlen, PAGE_SIZE); + /* Module data start in the guest kernel virtual address space */ + bootparams->modulep_gva = lastaddr_gva; + + modlen = moddata_len(&img); + kernend = roundup(bootparams->modulep_gva + modlen, PAGE_SIZE); + image_addmetadata(&img, MODINFOMD_KERNEND, sizeof(kernend), &kernend); + + bootparams->module_len = roundup(modlen, PAGE_SIZE); + bootparams->modulep = calloc(1, bootparams->module_len); + if (bootparams->modulep == NULL) { + perror("calloc"); + return (ENOMEM); + } + + moddata_copy((vm_offset_t)bootparams->modulep, &img); + + return (0); +} + +static uint64_t +parse_image(struct preloaded_file *img, struct elf_file *ef) +{ + Elf_Ehdr *ehdr; + Elf_Phdr *phdr; + Elf_Phdr *php; + Elf_Shdr *shdr; + Elf_Dyn *dp; + Elf_Addr adp; + Elf_Addr ctors; + Elf_Addr ssym, esym; + Elf_Addr p_start, p_end; + Elf_Size size; + Elf_Sym sym; + vm_offset_t firstaddr, lastaddr; + vm_offset_t shstr_addr; + char *shstr; + int symstrindex; + int symtabindex; + size_t chunk_len; + uint64_t ret; + int ndp; + int i; + unsigned int j; + + dp = NULL; + shdr = NULL; + ret = 0; + + ehdr = ef->ehdr; + phdr = (Elf_Phdr *)(ef->firstpage_u + ehdr->e_phoff); + + firstaddr = lastaddr = 0; + for (i = 0; i < ehdr->e_phnum; i++) { + if (phdr[i].p_type != PT_LOAD) + continue; + if (firstaddr == 0 || firstaddr > phdr[i].p_vaddr) + firstaddr = phdr[i].p_vaddr; + /* We mmap'ed the kernel, so p_memsz == p_filesz. 
*/ + if (lastaddr == 0 || lastaddr < (phdr[i].p_vaddr + phdr[i].p_filesz)) + lastaddr = phdr[i].p_vaddr + phdr[i].p_filesz; + } + lastaddr = roundup(lastaddr, sizeof(long)); + + /* + * Get the section headers. We need this for finding the .ctors + * section as well as for loading any symbols. Both may be hard + * to do if reading from a .gz file as it involves seeking. I + * think the rule is going to have to be that you must strip a + * file to remove symbols before gzipping it. + */ + chunk_len = ehdr->e_shnum * ehdr->e_shentsize; + if (chunk_len == 0 || ehdr->e_shoff == 0) + goto nosyms; + shdr = (Elf_Shdr *)(ef->firstpage_u + ehdr->e_shoff); + image_addmetadata(img, MODINFOMD_SHDR, chunk_len, shdr); + + /* + * Read the section string table and look for the .ctors section. + * We need to tell the kernel where it is so that it can call the + * ctors. + */ + chunk_len = shdr[ehdr->e_shstrndx].sh_size; + if (chunk_len > 0) { + shstr_addr = (vm_offset_t)(ef->firstpage_u + \ + shdr[ehdr->e_shstrndx].sh_offset); + shstr = malloc(chunk_len); + memcpy(shstr, (void *)shstr_addr, chunk_len); + for (i = 0; i < ehdr->e_shnum; i++) { + if (strcmp(shstr + shdr[i].sh_name, ".ctors") != 0) + continue; + ctors = shdr[i].sh_addr; + image_addmetadata(img, MODINFOMD_CTORS_ADDR, + sizeof(ctors), &ctors); + size = shdr[i].sh_size; + image_addmetadata(img, MODINFOMD_CTORS_SIZE, + sizeof(size), &size); + break; + } + free(shstr); + } + + /* + * Now load any symbols. + */ + symtabindex = -1; + symstrindex = -1; + for (i = 0; i < ehdr->e_shnum; i++) { + if (shdr[i].sh_type != SHT_SYMTAB) + continue; + for (j = 0; j < ehdr->e_phnum; j++) { + if (phdr[j].p_type != PT_LOAD) + continue; + if (shdr[i].sh_offset >= phdr[j].p_offset && + (shdr[i].sh_offset + shdr[i].sh_size <= + phdr[j].p_offset + phdr[j].p_filesz)) { + shdr[i].sh_offset = 0; + shdr[i].sh_size = 0; + break; + } + } + if (shdr[i].sh_offset == 0 || shdr[i].sh_size == 0) + continue; /* alread loaded in a PT_LOAD above */ + /* Save it for loading below */ + symtabindex = i; + symstrindex = shdr[i].sh_link; + } + if (symtabindex < 0 || symstrindex < 0) + goto nosyms; + + ssym = lastaddr; + i = symtabindex; + for (;;) { + size = shdr[i].sh_size; + lastaddr += sizeof(size); + lastaddr += shdr[i].sh_size; + lastaddr = roundup(lastaddr, sizeof(size)); + + if (i == symtabindex) + i = symstrindex; + else if (i == symstrindex) + break; + } + esym = lastaddr; + + image_addmetadata(img, MODINFOMD_SSYM, sizeof(ssym), &ssym); + image_addmetadata(img, MODINFOMD_ESYM, sizeof(esym), &esym); + +nosyms: + ret = lastaddr - firstaddr; + img->f_addr = firstaddr; + + php = NULL; + for (i = 0; i < ehdr->e_phnum; i++) { + if (phdr[i].p_type == PT_DYNAMIC) { + php = &phdr[i]; + adp = php->p_vaddr; + image_addmetadata(img, MODINFOMD_DYNAMIC, + sizeof(adp), &adp); + break; + } + } + if (php == NULL) + goto out; + ndp = php->p_filesz / sizeof(Elf_Dyn); + if (ndp == 0) + goto out; + + ef->strsz = 0; + dp = (Elf_Dyn *)(ef->firstpage_u + php->p_offset); + for (i = 0; i < ndp; i++) { + if (dp[i].d_tag == 0) + break; + switch(dp[i].d_tag) { + case DT_HASH: + ef->hashtab = (Elf_Hashelt *)(uintptr_t)dp[i].d_un.d_ptr; + break; + case DT_STRTAB: + ef->strtab = (char *)(uintptr_t)dp[i].d_un.d_ptr; + case DT_STRSZ: + ef->strsz = dp[i].d_un.d_val; + break; + case DT_SYMTAB: + ef->symtab = (Elf_Sym *)(uintptr_t)dp[i].d_un.d_ptr; + break; + case DT_REL: + ef->rel = (Elf_Rel *)(uintptr_t)dp[i].d_un.d_ptr; + break; + case DT_RELSZ: + ef->relsz = dp[i].d_un.d_val; + break; + case DT_RELA: + ef->rela 
= (Elf_Rela *)(uintptr_t)dp[i].d_un.d_ptr; + break; + case DT_RELASZ: + ef->relasz = dp[i].d_un.d_val; + break; + } + } + if (ef->hashtab == NULL || ef->symtab == NULL || + ef->strtab == NULL || ef->strsz == 0) + goto out; + + memcpy(&ef->nbuckets, (void *)gvatou(ef->hashtab, ef->firstpage_u), sizeof(ef->nbuckets)); + memcpy(&ef->nchains, (void *)gvatou(ef->hashtab + 1, ef->firstpage_u), sizeof(ef->nchains)); + ef->buckets = (Elf_Hashelt *)gvatou(ef->hashtab + 2, ef->firstpage_u); + ef->chains = ef->buckets + ef->nbuckets; + + if (lookup_symbol(ef, "__start_set_modmetadata_set", &sym) != 0) { + ret = 0; + goto out; + } + p_start = gvatou(sym.st_value, ef->firstpage_u); + if (lookup_symbol(ef, "__stop_set_modmetadata_set", &sym) != 0) { + ret = ENOENT; + goto out; + } + p_end = gvatou(sym.st_value, ef->firstpage_u); + parse_metadata(img, ef, p_start, p_end); + +out: + return ret; +} + +static uint64_t +moddata_len(struct preloaded_file *img) +{ + struct file_metadata *md; + uint64_t len; + + /* Count the kernel image name */ + len = 8 + roundup(strlen(img->f_name) + 1, sizeof(uint64_t)); + /* Count the kernel's type */ + len += 8 + roundup(strlen(img->f_type) + 1, sizeof(uint64_t)); + /* Count the kernel's virtual address */ + len += 8 + roundup(sizeof(img->f_addr), sizeof(uint64_t)); + /* Count the kernel's size */ + len += 8 + roundup(sizeof(img->f_size), sizeof(uint64_t)); + /* Count the metadata size */ + for (md = img->f_metadata; md != NULL; md = md->md_next) + len += 8 + roundup(md->md_size, sizeof(uint64_t)); + + return len; +} + +#define COPY32(dest, what) \ + do { \ + uint32_t w = (what); \ + memcpy((void *)dest, &w, sizeof(w)); \ + dest += sizeof(w); \ + } while (0) + +#define COPY_MODINFO(modinfo, dest, val, len) \ + do { \ + COPY32(dest, modinfo); \ + COPY32(dest, len); \ + memcpy((void *)dest, val, len); \ + dest += roundup(len, sizeof(uint64_t)); \ + } while (0) + +#define COPY_MODEND(dest) \ + do { \ + COPY32(dest, MODINFO_END); \ + COPY32(dest, 0); \ + } while (0); + +static void +moddata_copy(vm_offset_t dest, struct preloaded_file *img) +{ + struct file_metadata *md; + + COPY_MODINFO(MODINFO_NAME, dest, img->f_name, strlen(img->f_name) + 1); + COPY_MODINFO(MODINFO_TYPE, dest, img->f_type, strlen(img->f_type) + 1); + COPY_MODINFO(MODINFO_ADDR, dest, &img->f_addr, sizeof(img->f_addr)); + COPY_MODINFO(MODINFO_SIZE, dest, &img->f_size, sizeof(img->f_size)); + + for (md = img->f_metadata; md != NULL; md = md->md_next) + COPY_MODINFO(MODINFO_METADATA | md->md_type, dest, + md->md_data, md->md_size); + + COPY_MODEND(dest); +} + +static void +image_addmetadata(struct preloaded_file *img, int type, + size_t size, void *addr) +{ + struct file_metadata *md; + + md = malloc(sizeof(struct file_metadata) - sizeof(md->md_data) + size); + md->md_size = size; + md->md_type = type; + memcpy(md->md_data, addr, size); + md->md_next = img->f_metadata; + img->f_metadata = md; +} + +static uint64_t +elf_hash(const char *name) +{ + const unsigned char *p = (const unsigned char *)name; + uint64_t h; + uint64_t g; + + h = 0; + while (*p != '\0') { + h = (h << 4) + *p++; + if ((g = h & 0xf0000000) != 0) + h ^= g >> 24; + h &= ~g; + } + + return h; +} + +static int +lookup_symbol(struct elf_file *ef, const char *name, Elf_Sym *symp) +{ + Elf_Hashelt symnum; + Elf_Sym sym; + char *strp; + uint64_t hash; + + hash = elf_hash(name); + memcpy(&symnum, &ef->buckets[hash % ef->nbuckets], sizeof(symnum)); + + while (symnum != STN_UNDEF) { + if (symnum >= ef->nchains) { + fprintf(stderr, "lookup_symbol: 
corrupt symbol table\n");
+			return ENOENT;
+		}
+
+		memcpy(&sym, (void *)gvatou(ef->symtab + symnum, ef->firstpage_u), sizeof(sym));
+		if (sym.st_name == 0) {
+			fprintf(stderr, "lookup_symbol: corrupt symbol table\n");
+			return ENOENT;
+		}
+
+		strp = strdup((char *)gvatou(ef->strtab + sym.st_name, ef->firstpage_u));
+		if (strcmp(name, strp) == 0) {
+			free(strp);
+			if (sym.st_shndx != SHN_UNDEF ||
+			    (sym.st_value != 0 &&
+			    ELF_ST_TYPE(sym.st_info) == STT_FUNC)) {
+				*symp = sym;
+				return 0;
+			}
+			return ENOENT;
+		}
+		free(strp);
+		memcpy(&symnum, &ef->chains[symnum], sizeof(symnum));
+	}
+
+	return ENOENT;
+}
+
+static void
+parse_metadata(struct preloaded_file *img, struct elf_file *ef,
+    Elf_Addr p_startu, Elf_Addr p_endu)
+{
+	struct mod_metadata md;
+	struct mod_version mver;
+	char *s;
+	int modcnt;
+	Elf_Addr v, p;
+
+	modcnt = 0;
+	for (p = p_startu; p < p_endu; p += sizeof(Elf_Addr)) {
+		memcpy(&v, (void *)p, sizeof(v));
+		memcpy(&md, (void *)gvatou(v, ef->firstpage_u), sizeof(md));
+		if (md.md_type == MDT_VERSION) {
+			s = strdup((char *)gvatou(md.md_cval, ef->firstpage_u));
+			memcpy(&mver,
+			    (void *)gvatou(md.md_data, ef->firstpage_u),
+			    sizeof(mver));
+			image_addmodule(img, s, mver.mv_version);
+			free(s);
+			modcnt++;
+		}
+	}
+
+	/* No module metadata found; register the kernel itself. */
+	if (modcnt == 0)
+		image_addmodule(img, "kernel", 1);
+}
+
+static int
+image_addmodule(struct preloaded_file *img, char *modname, int version)
+{
+	struct kernel_module *mp;
+	struct mod_depend mdepend;
+
+	bzero(&mdepend, sizeof(mdepend));
+	mdepend.md_ver_preferred = version;
+
+	mp = image_findmodule(img, modname, &mdepend);
+	if (mp)
+		return (EEXIST);
+	mp = malloc(sizeof(struct kernel_module));
+	if (mp == NULL)
+		return (ENOMEM);
+
+	bzero(mp, sizeof(struct kernel_module));
+	mp->m_name = strdup(modname);
+	mp->m_version = version;
+	mp->m_fp = img;
+	mp->m_next = img->f_modules;
+	img->f_modules = mp;
+
+	return (0);
+}
+
+static struct kernel_module *
+image_findmodule(struct preloaded_file *img, char *modname,
+    struct mod_depend *verinfo)
+{
+	struct kernel_module *mp, *best;
+	int bestver, mver;
+
+	best = NULL;
+	bestver = 0;
+	for (mp = img->f_modules; mp != NULL; mp = mp->m_next) {
+		if (strcmp(modname, mp->m_name) == 0) {
+			if (verinfo == NULL)
+				return (mp);
+			mver = mp->m_version;
+			if (mver == verinfo->md_ver_preferred)
+				return (mp);
+			if (mver >= verinfo->md_ver_minimum &&
+			    mver <= verinfo->md_ver_maximum &&
+			    mver > bestver) {
+				best = mp;
+				bestver = mver;
+			}
+		}
+	}
+
+	return (best);
+}
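Note (reviewer sketch, not part of the patch): moddata_copy() above serializes the preloaded-kernel information into the flat record stream the guest kernel's preload code expects, the same framing that preload_search_by_type() walks earlier in this file: a 32-bit type word, a 32-bit length word, a payload padded to an 8-byte boundary, and a terminating MODINFO_END record with both words zero. The stand-alone C sketch below only illustrates that framing; walk_modinfo() and MD_ALIGN() are made-up names and nothing here is added to boot.c.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Round a payload length up to the 8-byte alignment used by COPY_MODINFO. */
#define MD_ALIGN(len)	(((len) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))

static void
walk_modinfo(const char *p)
{
	uint32_t type, len;

	for (;;) {
		memcpy(&type, p, sizeof(type));
		memcpy(&len, p + sizeof(type), sizeof(len));
		if (type == 0 && len == 0)	/* MODINFO_END terminator */
			break;
		printf("modinfo record: type %#x, %u payload bytes\n",
		    (unsigned)type, (unsigned)len);
		/* Skip the 8-byte header plus the padded payload. */
		p += 2 * sizeof(uint32_t) + MD_ALIGN(len);
	}
}

Calling walk_modinfo(bootparams->modulep) after parse_kernel() returns would, for example, list the MODINFO_NAME/TYPE/ADDR/SIZE records and the MODINFO_METADATA entries built for the guest.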