D26976.id82061.diff

Index: lib/Makefile
===================================================================
--- lib/Makefile
+++ lib/Makefile
@@ -203,6 +203,9 @@
.if ${MACHINE_CPUARCH} == "amd64"
SUBDIR.${MK_PMC}+= libipt
+.endif
+
+.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "aarch64"
SUBDIR.${MK_BHYVE}+= libvmmapi
.endif
Index: lib/libvmmapi/Makefile
===================================================================
--- lib/libvmmapi/Makefile
+++ lib/libvmmapi/Makefile
@@ -1,14 +1,21 @@
# $FreeBSD$
-PACKAGE=lib${LIB}
-LIB= vmmapi
-SRCS= vmmapi.c vmmapi_freebsd.c
-INCS= vmmapi.h
+PACKAGE= lib${LIB}
+SHLIBDIR?= /lib
+LIB_SRCTOP?= ${.CURDIR}
+LIB= vmmapi
WARNS?= 2
-LIBADD= util
+.if exists(${LIB_SRCTOP}/${MACHINE})
+LIB_ARCH= ${MACHINE}
+.elif exists(${LIB_SRCTOP}/${MACHINE_ARCH})
+LIB_ARCH= ${MACHINE_ARCH}
+.else
+LIB_ARCH= ${MACHINE_CPUARCH}
+.endif
-CFLAGS+= -I${.CURDIR}
+CFLAGS+= -I${LIB_SRCTOP}/${LIB_ARCH}
+.include "${LIB_SRCTOP}/${LIB_ARCH}/Makefile.inc"
.include <bsd.lib.mk>
Index: lib/libvmmapi/amd64/Makefile.inc
===================================================================
--- /dev/null
+++ lib/libvmmapi/amd64/Makefile.inc
@@ -0,0 +1,7 @@
+# $FreeBSD$
+.PATH: ${LIB_SRCTOP}/amd64/
+
+SRCS= vmmapi.c vmmapi_freebsd.c
+INCS= vmmapi.h
+
+LIBADD= util
Index: lib/libvmmapi/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ lib/libvmmapi/arm64/Makefile.inc
@@ -0,0 +1,7 @@
+# $FreeBSD$
+.PATH: ${LIB_SRCTOP}/arm64/
+
+SRCS= vmmapi.c
+INCS= vmmapi.h
+
+LIBADD= util
Index: lib/libvmmapi/arm64/vmmapi.h
===================================================================
--- /dev/null
+++ lib/libvmmapi/arm64/vmmapi.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMMAPI_H_
+#define _VMMAPI_H_
+
+struct vmctx;
+struct vm_exit;
+enum vm_cap_type;
+
+/*
+ * Different styles of mapping the memory assigned to a VM into the address
+ * space of the controlling process.
+ */
+enum vm_mmap_style {
+ VM_MMAP_NONE, /* no mapping */
+ VM_MMAP_ALL, /* fully and statically mapped */
+ VM_MMAP_SPARSE, /* mappings created on-demand */
+};
+
+int vm_create(const char *name);
+struct vmctx *vm_open(const char *name);
+void vm_destroy(struct vmctx *ctx);
+int vm_parse_memsize(const char *optarg, size_t *memsize);
+int vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len);
+int vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t len, enum vm_mmap_style s);
+void *vm_map_ipa(struct vmctx *ctx, uint64_t gaddr, size_t len);
+uint32_t vm_get_mem_limit(struct vmctx *ctx);
+void vm_set_mem_limit(struct vmctx *ctx, uint32_t limit);
+int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
+int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
+int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
+ struct vm_exit *ret_vmexit);
+const char *vm_capability_type2name(int type);
+int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+ int *retval);
+int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+ int val);
+int vm_assert_irq(struct vmctx *ctx, uint32_t irq);
+int vm_deassert_irq(struct vmctx *ctx, uint32_t irq);
+
+/*
+ * Return a pointer to the statistics buffer. Note that this is not MT-safe.
+ */
+uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
+ int *ret_entries);
+const char *vm_get_stat_desc(struct vmctx *ctx, int index);
+
+/* Reset vcpu register state */
+int vcpu_reset(struct vmctx *ctx, int vcpu);
+
+int vm_attach_vgic(struct vmctx *ctx, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
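+
+/*
+ * Illustrative call sequence for a client of this library (the argument
+ * values below are placeholders, not part of the API):
+ *
+ *	vm_create("guest");
+ *	ctx = vm_open("guest");
+ *	vm_setup_memory(ctx, membase, memsize, VM_MMAP_ALL);
+ *	vm_attach_vgic(ctx, dist_start, dist_size, redist_start, redist_size);
+ *	vm_run(ctx, vcpu, pc, &vmexit);
+ *	vm_destroy(ctx);
+ */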
+#endif /* _VMMAPI_H_ */
Index: lib/libvmmapi/arm64/vmmapi.c
===================================================================
--- /dev/null
+++ lib/libvmmapi/arm64/vmmapi.c
@@ -0,0 +1,392 @@
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/errno.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <libutil.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmmapi.h"
+
+#define MB (1024 * 1024UL)
+#define GB (1024 * 1024 * 1024UL)
+
+struct vmctx {
+ int fd;
+ uint32_t mem_limit;
+ enum vm_mmap_style vms;
+ size_t mem_size;
+ uint64_t mem_base;
+ char *mem_addr;
+ char *name;
+};
+
+#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
+#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
+
+static int
+vm_device_open(const char *name)
+{
+ int fd, len;
+ char *vmfile;
+
+ len = strlen("/dev/vmm/") + strlen(name) + 1;
+ vmfile = malloc(len);
+ assert(vmfile != NULL);
+ snprintf(vmfile, len, "/dev/vmm/%s", name);
+
+ /* Open the device file */
+ fd = open(vmfile, O_RDWR, 0);
+
+ free(vmfile);
+ return (fd);
+}
+
+int
+vm_create(const char *name)
+{
+
+ return (CREATE((char *)name));
+}
+
+struct vmctx *
+vm_open(const char *name)
+{
+ struct vmctx *vm;
+
+ vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
+ assert(vm != NULL);
+
+ vm->fd = -1;
+ vm->mem_limit = 2 * GB;
+ vm->name = (char *)(vm + 1);
+ strcpy(vm->name, name);
+
+ if ((vm->fd = vm_device_open(vm->name)) < 0)
+ goto err;
+
+ return (vm);
+err:
+ vm_destroy(vm);
+ return (NULL);
+}
+
+void
+vm_destroy(struct vmctx *vm)
+{
+ assert(vm != NULL);
+
+ if (vm->fd >= 0)
+ close(vm->fd);
+ DESTROY(vm->name);
+
+ free(vm);
+}
+
+int
+vm_parse_memsize(const char *optarg, size_t *ret_memsize)
+{
+ char *endptr;
+ size_t optval;
+ int error;
+
+ optval = strtoul(optarg, &endptr, 0);
+ if (*optarg != '\0' && *endptr == '\0') {
+ /*
+ * For the sake of backward compatibility if the memory size
+ * specified on the command line is less than a megabyte then
+ * it is interpreted as being in units of MB.
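+ * For example, "256" is taken to mean 256 MB, while a value of one
+ * megabyte or more (e.g. "268435456") is used as a byte count.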
+ */
+ if (optval < MB)
+ optval *= MB;
+ *ret_memsize = optval;
+ error = 0;
+ } else
+ error = expand_number(optarg, ret_memsize);
+
+ return (error);
+}
+
+int
+vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len)
+{
+ int error;
+ struct vm_memory_segment seg;
+
+ bzero(&seg, sizeof(seg));
+ seg.gpa = gpa;
+ error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
+ *ret_len = seg.len;
+ return (error);
+}
+
+uint32_t
+vm_get_mem_limit(struct vmctx *ctx)
+{
+
+ return (ctx->mem_limit);
+}
+
+void
+vm_set_mem_limit(struct vmctx *ctx, uint32_t limit)
+{
+
+ ctx->mem_limit = limit;
+}
+
+static int
+setup_memory_segment(struct vmctx *ctx, uint64_t gpa, size_t len, char **addr)
+{
+ int error;
+ struct vm_memory_segment seg;
+
+ /*
+ * Create and optionally map 'len' bytes of memory at guest
+ * physical address 'gpa'
+ */
+ bzero(&seg, sizeof(seg));
+ seg.gpa = gpa;
+ seg.len = len;
+ error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
+ if (error == 0 && addr != NULL) {
+ *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
+ ctx->fd, gpa);
+ }
+ return (error);
+}
+
+int
+vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t memsize, enum vm_mmap_style vms)
+{
+ int error;
+
+ /* XXX VM_MMAP_SPARSE not implemented yet */
+ assert(vms == VM_MMAP_ALL);
+
+ ctx->vms = vms;
+ ctx->mem_base = membase;
+
+ assert(memsize <= ctx->mem_limit);
+ ctx->mem_size = memsize;
+
+ if (ctx->mem_size > 0) {
+ error = setup_memory_segment(ctx, ctx->mem_base, ctx->mem_size,
+ &ctx->mem_addr);
+ if (error)
+ return (error);
+ }
+
+ return (0);
+}
+
+void *
+vm_map_ipa(struct vmctx *ctx, uint64_t iaddr, size_t len)
+{
+ /* XXX VM_MMAP_SPARSE not implemented yet */
+ assert(ctx->vms == VM_MMAP_ALL);
+
+ if (iaddr < ctx->mem_base)
+ return ((void *)(ctx->mem_addr + iaddr));
+ else
+ return ((void *)(ctx->mem_addr + (iaddr - ctx->mem_base)));
+}
+
+
+int
+vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
+{
+ int error;
+ struct vm_register vmreg;
+
+ bzero(&vmreg, sizeof(vmreg));
+ vmreg.cpuid = vcpu;
+ vmreg.regnum = reg;
+ vmreg.regval = val;
+
+ error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
+ return (error);
+}
+
+int
+vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
+{
+ int error;
+ struct vm_register vmreg;
+
+ bzero(&vmreg, sizeof(vmreg));
+ vmreg.cpuid = vcpu;
+ vmreg.regnum = reg;
+
+ error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
+ *ret_val = vmreg.regval;
+ return (error);
+}
+
+int
+vm_run(struct vmctx *ctx, int vcpu, uint64_t pc, struct vm_exit *vmexit)
+{
+ int error;
+ struct vm_run vmrun;
+
+ bzero(&vmrun, sizeof(vmrun));
+ vmrun.cpuid = vcpu;
+ vmrun.pc = pc;
+
+ error = ioctl(ctx->fd, VM_RUN, &vmrun);
+ bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
+ return (error);
+}
+
+static struct {
+ const char *name;
+ int type;
+} capstrmap[] = {
+ { "hlt_exit", VM_CAP_HALT_EXIT },
+ { "mtrap_exit", VM_CAP_MTRAP_EXIT },
+ { "pause_exit", VM_CAP_PAUSE_EXIT },
+ { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST },
+ { 0 }
+};
+
+int
+vm_capability_name2type(const char *capname)
+{
+ int i;
+
+ for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) {
+ if (strcmp(capstrmap[i].name, capname) == 0)
+ return (capstrmap[i].type);
+ }
+
+ return (-1);
+}
+
+const char *
+vm_capability_type2name(int type)
+{
+ int i;
+
+ for (i = 0; capstrmap[i].name != NULL; i++) {
+ if (capstrmap[i].type == type)
+ return (capstrmap[i].name);
+ }
+
+ return (NULL);
+}
+
+int
+vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+ int *retval)
+{
+ int error;
+ struct vm_capability vmcap;
+
+ bzero(&vmcap, sizeof(vmcap));
+ vmcap.cpuid = vcpu;
+ vmcap.captype = cap;
+
+ error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
+ *retval = vmcap.capval;
+ return (error);
+}
+
+int
+vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
+{
+ struct vm_capability vmcap;
+
+ bzero(&vmcap, sizeof(vmcap));
+ vmcap.cpuid = vcpu;
+ vmcap.captype = cap;
+ vmcap.capval = val;
+
+ return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
+}
+
+uint64_t *
+vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
+ int *ret_entries)
+{
+ int error;
+
+ static struct vm_stats vmstats;
+
+ vmstats.cpuid = vcpu;
+
+ error = ioctl(ctx->fd, VM_STATS, &vmstats);
+ if (error == 0) {
+ if (ret_entries)
+ *ret_entries = vmstats.num_entries;
+ if (ret_tv)
+ *ret_tv = vmstats.tv;
+ return (vmstats.statbuf);
+ } else
+ return (NULL);
+}
+
+const char *
+vm_get_stat_desc(struct vmctx *ctx, int index)
+{
+ static struct vm_stat_desc statdesc;
+
+ statdesc.index = index;
+ if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
+ return (statdesc.desc);
+ else
+ return (NULL);
+}
+
+int
+vcpu_reset(struct vmctx *vmctx, int vcpu)
+{
+ return (ENXIO);
+}
+
+int
+vm_attach_vgic(struct vmctx *ctx, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size)
+{
+ struct vm_attach_vgic vav;
+
+ bzero(&vav, sizeof(vav));
+ vav.dist_start = dist_start;
+ vav.dist_size = dist_size;
+ vav.redist_start = redist_start;
+ vav.redist_size = redist_size;
+
+ return (ioctl(ctx->fd, VM_ATTACH_VGIC, &vav));
+}
+
+int
+vm_assert_irq(struct vmctx *ctx, uint32_t irq)
+{
+ struct vm_irq vi;
+
+ bzero(&vi, sizeof(vi));
+ vi.irq = irq;
+
+ return (ioctl(ctx->fd, VM_ASSERT_IRQ, &vi));
+}
+
+int
+vm_deassert_irq(struct vmctx *ctx, uint32_t irq)
+{
+ struct vm_irq vi;
+
+ bzero(&vi, sizeof(vi));
+ vi.irq = irq;
+
+ return (ioctl(ctx->fd, VM_DEASSERT_IRQ, &vi));
+}
Index: sys/arm/arm/generic_timer.h
===================================================================
--- /dev/null
+++ sys/arm/arm/generic_timer.h
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ARM_GENERIC_TIMER_H_
+#define _ARM_GENERIC_TIMER_H_
+
+#define GT_PHYS_SECURE 0
+#define GT_PHYS_NONSECURE 1
+#define GT_VIRT 2
+#define GT_HYP 3
+
+int arm_tmr_setup_intr(int gt_type, driver_filter_t filter,
+ driver_intr_t handler, void *arg);
+int arm_tmr_teardown_intr(int gt_type);
+
+#endif
Index: sys/arm/arm/generic_timer.c
===================================================================
--- sys/arm/arm/generic_timer.c
+++ sys/arm/arm/generic_timer.c
@@ -63,6 +63,10 @@
#include <machine/machdep.h> /* For arm_set_delay */
#endif
+#if defined(__aarch64__)
+#include <machine/vmm.h> /* For virt_enabled() */
+#endif
+
#ifdef FDT
#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_bus.h>
@@ -74,6 +78,8 @@
#include <dev/acpica/acpivar.h>
#endif
+#include "generic_timer.h"
+
#define GT_CTRL_ENABLE (1 << 0)
#define GT_CTRL_INT_MASK (1 << 1)
#define GT_CTRL_INT_STAT (1 << 2)
@@ -123,6 +129,8 @@
.tc_fill_vdso_timehands = arm_tmr_fill_vdso_timehands,
};
+static device_t arm_tmr_dev;
+
#ifdef __arm__
#define get_el0(x) cp15_## x ##_get()
#define get_el1(x) cp15_## x ##_get()
@@ -314,6 +322,39 @@
return (FILTER_HANDLED);
}
+int
+arm_tmr_setup_intr(int gt_type, driver_filter_t filter, driver_intr_t handler,
+ void *arg)
+{
+ if (gt_type != GT_PHYS_SECURE &&
+ gt_type != GT_PHYS_NONSECURE &&
+ gt_type != GT_VIRT &&
+ gt_type != GT_HYP)
+ return (ENXIO);
+
+ if (arm_tmr_sc->res[gt_type] == NULL)
+ return (ENXIO);
+
+ return (bus_setup_intr(arm_tmr_dev, arm_tmr_sc->res[gt_type],
+ INTR_TYPE_CLK, filter, handler, arg, &arm_tmr_sc->ihl[gt_type]));
+}
+
+int
+arm_tmr_teardown_intr(int gt_type)
+{
+ if (gt_type != GT_PHYS_SECURE &&
+ gt_type != GT_PHYS_NONSECURE &&
+ gt_type != GT_VIRT &&
+ gt_type != GT_HYP)
+ return (ENXIO);
+
+ if (arm_tmr_sc->res[gt_type] == NULL)
+ return (ENXIO);
+
+ return (bus_teardown_intr(arm_tmr_dev, arm_tmr_sc->res[gt_type],
+ arm_tmr_sc->ihl[gt_type]));
+}
+
#ifdef FDT
static int
arm_tmr_fdt_probe(device_t dev)
@@ -447,13 +488,26 @@
last_timer = 1;
}
+#ifdef __aarch64__
+ sc->physical |= virt_enabled();
+#endif
+
arm_tmr_sc = sc;
/* Setup secure, non-secure and virtual IRQs handler */
- for (i = first_timer; i <= last_timer; i++) {
+ for (i = GT_PHYS_SECURE; i <= GT_VIRT; i++) {
/* If we do not have the interrupt, skip it. */
if (sc->res[i] == NULL)
continue;
+#if defined(__aarch64__)
+ if (i == 2 && virt_enabled()) {
+ /*
+ * Do not install an interrupt handler for the virtual
+ * timer. This will be used by the VM.
+ */
+ continue;
+ }
+#endif
error = bus_setup_intr(dev, sc->res[i], INTR_TYPE_CLK,
arm_tmr_intr, NULL, sc, &sc->ihl[i]);
if (error) {
@@ -461,7 +515,6 @@
return (ENXIO);
}
}
-
/* Disable the virtual timer until we are ready */
if (sc->res[2] != NULL)
arm_tmr_disable(false);
@@ -488,6 +541,8 @@
arm_set_delay(arm_tmr_do_delay, sc);
#endif
+ arm_tmr_dev = dev;
+
return (0);
}
Index: sys/arm/arm/gic.h
===================================================================
--- sys/arm/arm/gic.h
+++ sys/arm/arm/gic.h
@@ -47,13 +47,16 @@
struct arm_gic_softc {
device_t gic_dev;
+ bool is_root;
void * gic_intrhand;
struct gic_irqsrc * gic_irqs;
- struct resource * gic_res[3];
+ struct resource * gic_res[6];
bus_space_tag_t gic_c_bst;
bus_space_tag_t gic_d_bst;
bus_space_handle_t gic_c_bsh;
bus_space_handle_t gic_d_bsh;
+ bus_space_tag_t gic_h_bst;
+ bus_space_handle_t gic_h_bsh;
uint8_t ver;
struct mtx mutex;
uint32_t nirqs;
Index: sys/arm/arm/gic.c
===================================================================
--- sys/arm/arm/gic.c
+++ sys/arm/arm/gic.c
@@ -128,10 +128,14 @@
static struct resource_spec arm_gic_spec[] = {
{ SYS_RES_MEMORY, 0, RF_ACTIVE }, /* Distributor registers */
{ SYS_RES_MEMORY, 1, RF_ACTIVE }, /* CPU Interrupt Intf. registers */
- { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* Parent interrupt */
+ { SYS_RES_MEMORY, 2, RF_ACTIVE | RF_OPTIONAL }, /* Virtual Interface Control */
+ { SYS_RES_MEMORY, 3, RF_ACTIVE | RF_OPTIONAL }, /* Virtual CPU interface */
+ { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* vGIC maintenance interrupt or parent interrupt */
{ -1, 0 }
};
+extern char hypmode_enabled[];
+
#if defined(__arm__) && defined(INVARIANTS)
static int gic_debug_spurious = 1;
#else
@@ -154,6 +158,22 @@
#define gic_d_write_4(_sc, _reg, _val) \
bus_space_write_4((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg), (_val))
+#define gic_h_read_4(_sc, _reg) \
+ bus_space_read_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg))
+#define gic_h_write_4(_sc, _reg, _val) \
+ bus_space_write_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg), (_val))
+
+struct arm_gic_softc *
+arm_gic_get_sc(void)
+{
+ return gic_sc;
+}
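+
+/*
+ * GICH_VTR.ListRegs (bits [5:0]) holds the number of implemented List
+ * Registers minus one, hence the "& 0x3f" and the "+ 1" below.
+ */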
+uint32_t
+arm_gic_get_lr_num(void)
+{
+ return ((gic_h_read_4(gic_sc, GICH_VTR) & 0x3f) + 1);
+}
+
static inline void
gic_irq_unmask(struct arm_gic_softc *sc, u_int irq)
{
@@ -322,12 +342,25 @@
mtx_init(&sc->mutex, "GIC lock", NULL, MTX_SPIN);
/* Distributor Interface */
- sc->gic_d_bst = rman_get_bustag(sc->gic_res[0]);
- sc->gic_d_bsh = rman_get_bushandle(sc->gic_res[0]);
+ sc->gic_d_bst = rman_get_bustag(sc->gic_res[DISTRIBUTOR_RES_IDX]);
+ sc->gic_d_bsh = rman_get_bushandle(sc->gic_res[DISTRIBUTOR_RES_IDX]);
/* CPU Interface */
- sc->gic_c_bst = rman_get_bustag(sc->gic_res[1]);
- sc->gic_c_bsh = rman_get_bushandle(sc->gic_res[1]);
+ sc->gic_c_bst = rman_get_bustag(sc->gic_res[CPU_INTERFACE_RES_IDX]);
+ sc->gic_c_bsh = rman_get_bushandle(sc->gic_res[CPU_INTERFACE_RES_IDX]);
+
+ /* Virtual Interface Control */
+ if (sc->is_root) {
+ if (sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX] == NULL) {
+ device_printf(dev, "Cannot find Virtual Interface Control Registers. Disabling Hyp-Mode...\n");
+ hypmode_enabled[0] = -1;
+ } else {
+ sc->gic_h_bst = rman_get_bustag(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ sc->gic_h_bsh = rman_get_bushandle(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ }
+ } else {
+ hypmode_enabled[0] = -1;
+ }
/* Disable interrupt forwarding to the CPU interface */
gic_d_write_4(sc, GICD_CTLR, 0x00);
@@ -507,6 +540,33 @@
("arm_gic_read_ivar: Invalid bus type %u", sc->gic_bus));
*result = sc->gic_bus;
return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_RES:
+ *result = (uintptr_t)sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX];
+ return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_VADDR:
+ *result = (uintptr_t)rman_get_virtual(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_PADDR:
+ *result = (uintptr_t)rman_get_start(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_SIZE:
+ *result = rman_get_size(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_CPU_INT_PADDR:
+ *result = rman_get_start(sc->gic_res[VIRT_CPU_INTERFACE_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_CPU_INT_SIZE:
+ *result = rman_get_size(sc->gic_res[VIRT_CPU_INTERFACE_RES_IDX]);
+ return (0);
+ case GIC_IVAR_LR_NUM:
+ *result = (gic_h_read_4(gic_sc, GICH_VTR) & 0x3f) + 1;
+ return (0);
+ case GIC_IVAR_MAINTENANCE_INTR_RES:
+ if (sc->is_root)
+ *result = (uintptr_t)sc->gic_res[MAINTENANCE_INTR_RES_IDX];
+ else
+ *result = (uintptr_t)NULL;
+ return (0);
}
return (ENOENT);
@@ -979,7 +1039,7 @@
if (CPU_ISSET(i, &cpus))
val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT;
- gic_d_write_4(sc, GICD_SGIR, val | gi->gi_irq);
+ gic_d_write_4(sc, GICD_SGIR(0), val | gi->gi_irq);
}
static int
Index: sys/arm/arm/gic_common.h
===================================================================
--- sys/arm/arm/gic_common.h
+++ sys/arm/arm/gic_common.h
@@ -32,8 +32,25 @@
#ifndef _GIC_COMMON_H_
#define _GIC_COMMON_H_
-#define GIC_IVAR_HW_REV 500
-#define GIC_IVAR_BUS 501
+#ifndef __ASSEMBLER__
+
+#define DISTRIBUTOR_RES_IDX 0
+#define CPU_INTERFACE_RES_IDX 1
+#define VIRT_INTERFACE_CONTROL_RES_IDX 2
+#define VIRT_CPU_INTERFACE_RES_IDX 3
+#define MAINTENANCE_INTR_RES_IDX 4
+#define INTRNG_RES_IDX 5
+
+#define GIC_IVAR_HW_REV 500
+#define GIC_IVAR_BUS 501
+#define GIC_IVAR_VIRTUAL_INT_CTRL_RES 502
+#define GIC_IVAR_VIRTUAL_INT_CTRL_VADDR 503
+#define GIC_IVAR_VIRTUAL_INT_CTRL_PADDR 505
+#define GIC_IVAR_VIRTUAL_INT_CTRL_SIZE 504
+#define GIC_IVAR_VIRTUAL_CPU_INT_PADDR 506
+#define GIC_IVAR_VIRTUAL_CPU_INT_SIZE 507
+#define GIC_IVAR_LR_NUM 508
+#define GIC_IVAR_MAINTENANCE_INTR_RES 509
/* GIC_IVAR_BUS values */
#define GIC_BUS_UNKNOWN 0
@@ -43,6 +60,19 @@
__BUS_ACCESSOR(gic, hw_rev, GIC, HW_REV, u_int);
__BUS_ACCESSOR(gic, bus, GIC, BUS, u_int);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_res, GIC, VIRTUAL_INT_CTRL_RES, struct resource *);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_vaddr, GIC, VIRTUAL_INT_CTRL_VADDR, uint64_t);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_paddr, GIC, VIRTUAL_INT_CTRL_PADDR, uint64_t);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_size, GIC, VIRTUAL_INT_CTRL_SIZE, uint32_t);
+__BUS_ACCESSOR(gic, virtual_cpu_int_paddr, GIC, VIRTUAL_CPU_INT_PADDR, uint32_t);
+__BUS_ACCESSOR(gic, virtual_cpu_int_size, GIC, VIRTUAL_CPU_INT_SIZE, uint32_t);
+__BUS_ACCESSOR(gic, lr_num, GIC, LR_NUM, uint32_t);
+__BUS_ACCESSOR(gic, maintenance_intr_res, GIC, MAINTENANCE_INTR_RES, struct resource *);
+
+struct arm_gic_softc *arm_gic_get_sc(void);
+uint32_t arm_gic_get_lr_num(void);
+
+#endif /*__ASSEMBLER__ */
/* Software Generated Interrupts */
#define GIC_FIRST_SGI 0 /* Irqs 0-15 are SGIs/IPIs. */
@@ -56,7 +86,9 @@
/* Common register values */
#define GICD_CTLR 0x0000 /* v1 ICDDCR */
#define GICD_TYPER 0x0004 /* v1 ICDICTR */
-#define GICD_TYPER_I_NUM(n) ((((n) & 0x1F) + 1) * 32)
+#define GICD_TYPER_ITLINESNUM_MASK (0x1f)
+#define GICD_TYPER_I_NUM(n) \
+ ((((n) & GICD_TYPER_ITLINESNUM_MASK) + 1) * 32)
#define GICD_IIDR 0x0008 /* v1 ICDIIDR */
#define GICD_IIDR_PROD_SHIFT 24
#define GICD_IIDR_PROD_MASK 0xff000000
@@ -74,19 +106,30 @@
#define GICD_IIDR_IMPL_MASK 0x00000fff
#define GICD_IIDR_IMPL(x) \
(((x) & GICD_IIDR_IMPL_MASK) >> GICD_IIDR_IMPL_SHIFT)
-#define GICD_IGROUPR(n) (0x0080 + (((n) >> 5) * 4)) /* v1 ICDISER */
+#define GICD_IGROUPR_BASE (0x0080)
+#define GICD_IGROUPR(n) \
+ (GICD_IGROUPR_BASE + (((n) >> 5) * 4)) /* v1 ICDISER */
#define GICD_I_PER_IGROUPRn 32
-#define GICD_ISENABLER(n) (0x0100 + (((n) >> 5) * 4)) /* v1 ICDISER */
+#define GICD_ISENABLER_BASE (0x0100)
+#define GICD_ISENABLER(n) \
+ (GICD_ISENABLER_BASE + (((n) >> 5) * 4)) /* v1 ICDISER */
#define GICD_I_MASK(n) (1ul << ((n) & 0x1f))
#define GICD_I_PER_ISENABLERn 32
-#define GICD_ICENABLER(n) (0x0180 + (((n) >> 5) * 4)) /* v1 ICDICER */
+#define GICD_ICENABLER_BASE (0x0180)
+#define GICD_ICENABLER(n) \
+ (GICD_ICENABLER_BASE + (((n) >> 5) * 4)) /* v1 ICDICER */
#define GICD_ISPENDR(n) (0x0200 + (((n) >> 5) * 4)) /* v1 ICDISPR */
#define GICD_ICPENDR(n) (0x0280 + (((n) >> 5) * 4)) /* v1 ICDICPR */
+#define GICD_ISACTIVER(n) (0x0300 + (((n) >> 5) * 4)) /* v1 ICDABR */
#define GICD_ICACTIVER(n) (0x0380 + (((n) >> 5) * 4)) /* v1 ICDABR */
-#define GICD_IPRIORITYR(n) (0x0400 + (((n) >> 2) * 4)) /* v1 ICDIPR */
+#define GICD_IPRIORITYR_BASE (0x0400)
+#define GICD_IPRIORITYR(n) \
+ (GICD_IPRIORITYR_BASE + (((n) >> 2) * 4)) /* v1 ICDIPR */
#define GICD_I_PER_IPRIORITYn 4
#define GICD_ITARGETSR(n) (0x0800 + (((n) >> 2) * 4)) /* v1 ICDIPTR */
-#define GICD_ICFGR(n) (0x0C00 + (((n) >> 4) * 4)) /* v1 ICDICFR */
+#define GICD_ICFGR_BASE (0x0C00)
+#define GICD_ICFGR(n) \
+ (GICD_ICFGR_BASE + (((n) >> 4) * 4)) /* v1 ICDICFR */
#define GICD_I_PER_ICFGRn 16
/* First bit is a polarity bit (0 - low, 1 - high) */
#define GICD_ICFGR_POL_LOW (0 << 0)
@@ -96,7 +139,34 @@
#define GICD_ICFGR_TRIG_LVL (0 << 1)
#define GICD_ICFGR_TRIG_EDGE (1 << 1)
#define GICD_ICFGR_TRIG_MASK 0x2
-#define GICD_SGIR 0x0F00 /* v1 ICDSGIR */
+#define GICD_SGIR(n) (0x0F00 + ((n) * 4)) /* v1 ICDSGIR */
#define GICD_SGI_TARGET_SHIFT 16
+/* GIC Hypervisor specific registers */
+#define GICH_HCR 0x0
+#define GICH_VTR 0x4
+#define GICH_VMCR 0x8
+#define GICH_VMCR_VMGRP1EN (1 << 1)
+#define GICH_MISR 0x10
+#define GICH_EISR0 0x20
+#define GICH_EISR1 0x24
+#define GICH_ELSR0 0x30
+#define GICH_ELSR1 0x34
+#define GICH_APR 0xF0
+#define GICH_LR0 0x100
+
+#define GICH_HCR_EN (1 << 0)
+#define GICH_HCR_UIE (1 << 1)
+
+#define GICH_LR_VIRTID (0x3FF << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT 10
+#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT)
+#define GICH_LR_STATE (3 << 28)
+#define GICH_LR_PENDING (1 << 28)
+#define GICH_LR_ACTIVE (1 << 29)
+#define GICH_LR_EOI (1 << 19)
+
+#define GICH_MISR_EOI (1 << 0)
+#define GICH_MISR_U (1 << 1)
+
#endif /* _GIC_COMMON_H_ */
Index: sys/arm/arm/gic_fdt.c
===================================================================
--- sys/arm/arm/gic_fdt.c
+++ sys/arm/arm/gic_fdt.c
@@ -129,18 +129,25 @@
gic_fdt_attach(device_t dev)
{
struct arm_gic_fdt_softc *sc = device_get_softc(dev);
- phandle_t pxref;
- intptr_t xref;
+ phandle_t pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev));
+ intptr_t xref = OF_xref_from_node(ofw_bus_get_node(dev));
int err;
+ sc->base.is_root = false;
+ /*
+ * Controller is root if:
+ * - doesn't have interrupt parent
+ * - his interrupt parent is this controller
+ */
+ if (pxref == 0 || xref == pxref)
+ sc->base.is_root = true;
+
sc->base.gic_bus = GIC_BUS_FDT;
err = arm_gic_attach(dev);
if (err != 0)
return (err);
- xref = OF_xref_from_node(ofw_bus_get_node(dev));
-
/*
* Now, when everything is initialized, it's right time to
* register interrupt controller to interrupt framefork.
@@ -150,13 +157,7 @@
goto cleanup;
}
- /*
- * Controller is root if:
- * - doesn't have interrupt parent
- * - his interrupt parent is this controller
- */
- pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev));
- if (pxref == 0 || xref == pxref) {
+ if (sc->base.is_root) {
if (intr_pic_claim_root(dev, xref, arm_gic_intr, sc,
GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) {
device_printf(dev, "could not set PIC as a root\n");
Index: sys/arm64/arm64/gic_v3.c
===================================================================
--- sys/arm64/arm64/gic_v3.c
+++ sys/arm64/arm64/gic_v3.c
@@ -99,6 +99,11 @@
static u_int sgi_first_unused = GIC_FIRST_SGI;
#endif
+static struct resource *maint_res;
+static device_t gic_dev;
+static int maint_rid;
+static void *maint_cookie;
+
static device_method_t gic_v3_methods[] = {
/* Device interface */
DEVMETHOD(device_detach, gic_v3_detach),
@@ -366,12 +371,49 @@
return (0);
}
+void
+gic_v3_alloc_maint_res(device_t dev)
+{
+ gic_dev = dev;
+ maint_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &maint_rid,
+ RF_ACTIVE);
+ if (!maint_res)
+ device_printf(dev,
+ "Could not allocate resource for maintenance interrupt\n");
+}
+
+int
+gic_v3_setup_maint_intr(driver_filter_t filter, driver_intr_t handler,
+ void *arg)
+{
+ int flags;
+
+ if (!maint_res)
+ return (EINVAL);
+
+ flags = INTR_TYPE_MISC | INTR_MPSAFE;
+ return (bus_setup_intr(gic_dev, maint_res, flags, filter, handler,
+ arg, &maint_cookie));
+}
+
+int
+gic_v3_teardown_maint_intr(void)
+{
+ if (!maint_res)
+ return (EINVAL);
+
+ return (bus_teardown_intr(gic_dev, maint_res, maint_cookie));
+}
+
static int
gic_v3_get_domain(device_t dev, device_t child, int *domain)
{
struct gic_v3_devinfo *di;
di = device_get_ivars(child);
+ if (di == NULL)
+ return (0);
+
if (di->gic_domain < 0)
return (ENOENT);
@@ -978,22 +1020,25 @@
struct resource *res;
u_int cpuid;
size_t us_left = 1000000;
+ uint32_t rwp;
cpuid = PCPU_GET(cpuid);
switch (xdist) {
case DIST:
res = sc->gic_dist;
+ rwp = GICD_CTLR_RWP;
break;
case REDIST:
res = &sc->gic_redists.pcpu[cpuid]->res;
+ rwp = GICR_CTLR_RWP;
break;
default:
KASSERT(0, ("%s: Attempt to wait for unknown RWP", __func__));
return;
}
- while ((bus_read_4(res, GICD_CTLR) & GICD_CTLR_RWP) != 0) {
+ while ((bus_read_4(res, GICD_CTLR) & rwp) != 0) {
DELAY(1);
if (us_left-- == 0)
panic("GICD Register write pending for too long");
Index: sys/arm64/arm64/gic_v3_acpi.c
===================================================================
--- sys/arm64/arm64/gic_v3_acpi.c
+++ sys/arm64/arm64/gic_v3_acpi.c
@@ -338,6 +338,8 @@
if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) !=0)
sc->gic_nchildren = 0;
+ gic_v3_alloc_maint_res(dev);
+
return (0);
error:
Index: sys/arm64/arm64/gic_v3_fdt.c
===================================================================
--- sys/arm64/arm64/gic_v3_fdt.c
+++ sys/arm64/arm64/gic_v3_fdt.c
@@ -171,6 +171,8 @@
if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) != 0)
sc->gic_nchildren = 0;
+ gic_v3_alloc_maint_res(dev);
+
return (err);
error:
@@ -194,12 +196,19 @@
static int
gic_v3_fdt_print_child(device_t bus, device_t child)
{
- struct gic_v3_ofw_devinfo *di = device_get_ivars(child);
- struct resource_list *rl = &di->di_rl;
+ struct gic_v3_ofw_devinfo *di;
+ struct resource_list *rl;
int retval = 0;
retval += bus_print_child_header(bus, child);
+
+ di = device_get_ivars(child);
+ if (di == NULL)
+ goto footer;
+ rl = &di->di_rl;
+
retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
+footer:
retval += bus_print_child_footer(bus, child);
return (retval);
@@ -280,6 +289,7 @@
size_cells = 2;
OF_getencprop(parent, "#size-cells", &size_cells,
sizeof(size_cells));
+
/* Iterate through all GIC subordinates */
for (node = OF_child(parent); node > 0; node = OF_peer(node)) {
/* Allocate and populate devinfo. */
Index: sys/arm64/arm64/gic_v3_reg.h
===================================================================
--- sys/arm64/arm64/gic_v3_reg.h
+++ sys/arm64/arm64/gic_v3_reg.h
@@ -56,14 +56,22 @@
#define GICD_CTLR_G1 (1 << 0)
#define GICD_CTLR_G1A (1 << 1)
#define GICD_CTLR_ARE_NS (1 << 4)
+#define GICD_CTLR_DS (1 << 6)
+#define GICD_CTLR_E1NWF (1 << 7)
#define GICD_CTLR_RWP (1 << 31)
/* GICD_TYPER */
#define GICD_TYPER_IDBITS(n) ((((n) >> 19) & 0x1F) + 1)
+#define GICD_TYPER_SECURITYEXTN \
+ (1 << 10)
+#define GICD_TYPER_DVIS (1 << 18)
+#define GICD_TYPER_LPIS (1 << 17)
/*
* Registers (v3)
*/
-#define GICD_IROUTER(n) (0x6000 + ((n) * 8))
+#define GICD_IROUTER_BASE (0x6000)
+#define GICD_IROUTER(n) (GICD_IROUTER_BASE + ((n) * 8))
+#define GICD_IROUTER_IRM (31)
#define GICD_PIDR4 0xFFD0
#define GICD_PIDR5 0xFFD4
@@ -84,7 +92,11 @@
/* Redistributor registers */
#define GICR_CTLR GICD_CTLR
-#define GICR_CTLR_LPI_ENABLE (1 << 0)
+#define GICR_CTLR_RWP (1 << 3)
+#define GICR_CTLR_UWP (1 << 31)
+#define GICR_CTLR_LPI_ENABLE (1 << 0)
+#define GICR_CTLR_DPG1NS (1 << 25)
+#define GICR_CTLR_DPG0 (1 << 24)
#define GICR_PIDR2 GICD_PIDR2
@@ -97,6 +109,10 @@
#define GICR_TYPER_CPUNUM(x) \
(((x) & GICR_TYPER_CPUNUM_MASK) >> GICR_TYPER_CPUNUM_SHIFT)
#define GICR_TYPER_AFF_SHIFT (32)
+#define GICR_TYPER_AFF0(x) ((x >> GICR_TYPER_AFF_SHIFT) & 0xff)
+#define GICR_TYPER_AFF1(x) ((x >> (GICR_TYPER_AFF_SHIFT + 8)) & 0xff)
+#define GICR_TYPER_AFF2(x) ((x >> (GICR_TYPER_AFF_SHIFT + 16)) & 0xff)
+#define GICR_TYPER_AFF3(x) ((x >> (GICR_TYPER_AFF_SHIFT + 24)) & 0xff)
#define GICR_WAKER (0x0014)
#define GICR_WAKER_PS (1 << 1) /* Processor sleep */
@@ -193,8 +209,12 @@
#define GICR_I_ENABLER_SGI_MASK (0x0000FFFF)
#define GICR_I_ENABLER_PPI_MASK (0xFFFF0000)
+#define GICR_IPRIORITYR_BASE (0x0400)
#define GICR_I_PER_IPRIORITYn (GICD_I_PER_IPRIORITYn)
+#define GICR_ICFGR0_BASE (0x0C00)
+#define GICR_ICFGR1_BASE (0x0C04)
+
/* ITS registers */
#define GITS_PIDR2 GICR_PIDR2
#define GITS_PIDR2_ARCH_MASK GICR_PIDR2_ARCH_MASK
Index: sys/arm64/arm64/gic_v3_var.h
===================================================================
--- sys/arm64/arm64/gic_v3_var.h
+++ sys/arm64/arm64/gic_v3_var.h
@@ -109,6 +109,10 @@
void gic_r_write_4(device_t, bus_size_t, uint32_t var);
void gic_r_write_8(device_t, bus_size_t, uint64_t var);
+void gic_v3_alloc_maint_res(device_t);
+int gic_v3_setup_maint_intr(driver_filter_t, driver_intr_t, void *);
+int gic_v3_teardown_maint_intr(void);
+
/*
* GIC Distributor accessors.
* Notice that only GIC sofc can be passed.
Index: sys/arm64/arm64/hyp_stub.S
===================================================================
--- /dev/null
+++ sys/arm64/arm64/hyp_stub.S
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+__FBSDID("$FreeBSD$");
+
+ .text
+
+/*
+ * Install a new exception vector table with the base address supplied by the
+ * parameter in register x0.
+ */
+ENTRY(handle_stub_el1h_sync)
+ msr vbar_el2, x0
+ eret
+END(handle_stub_el1h_sync)
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .align 11
+ .globl hyp_stub_vectors
+hyp_stub_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* SError EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* SError EL2h */
+
+ vector stub_el1h_sync /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* SError 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* SError 32-bit EL1 */
Index: sys/arm64/arm64/locore.S
===================================================================
--- sys/arm64/arm64/locore.S
+++ sys/arm64/arm64/locore.S
@@ -213,6 +213,11 @@
END(mpentry)
#endif
+ .align 3
+ .globl _C_LABEL(hypmode_enabled)
+_C_LABEL(hypmode_enabled):
+ .zero 8
+
/*
* If we are started in EL2, configure the required hypervisor
* registers and drop to EL1.
@@ -224,8 +229,22 @@
b.eq 1f
ret
1:
+ /*
+ * If the MMU is active, then it is using a page table where VA == PA.
+ * But the page table won't have entries for the hypervisor EL2
+ * initialization code which is loaded into memory with the vmm module.
+ *
+ * So we disable the MMU in EL2 to make the vmm hypervisor code run
+ * successfully.
+ */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, SCTLR_M
+ msr sctlr_el2, x2
+ isb
+
/* Configure the Hypervisor */
- mov x2, #(HCR_RW)
+ mov x2, #(HCR_RW & ~HCR_HCD)
msr hcr_el2, x2
/* Load the Virtualization Process ID Register */
@@ -256,10 +275,18 @@
msr cntvoff_el2, xzr
/* Hypervisor trap functions */
- adrp x2, hyp_vectors
- add x2, x2, :lo12:hyp_vectors
+ adrp x2, hyp_stub_vectors
msr vbar_el2, x2
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x2, #VTTBR_HOST
+ msr vttbr_el2, x2
+
+ /* Mark hypervisor mode as enabled */
+ mov x1, #1
+ adr x2, hypmode_enabled
+ str x1, [x2]
+
mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
msr spsr_el2, x2
@@ -288,6 +315,10 @@
.quad SCTLR_RES1
LEND(drop_to_el1)
+hcr:
+ /* Make sure the HVC instruction is not disabled */
+ .quad (HCR_RW & ~HCR_HCD)
+
#define VECT_EMPTY \
.align 7; \
1: b 1b
@@ -754,6 +785,8 @@
ENTRY(abort)
b abort
+
+ .align 12 /* 4KiB aligned */
END(abort)
.align 3
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -407,6 +407,8 @@
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
+static uint64_t pa_range_bits = 0;
+
/*
* These load the old table data and store the new value.
* They need to be atomic as the System MMU may write to the table at
@@ -431,9 +433,19 @@
memcpy(d, s, PAGE_SIZE);
}
+#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK)
+#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
+#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
+#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
+
+#define STAGE2_L1_ADDR_MASK ((1UL << (pa_range_bits - L1_SHIFT)) - 1)
+#define pmap_stage2_l1_index(va) (((va) >> L1_SHIFT) & STAGE2_L1_ADDR_MASK)
+
static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{
+ KASSERT(pmap->pm_stage != PM_STAGE2,
+ ("Level 0 table is invalid for PM_STAGE2 pmap"));
return (&pmap->pm_l0[pmap_l0_index(va)]);
}
@@ -450,6 +462,9 @@
static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
+ if (pmap->pm_stage == PM_STAGE2)
+ return (&pmap->pm_l0[pmap_stage2_l1_index(va)]);
+
pd_entry_t *l0;
l0 = pmap_l0(pmap, va);
@@ -459,6 +474,32 @@
return (pmap_l0_to_l1(l0, va));
}
+static __inline vm_page_t
+pmap_l1pg(pmap_t pmap, vm_offset_t va)
+{
+ if (pmap->pm_stage == PM_STAGE1) {
+ pd_entry_t *l0, tl0;
+
+ l0 = pmap_l0(pmap, va);
+ tl0 = pmap_load(l0);
+
+ return (PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK));
+ } else {
+ vm_paddr_t pa, pa_offset;
+
+ /*
+ * The offset will be the bits
+ * [pa_range_bits-1:L0_SHIFT]
+ */
+ va = va & ((1 << pa_range_bits) - 1);
+ pa_offset = va >> L0_SHIFT;
+ pa = DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0) + \
+ (pa_offset << PAGE_SHIFT);
+
+ return (PHYS_TO_VM_PAGE(pa));
+ }
+}
+
static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1p, vm_offset_t va)
{
@@ -519,18 +560,28 @@
{
pd_entry_t *l0, *l1, *l2, desc;
- l0 = pmap_l0(pmap, va);
- desc = pmap_load(l0) & ATTR_DESCR_MASK;
- if (desc != L0_TABLE) {
- *level = -1;
- return (NULL);
- }
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0 = pmap_l0(pmap, va);
+ desc = pmap_load(l0) & ATTR_DESCR_MASK;
+ if (desc != L0_TABLE) {
+ *level = -1;
+ return (NULL);
+ }
- l1 = pmap_l0_to_l1(l0, va);
- desc = pmap_load(l1) & ATTR_DESCR_MASK;
- if (desc != L1_TABLE) {
- *level = 0;
- return (l0);
+ l1 = pmap_l0_to_l1(l0, va);
+ desc = pmap_load(l1) & ATTR_DESCR_MASK;
+ if (desc != L1_TABLE) {
+ *level = 0;
+ return (l0);
+ }
+ } else {
+ l1 = pmap_l1(pmap, va);
+ desc = pmap_load(l1) & ATTR_DESCR_MASK;
+ if (desc != L1_TABLE) {
+ /* For PM_STAGE2 mappings the first level is level 1 */
+ *level = -1;
+ return (NULL);
+ }
}
l2 = pmap_l1_to_l2(l1, va);
@@ -607,13 +658,18 @@
if (pmap->pm_l0 == NULL)
return (false);
- l0p = pmap_l0(pmap, va);
- *l0 = l0p;
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0p = pmap_l0(pmap, va);
+ *l0 = l0p;
- if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
- return (false);
+ if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
+ return (false);
- l1p = pmap_l0_to_l1(l0p, va);
+ l1p = pmap_l0_to_l1(l0p, va);
+ } else {
+ *l0 = NULL;
+ l1p = pmap_l1(pmap, va);
+ }
*l1 = l1p;
if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
@@ -948,6 +1004,7 @@
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
vm_size_t kernlen)
{
+ uint64_t id_aa64mmfr0_el1;
vm_offset_t freemempos;
vm_offset_t dpcpu, msgbufpv;
vm_paddr_t start_pa, pa, min_pa;
@@ -1036,6 +1093,35 @@
physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);
+ id_aa64mmfr0_el1 = READ_SPECIALREG(id_aa64mmfr0_el1);
+ switch (ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1)) {
+ case ID_AA64MMFR0_PARange_4G:
+ pa_range_bits = 32;
+ break;
+ case ID_AA64MMFR0_PARange_64G:
+ pa_range_bits = 36;
+ break;
+ case ID_AA64MMFR0_PARange_1T:
+ pa_range_bits = 40;
+ break;
+ case ID_AA64MMFR0_PARange_4T:
+ pa_range_bits = 42;
+ break;
+ case ID_AA64MMFR0_PARange_16T:
+ pa_range_bits = 44;
+ break;
+ case ID_AA64MMFR0_PARange_256T:
+ pa_range_bits = 48;
+ break;
+ default:
+ /*
+ * Unknown PA range bits, will lead to a panic if a stage 2
+ * pmap starting at level 1 is created.
+ */
+ pa_range_bits = 0;
+ break;
+ }
+
cpu_tlb_flushID();
}
@@ -1619,10 +1705,12 @@
*/
if (m->pindex >= (NUL2E + NUL1E)) {
/* l1 page */
- pd_entry_t *l0;
+ if (pmap->pm_stage == PM_STAGE1) {
+ pd_entry_t *l0;
- l0 = pmap_l0(pmap, va);
- pmap_clear(l0);
+ l0 = pmap_l0(pmap, va);
+ pmap_clear(l0);
+ }
} else if (m->pindex >= NUL2E) {
/* l2 page */
pd_entry_t *l1;
@@ -1648,12 +1736,16 @@
pmap_unwire_l3(pmap, va, l2pg, free);
} else if (m->pindex < (NUL2E + NUL1E)) {
/* We just released an l2, unhold the matching l1 */
- pd_entry_t *l0, tl0;
vm_page_t l1pg;
+ pd_entry_t *l0, tl0;
- l0 = pmap_l0(pmap, va);
- tl0 = pmap_load(l0);
- l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0 = pmap_l0(pmap, va);
+ tl0 = pmap_load(l0);
+ l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ } else {
+ l1pg = pmap_l1pg(pmap, va);
+ }
pmap_unwire_l3(pmap, va, l1pg, free);
}
pmap_invalidate_page(pmap, va);
@@ -1728,12 +1820,48 @@
{
vm_page_t m;
+ KASSERT((stage == PM_STAGE1 || stage == PM_STAGE2),
+ ("Invalid pmap stage %d", stage));
+ KASSERT(!((stage == PM_STAGE2) && (pa_range_bits == 0)),
+ ("Unknown PARange bits"));
+
/*
* allocate the l0 page
*/
- while ((m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
- VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
- vm_wait(NULL);
+ if (stage == PM_STAGE1) {
+ while ((m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
+ vm_wait(NULL);
+ } else {
+ uint64_t npages;
+ uint64_t alignment;
+
+ if (pa_range_bits <= L0_SHIFT) {
+ /*
+ * The level 1 translation table is not larger than a
+ * PM_STAGE1 level 1 table, use only one page.
+ */
+ npages = 1;
+ alignment = PAGE_SIZE;
+ } else {
+ /*
+ * The level 1 translation table is larger than a
+ * regular PM_STAGE1 level 1 table, for every x bits
+ * that is larger we need 2^x pages and the table must
+ * be aligned at a 2^(x + 12) boundary.
+ *
+ * See Table D5-25 and Example D4-5 from the DDI0487B
+ * ARMv8 Architecture Manual for more information.
+ */
+ npages = 1 << (pa_range_bits - L0_SHIFT);
+ alignment = 1 << (PAGE_SHIFT + pa_range_bits - L0_SHIFT);
+ }
+ while ((m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO,
+ npages, DMAP_MIN_PHYSADDR, DMAP_MAX_PHYSADDR,
+ alignment, 0, VM_MEMATTR_DEFAULT)) == NULL)
+ vm_wait(NULL);
+ }
pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
@@ -1742,6 +1870,7 @@
pagezero(pmap->pm_l0);
pmap->pm_root.rt_root = 0;
+ pmap->pm_stage = stage;
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
@@ -1852,25 +1981,30 @@
pd_entry_t tl0;
l1index = ptepindex - NUL2E;
- l0index = l1index >> L0_ENTRIES_SHIFT;
-
- l0 = &pmap->pm_l0[l0index];
- tl0 = pmap_load(l0);
- if (tl0 == 0) {
- /* recurse for allocating page dir */
- if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
- lockp) == NULL) {
- vm_page_unwire_noq(m);
- vm_page_free_zero(m);
- return (NULL);
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0index = l1index >> L0_ENTRIES_SHIFT;
+ l0 = &pmap->pm_l0[l0index];
+ tl0 = pmap_load(l0);
+ if (tl0 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ l1pg->ref_count++;
}
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+ l1 = &l1[ptepindex & Ln_ADDR_MASK];
} else {
- l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ l1pg = pmap_l1pg(pmap, l1index);
l1pg->ref_count++;
+ l1 = &pmap->pm_l0[l1index & STAGE2_L1_ADDR_MASK];
}
-
- l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
- l1 = &l1[ptepindex & Ln_ADDR_MASK];
pmap_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
} else {
vm_pindex_t l0index, l1index;
@@ -1878,24 +2012,40 @@
pd_entry_t tl0, tl1;
l1index = ptepindex >> Ln_ENTRIES_SHIFT;
- l0index = l1index >> L0_ENTRIES_SHIFT;
-
- l0 = &pmap->pm_l0[l0index];
- tl0 = pmap_load(l0);
- if (tl0 == 0) {
- /* recurse for allocating page dir */
- if (_pmap_alloc_l3(pmap, NUL2E + l1index,
- lockp) == NULL) {
- vm_page_unwire_noq(m);
- vm_page_free_zero(m);
- return (NULL);
- }
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0index = l1index >> L0_ENTRIES_SHIFT;
+ l0 = &pmap->pm_l0[l0index];
tl0 = pmap_load(l0);
- l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
- l1 = &l1[l1index & Ln_ADDR_MASK];
+ if (tl0 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ tl0 = pmap_load(l0);
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+ l1 = &l1[l1index & Ln_ADDR_MASK];
+ } else {
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+ l1 = &l1[l1index & Ln_ADDR_MASK];
+ tl1 = pmap_load(l1);
+ if (tl1 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+ l2pg->ref_count++;
+ }
+ }
} else {
- l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
- l1 = &l1[l1index & Ln_ADDR_MASK];
+ l1 = &pmap->pm_l0[l1index & STAGE2_L1_ADDR_MASK];
tl1 = pmap_load(l1);
if (tl1 == 0) {
/* recurse for allocating page dir */
@@ -2085,9 +2235,27 @@
mtx_unlock_spin(&set->asid_set_mutex);
}
- m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
- vm_page_unwire_noq(m);
- vm_page_free_zero(m);
+ if (pmap->pm_stage == PM_STAGE1) {
+ m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ } else {
+ uint64_t i, page_cnt;
+ vm_paddr_t pa;
+
+ if (pa_range_bits < L0_SHIFT)
+ page_cnt = 1;
+ else
+ page_cnt = 1 << (pa_range_bits - L0_SHIFT);
+
+ pa = DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0);
+ for (i = 0; i < page_cnt; i++) {
+ m = PHYS_TO_VM_PAGE(pa);
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ pa += PAGE_SIZE;
+ }
+ }
}
static int
@@ -2456,7 +2624,7 @@
vm_page_t m;
mtx_lock(&pv_chunks_mutex);
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
@@ -2942,7 +3110,7 @@
* released. Otherwise, a concurrent
* pmap_remove_all() on a physical page
* could return while a stale TLB entry
- * still provides access to that page.
+ * still provides access to that page.
*/
if (va != eva) {
pmap_invalidate_range(pmap, va,
@@ -3003,12 +3171,14 @@
if (pmap->pm_stats.resident_count == 0)
break;
- l0 = pmap_l0(pmap, sva);
- if (pmap_load(l0) == 0) {
- va_next = (sva + L0_SIZE) & ~L0_OFFSET;
- if (va_next < sva)
- va_next = eva;
- continue;
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0 = pmap_l0(pmap, sva);
+ if (pmap_load(l0) == 0) {
+ va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
}
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
@@ -3862,33 +4032,19 @@
new_l3 |= ATTR_S1_UXN;
if (pmap != kernel_pmap)
new_l3 |= ATTR_S1_nG;
- } else {
- /*
- * Clear the access flag on executable mappings, this will be
- * set later when the page is accessed. The fault handler is
- * required to invalidate the I-cache.
- *
- * TODO: Switch to the valid flag to allow hardware management
- * of the access flag. Much of the pmap code assumes the
- * valid flag is set and fails to destroy the old page tables
- * correctly if it is clear.
- */
- if (prot & VM_PROT_EXECUTE)
- new_l3 &= ~ATTR_AF;
- }
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- new_l3 |= ATTR_SW_MANAGED;
- if ((prot & VM_PROT_WRITE) != 0) {
- new_l3 |= ATTR_SW_DBM;
- if ((flags & VM_PROT_WRITE) == 0) {
- if (pmap->pm_stage == PM_STAGE1)
- new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
- else
- new_l3 &=
- ~ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ new_l3 |= ATTR_SW_MANAGED;
+ if ((prot & VM_PROT_WRITE) != 0) {
+ new_l3 |= ATTR_SW_DBM;
+ if ((flags & VM_PROT_WRITE) == 0)
+ new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
}
}
+ } else {
+ new_l3 = (pd_entry_t)(pa | ATTR_ST2_DEFAULT | L3_PAGE);
}
+ if ((flags & PMAP_ENTER_WIRED) != 0)
+ new_l3 |= ATTR_SW_WIRED;
CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
@@ -3942,6 +4098,7 @@
}
/* We need to allocate an L3 table. */
}
+
if (va < VM_MAXUSER_ADDRESS) {
nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
@@ -4776,7 +4933,7 @@
pmap_abort_ptp(dst_pmap, addr, dstmpte);
goto out;
}
- /* Have we copied all of the valid mappings? */
+ /* Have we copied all of the valid mappings? */
if (dstmpte->ref_count >= srcmpte->ref_count)
break;
}
@@ -5076,7 +5233,7 @@
switch(lvl) {
case 1:
pte = pmap_l1_to_l2(pde, pv->pv_va);
- tpte = pmap_load(pte);
+ tpte = pmap_load(pte);
KASSERT((tpte & ATTR_DESCR_MASK) ==
L2_BLOCK,
("Attempting to remove an invalid "
Index: sys/arm64/include/armreg.h
===================================================================
--- sys/arm64/include/armreg.h
+++ sys/arm64/include/armreg.h
@@ -209,7 +209,7 @@
#define ISS_DATA_DFSC_TLB_CONFLICT (0x30 << 0)
#define ESR_ELx_IL (0x01 << 25)
#define ESR_ELx_EC_SHIFT 26
-#define ESR_ELx_EC_MASK (0x3f << 26)
+#define ESR_ELx_EC_MASK (0x3f << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
#define EXCP_UNKNOWN 0x00 /* Unkwn exception */
#define EXCP_TRAP_WFI_WFE 0x01 /* Trapped WFI or WFE */
@@ -220,10 +220,10 @@
#define EXCP_HVC 0x16 /* HVC trap */
#define EXCP_MSR 0x18 /* MSR/MRS trap */
#define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */
-#define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */
+#define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */
#define EXCP_PC_ALIGN 0x22 /* PC alignment fault */
#define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */
-#define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */
+#define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */
#define EXCP_SP_ALIGN 0x26 /* SP slignment fault */
#define EXCP_TRAP_FP 0x2c /* Trapped FP exception */
#define EXCP_SERROR 0x2f /* SError interrupt */
Index: sys/arm64/include/bitops.h
===================================================================
--- /dev/null
+++ sys/arm64/include/bitops.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) TODO
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM_BITOPS_H_
+#define _ARM_BITOPS_H_
+
+#include <sys/bitstring.h>
+
+#define for_each_set_bit(bit, addr, size) \
+ for (bit_ffs((bitstr_t *)(addr), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffs_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for (bit_ffs_at((bitstr_t *)(addr), (bit), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffs_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
+#define for_each_clear_bit(bit, addr, size) \
+ for (bit_ffc((bitstr_t *)(addr), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffc_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
+/* same as for_each_clear_bit() but use bit as value to start with */
+#define for_each_clear_bit_from(bit, addr, size) \
+ for (bit_ffc_at((bitstr_t *)(addr), (bit), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffc_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
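+/*
+ * Usage sketch; handle_irq() is a hypothetical consumer:
+ *
+ *	uint64_t pending = 0x14;
+ *	int bit;
+ *
+ *	for_each_set_bit(bit, &pending, 64)
+ *		handle_irq(bit);
+ *
+ * which visits bits 2 and 4.
+ */
+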
+#endif /* _ARM_BITOPS_H_ */
Index: sys/arm64/include/cpu.h
===================================================================
--- sys/arm64/include/cpu.h
+++ sys/arm64/include/cpu.h
@@ -115,6 +115,7 @@
#define CPU_IMPL_TO_MIDR(val) (((val) & 0xff) << 24)
#define CPU_PART_TO_MIDR(val) (((val) & 0xfff) << 4)
+#define CPU_ARCH_TO_MIDR(val) (((val) & 0xf) << 16)
#define CPU_VAR_TO_MIDR(val) (((val) & 0xf) << 20)
#define CPU_REV_TO_MIDR(val) (((val) & 0xf) << 0)
Index: sys/arm64/include/hypervisor.h
===================================================================
--- sys/arm64/include/hypervisor.h
+++ sys/arm64/include/hypervisor.h
@@ -182,4 +182,35 @@
#define VTTBR_VMID_SHIFT 48
#define VTTBR_HOST 0x0000000000000000
+/* VTCR_EL2 - Virtualization Translation Control Register */
+#define VTCR_EL2_RES1 (0x1 << 31)
+#define VTCR_EL2_T0SZ_MASK 0x3f
+#define VTCR_EL2_SL0_SHIFT 6
+#define VTCR_EL2_SL0_4K_LVL2 (0x0 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_4K_LVL1 (0x1 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_4K_LVL0 (0x2 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_IRGN0_SHIFT 8
+#define VTCR_EL2_IRGN0_WBWA (0x1 << VTCR_EL2_IRGN0_SHIFT)
+#define VTCR_EL2_ORGN0_SHIFT 10
+#define VTCR_EL2_ORGN0_WBWA (0x1 << VTCR_EL2_ORGN0_SHIFT)
+#define VTCR_EL2_SH0_SHIFT 12
+#define VTCR_EL2_SH0_NS (0x0 << VTCR_EL2_SH0_SHIFT)
+#define VTCR_EL2_SH0_OS (0x2 << VTCR_EL2_SH0_SHIFT)
+#define VTCR_EL2_SH0_IS (0x3 << VTCR_EL2_SH0_SHIFT)
+#define VTCR_EL2_TG0_SHIFT 14
+#define VTCR_EL2_TG0_4K (0x0 << VTCR_EL2_TG0_SHIFT)
+#define VTCR_EL2_TG0_64K (0x1 << VTCR_EL2_TG0_SHIFT)
+#define VTCR_EL2_TG0_16K (0x2 << VTCR_EL2_TG0_SHIFT)
+#define VTCR_EL2_PS_SHIFT 16
+#define VTCR_EL2_PS_32BIT (0x0 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_36BIT (0x1 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_40BIT (0x2 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_42BIT (0x3 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_44BIT (0x4 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_48BIT (0x5 << VTCR_EL2_PS_SHIFT)
+
+/* HPFAR_EL2 - Hypervisor IPA Fault Address Register */
+#define HPFAR_EL2_FIPA_SHIFT 4
+#define HPFAR_EL2_FIPA_MASK 0xfffffffff0
+
#endif /* !_MACHINE_HYPERVISOR_H_ */
Index: sys/arm64/include/pcpu.h
===================================================================
--- sys/arm64/include/pcpu.h
+++ sys/arm64/include/pcpu.h
@@ -43,6 +43,7 @@
u_int pc_acpi_id; /* ACPI CPU id */ \
u_int pc_midr; /* stored MIDR value */ \
uint64_t pc_clock; \
+ void *pc_vcpu; \
pcpu_bp_harden pc_bp_harden; \
pcpu_ssbd pc_ssbd; \
struct pmap *pc_curpmap; \
Index: sys/arm64/include/pmap.h
===================================================================
--- sys/arm64/include/pmap.h
+++ sys/arm64/include/pmap.h
@@ -188,6 +188,7 @@
pd_entry_t **, pt_entry_t **);
int pmap_fault(pmap_t, uint64_t, uint64_t);
+int pmap_pinit_type(pmap_t, enum pmap_stage);
/* System MMU (SMMU). */
int pmap_senter(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_prot_t prot,
Index: sys/arm64/include/pte.h
===================================================================
--- sys/arm64/include/pte.h
+++ sys/arm64/include/pte.h
@@ -99,6 +99,35 @@
#define ATTR_DESCR_TYPE_TABLE 2
#define ATTR_DESCR_TYPE_PAGE 2
#define ATTR_DESCR_TYPE_BLOCK 0
+/* Stage 2 translation Block and Page attributes */
+#define ATTR_ST2_AF ATTR_AF
+#define ATTR_ST2_SH(x) ATTR_SH(x)
+#define ATTR_ST2_SH_MASK ATTR_SH_MASK
+#define ATTR_ST2_SH_NS ATTR_SH_NS /* Non-shareable */
+#define ATTR_ST2_SH_OS ATTR_SH_OS /* Outer-shareable */
+#define ATTR_ST2_SH_IS ATTR_SH_IS /* Inner-shareable */
+#define ATTR_ST2_S2AP(x) ((x) << 6) /* Data access permissions */
+#define ATTR_ST2_S2AP_NONE (0 << 1)
+#define ATTR_ST2_S2AP_R0 (1 << 0)
+#define ATTR_ST2_S2AP_W0 (1 << 1)
+#define ATTR_ST2_S2AP_RW (3 << 0)
+#define ATTR_ST2_MEMATTR(x) ((x) << 2) /* Memory attributes */
+#define ATTR_ST2_MEM_DEV (0 << 2) /* Device memory */
+#define ATTR_ST2_MEM_DEV_nGnRnE (0 << 0)
+#define ATTR_ST2_MEM_DEV_nGnRE (1 << 0)
+#define ATTR_ST2_MEM_DEV_nGRE (1 << 1)
+#define ATTR_ST2_MEM_DEV_GRE (3 << 0)
+#define ATTR_ST2_MEM_ONC (1 << 2) /* Outer Non-cacheable */
+#define ATTR_ST2_MEM_OWT (1 << 2) /* Outer Write-Through Cacheable */
+#define ATTR_ST2_MEM_OWB (3 << 2) /* Outer Write-Back Cacheable */
+#define ATTR_ST2_MEM_INC (1 << 0) /* Inner Non-cacheable */
+#define ATTR_ST2_MEM_IWT (1 << 1) /* Inner Write-Through Cacheable */
+#define ATTR_ST2_MEM_IWB (3 << 0) /* Inner Write-Back Cacheable */
+
+#define ATTR_ST2_DEFAULT (ATTR_ST2_AF | ATTR_ST2_SH(ATTR_ST2_SH_IS) | \
+ ATTR_ST2_S2AP(ATTR_ST2_S2AP_RW) | \
+ ATTR_ST2_MEMATTR(ATTR_ST2_MEM_OWB | ATTR_ST2_MEM_IWB))
+
/* Level 0 table, 512GiB per entry */
#define L0_SHIFT 39
Index: sys/arm64/include/vmm.h
===================================================================
--- /dev/null
+++ sys/arm64/include/vmm.h
@@ -0,0 +1,428 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_TRIPLEFAULT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_X0,
+ VM_REG_GUEST_X1,
+ VM_REG_GUEST_X2,
+ VM_REG_GUEST_X3,
+ VM_REG_GUEST_X4,
+ VM_REG_GUEST_X5,
+ VM_REG_GUEST_X6,
+ VM_REG_GUEST_X7,
+ VM_REG_GUEST_X8,
+ VM_REG_GUEST_X9,
+ VM_REG_GUEST_X10,
+ VM_REG_GUEST_X11,
+ VM_REG_GUEST_X12,
+ VM_REG_GUEST_X13,
+ VM_REG_GUEST_X14,
+ VM_REG_GUEST_X15,
+ VM_REG_GUEST_X16,
+ VM_REG_GUEST_X17,
+ VM_REG_GUEST_X18,
+ VM_REG_GUEST_X19,
+ VM_REG_GUEST_X20,
+ VM_REG_GUEST_X21,
+ VM_REG_GUEST_X22,
+ VM_REG_GUEST_X23,
+ VM_REG_GUEST_X24,
+ VM_REG_GUEST_X25,
+ VM_REG_GUEST_X26,
+ VM_REG_GUEST_X27,
+ VM_REG_GUEST_X28,
+ VM_REG_GUEST_X29,
+ VM_REG_GUEST_LR,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_ELR,
+ VM_REG_GUEST_SPSR,
+ VM_REG_ELR_EL2,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_memory_segment;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct pmap;
+struct hypctx;
+
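+/*
+ * Function types implemented by an architecture-specific VMM backend and
+ * collected in 'struct vmm_ops' below.  The 'vmi' argument passed to the
+ * per-VM hooks is the opaque cookie returned by the vminit hook.
+ */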
+typedef int (*vmm_init_func_t)(int ipinum);
+typedef int (*vmm_cleanup_func_t)(void);
+typedef void (*vmm_resume_func_t)(void);
+typedef void * (*vmi_init_func_t)(struct vm *vm);
+typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
+ struct pmap *pmap, void *rendezvous_cookie,
+ void *suspend_cookie);
+typedef void (*vmi_cleanup_func_t)(void *vmi);
+typedef void (*vmi_mmap_set_func_t)(void *arg, vm_offset_t va,
+ vm_offset_t pa, size_t len,
+ vm_prot_t prot);
+typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *arg, vm_offset_t va);
+typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
+ uint64_t *retval);
+typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
+ uint64_t val);
+typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
+typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
+typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
+typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+
+struct vmm_ops {
+ /* Module-wide functions */
+ vmm_init_func_t init;
+ vmm_cleanup_func_t cleanup;
+ vmm_resume_func_t resume;
+ /* VM specific functions */
+ vmi_init_func_t vminit;
+ vmi_run_func_t vmrun;
+ vmi_cleanup_func_t vmcleanup;
+ vmi_mmap_set_func_t vmmapset;
+ vmi_mmap_get_func_t vmmapget;
+ vmi_get_register_t vmgetreg;
+ vmi_set_register_t vmsetreg;
+ vmi_get_cap_t vmgetcap;
+ vmi_set_cap_t vmsetcap;
+};
+
+extern struct vmm_ops vmm_ops_arm;
+
+int vm_create(const char *name, struct vm **retvm);
+void vm_destroy(struct vm *vm);
+const char *vm_name(struct vm *vm);
+int vm_malloc(struct vm *vm, uint64_t gpa, size_t len);
+uint64_t vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t size);
+int vm_gpabase2memseg(struct vm *vm, uint64_t gpabase,
+ struct vm_memory_segment *seg);
+boolean_t vm_mem_allocated(struct vm *vm, uint64_t gpa);
+int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
+int vm_run(struct vm *vm, struct vm_run *vmrun);
+void* vm_get_cookie(struct vm *vm);
+uint16_t vm_get_maxcpus(struct vm *vm);
+int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
+int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
+int vm_activate_cpu(struct vm *vm, int vcpu);
+int vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+
+#ifdef _SYS__CPUSET_H_
+/*
+ * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
+ * The rendezvous 'func(arg)' is not allowed to do anything that will
+ * cause the thread to be put to sleep.
+ *
+ * If the rendezvous is being initiated from a vcpu context then the
+ * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
+ *
+ * The caller cannot hold any locks when initiating the rendezvous.
+ *
+ * The implementation of this API may cause vcpus other than those specified
+ * by 'dest' to be stalled. The caller should not rely on any vcpus making
+ * forward progress when the rendezvous is in progress.
+ */
+typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
+void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
+ vm_rendezvous_func_t func, void *arg);
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+#endif /* _SYS__CPUSET_H_ */
+
+extern uint64_t hypmode_enabled;
+static __inline bool
+virt_enabled(void)
+{
+ return (hypmode_enabled != 0);
+}
+
+static __inline int
+vcpu_rendezvous_pending(void *rendezvous_cookie)
+{
+
+ return (*(uintptr_t *)rendezvous_cookie != 0);
+}
+
+static __inline int
+vcpu_suspended(void *suspend_cookie)
+{
+
+ return (*(int *)suspend_cookie);
+}
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
+ bool from_idle);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vm *vm, int vcpu)
+{
+
+ if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED))
+ return (1);
+ else if (curthread->td_owepreempt)
+ return (1);
+ else
+ return (0);
+}
+#endif
+
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+
+/*
+ * This function is called after a VM-exit that occurred during exception or
+ * interrupt delivery through the IDT. The format of 'intinfo' is described
+ * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
+ *
+ * If a VM-exit handler completes the event delivery successfully then it
+ * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
+ * if the task switch emulation is triggered via a task gate then it should
+ * call this function with 'intinfo=0' to indicate that the external event
+ * is not pending anymore.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
+
+/*
+ * This function is called before every VM-entry to retrieve a pending
+ * event that should be injected into the guest. This function combines
+ * nested events into a double or triple fault.
+ *
+ * Returns 0 if there are no events that need to be injected into the guest
+ * and non-zero otherwise.
+ */
+int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
+
+int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+ uint64_t gpa;
+ size_t len;
+ void *hva;
+ void *cookie;
+};
+
+int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
+#endif /* _KERNEL */
+
+#define VM_MAXCPU 1
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_REG_EMUL,
+ VM_EXITCODE_HVC,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_HYP,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_MAX
+};
+
+enum task_switch_reason {
+ TSR_CALL,
+ TSR_IRET,
+ TSR_JMP,
+ TSR_IDT_GATE, /* task gate in IDT */
+};
+
+struct vm_task_switch {
+ uint16_t tsssel; /* new TSS selector */
+ int ext; /* task switch due to external event */
+ uint32_t errcode;
+ int errcode_valid; /* push 'errcode' on the new stack */
+ enum task_switch_reason reason;
+};
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ /*
+ * ARM specific payload.
+ */
+ struct {
+ uint32_t exception_nr;
+ uint32_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } hyp;
+ struct {
+ struct vre vre;
+ } reg_emul;
+ struct {
+ uint64_t gpa;
+ int fault_type;
+ } paging;
+ struct {
+ uint64_t gpa;
+ struct vie vie;
+ } inst_emul;
+
+ struct {
+ struct hypctx *hypctx;
+ } wfi;
+ /*
+ * VMX specific payload. Used when there is no "better"
+ * exitcode to represent the VM-exit.
+ */
+ struct {
+ int status; /* vmx inst status */
+ /*
+ * 'exit_reason' and 'exit_qualification' are valid
+ * only if 'status' is zero.
+ */
+ uint32_t exit_reason;
+ uint64_t exit_qualification;
+ /*
+ * 'inst_error' and 'inst_type' are valid
+ * only if 'status' is non-zero.
+ */
+ int inst_type;
+ int inst_error;
+ } vmx;
+ /*
+ * SVM specific payload.
+ */
+ struct {
+ uint64_t exitcode;
+ uint64_t exitinfo1;
+ uint64_t exitinfo2;
+ } svm;
+ struct {
+#ifdef __aarch64__
+#else
+ uint32_t code; /* ecx value */
+ uint64_t wval;
+#endif
+ } msr;
+ struct {
+ int vcpu;
+ uint64_t rip;
+ } spinup_ap;
+ struct {
+ uint64_t rflags;
+ } hlt;
+ struct {
+ int vector;
+ } ioapic_eoi;
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ struct vm_task_switch task_switch;
+ } u;
+};
+
+#endif /* _VMM_H_ */
Index: sys/arm64/include/vmm_dev.h
===================================================================
--- /dev/null
+++ sys/arm64/include/vmm_dev.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memory_segment {
+ uint64_t gpa; /* in */
+ size_t len;
+ int wired;
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_run {
+ int cpuid;
+ uint64_t pc;
+ struct vm_exit vm_exit;
+
+};
+
+struct vm_exception {
+ int cpuid;
+ int vector;
+ uint32_t error_code;
+ int error_code_valid;
+ int restart_instruction;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_attach_vgic {
+ uint64_t dist_start;
+ size_t dist_size;
+ uint64_t redist_start;
+ size_t redist_size;
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_MAP_MEMORY = 10,
+ IOCNUM_GET_MEMORY_SEG = 11,
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA = 13,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+
+ /* vm_attach_vgic */
+ IOCNUM_ATTACH_VGIC = 110,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_MAP_MEMORY \
+ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
+#define VM_GET_MEMORY_SEG \
+ _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_GLA2GPA \
+ _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_ATTACH_VGIC \
+ _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_attach_vgic)
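+
+/*
+ * Example (a sketch, not part of the interface): a userspace control program
+ * would typically open the VM's device node and drive a virtual CPU with the
+ * VM_RUN ioctl.  'vmfd' below is assumed to be such a descriptor; error
+ * handling is omitted.
+ *
+ *	struct vm_run vmrun;
+ *
+ *	memset(&vmrun, 0, sizeof(vmrun));
+ *	vmrun.cpuid = 0;
+ *	ioctl(vmfd, VM_RUN, &vmrun);
+ */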
+#endif
Index: sys/arm64/include/vmm_instruction_emul.h
===================================================================
--- /dev/null
+++ sys/arm64/include/vmm_instruction_emul.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(void *vm, int cpuid, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(void *vm, int cpuid, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t mrr, mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vie'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
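+
+/*
+ * Callback sketch (illustrative; the name is a placeholder): a reg_read_t
+ * implementation that returns a constant for an emulated register.
+ *
+ *	static int
+ *	emul_reg_read(void *vm, int cpuid, uint64_t *rval, void *arg)
+ *	{
+ *		*rval = 0;
+ *		return (0);
+ *	}
+ */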
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
Index: sys/arm64/vmm/arm64.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/arm64.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_ARM64_H_
+#define _VMM_ARM64_H_
+
+#include <machine/reg.h>
+#include <machine/vfp.h>
+#include <machine/hypervisor.h>
+#include <machine/pcpu.h>
+
+#include "mmu.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+struct hypctx {
+ struct reg regs;
+
+ /* EL1 control registers */
+ uint64_t actlr_el1; /* Auxiliary Control Register */
+ uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */
+ uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */
+ uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */
+ uint64_t contextidr_el1; /* Current Process Identifier */
+ uint64_t cpacr_el1; /* Architectural Feature Access Control Register */
+ uint64_t elr_el1; /* Exception Link Register */
+ uint64_t esr_el1; /* Exception Syndrome Register */
+ uint64_t far_el1; /* Fault Address Register */
+ uint64_t fp; /* Frame Pointer */
+ uint64_t mair_el1; /* Memory Attribute Indirection Register */
+ uint64_t par_el1; /* Physical Address Register */
+ uint64_t sctlr_el1; /* System Control Register */
+ uint64_t sp_el0; /* Stack Pointer */
+ uint64_t tcr_el1; /* Translation Control Register */
+ uint64_t tpidr_el0; /* EL0 Software ID Register */
+ uint64_t tpidrro_el0; /* Read-only Thread ID Register */
+ uint64_t tpidr_el1; /* EL1 Software ID Register */
+ uint64_t ttbr0_el1; /* Translation Table Base Register 0 */
+ uint64_t ttbr1_el1; /* Translation Table Base Register 1 */
+ uint64_t vbar_el1; /* Vector Base Address Register */
+ uint32_t spsr_el1; /* Saved Program Status Register */
+
+ /* EL2 control registers */
+ uint64_t cptr_el2; /* Architectural Feature Trap Register */
+ uint64_t elr_el2; /* Exception Link Register */
+ uint64_t hcr_el2; /* Hypervisor Configuration Register */
+ uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint32_t spsr_el2; /* Saved Program Status Register */
+
+ uint32_t vcpu;
+ struct hyp *hyp;
+ struct {
+ uint64_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } exit_info;
+
+ struct vtimer_cpu vtimer_cpu;
+ struct vgic_v3_cpu_if vgic_cpu_if;
+ struct vgic_v3_redist vgic_redist;
+#ifdef VFP
+ struct vfpstate vfpstate;
+#endif
+};
+
+struct hyp {
+ pmap_t stage2_map;
+ struct hypctx ctx[VM_MAXCPU];
+ struct vgic_mmio_region *vgic_mmio_regions;
+ size_t vgic_mmio_regions_num;
+ struct vgic_v3_dist vgic_dist;
+ struct vm *vm;
+ struct vtimer vtimer;
+ uint64_t vmid_generation;
+ uint64_t vttbr_el2;
+ bool vgic_attached;
+};
+
+uint64_t vmm_call_hyp(void *hyp_func_addr, ...);
+void vmm_cleanup(void *hyp_stub_vectors);
+uint64_t vmm_enter_guest(struct hypctx *hypctx);
+uint64_t vmm_read_ich_vtr_el2(void);
+uint64_t vmm_read_cnthctl_el2(void);
+uint64_t vmm_read_tcr_el2(void);
+
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+//#define eprintf(fmt, ...) do {} while(0)
+
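+/*
+ * VTTBR_EL2 layout used below: the 8-bit VMID is placed at VTTBR_VMID_SHIFT
+ * (bits [55:48]) and the physical address of the stage 2 level 0 translation
+ * table occupies the low bits.
+ */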
+#define VMID_GENERATION_MASK ((1UL<<8) - 1)
+#define build_vttbr(vmid, ptaddr) \
+ ((((vmid) & VMID_GENERATION_MASK) << VTTBR_VMID_SHIFT) | \
+ (uint64_t)(ptaddr))
+
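+/*
+ * Helpers to build the guest MPIDR affinity fields from what is presumed to
+ * be a virtual CPU number: CPUs are grouped four to a cluster, with the
+ * cluster number in Aff1 and the CPU index within the cluster in Aff0.
+ */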
+#define MPIDR_SMP_MASK (0x3 << 30)
+#define MPIDR_AFF1_LEVEL(x) (((x) >> 2) << 8)
+#define MPIDR_AFF0_LEVEL(x) (((x) & 0x3) << 0)
+
+/*
+ * Return true if the exception was caused by a translation fault in the stage 2
+ * translation regime. The DFSC encoding for a translation fault has the format
+ * 0b0001LL, where LL (bits [1:0]) represents the level where the fault occurred
+ * (page D7-2280 of the ARMv8 Architecture Manual).
+ */
+#define ISS_DATA_DFSC_TF(esr_iss) \
+ (!((esr_iss) & 0b111000) && ((esr_iss) & 0b000100))
+#define FAR_EL2_PAGE_OFFSET(x) ((x) & PAGE_MASK)
+
+#define DEBUG_ME 0
+
+#define arm64_get_active_vcpu() ((struct hypctx *)PCPU_GET(vcpu))
+
+#endif /* !_VMM_ARM64_H_ */
Index: sys/arm64/vmm/arm64.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/arm64.c
@@ -0,0 +1,804 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/hypervisor.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+#include "hyp.h"
+#include "reset.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define HANDLED 1
+#define UNHANDLED 0
+
+#define UNUSED 0
+
+MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");
+
+extern char hyp_init_vectors[];
+extern char hyp_vectors[];
+extern char hyp_code_start[];
+extern char hyp_code_end[];
+extern char hyp_stub_vectors[];
+
+char *stack[MAXCPU];
+pmap_t hyp_pmap;
+
+static uint64_t vmid_generation = 0;
+static struct mtx vmid_generation_mtx;
+
+static inline void
+arm64_set_active_vcpu(struct hypctx *hypctx)
+{
+ PCPU_SET(vcpu, hypctx);
+}
+
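+/*
+ * Allocate a VMID for the guest and construct its VTTBR_EL2 value.  The
+ * global 'vmid_generation' counter is incremented under 'vmid_generation_mtx';
+ * its low VMID_GENERATION_MASK bits become the VMID and the remaining bits act
+ * as a generation number so that a stale VMID can be detected and replaced.
+ */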
+static void
+arm64_set_vttbr(struct hyp *hyp)
+{
+ if (hyp->vmid_generation != 0 &&
+ ((hyp->vmid_generation & ~VMID_GENERATION_MASK) !=
+ (atomic_load_acq_64(&vmid_generation) & ~VMID_GENERATION_MASK)))
+ goto out;
+
+ mtx_lock(&vmid_generation_mtx);
+
+ /* Another VCPU has changed the VMID already */
+ if (hyp->vmid_generation &&
+ ((hyp->vmid_generation & ~VMID_GENERATION_MASK) !=
+ (vmid_generation & ~VMID_GENERATION_MASK))) {
+ mtx_unlock(&vmid_generation_mtx);
+ goto out;
+ }
+
+ vmid_generation++;
+ if (!(vmid_generation & VMID_GENERATION_MASK))
+ vmid_generation++;
+
+ hyp->vmid_generation = vmid_generation;
+ mtx_unlock(&vmid_generation_mtx);
+out:
+ hyp->vttbr_el2 = build_vttbr(hyp->vmid_generation,
+ vtophys(hyp->stage2_map->pm_l0));
+}
+
+static void
+arm_init_vectors(void *arg)
+{
+ char *stack_top;
+ uint64_t tcr_el1, tcr_el2;
+ uint32_t sctlr_el2;
+ uint32_t vtcr_el2;
+ uint64_t id_aa64mmfr0_el1;
+ uint64_t pa_range_bits;
+ register_t daif;
+
+ daif = intr_disable();
+
+ arm64_set_active_vcpu(NULL);
+
+ /*
+ * Install the temporary vectors which will be responsible for
+ * initializing the VMM when we next trap into EL2.
+ *
+ * x0: the exception vector table responsible for hypervisor
+ * initialization on the next call.
+ */
+ vmm_call_hyp((void *)vtophys(hyp_init_vectors));
+
+ /* Create and map the hypervisor stack */
+ stack_top = stack[PCPU_GET(cpuid)] + PAGE_SIZE;
+
+ /* Configure address translation at EL2 */
+ tcr_el1 = READ_SPECIALREG(tcr_el1);
+ tcr_el2 = TCR_EL2_RES1;
+
+ /* Set physical address size */
+ id_aa64mmfr0_el1 = READ_SPECIALREG(id_aa64mmfr0_el1);
+ pa_range_bits = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
+ tcr_el2 |= (pa_range_bits & 0x7) << TCR_EL2_PS_SHIFT;
+
+ /* Use the same address translation attributes as the host */
+ tcr_el2 |= tcr_el1 & TCR_T0SZ_MASK;
+ tcr_el2 |= tcr_el1 & (0xff << TCR_IRGN0_SHIFT);
+
+ /*
+ * Configure the system control register for EL2:
+ *
+ * SCTLR_EL2_M: MMU on
+ * SCTLR_EL2_C: Data cacheability not affected
+ * SCTLR_EL2_I: Instruction cacheability not affected
+ * SCTLR_EL2_A: Instruction alignment check
+ * SCTLR_EL2_SA: Stack pointer alignment check
+ * SCTLR_EL2_WXN: Treat writable memory as execute never
+ * ~SCTLR_EL2_EE: Data accesses are little-endian
+ */
+ sctlr_el2 = SCTLR_EL2_RES1;
+ sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
+ sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
+ sctlr_el2 |= SCTLR_EL2_WXN;
+ sctlr_el2 &= ~SCTLR_EL2_EE;
+
+ /*
+ * Configure the Stage 2 translation control register:
+ *
+ * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
+ * normal memory
+ * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
+ * normal memory
+ * VTCR_EL2_TG0_4K: Stage 2 uses 4K pages
+ * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
+ * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
+ * shareable
+ */
+ vtcr_el2 = VTCR_EL2_RES1;
+ vtcr_el2 |= (pa_range_bits & 0x7) << VTCR_EL2_PS_SHIFT;
+ vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
+ vtcr_el2 |= VTCR_EL2_TG0_4K;
+ vtcr_el2 |= VTCR_EL2_SH0_IS;
+ if (pa_range_bits == ID_AA64MMFR0_PARange_1T) {
+ /*
+ * 40 bits of physical addresses, use concatenated level 1
+ * tables
+ */
+ vtcr_el2 |= 24 & VTCR_EL2_T0SZ_MASK;
+ vtcr_el2 |= VTCR_EL2_SL0_4K_LVL1;
+ }
+
+ /* Special call to initialize EL2 */
+ vmm_call_hyp((void *)vtophys(hyp_vectors), vtophys(hyp_pmap->pm_l0),
+ ktohyp(stack_top), tcr_el2, sctlr_el2, vtcr_el2);
+
+ intr_restore(daif);
+}
+
+static void
+arm_cleanup_vectors(void *arg)
+{
+ register_t daif;
+
+ /*
+ * vmm_cleanup() will disable the MMU. For the next few instructions,
+ * before the hardware disables the MMU, one of the following is
+ * possible:
+ *
+ * a. The instruction addresses are fetched with the MMU disabled,
+ * and they must represent the actual physical addresses. This will work
+ * because we call the vmm_cleanup() function by its physical address.
+ *
+ * b. The instruction addresses are fetched using the old translation
+ * tables. This will work because we have an identity mapping in place
+ * in the translation tables and vmm_cleanup() is called by its physical
+ * address.
+ */
+ daif = intr_disable();
+ vmm_call_hyp((void *)vtophys(vmm_cleanup), vtophys(hyp_stub_vectors));
+ intr_restore(daif);
+
+ arm64_set_active_vcpu(NULL);
+}
+
+static int
+arm_init(int ipinum)
+{
+ size_t hyp_code_len;
+ uint64_t ich_vtr_el2;
+ uint64_t cnthctl_el2;
+ int cpu;
+ register_t daif;
+
+ if (!virt_enabled()) {
+ printf("arm_init: Processor doesn't have support for virtualization.\n");
+ return (ENXIO);
+ }
+
+ mtx_init(&vmid_generation_mtx, "vmid_generation_mtx", NULL, MTX_DEF);
+
+ /* Create the mappings for the hypervisor translation table. */
+ hyp_pmap = malloc(sizeof(*hyp_pmap), M_HYP, M_WAITOK | M_ZERO);
+ hypmap_init(hyp_pmap, PM_STAGE1);
+ hyp_code_len = (size_t)hyp_code_end - (size_t)hyp_code_start;
+ hypmap_map(hyp_pmap, (vm_offset_t)hyp_code_start, hyp_code_len, VM_PROT_EXECUTE);
+
+ /* We need an identity mapping for when we activate the MMU */
+ hypmap_map_identity(hyp_pmap, (vm_offset_t)hyp_code_start, hyp_code_len,
+ VM_PROT_EXECUTE);
+
+ /* Create a per-CPU hypervisor stack */
+ CPU_FOREACH(cpu) {
+ stack[cpu] = malloc(PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ hypmap_map(hyp_pmap, (vm_offset_t)stack[cpu], PAGE_SIZE,
+ VM_PROT_READ | VM_PROT_WRITE);
+ }
+
+ smp_rendezvous(NULL, arm_init_vectors, NULL, NULL);
+
+ daif = intr_disable();
+
+ ich_vtr_el2 = vmm_call_hyp((void *)ktohyp(vmm_read_ich_vtr_el2));
+ vgic_v3_init(ich_vtr_el2);
+
+ cnthctl_el2 = vmm_call_hyp((void *)ktohyp(vmm_read_cnthctl_el2));
+ vtimer_init(cnthctl_el2);
+
+ intr_restore(daif);
+
+ return (0);
+}
+
+static int
+arm_cleanup(void)
+{
+ int cpu;
+
+ smp_rendezvous(NULL, arm_cleanup_vectors, NULL, NULL);
+
+ vtimer_cleanup();
+
+ hypmap_cleanup(hyp_pmap);
+ free(hyp_pmap, M_HYP);
+ for (cpu = 0; cpu < nitems(stack); cpu++)
+ free(stack[cpu], M_HYP);
+
+ mtx_destroy(&vmid_generation_mtx);
+
+ return (0);
+}
+
+static void *
+arm_vminit(struct vm *vm)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ bool last_vcpu;
+ int i;
+
+ hyp = malloc(sizeof(struct hyp), M_HYP, M_WAITOK | M_ZERO);
+ hyp->vm = vm;
+ hyp->vgic_attached = false;
+
+ hyp->stage2_map = malloc(sizeof(*hyp->stage2_map),
+ M_HYP, M_WAITOK | M_ZERO);
+ hypmap_init(hyp->stage2_map, PM_STAGE2);
+ arm64_set_vttbr(hyp);
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ hypctx = &hyp->ctx[i];
+ hypctx->vcpu = i;
+ hypctx->hyp = hyp;
+
+ reset_vm_el01_regs(hypctx);
+ reset_vm_el2_regs(hypctx);
+ }
+
+ vtimer_vminit(hyp);
+ vgic_v3_vminit(hyp);
+ for (i = 0; i < VM_MAXCPU; i++) {
+ hypctx = &hyp->ctx[i];
+ vtimer_cpuinit(hypctx);
+ last_vcpu = (i == VM_MAXCPU - 1);
+ vgic_v3_cpuinit(hypctx, last_vcpu);
+ }
+
+ hypmap_map(hyp_pmap, (vm_offset_t)hyp, sizeof(struct hyp),
+ VM_PROT_READ | VM_PROT_WRITE);
+
+ return (hyp);
+}
+
+static enum vm_reg_name
+get_vm_reg_name(uint32_t reg_nr, uint32_t mode __attribute__((unused)))
+{
+ switch(reg_nr) {
+ case 0:
+ return VM_REG_GUEST_X0;
+ case 1:
+ return VM_REG_GUEST_X1;
+ case 2:
+ return VM_REG_GUEST_X2;
+ case 3:
+ return VM_REG_GUEST_X3;
+ case 4:
+ return VM_REG_GUEST_X4;
+ case 5:
+ return VM_REG_GUEST_X5;
+ case 6:
+ return VM_REG_GUEST_X6;
+ case 7:
+ return VM_REG_GUEST_X7;
+ case 8:
+ return VM_REG_GUEST_X8;
+ case 9:
+ return VM_REG_GUEST_X9;
+ case 10:
+ return VM_REG_GUEST_X10;
+ case 11:
+ return VM_REG_GUEST_X11;
+ case 12:
+ return VM_REG_GUEST_X12;
+ case 13:
+ return VM_REG_GUEST_X13;
+ case 14:
+ return VM_REG_GUEST_X14;
+ case 15:
+ return VM_REG_GUEST_X15;
+ case 16:
+ return VM_REG_GUEST_X16;
+ case 17:
+ return VM_REG_GUEST_X17;
+ case 18:
+ return VM_REG_GUEST_X18;
+ case 19:
+ return VM_REG_GUEST_X19;
+ case 20:
+ return VM_REG_GUEST_X20;
+ case 21:
+ return VM_REG_GUEST_X21;
+ case 22:
+ return VM_REG_GUEST_X22;
+ case 23:
+ return VM_REG_GUEST_X23;
+ case 24:
+ return VM_REG_GUEST_X24;
+ case 25:
+ return VM_REG_GUEST_X25;
+ case 26:
+ return VM_REG_GUEST_X26;
+ case 27:
+ return VM_REG_GUEST_X27;
+ case 28:
+ return VM_REG_GUEST_X28;
+ case 29:
+ return VM_REG_GUEST_X29;
+ case 30:
+ return VM_REG_GUEST_LR;
+ case 31:
+ return VM_REG_GUEST_SP;
+ case 32:
+ return VM_REG_GUEST_ELR;
+ case 33:
+ return VM_REG_GUEST_SPSR;
+ case 34:
+ return VM_REG_ELR_EL2;
+ default:
+ break;
+ }
+
+ return (VM_REG_LAST);
+}
+
+static inline void
+arm64_print_hyp_regs(struct vm_exit *vme)
+{
+ printf("esr_el2: 0x%08x\n", vme->u.hyp.esr_el2);
+ printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
+ printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
+}
+
+static void
+arm64_gen_inst_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+ struct vie *vie;
+ uint32_t esr_sas, reg_num;
+ uint64_t page_off;
+
+ /*
+ * Get bits [47:12] of the IPA from HPFAR_EL2.
+ * At this point the 'u.hyp' member will be replaced by 'u.inst_emul'.
+ */
+ vme_ret->u.inst_emul.gpa = \
+ (vme_ret->u.hyp.hpfar_el2) >> HPFAR_EL2_FIPA_SHIFT;
+ /* The IPA is the base address of a 4KB page, make bits [11:0] zero. */
+ vme_ret->u.inst_emul.gpa = (vme_ret->u.inst_emul.gpa) << PAGE_SHIFT;
+ /* Bits [11:0] are the same as bits [11:0] from the virtual address. */
+ page_off = FAR_EL2_PAGE_OFFSET(vme_ret->u.hyp.far_el2);
+ vme_ret->u.inst_emul.gpa = vme_ret->u.inst_emul.gpa + page_off;
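+	/*
+	 * Worked example with illustrative values: HPFAR_EL2 = 0x401230 gives
+	 * an IPA page base of 0x40123000, and with FAR_EL2[11:0] = 0x2a8 the
+	 * resulting guest physical address is 0x401232a8.
+	 */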
+
+ esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
+ reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
+
+ vie = &vme_ret->u.inst_emul.vie;
+ vie->access_size = 1 << esr_sas;
+ vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
+ vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
+ vie->reg = get_vm_reg_name(reg_num, UNUSED);
+}
+
+static void
+arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+ uint32_t reg_num;
+ struct vre *vre;
+
+ /* u.hyp member will be replaced by u.reg_emul */
+ vre = &vme_ret->u.reg_emul.vre;
+
+ vre->inst_syndrome = esr_iss;
+ /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
+ vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
+ reg_num = ISS_MSR_Rt(esr_iss);
+ vre->reg = get_vm_reg_name(reg_num, UNUSED);
+}
+
+static int
+handle_el1_sync_excp(struct hyp *hyp, int vcpu, struct vm_exit *vme_ret)
+{
+ uint32_t esr_ec, esr_iss;
+
+ esr_ec = ESR_ELx_EXCEPTION(vme_ret->u.hyp.esr_el2);
+ esr_iss = vme_ret->u.hyp.esr_el2 & ESR_ELx_ISS_MASK;
+
+ switch(esr_ec) {
+ case EXCP_UNKNOWN:
+ eprintf("Unknown exception from guest\n");
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ case EXCP_HVC:
+ vme_ret->exitcode = VM_EXITCODE_HVC;
+ break;
+ case EXCP_MSR:
+ arm64_gen_reg_emul_data(esr_iss, vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
+ break;
+
+ case EXCP_DATA_ABORT_L:
+ /* Check if instruction syndrome is valid */
+ if (!(esr_iss & ISS_DATA_ISV)) {
+ eprintf("Data abort with invalid instruction syndrome\n");
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ /*
+ * Check if the data abort was caused by a translation fault.
+ * Any other type of data fault will be treated as an error.
+ */
+ if (!(ISS_DATA_DFSC_TF(esr_iss))) {
+ eprintf("Data abort not on a stage 2 translation\n");
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ arm64_gen_inst_emul_data(esr_iss, vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
+ break;
+
+ default:
+ eprintf("Unsupported synchronous exception from guest: 0x%x\n",
+ esr_ec);
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ /* We don't do any instruction emulation here */
+ return (UNHANDLED);
+}
+
+static int
+arm64_handle_world_switch(struct hyp *hyp, int vcpu, struct vm_exit *vme)
+{
+ int excp_type;
+ int handled;
+
+ excp_type = vme->u.hyp.exception_nr;
+ switch (excp_type) {
+ case EXCP_TYPE_EL1_SYNC:
+ /* The exit code will be set by handle_el1_sync_excp(). */
+ handled = handle_el1_sync_excp(hyp, vcpu, vme);
+ break;
+
+ case EXCP_TYPE_EL1_IRQ:
+ case EXCP_TYPE_EL1_FIQ:
+ /* The host kernel will handle IRQs and FIQs. */
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ case EXCP_TYPE_EL1_ERROR:
+ case EXCP_TYPE_EL2_SYNC:
+ case EXCP_TYPE_EL2_IRQ:
+ case EXCP_TYPE_EL2_FIQ:
+ case EXCP_TYPE_EL2_ERROR:
+ eprintf("Unhandled exception type: %s\n", __STRING(excp_type));
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ default:
+ eprintf("Unknown exception type: %d\n", excp_type);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+ }
+
+ return (handled);
+}
+
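+/*
+ * Enter the guest and loop on world switches.  Maintenance interrupts from
+ * the virtual GIC re-enter the guest directly; exits that cannot be handled
+ * here are recorded in 'vme' and the function returns so the caller can
+ * emulate the trapping instruction.
+ */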
+static int
+arm_vmrun(void *arg, int vcpu, register_t pc, pmap_t pmap,
+ void *rendezvous_cookie, void *suspend_cookie)
+{
+ uint64_t excp_type;
+ int handled;
+ register_t daif;
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vm *vm;
+ struct vm_exit *vme;
+
+ hyp = (struct hyp *)arg;
+ vm = hyp->vm;
+ vme = vm_exitinfo(vm, vcpu);
+
+ hypctx = &hyp->ctx[vcpu];
+ hypctx->elr_el2 = (uint64_t)pc;
+
+ for (;;) {
+ daif = intr_disable();
+ /*
+ * TODO: What happens if a timer interrupt is asserted exactly
+ * here, but for the previous VM?
+ */
+ arm64_set_active_vcpu(hypctx);
+ vgic_v3_sync_hwstate(hypctx);
+ excp_type = vmm_call_hyp((void *)ktohyp(vmm_enter_guest),
+ ktohyp(hypctx));
+ intr_restore(daif);
+
+ if (excp_type == EXCP_TYPE_MAINT_IRQ)
+ continue;
+
+ vme->pc = hypctx->elr_el2;
+ vme->inst_length = INSN_SIZE;
+ vme->u.hyp.exception_nr = excp_type;
+ vme->u.hyp.esr_el2 = hypctx->exit_info.esr_el2;
+ vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
+ vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
+
+ handled = arm64_handle_world_switch(hyp, vcpu, vme);
+ if (handled == UNHANDLED)
+ /* Exit loop to emulate instruction. */
+ break;
+ else
+ /* Resume guest execution from the next instruction. */
+ hypctx->elr_el2 += vme->inst_length;
+ }
+
+ return (0);
+}
+
+static void
+arm_deactivate_pcpu(void *arg)
+{
+ struct hyp *hyp = arg;
+ int maxcpu;
+ int i;
+
+ maxcpu = vm_get_maxcpus(hyp->vm);
+ for (i = 0; i < maxcpu; i++)
+ if (arm64_get_active_vcpu() == &hyp->ctx[i])
+ arm64_set_active_vcpu(NULL);
+}
+
+static void
+arm_vmcleanup(void *arg)
+{
+ struct hyp *hyp = arg;
+
+ smp_rendezvous(NULL, arm_deactivate_pcpu, NULL, hyp);
+
+ vtimer_vmcleanup(arg);
+ vgic_v3_detach_from_vm(arg);
+
+ /* Unmap the VM hyp struct from the hyp mode translation table */
+ hypmap_map(hyp_pmap, (vm_offset_t)hyp, sizeof(struct hyp),
+ VM_PROT_NONE);
+ hypmap_cleanup(hyp->stage2_map);
+ free(hyp->stage2_map, M_HYP);
+ free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer to the register's storage. Registers have different sizes,
+ * so the caller must cast the pointer to the appropriate type before
+ * dereferencing it.
+ */
+static void *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+ switch (reg) {
+ case VM_REG_GUEST_X0:
+ return (&hypctx->regs.x[0]);
+ case VM_REG_GUEST_X1:
+ return (&hypctx->regs.x[1]);
+ case VM_REG_GUEST_X2:
+ return (&hypctx->regs.x[2]);
+ case VM_REG_GUEST_X3:
+ return (&hypctx->regs.x[3]);
+ case VM_REG_GUEST_X4:
+ return (&hypctx->regs.x[4]);
+ case VM_REG_GUEST_X5:
+ return (&hypctx->regs.x[5]);
+ case VM_REG_GUEST_X6:
+ return (&hypctx->regs.x[6]);
+ case VM_REG_GUEST_X7:
+ return (&hypctx->regs.x[7]);
+ case VM_REG_GUEST_X8:
+ return (&hypctx->regs.x[8]);
+ case VM_REG_GUEST_X9:
+ return (&hypctx->regs.x[9]);
+ case VM_REG_GUEST_X10:
+ return (&hypctx->regs.x[10]);
+ case VM_REG_GUEST_X11:
+ return (&hypctx->regs.x[11]);
+ case VM_REG_GUEST_X12:
+ return (&hypctx->regs.x[12]);
+ case VM_REG_GUEST_X13:
+ return (&hypctx->regs.x[13]);
+ case VM_REG_GUEST_X14:
+ return (&hypctx->regs.x[14]);
+ case VM_REG_GUEST_X15:
+ return (&hypctx->regs.x[15]);
+ case VM_REG_GUEST_X16:
+ return (&hypctx->regs.x[16]);
+ case VM_REG_GUEST_X17:
+ return (&hypctx->regs.x[17]);
+ case VM_REG_GUEST_X18:
+ return (&hypctx->regs.x[18]);
+ case VM_REG_GUEST_X19:
+ return (&hypctx->regs.x[19]);
+ case VM_REG_GUEST_X20:
+ return (&hypctx->regs.x[20]);
+ case VM_REG_GUEST_X21:
+ return (&hypctx->regs.x[21]);
+ case VM_REG_GUEST_X22:
+ return (&hypctx->regs.x[22]);
+ case VM_REG_GUEST_X23:
+ return (&hypctx->regs.x[23]);
+ case VM_REG_GUEST_X24:
+ return (&hypctx->regs.x[24]);
+ case VM_REG_GUEST_X25:
+ return (&hypctx->regs.x[25]);
+ case VM_REG_GUEST_X26:
+ return (&hypctx->regs.x[26]);
+ case VM_REG_GUEST_X27:
+ return (&hypctx->regs.x[27]);
+ case VM_REG_GUEST_X28:
+ return (&hypctx->regs.x[28]);
+ case VM_REG_GUEST_X29:
+ return (&hypctx->regs.x[29]);
+ case VM_REG_GUEST_LR:
+ return (&hypctx->regs.lr);
+ case VM_REG_GUEST_SP:
+ return (&hypctx->regs.sp);
+ case VM_REG_GUEST_ELR:
+ return (&hypctx->regs.elr);
+ case VM_REG_GUEST_SPSR:
+ return (&hypctx->regs.spsr);
+ case VM_REG_ELR_EL2:
+ return (&hypctx->elr_el2);
+ default:
+ break;
+ }
+ return (NULL);
+}
+
+static int
+arm_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
+{
+ void *regp;
+ int running, hostcpu;
+ struct hyp *hyp = arg;
+
+ running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("arm_getreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+ if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+ if (reg == VM_REG_GUEST_SPSR)
+ *retval = *(uint32_t *)regp;
+ else
+ *retval = *(uint64_t *)regp;
+ return (0);
+ } else {
+ return (EINVAL);
+ }
+}
+
+static int
+arm_setreg(void *arg, int vcpu, int reg, uint64_t val)
+{
+ void *regp;
+ struct hyp *hyp = arg;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("hyp_setreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+ if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+ if (reg == VM_REG_GUEST_SPSR)
+ *(uint32_t *)regp = (uint32_t)val;
+ else
+ *(uint64_t *)regp = val;
+ return (0);
+ } else {
+ return (EINVAL);
+ }
+}
+
+static void
+arm_restore(void)
+{
+ ;
+}
+
+struct vmm_ops vmm_ops_arm = {
+ arm_init,
+ arm_cleanup,
+ arm_restore,
+ arm_vminit,
+ arm_vmrun,
+ arm_vmcleanup,
+ hypmap_set,
+ hypmap_get,
+ arm_getreg,
+ arm_setreg,
+ NULL, /* vmi_get_cap_t */
+ NULL /* vmi_set_cap_t */
+};
Index: sys/arm64/vmm/hyp.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_H_
+#define _VMM_HYP_H_
+
+/*
+ * The translation tables for the hypervisor mode will hold mappings for kernel
+ * virtual addresses and an identity mapping (VA == PA) necessary when
+ * enabling/disabling the MMU.
+ *
+ * When in EL2 exception level the translation table base register is TTBR0_EL2
+ * and the virtual addresses generated by the CPU must be at the bottom of the
+ * memory, with the first 16 bits all set to zero:
+ *
+ * 0x0000ffffffffffff End hyp address space
+ * 0x0000000000000000 Start of hyp address space
+ *
+ * To run code in hyp mode we need to convert kernel virtual addresses to
+ * addresses that fit into this address space.
+ *
+ * The kernel virtual address range is:
+ *
+ * 0xffff007fffffffff End of KVA
+ * 0xffff000000000000 Kernel base address & start of KVA
+ *
+ * (see /sys/arm64/include/vmparam.h).
+ *
+ * We could convert the kernel virtual addresses to valid EL2 addresses by
+ * setting the first 16 bits to zero and thus mapping the kernel addresses in
+ * the bottom half of the EL2 address space, but then they might clash with the
+ * identity mapping addresses. Instead we map the kernel addresses in the upper
+ * half of the EL2 address space.
+ *
+ * The hypervisor address space will look like this:
+ *
+ * 0x0000807fffffffff End of KVA mapping
+ * 0x0000800000000000 Start of KVA mapping
+ *
+ * 0x00007fffffffffff End of identity mapping
+ * 0x0000000000000000 Start of identity mapping
+ *
+ * With this scheme we have 47 bits at our disposal for the identity map and
+ * another 47 bits for the kernel virtual addresses. For a maximum physical
+ * memory size of 128TB we are guaranteed to not have any clashes between
+ * addresses.
+ */
+#define HYP_VM_MIN_ADDRESS 0x0000000000000000
+#define HYP_VM_MAX_ADDRESS 0x0000ffffffffffff
+
+#define HYP_KVA_OFFSET 0x0000800000000000
+#define HYP_KVA_MASK 0x0000ffffffffffff
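+
+/*
+ * Conversion sketch (the ktohyp() macro used by the rest of the patch is
+ * expected to perform the equivalent): a kernel virtual address is turned
+ * into an EL2 address by clearing the upper 16 bits and adding the offset,
+ * e.g.
+ *
+ *	hyp_va = (kva & HYP_KVA_MASK) | HYP_KVA_OFFSET;
+ *
+ * 0xffff000000001000 -> 0x0000800000001000.
+ */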
+
+/*
+ * When taking asynchronous exceptions, or interrupts, with the exception of the
+ * SError interrupt, the exception syndrome register is not updated with the
+ * exception code. We need to differentiate between the different exception
+ * types taken to EL2.
+ */
+#define EXCP_TYPE_EL1_SYNC 0
+#define EXCP_TYPE_EL1_IRQ 1
+#define EXCP_TYPE_EL1_FIQ 2
+#define EXCP_TYPE_EL1_ERROR 3
+
+#define EXCP_TYPE_EL2_SYNC 4
+#define EXCP_TYPE_EL2_IRQ 5
+#define EXCP_TYPE_EL2_FIQ 6
+#define EXCP_TYPE_EL2_ERROR 7
+
+#define EXCP_TYPE_MAINT_IRQ 8
+
+#define HYP_GET_VECTOR_TABLE -1
+
+#endif /* !_VMM_HYP_H_ */
Index: sys/arm64/vmm/hyp.S
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp.S
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <sys/syscall.h>
+#include <machine/armreg.h>
+#include <machine/asm.h>
+#include <machine/hypervisor.h>
+#include <arm/arm/gic_common.h>
+
+#include "hyp_macros.h"
+#include "hyp.h"
+#include "hyp_assym.h"
+
+ .text
+
+ .globl hyp_code_start
+ .globl hyp_code_end
+
+ .align 12
+hyp_code_start:
+
+
+ENTRY(vmm_call_hyp)
+ hvc #0
+ ret
+END(vmm_call_hyp)
+
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .align 11
+ .globl hyp_init_vectors
+hyp_init_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* Error EL2h */
+
+ vector hyp_init /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+
+/*
+ * Initialize the hypervisor mode with a new exception vector table, translation
+ * table and stack.
+ *
+ * Expecting:
+ * x0 - the hypervisor exception vectors
+ * x1 - translation tables physical address
+ * x2 - stack top virtual address
+ * x3 - TCR_EL2 value
+ * x4 - SCTLR_EL2 value
+ * x5 - VTCR_EL2 value
+ */
+ENTRY(handle_hyp_init)
+ /* Install the new exception vectors */
+ msr vbar_el2, x0
+ /* Set the stack top address */
+ mov sp, x2
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+ /* Load the base address for the translation tables */
+ msr ttbr0_el2, x1
+ /* Invalidate the TLB */
+ tlbi alle2
+ /* Use the same memory attributes as EL1 */
+ mrs x9, mair_el1
+ msr mair_el2, x9
+ /* Configure address translation */
+ msr tcr_el2, x3
+ isb
+ /* Set the system control register for EL2 */
+ msr sctlr_el2, x4
+ /* Set the Stage 2 translation control register */
+ msr vtcr_el2, x5
+ /* Return success */
+ mov x0, #0
+ /* MMU is up and running */
+ eret
+END(handle_hyp_init)
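+
+/*
+ * Illustrative sketch (assumption: the actual caller lives in the C part of
+ * the arm64 vmm code and is not shown in this hunk). The host reaches
+ * handle_hyp_init by issuing an HVC through vmm_call_hyp() while the
+ * hyp_init_vectors table is installed, with the arguments laid out as
+ * documented above, roughly:
+ *
+ *	error = vmm_call_hyp(el2_vectors, el2_ttbr0_pa, stack_top,
+ *	    tcr_el2, sctlr_el2, vtcr_el2);
+ *
+ * A return value of 0 in x0 indicates success.
+ */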
+
+
+ .align 11
+ .globl hyp_vectors
+hyp_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vector el2_el2h_sync /* Synchronous EL2h */
+ vector el2_el2h_irq /* IRQ EL2h */
+ vector el2_el2h_fiq /* FIQ EL2h */
+ vector el2_el2h_error /* Error EL2h */
+
+ vector el2_el1_sync64 /* Synchronous 64-bit EL1 */
+ vector el2_el1_irq64 /* IRQ 64-bit EL1 */
+ vector el2_el1_fiq64 /* FIQ 64-bit EL1 */
+ vector el2_el1_error64 /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+
+.macro do_world_switch_to_host
+ .align 7
+ SAVE_GUEST_REGS()
+#ifdef VFP
+ /*
+ * Saving the guest VFP registers needs to come after saving the rest of
+ * the registers because the process dirties the regular registers.
+ */
+ SAVE_GUEST_VFP_REGS()
+ LOAD_HOST_VFP_REGS()
+#endif
+ LOAD_HOST_REGS()
+ SAVE_EXIT_INFO()
+
+ /* Restore host VTTBR */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+.endm
+
+
+.macro handle_el2_excp type
+ .align 7
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Test if the exception happened when the host was running */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* We got the exception while the guest was running */
+ ldr x9, [sp], #16
+ do_world_switch_to_host
+ b 2f
+1:
+ /* We got the exception while the host was running */
+ ldr x9, [sp], #16
+2:
+ mov x0, \type
+ eret
+.endm
+
+
+ENTRY(handle_el2_el2h_sync)
+ handle_el2_excp #EXCP_TYPE_EL2_SYNC
+END(handle_el2_el2h_sync)
+
+ENTRY(handle_el2_el2h_irq)
+ handle_el2_excp #EXCP_TYPE_EL2_IRQ
+END(handle_el2_el2h_irq)
+
+ENTRY(handle_el2_el2h_fiq)
+ handle_el2_excp #EXCP_TYPE_EL2_FIQ
+END(handle_el2_el2h_fiq)
+
+ENTRY(handle_el2_el2h_error)
+ handle_el2_excp #EXCP_TYPE_EL2_ERROR
+END(handle_el2_el2h_error)
+
+
+ENTRY(handle_el2_el1_sync64)
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Check for host hypervisor call */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* Restore register */
+ ldr x9, [sp], #16
+
+ /* Guest exception taken to EL2 */
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_SYNC
+ b exit
+
+1:
+ /* Restore register */
+ ldr x9, [sp], #16
+
+ cmp x0, #HYP_GET_VECTOR_TABLE
+ beq 2f
+ b call_function
+2:
+ /* Return the vector table base address */
+ mrs x0, vbar_el2
+exit:
+ eret
+END(handle_el2_el1_sync64)
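+
+/*
+ * Illustrative sketch (assumption): once the run-time vectors are installed,
+ * a host hypercall with x0 == HYP_GET_VECTOR_TABLE, e.g.
+ *
+ *	vbar = vmm_call_hyp(HYP_GET_VECTOR_TABLE);
+ *
+ * returns the current VBAR_EL2 value, while any other x0 value is treated as
+ * the EL2 address of a function to run via call_function below.
+ */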
+
+
+/*
+ * Call a function in EL2 context
+ *
+ * Expecting:
+ * x0 - function virtual address
+ * x1-x7 - function parameters
+ */
+ENTRY(call_function)
+ /* Save the function address before shuffling parameters */
+ mov x9, x0
+
+ /* Shuffle function parameters */
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+ mov x4, x5
+ mov x5, x6
+ mov x6, x7
+
+ /* Call function */
+ br x9
+END(call_function)
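+
+/*
+ * Illustrative sketch (assumption): a host-side call such as
+ *
+ *	vmm_call_hyp(el2_func_va, arg0, arg1);
+ *
+ * arrives here with the function address in x0 and its arguments in x1-x7;
+ * after the shuffle above the callee sees the arguments in x0-x6.
+ */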
+
+
+/*
+ * We only trap IRQ, FIQ and SError exceptions when a guest is running. Do a
+ * world switch to host to handle these exceptions.
+ */
+
+
+ENTRY(handle_el2_el1_irq64)
+ do_world_switch_to_host
+ str x9, [sp, #-16]!
+ mrs x9, ich_misr_el2
+ cmp x9, xzr
+ beq 1f
+ mov x0, #EXCP_TYPE_MAINT_IRQ
+ b 2f
+1:
+ mov x0, #EXCP_TYPE_EL1_IRQ
+2:
+ ldr x9, [sp], #16
+ eret
+END(handle_el2_el1_irq64)
+
+ENTRY(handle_el2_el1_fiq64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_FIQ
+ eret
+END(handle_el2_el1_fiq64)
+
+ENTRY(handle_el2_el1_error64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_ERROR
+ eret
+END(handle_el2_el1_error64)
+
+
+/*
+ * Usage:
+ * void vmm_enter_guest(struct hypctx *hypctx)
+ *
+ * Expecting:
+ * x0 - hypctx address
+ */
+ENTRY(vmm_enter_guest)
+ /* Save hypctx address */
+ msr tpidr_el2, x0
+
+ SAVE_HOST_REGS()
+#ifdef VFP
+ SAVE_HOST_VFP_REGS()
+ /*
+ * Loading the guest VFP registers needs to come before loading the
+ * rest of the registers because this process dirties the regular
+ * registers.
+ */
+ LOAD_GUEST_VFP_REGS()
+#endif
+ LOAD_GUEST_REGS()
+
+ /* Enter guest */
+ eret
+END(vmm_enter_guest)
+
+
+/*
+ * Usage:
+ * void vmm_cleanup(void *hyp_stub_vectors)
+ *
+ * Expecting:
+ * x0 - physical address of hyp_stub_vectors
+ */
+ENTRY(vmm_cleanup)
+ /* Restore the stub vectors */
+ msr vbar_el2, x0
+
+ /* Disable the MMU */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, #SCTLR_EL2_M
+ msr sctlr_el2, x2
+
+ eret
+END(vmm_cleanup)
+
+.macro read_reg name
+ mrs x0, \name
+.endm
+
+/*
+ * Return the value of the ICH_VTR_EL2 register.
+ */
+ENTRY(vmm_read_ich_vtr_el2)
+ read_reg ich_vtr_el2
+ eret
+END(vmm_read_ich_vtr_el2)
+
+/*
+ * Return the value of the CNTHCTL_EL2 register.
+ */
+ENTRY(vmm_read_cnthctl_el2)
+ read_reg cnthctl_el2
+ eret
+END(vmm_read_cnthctl_el2)
+
+/*
+ * Return the value of the TCR_EL2 register.
+ */
+ENTRY(vmm_read_tcr_el2)
+ read_reg tcr_el2
+ eret
+END(vmm_read_tcr_el2)
+
+
+
+hyp_code_end:
Index: sys/arm64/vmm/hyp_genassym.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp_genassym.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/assym.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/vmm.h>
+
+#include "arm64.h"
+
+ASSYM(HYPCTX_REGS_X0, offsetof(struct hypctx, regs) + 0 * 8);
+ASSYM(HYPCTX_REGS_X1, offsetof(struct hypctx, regs) + 1 * 8);
+ASSYM(HYPCTX_REGS_X2, offsetof(struct hypctx, regs) + 2 * 8);
+ASSYM(HYPCTX_REGS_X3, offsetof(struct hypctx, regs) + 3 * 8);
+ASSYM(HYPCTX_REGS_X4, offsetof(struct hypctx, regs) + 4 * 8);
+ASSYM(HYPCTX_REGS_X5, offsetof(struct hypctx, regs) + 5 * 8);
+ASSYM(HYPCTX_REGS_X6, offsetof(struct hypctx, regs) + 6 * 8);
+ASSYM(HYPCTX_REGS_X7, offsetof(struct hypctx, regs) + 7 * 8);
+ASSYM(HYPCTX_REGS_X8, offsetof(struct hypctx, regs) + 8 * 8);
+ASSYM(HYPCTX_REGS_X9, offsetof(struct hypctx, regs) + 9 * 8);
+ASSYM(HYPCTX_REGS_X10, offsetof(struct hypctx, regs) + 10 * 8);
+ASSYM(HYPCTX_REGS_X11, offsetof(struct hypctx, regs) + 11 * 8);
+ASSYM(HYPCTX_REGS_X12, offsetof(struct hypctx, regs) + 12 * 8);
+ASSYM(HYPCTX_REGS_X13, offsetof(struct hypctx, regs) + 13 * 8);
+ASSYM(HYPCTX_REGS_X14, offsetof(struct hypctx, regs) + 14 * 8);
+ASSYM(HYPCTX_REGS_X15, offsetof(struct hypctx, regs) + 15 * 8);
+ASSYM(HYPCTX_REGS_X16, offsetof(struct hypctx, regs) + 16 * 8);
+ASSYM(HYPCTX_REGS_X17, offsetof(struct hypctx, regs) + 17 * 8);
+ASSYM(HYPCTX_REGS_X18, offsetof(struct hypctx, regs) + 18 * 8);
+ASSYM(HYPCTX_REGS_X19, offsetof(struct hypctx, regs) + 19 * 8);
+ASSYM(HYPCTX_REGS_X20, offsetof(struct hypctx, regs) + 20 * 8);
+ASSYM(HYPCTX_REGS_X21, offsetof(struct hypctx, regs) + 21 * 8);
+ASSYM(HYPCTX_REGS_X22, offsetof(struct hypctx, regs) + 22 * 8);
+ASSYM(HYPCTX_REGS_X23, offsetof(struct hypctx, regs) + 23 * 8);
+ASSYM(HYPCTX_REGS_X24, offsetof(struct hypctx, regs) + 24 * 8);
+ASSYM(HYPCTX_REGS_X25, offsetof(struct hypctx, regs) + 25 * 8);
+ASSYM(HYPCTX_REGS_X26, offsetof(struct hypctx, regs) + 26 * 8);
+ASSYM(HYPCTX_REGS_X27, offsetof(struct hypctx, regs) + 27 * 8);
+ASSYM(HYPCTX_REGS_X28, offsetof(struct hypctx, regs) + 28 * 8);
+ASSYM(HYPCTX_REGS_X29, offsetof(struct hypctx, regs) + 29 * 8);
+ASSYM(HYPCTX_REGS_LR, offsetof(struct hypctx, regs.lr));
+ASSYM(HYPCTX_REGS_SP, offsetof(struct hypctx, regs.sp));
+ASSYM(HYPCTX_REGS_ELR, offsetof(struct hypctx, regs.elr));
+ASSYM(HYPCTX_REGS_SPSR, offsetof(struct hypctx, regs.spsr));
+
+ASSYM(HYPCTX_ACTLR_EL1, offsetof(struct hypctx, actlr_el1));
+ASSYM(HYPCTX_AMAIR_EL1, offsetof(struct hypctx, amair_el1));
+ASSYM(HYPCTX_ELR_EL1, offsetof(struct hypctx, elr_el1));
+ASSYM(HYPCTX_FAR_EL1, offsetof(struct hypctx, far_el1));
+ASSYM(HYPCTX_FP, offsetof(struct hypctx, fp));
+ASSYM(HYPCTX_MAIR_EL1, offsetof(struct hypctx, mair_el1));
+ASSYM(HYPCTX_PAR_EL1, offsetof(struct hypctx, par_el1));
+ASSYM(HYPCTX_SP_EL0, offsetof(struct hypctx, sp_el0));
+ASSYM(HYPCTX_TCR_EL1, offsetof(struct hypctx, tcr_el1));
+ASSYM(HYPCTX_TPIDR_EL0, offsetof(struct hypctx, tpidr_el0));
+ASSYM(HYPCTX_TPIDRRO_EL0, offsetof(struct hypctx, tpidrro_el0));
+ASSYM(HYPCTX_TPIDR_EL1, offsetof(struct hypctx, tpidr_el1));
+ASSYM(HYPCTX_TTBR0_EL1, offsetof(struct hypctx, ttbr0_el1));
+ASSYM(HYPCTX_TTBR1_EL1, offsetof(struct hypctx, ttbr1_el1));
+ASSYM(HYPCTX_VBAR_EL1, offsetof(struct hypctx, vbar_el1));
+ASSYM(HYPCTX_AFSR0_EL1, offsetof(struct hypctx, afsr0_el1));
+ASSYM(HYPCTX_AFSR1_EL1, offsetof(struct hypctx, afsr1_el1));
+ASSYM(HYPCTX_CONTEXTIDR_EL1, offsetof(struct hypctx, contextidr_el1));
+ASSYM(HYPCTX_CPACR_EL1, offsetof(struct hypctx, cpacr_el1));
+ASSYM(HYPCTX_ESR_EL1, offsetof(struct hypctx, esr_el1));
+ASSYM(HYPCTX_SCTLR_EL1, offsetof(struct hypctx, sctlr_el1));
+ASSYM(HYPCTX_SPSR_EL1, offsetof(struct hypctx, spsr_el1));
+
+ASSYM(HYPCTX_ELR_EL2, offsetof(struct hypctx, elr_el2));
+ASSYM(HYPCTX_HCR_EL2, offsetof(struct hypctx, hcr_el2));
+ASSYM(HYPCTX_VPIDR_EL2, offsetof(struct hypctx, vpidr_el2));
+ASSYM(HYPCTX_VMPIDR_EL2, offsetof(struct hypctx, vmpidr_el2));
+ASSYM(HYPCTX_CPTR_EL2, offsetof(struct hypctx, cptr_el2));
+ASSYM(HYPCTX_SPSR_EL2, offsetof(struct hypctx, spsr_el2));
+
+ASSYM(HYPCTX_HYP, offsetof(struct hypctx, hyp));
+
+ASSYM(HYP_VTTBR_EL2, offsetof(struct hyp, vttbr_el2));
+ASSYM(HYP_VTIMER_CNTHCTL_EL2, offsetof(struct hyp, vtimer.cnthctl_el2));
+ASSYM(HYP_VTIMER_CNTVOFF_EL2, offsetof(struct hyp, vtimer.cntvoff_el2));
+
+ASSYM(HYPCTX_EXIT_INFO_ESR_EL2, offsetof(struct hypctx, exit_info.esr_el2));
+ASSYM(HYPCTX_EXIT_INFO_FAR_EL2, offsetof(struct hypctx, exit_info.far_el2));
+ASSYM(HYPCTX_EXIT_INFO_HPFAR_EL2, offsetof(struct hypctx, exit_info.hpfar_el2));
+
+ASSYM(HYPCTX_VGIC_ICH_LR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_lr_el2));
+ASSYM(HYPCTX_VGIC_ICH_LR_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_lr_num));
+ASSYM(HYPCTX_VGIC_ICH_AP0R_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_ap0r_el2));
+ASSYM(HYPCTX_VGIC_ICH_AP0R_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_ap0r_num));
+ASSYM(HYPCTX_VGIC_ICH_AP1R_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_ap1r_el2));
+ASSYM(HYPCTX_VGIC_ICH_AP1R_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_ap1r_num));
+ASSYM(HYPCTX_VGIC_ICH_EISR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_eisr_el2));
+ASSYM(HYPCTX_VGIC_ICH_ELRSR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_elrsr_el2));
+ASSYM(HYPCTX_VGIC_ICH_HCR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_hcr_el2));
+ASSYM(HYPCTX_VGIC_ICH_MISR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_misr_el2));
+ASSYM(HYPCTX_VGIC_ICH_VMCR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_vmcr_el2));
+ASSYM(HYPCTX_VGIC_ICH_LR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_lr_el2));
+
+ASSYM(HYPCTX_VTIMER_CPU_CNTKCTL_EL1, offsetof(struct hypctx, vtimer_cpu.cntkctl_el1));
+ASSYM(HYPCTX_VTIMER_CPU_CNTV_CVAL_EL0, offsetof(struct hypctx, vtimer_cpu.cntv_cval_el0));
+ASSYM(HYPCTX_VTIMER_CPU_CNTV_CTL_EL0, offsetof(struct hypctx, vtimer_cpu.cntv_ctl_el0));
+
+#ifdef VFP
+ASSYM(HYPCTX_VFPSTATE_Q0, offsetof(struct hypctx, vfpstate.vfp_regs) + 0 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q1, offsetof(struct hypctx, vfpstate.vfp_regs) + 1 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q2, offsetof(struct hypctx, vfpstate.vfp_regs) + 2 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q3, offsetof(struct hypctx, vfpstate.vfp_regs) + 3 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q4, offsetof(struct hypctx, vfpstate.vfp_regs) + 4 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q5, offsetof(struct hypctx, vfpstate.vfp_regs) + 5 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q6, offsetof(struct hypctx, vfpstate.vfp_regs) + 6 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q7, offsetof(struct hypctx, vfpstate.vfp_regs) + 7 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q8, offsetof(struct hypctx, vfpstate.vfp_regs) + 8 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q9, offsetof(struct hypctx, vfpstate.vfp_regs) + 9 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q10, offsetof(struct hypctx, vfpstate.vfp_regs) + 10 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q11, offsetof(struct hypctx, vfpstate.vfp_regs) + 11 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q12, offsetof(struct hypctx, vfpstate.vfp_regs) + 12 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q13, offsetof(struct hypctx, vfpstate.vfp_regs) + 13 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q14, offsetof(struct hypctx, vfpstate.vfp_regs) + 14 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q15, offsetof(struct hypctx, vfpstate.vfp_regs) + 15 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q16, offsetof(struct hypctx, vfpstate.vfp_regs) + 16 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q17, offsetof(struct hypctx, vfpstate.vfp_regs) + 17 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q18, offsetof(struct hypctx, vfpstate.vfp_regs) + 18 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q19, offsetof(struct hypctx, vfpstate.vfp_regs) + 19 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q20, offsetof(struct hypctx, vfpstate.vfp_regs) + 20 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q21, offsetof(struct hypctx, vfpstate.vfp_regs) + 21 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q22, offsetof(struct hypctx, vfpstate.vfp_regs) + 22 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q23, offsetof(struct hypctx, vfpstate.vfp_regs) + 23 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q24, offsetof(struct hypctx, vfpstate.vfp_regs) + 24 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q25, offsetof(struct hypctx, vfpstate.vfp_regs) + 25 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q26, offsetof(struct hypctx, vfpstate.vfp_regs) + 26 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q27, offsetof(struct hypctx, vfpstate.vfp_regs) + 27 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q28, offsetof(struct hypctx, vfpstate.vfp_regs) + 28 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q29, offsetof(struct hypctx, vfpstate.vfp_regs) + 29 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q30, offsetof(struct hypctx, vfpstate.vfp_regs) + 30 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q31, offsetof(struct hypctx, vfpstate.vfp_regs) + 31 * 16);
+
+
+ASSYM(HYPCTX_VFPSTATE_FPCR, offsetof(struct hypctx, vfpstate.vfp_fpcr));
+ASSYM(HYPCTX_VFPSTATE_FPSR, offsetof(struct hypctx, vfpstate.vfp_fpsr));
+#endif
Index: sys/arm64/vmm/hyp_macros.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp_macros.h
@@ -0,0 +1,687 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_MACROS_H_
+#define _VMM_HYP_MACROS_H_
+
+
+#define PUSH_SYS_REG_PAIR(reg0, reg1) \
+ mrs x1, reg0; \
+ mrs x2, reg1; \
+ stp x2, x1, [sp, #-16]!;
+
+
+#define PUSH_SYS_REG(reg) \
+ mrs x1, reg; \
+ str x1, [sp, #-16]!;
+
+
+/*
+ * Push all the host registers before entering the guest.
+ */
+#define SAVE_HOST_REGS() \
+ /* Save the regular registers */ \
+ stp x0, x1, [sp, #-16]!; \
+ stp x2, x3, [sp, #-16]!; \
+ stp x4, x5, [sp, #-16]!; \
+ stp x6, x7, [sp, #-16]!; \
+ stp x8, x9, [sp, #-16]!; \
+ stp x10, x11, [sp, #-16]!; \
+ stp x12, x13, [sp, #-16]!; \
+ stp x14, x15, [sp, #-16]!; \
+ stp x16, x17, [sp, #-16]!; \
+ stp x18, x19, [sp, #-16]!; \
+ stp x20, x21, [sp, #-16]!; \
+ stp x22, x23, [sp, #-16]!; \
+ stp x24, x25, [sp, #-16]!; \
+ stp x26, x27, [sp, #-16]!; \
+ stp x28, x29, [sp, #-16]!; \
+ stp lr, fp, [sp, #-16]!; \
+ \
+ /* Push the system registers */ \
+ PUSH_SYS_REG_PAIR(SP_EL0, SP_EL1); \
+ PUSH_SYS_REG_PAIR(ACTLR_EL1, AMAIR_EL1); \
+ PUSH_SYS_REG_PAIR(ELR_EL1, PAR_EL1); \
+ PUSH_SYS_REG_PAIR(MAIR_EL1, TCR_EL1); \
+ PUSH_SYS_REG_PAIR(TPIDR_EL0, TPIDRRO_EL0); \
+ PUSH_SYS_REG_PAIR(TPIDR_EL1, TTBR0_EL1); \
+ PUSH_SYS_REG_PAIR(TTBR1_EL1, VBAR_EL1); \
+ PUSH_SYS_REG_PAIR(AFSR0_EL1, AFSR1_EL1); \
+ PUSH_SYS_REG_PAIR(CONTEXTIDR_EL1, CPACR_EL1); \
+ PUSH_SYS_REG_PAIR(ESR_EL1, FAR_EL1); \
+ PUSH_SYS_REG_PAIR(SCTLR_EL1, SPSR_EL1); \
+ PUSH_SYS_REG_PAIR(ELR_EL2, HCR_EL2); \
+ PUSH_SYS_REG_PAIR(VPIDR_EL2, VMPIDR_EL2); \
+ PUSH_SYS_REG_PAIR(CPTR_EL2, SPSR_EL2); \
+ PUSH_SYS_REG_PAIR(ICH_HCR_EL2, ICH_VMCR_EL2); \
+ PUSH_SYS_REG_PAIR(CNTHCTL_EL2, CNTKCTL_EL1); \
+ PUSH_SYS_REG(CNTVOFF_EL2);
+
+
+#define SAVE_HOST_VFP_REGS() \
+ stp q0, q1, [sp, #-16 * 2]!; \
+ stp q2, q3, [sp, #-16 * 2]!; \
+ stp q4, q5, [sp, #-16 * 2]!; \
+ stp q6, q7, [sp, #-16 * 2]!; \
+ stp q8, q9, [sp, #-16 * 2]!; \
+ stp q10, q11, [sp, #-16 * 2]!; \
+ stp q12, q13, [sp, #-16 * 2]!; \
+ stp q14, q15, [sp, #-16 * 2]!; \
+ stp q16, q17, [sp, #-16 * 2]!; \
+ stp q18, q19, [sp, #-16 * 2]!; \
+ stp q20, q21, [sp, #-16 * 2]!; \
+ stp q22, q23, [sp, #-16 * 2]!; \
+ stp q24, q25, [sp, #-16 * 2]!; \
+ stp q26, q27, [sp, #-16 * 2]!; \
+ stp q28, q29, [sp, #-16 * 2]!; \
+ stp q30, q31, [sp, #-16 * 2]!; \
+ PUSH_SYS_REG_PAIR(FPCR, FPSR);
+
+
+#define POP_SYS_REG_PAIR(reg0, reg1) \
+ ldp x2, x1, [sp], #16; \
+ msr reg1, x2; \
+ msr reg0, x1;
+
+
+#define LOAD_HOST_VFP_REGS() \
+ POP_SYS_REG_PAIR(FPCR, FPSR); \
+ ldp q30, q31, [sp], #16 * 2; \
+ ldp q28, q29, [sp], #16 * 2; \
+ ldp q26, q27, [sp], #16 * 2; \
+ ldp q24, q25, [sp], #16 * 2; \
+ ldp q22, q23, [sp], #16 * 2; \
+ ldp q20, q21, [sp], #16 * 2; \
+ ldp q18, q19, [sp], #16 * 2; \
+ ldp q16, q17, [sp], #16 * 2; \
+ ldp q14, q15, [sp], #16 * 2; \
+ ldp q12, q13, [sp], #16 * 2; \
+ ldp q10, q11, [sp], #16 * 2; \
+ ldp q8, q9, [sp], #16 * 2; \
+ ldp q6, q7, [sp], #16 * 2; \
+ ldp q4, q5, [sp], #16 * 2; \
+ ldp q2, q3, [sp], #16 * 2; \
+ ldp q0, q1, [sp], #16 * 2; \
+
+
+#define POP_SYS_REG(reg) \
+ ldr x1, [sp], #16; \
+ msr reg, x1;
+
+
+/*
+ * Restore all the host registers before entering the host.
+ */
+#define LOAD_HOST_REGS() \
+ /* Pop the system registers first */ \
+ POP_SYS_REG(CNTVOFF_EL2); \
+ POP_SYS_REG_PAIR(CNTHCTL_EL2, CNTKCTL_EL1); \
+ POP_SYS_REG_PAIR(ICH_HCR_EL2, ICH_VMCR_EL2); \
+ POP_SYS_REG_PAIR(CPTR_EL2, SPSR_EL2); \
+ POP_SYS_REG_PAIR(VPIDR_EL2, VMPIDR_EL2); \
+ POP_SYS_REG_PAIR(ELR_EL2, HCR_EL2); \
+ POP_SYS_REG_PAIR(SCTLR_EL1, SPSR_EL1); \
+ POP_SYS_REG_PAIR(ESR_EL1, FAR_EL1); \
+ POP_SYS_REG_PAIR(CONTEXTIDR_EL1, CPACR_EL1); \
+ POP_SYS_REG_PAIR(AFSR0_EL1, AFSR1_EL1); \
+ POP_SYS_REG_PAIR(TTBR1_EL1, VBAR_EL1); \
+ POP_SYS_REG_PAIR(TPIDR_EL1, TTBR0_EL1); \
+ POP_SYS_REG_PAIR(TPIDR_EL0, TPIDRRO_EL0); \
+ POP_SYS_REG_PAIR(MAIR_EL1, TCR_EL1); \
+ POP_SYS_REG_PAIR(ELR_EL1, PAR_EL1); \
+ POP_SYS_REG_PAIR(ACTLR_EL1, AMAIR_EL1); \
+ POP_SYS_REG_PAIR(SP_EL0, SP_EL1); \
+ \
+ /* Pop the regular registers */ \
+ ldp lr, fp, [sp], #16; \
+ ldp x28, x29, [sp], #16; \
+ ldp x26, x27, [sp], #16; \
+ ldp x24, x25, [sp], #16; \
+ ldp x22, x23, [sp], #16; \
+ ldp x20, x21, [sp], #16; \
+ ldp x18, x19, [sp], #16; \
+ ldp x16, x17, [sp], #16; \
+ ldp x14, x15, [sp], #16; \
+ ldp x12, x13, [sp], #16; \
+ ldp x10, x11, [sp], #16; \
+ ldp x8, x9, [sp], #16; \
+ ldp x6, x7, [sp], #16; \
+ ldp x4, x5, [sp], #16; \
+ ldp x2, x3, [sp], #16; \
+ ldp x0, x1, [sp], #16; \
+
+
+#define SAVE_ARRAY_REG64(reg, dest, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ mrs x7, reg; \
+ str x7, [dest]; \
+ add dest, dest, #8; \
+ sub remaining, remaining, #1;
+
+
+#define SAVE_LR_REGS() \
+ /* Load the number of ICH_LR_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_LR_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the destination address */ \
+ mov x1, #HYPCTX_VGIC_ICH_LR_EL2; \
+ add x1, x0, x1; \
+ SAVE_ARRAY_REG64(ich_lr0_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr1_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr2_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr3_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr4_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr5_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr6_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr7_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr8_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr9_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr10_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr11_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr12_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr13_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr14_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr15_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define SAVE_ARRAY_REG32(reg, dest, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ mrs x7, reg; \
+ str w7, [dest]; \
+ add dest, dest, #4; \
+ sub remaining, remaining, #1;
+
+
+#define SAVE_AP0R_REGS() \
+ /* Load the number of ICH_AP0R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP0R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the destination address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP0R_EL2; \
+ add x1, x0, x1; \
+ SAVE_ARRAY_REG32(ich_ap0r0_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap0r1_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap0r2_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap0r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define SAVE_AP1R_REGS() \
+ /* Load the number of ICH_AP1R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP1R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the destination address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP1R_EL2; \
+ add x1, x0, x1; \
+ SAVE_ARRAY_REG32(ich_ap1r0_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap1r1_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap1r2_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap1r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+/*
+ * The STR and LDR instructions take an offset between [-256, 255], but the
+ * hypctx register offset can be larger than that. To get around this limitation
+ * we use a temporary register to hold the offset.
+ */
+#define SAVE_SYS_REG64(prefix, reg) \
+ mrs x1, reg; \
+ mov x2, prefix ##_ ##reg; \
+ str x1, [x0, x2];
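+
+/*
+ * Illustrative expansion: SAVE_SYS_REG64(HYPCTX, TCR_EL1) becomes
+ *
+ *	mrs x1, TCR_EL1
+ *	mov x2, HYPCTX_TCR_EL1
+ *	str x1, [x0, x2]
+ *
+ * with HYPCTX_TCR_EL1 generated by hyp_genassym.c; materializing the offset
+ * in x2 avoids the limited STR immediate range described above.
+ */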
+
+
+#define SAVE_SYS_REG32(prefix, reg) \
+ mrs x1, reg; \
+ mov x2, prefix ##_ ##reg; \
+ str w1, [x0, x2];
+
+
+#define SAVE_REG(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ str reg, [x0, x1];
+
+/*
+ * The STP and LDP instructions take an immediate in the range of [-512, 504]
+ * when using the post-indexed addressing mode, but the hypctx register offset
+ * can be larger than that. To get around this limitation we compute the address
+ * by adding the hypctx base address with the struct member offset.
+ *
+ * Using STP/LDP to save/load register pairs to the corresponding struct hypctx
+ * variables works because the registers are declared as an array and they are
+ * stored in contiguous memory addresses.
+ */
+
+#define SAVE_REG_PAIR(prefix, reg0, reg1) \
+ mov x1, prefix ##_ ##reg0; \
+ add x1, x0, x1; \
+ stp reg0, reg1, [x1];
+
+
+/*
+ * We use x0 to hold the hypctx address loaded from TPIDR_EL2, and x1 and x2
+ * as temporary registers to compute the hypctx member addresses. To preserve
+ * the guest values of these registers we push them on the stack first, use
+ * them as temporaries while saving the rest of the registers, and at the end
+ * pop the original values off the stack and save them as well.
+ */
+#define SAVE_GUEST_X_REGS() \
+ /* Push x0 */ \
+ str x0, [sp, #-16]!; \
+ /* Restore hypctx address */ \
+ mrs x0, tpidr_el2; \
+ /* Push x1 and x2 */ \
+ stp x1, x2, [sp, #-16]!; \
+ \
+ /* Save the other registers */ \
+ SAVE_REG_PAIR(HYPCTX_REGS, X3, X4); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X5, X6); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X7, X8); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X9, X10); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X11, X12); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X13, X14); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X15, X16); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X17, X18); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X19, X20); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X21, X22); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X23, X24); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X25, X26); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X27, X28); \
+ SAVE_REG(HYPCTX_REGS, X29); \
+ SAVE_REG(HYPCTX_REGS, LR); \
+ \
+ /* Pop and save x1 and x2 */ \
+ ldp x1, x2, [sp], #16; \
+ mov x3, #HYPCTX_REGS_X1; \
+ add x3, x0, x3; \
+ stp x1, x2, [x3]; \
+ /* Pop and save x0 */ \
+ ldr x1, [sp], #16; \
+ mov x2, #HYPCTX_REGS_X0; \
+ add x2, x2, x0; \
+ str x1, [x2];
+
+
+/*
+ * Save all the guest registers. Start by saving the regular registers first
+ * because those will be used as temporary registers for accessing the hypctx
+ * member addresses.
+ *
+ * Expecting:
+ * TPIDR_EL2 - struct hypctx address
+ *
+ * After call:
+ * x0 - struct hypctx address
+ */
+#define SAVE_GUEST_REGS() \
+ SAVE_GUEST_X_REGS(); \
+ \
+ SAVE_REG(HYPCTX, FP); \
+ \
+ SAVE_SYS_REG32(HYPCTX_VTIMER_CPU, CNTKCTL_EL1); \
+ SAVE_SYS_REG64(HYPCTX_VTIMER_CPU, CNTV_CVAL_EL0); \
+	SAVE_SYS_REG32(HYPCTX_VTIMER_CPU, CNTV_CTL_EL0);	\
+ \
+ /* \
+ * ICH_EISR_EL2, ICH_ELRSR_EL2 and ICH_MISR_EL2 are read-only and are \
+ * saved because they are modified by the hardware as part of the \
+ * interrupt virtualization process and we need to inspect them in \
+ * the VGIC driver. \
+ */ \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_EISR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_ELRSR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_MISR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_HCR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_VMCR_EL2); \
+ \
+ SAVE_LR_REGS(); \
+ SAVE_AP0R_REGS(); \
+ SAVE_AP1R_REGS(); \
+ \
+ /* Save the stack pointer. */ \
+ mrs x1, sp_el1; \
+ mov x2, #HYPCTX_REGS_SP; \
+ str x1, [x0, x2]; \
+ \
+ SAVE_SYS_REG64(HYPCTX, ACTLR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, AFSR0_EL1); \
+ SAVE_SYS_REG64(HYPCTX, AFSR1_EL1); \
+ SAVE_SYS_REG64(HYPCTX, AMAIR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, CONTEXTIDR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, CPACR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, ELR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, ESR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, FAR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, MAIR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, PAR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, SCTLR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, SP_EL0); \
+ SAVE_SYS_REG64(HYPCTX, TCR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, TPIDR_EL0); \
+ SAVE_SYS_REG64(HYPCTX, TPIDRRO_EL0); \
+ SAVE_SYS_REG64(HYPCTX, TPIDR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, TTBR0_EL1); \
+ SAVE_SYS_REG64(HYPCTX, TTBR1_EL1); \
+ SAVE_SYS_REG64(HYPCTX, VBAR_EL1); \
+ \
+ SAVE_SYS_REG32(HYPCTX, SPSR_EL1); \
+ \
+ SAVE_SYS_REG64(HYPCTX, CPTR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, ELR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, HCR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, VPIDR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, VMPIDR_EL2); \
+ SAVE_SYS_REG32(HYPCTX, SPSR_EL2);
+
+
+#define SAVE_GUEST_VFP_REGS() \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q0, Q1); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q2, Q3); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q4, Q5); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q6, Q7); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q8, Q9); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q10, Q11); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q12, Q13); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q14, Q15); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q16, Q17); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q18, Q19); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q20, Q21); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q22, Q23); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q24, Q25); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q26, Q27); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q28, Q29); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q30, Q31); \
+ \
+ SAVE_SYS_REG32(HYPCTX_VFPSTATE, FPCR); \
+ SAVE_SYS_REG32(HYPCTX_VFPSTATE, FPSR);
+
+
+/* See SAVE_SYS_REG64 and SAVE_SYS_REG32 */
+#define LOAD_SYS_REG64(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ ldr x2, [x0, x1]; \
+ msr reg, x2;
+
+
+#define LOAD_SYS_REG32(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ ldr w2, [x0, x1]; \
+ msr reg, x2;
+
+
+/* See SAVE_REG_PAIR */
+#define LOAD_REG_PAIR(prefix, reg0, reg1) \
+ mov x1, prefix ##_ ##reg0; \
+ add x1, x0, x1; \
+ ldp reg0, reg1, [x1];
+
+
+#define LOAD_GUEST_VFP_REGS() \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q0, Q1); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q2, Q3); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q4, Q5); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q6, Q7); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q8, Q9); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q10, Q11); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q12, Q13); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q14, Q15); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q16, Q17); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q18, Q19); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q20, Q21); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q22, Q23); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q24, Q25); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q26, Q27); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q28, Q29); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q30, Q31); \
+ \
+ LOAD_SYS_REG32(HYPCTX_VFPSTATE, FPCR); \
+ LOAD_SYS_REG32(HYPCTX_VFPSTATE, FPSR);
+
+
+#define LOAD_REG(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ ldr reg, [x0, x1];
+
+
+/*
+ * We use x1 as a temporary register for the hypctx member offsets and x0 to
+ * hold the hypctx address. The guest x0 and x1 values are loaded into x2 and
+ * x3 and pushed on the stack; after the other registers have been loaded they
+ * are popped back into x0 and x1.
+ */
+#define LOAD_GUEST_X_REGS() \
+ mov x1, #HYPCTX_REGS_X0; \
+ /* x1 now holds the address of hypctx reg x0 */ \
+ add x1, x1, x0; \
+ /* Make x2 = guest x0 and x3 = guest x1 */ \
+ ldp x2, x3, [x1]; \
+ stp x2, x3, [sp, #-16]!; \
+ \
+ /* Load the other registers */ \
+ LOAD_REG_PAIR(HYPCTX_REGS, X2, X3); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X4, X5); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X6, X7); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X8, X9); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X10, X11); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X12, X13); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X14, X15); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X16, X17); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X18, X19); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X20, X21); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X22, X23); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X24, X25); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X26, X27); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X28, X29); \
+ LOAD_REG(HYPCTX_REGS, LR); \
+ \
+ /* Pop guest x0 and x1 from the stack */ \
+ ldp x0, x1, [sp], #16; \
+
+
+#define LOAD_ARRAY_REG64(reg, src, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ ldr x2, [src]; \
+ msr reg, x2; \
+ add src, src, #8; \
+ sub remaining, remaining, #1;
+
+
+#define LOAD_LR_REGS(); \
+ /* Load the number of ICH_LR_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_LR_NUM; \
+ ldr x3, [x0, x2]; \
+ mov x1, #HYPCTX_VGIC_ICH_LR_EL2; \
+ /* x1 holds the load address */ \
+ add x1, x0, x1; \
+ LOAD_ARRAY_REG64(ich_lr0_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr1_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr2_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr3_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr4_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr5_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr6_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr7_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr8_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr9_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr10_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr11_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr12_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr13_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr14_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr15_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define LOAD_ARRAY_REG32(reg, src, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ ldr w2, [src]; \
+ msr reg, x2; \
+ add src, src, #4; \
+ sub remaining, remaining, #1;
+
+
+#define LOAD_AP0R_REGS(); \
+ /* Load the number of ICH_AP0R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP0R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the load address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP0R_EL2; \
+ add x1, x0, x1; \
+ LOAD_ARRAY_REG32(ich_ap0r0_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap0r1_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap0r2_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap0r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define LOAD_AP1R_REGS(); \
+ /* Load the number of ICH_AP1R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP1R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the load address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP1R_EL2; \
+ add x1, x0, x1; \
+ LOAD_ARRAY_REG32(ich_ap1r0_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap1r1_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap1r2_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap1r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+
+#define KTOHYP_REG(reg) \
+ mov x7, HYP_KVA_MASK; \
+ and reg, reg, x7; \
+ mov x7, HYP_KVA_OFFSET; \
+ orr reg, reg, x7;
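+
+/*
+ * Illustrative sketch: KTOHYP_REG rewrites a kernel VA into its EL2 alias,
+ * the C equivalent being roughly
+ *
+ *	hyp_va = (kern_va & HYP_KVA_MASK) | HYP_KVA_OFFSET;
+ *
+ * assuming HYP_KVA_MASK and HYP_KVA_OFFSET are the EL2 mapping constants
+ * defined elsewhere in this series.
+ */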
+
+
+/* Load a register from struct hyp *hyp member of hypctx. */
+#define LOAD_HYP_REG(prefix, reg) \
+ /* Compute VA of hyp member in x1 */ \
+ mov x1, #HYPCTX_HYP; \
+ add x1, x1, x0; \
+ /* Get hyp address in x2 */ \
+ ldr x2, [x1]; \
+ /* Transform hyp kernel VA into an EL2 VA */ \
+ KTOHYP_REG(x2); \
+ /* Get register offset inside struct hyp */ \
+ mov x1, prefix ##_ ##reg; \
+	/* Compute register address */	\
+ add x2, x2, x1; \
+ /* Load the register */ \
+ ldr x1, [x2]; \
+ msr reg, x1; \
+
+
+/*
+ * Restore all the guest registers to their original values.
+ *
+ * Expecting:
+ * x0 - struct hypctx address
+ *
+ * After call:
+ * tpidr_el2 - struct hypctx address
+ */
+#define LOAD_GUEST_REGS() \
+ LOAD_SYS_REG64(HYPCTX, ACTLR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, AFSR0_EL1); \
+ LOAD_SYS_REG64(HYPCTX, AFSR1_EL1); \
+ LOAD_SYS_REG64(HYPCTX, AMAIR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, CONTEXTIDR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, CPACR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, ELR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, ESR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, FAR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, MAIR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, PAR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, SCTLR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, SP_EL0); \
+ LOAD_SYS_REG64(HYPCTX, TCR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, TPIDR_EL0); \
+ LOAD_SYS_REG64(HYPCTX, TPIDRRO_EL0); \
+ LOAD_SYS_REG64(HYPCTX, TPIDR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, TTBR0_EL1); \
+ LOAD_SYS_REG64(HYPCTX, TTBR1_EL1); \
+ LOAD_SYS_REG64(HYPCTX, VBAR_EL1); \
+ LOAD_SYS_REG32(HYPCTX, SPSR_EL1); \
+ \
+ LOAD_SYS_REG64(HYPCTX, CPTR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, ELR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, HCR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, VPIDR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, VMPIDR_EL2); \
+ LOAD_SYS_REG32(HYPCTX, SPSR_EL2); \
+ \
+ LOAD_SYS_REG32(HYPCTX_VGIC, ICH_HCR_EL2); \
+ LOAD_SYS_REG32(HYPCTX_VGIC, ICH_VMCR_EL2); \
+ \
+ LOAD_SYS_REG32(HYPCTX_VTIMER_CPU, CNTKCTL_EL1); \
+ LOAD_SYS_REG64(HYPCTX_VTIMER_CPU, CNTV_CVAL_EL0); \
+ LOAD_SYS_REG32(HYPCTX_VTIMER_CPU, CNTV_CTL_EL0); \
+ \
+ LOAD_REG(HYPCTX, FP); \
+ \
+ LOAD_HYP_REG(HYP, VTTBR_EL2); \
+ LOAD_HYP_REG(HYP_VTIMER, CNTHCTL_EL2); \
+ LOAD_HYP_REG(HYP_VTIMER, CNTVOFF_EL2); \
+ \
+ LOAD_LR_REGS(); \
+ LOAD_AP0R_REGS(); \
+ LOAD_AP1R_REGS(); \
+ \
+ /* Load the guest EL1 stack pointer */ \
+ mov x1, #HYPCTX_REGS_SP; \
+ add x1, x1, x0; \
+ ldr x2, [x1]; \
+ msr sp_el1, x2; \
+ \
+ LOAD_GUEST_X_REGS(); \
+
+
+/*
+ * Save exit information
+ *
+ * Expecting:
+ * x0 - struct hypctx address
+ */
+#define SAVE_EXIT_INFO() \
+ SAVE_SYS_REG64(HYPCTX_EXIT_INFO, ESR_EL2); \
+ SAVE_SYS_REG64(HYPCTX_EXIT_INFO, FAR_EL2); \
+ SAVE_SYS_REG64(HYPCTX_EXIT_INFO, HPFAR_EL2); \
+
+#endif /* !_VMM_HYP_MACROS_H_ */
Index: sys/arm64/vmm/io/vgic_v3.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VGIC_V3_H_
+#define _VMM_VGIC_V3_H_
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm/arm/gic_common.h>
+
+#define VGIC_SGI_NUM (GIC_LAST_SGI - GIC_FIRST_SGI + 1)
+#define VGIC_PPI_NUM (GIC_LAST_PPI - GIC_FIRST_PPI + 1)
+#define VGIC_SPI_NUM (GIC_LAST_SPI - GIC_FIRST_SPI + 1)
+#define VGIC_PRV_I_NUM (VGIC_SGI_NUM + VGIC_PPI_NUM)
+#define VGIC_SHR_I_NUM (VGIC_SPI_NUM)
+
+#define VGIC_ICH_LR_NUM_MAX 16
+#define VGIC_ICH_AP0R_NUM_MAX 4
+#define VGIC_ICH_AP1R_NUM_MAX VGIC_ICH_AP0R_NUM_MAX
+
+/* Order matters, a lower value means a higher precedence */
+enum vgic_v3_irqtype {
+ VGIC_IRQ_MAXPRIO,
+ VGIC_IRQ_CLK,
+ VGIC_IRQ_VIRTIO,
+ VGIC_IRQ_MISC,
+ VGIC_IRQ_INVALID,
+};
+
+struct vgic_mmio_region {
+ vm_offset_t start;
+ vm_offset_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+
+struct vm;
+struct vm_exit;
+struct hyp;
+
+struct vgic_v3_dist {
+ struct mtx dist_mtx;
+
+ uint64_t start;
+ size_t end;
+	uint64_t end;
+
+ uint32_t gicd_ctlr; /* Distributor Control Register */
+ uint32_t gicd_typer; /* Interrupt Controller Type Register */
+ uint32_t gicd_pidr2; /* Distributor Peripheral ID2 Register */
+ /* Interrupt Configuration Registers. */
+ uint32_t *gicd_icfgr;
+ /* Interrupt Priority Registers. */
+ uint32_t *gicd_ipriorityr;
+ /* Interrupt Routing Registers. */
+ uint64_t *gicd_irouter;
+ /* Interrupt Clear-Enable and Set-Enable Registers. */
+ uint32_t *gicd_ixenabler;
+};
+
+#define aff_routing_en(distp)	((distp)->gicd_ctlr & GICD_CTLR_ARE_NS)
+
+struct vgic_v3_redist {
+ uint64_t start;
+ uint64_t end;
+
+ uint64_t gicr_typer; /* Redistributor Type Register */
+	uint32_t gicr_ctlr;	/* Redistributor Control Register */
+ uint32_t gicr_ixenabler0;
+ /* Interrupt Priority Registers. */
+ uint32_t gicr_ipriorityr[VGIC_PRV_I_NUM / 4];
+	/* Interrupt Configuration Registers. */
+ uint32_t gicr_icfgr0, gicr_icfgr1;
+};
+
+struct vgic_v3_irq;
+struct vgic_v3_cpu_if {
+ uint32_t ich_eisr_el2; /* End of Interrupt Status Register */
+ uint32_t ich_elrsr_el2; /* Empty List register Status Register (ICH_ELRSR_EL2) */
+ uint32_t ich_hcr_el2; /* Hyp Control Register */
+ uint32_t ich_misr_el2; /* Maintenance Interrupt State Register */
+ uint32_t ich_vmcr_el2; /* Virtual Machine Control Register */
+
+ /*
+ * The List Registers are part of the VM context and are modified on a
+ * world switch. They need to be allocated statically so they are
+ * mapped in the EL2 translation tables when struct hypctx is mapped.
+ */
+ uint64_t ich_lr_el2[VGIC_ICH_LR_NUM_MAX];
+ size_t ich_lr_num;
+
+ /*
+ * We need a mutex for accessing the list registers because they are
+ * modified asynchronously by the virtual timer.
+ *
+ * Note that the mutex *MUST* be a spin mutex because an interrupt can
+ * be injected by a callout callback function, thereby modifying the
+ * list registers from a context where sleeping is forbidden.
+ */
+ struct mtx lr_mtx;
+
+ /* Active Priorities Registers for Group 0 and 1 interrupts */
+ uint32_t ich_ap0r_el2[VGIC_ICH_AP0R_NUM_MAX];
+ size_t ich_ap0r_num;
+ uint32_t ich_ap1r_el2[VGIC_ICH_AP1R_NUM_MAX];
+ size_t ich_ap1r_num;
+
+ struct vgic_v3_irq *irqbuf;
+ size_t irqbuf_size;
+ size_t irqbuf_num;
+};
+
+int vgic_v3_attach_to_vm(void *arg, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
+void vgic_v3_detach_from_vm(void *arg);
+void vgic_v3_init(uint64_t ich_vtr_el2);
+void vgic_v3_vminit(void *arg);
+void vgic_v3_cpuinit(void *arg, bool last_vcpu);
+void vgic_v3_sync_hwstate(void *arg);
+
+void vgic_v3_mmio_init(struct hyp *hyp);
+void vgic_v3_mmio_destroy(struct hyp *hyp);
+
+int vgic_v3_vcpu_pending_irq(void *arg);
+int vgic_v3_inject_irq(void *arg, uint32_t irq,
+ enum vgic_v3_irqtype irqtype);
+int vgic_v3_remove_irq(void *arg, uint32_t irq, bool ignore_state);
+
+void vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp);
+int vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled,
+ struct hyp *hyp, int vcpuid);
+
+DECLARE_CLASS(arm_vgic_driver);
+
+#endif /* !_VMM_VGIC_V3_H_ */
Index: sys/arm64/vmm/io/vgic_v3.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3.c
@@ -0,0 +1,983 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/bitstring.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <machine/bus.h>
+#include <machine/bitops.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/param.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/intr.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm/arm/generic_timer.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#include <arm64/vmm/hyp.h>
+#include <arm64/vmm/mmu.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vgic_v3_reg.h"
+
+#define VGIC_V3_DEVNAME "vgic"
+#define VGIC_V3_DEVSTR "ARM Virtual Generic Interrupt Controller v3"
+
+#define RES0 0UL
+
+#define IRQBUF_SIZE_MIN 32
+#define IRQBUF_SIZE_MAX (1 << 10)
+
+#define IRQ_SCHEDULED (GIC_LAST_SPI + 1)
+
+#define lr_pending(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_PENDING)
+#define lr_inactive(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_INACTIVE)
+#define lr_active(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_ACTIVE)
+#define lr_pending_active(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_PENDING_ACTIVE)
+#define lr_not_active(lr) (!lr_active(lr) && !lr_pending_active(lr))
+
+#define lr_clear_irq(lr) ((lr) &= ~ICH_LR_EL2_STATE_MASK)
+
+MALLOC_DEFINE(M_VGIC_V3, "ARM VMM VGIC V3", "ARM VMM VGIC V3");
+
+struct vgic_v3_virt_features {
+ uint8_t min_prio;
+ size_t ich_lr_num;
+ size_t ich_ap0r_num;
+ size_t ich_ap1r_num;
+};
+
+struct vgic_v3_ro_regs {
+ uint32_t gicd_icfgr0;
+ uint32_t gicd_pidr2;
+ uint32_t gicd_typer;
+};
+
+struct vgic_v3_irq {
+ uint32_t irq;
+ enum vgic_v3_irqtype irqtype;
+ uint8_t enabled;
+ uint8_t priority;
+};
+
+#define vip_to_lr(vip, lr) \
+do { \
+ lr = ICH_LR_EL2_STATE_PENDING; \
+ lr |= ICH_LR_EL2_GROUP1; \
+ lr |= (uint64_t)vip->priority << ICH_LR_EL2_PRIO_SHIFT; \
+ lr |= vip->irq; \
+} while (0)
+
+#define lr_to_vip(lr, vip) \
+do { \
+ (vip)->irq = ICH_LR_EL2_VINTID(lr); \
+ (vip)->priority = \
+ (uint8_t)(((lr) & ICH_LR_EL2_PRIO_MASK) >> ICH_LR_EL2_PRIO_SHIFT); \
+} while (0)
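+
+/*
+ * Illustrative sketch: for a buffered interrupt with priority 0x80 and
+ * INTID 27, vip_to_lr() builds a list register value along the lines of
+ *
+ *	lr = ICH_LR_EL2_STATE_PENDING | ICH_LR_EL2_GROUP1 |
+ *	    ((uint64_t)0x80 << ICH_LR_EL2_PRIO_SHIFT) | 27;
+ *
+ * and lr_to_vip() recovers the INTID and priority fields from such a value.
+ */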
+
+static struct vgic_v3_virt_features virt_features;
+static struct vgic_v3_ro_regs ro_regs;
+
+static struct gic_v3_softc *gic_sc;
+
+void
+vgic_v3_cpuinit(void *arg, bool last_vcpu)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ uint64_t aff, vmpidr_el2;
+ int i;
+
+ vmpidr_el2 = hypctx->vmpidr_el2;
+ KASSERT(vmpidr_el2 != 0,
+ ("Trying to init this CPU's vGIC before the vCPU"));
+ /*
+ * Get affinity for the current CPU. The guest CPU affinity is taken
+ * from VMPIDR_EL2. The Redistributor corresponding to this CPU is
+ * the Redistributor with the same affinity from GICR_TYPER.
+ */
+ aff = (CPU_AFF3(vmpidr_el2) << 24) | (CPU_AFF2(vmpidr_el2) << 16) |
+ (CPU_AFF1(vmpidr_el2) << 8) | CPU_AFF0(vmpidr_el2);
+
+ /* Set up GICR_TYPER. */
+ redist->gicr_typer = aff << GICR_TYPER_AFF_SHIFT;
+	/* Redistributor doesn't support virtual or physical LPIs. */
+ redist->gicr_typer &= ~GICR_TYPER_VLPIS;
+ redist->gicr_typer &= ~GICR_TYPER_PLPIS;
+
+ if (last_vcpu)
+ /* Mark the last Redistributor */
+ redist->gicr_typer |= GICR_TYPER_LAST;
+
+ /*
+ * Configure the Redistributor Control Register.
+ *
+ * ~GICR_CTLR_LPI_ENABLE: LPIs are disabled
+ */
+ redist->gicr_ctlr = 0 & ~GICR_CTLR_LPI_ENABLE;
+
+ mtx_init(&cpu_if->lr_mtx, "VGICv3 ICH_LR_EL2 lock", NULL, MTX_SPIN);
+
+ /*
+ * Configure the Interrupt Controller Hyp Control Register.
+ *
+ * ICH_HCR_EL2_En: enable virtual CPU interface.
+ *
+ * Maintenance interrupts are disabled.
+ */
+ cpu_if->ich_hcr_el2 = ICH_HCR_EL2_En;
+
+ /*
+ * Configure the Interrupt Controller Virtual Machine Control Register.
+ *
+ * ICH_VMCR_EL2_VPMR: lowest priority mask for the VCPU interface
+ * ICH_VMCR_EL2_VBPR1_NO_PREEMPTION: disable interrupt preemption for
+ * Group 1 interrupts
+ * ICH_VMCR_EL2_VBPR0_NO_PREEMPTION: disable interrupt preemption for
+ * Group 0 interrupts
+ * ~ICH_VMCR_EL2_VEOIM: writes to EOI registers perform priority drop
+ * and interrupt deactivation.
+ * ICH_VMCR_EL2_VENG0: virtual Group 0 interrupts enabled.
+ * ICH_VMCR_EL2_VENG1: virtual Group 1 interrupts enabled.
+ */
+ cpu_if->ich_vmcr_el2 = \
+ (virt_features.min_prio << ICH_VMCR_EL2_VPMR_SHIFT) | \
+ ICH_VMCR_EL2_VBPR1_NO_PREEMPTION | ICH_VMCR_EL2_VBPR0_NO_PREEMPTION;
+ cpu_if->ich_vmcr_el2 &= ~ICH_VMCR_EL2_VEOIM;
+ cpu_if->ich_vmcr_el2 |= ICH_VMCR_EL2_VENG0 | ICH_VMCR_EL2_VENG1;
+
+ cpu_if->ich_lr_num = virt_features.ich_lr_num;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ cpu_if->ich_lr_el2[i] = 0UL;
+
+ cpu_if->ich_ap0r_num = virt_features.ich_ap0r_num;
+ cpu_if->ich_ap1r_num = virt_features.ich_ap1r_num;
+
+ cpu_if->irqbuf = malloc(IRQBUF_SIZE_MIN * sizeof(*cpu_if->irqbuf),
+ M_VGIC_V3, M_WAITOK | M_ZERO);
+ cpu_if->irqbuf_size = IRQBUF_SIZE_MIN;
+ cpu_if->irqbuf_num = 0;
+}
+
+void
+vgic_v3_vminit(void *arg)
+{
+ struct hyp *hyp = arg;
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ /*
+ * Configure the Distributor control register. The register resets to an
+ * architecturally UNKNOWN value, so we reset to 0 to disable all
+ * functionality controlled by the register.
+ *
+	 * The exception is GICD_CTLR.DS, which is RAO/WI when the Distributor
+ * supports one security state (ARM GIC Architecture Specification for
+ * GICv3 and GICv4, p. 4-464)
+ */
+ dist->gicd_ctlr = GICD_CTLR_DS;
+
+ dist->gicd_typer = ro_regs.gicd_typer;
+ dist->nirqs = GICD_TYPER_I_NUM(dist->gicd_typer);
+ dist->gicd_pidr2 = ro_regs.gicd_pidr2;
+
+ mtx_init(&dist->dist_mtx, "VGICv3 Distributor lock", NULL, MTX_SPIN);
+}
+
+int
+vgic_v3_attach_to_vm(void *arg, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size)
+{
+ struct hyp *hyp = arg;
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+ struct vgic_v3_redist *redist;
+ int i;
+
+ /* Set the distributor address and size for trapping guest access. */
+ dist->start = dist_start;
+ dist->end = dist_start + dist_size;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ redist = &hyp->ctx[i].vgic_redist;
+ /* Set the redistributor address and size. */
+ redist->start = redist_start;
+ redist->end = redist_start + redist_size;
+ }
+ vgic_v3_mmio_init(hyp);
+
+ hyp->vgic_attached = true;
+
+ return (0);
+}
+
+void
+vgic_v3_detach_from_vm(void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vgic_v3_cpu_if *cpu_if;
+ int i;
+
+ hyp = arg;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+		hypctx = &hyp->ctx[i];
+ cpu_if = &hypctx->vgic_cpu_if;
+ free(cpu_if->irqbuf, M_VGIC_V3);
+ }
+
+ vgic_v3_mmio_destroy(hyp);
+}
+
+int
+vgic_v3_vcpu_pending_irq(void *arg)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+
+ return (cpu_if->irqbuf_num);
+}
+
+/* Removes ALL instances of interrupt 'irq' */
+static int
+vgic_v3_irqbuf_remove_nolock(uint32_t irq, struct vgic_v3_cpu_if *cpu_if)
+{
+ size_t dest = 0;
+ size_t from = cpu_if->irqbuf_num;
+
+ while (dest < cpu_if->irqbuf_num) {
+ if (cpu_if->irqbuf[dest].irq == irq) {
+ for (from = dest + 1; from < cpu_if->irqbuf_num; from++) {
+ if (cpu_if->irqbuf[from].irq == irq)
+ continue;
+ cpu_if->irqbuf[dest++] = cpu_if->irqbuf[from];
+ }
+ cpu_if->irqbuf_num = dest;
+ } else {
+ dest++;
+ }
+ }
+
+ return (from - dest);
+}
+
+int
+vgic_v3_remove_irq(void *arg, uint32_t irq, bool ignore_state)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ size_t i;
+
+ if (irq >= dist->nirqs) {
+ eprintf("Malformed IRQ %u.\n", irq);
+ return (1);
+ }
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ for (i = 0; i < cpu_if->ich_lr_num; i++) {
+ if (ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq &&
+ (lr_not_active(cpu_if->ich_lr_el2[i]) || ignore_state))
+ lr_clear_irq(cpu_if->ich_lr_el2[i]);
+ }
+ vgic_v3_irqbuf_remove_nolock(irq, cpu_if);
+
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+
+ return (0);
+}
+
+static struct vgic_v3_irq *
+vgic_v3_irqbuf_add_nolock(struct vgic_v3_cpu_if *cpu_if)
+{
+ struct vgic_v3_irq *new_irqbuf, *old_irqbuf;
+ size_t new_size;
+
+ if (cpu_if->irqbuf_num == cpu_if->irqbuf_size) {
+ /* Double the size of the buffered interrupts list */
+ new_size = cpu_if->irqbuf_size << 1;
+ if (new_size > IRQBUF_SIZE_MAX)
+ return (NULL);
+
+ new_irqbuf = NULL;
+ /* TODO: malloc sleeps here and causes a panic */
+ while (new_irqbuf == NULL)
+ new_irqbuf = malloc(new_size * sizeof(*cpu_if->irqbuf),
+ M_VGIC_V3, M_NOWAIT | M_ZERO);
+ memcpy(new_irqbuf, cpu_if->irqbuf,
+ cpu_if->irqbuf_size * sizeof(*cpu_if->irqbuf));
+
+ old_irqbuf = cpu_if->irqbuf;
+ cpu_if->irqbuf = new_irqbuf;
+ cpu_if->irqbuf_size = new_size;
+ free(old_irqbuf, M_VGIC_V3);
+ }
+
+ cpu_if->irqbuf_num++;
+
+ return (&cpu_if->irqbuf[cpu_if->irqbuf_num - 1]);
+}
+
+static bool
+vgic_v3_int_target(uint32_t irq, struct hypctx *hypctx)
+{
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ uint64_t irouter;
+ uint64_t aff;
+ uint32_t irq_off, irq_mask;
+ int n;
+
+ if (irq <= GIC_LAST_PPI)
+ return (true);
+
+ /* XXX Affinity routing disabled not implemented */
+ if (!aff_routing_en(dist))
+ return (true);
+
+ irq_off = irq % 32;
+ irq_mask = 1 << irq_off;
+ n = irq / 32;
+
+ irouter = dist->gicd_irouter[irq];
+ /* Check if 1-of-N routing is active */
+ if (irouter & GICD_IROUTER_IRM)
+ /* Check if the VCPU is participating */
+ return (redist->gicr_ctlr & GICR_CTLR_DPG1NS ? true : false);
+
+ aff = redist->gicr_typer >> GICR_TYPER_AFF_SHIFT;
+ /* Affinity in format for comparison with irouter */
+ aff = GICR_TYPER_AFF0(redist->gicr_typer) | \
+ (GICR_TYPER_AFF1(redist->gicr_typer) << 8) | \
+ (GICR_TYPER_AFF2(redist->gicr_typer) << 16) | \
+ (GICR_TYPER_AFF3(redist->gicr_typer) << 32);
+ if ((irouter & aff) == aff)
+ return (true);
+ else
+ return (false);
+}
+
+static uint8_t
+vgic_v3_get_priority(uint32_t irq, struct hypctx *hypctx)
+{
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ size_t n;
+ uint32_t off, mask;
+ uint8_t priority;
+
+ n = irq / 4;
+	/* Each 32-bit priority register holds four 8-bit priority fields. */
+	off = (irq % 4) * 8;
+	mask = 0xff << off;
+ /*
+ * When affinity routing is enabled, the Redistributor is used for
+ * SGIs and PPIs and the Distributor for SPIs. When affinity routing
+ * is not enabled, the Distributor registers are used for all
+ * interrupts.
+ */
+ if (aff_routing_en(dist) && (n <= 7))
+ priority = (redist->gicr_ipriorityr[n] & mask) >> off;
+ else
+ priority = (dist->gicd_ipriorityr[n] & mask) >> off;
+
+ return (priority);
+}
+
+static bool
+vgic_v3_intid_enabled(uint32_t irq, struct hypctx *hypctx)
+{
+ struct vgic_v3_dist *dist;
+ struct vgic_v3_redist *redist;
+ uint32_t irq_off, irq_mask;
+ int n;
+
+ irq_off = irq % 32;
+ irq_mask = 1 << irq_off;
+ n = irq / 32;
+
+ if (irq <= GIC_LAST_PPI) {
+ redist = &hypctx->vgic_redist;
+ if (!(redist->gicr_ixenabler0 & irq_mask))
+ return (false);
+ } else {
+ dist = &hypctx->hyp->vgic_dist;
+ if (!(dist->gicd_ixenabler[n] & irq_mask))
+ return (false);
+ }
+
+ return (true);
+}
+
+static inline bool
+dist_group_enabled(struct vgic_v3_dist *dist)
+{
+ return ((dist->gicd_ctlr & GICD_CTLR_G1A) != 0);
+}
+
+int
+vgic_v3_inject_irq(void *arg, uint32_t irq, enum vgic_v3_irqtype irqtype)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_irq *vip;
+ int error;
+ int i;
+ uint8_t priority;
+ bool enabled;
+
+ KASSERT(irq > GIC_LAST_SGI, ("SGI interrupts not implemented"));
+
+ if (irq >= dist->nirqs || irqtype >= VGIC_IRQ_INVALID) {
+ eprintf("Malformed IRQ %u.\n", irq);
+ return (1);
+ }
+
+ error = 0;
+ mtx_lock_spin(&dist->dist_mtx);
+
+ enabled = dist_group_enabled(&hypctx->hyp->vgic_dist) &&
+ vgic_v3_intid_enabled(irq, hypctx) &&
+ vgic_v3_int_target(irq, hypctx);
+ priority = vgic_v3_get_priority(irq, hypctx);
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ /*
+	 * If the guest is falling behind on timer interrupts, don't swamp it with
+ * one interrupt after another. However, if the timer interrupt is being
+ * serviced by the guest (it is in a state other than pending, either
+ * active or pending and active), then add it to the buffer to be
+ * injected later. Otherwise, the timer would stop working because we
+ * disable the timer in the host interrupt handler.
+ */
+ if (irqtype == VGIC_IRQ_CLK) {
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq &&
+ lr_pending(cpu_if->ich_lr_el2[i]))
+ goto out;
+ for (i = 0; i < cpu_if->irqbuf_num; i++)
+ if (cpu_if->irqbuf[i].irq == irq)
+ goto out;
+ }
+
+ vip = vgic_v3_irqbuf_add_nolock(cpu_if);
+ if (!vip) {
+ eprintf("Error adding IRQ %u to the IRQ buffer.\n", irq);
+ error = 1;
+ goto out;
+ }
+ vip->irq = irq;
+ vip->irqtype = irqtype;
+ vip->enabled = enabled;
+ vip->priority = priority;
+
+out:
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+ mtx_unlock_spin(&dist->dist_mtx);
+
+ return (error);
+}
+
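+/*
+ * Propagate a change of the GICD_CTLR Group 1 enable bit to the interrupts
+ * buffered for every VCPU.
+ */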
+void
+vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp)
+{
+ struct hypctx *hypctx;
+ struct vgic_v3_cpu_if *cpu_if;
+ struct vgic_v3_irq *vip;
+ int i, j;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ hypctx = &hyp->ctx[i];
+ cpu_if = &hypctx->vgic_cpu_if;
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ for (j = 0; j < cpu_if->irqbuf_num; j++) {
+ vip = &cpu_if->irqbuf[j];
+ if (!enabled)
+ vip->enabled = 0;
+ else if (vgic_v3_intid_enabled(vip->irq, hypctx))
+ vip->enabled = 1;
+ }
+
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+ }
+}
+
+static int
+vgic_v3_irq_toggle_enabled_vcpu(uint32_t irq, bool enabled,
+ struct vgic_v3_cpu_if *cpu_if)
+{
+ int i;
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ if (enabled) {
+ /*
+ * Enable IRQs that were injected when the interrupt ID was
+ * disabled
+ */
+ for (i = 0; i < cpu_if->irqbuf_num; i++)
+ if (cpu_if->irqbuf[i].irq == irq)
+ cpu_if->irqbuf[i].enabled = true;
+ } else {
+ /* Remove the disabled IRQ from the LR regs if it is pending */
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (lr_pending(cpu_if->ich_lr_el2[i]) &&
+ ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq)
+ lr_clear_irq(cpu_if->ich_lr_el2[i]);
+
+ /* Remove the IRQ from the interrupt buffer */
+ vgic_v3_irqbuf_remove_nolock(irq, cpu_if);
+ }
+
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+
+ return (0);
+}
+
+int
+vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled,
+ struct hyp *hyp, int vcpuid)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ int error;
+ int i;
+
+ if (irq <= GIC_LAST_PPI) {
+ cpu_if = &hyp->ctx[vcpuid].vgic_cpu_if;
+ return (vgic_v3_irq_toggle_enabled_vcpu(irq, enabled, cpu_if));
+ } else {
+ /* TODO: Update irqbuf for all VCPUs, not just VCPU 0 */
+ for (i = 0; i < 1; i++) {
+ cpu_if = &hyp->ctx[i].vgic_cpu_if;
+ error = vgic_v3_irq_toggle_enabled_vcpu(irq, enabled, cpu_if);
+ if (error)
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
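+/*
+ * Return the index of the highest priority interrupt in irqbuf[start, end)
+ * that can be signalled to the guest: not already scheduled, its group
+ * enabled, targeting this VCPU and with a priority value below the VPMR
+ * threshold. Return -1 if there is no such interrupt.
+ */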
+static int
+irqbuf_highest_priority(struct vgic_v3_cpu_if *cpu_if, int start, int end,
+ struct hypctx *hypctx)
+{
+ uint32_t irq;
+ int i, max_idx;
+ uint8_t priority, max_priority;
+ uint8_t vpmr;
+
+ vpmr = (cpu_if->ich_vmcr_el2 & ICH_VMCR_EL2_VPMR_MASK) >> \
+ ICH_VMCR_EL2_VPMR_SHIFT;
+
+ max_idx = -1;
+ max_priority = 0xff;
+ for (i = start; i < end; i++) {
+ irq = cpu_if->irqbuf[i].irq;
+ /* Check that the interrupt hasn't been already scheduled */
+ if (irq == IRQ_SCHEDULED)
+ continue;
+
+ if (!dist_group_enabled(&hypctx->hyp->vgic_dist))
+ continue;
+ if (!vgic_v3_int_target(irq, hypctx))
+ continue;
+
+ priority = cpu_if->irqbuf[i].priority;
+ if (priority >= vpmr)
+ continue;
+
+		/* A lower numeric value means a higher interrupt priority */
+		if (max_idx == -1) {
+			max_idx = i;
+			max_priority = priority;
+		} else if (priority < max_priority) {
+			max_idx = i;
+			max_priority = priority;
+		} else if (priority == max_priority &&
+		    cpu_if->irqbuf[i].irqtype < cpu_if->irqbuf[max_idx].irqtype) {
+			max_idx = i;
+			max_priority = priority;
+		}
+ }
+
+ return (max_idx);
+}
+
+static inline bool
+cpu_if_group_enabled(struct vgic_v3_cpu_if *cpu_if)
+{
+ return ((cpu_if->ich_vmcr_el2 & ICH_VMCR_EL2_VENG1) != 0);
+}
+
+static inline int
+irqbuf_next_enabled(struct vgic_v3_irq *irqbuf, int start, int end,
+ struct hypctx *hypctx, struct vgic_v3_cpu_if *cpu_if)
+{
+ int i;
+
+ if (!cpu_if_group_enabled(cpu_if))
+ return (-1);
+
+ for (i = start; i < end; i++)
+ if (irqbuf[i].enabled)
+ break;
+
+ if (i < end)
+ return (i);
+ else
+ return (-1);
+}
+
+static inline int
+vgic_v3_lr_next_empty(uint32_t ich_elrsr_el2, int start, int end)
+{
+ int i;
+
+ for (i = start; i < end; i++)
+ if (ich_elrsr_el2 & (1U << i))
+ break;
+
+ if (i < end)
+ return (i);
+ else
+ return (-1);
+}
+
+/*
+ * There are two cases in which the virtual timer interrupt is in the list
+ * registers:
+ *
+ * 1. The virtual interrupt is active. The guest is executing the interrupt
+ * handler, and the timer fired after it programmed the new alarm time but
+ * before the guest had the chance to write to the EOIR1 register.
+ *
+ * 2. The virtual interrupt is pending and active. The timer interrupt is level
+ *    sensitive. The guest wrote to the EOIR1 register, but the write hasn't yet
+ * propagated to the timer.
+ *
+ * Injecting the interrupt in these cases would mean that another timer
+ * interrupt is asserted as soon as the guest writes to the EOIR1 register (or
+ * very shortly thereafter, in the pending and active scenario). This can lead
+ * to the guest servicing timer interrupts one after the other and doing
+ * nothing else. So do not inject a timer interrupt while one is active or
+ * pending and active. The buffered timer interrupts will be injected after
+ * the next world switch in this case.
+ */
+static bool
+clk_irq_in_lr(struct vgic_v3_cpu_if *cpu_if)
+{
+ uint64_t lr;
+ int i;
+
+ for (i = 0; i < cpu_if->ich_lr_num; i++) {
+ lr = cpu_if->ich_lr_el2[i];
+ if (ICH_LR_EL2_VINTID(lr) == GT_VIRT_IRQ &&
+ (lr_active(lr) || lr_pending_active(lr)))
+ return (true);
+ }
+
+ return (false);
+}
+
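+/*
+ * Move buffered interrupts into empty List Registers, either in buffer order
+ * or by priority when not all of them fit. Timer interrupts are skipped while
+ * one is already active in the List Registers (see clk_irq_in_lr() above).
+ */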
+static void
+vgic_v3_irqbuf_to_lr(struct hypctx *hypctx, struct vgic_v3_cpu_if *cpu_if,
+ bool by_priority)
+{
+ struct vgic_v3_irq *vip;
+ int irqbuf_idx;
+ int lr_idx;
+ bool clk_present;
+
+ clk_present = clk_irq_in_lr(cpu_if);
+
+ irqbuf_idx = 0;
+ lr_idx = 0;
+ for (;;) {
+ if (by_priority)
+ irqbuf_idx = irqbuf_highest_priority(cpu_if,
+ irqbuf_idx, cpu_if->irqbuf_num, hypctx);
+ else
+ irqbuf_idx = irqbuf_next_enabled(cpu_if->irqbuf,
+ irqbuf_idx, cpu_if->irqbuf_num, hypctx, cpu_if);
+ if (irqbuf_idx == -1)
+ break;
+
+ lr_idx = vgic_v3_lr_next_empty(cpu_if->ich_elrsr_el2,
+ lr_idx, cpu_if->ich_lr_num);
+ if (lr_idx == -1)
+ break;
+
+ vip = &cpu_if->irqbuf[irqbuf_idx];
+ if (vip->irqtype == VGIC_IRQ_CLK && clk_present) {
+ /* Skip injecting timer interrupt. */
+ irqbuf_idx++;
+ continue;
+ }
+
+ vip_to_lr(vip, cpu_if->ich_lr_el2[lr_idx]);
+ vip->irq = IRQ_SCHEDULED;
+ irqbuf_idx++;
+ lr_idx++;
+ }
+
+ /* Remove all interrupts that were just scheduled. */
+ vgic_v3_irqbuf_remove_nolock(IRQ_SCHEDULED, cpu_if);
+}
+
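+/*
+ * Transfer buffered interrupts to the List Registers before the VCPU runs
+ * again. If non-timer interrupts remain buffered, enable the underflow
+ * maintenance interrupt so that they can be injected on a later world switch.
+ */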
+void
+vgic_v3_sync_hwstate(void *arg)
+{
+ struct hypctx *hypctx;
+ struct vgic_v3_cpu_if *cpu_if;
+ int lr_free;
+ int i;
+ bool by_priority;
+ bool en_underflow_intr;
+
+ hypctx = arg;
+ cpu_if = &hypctx->vgic_cpu_if;
+
+ /*
+ * All Distributor writes have been executed at this point, do not
+ * protect Distributor reads with a mutex.
+ *
+	 * This is called with all interrupts disabled, so there is no need for
+ * a List Register spinlock either.
+ */
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ /* Exit early if there are no buffered interrupts */
+ if (cpu_if->irqbuf_num == 0) {
+ cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+ goto out;
+ }
+
+ /* Test if all buffered interrupts can fit in the LR regs */
+ lr_free = 0;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (cpu_if->ich_elrsr_el2 & (1U << i))
+ lr_free++;
+
+	by_priority = (cpu_if->irqbuf_num > lr_free);
+ vgic_v3_irqbuf_to_lr(hypctx, cpu_if, by_priority);
+
+ lr_free = 0;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (cpu_if->ich_elrsr_el2 & (1U << i))
+ lr_free++;
+
+ en_underflow_intr = false;
+ if (cpu_if->irqbuf_num > 0)
+ for (i = 0; i < cpu_if->irqbuf_num; i++)
+ if (cpu_if->irqbuf[i].irqtype != VGIC_IRQ_CLK) {
+ en_underflow_intr = true;
+ break;
+ }
+ if (en_underflow_intr) {
+ cpu_if->ich_hcr_el2 |= ICH_HCR_EL2_UIE;
+ } else {
+ cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+ }
+
+out:
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+}
+
+static void
+vgic_v3_get_ro_regs()
+{
+ /* GICD_ICFGR0 configures SGIs and it is read-only. */
+ ro_regs.gicd_icfgr0 = gic_d_read(gic_sc, 4, GICD_ICFGR(0));
+
+ /*
+ * Configure the GIC type register for the guest.
+ *
+ * ~GICD_TYPER_SECURITYEXTN: disable security extensions.
+ * ~GICD_TYPER_DVIS: direct injection for virtual LPIs not supported.
+ * ~GICD_TYPER_LPIS: LPIs not supported.
+ */
+ ro_regs.gicd_typer = gic_d_read(gic_sc, 4, GICD_TYPER);
+ ro_regs.gicd_typer &= ~GICD_TYPER_SECURITYEXTN;
+ ro_regs.gicd_typer &= ~GICD_TYPER_DVIS;
+ ro_regs.gicd_typer &= ~GICD_TYPER_LPIS;
+
+ /*
+ * XXX. Guest reads of GICD_PIDR2 should return the same ArchRev as
+ * specified in the guest FDT.
+ */
+ ro_regs.gicd_pidr2 = gic_d_read(gic_sc, 4, GICD_PIDR2);
+}
+
+void
+vgic_v3_init(uint64_t ich_vtr_el2)
+{
+ uint32_t pribits, prebits;
+
+ KASSERT(gic_sc != NULL, ("GIC softc is NULL"));
+
+ vgic_v3_get_ro_regs();
+
+ pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2);
+	switch (pribits) {
+	case 5:
+		virt_features.min_prio = 0xf8;
+		break;
+	case 6:
+		virt_features.min_prio = 0xfc;
+		break;
+	case 7:
+		virt_features.min_prio = 0xfe;
+		break;
+	case 8:
+		virt_features.min_prio = 0xff;
+		break;
+	}
+
+ prebits = ICH_VTR_EL2_PREBITS(ich_vtr_el2);
+	switch (prebits) {
+	case 5:
+		virt_features.ich_ap0r_num = 1;
+		virt_features.ich_ap1r_num = 1;
+		break;
+	case 6:
+		virt_features.ich_ap0r_num = 2;
+		virt_features.ich_ap1r_num = 2;
+		break;
+	case 7:
+		virt_features.ich_ap0r_num = 4;
+		virt_features.ich_ap1r_num = 4;
+		break;
+	}
+
+ virt_features.ich_lr_num = ICH_VTR_EL2_LISTREGS(ich_vtr_el2);
+}
+
+static int
+vgic_v3_maint_intr(void *arg)
+{
+ printf("MAINTENANCE INTERRUPT\n");
+
+ return (FILTER_HANDLED);
+}
+
+/*
+ * TODO: Look at how gic_v3_fdt.c adds the gic driver.
+ *
+ * 1. In probe they set the device description.
+ * 2. In attach they create children devices for the GIC (in
+ * gic_v3_ofw_bus_attach).
+ * 3. There is no identify function being called.
+ *
+ * On the other hand, in man 9 DEVICE_IDENTIFY it is stated that a new device
+ * instance is created by the identify function.
+ */
+
+static void
+arm_vgic_identify(driver_t *driver, device_t parent)
+{
+ device_t dev;
+
+ if (strcmp(device_get_name(parent), "gic") == 0) {
+ dev = device_find_child(parent, VGIC_V3_DEVNAME, -1);
+ if (!dev)
+ dev = device_add_child(parent, VGIC_V3_DEVNAME, -1);
+ gic_sc = device_get_softc(parent);
+ }
+}
+
+static int
+arm_vgic_probe(device_t dev)
+{
+ device_t parent;
+
+ parent = device_get_parent(dev);
+ if (strcmp(device_get_name(parent), "gic") == 0) {
+ device_set_desc(dev, VGIC_V3_DEVSTR);
+ return (BUS_PROBE_DEFAULT);
+ }
+
+ return (ENXIO);
+}
+
+static int
+arm_vgic_attach(device_t dev)
+{
+ int error;
+
+ error = gic_v3_setup_maint_intr(vgic_v3_maint_intr, NULL, NULL);
+ if (error)
+ device_printf(dev, "Could not setup maintenance interrupt\n");
+
+ return (0);
+}
+
+static int
+arm_vgic_detach(device_t dev)
+{
+ int error;
+
+ error = gic_v3_teardown_maint_intr();
+ if (error)
+ device_printf(dev, "Could not teardown maintenance interrupt\n");
+
+ gic_sc = NULL;
+
+ return (0);
+}
+
+static device_method_t arm_vgic_methods[] = {
+ DEVMETHOD(device_identify, arm_vgic_identify),
+ DEVMETHOD(device_probe, arm_vgic_probe),
+ DEVMETHOD(device_attach, arm_vgic_attach),
+ DEVMETHOD(device_detach, arm_vgic_detach),
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_1(vgic, arm_vgic_driver, arm_vgic_methods, 0, gic_v3_driver);
+
+static devclass_t arm_vgic_devclass;
+DRIVER_MODULE(vgic, gic, arm_vgic_driver, arm_vgic_devclass, 0, 0);
Index: sys/arm64/vmm/io/vgic_v3_mmio.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3_mmio.c
@@ -0,0 +1,1025 @@
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+
+#define DEBUG 0
+
+#define GICR_FRAME_RD 0
+#define GICR_FRAME_SGI GICR_RD_BASE_SIZE
+
+#define RES0 (0UL)
+#define RES1 (~0UL)
+
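+/*
+ * Helpers for Redistributor registers that are simply backed by a field in
+ * struct vgic_v3_redist and have no side effects on access.
+ */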
+#define redist_simple_read(src, destp, vm, vcpuid)			\
+do {									\
+	struct hyp *hyp = vm_get_cookie(vm);				\
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;	\
+	*destp = redist->src;						\
+} while (0)
+
+#define redist_simple_write(src, dest, vm, vcpuid)			\
+do {									\
+	struct hyp *hyp = vm_get_cookie(vm);				\
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;	\
+	redist->dest = src;						\
+} while (0)
+
+#define reg32_idx(ipa, region) (((ipa) - (region).start) / 4)
+#define reg64_idx(ipa, region) (((ipa) - (region).start) / 8)
+
+#define reg_changed(new, old, mask) (((new) & (mask)) != ((old) & (mask)))
+
+/* The names should always be in ascending order of memory address */
+enum vgic_mmio_region_name {
+ /* Distributor registers */
+ VGIC_GICD_CTLR,
+ VGIC_GICD_TYPER,
+ VGIC_GICD_IGROUPR,
+ VGIC_GICD_ISENABLER,
+ VGIC_GICD_ICENABLER,
+ VGIC_GICD_IPRIORITYR,
+ VGIC_GICD_ICFGR,
+ VGIC_GICD_IROUTER,
+ VGIC_GICD_PIDR2,
+ /* Redistributor registers */
+ VGIC_GICR_CTLR,
+ VGIC_GICR_TYPER,
+ VGIC_GICR_WAKER,
+ VGIC_GICR_PIDR2,
+ VGIC_GICR_IGROUPR0,
+ VGIC_GICR_ISENABLER0,
+ VGIC_GICR_ICENABLER0,
+ VGIC_GICR_IPRIORITYR,
+ VGIC_GICR_ICFGR0,
+ VGIC_GICR_ICFGR1,
+ VGIC_MMIO_REGIONS_NUM,
+};
+/*
+ * Necessary for calculating the number of Distributor and Redistributor
+ * regions emulated.
+ */
+#define FIRST_REDIST_MMIO_REGION VGIC_GICR_CTLR
+
+MALLOC_DEFINE(M_VGIC_V3_MMIO, "ARM VMM VGIC DIST MMIO", "ARM VMM VGIC DIST MMIO");
+
+static int
+dist_ctlr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_ctlr;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+ /* Writes are never pending */
+ *rval &= ~GICD_CTLR_RWP;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_ctlr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+ /* GICD_CTLR.DS is RAO/WI when only one security state is supported. */
+ wval |= GICD_CTLR_DS;
+
+ mtx_lock_spin(&dist->dist_mtx);
+
+ if (reg_changed(wval, dist->gicd_ctlr, GICD_CTLR_G1A)) {
+ if (!(wval & GICD_CTLR_G1A))
+ vgic_v3_group_toggle_enabled(false, hyp);
+ else
+ vgic_v3_group_toggle_enabled(true, hyp);
+ }
+ dist->gicd_ctlr = wval;
+
+ mtx_unlock_spin(&dist->dist_mtx);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_typer_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ *rval = dist->gicd_typer;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_typer_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICD_TYPER.\n");
+
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR as RAO/WI. */
+static int
+dist_igroupr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ int n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IGROUPR]);
+ /*
+ * GIC Architecture specification, p 8-477: "For SGIs and PPIs: When
+ * ARE is 1 for the Security state of an interrupt, the field for that
+ * interrupt is RES0 and an implementation is permitted to make the
+ * field RAZ/WI in this case".
+ */
+ if (n == 0 && aff_routing_en(dist)) {
+ *rval = RES0;
+ } else {
+ *rval = RES1;
+ }
+
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR as RAO/WI. */
+static int
+dist_igroupr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ *retu = false;
+ return (0);
+}
+
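+/*
+ * Walk the 32 interrupts covered by one I[SC]ENABLER register and toggle
+ * every interrupt whose enable bit has changed.
+ */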
+static void
+mmio_update_int_enabled(uint32_t new_ixenabler, uint32_t old_ixenabler,
+ uint32_t irq, struct hyp *hyp, int vcpuid)
+{
+ uint32_t irq_mask;
+ int error;
+ int i;
+ bool enabled;
+
+ irq_mask = 0x1;
+ for (i = 0; i < 32; i++) {
+ if (reg_changed(new_ixenabler, old_ixenabler, irq_mask)) {
+ enabled = ((new_ixenabler & irq_mask) != 0);
+ error = vgic_v3_irq_toggle_enabled(irq, enabled,
+ hyp, vcpuid);
+ if (error)
+ eprintf("Warning: error while toggling IRQ %u\n", irq);
+ }
+ irq++;
+ irq_mask <<= 1;
+ }
+}
+
+static int
+dist_ixenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ void *arg, enum vgic_mmio_region_name name)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[name]);
+ /*
+ * GIC Architecture specification, p 8-471: "When ARE is 1 for the
+ * Security state of an interrupt, the field for that interrupt is RES0
+	 * and an implementation is permitted to make the field RAZ/WI in this
+ * case".
+ */
+ if (n == 0 && aff_routing_en(dist)) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_ixenabler[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_ixenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ void *arg, enum vgic_mmio_region_name name)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ uint32_t old_ixenabler;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[name]);
+ /* See dist_ixenabler_read() */
+ if (n == 0 && aff_routing_en(dist))
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+
+ old_ixenabler = dist->gicd_ixenabler[n];
+ if (name == VGIC_GICD_ICENABLER)
+ dist->gicd_ixenabler[n] &= ~wval;
+ else
+ dist->gicd_ixenabler[n] |= wval;
+ mmio_update_int_enabled(dist->gicd_ixenabler[n], old_ixenabler, n * 32,
+ hyp, vcpuid);
+
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_isenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_read(vm, vcpuid, fault_ipa, rval, arg,
+ VGIC_GICD_ISENABLER));
+}
+
+static int
+dist_isenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_write(vm, vcpuid, fault_ipa, wval, arg,
+ VGIC_GICD_ISENABLER));
+}
+
+static int
+dist_icenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_read(vm, vcpuid, fault_ipa, rval, arg,
+ VGIC_GICD_ICENABLER));
+}
+
+static int
+dist_icenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_write(vm, vcpuid, fault_ipa, wval, arg,
+ VGIC_GICD_ICENABLER));
+}
+
+/* XXX: Registers are byte accessible. */
+static int
+dist_ipriorityr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IPRIORITYR]);
+ /*
+ * GIC Architecture specification, p 8-483: when affinity
+ * routing is enabled, GICD_IPRIORITYR<n> is RAZ/WI for
+ * n = 0 to 7.
+ */
+ if (aff_routing_en(dist) && n <= 7) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_ipriorityr[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_ipriorityr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IPRIORITYR]);
+ /* See dist_ipriorityr_read() */
+ if (aff_routing_en(dist) && n <= 7)
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ dist->gicd_ipriorityr[n] = wval;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_icfgr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_ICFGR]);
+ /*
+ * ARM GIC Architecture Specification, p 8-472: "For SGIs,
+ * Int_config fields are RO, meaning that GICD_ICFGR0 is RO."
+ */
+ if (n == 0) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_icfgr[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_icfgr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_ICFGR]);
+ if (n == 0)
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ dist->gicd_icfgr[n] = wval;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_irouter_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg64_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IROUTER]);
+ /* GIC Architecture Manual, p 8-485: registers 0 to 31 are reserved */
+ if (n <= 31) {
+ eprintf("Warning: Read from register GICD_IROUTER%zu\n", n);
+ *rval = RES0;
+ goto out;
+ }
+
+ /*
+ * GIC Architecture Manual, p 8-485: when affinity routing is not
+ * enabled, the registers are RAZ/WI.
+ */
+ if (!aff_routing_en(dist)) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_irouter[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_irouter_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg64_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IROUTER]);
+ if (n <= 31) {
+ eprintf("Warning: Write to register GICD_IROUTER%zu\n", n);
+ goto out;
+ }
+
+ /* See dist_irouter_read() */
+ if (!aff_routing_en(dist))
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ dist->gicd_irouter[n] = wval;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_pidr2_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ *rval = dist->gicd_pidr2;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_pidr2_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICD_PIDR2.\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ctlr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_read(gicr_ctlr, rval, vm, vcpuid);
+ /* Writes are never pending */
+ *rval &= ~GICR_CTLR_RWP & ~GICR_CTLR_UWP;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ctlr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_write(wval, gicr_ctlr, vm, vcpuid);
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_typer_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_read(gicr_typer, rval, vm, vcpuid);
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_typer_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICR_TYPER.\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_waker_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+	/* The Redistributor is always awake, so PS and CA read as zero */
+	*rval = 0;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_waker_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ /* Ignore writes */
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR0 as RAO/WI. */
+static int
+redist_igroupr0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ *rval = RES1;
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR0 as RAO/WI. */
+static int
+redist_igroupr0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ if (wval == 0UL)
+ printf("Warning: Interrupts marked as group 0, ignoring\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ixenabler_read(void *vm, int vcpuid, uint64_t *rval, void *arg,
+ enum vgic_mmio_region_name reg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ *rval = redist->gicr_ixenabler0;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ixenabler_write(void *vm, int vcpuid, uint64_t wval, void *arg,
+ enum vgic_mmio_region_name reg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ uint32_t old_ixenabler0, new_ixenabler0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ old_ixenabler0 = redist->gicr_ixenabler0;
+ if (reg == VGIC_GICR_ICENABLER0)
+ new_ixenabler0 = old_ixenabler0 & ~wval;
+ else
+ new_ixenabler0 = old_ixenabler0 | wval;
+ mmio_update_int_enabled(new_ixenabler0, old_ixenabler0, 0, hyp, vcpuid);
+ redist->gicr_ixenabler0 = new_ixenabler0;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_isenabler0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_read(vm, vcpuid, rval, arg,
+ VGIC_GICR_ISENABLER0));
+}
+
+static int
+redist_isenabler0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_write(vm, vcpuid, wval, arg,
+ VGIC_GICR_ISENABLER0));
+}
+
+static int
+redist_icenabler0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_read(vm, vcpuid, rval, arg,
+ VGIC_GICR_ICENABLER0));
+}
+
+static int
+redist_icenabler0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_write(vm, vcpuid, wval, arg,
+ VGIC_GICR_ICENABLER0));
+}
+
+static int
+redist_ipriorityr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ size_t n;
+ bool *retu = arg;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICR_IPRIORITYR]);
+ *rval = redist->gicr_ipriorityr[n];
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ipriorityr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ size_t n;
+ bool *retu = arg;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICR_IPRIORITYR]);
+ redist->gicr_ipriorityr[n] = wval;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_pidr2_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ /* GICR_PIDR2 has the same value as GICD_PIDR2 */
+ *rval = dist->gicd_pidr2;
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_pidr2_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICR_PIDR2.\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_read(gicr_icfgr0, rval, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_write(wval, gicr_icfgr0, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr1_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+	redist_simple_read(gicr_icfgr1, rval, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr1_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+	redist_simple_write(wval, gicr_icfgr1, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
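+/*
+ * Allocate zeroed backing storage for 'num' emulated registers and report the
+ * size of the allocation so the caller can size the MMIO region to match.
+ */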
+#define alloc_registers(regs, num, size)				\
+do {									\
+	size = (num) * sizeof(*regs);					\
+	regs = malloc(size, M_VGIC_V3_MMIO, M_WAITOK | M_ZERO);	\
+} while (0)
+
+#define div_round_up(n, div) (((n) + (div) - 1) / (div))
+
+static inline void
+init_mmio_region(struct hyp *hyp, size_t regidx, vm_offset_t start,
+ size_t size, mem_region_read_t read_fn, mem_region_write_t write_fn)
+{
+ hyp->vgic_mmio_regions[regidx] = (struct vgic_mmio_region) {
+ .start = start,
+ .end = start + size,
+ .read = read_fn,
+ .write = write_fn,
+ };
+}
+
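+/*
+ * Set up the MMIO emulation regions for the Distributor. The register banks
+ * sized from the number of supported interrupts are allocated here.
+ */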
+static void
+dist_mmio_init_regions(struct vgic_v3_dist *dist, struct hyp *hyp)
+{
+ size_t n;
+ size_t region_size;
+
+ init_mmio_region(hyp, VGIC_GICD_CTLR, dist->start + GICD_CTLR,
+ sizeof(dist->gicd_ctlr), dist_ctlr_read, dist_ctlr_write);
+ init_mmio_region(hyp, VGIC_GICD_TYPER, dist->start + GICD_TYPER,
+ sizeof(dist->gicd_typer), dist_typer_read, dist_typer_write);
+
+ n = div_round_up(dist->nirqs, 32);
+ init_mmio_region(hyp, VGIC_GICD_IGROUPR, dist->start + GICD_IGROUPR_BASE,
+ n * sizeof(uint32_t), dist_igroupr_read, dist_igroupr_write);
+
+ /* ARM GIC Architecture Specification, page 8-471. */
+ n = (dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1;
+	alloc_registers(dist->gicd_ixenabler, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_ISENABLER, dist->start + GICD_ISENABLER_BASE,
+ region_size, dist_isenabler_read, dist_isenabler_write);
+ init_mmio_region(hyp, VGIC_GICD_ICENABLER, dist->start + GICD_ICENABLER_BASE,
+ region_size, dist_icenabler_read, dist_icenabler_write);
+
+ /* ARM GIC Architecture Specification, page 8-483. */
+ n = 8 * ((dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1);
+ alloc_registers(dist->gicd_ipriorityr, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_IPRIORITYR, dist->start + GICD_IPRIORITYR_BASE,
+ region_size, dist_ipriorityr_read, dist_ipriorityr_write);
+
+ n = div_round_up(dist->nirqs, 16);
+ alloc_registers(dist->gicd_icfgr, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_ICFGR, dist->start + GICD_ICFGR_BASE,
+ region_size, dist_icfgr_read, dist_icfgr_write);
+
+ /* ARM GIC Architecture Specification, page 8-485. */
+	n = 32 * ((dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1) - 1;
+ alloc_registers(dist->gicd_irouter, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_IROUTER, dist->start + GICD_IROUTER_BASE,
+ region_size, dist_irouter_read, dist_irouter_write);
+
+ init_mmio_region(hyp, VGIC_GICD_PIDR2, dist->start + GICD_PIDR2,
+ sizeof(dist->gicd_pidr2), dist_pidr2_read, dist_pidr2_write);
+}
+
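+/*
+ * Set up the MMIO emulation regions for one VCPU's Redistributor, covering
+ * both the RD_base and SGI_base register frames.
+ */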
+static void
+redist_mmio_init_regions(struct hyp *hyp, int vcpuid)
+{
+ struct vgic_v3_redist *redist;
+ vm_offset_t start;
+
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+ start = redist->start + GICR_FRAME_RD + GICR_CTLR;
+ init_mmio_region(hyp, VGIC_GICR_CTLR, start, sizeof(redist->gicr_ctlr),
+ redist_ctlr_read, redist_ctlr_write);
+
+ start = redist->start + GICR_FRAME_RD + GICR_TYPER;
+ init_mmio_region(hyp, VGIC_GICR_TYPER, start, sizeof(redist->gicr_typer),
+ redist_typer_read, redist_typer_write);
+
+ start = redist->start + GICR_FRAME_RD + GICR_WAKER;
+ init_mmio_region(hyp, VGIC_GICR_WAKER, start, 4, redist_waker_read,
+ redist_waker_write);
+
+ start = redist->start + GICR_FRAME_RD + GICR_PIDR2;
+ init_mmio_region(hyp, VGIC_GICR_PIDR2, start, 4, redist_pidr2_read,
+ redist_pidr2_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_IGROUPR0;
+ init_mmio_region(hyp, VGIC_GICR_IGROUPR0, start,
+ sizeof(uint32_t), redist_igroupr0_read, redist_igroupr0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ISENABLER0;
+ init_mmio_region(hyp, VGIC_GICR_ISENABLER0, start,
+ sizeof(redist->gicr_ixenabler0), redist_isenabler0_read,
+ redist_isenabler0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ICENABLER0;
+ init_mmio_region(hyp, VGIC_GICR_ICENABLER0, start,
+ sizeof(redist->gicr_ixenabler0), redist_icenabler0_read,
+ redist_icenabler0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_IPRIORITYR_BASE;
+ init_mmio_region(hyp, VGIC_GICR_IPRIORITYR, start,
+ sizeof(redist->gicr_ipriorityr), redist_ipriorityr_read,
+ redist_ipriorityr_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ICFGR0_BASE;
+ init_mmio_region(hyp, VGIC_GICR_ICFGR0, start,
+ sizeof(redist->gicr_icfgr0), redist_icfgr0_read, redist_icfgr0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ICFGR1_BASE;
+ init_mmio_region(hyp, VGIC_GICR_ICFGR1, start,
+ sizeof(redist->gicr_icfgr1), redist_icfgr1_read, redist_icfgr1_write);
+}
+
+void
+vgic_v3_mmio_init(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+ int redist_region_num, dist_region_num, region_num;
+ int ncpus = 1;
+
+ dist_region_num = FIRST_REDIST_MMIO_REGION;
+ redist_region_num = \
+ ncpus * (VGIC_MMIO_REGIONS_NUM - FIRST_REDIST_MMIO_REGION);
+ region_num = dist_region_num + redist_region_num;
+
+ hyp->vgic_mmio_regions = \
+ malloc(region_num * sizeof(*hyp->vgic_mmio_regions),
+ M_VGIC_V3_MMIO, M_WAITOK | M_ZERO);
+ hyp->vgic_mmio_regions_num = region_num;
+
+ dist_mmio_init_regions(dist, hyp);
+
+ /* TODO: Do it for all VCPUs */
+ redist_mmio_init_regions(hyp, 0);
+}
+
+void
+vgic_v3_mmio_destroy(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ if (!hyp->vgic_mmio_regions)
+ return;
+ free(hyp->vgic_mmio_regions, M_VGIC_V3_MMIO);
+
+ free(dist->gicd_ixenabler, M_VGIC_V3_MMIO);
+ free(dist->gicd_ipriorityr, M_VGIC_V3_MMIO);
+ free(dist->gicd_icfgr, M_VGIC_V3_MMIO);
+ free(dist->gicd_irouter, M_VGIC_V3_MMIO);
+}
Index: sys/arm64/vmm/io/vgic_v3_reg.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3_reg.h
@@ -0,0 +1,97 @@
+#ifndef _VGIC_V3_REG_H_
+#define _VGIC_V3_REG_H_
+
+/* Interrupt Controller End of Interrupt Status Register */
+#define ICH_EISR_EL2_STATUS_MASK 0xffff
+#define ICH_EISR_EL2_EOI_NOT_HANDLED(lr) ((1 << lr) & ICH_EISR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Empty List Register Status Register */
+#define ICH_ELRSR_EL2_STATUS_MASK 0xffff
+#define ICH_ELRSR_EL2_LR_EMPTY(x) ((1 << x) & ICH_ELRSR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Hyp Control Register */
+#define ICH_HCR_EL2_EOICOUNT_SHIFT 27
+#define ICH_HCR_EL2_EOICOUNT_MASK (0x1f << ICH_HCR_EL2_EOICOUNT_SHIFT)
+#define ICH_HCR_EL2_TDIR (1 << 14) /* Trap non-secure EL1 writes to IC{C, V}_DIR_EL1 */
+#define ICH_HCR_EL2_TSEI	(1 << 13)	/* Trap System Error Interrupts (SEI) to EL2 */
+#define ICH_HCR_EL2_TALL1 (1 << 12) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 1 interrupts */
+#define ICH_HCR_EL2_TALL0 (1 << 11) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 0 interrupts */
+#define ICH_HCR_EL2_TC (1 << 10) /* Trap non-secure EL1 accesses to common IC{C, V}_* registers */
+#define ICH_HCR_EL2_VGRP1DIE (1 << 7) /* VM Group 1 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP1EIE (1 << 6) /* VM Group 1 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0DIE (1 << 5) /* VM Group 0 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0EIE (1 << 4) /* VM Group 0 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_NPIE (1 << 3) /* No Pending Interrupt Enable */
+#define ICH_HCR_EL2_LRENPIE (1 << 2) /* List Register Entry Not Present Interrupt Enable */
+#define ICH_HCR_EL2_UIE (1 << 1) /* Underflow Interrupt Enable */
+#define ICH_HCR_EL2_En (1 << 0) /* Global enable for the virtual CPU interface */
+
+/* Interrupt Controller List Registers */
+#define ICH_LR_EL2_VINTID_MASK 0xffffffff
+#define ICH_LR_EL2_VINTID(x) ((x) & ICH_LR_EL2_VINTID_MASK)
+#define ICH_LR_EL2_PINTID_SHIFT 32
+#define ICH_LR_EL2_PINTID_MASK (0x3fUL << ICH_LR_EL2_PINTID_SHIFT)
+#define ICH_LR_EL2_PRIO_SHIFT 48
+#define ICH_LR_EL2_PRIO_MASK (0xffUL << ICH_LR_EL2_PRIO_SHIFT)
+#define ICH_LR_EL2_GROUP_SHIFT 60
+#define ICH_LR_EL2_GROUP1 (1UL << ICH_LR_EL2_GROUP_SHIFT)
+#define ICH_LR_EL2_HW (1UL << 61)
+#define ICH_LR_EL2_STATE_SHIFT 62
+#define ICH_LR_EL2_STATE_MASK (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE(x) ((x) & ICH_LR_EL2_STATE_MASK)
+#define ICH_LR_EL2_STATE_INACTIVE (0x0UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING (0x1UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_ACTIVE (0x2UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING_ACTIVE (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+
+/* Interrupt Controller Maintenance Interrupt State Register */
+#define ICH_MISR_EL2_VGRP1D (1 << 7) /* vPE Group 1 Disabled */
+#define ICH_MISR_EL2_VGRP1E (1 << 6) /* vPE Group 1 Enabled */
+#define ICH_MISR_EL2_VGRP0D (1 << 5) /* vPE Group 0 Disabled */
+#define ICH_MISR_EL2_VGRP0E (1 << 4) /* vPE Group 0 Enabled */
+#define ICH_MISR_EL2_NP (1 << 3) /* No Pending */
+#define ICH_MISR_EL2_LRENP (1 << 2) /* List Register Entry Not Present */
+#define ICH_MISR_EL2_U (1 << 1) /* Underflow */
+#define ICH_MISR_EL2_EOI (1 << 0) /* End Of Interrupt */
+
+/* Interrupt Controller Virtual Machine Control Register */
+#define ICH_VMCR_EL2_VPMR_SHIFT 24
+#define ICH_VMCR_EL2_VPMR_MASK (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_LOWEST (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_HIGHEST (0x00 << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_SHIFT 21
+#define ICH_VMCR_EL2_VBPR0_MASK (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_SHIFT 18
+#define ICH_VMCR_EL2_VBPR1_MASK (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VEOIM (1 << 9) /* Virtual EOI mode */
+#define ICH_VMCR_EL2_VCBPR (1 << 4) /* Virtual Common binary Point Register */
+#define ICH_VMCR_EL2_VFIQEN (1 << 3) /* Virtual FIQ enable */
+#define ICH_VMCR_EL2_VACKCTL (1 << 2) /* Virtual AckCtl */
+#define ICH_VMCR_EL2_VENG1 (1 << 1) /* Virtual Group 1 Interrupt Enable */
+#define ICH_VMCR_EL2_VENG0 (1 << 0) /* Virtual Group 0 Interrupt Enable */
+
+/* Interrupt Controller VGIC Type Register */
+#define ICH_VTR_EL2_PRIBITS_SHIFT 29
+#define ICH_VTR_EL2_PRIBITS_MASK (0x7 << ICH_VTR_EL2_PRIBITS_SHIFT)
+#define ICH_VTR_EL2_PRIBITS(x) \
+ ((((x) & ICH_VTR_EL2_PRIBITS_MASK) >> ICH_VTR_EL2_PRIBITS_SHIFT) + 1)
+#define ICH_VTR_EL2_PREBITS_SHIFT 26
+#define ICH_VTR_EL2_PREBITS_MASK (0x7 << ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_PREBITS(x) \
+ (((x) & ICH_VTR_EL2_PREBITS_MASK) >> ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_SEIS (1 << 22) /* System Error Interrupt (SEI) Support */
+#define ICH_VTR_EL2_A3V (1 << 21) /* Affinity 3 Valid */
+#define ICH_VTR_EL2_NV4 (1 << 20) /* Direct injection of virtual interrupts. RES1 for GICv3 */
+#define ICH_VTR_EL2_TDS (1 << 19) /* Implementation supports ICH_HCR_EL2.TDIR */
+#define ICH_VTR_EL2_LISTREGS_MASK 0x1f
+/*
+ * ICH_VTR_EL2.ListRegs holds the number of list registers, minus one. Add one
+ * to get the actual number of list registers.
+ */
+#define ICH_VTR_EL2_LISTREGS(x) (((x) & ICH_VTR_EL2_LISTREGS_MASK) + 1)
+
+#endif /* !_VGIC_V3_REG_H_ */
Index: sys/arm64/vmm/io/vtimer.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vtimer.h
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VTIMER_H_
+#define _VMM_VTIMER_H_
+
+#define GT_PHYS_NS_IRQ 30
+#define GT_VIRT_IRQ 27
+
+#define CNTP_CTL_EL0_OP0 0b11
+#define CNTP_CTL_EL0_OP2 0b001
+#define CNTP_CTL_EL0_OP1 0b011
+#define CNTP_CTL_EL0_CRn 0b1110
+#define CNTP_CTL_EL0_CRm 0b0010
+#define ISS_CNTP_CTL_EL0 \
+ (CNTP_CTL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \
+ CNTP_CTL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \
+ CNTP_CTL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \
+ CNTP_CTL_EL0_CRn << ISS_MSR_CRn_SHIFT | \
+ CNTP_CTL_EL0_CRm << ISS_MSR_CRm_SHIFT)
+
+#define CNTP_CVAL_EL0_OP0 0b11
+#define CNTP_CVAL_EL0_OP1 0b011
+#define CNTP_CVAL_EL0_OP2 0b010
+#define CNTP_CVAL_EL0_CRn 0b1110
+#define CNTP_CVAL_EL0_CRm 0b0010
+#define ISS_CNTP_CVAL_EL0 \
+ (CNTP_CVAL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \
+ CNTP_CVAL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \
+ CNTP_CVAL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \
+ CNTP_CVAL_EL0_CRn << ISS_MSR_CRn_SHIFT | \
+ CNTP_CVAL_EL0_CRm << ISS_MSR_CRm_SHIFT)
+
+#define CNTP_TVAL_EL0_OP0 0b11
+#define CNTP_TVAL_EL0_OP1 0b011
+#define CNTP_TVAL_EL0_OP2 0b000
+#define CNTP_TVAL_EL0_CRn 0b1110
+#define CNTP_TVAL_EL0_CRm 0b0010
+#define ISS_CNTP_TVAL_EL0 \
+ (CNTP_TVAL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \
+ CNTP_TVAL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \
+ CNTP_TVAL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \
+ CNTP_TVAL_EL0_CRn << ISS_MSR_CRn_SHIFT | \
+ CNTP_TVAL_EL0_CRm << ISS_MSR_CRm_SHIFT)
+
+struct vtimer
+{
+ uint64_t cnthctl_el2;
+ uint64_t cntvoff_el2;
+};
+
+struct vtimer_cpu
+{
+ struct callout callout;
+ uint32_t cntkctl_el1;
+ /*
+ * Emulated registers:
+ *
+ * CNTP_CTL_EL0: Counter-timer Physical Timer Control Register
+ * CNTP_CVAL_EL0: Counter-timer Physical Timer CompareValue Register
+ */
+ uint64_t cntp_cval_el0;
+ uint32_t cntp_ctl_el0;
+ /*
+ * The virtual machine has full access to the virtual timer. The
+ * following registers are part of the VM context for the current CPU:
+ *
+	 * CNTV_CTL_EL0: Counter-timer Virtual Timer Control Register
+ * CNTV_CVAL_EL0: Counter-timer Virtual Timer CompareValue Register
+ */
+ uint64_t cntv_cval_el0;
+ uint32_t cntv_ctl_el0;
+};
+
+int vtimer_init(uint64_t cnthctl_el2);
+void vtimer_vminit(void *arg);
+void vtimer_cpuinit(void *arg);
+void vtimer_vmcleanup(void *arg);
+void vtimer_cleanup(void);
+
+int vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+#endif
Index: sys/arm64/vmm/io/vtimer.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vtimer.c
@@ -0,0 +1,407 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/bus.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/timeet.h>
+#include <sys/timetc.h>
+
+#include <machine/bus.h>
+#include <machine/vmm.h>
+#include <machine/armreg.h>
+
+#include <arm/arm/generic_timer.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vtimer.h"
+
+#define RES1 0xffffffffffffffffUL
+
+#define timer_enabled(ctl) \
+ (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE))
+
+static uint64_t cnthctl_el2_reg;
+static uint32_t tmr_frq;
+
+#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS)
+
+static int
+vtimer_virtual_timer_intr(void *arg)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ /*
+	 * TODO: everything here is very strange. The relationship between the
+ * hardware value and the value in memory is not clear at all.
+ */
+
+ hypctx = arm64_get_active_vcpu();
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+
+ if (!hypctx) {
+ /* vm_destroy() was called. */
+ eprintf("No active vcpu\n");
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ goto out;
+ }
+ if (!timer_enabled(cntv_ctl)) {
+ eprintf("Timer not enabled\n");
+ goto out;
+ }
+ if (!timer_condition_met(cntv_ctl)) {
+ eprintf("Timer condition not met\n");
+ goto out;
+ }
+
+ vgic_v3_inject_irq(hypctx, GT_VIRT_IRQ, VGIC_IRQ_CLK);
+
+ hypctx->vtimer_cpu.cntv_ctl_el0 &= ~CNTP_CTL_ENABLE;
+ cntv_ctl = hypctx->vtimer_cpu.cntv_ctl_el0;
+
+out:
+ /*
+ * Disable the timer interrupt. This will prevent the interrupt from
+ * being reasserted as soon as we exit the handler and getting stuck
+ * in an infinite loop.
+ *
+	 * This is safe to do because the guest disables the timer and then
+	 * re-enables it as part of the interrupt handling routine.
+ */
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+
+ return (FILTER_HANDLED);
+}
+
+int
+vtimer_init(uint64_t cnthctl_el2)
+{
+ int error;
+
+ cnthctl_el2_reg = cnthctl_el2;
+ /*
+ * The guest *MUST* use the same timer frequency as the host. The
+ * register CNTFRQ_EL0 is accessible to the guest and a different value
+	 * in the guest dts file might have unforeseen consequences.
+ */
+ tmr_frq = READ_SPECIALREG(cntfrq_el0);
+
+ error = arm_tmr_setup_intr(GT_VIRT, vtimer_virtual_timer_intr, NULL, NULL);
+ if (error) {
+ printf("WARNING: arm_tmr_setup_intr() error: %d\n", error);
+ printf("WARNING: Expect reduced performance\n");
+ }
+
+ return (0);
+}
+
+void
+vtimer_vminit(void *arg)
+{
+ struct hyp *hyp;
+ uint64_t now;
+
+ hyp = (struct hyp *)arg;
+ /*
+ * Configure the Counter-timer Hypervisor Control Register for the VM.
+ *
+ * ~CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 from EL1
+ * CNTHCTL_EL1PCTEN: don't trap access to CNTPCT_EL0
+ */
+ hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN;
+ hyp->vtimer.cnthctl_el2 |= CNTHCTL_EL1PCTEN;
+
+ now = READ_SPECIALREG(cntpct_el0);
+ hyp->vtimer.cntvoff_el2 = now;
+
+ return;
+}
+
+void
+vtimer_cpuinit(void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hypctx = (struct hypctx *)arg;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ /*
+ * Configure physical timer interrupts for the VCPU.
+ *
+ * CNTP_CTL_IMASK: mask interrupts
+ * ~CNTP_CTL_ENABLE: disable the timer
+ */
+ vtimer_cpu->cntp_ctl_el0 = CNTP_CTL_IMASK & ~CNTP_CTL_ENABLE;
+ /*
+ * Callout function is MP_SAFE because the VGIC uses a spin
+ * mutex when modifying the list registers.
+ */
+ callout_init(&vtimer_cpu->callout, 1);
+}
+
+void
+vtimer_vmcleanup(void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer *vtimer;
+ struct vtimer_cpu *vtimer_cpu;
+ uint32_t cntv_ctl;
+ int i;
+
+ hyp = arg;
+ vtimer = &hyp->vtimer;
+
+ hypctx = arm64_get_active_vcpu();
+ if (!hypctx) {
+ /* The active VM was destroyed, stop the timer. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+ }
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vtimer_cpu = &hyp->ctx[i].vtimer_cpu;
+ callout_drain(&vtimer_cpu->callout);
+ }
+}
+
+void
+vtimer_cleanup(void)
+{
+ int error;
+
+ error = arm_tmr_teardown_intr(GT_VIRT);
+ if (error)
+ printf("WARNING: arm_tmr_teardown_intr() error: %d\n", error);
+
+}
+
+static void
+vtimer_inject_irq_callout_func(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_v3_inject_irq(hypctx, GT_PHYS_NS_IRQ, VGIC_IRQ_CLK);
+}
+
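+/*
+ * Arm the callout to fire when the guest's physical timer would: convert the
+ * remaining ticks (CNTP_CVAL_EL0 - CNTPCT_EL0) to sbintime using the counter
+ * frequency. If the compare value is already in the past, inject the
+ * interrupt immediately.
+ */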
+static void
+vtimer_schedule_irq(struct vtimer_cpu *vtimer_cpu, struct hypctx *hypctx)
+{
+ sbintime_t time;
+ uint64_t cntpct_el0;
+ uint64_t diff;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ if (vtimer_cpu->cntp_cval_el0 < cntpct_el0) {
+ /* Timer set in the past, trigger interrupt */
+ vgic_v3_inject_irq(hypctx, GT_PHYS_NS_IRQ, VGIC_IRQ_CLK);
+ } else {
+ diff = vtimer_cpu->cntp_cval_el0 - cntpct_el0;
+ time = diff * SBT_1S / tmr_frq;
+ callout_reset_sbt(&vtimer_cpu->callout, time, 0,
+ vtimer_inject_irq_callout_func, hypctx, 0);
+ }
+}
+
+static void
+vtimer_remove_irq(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ callout_drain(&vtimer_cpu->callout);
+ /*
+ * The interrupt needs to be deactivated here regardless of the callout
+ * function having been executed. The timer interrupt can be masked with
+ * the CNTP_CTL_EL0.IMASK bit instead of reading the IAR register.
+ * Masking the interrupt doesn't remove it from the list registers.
+ */
+ vgic_v3_remove_irq(hypctx, GT_PHYS_NS_IRQ, true);
+}
+
+/*
+ * Timer emulation functions.
+ *
+ * The guest dts is configured to use the physical timer because the Generic
+ * Timer can only trap physical timer accesses. This is why we always read the
+ * physical counter value when programming the time for the timer interrupt in
+ * the guest.
+ */
+
+int
+vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ if (vtimer_cpu->cntp_cval_el0 < cntpct_el0)
+ /* Timer condition met */
+ *rval = vtimer_cpu->cntp_ctl_el0 | CNTP_CTL_ISTATUS;
+ else
+ *rval = vtimer_cpu->cntp_ctl_el0 & ~CNTP_CTL_ISTATUS;
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t ctl_el0;
+ bool timer_toggled_on;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ timer_toggled_on = false;
+ ctl_el0 = vtimer_cpu->cntp_ctl_el0;
+
+ if (!timer_enabled(ctl_el0) && timer_enabled(wval))
+ timer_toggled_on = true;
+
+ vtimer_cpu->cntp_ctl_el0 = wval;
+
+ if (timer_toggled_on)
+ vtimer_schedule_irq(vtimer_cpu, hypctx);
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ *rval = vtimer_cpu->cntp_cval_el0;
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ vtimer_cpu->cntp_cval_el0 = wval;
+
+ if (timer_enabled(vtimer_cpu->cntp_ctl_el0)) {
+ vtimer_remove_irq(hypctx);
+ vtimer_schedule_irq(vtimer_cpu, hypctx);
+ }
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+	uint64_t cntpct_el0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ if (!(vtimer_cpu->cntp_ctl_el0 & CNTP_CTL_ENABLE)) {
+ /*
+ * ARMv8 Architecture Manual, p. D7-2702: the result of reading
+ * TVAL when the timer is disabled is UNKNOWN. I have chosen to
+ * return the maximum value possible on 32 bits which means the
+ * timer will fire very far into the future.
+ */
+ *rval = (uint32_t)RES1;
+ } else {
+		cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+		*rval = (uint32_t)(vtimer_cpu->cntp_cval_el0 - cntpct_el0);
+ }
+
+ *retu = false;
+ return (0);
+}
+
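+/*
+ * Writing CNTP_TVAL_EL0 programs the timer relative to the current counter:
+ * CNTP_CVAL_EL0 = CNTPCT_EL0 + SignExtend(TVAL).
+ */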
+int
+vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ vtimer_cpu->cntp_cval_el0 = (int32_t)wval + cntpct_el0;
+
+ if (timer_enabled(vtimer_cpu->cntp_ctl_el0)) {
+ vtimer_remove_irq(hypctx);
+ vtimer_schedule_irq(vtimer_cpu, hypctx);
+ }
+
+ *retu = false;
+ return (0);
+}
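
A minimal sketch (not part of this patch) of the CVAL/TVAL/CNTPCT relationships the handlers above rely on; the helper names are hypothetical and the comparison mirrors vtimer_phys_ctl_read().

#include <stdbool.h>
#include <stdint.h>

/* Writing TVAL programs an absolute compare value relative to now. */
static uint64_t
tval_write_to_cval(uint64_t cntpct, uint32_t tval)
{
	/* TVAL is a signed 32-bit offset from the current counter value. */
	return (cntpct + (int32_t)tval);
}

/* Reading TVAL returns the remaining (signed) distance to the compare value. */
static uint32_t
cval_to_tval_read(uint64_t cntpct, uint64_t cval)
{
	return ((uint32_t)(cval - cntpct));
}

/* ISTATUS as reported by vtimer_phys_ctl_read(): the compare value has passed. */
static bool
timer_condition_met(uint64_t cntpct, uint64_t cval)
{
	return (cval < cntpct);
}
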
Index: sys/arm64/vmm/mmu.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/mmu.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MMU_H_
+#define _VMM_MMU_H_
+
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "hyp.h"
+
+#define ktohyp(kva) (((vm_offset_t)(kva) & HYP_KVA_MASK) | \
+ HYP_KVA_OFFSET)
+#define ipatok(ipa, hypmap) (PHYS_TO_DMAP(pmap_extract(hypmap, (ipa))))
+#define gtoipa(gva) ((gva) - KERNBASE + VM_GUEST_BASE_IPA)
+
+#define page_aligned(x) (((vm_offset_t)(x) & PAGE_MASK) == 0)
+
+void hypmap_init(pmap_t map, enum pmap_stage pm_stage);
+void hypmap_map(pmap_t map, vm_offset_t va, size_t len,
+ vm_prot_t prot);
+void hypmap_map_identity(pmap_t map, vm_offset_t va, size_t len,
+ vm_prot_t prot);
+void hypmap_set(void *arg, vm_offset_t va, vm_offset_t pa,
+ size_t len, vm_prot_t prot);
+vm_paddr_t hypmap_get(void *arg, vm_offset_t va);
+void hypmap_cleanup(pmap_t map);
+
+#endif
Index: sys/arm64/vmm/mmu.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/mmu.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+#include <machine/vm.h>
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+
+MALLOC_DECLARE(M_HYP);
+
+void
+hypmap_init(pmap_t map, enum pmap_stage pm_stage)
+{
+ mtx_init(&map->pm_mtx, "hypmap_pm_mtx", NULL, MTX_DEF);
+ pmap_pinit_stage(map, pm_stage, 4);
+}
+
+void
+hypmap_map(pmap_t map, vm_offset_t va, size_t len, vm_prot_t prot)
+{
+ vm_offset_t va_end, hypva;
+ vm_page_t dummy_page;
+
+ dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO);
+ dummy_page->oflags = VPO_UNMANAGED;
+ dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT;
+
+ /*
+ * Add the physical pages which correspond to the specified virtual
+ * addresses. The virtual addresses span contiguous virtual pages, but
+ * they might not reside in contiguous physical pages.
+ */
+ va_end = va + len - 1;
+ va = trunc_page(va);
+ while (va < va_end) {
+ dummy_page->phys_addr = vtophys(va);
+ hypva = (va >= VM_MIN_KERNEL_ADDRESS) ? ktohyp(va) : va;
+ pmap_enter(map, hypva, dummy_page, prot, PMAP_ENTER_WIRED, 0);
+ va += PAGE_SIZE;
+ }
+
+ free(dummy_page, M_HYP);
+}
+
+void
+hypmap_map_identity(pmap_t map, vm_offset_t va, size_t len,
+ vm_prot_t prot)
+{
+ vm_offset_t va_end;
+ vm_page_t dummy_page;
+
+ dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO);
+ dummy_page->oflags = VPO_UNMANAGED;
+ dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT;
+
+ /*
+ * The virtual addresses span contiguous virtual pages, but they might
+ * not reside in contiguous physical pages. For each virtual page we
+ * get the physical page address and use that for the mapping.
+ */
+ va_end = va + len - 1;
+ va = trunc_page(va);
+ while (va < va_end) {
+ dummy_page->phys_addr = vtophys(va);
+ pmap_enter(map, dummy_page->phys_addr, dummy_page,
+ prot, PMAP_ENTER_WIRED, 0);
+ va += PAGE_SIZE;
+ }
+
+ free(dummy_page, M_HYP);
+}
+
+/*
+ * Map 'len' bytes starting at virtual address 'va' to 'len' bytes
+ * starting at physical address 'pa'
+ */
+void
+hypmap_set(void *arg, vm_offset_t va, vm_offset_t pa, size_t len,
+ vm_prot_t prot)
+{
+ vm_offset_t va_end, hypva;
+ vm_page_t dummy_page;
+ struct hyp *hyp;
+ pmap_t map;
+
+ hyp = (struct hyp *)arg;
+ map = hyp->stage2_map;
+
+ dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO);
+ dummy_page->oflags = VPO_UNMANAGED;
+ dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT;
+
+ va_end = va + len - 1;
+ va = trunc_page(va);
+ dummy_page->phys_addr = trunc_page(pa);
+ while (va < va_end) {
+ hypva = (va >= VM_MIN_KERNEL_ADDRESS) ? ktohyp(va) : va;
+ pmap_enter(map, hypva, dummy_page, prot, PMAP_ENTER_WIRED, 0);
+ va += PAGE_SIZE;
+ dummy_page->phys_addr += PAGE_SIZE;
+ }
+
+ free(dummy_page, M_HYP);
+}
+
+/*
+ * Return the physical address associated with virtual address 'va'
+ */
+vm_paddr_t
+hypmap_get(void *arg, vm_offset_t va)
+{
+ struct hyp *hyp;
+ pmap_t map;
+
+ hyp = (struct hyp *)arg;
+ map = hyp->stage2_map;
+
+ return (pmap_extract(map, va));
+}
+
+/*
+ * Remove all the mappings from the hyp translation tables
+ */
+void
+hypmap_cleanup(pmap_t map)
+{
+ pmap_remove(map, HYP_VM_MIN_ADDRESS, HYP_VM_MAX_ADDRESS);
+ mtx_destroy(&map->pm_mtx);
+ pmap_release(map);
+}
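
A usage sketch (not part of this patch) of the helpers above: initialising an EL2 stage 1 pmap and mapping the hypervisor code into it at its ktohyp() address. The pmap, the symbol names and the PM_STAGE1 constant are assumptions here, not definitions taken from this patch.

#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/pmap.h>

#include "mmu.h"

/* Hypothetical EL2 stage 1 pmap and hypervisor code boundaries. */
static struct pmap el2_pmap;
extern char hyp_code_start[], hyp_code_end[];

static void
el2_map_hyp_code(void)
{
	/* Create empty EL2 stage 1 translation tables. */
	hypmap_init(&el2_pmap, PM_STAGE1);

	/* Map the hypervisor text at its ktohyp() address, read/execute. */
	hypmap_map(&el2_pmap, (vm_offset_t)hyp_code_start,
	    hyp_code_end - hyp_code_start, VM_PROT_READ | VM_PROT_EXECUTE);
}
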
Index: sys/arm64/vmm/psci.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/psci.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _PSCI_H_
+#define _PSCI_H_
+
+#include "arm64.h"
+
+int psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme,
+ bool *retu);
+
+#endif
Index: sys/arm64/vmm/psci.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/psci.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+
+#include <dev/psci/psci.h>
+
+#include "arm64.h"
+#include "psci.h"
+
+#define PSCI_VERSION_0_2 0x2
+
+static int
+psci_version(struct hypctx *hypctx, bool *retu)
+{
+
+ hypctx->regs.x[0] = PSCI_VERSION_0_2;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+psci_system_off(struct vm_exit *vme, bool *retu)
+{
+ vme->u.suspended.how = VM_SUSPEND_POWEROFF;
+ vme->exitcode = VM_EXITCODE_SUSPENDED;
+
+ *retu = true;
+ return (0);
+}
+
+int
+psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ uint64_t func_id;
+ uint32_t esr_el2, esr_iss;
+ int error;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+
+ esr_el2 = hypctx->exit_info.esr_el2;
+ esr_iss = esr_el2 & ESR_ELx_ISS_MASK;
+
+ if (esr_iss != 0) {
+ eprintf("Malformed HVC instruction with immediate: 0x%x\n",
+ esr_iss);
+ error = 1;
+ goto out;
+ }
+
+ func_id = hypctx->regs.x[0];
+ switch (func_id) {
+ case PSCI_FNID_VERSION:
+ error = psci_version(hypctx, retu);
+ break;
+ case PSCI_FNID_SYSTEM_OFF:
+ error = psci_system_off(vme, retu);
+ break;
+ default:
+ eprintf("Unimplemented PSCI function: 0x%016lx\n", func_id);
+ hypctx->regs.x[0] = PSCI_RETVAL_NOT_SUPPORTED;
+ error = 1;
+ }
+
+out:
+ return (error);
+}
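
psci_handle_call() above assumes the SMCCC convention for PSCI over HVC: the function ID arrives in x0, arguments in x1-x3, the HVC immediate is 0, and the return value goes back in x0. A guest-side sketch of that conduit (not part of this patch; the helper name is hypothetical):

#include <stdint.h>

static inline uint64_t
psci_hvc_call(uint64_t func_id, uint64_t arg0, uint64_t arg1, uint64_t arg2)
{
	register uint64_t x0 __asm__("x0") = func_id;
	register uint64_t x1 __asm__("x1") = arg0;
	register uint64_t x2 __asm__("x2") = arg1;
	register uint64_t x3 __asm__("x3") = arg2;

	/* PSCI over the HVC conduit; immediate must be 0. */
	__asm__ __volatile__("hvc #0"
	    : "+r" (x0)
	    : "r" (x1), "r" (x2), "r" (x3)
	    : "memory");
	return (x0);
}

A guest power-off request made this way (function ID 0x84000008, PSCI 0.2 SYSTEM_OFF) is what psci_system_off() above turns into a VM_SUSPEND_POWEROFF exit.
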
Index: sys/arm64/vmm/reset.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/reset.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_RESET_H_
+#define _VMM_RESET_H_
+
+void reset_vm_el01_regs(void *vcpu);
+void reset_vm_el2_regs(void *vcpu);
+
+#endif
Index: sys/arm64/vmm/reset.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/reset.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/hypervisor.h>
+
+#include "arm64.h"
+#include "reset.h"
+
+/*
+ * Make the architecturally UNKNOWN value 0. As a bonus, we don't have to
+ * manually set all those RES0 fields.
+ */
+#define ARCH_UNKNOWN 0
+#define set_arch_unknown(reg) (memset(&(reg), ARCH_UNKNOWN, sizeof(reg)))
+
+void
+reset_vm_el01_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+
+ el2ctx = vcpu;
+
+ set_arch_unknown(el2ctx->regs);
+
+ set_arch_unknown(el2ctx->actlr_el1);
+ set_arch_unknown(el2ctx->afsr0_el1);
+ set_arch_unknown(el2ctx->afsr1_el1);
+ set_arch_unknown(el2ctx->amair_el1);
+ set_arch_unknown(el2ctx->contextidr_el1);
+ set_arch_unknown(el2ctx->cpacr_el1);
+ set_arch_unknown(el2ctx->elr_el1);
+ set_arch_unknown(el2ctx->esr_el1);
+ set_arch_unknown(el2ctx->far_el1);
+ set_arch_unknown(el2ctx->mair_el1);
+ set_arch_unknown(el2ctx->par_el1);
+
+ /*
+ * Guest starts with:
+ * ~SCTLR_M: MMU off
+ * ~SCTLR_C: data cache off
+ * SCTLR_CP15BEN: memory barrier instruction enable from EL0; RAO/WI
+ * ~SCTLR_I: instruction cache off
+ */
+ el2ctx->sctlr_el1 = SCTLR_RES1;
+ el2ctx->sctlr_el1 &= ~SCTLR_M & ~SCTLR_C & ~SCTLR_I;
+ el2ctx->sctlr_el1 |= SCTLR_CP15BEN;
+
+ set_arch_unknown(el2ctx->sp_el0);
+ set_arch_unknown(el2ctx->tcr_el1);
+ set_arch_unknown(el2ctx->tpidr_el0);
+ set_arch_unknown(el2ctx->tpidr_el1);
+ set_arch_unknown(el2ctx->tpidrro_el0);
+ set_arch_unknown(el2ctx->ttbr0_el1);
+ set_arch_unknown(el2ctx->ttbr1_el1);
+ set_arch_unknown(el2ctx->vbar_el1);
+ set_arch_unknown(el2ctx->spsr_el1);
+}
+
+void
+reset_vm_el2_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+ uint64_t cpu_aff;
+
+ el2ctx = vcpu;
+
+ /*
+ * Set the Hypervisor Configuration Register:
+ *
+ * HCR_RW: use AArch64 for EL1
+ * HCR_BSU_IS: barrier instructions apply to the inner shareable
+ * domain
+ * HCR_SWIO: turn set/way invalidate into set/way clean and
+ * invalidate
+ * HCR_FB: broadcast maintenance operations
+ * HCR_AMO: route physical SError interrupts to EL2
+ * HCR_IMO: route physical IRQ interrupts to EL2
+ * HCR_FMO: route physical FIQ interrupts to EL2
+ * HCR_VM: use stage 2 translation
+ */
+ el2ctx->hcr_el2 = HCR_RW | HCR_BSU_IS | HCR_SWIO | HCR_FB |
+ HCR_VM | HCR_AMO | HCR_IMO | HCR_FMO;
+
+ el2ctx->vmpidr_el2 = VMPIDR_EL2_RES1;
+ /* The guest will detect a multi-core, single-threaded CPU */
+ el2ctx->vmpidr_el2 &= ~VMPIDR_EL2_U & ~VMPIDR_EL2_MT;
+ /* Only 24 bits of affinity, for a grand total of 16,777,216 cores. */
+ cpu_aff = el2ctx->vcpu & (CPU_AFF0_MASK | CPU_AFF1_MASK | CPU_AFF2_MASK);
+ el2ctx->vmpidr_el2 |= cpu_aff;
+
+ /* Use the same CPU identification information as the host */
+ el2ctx->vpidr_el2 = CPU_IMPL_TO_MIDR(CPU_IMPL_ARM);
+ el2ctx->vpidr_el2 |= CPU_VAR_TO_MIDR(0);
+ el2ctx->vpidr_el2 |= CPU_ARCH_TO_MIDR(0xf);
+ el2ctx->vpidr_el2 |= CPU_PART_TO_MIDR(CPU_PART_FOUNDATION);
+ el2ctx->vpidr_el2 |= CPU_REV_TO_MIDR(0);
+
+ /*
+ * Don't trap accesses to CPACR_EL1 or to trace, SVE, Advanced SIMD
+ * and floating point functionality to EL2.
+ */
+ el2ctx->cptr_el2 = CPTR_RES1;
+ /*
+ * Disable interrupts in the guest. The guest OS will re-enable
+ * them.
+ */
+ el2ctx->spsr_el2 = PSR_D | PSR_A | PSR_I | PSR_F;
+ /* Use the EL1 stack when taking exceptions to EL1 */
+ el2ctx->spsr_el2 |= PSR_M_EL1h;
+}
Index: sys/arm64/vmm/vmm.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm.c
@@ -0,0 +1,910 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/cpuset.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/cpu.h>
+#include <machine/vm.h>
+#include <machine/pcb.h>
+#include <machine/param.h>
+#include <machine/smp.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/armreg.h>
+
+#include "vmm_stat.h"
+#include "vmm_mem.h"
+#include "arm64.h"
+#include "mmu.h"
+#include "psci.h"
+
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define BSP 0 /* the bootstrap processor */
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ int vcpuid;
+ void *stats;
+ struct vm_exit exitinfo;
+ uint64_t nextpc; /* (x) next instruction to execute */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ vm_object_t object;
+};
+#define VM_MAX_MEMORY_SEGMENTS 2
+
+struct vm {
+ void *cookie;
+ struct vcpu vcpu[VM_MAXCPU];
+ int num_mem_segs;
+ struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
+ char name[VM_MAX_NAMELEN];
+ /*
+ * Set of active vcpus.
+ * An active vcpu is one that has been started implicitly (BSP) or
+ * explicitly (AP) by sending it a startup ipi.
+ */
+ cpuset_t active_cpus;
+ uint16_t maxcpus;
+};
+
+static bool vmm_initialized = false;
+
+static struct vmm_ops *ops = NULL;
+
+#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0)
+#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
+
+#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL)
+#define VMRUN(vmi, vcpu, pc, pmap, rvc, sc) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, pc, pmap, rvc, sc) : ENXIO)
+#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
+#define VMMMAP_SET(vmi, ipa, pa, len, prot) \
+ (ops != NULL ? \
+ (*ops->vmmapset)(vmi, ipa, pa, len, prot) : ENXIO)
+#define VMMMAP_GET(vmi, gpa) \
+ (ops != NULL ? (*ops->vmmapget)(vmi, gpa) : ENXIO)
+#define VMGETREG(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETREG(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
+#define VMGETCAP(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETCAP(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+
+#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
+#define fpu_stop_emulating() clts()
+
+static int vm_handle_wfi(struct vm *vm, int vcpuid,
+ struct vm_exit *vme, bool *retu);
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+/*
+ * Halt the guest if all vcpus are executing a WFI instruction with
+ * interrupts disabled.
+ */
+static int halt_detection_enabled = 1;
+SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
+ &halt_detection_enabled, 0,
+ "Halt VM if all vcpus execute WFI with interrupts disabled");
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+static int trace_guest_exceptions;
+SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
+ &trace_guest_exceptions, 0,
+ "Trap into hypervisor on all guest exceptions and reflect them back");
+
+static void
+vcpu_cleanup(struct vm *vm, int i, bool destroy)
+{
+// struct vcpu *vcpu = &vm->vcpu[i];
+}
+
+static void
+vcpu_init(struct vm *vm, uint32_t vcpu_id, bool create)
+{
+ struct vcpu *vcpu;
+
+ KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
+ ("cpus_init: invalid vcpu %d", vcpu_id));
+
+ vcpu = &vm->vcpu[vcpu_id];
+
+ if (create) {
+ KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
+ "initialized", vcpu_id));
+ vcpu_lock_init(vcpu);
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ }
+}
+
+struct vm_exit *
+vm_exitinfo(struct vm *vm, int cpuid)
+{
+ struct vcpu *vcpu;
+
+ if (cpuid < 0 || cpuid >= VM_MAXCPU)
+ panic("vm_exitinfo: invalid cpuid %d", cpuid);
+
+ vcpu = &vm->vcpu[cpuid];
+
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+ ops = &vmm_ops_arm;
+
+ return (VMM_INIT(0));
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ vmmdev_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = true;
+ break;
+ case MOD_UNLOAD:
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = VMM_CLEANUP();
+ if (error)
+ vmm_initialized = false;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ struct vm *vm;
+ int i;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
+ vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->maxcpus = VM_MAXCPU;
+ vm->cookie = VMINIT(vm);
+
+ for (i = 0; i < vm->maxcpus; i++)
+ vcpu_init(vm, i, true);
+
+ vm_activate_cpu(vm, BSP);
+
+ *retvm = vm;
+ return (0);
+}
+
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+ VMCLEANUP(vm->cookie);
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+ vm_cleanup(vm, true);
+ free(vm, M_VMM);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+#include <sys/queue.h>
+#include <sys/linker.h>
+
+static caddr_t
+search_by_type(const char *type, caddr_t preload_metadata)
+{
+ caddr_t curp, lname;
+ uint32_t *hdr;
+ int next;
+
+ if (preload_metadata != NULL) {
+
+ curp = preload_metadata;
+ lname = NULL;
+ for (;;) {
+ hdr = (uint32_t *)curp;
+ if (hdr[0] == 0 && hdr[1] == 0)
+ break;
+
+ /* remember the start of each record */
+ if (hdr[0] == MODINFO_NAME)
+ lname = curp;
+
+ /* Search for a MODINFO_TYPE field */
+ if ((hdr[0] == MODINFO_TYPE) &&
+ !strcmp(type, curp + sizeof(uint32_t) * 2))
+ return (lname);
+
+ /* skip to next field */
+ next = sizeof(uint32_t) * 2 + hdr[1];
+ next = roundup(next, sizeof(u_long));
+ curp += next;
+ }
+ }
+ return (NULL);
+}
+
+static int
+vm_handle_reg_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+ struct hyp *hyp;
+ struct vm_exit *vme;
+ struct vre *vre;
+ reg_read_t rread;
+ reg_write_t rwrite;
+ uint32_t iss_reg;
+ int error;
+
+ hyp = (struct hyp *)vm->cookie;
+ vme = vm_exitinfo(vm, vcpuid);
+ vre = &vme->u.reg_emul.vre;
+
+ iss_reg = vre->inst_syndrome & ISS_MSR_REG_MASK;
+ switch (iss_reg) {
+ case ISS_CNTP_CTL_EL0:
+ rread = vtimer_phys_ctl_read;
+ rwrite = vtimer_phys_ctl_write;
+ break;
+ case ISS_CNTP_CVAL_EL0:
+ rread = vtimer_phys_cval_read;
+ rwrite = vtimer_phys_cval_write;
+ break;
+ case ISS_CNTP_TVAL_EL0:
+ rread = vtimer_phys_tval_read;
+ rwrite = vtimer_phys_tval_write;
+ break;
+ default:
+ goto out_user;
+ }
+
+ error = vmm_emulate_register(vm, vcpuid, vre, rread, rwrite, retu);
+
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+static int
+vm_mmio_region_match(const void *key, const void *memb)
+{
+ const uint64_t *addr = key;
+ const struct vgic_mmio_region *vmr = memb;
+
+ if (*addr < vmr->start)
+ return (-1);
+ else if (*addr >= vmr->start && *addr < vmr->end)
+ return (0);
+ else
+ return (1);
+}
+
+static int
+vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+ struct vm_exit *vme;
+ struct vie *vie;
+ struct hyp *hyp = vm->cookie;
+ uint64_t fault_ipa;
+ struct vgic_mmio_region *vmr;
+ int error;
+
+ if (!hyp->vgic_attached)
+ goto out_user;
+
+ vme = vm_exitinfo(vm, vcpuid);
+ vie = &vme->u.inst_emul.vie;
+
+ fault_ipa = vme->u.inst_emul.gpa;
+
+ vmr = bsearch(&fault_ipa, hyp->vgic_mmio_regions,
+ hyp->vgic_mmio_regions_num, sizeof(struct vgic_mmio_region),
+ vm_mmio_region_match);
+ if (!vmr)
+ goto out_user;
+
+ error = vmm_emulate_instruction(vm, vcpuid, fault_ipa, vie,
+ vmr->read, vmr->write, retu);
+
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+static int
+vm_handle_poweroff(struct vm *vm, int vcpuid)
+{
+ return (0);
+}
+
+static int
+vm_handle_psci_call(struct vm *vm, int vcpuid, bool *retu)
+{
+ struct vm_exit *vme;
+ enum vm_suspend_how how;
+ int error;
+
+ vme = vm_exitinfo(vm, vcpuid);
+
+ error = psci_handle_call(vm, vcpuid, vme, retu);
+ if (error)
+ goto out;
+
+ if (vme->exitcode == VM_EXITCODE_SUSPENDED) {
+ how = vme->u.suspended.how;
+ switch (how) {
+ case VM_SUSPEND_POWEROFF:
+ vm_handle_poweroff(vm, vcpuid);
+ break;
+ default:
+ /* Nothing to do */
+ ;
+ }
+ }
+
+out:
+ return (error);
+}
+
+int
+vm_run(struct vm *vm, struct vm_run *vmrun)
+{
+ int error, vcpuid;
+ register_t pc;
+ struct vm_exit *vme;
+ bool retu;
+ void *rvc, *sc;
+
+ vcpuid = vmrun->cpuid;
+ pc = vmrun->pc;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ rvc = sc = NULL;
+restart:
+ critical_enter();
+ error = VMRUN(vm->cookie, vcpuid, pc, NULL, rvc, sc);
+ critical_exit();
+
+ vme = vm_exitinfo(vm, vcpuid);
+ if (error == 0) {
+ retu = false;
+ switch (vme->exitcode) {
+ case VM_EXITCODE_INST_EMUL:
+ pc = vme->pc + vme->inst_length;
+ error = vm_handle_inst_emul(vm, vcpuid, &retu);
+ break;
+
+ case VM_EXITCODE_REG_EMUL:
+ pc = vme->pc + vme->inst_length;
+ error = vm_handle_reg_emul(vm, vcpuid, &retu);
+ break;
+
+ case VM_EXITCODE_HVC:
+ /*
+ * The HVC instruction saves the address for the
+ * next instruction as the return address.
+ */
+ pc = vme->pc;
+ /*
+ * The PSCI call can change the exit information in the
+ * case of suspend/reset/poweroff/cpu off/cpu on.
+ */
+ error = psci_handle_call(vm, vcpuid, vme, &retu);
+ break;
+
+ case VM_EXITCODE_WFI:
+ pc = vme->pc + vme->inst_length;
+ error = vm_handle_wfi(vm, vcpuid, vme, &retu);
+ break;
+
+ default:
+ /* Handle in userland */
+ retu = true;
+ break;
+ }
+ }
+
+ if (error == 0 && retu == false)
+ goto restart;
+
+ /* Copy the exit information */
+ bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
+
+ return (error);
+}
+
+int
+vm_activate_cpu(struct vm *vm, int vcpuid)
+{
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EBUSY);
+
+ CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
+ return (0);
+
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+void *
+vcpu_stats(struct vm *vm, int vcpuid)
+{
+
+ return (vm->vcpu[vcpuid].stats);
+}
+
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+
+ vcpu_assert_locked(vcpu);
+
+ /*
+ * State transitions from the vmmdev_ioctl() must always begin from
+ * the VCPU_IDLE state. This guarantees that there is only a single
+ * ioctl() operating on a vcpu at any point.
+ */
+ if (from_idle) {
+ while (vcpu->state != VCPU_IDLE)
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+ } else {
+ KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+ "vcpu idle state"));
+ }
+
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+ "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+ } else {
+ KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+ "vcpu that is not running", vcpu->hostcpu));
+ }
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> FROZEN -> IDLE
+ * FROZEN -> RUNNING -> FROZEN
+ * FROZEN -> SLEEPING -> FROZEN
+ */
+ switch (vcpu->state) {
+ case VCPU_IDLE:
+ case VCPU_RUNNING:
+ case VCPU_SLEEPING:
+ error = (newstate != VCPU_FROZEN);
+ break;
+ case VCPU_FROZEN:
+ error = (newstate == VCPU_FROZEN);
+ break;
+ default:
+ error = 1;
+ break;
+ }
+
+ if (error)
+ return (EBUSY);
+
+ vcpu->state = newstate;
+ if (newstate == VCPU_RUNNING)
+ vcpu->hostcpu = curcpu;
+ else
+ vcpu->hostcpu = NOCPU;
+
+ if (newstate == VCPU_IDLE)
+ wakeup(&vcpu->state);
+
+ return (0);
+}
+
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
+{
+ struct vcpu *vcpu;
+ enum vcpu_state state;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ if (hostcpu != NULL)
+ *hostcpu = vcpu->hostcpu;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+uint64_t
+vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t len)
+{
+ uint64_t nextpage;
+
+ nextpage = trunc_page(gpa + PAGE_SIZE);
+ if (len > nextpage - gpa)
+ panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%zu", gpa, len);
+
+ return (VMMMAP_GET(vm->cookie, gpa));
+}
+
+int
+vm_gpabase2memseg(struct vm *vm, uint64_t gpabase,
+ struct vm_memory_segment *seg)
+{
+ int i;
+
+ for (i = 0; i < vm->num_mem_segs; i++) {
+ if (gpabase == vm->mem_segs[i].gpa) {
+ *seg = vm->mem_segs[i];
+ return (0);
+ }
+ }
+ return (-1);
+}
+
+int
+vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
+{
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (VMGETREG(vm->cookie, vcpu, reg, retval));
+}
+
+int
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
+{
+ struct vcpu *vcpu;
+ int error;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+ error = VMSETREG(vm->cookie, vcpuid, reg, val);
+ if (error)
+ return (error);
+
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu->nextpc = val;
+
+ return(0);
+}
+
+void *
+vm_get_cookie(struct vm *vm)
+{
+ return (vm->cookie);
+}
+
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+static void
+vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
+{
+ size_t len;
+ uint64_t hpa;
+
+ len = 0;
+ while (len < seg->len) {
+ hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
+ if (hpa == (uint64_t)-1) {
+ panic("vm_free_mem_segs: cannot free hpa "
+ "associated with gpa 0x%016lx", seg->gpa + len);
+ }
+
+ vmm_mem_free(hpa, PAGE_SIZE);
+
+ len += PAGE_SIZE;
+ }
+
+ bzero(seg, sizeof(struct vm_memory_segment));
+}
+
+/*
+ * Return true if 'ipa' is available for allocation, false otherwise.
+ */
+static bool
+vm_ipa_available(struct vm *vm, uint64_t ipa)
+{
+ uint64_t ipabase, ipalimit;
+ int i;
+
+ if (!page_aligned(ipa))
+ panic("vm_ipa_available: ipa (0x%016lx) not page aligned", ipa);
+
+ for (i = 0; i < vm->num_mem_segs; i++) {
+ ipabase = vm->mem_segs[i].gpa;
+ ipalimit = ipabase + vm->mem_segs[i].len;
+ if (ipa >= ipabase && ipa < ipalimit)
+ return (false);
+ }
+
+ return (true);
+}
+
+/*
+ * Allocate 'len' bytes for the virtual machine starting at address 'ipa'
+ */
+int
+vm_malloc(struct vm *vm, uint64_t ipa, size_t len)
+{
+ struct vm_memory_segment *seg;
+ int error, available, allocated;
+ uint64_t ipa2;
+ vm_paddr_t pa;
+
+ if (!page_aligned(ipa) || !page_aligned(len) || len == 0)
+ return (EINVAL);
+
+ available = allocated = 0;
+ ipa2 = ipa;
+ while (ipa2 < ipa + len) {
+ if (vm_ipa_available(vm, ipa2))
+ available++;
+ else
+ allocated++;
+ ipa2 += PAGE_SIZE;
+ }
+
+ /*
+ * If there are some allocated and some available pages in the address
+ * range then it is an error.
+ */
+ if (allocated != 0 && available != 0)
+ return (EINVAL);
+
+ /*
+ * If the entire address range being requested has already been
+ * allocated then there isn't anything more to do.
+ */
+ if (allocated != 0 && available == 0)
+ return (0);
+
+ if (vm->num_mem_segs == VM_MAX_MEMORY_SEGMENTS)
+ return (E2BIG);
+
+ seg = &vm->mem_segs[vm->num_mem_segs];
+ error = 0;
+ seg->gpa = ipa;
+ seg->len = 0;
+ while (seg->len < len) {
+ pa = vmm_mem_alloc(PAGE_SIZE);
+ if (pa == 0) {
+ error = ENOMEM;
+ break;
+ }
+ VMMMAP_SET(vm->cookie, ipa, pa, PAGE_SIZE, VM_PROT_ALL);
+
+ seg->len += PAGE_SIZE;
+ ipa += PAGE_SIZE;
+ }
+ vm->num_mem_segs++;
+
+ return (error);
+}
+
+int
+vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size)
+{
+ int error;
+
+ error = vgic_v3_attach_to_vm(vm->cookie, dist_start, dist_size,
+ redist_start, redist_size);
+
+ return (error);
+}
+
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+ struct hyp *hyp = (struct hyp *)vm->cookie;
+ int error;
+
+ /* TODO: this is crap, send the vcpuid as an argument to vm_assert_irq */
+ error = vgic_v3_inject_irq(&hyp->ctx[0], irq, VGIC_IRQ_VIRTIO);
+
+ return (error);
+}
+
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+ int error;
+
+ error = vgic_v3_remove_irq(vm->cookie, irq, false);
+
+ return (error);
+}
+
+static int
+vm_handle_wfi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+ struct vcpu *vcpu;
+ struct hypctx *hypctx;
+ bool intr_disabled;
+
+ vcpu = &vm->vcpu[vcpuid];
+ hypctx = vme->u.wfi.hypctx;
+ intr_disabled = ((hypctx->regs.spsr & PSR_I) != 0);
+
+ vcpu_lock(vcpu);
+ while (1) {
+ if (!intr_disabled && vgic_v3_vcpu_pending_irq(hypctx))
+ break;
+
+ if (vcpu_should_yield(vm, vcpuid))
+ break;
+
+ vcpu_set_state_locked(vcpu, VCPU_SLEEPING, false);
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
+ vcpu_set_state_locked(vcpu, VCPU_FROZEN, false);
+ }
+ vcpu_unlock(vcpu);
+
+ *retu = false;
+ return (0);
+}
Index: sys/arm64/vmm/vmm_dev.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_dev.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ SLIST_ENTRY(vmmdev_softc) link;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static struct mtx vmmdev_mtx;
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+ struct vmmdev_softc *sc;
+
+#ifdef notyet /* XXX kernel is not compiled with invariants */
+ mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+ SLIST_FOREACH(sc, &head, link) {
+ if (strcmp(name, vm_name(sc->vm)) == 0)
+ break;
+ }
+
+ return (sc);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+ return (cdev->si_drv1);
+}
+
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+ int error = 0;
+
+ return (error);
+}
+
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error, vcpu, state_changed;
+ struct vmmdev_softc *sc;
+ struct vm_run *vmrun;
+ struct vm_memory_segment *seg;
+ struct vm_register *vmreg;
+ struct vm_activate_cpu *vac;
+ struct vm_attach_vgic *vav;
+ struct vm_irq *vi;
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ error = 0;
+ vcpu = -1;
+ state_changed = 0;
+
+ /*
+ * Some VMM ioctls can operate only on vcpus that are not running.
+ */
+ switch (cmd) {
+ case VM_RUN:
+ case VM_GET_REGISTER:
+ case VM_SET_REGISTER:
+ /*
+ * XXX fragile, handle with care
+ * Assumes that the first field of the ioctl data is the vcpu.
+ */
+ vcpu = *(int *)data;
+ if (vcpu < 0 || vcpu >= VM_MAXCPU) {
+ error = EINVAL;
+ goto done;
+ }
+
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ if (error)
+ goto done;
+
+ state_changed = 1;
+ break;
+
+ case VM_MAP_MEMORY:
+ case VM_ATTACH_VGIC:
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = 0;
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ vcpu--;
+ while (vcpu >= 0) {
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ vcpu--;
+ }
+ goto done;
+ }
+
+ state_changed = 2;
+ break;
+ case VM_ASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_assert_irq(sc->vm, vi->irq);
+ break;
+ case VM_DEASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_deassert_irq(sc->vm, vi->irq);
+ break;
+ default:
+ break;
+ }
+
+ switch (cmd) {
+ case VM_RUN:
+ vmrun = (struct vm_run *)data;
+ error = vm_run(sc->vm, vmrun);
+ break;
+ case VM_MAP_MEMORY:
+ seg = (struct vm_memory_segment *)data;
+ error = vm_malloc(sc->vm, seg->gpa, seg->len);
+ break;
+ case VM_GET_MEMORY_SEG:
+ seg = (struct vm_memory_segment *)data;
+ seg->len = 0;
+ (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
+ error = 0;
+ break;
+ case VM_GET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+ &vmreg->regval);
+ break;
+ case VM_SET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+ vmreg->regval);
+ break;
+ case VM_ACTIVATE_CPU:
+ vac = (struct vm_activate_cpu *)data;
+ error = vm_activate_cpu(sc->vm, vac->vcpuid);
+ break;
+ case VM_ATTACH_VGIC:
+ vav = (struct vm_attach_vgic *)data;
+ error = vm_attach_vgic(sc->vm, vav->dist_start, vav->dist_size,
+ vav->redist_start, vav->redist_size);
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+ if (state_changed == 1) {
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ } else if (state_changed == 2) {
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ }
+
+done:
+ /* Make sure that no handler returns a bogus value like ERESTART */
+ KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
+ return (error);
+}
+
+static int
+vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int nprot, vm_memattr_t *memattr)
+{
+ int error;
+ struct vmmdev_softc *sc;
+
+ error = -1;
+ mtx_lock(&vmmdev_mtx);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc != NULL && !(nprot & PROT_EXEC)) {
+ *paddr = (vm_paddr_t)vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
+ if (*paddr != (vm_paddr_t)-1)
+ error = 0;
+ }
+
+ mtx_unlock(&vmmdev_mtx);
+
+ return (error);
+}
+
+static void
+vmmdev_destroy(void *arg)
+{
+
+ struct vmmdev_softc *sc = arg;
+
+ if (sc->cdev != NULL)
+ destroy_dev(sc->cdev);
+
+ if (sc->vm != NULL)
+ vm_destroy(sc->vm);
+
+ if ((sc->flags & VSC_LINKED) != 0) {
+ mtx_lock(&vmmdev_mtx);
+ SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+ mtx_unlock(&vmmdev_mtx);
+ }
+
+ free(sc, M_VMMDEV);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ char buf[VM_MAX_NAMELEN];
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+
+ strlcpy(buf, "beavis", sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ if (sc == NULL || sc->cdev == NULL) {
+ mtx_unlock(&vmmdev_mtx);
+ return (EINVAL);
+ }
+
+ /*
+ * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
+ * goes down to 0 so we should not do it again in the callback.
+ */
+ cdev = sc->cdev;
+ sc->cdev = NULL;
+ mtx_unlock(&vmmdev_mtx);
+
+ /*
+ * Schedule the 'cdev' to be destroyed:
+ *
+ * - any new operations on this 'cdev' will return an error (ENXIO).
+ *
+ * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
+ * be destroyed and the callback will be invoked in a taskqueue
+ * context.
+ */
+ destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
+
+ return (0);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_vmm_destroy, "A", NULL);
+
+static struct cdevsw vmmdevsw = {
+ .d_name = "vmmdev",
+ .d_version = D_VERSION,
+ .d_ioctl = vmmdev_ioctl,
+ .d_mmap = vmmdev_mmap,
+ .d_read = vmmdev_rw,
+ .d_write = vmmdev_rw,
+};
+
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ struct vm *vm;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc, *sc2;
+ char buf[VM_MAX_NAMELEN];
+
+ strlcpy(buf, "beavis", sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ mtx_unlock(&vmmdev_mtx);
+ if (sc != NULL)
+ return (EEXIST);
+
+ error = vm_create(buf, &vm);
+ if (error != 0)
+ return (error);
+
+ sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+ sc->vm = vm;
+
+ /*
+ * Lookup the name again just in case somebody sneaked in when we
+ * dropped the lock.
+ */
+ mtx_lock(&vmmdev_mtx);
+ sc2 = vmmdev_lookup(buf);
+ if (sc2 == NULL) {
+ SLIST_INSERT_HEAD(&head, sc, link);
+ sc->flags |= VSC_LINKED;
+ }
+ mtx_unlock(&vmmdev_mtx);
+
+ if (sc2 != NULL) {
+ vmmdev_destroy(sc);
+ return (EEXIST);
+ }
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ if (error != 0) {
+ vmmdev_destroy(sc);
+ return (error);
+ }
+
+ mtx_lock(&vmmdev_mtx);
+ sc->cdev = cdev;
+ sc->cdev->si_drv1 = sc;
+ mtx_unlock(&vmmdev_mtx);
+
+ return (0);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_vmm_create, "A", NULL);
+
+void
+vmmdev_init(void)
+{
+ mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
+}
+
+int
+vmmdev_cleanup(void)
+{
+ int error;
+
+ if (SLIST_EMPTY(&head))
+ error = 0;
+ else
+ error = EBUSY;
+
+ return (error);
+}
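
A userland sketch (not part of this patch) of the control path the device above exposes: create the VM through the hw.vmm.create sysctl, open /dev/vmm/<name>, back guest memory with VM_MAP_MEMORY and run a vcpu with VM_RUN. The struct field names follow the ioctl handlers above (vm_run.cpuid/pc/vm_exit, vm_memory_segment.gpa/len); the exact userland headers and the guest load address are assumptions.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct vm_memory_segment seg;
	struct vm_run vmrun;
	const char *name = "test";
	char devpath[64];
	int fd;

	/* Create the VM; this ends up in sysctl_vmm_create(). */
	if (sysctlbyname("hw.vmm.create", NULL, NULL, name,
	    strlen(name) + 1) != 0)
		err(1, "hw.vmm.create");

	snprintf(devpath, sizeof(devpath), "/dev/vmm/%s", name);
	fd = open(devpath, O_RDWR);
	if (fd < 0)
		err(1, "open %s", devpath);

	/* Back 64MB of guest physical memory; handled by vm_malloc(). */
	memset(&seg, 0, sizeof(seg));
	seg.gpa = 0x40000000UL;	/* hypothetical guest load address */
	seg.len = 64 * 1024 * 1024;
	if (ioctl(fd, VM_MAP_MEMORY, &seg) != 0)
		err(1, "VM_MAP_MEMORY");

	/* Run vcpu 0 from the guest entry point until the first userland exit. */
	memset(&vmrun, 0, sizeof(vmrun));
	vmrun.cpuid = 0;
	vmrun.pc = seg.gpa;
	if (ioctl(fd, VM_RUN, &vmrun) != 0)
		err(1, "VM_RUN");

	printf("exitcode %d\n", vmrun.vm_exit.exitcode);
	return (0);
}
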
Index: sys/arm64/vmm/vmm_instruction_emul.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/vmm.h>
+
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
+int
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->dir == VM_DIR_READ) {
+ error = memread(vm, vcpuid, gpa, &val, vie->access_size, memarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vm, vcpuid, vie->reg, val);
+ } else {
+ error = vm_get_register(vm, vcpuid, vie->reg, &val);
+ if (error)
+ goto out;
+ error = memwrite(vm, vcpuid, gpa, val, vie->access_size, memarg);
+ }
+
+out:
+ return (error);
+}
+
+int
+vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vre->dir == VM_DIR_READ) {
+ error = regread(vm, vcpuid, &val, regarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vm, vcpuid, vre->reg, val);
+ } else {
+ error = vm_get_register(vm, vcpuid, vre->reg, &val);
+ if (error)
+ goto out;
+ error = regwrite(vm, vcpuid, val, regarg);
+ }
+
+out:
+ return (error);
+}
Index: sys/arm64/vmm/vmm_mem.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_mem.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MEM_H_
+#define _VMM_MEM_H_
+
+int vmm_mem_init(void);
+vm_paddr_t vmm_mem_alloc(size_t size);
+void vmm_mem_free(vm_paddr_t start, size_t size);
+vm_paddr_t vmm_mem_maxaddr(void);
+
+#endif
Index: sys/arm64/vmm/vmm_mem.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_mem.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/linker.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#include <machine/md_var.h>
+#include <machine/metadata.h>
+#include <machine/vmparam.h>
+#include <machine/pmap.h>
+
+#include "vmm_mem.h"
+
+SYSCTL_DECL(_hw_vmm);
+
+static u_long pages_allocated;
+SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD,
+ &pages_allocated, 0, "4KB pages allocated");
+
+static void
+update_pages_allocated(int howmany)
+{
+ pages_allocated += howmany; /* XXX locking? */
+}
+
+int
+vmm_mem_init(void)
+{
+
+ return (0);
+}
+
+vm_paddr_t
+vmm_mem_alloc(size_t size)
+{
+
+ int flags;
+ vm_page_t m;
+ vm_paddr_t pa;
+
+ if (size != PAGE_SIZE)
+ panic("vmm_mem_alloc: invalid allocation size %zu", size);
+
+ flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO;
+
+ while (1) {
+ /*
+ * XXX need policy to determine when to back off the allocation
+ */
+ m = vm_page_alloc(NULL, 0, flags);
+ if (m == NULL)
+ vm_wait(NULL);
+ else
+ break;
+ }
+
+ pa = VM_PAGE_TO_PHYS(m);
+
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+
+ m->valid = VM_PAGE_BITS_ALL;
+ update_pages_allocated(1);
+
+ return (pa);
+}
+
+void
+vmm_mem_free(vm_paddr_t base, size_t length)
+{
+ vm_page_t m;
+
+ if (base & PAGE_MASK) {
+ panic("vmm_mem_free: base 0x%0lx must be aligned on a "
+ "0x%0x boundary\n", base, PAGE_SIZE);
+ }
+
+ if (length != PAGE_SIZE)
+ panic("vmm_mem_free: invalid length %zu", length);
+
+ m = PHYS_TO_VM_PAGE(base);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+
+ update_pages_allocated(-1);
+}
+
+vm_paddr_t
+vmm_mem_maxaddr(void)
+{
+
+ return (ptoa(Maxmem));
+}
Index: sys/arm64/vmm/vmm_stat.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_stat.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+ VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */
+ VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vm *vm, int vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
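+/*
+ * Define a statistic and register it at module load time via SYSINIT;
+ * vmm_stat_register() later assigns 'index', the slot of the statistic
+ * in the per-vcpu stats buffer.
+ */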
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+#define VMM_STAT_INTEL(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL)
+#define VMM_STAT_AMD(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+/*
+ * 'buf' should be large enough to hold 'MAX_VMM_STAT_ELEMS' entries
+ */
+int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vm, vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vm, vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VCPU_MIGRATIONS);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_EXTINT);
+VMM_STAT_DECLARE(VMEXIT_HLT);
+VMM_STAT_DECLARE(VMEXIT_CR_ACCESS);
+VMM_STAT_DECLARE(VMEXIT_RDMSR);
+VMM_STAT_DECLARE(VMEXIT_WRMSR);
+VMM_STAT_DECLARE(VMEXIT_MTRAP);
+VMM_STAT_DECLARE(VMEXIT_PAUSE);
+VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_INOUT);
+VMM_STAT_DECLARE(VMEXIT_CPUID);
+VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
+VMM_STAT_DECLARE(VMEXIT_INST_EMUL);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
+VMM_STAT_DECLARE(VMEXIT_USERSPACE);
+VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
+VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+#endif
Index: sys/arm64/vmm/vmm_stat.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_stat.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (e.g. VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accomodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i;
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vm, vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vm, vcpu);
+ for (i = 0; i < vst_num_elems; i++)
+ buf[i] = stats[i];
+ *num_stats = vst_num_elems;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt");
+VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted");
+VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted");
+VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted");
+VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted");
+VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits");
+VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted");
+VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening");
+VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
+VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
+VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
+VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
+VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
+VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
+VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
+VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
Index: sys/conf/files.arm64
===================================================================
--- sys/conf/files.arm64
+++ sys/conf/files.arm64
@@ -176,6 +176,7 @@
arm64/arm64/identcpu.c standard
arm64/arm64/in_cksum.c optional inet | inet6
arm64/arm64/locore.S standard no-obj
+arm64/arm64/hyp_stub.S standard
arm64/arm64/machdep.c standard
arm64/arm64/machdep_boot.c standard
arm64/arm64/mem.c standard
@@ -412,6 +413,7 @@
dev/vnic/thunder_mdio_fdt.c optional vnic fdt
dev/vnic/thunder_mdio.c optional vnic
dev/vnic/lmac_if.m optional inet | inet6 | vnic
+dev/bvm/bvm_console.c optional bvmconsole
kern/msi_if.m optional intrng
kern/pic_if.m optional intrng
kern/subr_devmap.c standard
Index: sys/dev/bvm/bvm_console.c
===================================================================
--- /dev/null
+++ sys/dev/bvm/bvm_console.c
@@ -0,0 +1,268 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/cons.h>
+#include <sys/tty.h>
+#include <sys/reboot.h>
+#include <sys/bus.h>
+
+#if defined(__aarch64__)
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#endif
+
+#include <sys/kdb.h>
+#include <ddb/ddb.h>
+
+#ifndef BVMCONS_POLL_HZ
+#define BVMCONS_POLL_HZ 4
+#endif
+#define BVMBURSTLEN 16 /* max number of bytes to write in one chunk */
+
+static tsw_open_t bvm_tty_open;
+static tsw_close_t bvm_tty_close;
+static tsw_outwakeup_t bvm_tty_outwakeup;
+
+static struct ttydevsw bvm_ttydevsw = {
+ .tsw_flags = TF_NOPREFIX,
+ .tsw_open = bvm_tty_open,
+ .tsw_close = bvm_tty_close,
+ .tsw_outwakeup = bvm_tty_outwakeup,
+};
+
+static int polltime;
+static struct callout bvm_timer;
+
+#if defined(KDB)
+static int alt_break_state;
+#endif
+
+#if defined(__i386__) || defined(__amd64__)
+#define BVM_CONS_PORT 0x220
+#elif defined(__aarch64__)
+#define BVM_CONS_PORT 0x090000
+#endif
+
+static vm_offset_t bvm_cons_port = BVM_CONS_PORT;
+
+#define BVM_CONS_SIG ('b' << 8 | 'v')
+
+static void bvm_timeout(void *);
+
+static cn_probe_t bvm_cnprobe;
+static cn_init_t bvm_cninit;
+static cn_term_t bvm_cnterm;
+static cn_getc_t bvm_cngetc;
+static cn_putc_t bvm_cnputc;
+static cn_grab_t bvm_cngrab;
+static cn_ungrab_t bvm_cnungrab;
+
+CONSOLE_DRIVER(bvm);
+
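+/*
+ * Low-level console accessors: the hypervisor console is an I/O port on
+ * x86 and a memory-mapped register on arm; a read returns -1 when no
+ * character is pending.
+ */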
+static int
+bvm_rcons(u_char *ch)
+{
+ int c;
+
+#if defined(__i386__) || defined(__amd64__)
+ c = inl(bvm_cons_port);
+#elif defined(__arm__) || defined(__aarch64__)
+ c = *(int *)bvm_cons_port;
+#endif
+
+ if (c != -1) {
+ *ch = (u_char)c;
+ return (0);
+ } else
+ return (-1);
+}
+
+static void
+bvm_wcons(u_char ch)
+{
+#if defined(__i386__) || defined(__amd64__)
+ outl(bvm_cons_port, ch);
+#elif defined(__arm__) || defined(__aarch64__)
+ *(int *)bvm_cons_port = ch;
+#endif
+}
+
+static void
+cn_drvinit(void *unused)
+{
+ struct tty *tp;
+
+ gone_in(13, "bvmconsole");
+
+ if (bvm_consdev.cn_pri != CN_DEAD) {
+ tp = tty_alloc(&bvm_ttydevsw, NULL);
+ callout_init_mtx(&bvm_timer, tty_getlock(tp), 0);
+ tty_makedev(tp, NULL, "bvmcons");
+ }
+}
+
+static int
+bvm_tty_open(struct tty *tp)
+{
+ polltime = hz / BVMCONS_POLL_HZ;
+ if (polltime < 1)
+ polltime = 1;
+ callout_reset(&bvm_timer, polltime, bvm_timeout, tp);
+
+ return (0);
+}
+
+static void
+bvm_tty_close(struct tty *tp)
+{
+
+ tty_assert_locked(tp);
+ callout_stop(&bvm_timer);
+}
+
+static void
+bvm_tty_outwakeup(struct tty *tp)
+{
+ int len, written;
+ u_char buf[BVMBURSTLEN];
+
+ for (;;) {
+ len = ttydisc_getc(tp, buf, sizeof(buf));
+ if (len == 0)
+ break;
+
+ written = 0;
+ while (written < len)
+ bvm_wcons(buf[written++]);
+ }
+}
+
+static void
+bvm_timeout(void *v)
+{
+ struct tty *tp;
+ int c;
+
+ tp = (struct tty *)v;
+
+ tty_assert_locked(tp);
+ while ((c = bvm_cngetc(NULL)) != -1)
+ ttydisc_rint(tp, c, 0);
+ ttydisc_rint_done(tp);
+
+ callout_reset(&bvm_timer, polltime, bvm_timeout, tp);
+}
+
+static void
+bvm_cnprobe(struct consdev *cp)
+{
+ int disabled;
+#if defined(__i386__) || defined(__amd64__)
+ int port;
+#endif
+
+ disabled = 0;
+ cp->cn_pri = CN_DEAD;
+ strcpy(cp->cn_name, "bvmcons");
+
+ resource_int_value("bvmconsole", 0, "disabled", &disabled);
+ if (!disabled) {
+#if defined(__i386__) || defined(__amd64__)
+ if (resource_int_value("bvmconsole", 0, "port", &port) == 0)
+ bvm_cons_port = port;
+
+ if (inw(bvm_cons_port) == BVM_CONS_SIG) {
+#elif defined(__arm__) || defined(__aarch64__)
+ bvm_cons_port = (vm_offset_t)pmap_mapdev(bvm_cons_port, 0x1000);
+ if ((*(short *)bvm_cons_port) == BVM_CONS_SIG) {
+#endif
+ cp->cn_pri = CN_REMOTE;
+ }
+ }
+}
+
+static void
+bvm_cninit(struct consdev *cp)
+{
+ int i;
+ const char *bootmsg = "Using bvm console.\n";
+
+ if (boothowto & RB_VERBOSE) {
+ for (i = 0; i < strlen(bootmsg); i++)
+ bvm_cnputc(cp, bootmsg[i]);
+ }
+}
+
+static void
+bvm_cnterm(struct consdev *cp)
+{
+
+}
+
+static int
+bvm_cngetc(struct consdev *cp)
+{
+ unsigned char ch;
+
+ if (bvm_rcons(&ch) == 0) {
+#if defined(KDB)
+ kdb_alt_break(ch, &alt_break_state);
+#endif
+ return (ch);
+ }
+
+ return (-1);
+}
+
+static void
+bvm_cnputc(struct consdev *cp, int c)
+{
+
+ bvm_wcons(c);
+}
+
+static void
+bvm_cngrab(struct consdev *cp)
+{
+}
+
+static void
+bvm_cnungrab(struct consdev *cp)
+{
+}
+
+SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL);
Index: sys/dts/Makefile
===================================================================
--- sys/dts/Makefile
+++ sys/dts/Makefile
@@ -1,5 +1,5 @@
# $FreeBSD$
-SUBDIR=arm mips powerpc
+SUBDIR=arm arm64 mips powerpc
.include <bsd.subdir.mk>
Index: sys/kern/kern_cons.c
===================================================================
--- sys/kern/kern_cons.c
+++ sys/kern/kern_cons.c
@@ -136,7 +136,6 @@
* Check if we should mute the console (for security reasons perhaps)
* It can be changes dynamically using sysctl kern.consmute
* once we are up and going.
- *
*/
cn_mute = ((boothowto & (RB_MUTE
|RB_SINGLE
@@ -174,6 +173,7 @@
cnadd(cn);
}
}
+
if (best_cn == NULL)
return;
if ((boothowto & RB_MULTIPLE) == 0) {
Index: sys/modules/Makefile
===================================================================
--- sys/modules/Makefile
+++ sys/modules/Makefile
@@ -613,6 +613,9 @@
_armv8crypto= armv8crypto
_em= em
_rockchip= rockchip
+.if ${MK_BHYVE} != "no" || defined(ALL_MODULES)
+_vmm= vmm
+.endif
.endif
.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
Index: sys/modules/vmm/Makefile
===================================================================
--- sys/modules/vmm/Makefile
+++ sys/modules/vmm/Makefile
@@ -4,10 +4,7 @@
KMOD= vmm
-SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
-SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
-DPSRCS+= vmx_assym.h svm_assym.h
-DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
CFLAGS+= -DVMM_KEEP_STATS
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
@@ -15,76 +12,6 @@
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
-# generic vmm support
-.PATH: ${SRCTOP}/sys/amd64/vmm
-SRCS+= vmm.c \
- vmm_dev.c \
- vmm_host.c \
- vmm_instruction_emul.c \
- vmm_ioport.c \
- vmm_lapic.c \
- vmm_mem.c \
- vmm_stat.c \
- vmm_util.c \
- x86.c
-
-.PATH: ${SRCTOP}/sys/amd64/vmm/io
-SRCS+= iommu.c \
- ppt.c \
- vatpic.c \
- vatpit.c \
- vhpet.c \
- vioapic.c \
- vlapic.c \
- vpmtmr.c \
- vrtc.c
-
-# intel-specific files
-.PATH: ${SRCTOP}/sys/amd64/vmm/intel
-SRCS+= ept.c \
- vmcs.c \
- vmx_msr.c \
- vmx_support.S \
- vmx.c \
- vtd.c
-
-# amd-specific files
-.PATH: ${SRCTOP}/sys/amd64/vmm/amd
-SRCS+= vmcb.c \
- svm.c \
- svm_support.S \
- npt.c \
- ivrs_drv.c \
- amdvi_hw.c \
- svm_msr.c
-
-.if ${KERN_OPTS:MBHYVE_SNAPSHOT} != ""
-SRCS+= vmm_snapshot.c
-.endif
-
-CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
-
-OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
-OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
-
-vmx_assym.h: vmx_genassym.o
- sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
-
-svm_assym.h: svm_genassym.o
- sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET}
-
-vmx_support.o:
- ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
- ${.IMPSRC} -o ${.TARGET}
-
-svm_support.o:
- ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
- ${.IMPSRC} -o ${.TARGET}
-
-vmx_genassym.o: offset.inc
- ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
-
-svm_genassym.o: offset.inc
- ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
+.include <bsd.arch.inc.mk>
.include <bsd.kmod.mk>
Index: sys/modules/vmm/Makefile.amd64
===================================================================
--- /dev/null
+++ sys/modules/vmm/Makefile.amd64
@@ -0,0 +1,77 @@
+# $FreeBSD$
+
+DPSRCS+= vmx_assym.h svm_assym.h
+DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+
+# generic vmm support
+.PATH: ${SRCTOP}/sys/amd64/vmm
+SRCS+= vmm.c \
+ vmm_dev.c \
+ vmm_host.c \
+ vmm_instruction_emul.c \
+ vmm_ioport.c \
+ vmm_lapic.c \
+ vmm_mem.c \
+ vmm_stat.c \
+ vmm_util.c \
+ x86.c
+
+.PATH: ${SRCTOP}/sys/amd64/vmm/io
+SRCS+= iommu.c \
+ ppt.c \
+ vatpic.c \
+ vatpit.c \
+ vhpet.c \
+ vioapic.c \
+ vlapic.c \
+ vpmtmr.c \
+ vrtc.c
+
+# intel-specific files
+.PATH: ${SRCTOP}/sys/amd64/vmm/intel
+SRCS+= ept.c \
+ vmcs.c \
+ vmx_msr.c \
+ vmx_support.S \
+ vmx.c \
+ vtd.c
+
+# amd-specific files
+.PATH: ${SRCTOP}/sys/amd64/vmm/amd
+SRCS+= vmcb.c \
+ svm.c \
+ svm_support.S \
+ npt.c \
+ ivrs_drv.c \
+ amdvi_hw.c \
+ svm_msr.c
+
+CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
+
+OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
+OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
+
+vmx_assym.h: vmx_genassym.o
+ sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
+
+svm_assym.h: svm_genassym.o
+ sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET}
+
+vmx_support.o:
+ ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
+ ${.IMPSRC} -o ${.TARGET}
+
+svm_support.o:
+ ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
+ ${.IMPSRC} -o ${.TARGET}
+
+vmx_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} ${.IMPSRC}
+
+svm_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} ${.IMPSRC}
Index: sys/modules/vmm/Makefile.arm64
===================================================================
--- /dev/null
+++ sys/modules/vmm/Makefile.arm64
@@ -0,0 +1,30 @@
+DPSRCS+= hyp_assym.h
+DPSRCS+= hyp_genassym.c
+
+CFLAGS+= -I${.CURDIR}/../../arm64/vmm -I${.CURDIR}/../../arm64/include
+
+# generic vmm support
+.PATH: ${.CURDIR}/../../arm64/vmm
+SRCS+= vmm.c \
+ vmm_dev.c \
+ vmm_instruction_emul.c \
+ vmm_mem.c \
+ mmu.c \
+ vmm_stat.c \
+ arm64.c \
+ psci.c \
+ reset.c \
+ hyp.S
+
+.PATH: ${.CURDIR}/../../arm64/vmm/io
+SRCS+= vgic_v3.c \
+ vgic_v3_mmio.c \
+ vtimer.c
+
+CLEANFILES= hyp_assym.h hyp_genassym.o
+
+hyp_assym.h: hyp_genassym.o
+ sh ${SYSDIR}/kern/genassym.sh hyp_genassym.o > ${.TARGET}
+
+hyp_genassym.o:
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
Index: sys/sys/bitstring.h
===================================================================
--- sys/sys/bitstring.h
+++ sys/sys/bitstring.h
@@ -419,4 +419,35 @@
*_result = _value;
}
+/* Compute _dstbitstr as the bitwise AND of the two source bitstrings */
+static inline void
+bitstr_and(bitstr_t *_dstbitstr, bitstr_t *_src1bitstr,
+ bitstr_t *_src2bitstr, int _nbits)
+{
+ bitstr_t mask;
+ while (_nbits >= (int)_BITSTR_BITS) {
+ *_dstbitstr++ = *_src1bitstr++ & *_src2bitstr++;
+ _nbits -= _BITSTR_BITS;
+ }
+ if (_nbits > 0) {
+ mask = _bit_make_mask(0, _bit_offset(_nbits - 1));
+ *_dstbitstr = (*_src1bitstr & *_src2bitstr) & mask;
+ }
+}
+
+/* Compute _dstbitstr as the bitwise OR of the two source bitstrings */
+static inline void
+bitstr_or(bitstr_t *_dstbitstr, bitstr_t *_src1bitstr,
+ bitstr_t *_src2bitstr, int _nbits)
+{
+ bitstr_t mask;
+ while (_nbits >= (int)_BITSTR_BITS) {
+ *_dstbitstr++ = *_src1bitstr++ | *_src2bitstr++;
+ _nbits -= _BITSTR_BITS;
+ }
+ if (_nbits > 0) {
+ mask = _bit_make_mask(0, _bit_offset(_nbits - 1));
+ *_dstbitstr = (*_src1bitstr | *_src2bitstr) & mask;
+ }
+}
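+
+/*
+ * Example: with bitstr_t *a, *b, *dst each allocated via bit_alloc(n),
+ * bitstr_and(dst, a, b, n) sets dst to the bitwise AND of a and b and
+ * clears any bits beyond n in the final word, so bit_ffs()/bit_count()
+ * on dst only ever see valid bits.
+ */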
#endif /* _SYS_BITSTRING_H_ */
Index: usr.sbin/Makefile.arm64
===================================================================
--- usr.sbin/Makefile.arm64
+++ usr.sbin/Makefile.arm64
@@ -4,3 +4,9 @@
SUBDIR+= acpi
.endif
SUBDIR+= ofwdump
+
+.if ${MK_BHYVE} != "no"
+SUBDIR+= bhyve
+SUBDIR+= bhyveload
+SUBDIR+= bhyvectl
+.endif
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -3,132 +3,7 @@
#
.include <src.opts.mk>
-CFLAGS+=-I${.CURDIR}/../../contrib/lib9p
-CFLAGS+=-I${SRCTOP}/sys
-.PATH: ${SRCTOP}/sys/cam/ctl
-PROG= bhyve
-PACKAGE= bhyve
-
-MAN= bhyve.8
-
-BHYVE_SYSDIR?=${SRCTOP}
-
-SRCS= \
- atkbdc.c \
- acpi.c \
- audio.c \
- bhyvegc.c \
- bhyverun.c \
- block_if.c \
- bootrom.c \
- console.c \
- ctl_util.c \
- ctl_scsi_all.c \
- fwctl.c \
- gdb.c \
- hda_codec.c \
- inout.c \
- ioapic.c \
- kernemu_dev.c \
- mem.c \
- mevent.c \
- mptbl.c \
- net_backends.c \
- net_utils.c \
- pci_ahci.c \
- pci_e82545.c \
- pci_emul.c \
- pci_hda.c \
- pci_fbuf.c \
- pci_hostbridge.c \
- pci_irq.c \
- pci_lpc.c \
- pci_nvme.c \
- pci_passthru.c \
- pci_virtio_9p.c \
- pci_virtio_block.c \
- pci_virtio_console.c \
- pci_virtio_net.c \
- pci_virtio_rnd.c \
- pci_virtio_scsi.c \
- pci_uart.c \
- pci_xhci.c \
- pctestdev.c \
- pm.c \
- post.c \
- ps2kbd.c \
- ps2mouse.c \
- rfb.c \
- rtc.c \
- smbiostbl.c \
- sockstream.c \
- task_switch.c \
- uart_emul.c \
- usb_emul.c \
- usb_mouse.c \
- virtio.c \
- vga.c \
- vmgenc.c \
- xmsr.c \
- spinup_ap.c \
- iov.c
-
-.if ${MK_BHYVE_SNAPSHOT} != "no"
-SRCS+= snapshot.c
-.endif
-
-CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64
-
-.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
-SRCS+= vmm_instruction_emul.c
-
-LIBADD= vmmapi md pthread z util sbuf cam 9p
-
-.if ${MK_CASPER} != "no"
-LIBADD+= casper
-LIBADD+= cap_pwd
-LIBADD+= cap_grp
-# Temporary disable capsicum, until we integrate checkpoint code with it.
-#CFLAGS+=-DWITH_CASPER
-.endif
-
-.if ${MK_BHYVE_SNAPSHOT} != "no"
-LIBADD+= ucl xo
-.endif
-
-.if ${MK_INET_SUPPORT} != "no"
-CFLAGS+=-DINET
-.endif
-.if ${MK_INET6_SUPPORT} != "no"
-CFLAGS+=-DINET6
-.endif
-.if ${MK_NETGRAPH_SUPPORT} != "no"
-CFLAGS+=-DNETGRAPH
-LIBADD+= netgraph
-.endif
-.if ${MK_OPENSSL} == "no"
-CFLAGS+=-DNO_OPENSSL
-.else
-LIBADD+= crypto
-.endif
-
-CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
-CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
-CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
-.if ${MK_BHYVE_SNAPSHOT} != "no"
-CFLAGS+= -I${SRCTOP}/contrib/libucl/include
-
-# Temporary disable capsicum, until we integrate checkpoint code with it.
-CFLAGS+= -DWITHOUT_CAPSICUM
-
-CFLAGS+= -DBHYVE_SNAPSHOT
-.endif
-
-.ifdef GDB_LOG
-CFLAGS+=-DGDB_LOG
-.endif
-
-WARNS?= 2
+.include <bsd.arch.inc.mk>
.include <bsd.prog.mk>
Index: usr.sbin/bhyve/Makefile.amd64
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/Makefile.amd64
@@ -0,0 +1,124 @@
+#
+# $FreeBSD$
+#
+
+CFLAGS+=-I${.CURDIR}/../../contrib/lib9p
+CFLAGS+=-I${SRCTOP}/sys
+.PATH: ${SRCTOP}/sys/cam/ctl
+
+PROG= bhyve
+PACKAGE= bhyve
+
+MAN= bhyve.8
+
+BHYVE_SYSDIR?=${SRCTOP}
+
+SRCS= \
+ atkbdc.c \
+ acpi.c \
+ audio.c \
+ bhyvegc.c \
+ bhyverun.c \
+ block_if.c \
+ bootrom.c \
+ console.c \
+ consport.c \
+ ctl_util.c \
+ ctl_scsi_all.c \
+ dbgport.c \
+ fwctl.c \
+ gdb.c \
+ hda_codec.c \
+ inout.c \
+ ioapic.c \
+ kernemu_dev.c \
+ mem.c \
+ mevent.c \
+ mptbl.c \
+ net_backends.c \
+ net_utils.c \
+ pci_ahci.c \
+ pci_e82545.c \
+ pci_emul.c \
+ pci_hda.c \
+ pci_fbuf.c \
+ pci_hostbridge.c \
+ pci_irq.c \
+ pci_lpc.c \
+ pci_nvme.c \
+ pci_passthru.c \
+ pci_virtio_9p.c \
+ pci_virtio_block.c \
+ pci_virtio_console.c \
+ pci_virtio_net.c \
+ pci_virtio_rnd.c \
+ pci_virtio_scsi.c \
+ pci_uart.c \
+ pci_xhci.c \
+ pctestdev.c \
+ pm.c \
+ post.c \
+ ps2kbd.c \
+ ps2mouse.c \
+ rfb.c \
+ rtc.c \
+ smbiostbl.c \
+ sockstream.c \
+ task_switch.c \
+ uart_emul.c \
+ usb_emul.c \
+ usb_mouse.c \
+ virtio.c \
+ vga.c \
+ vmgenc.c \
+ xmsr.c \
+ spinup_ap.c \
+ iov.c
+
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+SRCS+= snapshot.c
+.endif
+
+CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64
+
+.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
+SRCS+= vmm_instruction_emul.c
+
+LIBADD= vmmapi md pthread z util sbuf cam 9p casper cap_pwd cap_grp
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+LIBADD+= ucl xo
+.endif
+
+.if ${MK_INET_SUPPORT} != "no"
+CFLAGS+=-DINET
+.endif
+.if ${MK_INET6_SUPPORT} != "no"
+CFLAGS+=-DINET6
+.endif
+.if ${MK_NETGRAPH_SUPPORT} != "no"
+CFLAGS+=-DNETGRAPH
+LIBADD+= netgraph
+.endif
+.if ${MK_OPENSSL} == "no"
+CFLAGS+=-DNO_OPENSSL
+.else
+LIBADD+= crypto
+.endif
+
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+CFLAGS+= -I${SRCTOP}/contrib/libucl/include
+
+# Temporary disable capsicum, until we integrate checkpoint code with it.
+CFLAGS+= -DWITHOUT_CAPSICUM
+
+CFLAGS+= -DBHYVE_SNAPSHOT
+.endif
+
+.ifdef GDB_LOG
+CFLAGS+=-DGDB_LOG
+.endif
+
+WARNS?= 2
Index: usr.sbin/bhyve/Makefile.arm64
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/Makefile.arm64
@@ -0,0 +1,55 @@
+#
+# $FreeBSD$
+#
+
+CFLAGS+=-I${SRCTOP}/sys
+.PATH: ${SRCTOP}/sys/cam/ctl
+
+PROG= bhyve
+PACKAGE= bhyve
+
+MAN= bhyve.8
+
+BHYVE_SYSDIR?=${SRCTOP}
+BHYVE_SRCTOP?=${.CURDIR}
+
+SRCS= \
+ block_if.c \
+ iov.c \
+ mevent.c \
+ net_backends.c \
+ sockstream.c
+
+CFLAGS+= -DWITHOUT_CAPSICUM
+.include "${BHYVE_SRCTOP}/arm64/Makefile.inc"
+.include "${BHYVE_SRCTOP}/mmio/Makefile.inc"
+
+LIBADD= vmmapi md pthread
+
+.if ${MK_INET_SUPPORT} != "no"
+CFLAGS+=-DINET
+.endif
+.if ${MK_INET6_SUPPORT} != "no"
+CFLAGS+=-DINET6
+.endif
+.if ${MK_NETGRAPH_SUPPORT} != "no"
+CFLAGS+=-DNETGRAPH
+LIBADD+= netgraph
+.endif
+.if ${MK_OPENSSL} == "no"
+CFLAGS+=-DNO_OPENSSL
+.endif
+
+.PATH: ${BHYVE_SYSDIR}/sys/arm64/vmm
+SRCS+= vmm_instruction_emul.c
+
+CFLAGS+= -I${BHYVE_SRCTOP}
+CFLAGS+= -I${BHYVE_SRCTOP}/arm64
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio/console
+
+.ifdef GDB_LOG
+CFLAGS+=-DGDB_LOG
+.endif
+
+WARNS?= 2
Index: usr.sbin/bhyve/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/Makefile.inc
@@ -0,0 +1,17 @@
+#
+# $FreeBSD$
+#
+.PATH: ${BHYVE_SRCTOP}/arm64/
+SRCS+= \
+ arm64/bhyverun.c \
+ arm64/mem.c \
+ arm64/consport.c \
+ arm64/reset.c
+
+.PATH: ${BHYVE_SYSDIR}/sys/${BHYVE_ARCH}/vmm
+
+MK_MAN=no
+
+BHYVE_BUS= mmio
+
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio/mmio
Index: usr.sbin/bhyve/arm64/bhyverun.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/bhyverun.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/bhyverun.h 4 2017-04-18 20:28:32Z mihai.carabas $
+ */
+
+#ifndef _FBSDRUN_H_
+#define _FBSDRUN_H_
+
+#ifndef CTASSERT /* Allow lint to override */
+#define CTASSERT(x) _CTASSERT(x, __LINE__)
+#define _CTASSERT(x, y) __CTASSERT(x, y)
+#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
+#endif
+
+struct vmctx;
+extern int guest_ncpus;
+extern char *vmname;
+
+void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
+
+void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip);
+int fbsdrun_muxed(void);
+int fbsdrun_vmexit_on_hlt(void);
+int fbsdrun_vmexit_on_pause(void);
+int fbsdrun_disable_x2apic(void);
+int fbsdrun_virtio_msix(void);
+#endif
Index: usr.sbin/bhyve/arm64/bhyverun.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/bhyverun.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <err.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <sysexits.h>
+#include <vmmapi.h>
+
+#include <machine/vmm.h>
+
+#include "bhyverun.h"
+#include "../mmio/mmio_emul.h"
+#include "../mmio/mmio_irq.h"
+#include "mem.h"
+#include "mevent.h"
+
+/* Exit codes. */
+#define EXIT_REBOOT 0
+#define EXIT_POWEROFF 1
+#define EXIT_HALT 2
+#define EXIT_ERROR 4
+
+#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
+
+#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */
+#define VMEXIT_CONTINUE 1 /* continue from next instruction */
+#define VMEXIT_RESTART 2 /* restart current instruction */
+#define VMEXIT_ABORT 3 /* abort the vm run loop */
+#define VMEXIT_RESET 4 /* guest machine has reset */
+
+#define MB (1024UL * 1024)
+#define GB (1024UL * MB)
+
+typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
+
+char *vmname;
+
+int guest_ncpus;
+
+int raw_stdio = 0;
+
+static int foundcpus;
+
+static char *progname;
+static const int BSP = 0;
+/* TODO Change this to cpuset_t */
+static int cpumask;
+
+static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc);
+
+struct vm_exit vmexit[VM_MAXCPU];
+
+struct bhyvestats {
+ uint64_t vmexit_bogus;
+ uint64_t vmexit_inst_emul;
+} stats;
+
+struct mt_vmm_info {
+ pthread_t mt_thr;
+ struct vmctx *mt_ctx;
+ int mt_vcpu;
+} mt_vmm_info[VM_MAXCPU];
+
+static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };
+
+static void
+usage(int code)
+{
+
+ fprintf(stderr,
+ "Usage: %s [-bh] [-c vcpus] [-p pincpu] [-s <devemu>] "
+ "<vmname>\n"
+ " -b: use bvmconsole\n"
+ " -c: # cpus (default 1)\n"
+ " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
+ " -s: device emulation config\n"
+ " -h: help\n",
+ progname);
+
+ exit(code);
+}
+
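+/*
+ * Parse a "vcpu:hostcpu" pair given with -p and record the host cpu in
+ * the affinity set for that vcpu.
+ */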
+static int
+pincpu_parse(const char *opt)
+{
+ int vcpu, pcpu;
+
+ if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
+ fprintf(stderr, "invalid format: %s\n", opt);
+ return (-1);
+ }
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU) {
+ fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
+ vcpu, VM_MAXCPU - 1);
+ return (-1);
+ }
+
+ if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
+ fprintf(stderr, "hostcpu '%d' outside valid range from "
+ "0 to %d\n", pcpu, CPU_SETSIZE - 1);
+ return (-1);
+ }
+
+ if (vcpumap[vcpu] == NULL) {
+ if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
+ perror("malloc");
+ return (-1);
+ }
+ CPU_ZERO(vcpumap[vcpu]);
+ }
+ CPU_SET(pcpu, vcpumap[vcpu]);
+ return (0);
+}
+
+void *
+paddr_guest2host(struct vmctx *ctx, uintptr_t iaddr, size_t len)
+{
+
+ return (vm_map_ipa(ctx, iaddr, len));
+}
+
+int
+fbsdrun_virtio_msix(void)
+{
+
+ return (0);
+}
+
+static void *
+fbsdrun_start_thread(void *param)
+{
+ char tname[MAXCOMLEN + 1];
+ struct mt_vmm_info *mtp;
+ int vcpu;
+
+ mtp = param;
+ vcpu = mtp->mt_vcpu;
+
+ snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu);
+ pthread_set_name_np(mtp->mt_thr, tname);
+
+ vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].pc);
+
+ /* not reached */
+ return (NULL);
+}
+
+void
+fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t pc)
+{
+ int error;
+
+ if (cpumask & (1 << vcpu)) {
+ fprintf(stderr, "addcpu: attempting to add existing cpu %d\n",
+ vcpu);
+ exit(4);
+ }
+
+ cpumask |= 1 << vcpu;
+ foundcpus++;
+
+ /*
+ * Set up the vmexit struct to allow execution to start
+ * at the given PC
+ */
+ vmexit[vcpu].pc = pc;
+ vmexit[vcpu].inst_length = 0;
+
+ if (vcpu == BSP) {
+ mt_vmm_info[vcpu].mt_ctx = ctx;
+ mt_vmm_info[vcpu].mt_vcpu = vcpu;
+
+ error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
+ fbsdrun_start_thread, &mt_vmm_info[vcpu]);
+ assert(error == 0);
+ }
+}
+
+static int
+fbsdrun_get_next_cpu(int curcpu)
+{
+
+ /*
+ * Get the next available CPU. Assumes they arrive
+ * in ascending order with no gaps.
+ */
+ return ((curcpu + 1) % foundcpus);
+}
+
+static int
+vmexit_hyp(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+ fprintf(stderr, "vm exit[%d]\n", *pvcpu);
+ fprintf(stderr, "\treason\t\tHYP\n");
+ fprintf(stderr, "\tpc\t\t0x%016lx\n", vmexit->pc);
+ fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
+
+ return (VMEXIT_ABORT);
+}
+
+static int
+vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+ stats.vmexit_bogus++;
+
+ return (VMEXIT_RESTART);
+}
+
+static int
+vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ int err;
+ struct vie *vie;
+
+ stats.vmexit_inst_emul++;
+
+ vie = &vmexit->u.inst_emul.vie;
+ err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie);
+
+ if (err) {
+ if (err == ESRCH) {
+ fprintf(stderr, "Unhandled memory access to 0x%lx\n",
+ vmexit->u.inst_emul.gpa);
+ }
+
+ fprintf(stderr, "Failed to emulate instruction at 0x%lx\n", vmexit->pc);
+ return (VMEXIT_ABORT);
+ }
+ return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ enum vm_suspend_how how;
+
+ how = vmexit->u.suspended.how;
+
+ switch (how) {
+ case VM_SUSPEND_POWEROFF:
+ exit(EXIT_POWEROFF);
+ case VM_SUSPEND_RESET:
+ exit(EXIT_REBOOT);
+ case VM_SUSPEND_HALT:
+ exit(EXIT_HALT);
+ case VM_SUSPEND_TRIPLEFAULT:
+ /* Not implemented yet. */
+ exit(EXIT_ERROR);
+ default:
+ fprintf(stderr, "vmexit_suspend: invalid or unimplemented reason %d\n", how);
+ exit(100);
+ }
+
+}
+
+static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
+ [VM_EXITCODE_BOGUS] = vmexit_bogus,
+ [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+ [VM_EXITCODE_REG_EMUL] = vmexit_hyp,
+ [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
+ [VM_EXITCODE_HYP] = vmexit_hyp,
+};
+
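+/*
+ * Per-vcpu run loop: call vm_run() and dispatch each exit to its
+ * handler.  VMEXIT_CONTINUE resumes after the trapping instruction,
+ * VMEXIT_RESTART retries it, and anything else terminates the process.
+ */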
+static void
+vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc)
+{
+ int error, rc, prevcpu;
+ enum vm_exitcode exitcode;
+
+ if (vcpumap[vcpu] != NULL) {
+ error = pthread_setaffinity_np(pthread_self(),
+ sizeof(cpuset_t), vcpumap[vcpu]);
+ assert(error == 0);
+ }
+
+ while (1) {
+
+ error = vm_run(ctx, vcpu, pc, &vmexit[vcpu]);
+
+ if (error != 0) {
+ /*
+ * It is possible that 'vmmctl' or some other process
+ * has transitioned the vcpu to CANNOT_RUN state right
+ * before we tried to transition it to RUNNING.
+ *
+ * This is expected to be temporary so just retry.
+ */
+ if (errno == EBUSY)
+ continue;
+ else
+ break;
+ }
+
+ prevcpu = vcpu;
+
+ exitcode = vmexit[vcpu].exitcode;
+ if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
+ fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
+ exitcode);
+ exit(4);
+ }
+
+ rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
+
+ switch (rc) {
+ case VMEXIT_CONTINUE:
+ pc = vmexit[vcpu].pc + vmexit[vcpu].inst_length;
+ break;
+ case VMEXIT_RESTART:
+ pc = vmexit[vcpu].pc;
+ break;
+ case VMEXIT_RESET:
+ exit(0);
+ default:
+ exit(4);
+ }
+ }
+ fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
+}
+
+static int
+num_vcpus_allowed(struct vmctx *ctx)
+{
+ return (VM_MAXCPU);
+}
+
+int
+main(int argc, char *argv[])
+{
+ int c, error;
+ bool bvmcons;
+ int max_vcpus;
+ struct vmctx *ctx;
+ uint64_t pc;
+ uint64_t memory_base_address, mem_size;
+
+ bvmcons = false;
+ memory_base_address = VM_GUEST_BASE_IPA;
+ mem_size = 128 * MB;
+ progname = basename(argv[0]);
+ guest_ncpus = 1;
+
+ while ((c = getopt(argc, argv, "bhp:c:s:e:m:")) != -1) {
+ switch (c) {
+ case 'b':
+ bvmcons = true;
+ break;
+ case 'e':
+ memory_base_address = strtoul(optarg, NULL, 0);
+ break;
+ case 'p':
+ if (pincpu_parse(optarg) != 0) {
+ errx(EX_USAGE, "invalid vcpu pinning "
+ "configuration '%s'", optarg);
+ }
+ break;
+ case 'c':
+ guest_ncpus = atoi(optarg);
+ break;
+ case 'm':
+ error = vm_parse_memsize(optarg, &mem_size);
+ if (error) {
+ fprintf(stderr, "Invalid memsize '%s'\n", optarg);
+ exit(1);
+ }
+ break;
+ case 's':
+ if (mmio_parse_opts(optarg) != 0)
+ exit(1);
+ break;
+ case 'h':
+ usage(0);
+ default:
+ usage(4);
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1)
+ usage(4);
+
+ vmname = argv[0];
+
+ /* The VM must be created by bhyveload first. */
+ ctx = vm_open(vmname);
+ if (ctx == NULL) {
+ perror("vm_open");
+ exit(1);
+ }
+
+ max_vcpus = num_vcpus_allowed(ctx);
+ if (guest_ncpus > max_vcpus) {
+ fprintf(stderr, "%d vCPUs requested but only %d available\n",
+ guest_ncpus, max_vcpus);
+ exit(1);
+ }
+
+ error = vm_setup_memory(ctx, memory_base_address, mem_size, VM_MMAP_ALL);
+ if (error != 0) {
+ fprintf(stderr, "Unable to setup memory (%d)\n", error);
+ exit(1);
+ }
+
+ init_mem();
+ mmio_irq_init(ctx);
+
+ if (init_mmio(ctx) != 0) {
+ fprintf(stderr, "Failed to initialize device emulation\n");
+ exit(1);
+ }
+
+ if (bvmcons)
+ init_bvmcons();
+
+ error = vm_get_register(ctx, BSP, VM_REG_ELR_EL2, &pc);
+ assert(error == 0);
+ /*
+ * Add CPU 0
+ */
+ fbsdrun_addcpu(ctx, BSP, pc);
+
+ /*
+ * Head off to the main event dispatch loop
+ */
+ mevent_dispatch();
+
+ exit(1);
+}
Index: usr.sbin/bhyve/arm64/consport.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/consport.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/select.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include "mem.h"
+
+#define BVM_CONS_PORT 0x090000
+#define BVM_CONS_SIG ('b' << 8 | 'v')
+
+static struct termios tio_orig, tio_new;
+
+static void
+ttyclose(void)
+{
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
+}
+
+static void
+ttyopen(void)
+{
+ tcgetattr(STDIN_FILENO, &tio_orig);
+
+ cfmakeraw(&tio_new);
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
+
+ atexit(ttyclose);
+}
+
+static bool
+tty_char_available(void)
+{
+ fd_set rfds;
+ struct timeval tv;
+
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
+ return (true);
+ } else {
+ return (false);
+ }
+}
+
+static int
+ttyread(void)
+{
+ char rb;
+
+ if (tty_char_available()) {
+ read(STDIN_FILENO, &rb, 1);
+ return (rb & 0xff);
+ } else {
+ return (-1);
+ }
+}
+
+static void
+ttywrite(unsigned char wb)
+{
+ (void) write(STDOUT_FILENO, &wb, 1);
+}
+
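+/*
+ * MMIO handler for the bvm console register: a 2-byte read returns the
+ * console signature so the guest can detect the device, a 1-byte read
+ * returns 0xff, and 4-byte accesses move one character to or from the
+ * host terminal.
+ */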
+static int
+console_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
+{
+ static int opened;
+
+ if (size == 2 && dir == MEM_F_READ) {
+ *val = BVM_CONS_SIG;
+ return (0);
+ }
+
+ /*
+ * Guests might probe this port to look for old ISA devices
+ * using single-byte reads. Return 0xff for those.
+ */
+ if (size == 1 && dir == MEM_F_READ) {
+ *val = 0xff;
+ return (0);
+ }
+
+ if (size != 4)
+ return (-1);
+
+ if (!opened) {
+ ttyopen();
+ opened = 1;
+ }
+
+ if (dir == MEM_F_READ)
+ *val = ttyread();
+ else
+ ttywrite(*val);
+ return (0);
+}
+
+struct mem_range consport = {
+ "bvmcons",
+ MEM_F_RW,
+ console_handler,
+ NULL,
+ 0,
+ BVM_CONS_PORT,
+ sizeof(int)
+};
+
+void
+init_bvmcons(void)
+{
+ register_mem(&consport);
+}
Index: usr.sbin/bhyve/arm64/mem.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/mem.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/mem.h 38 2017-06-13 13:34:14Z darius.mihai $
+ */
+
+#ifndef _MEM_H_
+#define _MEM_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+
+typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+ int size, uint64_t *val, void *arg1, long arg2);
+
+struct mem_range {
+ const char *name;
+ int flags;
+ mem_func_t handler;
+ void *arg1;
+ long arg2;
+ uint64_t base;
+ uint64_t size;
+};
+#define MEM_F_READ 0x1
+#define MEM_F_WRITE 0x2
+#define MEM_F_RW 0x3
+
+void init_mem(void);
+int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, void *vie);
+int register_mem(struct mem_range *memp);
+int register_mem_fallback(struct mem_range *memp);
+int unregister_mem(struct mem_range *memp);
+
+void init_bvmcons(void);
+#endif /* _MEM_H_ */
Index: usr.sbin/bhyve/arm64/mem.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/mem.c
@@ -0,0 +1,271 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/mem.c 4 2017-04-18 20:28:32Z mihai.carabas $
+ */
+
+/*
+ * Memory ranges are represented with an RB tree. On insertion, the range
+ * is checked for overlaps. On lookup, the key has the same base and limit
+ * so it can be searched within the range.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/usr.sbin/bhyve/arm/mem.c 4 2017-04-18 20:28:32Z mihai.carabas $");
+
+#include <sys/types.h>
+#include <sys/tree.h>
+#include <sys/errno.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "mem.h"
+
+struct mmio_rb_range {
+ RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
+ struct mem_range mr_param;
+ uint64_t mr_base;
+ uint64_t mr_end;
+};
+
+struct mmio_rb_tree;
+RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;
+
+/*
+ * Per-vCPU cache. Since most accesses from a vCPU will be to
+ * consecutive addresses in a range, it makes sense to cache the
+ * result of a lookup.
+ */
+static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
+
+static pthread_rwlock_t mmio_rwlock;
+
+static int
+mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
+{
+ if (a->mr_end < b->mr_base)
+ return (-1);
+ else if (a->mr_base > b->mr_end)
+ return (1);
+ return (0);
+}
+
+static int
+mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
+ struct mmio_rb_range **entry)
+{
+ struct mmio_rb_range find, *res;
+
+ find.mr_base = find.mr_end = addr;
+
+ res = RB_FIND(mmio_rb_tree, rbt, &find);
+
+ if (res != NULL) {
+ *entry = res;
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+static int
+mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
+{
+ struct mmio_rb_range *overlap;
+
+ overlap = RB_INSERT(mmio_rb_tree, rbt, new);
+
+ if (overlap != NULL) {
+#ifdef RB_DEBUG
+ printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
+ new->mr_base, new->mr_end,
+ overlap->mr_base, overlap->mr_end);
+#endif
+
+ return (EEXIST);
+ }
+
+ return (0);
+}
+
+#if 0
+static void
+mmio_rb_dump(struct mmio_rb_tree *rbt)
+{
+ struct mmio_rb_range *np;
+
+ pthread_rwlock_rdlock(&mmio_rwlock);
+ RB_FOREACH(np, mmio_rb_tree, rbt) {
+ printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
+ np->mr_param.name);
+ }
+ pthread_rwlock_unlock(&mmio_rwlock);
+}
+#endif
+
+RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+static int
+mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
+{
+ int error;
+ struct mem_range *mr = arg;
+
+ error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size,
+ rval, mr->arg1, mr->arg2);
+ return (error);
+}
+
+static int
+mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
+{
+ int error;
+ struct mem_range *mr = arg;
+
+ error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size,
+ &wval, mr->arg1, mr->arg2);
+ return (error);
+}
+
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, void *vie)
+{
+ struct mmio_rb_range *entry;
+ int err;
+
+ pthread_rwlock_rdlock(&mmio_rwlock);
+ /*
+ * First check the per-vCPU cache
+ */
+ if (mmio_hint[vcpu] &&
+ paddr >= mmio_hint[vcpu]->mr_base &&
+ paddr <= mmio_hint[vcpu]->mr_end) {
+ entry = mmio_hint[vcpu];
+ } else
+ entry = NULL;
+
+ if (entry == NULL) {
+ if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
+ /* Update the per-vCPU cache */
+ mmio_hint[vcpu] = entry;
+ } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
+ pthread_rwlock_unlock(&mmio_rwlock);
+ return (ESRCH);
+ }
+ }
+
+ assert(entry != NULL);
+ err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
+ mem_read, mem_write, &entry->mr_param);
+
+ pthread_rwlock_unlock(&mmio_rwlock);
+
+ return (err);
+}
+
+static int
+register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
+{
+ struct mmio_rb_range *entry, *mrp;
+ int err;
+
+ err = 0;
+
+ mrp = malloc(sizeof(struct mmio_rb_range));
+
+ if (mrp != NULL) {
+ mrp->mr_param = *memp;
+ mrp->mr_base = memp->base;
+ mrp->mr_end = memp->base + memp->size - 1;
+ pthread_rwlock_wrlock(&mmio_rwlock);
+ if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
+ err = mmio_rb_add(rbt, mrp);
+ pthread_rwlock_unlock(&mmio_rwlock);
+ if (err)
+ free(mrp);
+ } else
+ err = ENOMEM;
+
+ return (err);
+}
+
+int
+register_mem(struct mem_range *memp)
+{
+
+ return (register_mem_int(&mmio_rb_root, memp));
+}
+
+int
+register_mem_fallback(struct mem_range *memp)
+{
+
+ return (register_mem_int(&mmio_rb_fallback, memp));
+}
+
+int
+unregister_mem(struct mem_range *memp)
+{
+ struct mem_range *mr;
+ struct mmio_rb_range *entry = NULL;
+ int err, i;
+
+ pthread_rwlock_wrlock(&mmio_rwlock);
+ err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
+ if (err == 0) {
+ mr = &entry->mr_param;
+ assert(mr->name == memp->name);
+ assert(mr->base == memp->base && mr->size == memp->size);
+ RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
+
+ /* flush Per-vCPU cache */
+ for (i = 0; i < VM_MAXCPU; i++) {
+ if (mmio_hint[i] == entry)
+ mmio_hint[i] = NULL;
+ }
+ }
+ pthread_rwlock_unlock(&mmio_rwlock);
+
+ if (entry)
+ free(entry);
+
+ return (err);
+}
+
+void
+init_mem(void)
+{
+ RB_INIT(&mmio_rb_root);
+ RB_INIT(&mmio_rb_fallback);
+ pthread_rwlock_init(&mmio_rwlock, NULL);
+}
Index: usr.sbin/bhyve/arm64/mevent_test.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/mevent_test.c
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/mevent_test.c 4 2017-04-18 20:28:32Z mihai.carabas $
+ */
+
+/*
+ * Test program for the micro event library. Set up a simple TCP echo
+ * service.
+ *
+ * cc mevent_test.c mevent.c -lpthread
+ */
+
+#include <sys/types.h>
+#include <sys/stdint.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <machine/cpufunc.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include "mevent.h"
+
+#define TEST_PORT 4321
+
+static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;
+
+static struct mevent *tevp;
+
+char *vmname = "test vm";
+
+
+#define MEVENT_ECHO
+
+/* Number of timer events to capture */
+#define TEVSZ 4096
+uint64_t tevbuf[TEVSZ];
+
+static void
+timer_print(void)
+{
+ uint64_t min, max, diff, sum, tsc_freq;
+ size_t len;
+ int j;
+
+ min = UINT64_MAX;
+ max = 0;
+ sum = 0;
+
+ len = sizeof(tsc_freq);
+ sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0);
+
+ for (j = 1; j < TEVSZ; j++) {
+ /* Convert a tsc diff into microseconds */
+ diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq;
+ sum += diff;
+ if (min > diff)
+ min = diff;
+ if (max < diff)
+ max = diff;
+ }
+
+ printf("timers done: usecs, min %ld, max %ld, mean %ld\n", min, max,
+ sum/(TEVSZ - 1));
+}
+
+static void
+timer_callback(int fd, enum ev_type type, void *param)
+{
+ static int i;
+
+ if (i >= TEVSZ)
+ abort();
+
+ tevbuf[i++] = rdtsc();
+
+ if (i == TEVSZ) {
+ mevent_delete(tevp);
+ timer_print();
+ }
+}
+
+
+#ifdef MEVENT_ECHO
+struct esync {
+ pthread_mutex_t e_mt;
+ pthread_cond_t e_cond;
+};
+
+static void
+echoer_callback(int fd, enum ev_type type, void *param)
+{
+ struct esync *sync = param;
+
+ pthread_mutex_lock(&sync->e_mt);
+ pthread_cond_signal(&sync->e_cond);
+ pthread_mutex_unlock(&sync->e_mt);
+}
+
+static void *
+echoer(void *param)
+{
+ struct esync sync;
+ struct mevent *mev;
+ char buf[128];
+ int fd = (int)(uintptr_t) param;
+ int len;
+
+ pthread_mutex_init(&sync.e_mt, NULL);
+ pthread_cond_init(&sync.e_cond, NULL);
+
+ pthread_mutex_lock(&sync.e_mt);
+
+ mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
+ if (mev == NULL) {
+ printf("Could not allocate echoer event\n");
+ exit(1);
+ }
+
+ while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
+ len = read(fd, buf, sizeof(buf));
+ if (len > 0) {
+ write(fd, buf, len);
+ write(0, buf, len);
+ } else {
+ break;
+ }
+ }
+
+ mevent_delete_close(mev);
+
+ pthread_mutex_unlock(&sync.e_mt);
+ pthread_mutex_destroy(&sync.e_mt);
+ pthread_cond_destroy(&sync.e_cond);
+
+ return (NULL);
+}
+
+#else
+
+static void *
+echoer(void *param)
+{
+ char buf[128];
+ int fd = (int)(uintptr_t) param;
+ int len;
+
+ while ((len = read(fd, buf, sizeof(buf))) > 0) {
+ write(1, buf, len);
+ }
+
+ return (NULL);
+}
+#endif /* MEVENT_ECHO */
+
+static void
+acceptor_callback(int fd, enum ev_type type, void *param)
+{
+ pthread_mutex_lock(&accept_mutex);
+ pthread_cond_signal(&accept_condvar);
+ pthread_mutex_unlock(&accept_mutex);
+}
+
+static void *
+acceptor(void *param)
+{
+ struct sockaddr_in sin;
+ pthread_t tid;
+ int news;
+ int s;
+ static int first;
+
+ if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ perror("socket");
+ exit(1);
+ }
+
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(TEST_PORT);
+
+ if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+ perror("bind");
+ exit(1);
+ }
+
+ if (listen(s, 1) < 0) {
+ perror("listen");
+ exit(1);
+ }
+
+ (void) mevent_add(s, EVF_READ, acceptor_callback, NULL);
+
+ pthread_mutex_lock(&accept_mutex);
+
+ while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
+ news = accept(s, NULL, NULL);
+ if (news < 0) {
+ perror("accept error");
+ } else {
+ static int first = 1;
+
+ if (first) {
+ /*
+ * Start a timer
+ */
+ first = 0;
+ tevp = mevent_add(1, EVF_TIMER, timer_callback,
+ NULL);
+ }
+
+ printf("incoming connection, spawning thread\n");
+ pthread_create(&tid, NULL, echoer,
+ (void *)(uintptr_t)news);
+ }
+ }
+
+ return (NULL);
+}
+
+int
+main(void)
+{
+ pthread_t tid;
+
+ pthread_create(&tid, NULL, acceptor, NULL);
+
+ mevent_dispatch();
+}
Index: usr.sbin/bhyve/arm64/reset.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/reset.h
@@ -0,0 +1,6 @@
+#ifndef _RESET_H_
+#define _RESET_H_
+
+#define RESET_MAGIC 0xDEAD9731
+
+#endif /* _RESET_H_ */
Index: usr.sbin/bhyve/arm64/reset.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/reset.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "mem.h"
+#include "reset.h"
+#include "vmmapi.h"
+
+#define RESET_PORT 0x1c090100
+
+static int
+reset_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size,
+    uint64_t *val, void *arg1, long arg2)
+{
+ vm_destroy(ctx);
+
+ return (RESET_MAGIC);
+}
+
+struct mem_range resetport = {
+ "reset",
+ 0,
+ reset_handler,
+ NULL,
+ 0,
+ RESET_PORT,
+ sizeof(int)
+};
+
+void
+init_reset(void)
+{
+ register_mem(&resetport);
+}
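
For context (not part of the diff, and only an assumption about how the return value is consumed): reset_handler() destroys the VM and returns RESET_MAGIC, so the MMIO-exit path can presumably distinguish a guest-requested reset from an ordinary emulation error, roughly along these lines:

/*
 * Sketch only; the real handling lives in bhyverun.c, which is not shown
 * in this part of the diff.
 */
err = emulate_mem(ctx, vcpu, paddr, vie);
if (err == RESET_MAGIC) {
	/* The handler already called vm_destroy(); just exit. */
	exit(0);
}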
Index: usr.sbin/bhyve/block_if.c
===================================================================
--- usr.sbin/bhyve/block_if.c
+++ usr.sbin/bhyve/block_if.c
@@ -58,7 +58,10 @@
#include <unistd.h>
#include <machine/atomic.h>
+
+#ifdef BHYVE_SNAPSHOT
#include <machine/vmm_snapshot.h>
+#endif
#include "bhyverun.h"
#include "debug.h"
Index: usr.sbin/bhyve/consport.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/consport.c
@@ -0,0 +1,178 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/select.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sysexits.h>
+
+#include "inout.h"
+#include "pci_lpc.h"
+#include "debug.h"
+
+#define BVM_CONSOLE_PORT 0x220
+#define BVM_CONS_SIG ('b' << 8 | 'v')
+
+static struct termios tio_orig, tio_new;
+
+static void
+ttyclose(void)
+{
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
+}
+
+static void
+ttyopen(void)
+{
+ tcgetattr(STDIN_FILENO, &tio_orig);
+
+ cfmakeraw(&tio_new);
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
+ raw_stdio = 1;
+
+ atexit(ttyclose);
+}
+
+static bool
+tty_char_available(void)
+{
+ fd_set rfds;
+ struct timeval tv;
+
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
+ return (true);
+ } else {
+ return (false);
+ }
+}
+
+static int
+ttyread(void)
+{
+ char rb;
+
+ if (tty_char_available()) {
+ read(STDIN_FILENO, &rb, 1);
+ return (rb & 0xff);
+ } else {
+ return (-1);
+ }
+}
+
+static void
+ttywrite(unsigned char wb)
+{
+ (void) write(STDOUT_FILENO, &wb, 1);
+}
+
+static int
+console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+ uint32_t *eax, void *arg)
+{
+ static int opened;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+ cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ };
+#endif
+
+ if (bytes == 2 && in) {
+ *eax = BVM_CONS_SIG;
+ return (0);
+ }
+
+ /*
+ * Guests might probe this port to look for old ISA devices
+ * using single-byte reads. Return 0xff for those.
+ */
+ if (bytes == 1 && in) {
+ *eax = 0xff;
+ return (0);
+ }
+
+ if (bytes != 4)
+ return (-1);
+
+ if (!opened) {
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ,
+ CAP_WRITE);
+ if (caph_rights_limit(STDIN_FILENO, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+ if (caph_ioctls_limit(STDIN_FILENO, cmds, nitems(cmds)) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+ ttyopen();
+ opened = 1;
+ }
+
+ if (in)
+ *eax = ttyread();
+ else
+ ttywrite(*eax);
+
+ return (0);
+}
+
+SYSRES_IO(BVM_CONSOLE_PORT, 4);
+
+static struct inout_port consport = {
+ "bvmcons",
+ BVM_CONSOLE_PORT,
+ 1,
+ IOPORT_F_INOUT,
+ console_handler
+};
+
+void
+init_bvmcons(void)
+{
+
+ register_inout(&consport);
+}
Index: usr.sbin/bhyve/dbgport.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/dbgport.h
@@ -0,0 +1,36 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DBGPORT_H_
+#define _DBGPORT_H_
+
+void init_dbgport(int port);
+
+#endif
Index: usr.sbin/bhyve/dbgport.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/dbgport.c
@@ -0,0 +1,178 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/uio.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "inout.h"
+#include "dbgport.h"
+#include "pci_lpc.h"
+
+#define BVM_DBG_PORT 0x224
+#define BVM_DBG_SIG ('B' << 8 | 'V')
+
+static int listen_fd, conn_fd;
+
+static struct sockaddr_in sin;
+
+static int
+dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+ uint32_t *eax, void *arg)
+{
+ int nwritten, nread, printonce;
+ int on = 1;
+ char ch;
+
+ if (bytes == 2 && in) {
+ *eax = BVM_DBG_SIG;
+ return (0);
+ }
+
+ if (bytes != 4)
+ return (-1);
+
+again:
+ printonce = 0;
+ while (conn_fd < 0) {
+ if (!printonce) {
+ printf("Waiting for connection from gdb\r\n");
+ printonce = 1;
+ }
+ conn_fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK);
+ if (conn_fd >= 0) {
+ /* Avoid EPIPE after the client drops off. */
+ (void)setsockopt(conn_fd, SOL_SOCKET, SO_NOSIGPIPE,
+ &on, sizeof(on));
+			/* Improve latency for one byte at a time transfers. */
+ (void)setsockopt(conn_fd, IPPROTO_TCP, TCP_NODELAY,
+ &on, sizeof(on));
+ } else if (errno != EINTR) {
+ perror("accept");
+ }
+ }
+
+ if (in) {
+ nread = read(conn_fd, &ch, 1);
+ if (nread == -1 && errno == EAGAIN)
+ *eax = -1;
+ else if (nread == 1)
+ *eax = ch;
+ else {
+ close(conn_fd);
+ conn_fd = -1;
+ goto again;
+ }
+ } else {
+ ch = *eax;
+ nwritten = write(conn_fd, &ch, 1);
+ if (nwritten != 1) {
+ close(conn_fd);
+ conn_fd = -1;
+ goto again;
+ }
+ }
+ return (0);
+}
+
+static struct inout_port dbgport = {
+ "bvmdbg",
+ BVM_DBG_PORT,
+ 1,
+ IOPORT_F_INOUT,
+ dbg_handler
+};
+
+SYSRES_IO(BVM_DBG_PORT, 4);
+
+void
+init_dbgport(int sport)
+{
+ int reuse;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+#endif
+
+ conn_fd = -1;
+
+ if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ perror("cannot create socket");
+ exit(4);
+ }
+
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(sport);
+
+ reuse = 1;
+ if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse,
+ sizeof(reuse)) < 0) {
+ perror("cannot set socket options");
+ exit(4);
+ }
+
+ if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+ perror("cannot bind socket");
+ exit(4);
+ }
+
+ if (listen(listen_fd, 1) < 0) {
+ perror("cannot listen socket");
+ exit(4);
+ }
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_ACCEPT, CAP_READ, CAP_WRITE);
+ if (caph_rights_limit(listen_fd, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ register_inout(&dbgport);
+}
Index: usr.sbin/bhyve/mmio/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/Makefile.inc
@@ -0,0 +1,17 @@
+#
+# $FreeBSD$
+#
+
+.PATH: ${BHYVE_SRCTOP}/mmio/
+SRCS+= \
+ mmio/mmio_virtio_block.c \
+ mmio/mmio_virtio_console.c \
+ mmio/mmio_virtio_net.c \
+ mmio/mmio_virtio_rnd.c \
+ mmio/mmio_emul.c \
+ mmio/mmio_irq.c \
+ mmio/net_utils.c \
+ mmio/mmio_virtio.c
+
+
+CFLAGS+= -I${BHYVE_SRCTOP}/mmio
Index: usr.sbin/bhyve/mmio/mmio_emul.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_emul.h
@@ -0,0 +1,116 @@
+#ifndef _EMUL_H_
+#define _EMUL_H_
+
+#include <sys/types.h>
+
+#include <assert.h>
+
+struct vmctx;
+struct mmio_devinst;
+
+/* TODO: choose more suggestive names */
+struct mmio_devemu {
+ char *de_emu; /* Device emulation name */
+
+ /* Instance creation */
+ int (*de_init)(struct vmctx *ctx, struct mmio_devinst *di,
+ char *opts);
+
+ /* Read / Write callbacks */
+ void (*de_write)(struct vmctx *ctx, int vcpu,
+ struct mmio_devinst *di, int baridx,
+ uint64_t offset, int size, uint64_t val);
+
+ uint64_t (*de_read)(struct vmctx *ctx, int vcpu,
+ struct mmio_devinst *di, int baridx,
+ uint64_t offset, int size);
+};
+
+#define MMIO_EMUL_SET(x) DATA_SET(mmio_set, x);
+#define DI_NAMESZ 40
+#define MMIO_REGMAX 0xff
+#define MMIO_REGNUM (MMIO_REGMAX + 1)
+
+struct devinst_addr {
+ uint64_t baddr;
+ uint64_t size;
+};
+
+enum lintr_stat {
+ IDLE,
+ ASSERTED,
+ PENDING
+};
+
+/* TODO: choose more suggestive names */
+struct mmio_devinst {
+ struct mmio_devemu *pi_d; /* Back ref to device */
+ struct vmctx *pi_vmctx; /* Owner VM context */
+ /* unused for mmio device emulation; may be used as uniquifiers */
+ int pi_slot, di_func;
+
+ char pi_name[DI_NAMESZ]; /* Instance name */
+
+ struct {
+ enum lintr_stat state;
+ int64_t irq;
+ pthread_mutex_t lock;
+ } di_lintr;
+
+ void *pi_arg; /* Private data */
+
+	u_char pi_cfgregs[MMIO_REGNUM]; /* Config registers */
+
+ struct devinst_addr addr; /* Address info */
+};
+
+int mmio_parse_opts(const char *args);
+int mmio_alloc_mem(struct mmio_devinst *di);
+int init_mmio(struct vmctx *ctx);
+void mmio_lintr_request(struct mmio_devinst *di);
+void mmio_lintr_assert(struct mmio_devinst *di);
+void mmio_lintr_deassert(struct mmio_devinst *di);
+
+static __inline void
+mmio_set_cfgreg8(struct mmio_devinst *di, size_t offset, uint8_t val)
+{
+	assert(offset <= MMIO_REGMAX);
+	*(uint8_t *)(di->pi_cfgregs + offset) = val;
+}
+
+static __inline void
+mmio_set_cfgreg16(struct mmio_devinst *di, size_t offset, uint16_t val)
+{
+	assert(offset <= (MMIO_REGMAX - 1) && (offset & 1) == 0);
+	*(uint16_t *)(di->pi_cfgregs + offset) = val;
+}
+
+static __inline void
+mmio_set_cfgreg32(struct mmio_devinst *di, size_t offset, uint32_t val)
+{
+ assert(offset <= (MMIO_REGMAX - 3) && (offset & 3) == 0);
+ *(uint32_t *)(di->pi_cfgregs + offset) = val;
+}
+
+static __inline uint8_t
+mmio_get_cfgreg8(struct mmio_devinst *di, size_t offset)
+{
+ assert(offset <= MMIO_REGMAX);
+	return (*(uint8_t *)(di->pi_cfgregs + offset));
+}
+
+static __inline uint16_t
+mmio_get_cfgreg16(struct mmio_devinst *di, size_t offset)
+{
+ assert(offset <= (MMIO_REGMAX - 1) && (offset & 1) == 0);
+	return (*(uint16_t *)(di->pi_cfgregs + offset));
+}
+
+static __inline uint32_t
+mmio_get_cfgreg32(struct mmio_devinst *di, size_t offset)
+{
+ assert(offset <= (MMIO_REGMAX - 3) && (offset & 3) == 0);
+ return (*(uint32_t *)(di->pi_cfgregs + offset));
+}
+
+#endif /* _EMUL_H_ */
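
To illustrate how this interface is meant to be consumed, here is a minimal sketch of a device emulation registering itself through MMIO_EMUL_SET (not part of the diff; the "dummy" device name and behaviour are hypothetical, and mirror the way PCI_EMUL_SET is used by the existing pci_emul code):

static int
dummy_init(struct vmctx *ctx, struct mmio_devinst *di, char *opts)
{
	/*
	 * di->addr.baddr and di->addr.size were filled in from the
	 * command-line options; claim and register the range.
	 */
	return (mmio_alloc_mem(di));
}

static void
dummy_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
    int baridx, uint64_t offset, int size, uint64_t val)
{
	/* Writes are ignored. */
}

static uint64_t
dummy_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
    int baridx, uint64_t offset, int size)
{
	return (0);
}

static struct mmio_devemu mmio_dummy = {
	.de_emu = "dummy",
	.de_init = dummy_init,
	.de_write = dummy_write,
	.de_read = dummy_read,
};
MMIO_EMUL_SET(mmio_dummy);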
Index: usr.sbin/bhyve/mmio/mmio_emul.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_emul.c
@@ -0,0 +1,440 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/linker_set.h>
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "arm64/mem.h"
+#include "mmio_emul.h"
+#include "mmio_irq.h"
+
+#define DEVEMU_MEMLIMIT 0xFD00000000UL
+#define DEVEMU_MEMBASE 0xD000000000UL
+#define MEM_ROUNDUP (1 << 20)
+#ifndef max
+# define max(A, B) ((A) > (B) ? (A) : (B))
+#endif
+
+static uint64_t mmio_membase;
+
+SET_DECLARE(mmio_set, struct mmio_devemu);
+
+static struct mmio_devemu *mmio_finddev(char *name);
+static void mmio_lintr_route(struct mmio_devinst *di);
+static void mmio_lintr_update(struct mmio_devinst *di);
+
+static struct mmio_emul_info {
+ uint64_t size; /* address size */
+ uint64_t baddr; /* address */
+ int64_t irq; /* device interrupt number */
+ char *name; /* device name */
+ char *arg; /* device arguments */
+ struct mmio_emul_info *next; /* pointer for linked list */
+ struct mmio_devinst *di; /* pointer to device instance */
+} *mmio_emul_info_head = NULL;
+
+/*
+ * MMIO options are in the form:
+ *
+ * <size>@<base_addr>#<irq>:<emul>[,<config>]
+ *
+ * - size is the number of bytes required for the device mmio
+ * - base_addr is the base address for the MMIO mapped device;
+ * - irq specifies the device interrupt number; the value MUST be a DECIMAL
+ *   integer; if the device does not use interrupts, use -1
+ * - emul is a string describing the type of device - e.g., virtio-net;
+ * - config is an optional string, depending on the device, that is used
+ * for configuration
+ *
+ * Examples of use:
+ * 0x200@0x100000#25:virtio-net,tap0
+ * 0x100@0x200000#-1:dummy
+ */
+static void
+mmio_parse_opts_usage(const char *args)
+{
+ fprintf(stderr, "Invalid mmio arguments \"%s\"\r\n", args);
+}
+
+/*
+ * Checks whether two memory regions overlap.  Regions with a base address
+ * of zero (i.e., not yet assigned) never overlap anything.
+ */
+static int
+mmio_mem_overlap(uint64_t pa, uint64_t sa, uint64_t pb, uint64_t sb)
+{
+	if ((pa == 0) || (pb == 0))
+		return (0);
+
+	/* Half-open intervals [pa, pa + sa) and [pb, pb + sb) overlap. */
+	if (pa < pb + sb && pb < pa + sa)
+		return (1);
+
+	return (0);
+}
+
+int
+mmio_parse_opts(const char *args)
+{
+ char *emul, *config, *str;
+ uint64_t size, baddr;
+ int64_t irq;
+ int error;
+ struct mmio_emul_info *dif;
+
+ error = -1;
+ emul = config = NULL;
+ baddr = 0, size = 0;
+ str = strdup(args);
+
+ if ((emul = strchr(str, ':')) != NULL) {
+ *emul++ = '\0';
+
+ /* <size>@<base-addr>#<irq> */
+		if (sscanf(str, "%jx@%jx#%jd", &size, &baddr, &irq) != 3) {
+ mmio_parse_opts_usage(str);
+ goto parse_error;
+ }
+ } else {
+ mmio_parse_opts_usage(str);
+ goto parse_error;
+ }
+
+ if ((config = strchr(emul, ',')) != NULL)
+ *config++ = '\0';
+
+ /*
+ * check if the required address can be obtained;
+ * if an address has not been requested, ignore the checks
+ * (however, an address will have to be later identified)
+ */
+ if (baddr != 0) {
+ for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next)
+ if (mmio_mem_overlap(dif->baddr, dif->size,
+ baddr, size))
+ break;
+
+ if (dif != NULL) {
+ fprintf(stderr, "The requested address 0x%jx is "
+ "already bound or overlapping\r\n", baddr);
+ error = EINVAL;
+ goto parse_error;
+ }
+ }
+
+ dif = calloc(1, sizeof(struct mmio_emul_info));
+ if (dif == NULL) {
+ error = ENOMEM;
+ goto parse_error;
+ }
+
+ dif->next = mmio_emul_info_head;
+ mmio_emul_info_head = dif;
+
+ dif->size = size;
+ dif->baddr = baddr;
+ dif->irq = irq;
+ if ((emul != NULL) && (strlen(emul)) > 0)
+ dif->name = strdup(emul);
+ else
+ dif->name = NULL;
+ if ((config != NULL) && (strlen(config)) > 0)
+ dif->arg = strdup(config);
+ else
+ dif->arg = NULL;
+
+ error = 0;
+
+parse_error:
+ free(str);
+
+	return (error);
+}
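
A short usage sketch for the parser above (not part of the diff; the error handling and the exact call site are assumptions, but the option strings follow the format documented in the comment):

if (mmio_parse_opts("0x200@0x100000#25:virtio-net,tap0") != 0)
	errx(1, "invalid mmio device specification");
if (mmio_parse_opts("0x100@0x200000#-1:dummy") != 0)
	errx(1, "invalid mmio device specification");
/* ... after the vmctx exists ... */
if (init_mmio(ctx) != 0)
	errx(1, "could not initialize mmio device emulation");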
+
+static int
+mmio_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+ int size, uint64_t *val, void *arg1, long arg2)
+{
+ struct mmio_devinst *di = arg1;
+ struct mmio_devemu *de = di->pi_d;
+ uint64_t offset;
+ int bidx = (int) arg2;
+
+ assert(di->addr.baddr <= addr &&
+ addr + size <= di->addr.baddr + di->addr.size);
+
+ offset = addr - di->addr.baddr;
+
+ if (dir == MEM_F_WRITE) {
+ if (size == 8) {
+ (*de->de_write)(ctx, vcpu, di, bidx, offset,
+ 4, *val & 0xffffffff);
+ (*de->de_write)(ctx, vcpu, di, bidx, offset + 4,
+ 4, *val >> 32);
+ } else {
+ (*de->de_write)(ctx, vcpu, di, bidx, offset,
+ size, *val);
+ }
+ } else {
+ if (size == 8) {
+ *val = (*de->de_read)(ctx, vcpu, di, bidx,
+ offset, 4);
+ *val |= (*de->de_read)(ctx, vcpu, di, bidx,
+ offset + 4, 4) << 32;
+ } else {
+ *val = (*de->de_read)(ctx, vcpu, di, bidx,
+ offset, size);
+ }
+ }
+
+ return (0);
+}
+
+static void
+modify_mmio_registration(struct mmio_devinst *di, int registration)
+{
+ int error;
+ struct mem_range mr;
+
+ bzero(&mr, sizeof(struct mem_range));
+ mr.name = di->pi_name;
+ mr.base = di->addr.baddr;
+ mr.size = di->addr.size;
+ if (registration) {
+ mr.flags = MEM_F_RW;
+ mr.handler = mmio_mem_handler;
+ mr.arg1 = di;
+ mr.arg2 = 0;
+ error = register_mem(&mr);
+ } else {
+ error = unregister_mem(&mr);
+ }
+
+ assert(error == 0);
+}
+
+static void
+register_mmio(struct mmio_devinst *di)
+{
+	modify_mmio_registration(di, 1);
+}
+
+static void
+unregister_mmio(struct mmio_devinst *di)
+{
+	modify_mmio_registration(di, 0);
+}
+
+/*
+ * Update the MMIO address that is decoded
+ */
+static void
+update_mem_address(struct mmio_devinst *di, uint64_t addr)
+{
+ /* TODO: check if the decoding is running */
+ unregister_mmio(di);
+
+ di->addr.baddr = addr;
+
+ register_mmio(di);
+}
+
+static int
+mmio_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
+ uint64_t *addr)
+{
+ uint64_t base;
+
+ assert((size & (size - 1)) == 0); /* must be a power of 2 */
+
+ base = roundup2(*baseptr, size);
+
+ if (base + size <= limit) {
+ *addr = base;
+ *baseptr = base + size;
+ return (0);
+ } else
+ return (-1);
+}
+
+int
+mmio_alloc_mem(struct mmio_devinst *di)
+{
+ int error;
+ uint64_t *baseptr, limit, addr, size;
+
+ baseptr = &di->addr.baddr;
+ size = di->addr.size;
+ limit = DEVEMU_MEMLIMIT;
+
+ if ((size & (size - 1)) != 0)
+ /* Round up to a power of 2 */
+ size = 1UL << flsl(size);
+
+ error = mmio_alloc_resource(baseptr, limit, size, &addr);
+ if (error != 0)
+ return (error);
+
+ di->addr.baddr = addr;
+
+ register_mmio(di);
+
+ return (0);
+}
+
+static struct mmio_devemu *
+mmio_finddev(char *name)
+{
+ struct mmio_devemu **dpp, *dp;
+
+ SET_FOREACH(dpp, mmio_set) {
+ dp = *dpp;
+ if (!strcmp(dp->de_emu, name))
+ return (dp);
+ }
+
+ return (NULL);
+}
+
+static int
+mmio_init(struct vmctx *ctx, struct mmio_devemu *de, struct mmio_emul_info *dif)
+{
+ struct mmio_devinst *di;
+ int error;
+
+ di = calloc(1, sizeof(struct mmio_devinst));
+ if (di == NULL)
+ return (ENOMEM);
+
+ di->pi_d = de;
+ di->pi_vmctx = ctx;
+ snprintf(di->pi_name, DI_NAMESZ, "%s-mmio", de->de_emu);
+ di->di_lintr.state = IDLE;
+ di->di_lintr.irq = dif->irq;
+ pthread_mutex_init(&di->di_lintr.lock, NULL);
+ di->addr.baddr = dif->baddr;
+ di->addr.size = dif->size;
+	/*
+	 * Some devices (e.g., virtio-net) use these as uniquifiers;
+	 * the irq number should be unique and sufficient.
+	 */
+ di->pi_slot = dif->irq;
+ di->di_func = dif->irq;
+
+ error = (*de->de_init)(ctx, di, dif->arg);
+
+ if (error == 0) {
+ dif->di = di;
+ } else {
+ fprintf(stderr, "Device \"%s\": initialization failed\r\n",
+ di->pi_name);
+ fprintf(stderr, "Device arguments were: %s\r\n", dif->arg);
+ free(di);
+ }
+
+ return (error);
+}
+
+static void
+init_mmio_error(const char *name)
+{
+ struct mmio_devemu **mdpp, *mdp;
+
+ fprintf(stderr, "Device \"%s\" does not exist\r\n", name);
+ fprintf(stderr, "The following devices are available:\r\n");
+
+ SET_FOREACH(mdpp, mmio_set) {
+ mdp = *mdpp;
+ fprintf(stderr, "\t%s\r\n", mdp->de_emu);
+ }
+}
+
+int
+init_mmio(struct vmctx *ctx)
+{
+ struct mmio_devemu *de;
+ struct mmio_emul_info *dif;
+ int error;
+
+ mmio_membase = DEVEMU_MEMBASE;
+
+ for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next) {
+ if (dif->name == NULL)
+ continue;
+
+ de = mmio_finddev(dif->name);
+ if (de == NULL) {
+ init_mmio_error(dif->name);
+ return (1);
+ }
+
+ error = mmio_init(ctx, de, dif);
+ if (error != 0)
+ return (error);
+
+ /*
+		 * As in the amd64 implementation, add some slop to the
+		 * memory resources decoded, in order to give the guest
+		 * some flexibility to reprogram the addresses.
+ */
+ mmio_membase += MEM_ROUNDUP;
+ mmio_membase = roundup2(mmio_membase, MEM_ROUNDUP);
+ }
+
+ /* activate the interrupts */
+ for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next)
+ if (dif->di != NULL)
+ mmio_lintr_route(dif->di);
+
+ /* TODO: register fallback handlers? */
+
+ return (0);
+}
+
+void
+mmio_lintr_request(struct mmio_devinst *di)
+{
+ /* do nothing */
+}
+
+static void
+mmio_lintr_route(struct mmio_devinst *di)
+{
+ /* do nothing */
+}
+
+void
+mmio_lintr_assert(struct mmio_devinst *di)
+{
+ pthread_mutex_lock(&di->di_lintr.lock);
+ if (di->di_lintr.state == IDLE) {
+ di->di_lintr.state = ASSERTED;
+ mmio_irq_assert(di);
+ }
+ pthread_mutex_unlock(&di->di_lintr.lock);
+}
+
+void
+mmio_lintr_deassert(struct mmio_devinst *di)
+{
+ pthread_mutex_lock(&di->di_lintr.lock);
+ if (di->di_lintr.state == ASSERTED) {
+ mmio_irq_deassert(di);
+ di->di_lintr.state = IDLE;
+ } else if (di->di_lintr.state == PENDING) {
+ di->di_lintr.state = IDLE;
+ }
+ pthread_mutex_unlock(&di->di_lintr.lock);
+}
+
+/* TODO: Add dummy? */
Index: usr.sbin/bhyve/mmio/mmio_irq.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_irq.h
@@ -0,0 +1,12 @@
+#ifndef __MMIO_IRQ_H__
+#define __MMIO_IRQ_H__
+
+struct mmio_devinst;
+struct vmctx;
+
+void mmio_irq_init(struct vmctx *ctx);
+void mmio_irq_reserve(int irq);
+void mmio_irq_use(int irq);
+void mmio_irq_assert(struct mmio_devinst *di);
+void mmio_irq_deassert(struct mmio_devinst *di);
+
+#endif
Index: usr.sbin/bhyve/mmio/mmio_irq.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_irq.c
@@ -0,0 +1,113 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <vmmapi.h>
+
+#include "mmio_emul.h"
+#include "mmio_irq.h"
+#include "mmio_virtio.h"
+
+/* Value of 'active' marking an IRQ as reserved and unavailable for use */
+#define IRQ_DISABLED 0xff
+
+static struct mmio_irq {
+ uint32_t use_count; /* number of binds */
+ uint32_t active_count; /* number of asserts */
+ uint32_t active; /* irq active */
+ pthread_mutex_t lock;
+} irqs[50];
+
+void
+mmio_irq_reserve(int irq)
+{
+ assert(irq >= 0 && irq < nitems(irqs));
+ assert(irqs[irq].active == 0 || irqs[irq].active == IRQ_DISABLED);
+ irqs[irq].active = IRQ_DISABLED;
+}
+
+void
+mmio_irq_use(int irq)
+{
+ assert(irq >= 0 && irq < nitems(irqs));
+ assert(irqs[irq].active != IRQ_DISABLED);
+ irqs[irq].active++;
+}
+
+void
+mmio_irq_init(struct vmctx *ctx)
+{
+ int i;
+
+ for (i = 0; i < nitems(irqs); ++i) {
+ irqs[i].use_count = 0;
+ irqs[i].active_count = 0;
+ irqs[i].active = 0;
+ pthread_mutex_init(&irqs[i].lock, NULL);
+ }
+}
+
+void
+mmio_irq_assert(struct mmio_devinst *di)
+{
+ struct mmio_irq *irq;
+ uint32_t irq_status;
+
+	if (di->di_lintr.irq < 0)
+		return;
+	assert(di->di_lintr.irq < nitems(irqs));
+
+ irq = &irqs[di->di_lintr.irq];
+
+ pthread_mutex_lock(&irq->lock);
+ irq->active_count++;
+
+ pthread_mutex_lock(&di->di_lintr.lock);
+
+ irq_status = mmio_get_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS);
+ irq_status |= VIRTIO_MMIO_INT_VRING;
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS, irq_status);
+
+ if (irq->active_count == 1)
+ vm_assert_irq(di->pi_vmctx, di->di_lintr.irq);
+
+ pthread_mutex_unlock(&di->di_lintr.lock);
+
+ pthread_mutex_unlock(&irq->lock);
+}
+
+void
+mmio_irq_deassert(struct mmio_devinst *di)
+{
+ struct mmio_irq *irq;
+ uint32_t irq_status;
+
+	if (di->di_lintr.irq < 0)
+		return;
+	assert(di->di_lintr.irq < nitems(irqs));
+
+ irq = &irqs[di->di_lintr.irq];
+
+ pthread_mutex_lock(&irq->lock);
+ irq->active_count--;
+
+ pthread_mutex_lock(&di->di_lintr.lock);
+
+ irq_status = mmio_get_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS);
+ irq_status &= ~VIRTIO_MMIO_INT_VRING;
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS, irq_status);
+
+#if 0
+ /* MMIO devices do not require deassertions */
+ if (irq->active_count == 0)
+		vm_deassert_irq(di->pi_vmctx, di->di_lintr.irq);
+#endif
+
+ pthread_mutex_unlock(&di->di_lintr.lock);
+
+ pthread_mutex_unlock(&irq->lock);
+}
Index: usr.sbin/bhyve/mmio/mmio_virtio.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio.h
@@ -0,0 +1,484 @@
+/*-
+ * Copyright (c) 2013 Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VIRTIO_H_
+#define _VIRTIO_H_
+
+#include <machine/atomic.h>
+
+/*
+ * These are derived from several virtio specifications.
+ *
+ * Some useful links:
+ * https://github.com/rustyrussell/virtio-spec
+ * http://people.redhat.com/pbonzini/virtio-spec.pdf
+ */
+
+/*
+ * A virtual device has zero or more "virtual queues" (virtqueue).
+ * Each virtqueue uses at least two 4096-byte pages, laid out thus:
+ *
+ * +-----------------------------------------------+
+ * | "desc": <N> descriptors, 16 bytes each |
+ * | ----------------------------------------- |
+ * | "avail": 2 uint16; <N> uint16; 1 uint16 |
+ * | ----------------------------------------- |
+ * | pad to 4k boundary |
+ * +-----------------------------------------------+
+ * | "used": 2 x uint16; <N> elems; 1 uint16 |
+ * | ----------------------------------------- |
+ * | pad to 4k boundary |
+ * +-----------------------------------------------+
+ *
+ * The number <N> that appears here is always a power of two and is
+ * limited to no more than 32768 (as it must fit in a 16-bit field).
+ * If <N> is sufficiently large, the above will occupy more than
+ * two pages. In any case, all pages must be physically contiguous
+ * within the guest's physical address space.
+ *
+ * The <N> 16-byte "desc" descriptors consist of a 64-bit guest
+ * physical address <addr>, a 32-bit length <len>, a 16-bit
+ * <flags>, and a 16-bit <next> field (all in guest byte order).
+ *
+ * There are three flags that may be set :
+ * NEXT descriptor is chained, so use its "next" field
+ * WRITE descriptor is for host to write into guest RAM
+ * (else host is to read from guest RAM)
+ * INDIRECT descriptor address field is (guest physical)
+ * address of a linear array of descriptors
+ *
+ * Unless INDIRECT is set, <len> is the number of bytes that may
+ * be read/written from guest physical address <addr>. If
+ * INDIRECT is set, WRITE is ignored and <len> provides the length
+ * of the indirect descriptors (and <len> must be a multiple of
+ * 16). Note that NEXT may still be set in the main descriptor
+ * pointing to the indirect, and should be set in each indirect
+ * descriptor that uses the next descriptor (these should generally
+ * be numbered sequentially). However, INDIRECT must not be set
+ * in the indirect descriptors. Upon reaching an indirect descriptor
+ * without a NEXT bit, control returns to the direct descriptors.
+ *
+ * Except inside an indirect, each <next> value must be in the
+ * range [0 .. N) (i.e., the half-open interval). (Inside an
+ * indirect, each <next> must be in the range [0 .. <len>/16).)
+ *
+ * The "avail" data structures reside in the same pages as the
+ * "desc" structures since both together are used by the device to
+ * pass information to the hypervisor's virtual driver. These
+ * begin with a 16-bit <flags> field and 16-bit index <idx>, then
+ * have <N> 16-bit <ring> values, followed by one final 16-bit
+ * field <used_event>. The <N> <ring> entries are simply indices
+ * into the descriptor ring (and thus must meet the same
+ * constraints as each <next> value). However, <idx> is counted
+ * up from 0 (initially) and simply wraps around after 65535; it
+ * is taken mod <N> to find the next available entry.
+ *
+ * The "used" ring occupies a separate page or pages, and contains
+ * values written from the virtual driver back to the guest OS.
+ * This begins with a 16-bit <flags> and 16-bit <idx>, then there
+ * are <N> "vring_used" elements, followed by a 16-bit <avail_event>.
+ * The <N> "vring_used" elements consist of a 32-bit <id> and a
+ * 32-bit <len> (vu_tlen below). The <id> is simply the index of
+ * the head of a descriptor chain the guest made available
+ * earlier, and the <len> is the number of bytes actually written,
+ * e.g., in the case of a network driver that provided a large
+ * receive buffer but received only a small amount of data.
+ *
+ * The two event fields, <used_event> and <avail_event>, in the
+ * avail and used rings (respectively -- note the reversal!), are
+ * always provided, but are used only if the virtual device
+ * negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature
+ * negotiation. Similarly, both rings provide a flag --
+ * VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in
+ * their <flags> field, indicating that the guest does not need an
+ * interrupt, or that the hypervisor driver does not need a
+ * notify, when descriptors are added to the corresponding ring.
+ * (These are provided only for interrupt optimization and need
+ * not be implemented.)
+ */
+
+#define VIRTIO_MMIO_MAGIC_VALUE 0x000
+#define VIRTIO_MMIO_VERSION 0x004
+#define VIRTIO_MMIO_DEVICE_ID 0x008
+#define VIRTIO_MMIO_VENDOR_ID 0x00c
+#define VIRTIO_MMIO_HOST_FEATURES 0x010
+#define VIRTIO_MMIO_HOST_FEATURES_SEL 0x014
+#define VIRTIO_MMIO_GUEST_FEATURES 0x020
+#define VIRTIO_MMIO_GUEST_FEATURES_SEL 0x024
+#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028
+#define VIRTIO_MMIO_QUEUE_SEL 0x030
+#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034
+#define VIRTIO_MMIO_QUEUE_NUM 0x038
+#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c
+#define VIRTIO_MMIO_QUEUE_PFN 0x040
+#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050
+#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060
+#define VIRTIO_MMIO_INTERRUPT_ACK 0x064
+#define VIRTIO_MMIO_STATUS 0x070
+#define VIRTIO_MMIO_CONFIG 0x100
+#define VIRTIO_MMIO_INT_VRING (1 << 0)
+#define VIRTIO_MMIO_INT_CONFIG (1 << 1)
+#define VIRTIO_MMIO_VRING_ALIGN 4096
+
+#define VRING_ALIGN 4096
+
+#define VRING_DESC_F_NEXT (1 << 0)
+#define VRING_DESC_F_WRITE (1 << 1)
+#define VRING_DESC_F_INDIRECT (1 << 2)
+
+struct virtio_desc { /* AKA vring_desc */
+ uint64_t vd_addr; /* guest physical address */
+ uint32_t vd_len; /* length of scatter/gather seg */
+ uint16_t vd_flags; /* VRING_F_DESC_* */
+ uint16_t vd_next; /* next desc if F_NEXT */
+} __packed;
+
+struct virtio_used { /* AKA vring_used_elem */
+ uint32_t vu_idx; /* head of used descriptor chain */
+ uint32_t vu_tlen; /* length written-to */
+} __packed;
+
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+struct vring_avail {
+ uint16_t va_flags; /* VRING_AVAIL_F_* */
+ uint16_t va_idx; /* counts to 65535, then cycles */
+ uint16_t va_ring[]; /* size N, reported in QNUM value */
+/* uint16_t va_used_event; -- after N ring entries */
+} __packed;
+
+#define VRING_USED_F_NO_NOTIFY 1
+struct vring_used {
+ uint16_t vu_flags; /* VRING_USED_F_* */
+ uint16_t vu_idx; /* counts to 65535, then cycles */
+ struct virtio_used vu_ring[]; /* size N */
+/* uint16_t vu_avail_event; -- after N ring entries */
+} __packed;
+
+/*
+ * The address of any given virtual queue is determined by a single
+ * Page Frame Number register. The guest writes the PFN into the
+ * PCI config space. However, a device that has two or more
+ * virtqueues can have a different PFN, and size, for each queue.
+ * The number of queues is determinable via the PCI config space
+ * VTCFG_R_QSEL register. Writes to QSEL select the queue: 0 means
+ * queue #0, 1 means queue#1, etc. Once a queue is selected, the
+ * remaining PFN and QNUM registers refer to that queue.
+ *
+ * QNUM is a read-only register containing a nonzero power of two
+ * that indicates the (hypervisor's) queue size. Or, if reading it
+ * produces zero, the hypervisor does not have a corresponding
+ * queue. (The number of possible queues depends on the virtual
+ * device. The block device has just one; the network device
+ * provides either two -- 0 = receive, 1 = transmit -- or three,
+ * with 2 = control.)
+ *
+ * PFN is a read/write register giving the physical page address of
+ * the virtqueue in guest memory (the guest must allocate enough space
+ * based on the hypervisor's provided QNUM).
+ *
+ * QNOTIFY is effectively write-only: when the guest writes a queue
+ * number to the register, the hypervisor should scan the specified
+ * virtqueue. (Reading QNOTIFY currently always gets 0).
+ */
+
+/*
+ * PFN register shift amount
+ */
+#define VRING_PFN 12
+
+/*
+ * Virtio device types
+ *
+ * XXX Should really be merged with <dev/virtio/virtio.h> defines
+ */
+#define VIRTIO_TYPE_NET 1
+#define VIRTIO_TYPE_BLOCK 2
+#define VIRTIO_TYPE_CONSOLE 3
+#define VIRTIO_TYPE_ENTROPY 4
+#define VIRTIO_TYPE_BALLOON 5
+#define VIRTIO_TYPE_IOMEMORY 6
+#define VIRTIO_TYPE_RPMSG 7
+#define VIRTIO_TYPE_SCSI 8
+#define VIRTIO_TYPE_9P 9
+
+/* experimental IDs start at 65535 and work down */
+
+/*
+ * PCI vendor/device IDs
+ */
+#define VIRTIO_VENDOR 0x1AF4
+#define VIRTIO_DEV_NET 0x1000
+#define VIRTIO_DEV_BLOCK 0x1001
+#define VIRTIO_DEV_CONSOLE 0x1003
+#define VIRTIO_DEV_RANDOM 0x1005
+
+#define VIRTIO_MMIO_MAGIC_NUM 0x74726976
+#define VIRTIO_MMIO_VERSION_NUM 0x1
+
+/*
+ * Bits in VTCFG_R_STATUS. Guests need not actually set any of these,
+ * but a guest writing 0 to this register means "please reset".
+ */
+#define VTCFG_STATUS_ACK 0x01 /* guest OS has acknowledged dev */
+#define VTCFG_STATUS_DRIVER 0x02 /* guest OS driver is loaded */
+#define VTCFG_STATUS_DRIVER_OK 0x04 /* guest OS driver ready */
+#define VTCFG_STATUS_FAILED 0x80 /* guest has given up on this dev */
+
+/*
+ * Bits in VTCFG_R_ISR. These apply only if not using MSI-X.
+ *
+ * (We don't [yet?] ever use CONF_CHANGED.)
+ */
+#define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */
+#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */
+
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+
+/*
+ * Feature flags.
+ * Note: bits 0 through 23 are reserved to each device type.
+ */
+#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
+#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28)
+#define VIRTIO_RING_F_EVENT_IDX (1 << 29)
+
+/* From section 2.3, "Virtqueue Configuration", of the virtio specification */
+static inline size_t
+vring_size(u_int qsz, uint32_t align)
+{
+ size_t size;
+
+ /* constant 3 below = va_flags, va_idx, va_used_event */
+ size = sizeof(struct virtio_desc) * qsz + sizeof(uint16_t) * (3 + qsz);
+ size = roundup2(size, align);
+
+ /* constant 3 below = vu_flags, vu_idx, vu_avail_event */
+ size += sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz;
+ size = roundup2(size, align);
+
+ return (size);
+}
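
A worked example of the computation above (my arithmetic, not from the diff), for a 256-entry queue with 4096-byte alignment:

/*
 * vring_size(256, 4096):
 *   descriptors:  256 * 16          = 4096 bytes
 *   avail ring:   (3 + 256) * 2     =  518 bytes
 *   subtotal, rounded up to 4096    = 8192 bytes
 *   used ring:    3 * 2 + 256 * 8   = 2054 bytes
 *   total, rounded up to 4096       = 12288 bytes (three 4 KiB pages)
 *
 * i.e. assert(vring_size(256, 4096) == 3 * 4096) would hold.
 */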
+
+struct vmctx;
+struct mmio_devinst;
+struct vqueue_info;
+
+/*
+ * A virtual device, with some number (possibly 0) of virtual
+ * queues and some size (possibly 0) of configuration-space
+ * registers private to the device. The virtio_softc should come
+ * at the front of each "derived class", so that a pointer to the
+ * virtio_softc is also a pointer to the more specific, derived-
+ * from-virtio driver's softc.
+ *
+ * Note: inside each hypervisor virtio driver, changes to these
+ * data structures must be locked against other threads, if any.
+ * Except for PCI config space register read/write, we assume each
+ * driver does the required locking, but we need a pointer to the
+ * lock (if there is one) for PCI config space read/write ops.
+ *
+ * When the guest reads or writes the device's config space, the
+ * generic layer checks for operations on the special registers
+ * described above. If the offset of the register(s) being read
+ * or written is past the CFG area (CFG0 or CFG1), the request is
+ * passed on to the virtual device, after subtracting off the
+ * generic-layer size. (So, drivers can just use the offset as
+ * an offset into "struct config", for instance.)
+ *
+ * (The virtio layer also makes sure that the read or write is to/
+ * from a "good" config offset, hence vc_cfgsize, and on BAR #0.
+ * However, the driver must verify the read or write size and offset
+ * and that no one is writing a readonly register.)
+ *
+ * The BROKED flag ("this thing done gone and broked") is for future
+ * use.
+ */
+#define VIRTIO_USE_MSIX 0x01
+#define VIRTIO_EVENT_IDX 0x02 /* use the event-index values */
+#define VIRTIO_BROKED 0x08 /* ??? */
+
+struct virtio_softc {
+ struct virtio_consts *vs_vc; /* constants (see below) */
+ int vs_flags; /* VIRTIO_* flags from above */
+ pthread_mutex_t *vs_mtx; /* POSIX mutex, if any */
+ struct mmio_devinst *vs_di; /* device instance */
+ uint32_t vs_negotiated_caps; /* negotiated capabilities */
+ uint32_t vs_align; /* virtual queue alignment */
+ struct vqueue_info *vs_queues; /* one per vc_nvq */
+ int vs_curq; /* current queue */
+ int irq; /* interrupt */
+ uint8_t vs_status; /* value from last status write */
+ uint32_t vs_guest_page_size; /* size of guest page in bytes */
+};
+
+#define VS_LOCK(vs) \
+do { \
+ if (vs->vs_mtx) \
+ pthread_mutex_lock(vs->vs_mtx); \
+} while (0)
+
+#define VS_UNLOCK(vs) \
+do { \
+ if (vs->vs_mtx) \
+ pthread_mutex_unlock(vs->vs_mtx); \
+} while (0)
+
+struct virtio_consts {
+ const char *vc_name; /* name of driver (for diagnostics) */
+ int vc_nvq; /* number of virtual queues */
+ size_t vc_cfgsize; /* size of dev-specific config regs */
+ void (*vc_reset)(void *); /* called on virtual device reset */
+ void (*vc_qnotify)(void *, struct vqueue_info *);
+ /* called on QNOTIFY if no VQ notify */
+ int (*vc_cfgread)(void *, int, int, uint32_t *);
+ /* called to read config regs */
+ int (*vc_cfgwrite)(void *, int, int, uint32_t);
+ /* called to write config regs */
+ void (*vc_apply_features)(void *, uint64_t);
+ /* called to apply negotiated features */
+ uint64_t vc_hv_caps; /* hypervisor-provided capabilities */
+};
+
+/*
+ * Data structure allocated (statically) per virtual queue.
+ *
+ * Drivers may change vq_qsize after a reset. When the guest OS
+ * requests a device reset, the hypervisor first calls
+ * vs->vs_vc->vc_reset(); then the data structure below is
+ * reinitialized (for each virtqueue: vs->vs_vc->vc_nvq).
+ *
+ * The remaining fields should only be fussed-with by the generic
+ * code.
+ *
+ * Note: the addresses of vq_desc, vq_avail, and vq_used are all
+ * computable from each other, but it's a lot simpler if we just
+ * keep a pointer to each one. The event indices are similarly
+ * (but more easily) computable, and this time we'll compute them:
+ * they're just XX_ring[N].
+ */
+#define VQ_ALLOC 0x01 /* set once we have a pfn */
+#define VQ_BROKED 0x02 /* ??? */
+struct vqueue_info {
+ uint16_t vq_qsize; /* size of this queue (a power of 2) */
+ void (*vq_notify)(void *, struct vqueue_info *);
+ /* called instead of vc_notify, if not NULL */
+
+ struct virtio_softc *vq_vs; /* backpointer to softc */
+ uint16_t vq_num; /* we're the num'th queue in the softc */
+
+ uint16_t vq_flags; /* flags (see above) */
+ uint16_t vq_last_avail; /* a recent value of vq_avail->va_idx */
+ uint16_t vq_save_used; /* saved vq_used->vu_idx; see vq_endchains */
+
+ uint32_t vq_pfn; /* PFN of virt queue (not shifted!) */
+
+ volatile struct virtio_desc *vq_desc; /* descriptor array */
+ volatile struct vring_avail *vq_avail; /* the "avail" ring */
+ volatile struct vring_used *vq_used; /* the "used" ring */
+};
+/* as noted above, these are sort of backwards, name-wise */
+#define VQ_AVAIL_EVENT_IDX(vq) \
+ (*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize])
+#define VQ_USED_EVENT_IDX(vq) \
+ ((vq)->vq_avail->va_ring[(vq)->vq_qsize])
+
+/*
+ * Is this ring ready for I/O?
+ */
+static inline int
+vq_ring_ready(struct vqueue_info *vq)
+{
+
+ return (vq->vq_flags & VQ_ALLOC);
+}
+
+/*
+ * Are there "available" descriptors? (This does not count
+ * how many, just returns True if there are some.)
+ */
+static inline int
+vq_has_descs(struct vqueue_info *vq)
+{
+
+ return (vq_ring_ready(vq) && vq->vq_last_avail !=
+ vq->vq_avail->va_idx);
+}
+
+/*
+ * Deliver an interrupt to guest on the given virtual queue
+ * (if possible, or a generic MSI interrupt if not using MSI-X).
+ */
+static inline void
+vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
+{
+ VS_LOCK(vs);
+ mmio_lintr_assert(vs->vs_di);
+ VS_UNLOCK(vs);
+}
+
+static inline void
+vq_kick_enable(struct vqueue_info *vq)
+{
+
+ vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
+ /*
+ * Full memory barrier to make sure the store to vu_flags
+ * happens before the load from va_idx, which results from
+ * a subsequent call to vq_has_descs().
+ */
+ atomic_thread_fence_seq_cst();
+}
+
+static inline void
+vq_kick_disable(struct vqueue_info *vq)
+{
+
+ vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
+}
+
+struct iovec;
+void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+ void *dev_softc, struct mmio_devinst *di,
+ struct vqueue_info *queues);
+int vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix);
+void vi_reset_dev(struct virtio_softc *);
+void vi_set_io_res(struct virtio_softc *, int);
+void vi_vq_init(struct virtio_softc *vs, uint32_t pfn);
+
+int vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
+ struct iovec *iov, int n_iov, uint16_t *flags);
+void vq_retchains(struct vqueue_info *vq, uint16_t n_chains);
+void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
+void vq_endchains(struct vqueue_info *vq, int used_all_avail);
+
+uint64_t vi_mmio_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size);
+void vi_mmio_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size, uint64_t value);
+void vi_devemu_init(struct mmio_devinst *di, uint32_t type);
+#endif /* _VIRTIO_H_ */
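
The helpers declared above are consumed by the per-device backends in this diff (e.g. mmio_virtio_block.c). Below is a condensed sketch of the usual notify-handler shape, with hypothetical names and a fixed iovec count chosen purely for illustration:

static void
example_notify(void *vsc, struct vqueue_info *vq)
{
	struct iovec iov[8];	/* struct iovec comes from <sys/uio.h> */
	uint16_t flags[8];
	uint16_t idx;
	int n;

	while (vq_has_descs(vq)) {
		n = vq_getchain(vq, &idx, iov, 8, flags);
		if (n <= 0)
			break;
		/* ... service the request described by iov[0..n-1] ... */
		vq_relchain(vq, idx, 0);	/* 0 bytes written back */
	}
	vq_endchains(vq, 1);	/* interrupt the guest if it asked for one */
}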
Index: usr.sbin/bhyve/mmio/mmio_virtio.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio.c
@@ -0,0 +1,707 @@
+/*-
+ * Copyright (c) 2013 Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/uio.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "bhyverun.h"
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+#include "virtio_ids.h"
+
+static int debug_virtio = 0;
+
+#define DPRINTF(fmt, ...) \
+	do { if (debug_virtio) printf(fmt, ##__VA_ARGS__); } while (0)
+#define CFG_RW_DBG(offset, value) \
+ DPRINTF("{device} | %-60s | %-35s | %-30s (%jx): value = %jx\r\n", \
+ __FILE__, __func__, #offset, (uintmax_t)offset, (uintmax_t)value);
+
+/*
+ * Functions for dealing with generalized "virtual devices" as
+ * defined by <https://www.google.com/#output=search&q=virtio+spec>
+ */
+
+/*
+ * In case we decide to relax the "virtio softc comes at the
+ * front of virtio-based device softc" constraint, let's use
+ * this to convert.
+ */
+#define DEV_SOFTC(vs) ((void *)(vs))
+
+/*
+ * Link a virtio_softc to its constants, the device softc, and
+ * the MMIO device emulation.
+ */
+void
+vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+ void *dev_softc, struct mmio_devinst *di,
+ struct vqueue_info *queues)
+{
+ int i;
+
+ /* vs and dev_softc addresses must match */
+ assert((void *)vs == dev_softc);
+ vs->vs_vc = vc;
+ vs->vs_di = di;
+ di->pi_arg = vs;
+
+ vs->vs_queues = queues;
+ for (i = 0; i < vc->vc_nvq; i++) {
+ queues[i].vq_vs = vs;
+ queues[i].vq_num = i;
+ }
+}
+
+/*
+ * Reset device (device-wide). This erases all queues, i.e.,
+ * all the queues become invalid (though we don't wipe out the
+ * internal pointers, we just clear the VQ_ALLOC flag).
+ *
+ * It resets negotiated features to "none".
+ */
+void
+vi_reset_dev(struct virtio_softc *vs)
+{
+ struct vqueue_info *vq;
+ int i, nvq;
+
+ if (vs->vs_mtx)
+ assert(pthread_mutex_isowned_np(vs->vs_mtx));
+
+ nvq = vs->vs_vc->vc_nvq;
+ for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
+ vq->vq_flags = 0;
+ vq->vq_last_avail = 0;
+ vq->vq_save_used = 0;
+ vq->vq_pfn = 0;
+ }
+ vs->vs_negotiated_caps = 0;
+ vs->vs_curq = 0;
+ /* vs->vs_status = 0; -- redundant */
+ mmio_lintr_deassert(vs->vs_di);
+}
+
+void
+vi_set_io_res(struct virtio_softc *vs, int barnum)
+{
+ mmio_alloc_mem(vs->vs_di);
+}
+
+/*
+ * Initialize interrupts for MMIO
+ */
+int
+vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
+{
+ /* activate interrupts */
+ mmio_lintr_request(vs->vs_di);
+
+ return (0);
+}
+
+/*
+ * Initialize the currently-selected virtio queue (vs->vs_curq).
+ * The guest just gave us a page frame number, from which we can
+ * calculate the addresses of the queue.
+ */
+void
+vi_vq_init(struct virtio_softc *vs, uint32_t pfn)
+{
+ struct vqueue_info *vq;
+ uint64_t phys;
+ size_t size;
+ char *base;
+
+ vq = &vs->vs_queues[vs->vs_curq];
+ vq->vq_pfn = pfn;
+ phys = (uint64_t)pfn * vs->vs_guest_page_size;
+ size = vring_size(vq->vq_qsize, vs->vs_align);
+ base = paddr_guest2host(vs->vs_di->pi_vmctx, phys, size);
+
+ /* First page(s) are descriptors... */
+ vq->vq_desc = (struct virtio_desc *)base;
+ base += vq->vq_qsize * sizeof(struct virtio_desc);
+
+ /* ... immediately followed by "avail" ring (entirely uint16_t's) */
+ vq->vq_avail = (struct vring_avail *)base;
+ base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
+
+ /* Then it's rounded up to the next page... */
+ base = (char *)roundup2((uintptr_t)base, vs->vs_align);
+
+ /* ... and the last page(s) are the used ring. */
+ vq->vq_used = (struct vring_used *)base;
+
+ /* Mark queue as allocated, and start at 0 when we use it. */
+ vq->vq_flags = VQ_ALLOC;
+ vq->vq_last_avail = 0;
+ vq->vq_save_used = 0;
+}
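+
+/*
+ * Worked example (illustrative only, assuming the legacy 16-byte descriptor
+ * layout used above): for a 128-entry queue with vs_guest_page_size and
+ * vs_align both 4096,
+ *
+ *	descriptor table: 128 * 16          = 2048 bytes
+ *	avail ring:       (2 + 128 + 1) * 2 =  262 bytes
+ *
+ * which rounds up to 4096, so the used ring starts one page into the
+ * region and the whole layout fits within two guest pages.
+ */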
+
+/*
+ * Helper inline for vq_getchain(): record the i'th "real"
+ * descriptor.
+ */
+static inline void
+_vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx,
+ struct iovec *iov, int n_iov, uint16_t *flags) {
+
+ if (i >= n_iov)
+ return;
+ iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, vd->vd_len);
+ iov[i].iov_len = vd->vd_len;
+ if (flags != NULL)
+ flags[i] = vd->vd_flags;
+}
+#define VQ_MAX_DESCRIPTORS 512 /* see below */
+
+/*
+ * Examine the chain of descriptors starting at the "next one" to
+ * make sure that they describe a sensible request. If so, return
+ * the number of "real" descriptors that would be needed/used in
+ * acting on this request. This may be smaller than the number of
+ * available descriptors, e.g., if there are two available but
+ * they are two separate requests, this just returns 1. Or, it
+ * may be larger: if there are indirect descriptors involved,
+ * there may only be one descriptor available but it may be an
+ * indirect pointing to eight more. We return 8 in this case,
+ * i.e., we do not count the indirect descriptors, only the "real"
+ * ones.
+ *
+ * Basically, this vets the vd_flags and vd_next field of each
+ * descriptor and tells you how many are involved. Since some may
+ * be indirect, this also needs the vmctx (in the mmio_devinst
+ * at vs->vs_di) so that it can find indirect descriptors.
+ *
+ * As we process each descriptor, we copy and adjust it (guest to
+ * host address wise, also using the vmctx) into the given iov[]
+ * array (of the given size). If the array overflows, we stop
+ * placing values into the array but keep processing descriptors,
+ * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
+ * So you, the caller, must not assume that iov[] is as big as the
+ * return value (you can process the same thing twice to allocate
+ * a larger iov array if needed, or supply a zero length to find
+ * out how much space is needed).
+ *
+ * If you want to verify the WRITE flag on each descriptor, pass a
+ * non-NULL "flags" pointer to an array of "uint16_t" of the same size
+ * as n_iov and we'll copy each vd_flags field after unwinding any
+ * indirects.
+ *
+ * If some descriptor(s) are invalid, this prints a diagnostic message
+ * and returns -1. If no descriptors are ready now it simply returns 0.
+ *
+ * You are assumed to have done a vq_ring_ready() if needed (note
+ * that vq_has_descs() does one).
+ */
+int
+vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
+ struct iovec *iov, int n_iov, uint16_t *flags)
+{
+ int i;
+ u_int ndesc, n_indir;
+ u_int idx, next;
+ volatile struct virtio_desc *vdir, *vindir, *vp;
+ struct vmctx *ctx;
+ struct virtio_softc *vs;
+ const char *name;
+
+ vs = vq->vq_vs;
+ name = vs->vs_vc->vc_name;
+
+ /*
+ * Note: it's the responsibility of the guest not to
+ * update vq->vq_avail->va_idx until all of the descriptors
+ * the guest has written are valid (including all their
+ * vd_next fields and vd_flags).
+ *
+	 * Compute (va_idx - last_avail) in integers mod 2**16.  This is
+	 * the number of descriptors the guest has made available
+ * since the last time we updated vq->vq_last_avail.
+ *
+ * We just need to do the subtraction as an unsigned int,
+ * then trim off excess bits.
+ */
+ idx = vq->vq_last_avail;
+ ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx);
+ if (ndesc == 0)
+ return (0);
+ if (ndesc > vq->vq_qsize) {
+ /* XXX need better way to diagnose issues */
+ fprintf(stderr,
+ "%s: ndesc (%u) out of range, driver confused?\r\n",
+ name, (u_int)ndesc);
+ return (-1);
+ }
+
+ /*
+ * Now count/parse "involved" descriptors starting from
+ * the head of the chain.
+ *
+ * To prevent loops, we could be more complicated and
+ * check whether we're re-visiting a previously visited
+ * index, but we just abort if the count gets excessive.
+ */
+ ctx = vs->vs_di->pi_vmctx;
+ *pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)];
+ vq->vq_last_avail++;
+ for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
+ if (next >= vq->vq_qsize) {
+ fprintf(stderr,
+ "%s: descriptor index %u out of range, "
+ "driver confused?\r\n",
+ name, next);
+ return (-1);
+ }
+ vdir = &vq->vq_desc[next];
+ if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
+ _vq_record(i, vdir, ctx, iov, n_iov, flags);
+ i++;
+ } else if ((vs->vs_vc->vc_hv_caps &
+ VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+ fprintf(stderr,
+ "%s: descriptor has forbidden INDIRECT flag, "
+ "driver confused?\r\n",
+ name);
+ return (-1);
+ } else {
+ n_indir = vdir->vd_len / 16;
+ if ((vdir->vd_len & 0xf) || n_indir == 0) {
+ fprintf(stderr,
+ "%s: invalid indir len 0x%x, "
+ "driver confused?\r\n",
+ name, (u_int)vdir->vd_len);
+ return (-1);
+ }
+ vindir = paddr_guest2host(ctx,
+ vdir->vd_addr, vdir->vd_len);
+ /*
+ * Indirects start at the 0th, then follow
+ * their own embedded "next"s until those run
+ * out. Each one's indirect flag must be off
+ * (we don't really have to check, could just
+ * ignore errors...).
+ */
+ next = 0;
+ for (;;) {
+ vp = &vindir[next];
+ if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
+ fprintf(stderr,
+ "%s: indirect desc has INDIR flag,"
+ " driver confused?\r\n",
+ name);
+ return (-1);
+ }
+ _vq_record(i, vp, ctx, iov, n_iov, flags);
+ if (++i > VQ_MAX_DESCRIPTORS)
+ goto loopy;
+ if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
+ break;
+ next = vp->vd_next;
+ if (next >= n_indir) {
+ fprintf(stderr,
+ "%s: invalid next %u > %u, "
+ "driver confused?\r\n",
+ name, (u_int)next, n_indir);
+ return (-1);
+ }
+ }
+ }
+ if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0)
+ return (i);
+ }
+loopy:
+ fprintf(stderr,
+ "%s: descriptor loop? count > %d - driver confused?\r\n",
+ name, i);
+ return (-1);
+}
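+
+/*
+ * Typical calling pattern for the chain API (illustrative sketch only; the
+ * real callers are the per-device notify handlers further down):
+ *
+ *	struct iovec iov[8];
+ *	uint16_t idx, flags[8];
+ *	int n;
+ *
+ *	while (vq_has_descs(vq)) {
+ *		n = vq_getchain(vq, &idx, iov, 8, flags);
+ *		if (n <= 0)
+ *			break;
+ *		... process iov[0..n-1], honouring flags[i] ...
+ *		vq_relchain(vq, idx, bytes_written_back_to_guest);
+ *	}
+ *	vq_endchains(vq, 1);
+ */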
+
+/*
+ * Return the first n_chains request chains back to the available queue.
+ *
+ * (These chains are the ones you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void
+vq_retchains(struct vqueue_info *vq, uint16_t n_chains)
+{
+
+ vq->vq_last_avail -= n_chains;
+}
+
+/*
+ * Return specified request chain to the guest, setting its I/O length
+ * to the provided value.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void
+vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
+{
+ uint16_t uidx, mask;
+ volatile struct vring_used *vuh;
+ volatile struct virtio_used *vue;
+
+ /*
+ * Notes:
+ * - mask is N-1 where N is a power of 2 so computes x % N
+ * - vuh points to the "used" data shared with guest
+ * - vue points to the "used" ring entry we want to update
+	 * - head is the same value we compute in vq_getchain().
+ *
+ * (I apologize for the two fields named vu_idx; the
+ * virtio spec calls the one that vue points to, "id"...)
+ */
+ mask = vq->vq_qsize - 1;
+ vuh = vq->vq_used;
+
+ uidx = vuh->vu_idx;
+ vue = &vuh->vu_ring[uidx++ & mask];
+ vue->vu_idx = idx;
+ vue->vu_tlen = iolen;
+ vuh->vu_idx = uidx;
+}
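+
+/*
+ * For example, the block device emulation below returns each completed
+ * request with vq_relchain(&sc->vbsc_vq, io->io_idx, 1), reporting the
+ * single status byte it wrote back to the guest.
+ */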
+
+/*
+ * Driver has finished processing "available" chains and calling
+ * vq_relchain on each one.  If the driver used all the available
+ * chains, used_all_avail should be set.
+ *
+ * If the "used" index moved we may need to inform the guest, i.e.,
+ * deliver an interrupt. Even if the used index did NOT move we
+ * may need to deliver an interrupt, if the avail ring is empty and
+ * we are supposed to interrupt on empty.
+ *
+ * Note that used_all_avail is provided by the caller because it's
+ * a snapshot of the ring state when it decided to finish interrupt
+ * processing -- it's possible that descriptors became available after
+ * that point. (It's also typically a constant 1/True as well.)
+ */
+void
+vq_endchains(struct vqueue_info *vq, int used_all_avail)
+{
+ struct virtio_softc *vs;
+ uint16_t event_idx, new_idx, old_idx;
+ int intr;
+
+ /*
+ * Interrupt generation: if we're using EVENT_IDX,
+ * interrupt if we've crossed the event threshold.
+ * Otherwise interrupt is generated if we added "used" entries,
+ * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
+ *
+ * In any case, though, if NOTIFY_ON_EMPTY is set and the
+ * entire avail was processed, we need to interrupt always.
+ */
+ vs = vq->vq_vs;
+ old_idx = vq->vq_save_used;
+ vq->vq_save_used = new_idx = vq->vq_used->vu_idx;
+ if (used_all_avail &&
+ (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY))
+ intr = 1;
+ else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
+ event_idx = VQ_USED_EVENT_IDX(vq);
+ /*
+ * This calculation is per docs and the kernel
+ * (see src/sys/dev/virtio/virtio_ring.h).
+ */
+ intr = (uint16_t)(new_idx - event_idx - 1) <
+ (uint16_t)(new_idx - old_idx);
+ } else {
+ intr = new_idx != old_idx &&
+ !(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+ if (intr)
+ vq_interrupt(vs, vq);
+}
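+
+/*
+ * Worked example of the EVENT_IDX test above (illustrative): with
+ * old_idx = 10, new_idx = 13 and a guest-supplied used_event of 11,
+ * (uint16_t)(13 - 11 - 1) = 1 is less than (uint16_t)(13 - 10) = 3, so the
+ * guest's threshold was crossed and an interrupt is raised.  With
+ * used_event = 14 the left-hand side wraps to 65534 and no interrupt is
+ * delivered.
+ */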
+
+/*
+ * Handle MMIO register reads.
+ * If it's to the interrupt system, do that.
+ * If it's part of the standard virtio registers, do that.
+ * Otherwise dispatch to the actual device emulation.
+ */
+uint64_t
+vi_mmio_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size)
+{
+ struct virtio_softc *vs = di->pi_arg;
+ struct virtio_consts *vc;
+ const char *name;
+ uint64_t sel;
+ uint32_t value;
+ int error;
+
+ if (vs->vs_mtx)
+ pthread_mutex_lock(vs->vs_mtx);
+
+ vc = vs->vs_vc;
+ name = vc->vc_name;
+ value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff;
+
+ if (size != 1 && size != 2 && size != 4)
+ goto bad;
+
+ if (offset >= VIRTIO_MMIO_CONFIG) {
+ error = (*vc->vc_cfgread)(DEV_SOFTC(vs),
+ offset - VIRTIO_MMIO_CONFIG,
+ size,
+ &value);
+ if (error)
+ goto bad;
+
+ CFG_RW_DBG(offset, value);
+ goto done;
+ }
+
+ switch (offset) {
+ case VIRTIO_MMIO_MAGIC_VALUE:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_MAGIC_VALUE, value);
+ break;
+ case VIRTIO_MMIO_VERSION:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_VERSION, value);
+ break;
+ case VIRTIO_MMIO_DEVICE_ID:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_DEVICE_ID, value);
+ break;
+ case VIRTIO_MMIO_VENDOR_ID:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_VENDOR_ID, value);
+ break;
+ case VIRTIO_MMIO_INTERRUPT_STATUS:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_INTERRUPT_STATUS, value);
+ break;
+ case VIRTIO_MMIO_STATUS:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_STATUS, value);
+ break;
+ case VIRTIO_MMIO_HOST_FEATURES:
+ sel = mmio_get_cfgreg32(di, VIRTIO_MMIO_HOST_FEATURES_SEL);
+ value = (vc->vc_hv_caps >> (32 * sel)) & 0xffffffff;
+ CFG_RW_DBG(VIRTIO_MMIO_HOST_FEATURES, value);
+ break;
+ case VIRTIO_MMIO_QUEUE_NUM_MAX:
+ value = vs->vs_curq < vc->vc_nvq ?
+ vs->vs_queues[vs->vs_curq].vq_qsize : 0;
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NUM_MAX, value);
+ break;
+ case VIRTIO_MMIO_QUEUE_PFN:
+ value = vs->vs_curq < vc->vc_nvq ?
+ vs->vs_queues[vs->vs_curq].vq_pfn : 0;
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_PFN, value);
+ break;
+ default:
+ CFG_RW_DBG(offset, value);
+ goto bad;
+ break;
+ }
+
+ goto done;
+
+bad:
+ fprintf(stderr, "%s: read from bad offset/size: %jd/%d\r\n",
+ name, (uintmax_t)offset, size);
+
+done:
+ if (vs->vs_mtx)
+ pthread_mutex_unlock(vs->vs_mtx);
+ return (value);
+}
+
+/*
+ * Handle MMIO register writes.
+ * If it's to the interrupt system, do that.
+ * If it's part of the standard virtio registers, do that.
+ * Otherwise dispatch to the actual device emulation.
+ */
+void
+vi_mmio_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size, uint64_t value)
+{
+ struct virtio_softc *vs = di->pi_arg;
+ struct vqueue_info *vq;
+ struct virtio_consts *vc;
+ const char *name;
+ int error;
+
+ if (vs->vs_mtx)
+ pthread_mutex_lock(vs->vs_mtx);
+
+ vc = vs->vs_vc;
+ name = vc->vc_name;
+
+ if (size != 1 && size != 2 && size != 4)
+ goto bad;
+
+ if (offset >= VIRTIO_MMIO_CONFIG) {
+ error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs),
+ offset - VIRTIO_MMIO_CONFIG,
+ size, value);
+ if (error)
+ goto bad;
+
+ CFG_RW_DBG(offset, value);
+ goto done;
+ }
+
+ switch (offset) {
+ case VIRTIO_MMIO_HOST_FEATURES_SEL:
+ CFG_RW_DBG(VIRTIO_MMIO_HOST_FEATURES_SEL, value);
+ mmio_set_cfgreg32(di, offset, value);
+ break;
+ case VIRTIO_MMIO_GUEST_FEATURES_SEL:
+ CFG_RW_DBG(VIRTIO_MMIO_GUEST_FEATURES_SEL, value);
+ mmio_set_cfgreg32(di, offset, value);
+ break;
+ case VIRTIO_MMIO_INTERRUPT_ACK:
+ CFG_RW_DBG(VIRTIO_MMIO_INTERRUPT_ACK, value);
+ mmio_lintr_deassert(di);
+ mmio_set_cfgreg32(di, offset, value);
+ break;
+ case VIRTIO_MMIO_STATUS:
+ CFG_RW_DBG(VIRTIO_MMIO_STATUS, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_status = value;
+ if (value == 0)
+ (*vc->vc_reset)(DEV_SOFTC(vs));
+ break;
+ case VIRTIO_MMIO_QUEUE_NUM:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NUM, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vq = &vs->vs_queues[vs->vs_curq];
+ vq->vq_qsize = value;
+ break;
+ case VIRTIO_MMIO_GUEST_FEATURES:
+ CFG_RW_DBG(VIRTIO_MMIO_GUEST_FEATURES, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_negotiated_caps = value & vc->vc_hv_caps;
+ if (vc->vc_apply_features)
+ (*vc->vc_apply_features)(DEV_SOFTC(vs),
+ vs->vs_negotiated_caps);
+ break;
+ case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_guest_page_size = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_SEL:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_SEL, value);
+ mmio_set_cfgreg32(di, offset, value);
+ /*
+ * Note that the guest is allowed to select an
+ * invalid queue; we just need to return a QNUM
+ * of 0 while the bad queue is selected.
+ */
+ vs->vs_curq = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_ALIGN:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_ALIGN, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_align = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_PFN:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_PFN, value);
+ mmio_set_cfgreg32(di, offset, value);
+ if (vs->vs_curq >= vc->vc_nvq)
+ fprintf(stderr, "%s: curq %d >= max %d\r\n",
+ name, vs->vs_curq, vc->vc_nvq);
+ else
+ vi_vq_init(vs, value);
+ break;
+ case VIRTIO_MMIO_QUEUE_NOTIFY:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NOTIFY, value);
+ if (value >= vc->vc_nvq) {
+ fprintf(stderr, "%s: queue %d notify out of range\r\n",
+ name, (int)value);
+ break;
+ }
+ mmio_set_cfgreg32(di, offset, value);
+ vq = &vs->vs_queues[value];
+ if (vq->vq_notify)
+ (*vq->vq_notify)(DEV_SOFTC(vs), vq);
+ else if (vc->vc_qnotify)
+ (*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
+ else
+ fprintf(stderr,
+ "%s: qnotify queue %d: missing vq/vc notify\r\n",
+ name, (int)value);
+ break;
+ default:
+ CFG_RW_DBG(offset, value);
+ goto bad;
+ break;
+ }
+
+ goto done;
+
+bad:
+ fprintf(stderr, "%s: write to bad offset/size %jd/%d\r\n",
+ name, (uintmax_t)offset, size);
+done:
+ if (vs->vs_mtx)
+ pthread_mutex_unlock(vs->vs_mtx);
+}
+
+void
+vi_devemu_init(struct mmio_devinst *di, uint32_t type)
+{
+ uint32_t id;
+
+ switch (type) {
+ case VIRTIO_TYPE_NET:
+ id = VIRTIO_ID_NETWORK;
+ break;
+ case VIRTIO_TYPE_BLOCK:
+ id = VIRTIO_ID_BLOCK;
+ break;
+ case VIRTIO_TYPE_CONSOLE:
+ id = VIRTIO_ID_CONSOLE;
+ break;
+ case VIRTIO_TYPE_ENTROPY:
+ id = VIRTIO_ID_ENTROPY;
+ break;
+ default:
+ return;
+ }
+
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_MAGIC_VALUE, VIRTIO_MMIO_MAGIC_NUM);
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_VERSION, VIRTIO_MMIO_VERSION_NUM);
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_DEVICE_ID, id);
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_VENDOR_ID, VIRTIO_VENDOR);
+}
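+
+/*
+ * Illustrative view (per the legacy virtio-mmio register layout, not
+ * normative for this file): after vi_devemu_init() for a block device the
+ * guest's first reads of the window return
+ *
+ *	VIRTIO_MMIO_MAGIC_VALUE  -> 0x74726976 ("virt")
+ *	VIRTIO_MMIO_VERSION      -> 1 (legacy)
+ *	VIRTIO_MMIO_DEVICE_ID    -> VIRTIO_ID_BLOCK
+ *	VIRTIO_MMIO_VENDOR_ID    -> VIRTIO_VENDOR
+ */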
Index: usr.sbin/bhyve/mmio/mmio_virtio_block.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_block.c
@@ -0,0 +1,424 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <md5.h>
+#include <dev/pci/pcireg.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#include "block_if.h"
+
+#define VTBLK_RINGSZ 128
+
+_Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request");
+
+#define VTBLK_S_OK 0
+#define VTBLK_S_IOERR 1
+#define VTBLK_S_UNSUPP 2
+
+#define VTBLK_BLK_ID_BYTES 20 + 1
+
+/* Capability bits */
+#define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */
+#define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */
+#define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */
+#define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */
+
+/*
+ * Host capabilities
+ */
+#define VTBLK_S_HOSTCAPS \
+ ( VTBLK_F_SEG_MAX | \
+ VTBLK_F_BLK_SIZE | \
+ VTBLK_F_FLUSH | \
+ VTBLK_F_TOPOLOGY | \
+ VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */
+
+/*
+ * Config space "registers"
+ */
+struct vtblk_config {
+ uint64_t vbc_capacity;
+ uint32_t vbc_size_max;
+ uint32_t vbc_seg_max;
+ struct {
+ uint16_t cylinders;
+ uint8_t heads;
+ uint8_t sectors;
+ } vbc_geometry;
+ uint32_t vbc_blk_size;
+ struct {
+ uint8_t physical_block_exp;
+ uint8_t alignment_offset;
+ uint16_t min_io_size;
+ uint32_t opt_io_size;
+ } vbc_topology;
+ uint8_t vbc_writeback;
+} __packed;
+
+/*
+ * Fixed-size block header
+ */
+struct virtio_blk_hdr {
+#define VBH_OP_READ 0
+#define VBH_OP_WRITE 1
+#define VBH_OP_FLUSH 4
+#define VBH_OP_FLUSH_OUT 5
+#define VBH_OP_IDENT 8
+#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */
+ uint32_t vbh_type;
+ uint32_t vbh_ioprio;
+ uint64_t vbh_sector;
+} __packed;
+
+/*
+ * Debug printf
+ */
+static int pci_vtblk_debug;
+#define DPRINTF(params) if (pci_vtblk_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+struct pci_vtblk_ioreq {
+ struct blockif_req io_req;
+ struct pci_vtblk_softc *io_sc;
+ uint8_t *io_status;
+ uint16_t io_idx;
+};
+
+/*
+ * Per-device softc
+ */
+struct pci_vtblk_softc {
+ struct virtio_softc vbsc_vs;
+ pthread_mutex_t vsc_mtx;
+ struct vqueue_info vbsc_vq;
+ struct vtblk_config vbsc_cfg;
+ struct blockif_ctxt *bc;
+ char vbsc_ident[VTBLK_BLK_ID_BYTES];
+ struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
+};
+
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtblk_vi_consts = {
+ "vtblk", /* our name */
+ 1, /* we support 1 virtqueue */
+ sizeof(struct vtblk_config), /* config reg size */
+ pci_vtblk_reset, /* reset */
+ pci_vtblk_notify, /* device-wide qnotify */
+	pci_vtblk_cfgread,		/* read virtio config */
+	pci_vtblk_cfgwrite,		/* write virtio config */
+ NULL, /* apply negotiated features */
+ VTBLK_S_HOSTCAPS, /* our capabilities */
+};
+
+static void
+pci_vtblk_reset(void *vsc)
+{
+ struct pci_vtblk_softc *sc = vsc;
+
+ DPRINTF(("vtblk: device reset requested !"));
+ vi_reset_dev(&sc->vbsc_vs);
+}
+
+static void
+pci_vtblk_done(struct blockif_req *br, int err)
+{
+ struct pci_vtblk_ioreq *io = br->br_param;
+ struct pci_vtblk_softc *sc = io->io_sc;
+
+ /* convert errno into a virtio block error return */
+ if (err == EOPNOTSUPP || err == ENOSYS)
+ *io->io_status = VTBLK_S_UNSUPP;
+ else if (err != 0)
+ *io->io_status = VTBLK_S_IOERR;
+ else
+ *io->io_status = VTBLK_S_OK;
+
+ /*
+ * Return the descriptor back to the host.
+ * We wrote 1 byte (our status) to host.
+ */
+ pthread_mutex_lock(&sc->vsc_mtx);
+ vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
+ vq_endchains(&sc->vbsc_vq, 0);
+ pthread_mutex_unlock(&sc->vsc_mtx);
+}
+
+static void
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
+{
+ struct virtio_blk_hdr *vbh;
+ struct pci_vtblk_ioreq *io;
+ int i, n;
+ int err;
+ ssize_t iolen;
+ int writeop, type;
+ struct iovec iov[BLOCKIF_IOV_MAX + 2];
+ uint16_t idx, flags[BLOCKIF_IOV_MAX + 2];
+
+ n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags);
+
+ /*
+ * The first descriptor will be the read-only fixed header,
+ * and the last is for status (hence +2 above and below).
+ * The remaining iov's are the actual data I/O vectors.
+ *
+ * XXX - note - this fails on crash dump, which does a
+ * VIRTIO_BLK_T_FLUSH with a zero transfer length
+ */
+ assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
+
+ io = &sc->vbsc_ios[idx];
+ assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+ assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+ vbh = iov[0].iov_base;
+ memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
+ io->io_req.br_iovcnt = n - 2;
+ io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE;
+ io->io_status = iov[--n].iov_base;
+ assert(iov[n].iov_len == 1);
+ assert(flags[n] & VRING_DESC_F_WRITE);
+
+ /*
+ * XXX
+ * The guest should not be setting the BARRIER flag because
+ * we don't advertise the capability.
+ */
+ type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
+ writeop = (type == VBH_OP_WRITE);
+
+ iolen = 0;
+ for (i = 1; i < n; i++) {
+ /*
+ * - write op implies read-only descriptor,
+ * - read/ident op implies write-only descriptor,
+ * therefore test the inverse of the descriptor bit
+ * to the op.
+ */
+ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+ iolen += iov[i].iov_len;
+ }
+ io->io_req.br_resid = iolen;
+
+ DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %lld",
+ writeop ? "write" : "read/ident", iolen, i - 1,
+ (long long) io->io_req.br_offset));
+
+ switch (type) {
+ case VBH_OP_READ:
+ err = blockif_read(sc->bc, &io->io_req);
+ break;
+ case VBH_OP_WRITE:
+ err = blockif_write(sc->bc, &io->io_req);
+ break;
+ case VBH_OP_FLUSH:
+ case VBH_OP_FLUSH_OUT:
+ err = blockif_flush(sc->bc, &io->io_req);
+ break;
+ case VBH_OP_IDENT:
+		/*
+		 * Assume a single buffer.  A serial number that exactly
+		 * fills the buffer is not zero-terminated.
+		 */
+ memset(iov[1].iov_base, 0, iov[1].iov_len);
+ strncpy(iov[1].iov_base, sc->vbsc_ident,
+ MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
+ pci_vtblk_done(&io->io_req, 0);
+ return;
+ default:
+ pci_vtblk_done(&io->io_req, EOPNOTSUPP);
+ return;
+ }
+ assert(err == 0);
+}
+
+static void
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtblk_softc *sc = vsc;
+
+ while (vq_has_descs(vq))
+ pci_vtblk_proc(sc, vq);
+}
+
+static int
+pci_vtblk_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ char bident[sizeof("XX:X:X")];
+ struct blockif_ctxt *bctxt;
+ MD5_CTX mdctx;
+ u_char digest[16];
+ struct pci_vtblk_softc *sc;
+ off_t size;
+ int i, sectsz, sts, sto;
+
+ if (opts == NULL) {
+ WPRINTF(("virtio-block: backing device required"));
+ return (1);
+ }
+
+ /*
+ * The supplied backing file has to exist
+ */
+ snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->di_func);
+ bctxt = blockif_open(opts, bident);
+ if (bctxt == NULL) {
+ perror("Could not open backing file");
+ return (1);
+ }
+
+ size = blockif_size(bctxt);
+ sectsz = blockif_sectsz(bctxt);
+ blockif_psectsz(bctxt, &sts, &sto);
+
+ sc = calloc(1, sizeof(struct pci_vtblk_softc));
+ sc->bc = bctxt;
+ for (i = 0; i < VTBLK_RINGSZ; i++) {
+ struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
+ io->io_req.br_callback = pci_vtblk_done;
+ io->io_req.br_param = io;
+ io->io_sc = sc;
+ io->io_idx = i;
+ }
+
+ pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+ /* init virtio softc and virtqueues */
+ vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+ sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
+
+ sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+ /* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
+ /*
+ * Create an identifier for the backing file. Use parts of the
+	 * MD5 sum of the filename.
+ */
+ MD5Init(&mdctx);
+ MD5Update(&mdctx, opts, strlen(opts));
+ MD5Final(digest, &mdctx);
+ snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
+ "BHYVE-%02X%02X-%02X%02X-%02X%02X",
+ digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
+
+ /* setup virtio block config space */
+ sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */
+ sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
+
+ /*
+ * If Linux is presented with a seg_max greater than the virtio queue
+ * size, it can stumble into situations where it violates its own
+ * invariants and panics. For safety, we keep seg_max clamped, paying
+ * heed to the two extra descriptors needed for the header and status
+ * of a request.
+ */
+ sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
+ sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */
+ sc->vbsc_cfg.vbc_geometry.heads = 0;
+ sc->vbsc_cfg.vbc_geometry.sectors = 0;
+ sc->vbsc_cfg.vbc_blk_size = sectsz;
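+	/*
+	 * Illustrative example: a backing store reporting 4096-byte physical
+	 * sectors (sts) behind 512-byte logical sectors (sectsz) gives
+	 * physical_block_exp = ffsll(4096 / 512) - 1 = 3, i.e. eight logical
+	 * blocks per physical block.
+	 */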
+ sc->vbsc_cfg.vbc_topology.physical_block_exp =
+ (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
+ sc->vbsc_cfg.vbc_topology.alignment_offset =
+ (sto != 0) ? ((sts - sto) / sectsz) : 0;
+ sc->vbsc_cfg.vbc_topology.min_io_size = 0;
+ sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
+ sc->vbsc_cfg.vbc_writeback = 0;
+
+ /*
+ * Should we move some of this into virtio.c? Could
+ * have the device, class, and subdev_0 as fields in
+ * the virtio constants structure.
+ */
+ vi_devemu_init(pi, VIRTIO_TYPE_BLOCK);
+
+ if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
+ blockif_close(sc->bc);
+ free(sc);
+ return (1);
+ }
+ vi_set_io_res(&sc->vbsc_vs, 0);
+ return (0);
+}
+
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+
+ DPRINTF(("vtblk: write to readonly reg %d", offset));
+ return (1);
+}
+
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtblk_softc *sc = vsc;
+ void *ptr;
+
+ /* our caller has already verified offset and size */
+ ptr = (uint8_t *)&sc->vbsc_cfg + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+struct mmio_devemu pci_de_vblk = {
+ .de_emu = "virtio-blk",
+ .de_init = pci_vtblk_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vblk);
Index: usr.sbin/bhyve/mmio/mmio_virtio_console.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_console.c
@@ -0,0 +1,680 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 iXsystems Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Jakub Klama <jceel@FreeBSD.org>
+ * under sponsorship from iXsystems Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <dev/pci/pcireg.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <sysexits.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#include "mevent.h"
+#include "sockstream.h"
+
+#define VTCON_RINGSZ 64
+#define VTCON_MAXPORTS 16
+#define VTCON_MAXQ (VTCON_MAXPORTS * 2 + 2)
+
+#define VTCON_DEVICE_READY 0
+#define VTCON_DEVICE_ADD 1
+#define VTCON_DEVICE_REMOVE 2
+#define VTCON_PORT_READY 3
+#define VTCON_CONSOLE_PORT 4
+#define VTCON_CONSOLE_RESIZE 5
+#define VTCON_PORT_OPEN 6
+#define VTCON_PORT_NAME 7
+
+#define VTCON_F_SIZE 0
+#define VTCON_F_MULTIPORT 1
+#define VTCON_F_EMERG_WRITE 2
+#define VTCON_S_HOSTCAPS \
+ (VTCON_F_SIZE | VTCON_F_MULTIPORT | VTCON_F_EMERG_WRITE)
+
+static int pci_vtcon_debug;
+#define DPRINTF(params) if (pci_vtcon_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+struct pci_vtcon_softc;
+struct pci_vtcon_port;
+struct pci_vtcon_config;
+typedef void (pci_vtcon_cb_t)(struct pci_vtcon_port *, void *, struct iovec *,
+ int);
+
+struct pci_vtcon_port {
+ struct pci_vtcon_softc * vsp_sc;
+ int vsp_id;
+ const char * vsp_name;
+ bool vsp_enabled;
+ bool vsp_console;
+ bool vsp_rx_ready;
+ bool vsp_open;
+ int vsp_rxq;
+ int vsp_txq;
+ void * vsp_arg;
+ pci_vtcon_cb_t * vsp_cb;
+};
+
+struct pci_vtcon_sock
+{
+ struct pci_vtcon_port * vss_port;
+ const char * vss_path;
+ struct mevent * vss_server_evp;
+ struct mevent * vss_conn_evp;
+ int vss_server_fd;
+ int vss_conn_fd;
+ bool vss_open;
+};
+
+struct pci_vtcon_softc {
+ struct virtio_softc vsc_vs;
+ struct vqueue_info vsc_queues[VTCON_MAXQ];
+ pthread_mutex_t vsc_mtx;
+ uint64_t vsc_cfg;
+ uint64_t vsc_features;
+ char * vsc_rootdir;
+ int vsc_kq;
+ int vsc_nports;
+ bool vsc_ready;
+ struct pci_vtcon_port vsc_control_port;
+ struct pci_vtcon_port vsc_ports[VTCON_MAXPORTS];
+ struct pci_vtcon_config *vsc_config;
+};
+
+struct pci_vtcon_config {
+ uint16_t cols;
+ uint16_t rows;
+ uint32_t max_nr_ports;
+ uint32_t emerg_wr;
+} __attribute__((packed));
+
+struct pci_vtcon_control {
+ uint32_t id;
+ uint16_t event;
+ uint16_t value;
+} __attribute__((packed));
+
+struct pci_vtcon_console_resize {
+ uint16_t cols;
+ uint16_t rows;
+} __attribute__((packed));
+
+static void pci_vtcon_reset(void *);
+static void pci_vtcon_notify_rx(void *, struct vqueue_info *);
+static void pci_vtcon_notify_tx(void *, struct vqueue_info *);
+static int pci_vtcon_cfgread(void *, int, int, uint32_t *);
+static int pci_vtcon_cfgwrite(void *, int, int, uint32_t);
+static void pci_vtcon_neg_features(void *, uint64_t);
+static void pci_vtcon_sock_accept(int, enum ev_type, void *);
+static void pci_vtcon_sock_rx(int, enum ev_type, void *);
+static void pci_vtcon_sock_tx(struct pci_vtcon_port *, void *, struct iovec *,
+ int);
+static void pci_vtcon_control_send(struct pci_vtcon_softc *,
+ struct pci_vtcon_control *, const void *, size_t);
+static void pci_vtcon_announce_port(struct pci_vtcon_port *);
+static void pci_vtcon_open_port(struct pci_vtcon_port *, bool);
+
+static struct virtio_consts vtcon_vi_consts = {
+ "vtcon", /* our name */
+ VTCON_MAXQ, /* we support VTCON_MAXQ virtqueues */
+ sizeof(struct pci_vtcon_config), /* config reg size */
+ pci_vtcon_reset, /* reset */
+ NULL, /* device-wide qnotify */
+ pci_vtcon_cfgread, /* read virtio config */
+ pci_vtcon_cfgwrite, /* write virtio config */
+ pci_vtcon_neg_features, /* apply negotiated features */
+ VTCON_S_HOSTCAPS, /* our capabilities */
+};
+
+
+static void
+pci_vtcon_reset(void *vsc)
+{
+ struct pci_vtcon_softc *sc;
+
+ sc = vsc;
+
+ DPRINTF(("vtcon: device reset requested!"));
+ vi_reset_dev(&sc->vsc_vs);
+}
+
+static void
+pci_vtcon_neg_features(void *vsc, uint64_t negotiated_features)
+{
+ struct pci_vtcon_softc *sc = vsc;
+
+ sc->vsc_features = negotiated_features;
+}
+
+static int
+pci_vtcon_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtcon_softc *sc = vsc;
+ void *ptr;
+
+ ptr = (uint8_t *)sc->vsc_config + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+static int
+pci_vtcon_cfgwrite(void *vsc, int offset, int size, uint32_t val)
+{
+
+ return (0);
+}
+
+static inline struct pci_vtcon_port *
+pci_vtcon_vq_to_port(struct pci_vtcon_softc *sc, struct vqueue_info *vq)
+{
+ uint16_t num = vq->vq_num;
+
+ if (num == 0 || num == 1)
+ return (&sc->vsc_ports[0]);
+
+ if (num == 2 || num == 3)
+ return (&sc->vsc_control_port);
+
+ return (&sc->vsc_ports[(num / 2) - 1]);
+}
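+
+/*
+ * For example (with the numbering used above), queues 0/1 belong to port 0,
+ * queues 2/3 to the control port, and queues 4/5 to vsc_ports[1].
+ */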
+
+static inline struct vqueue_info *
+pci_vtcon_port_to_vq(struct pci_vtcon_port *port, bool tx_queue)
+{
+ int qnum;
+
+ qnum = tx_queue ? port->vsp_txq : port->vsp_rxq;
+ return (&port->vsp_sc->vsc_queues[qnum]);
+}
+
+static struct pci_vtcon_port *
+pci_vtcon_port_add(struct pci_vtcon_softc *sc, const char *name,
+ pci_vtcon_cb_t *cb, void *arg)
+{
+ struct pci_vtcon_port *port;
+
+ if (sc->vsc_nports == VTCON_MAXPORTS) {
+ errno = EBUSY;
+ return (NULL);
+ }
+
+ port = &sc->vsc_ports[sc->vsc_nports++];
+ port->vsp_id = sc->vsc_nports - 1;
+ port->vsp_sc = sc;
+ port->vsp_name = name;
+ port->vsp_cb = cb;
+ port->vsp_arg = arg;
+
+ if (port->vsp_id == 0) {
+ /* port0 */
+ port->vsp_txq = 0;
+ port->vsp_rxq = 1;
+ } else {
+ port->vsp_txq = sc->vsc_nports * 2;
+ port->vsp_rxq = port->vsp_txq + 1;
+ }
+
+ port->vsp_enabled = true;
+ return (port);
+}
+
+static int
+pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name,
+ const char *path)
+{
+ struct pci_vtcon_sock *sock;
+ struct sockaddr_un sun;
+ char *pathcopy;
+ int s = -1, fd = -1, error = 0;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+#endif
+
+ sock = calloc(1, sizeof(struct pci_vtcon_sock));
+ if (sock == NULL) {
+ error = -1;
+ goto out;
+ }
+
+ s = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (s < 0) {
+ error = -1;
+ goto out;
+ }
+
+ pathcopy = strdup(path);
+ if (pathcopy == NULL) {
+ error = -1;
+ goto out;
+ }
+
+ fd = open(dirname(pathcopy), O_RDONLY | O_DIRECTORY);
+ if (fd < 0) {
+ free(pathcopy);
+ error = -1;
+ goto out;
+ }
+
+ sun.sun_family = AF_UNIX;
+ sun.sun_len = sizeof(struct sockaddr_un);
+ strcpy(pathcopy, path);
+ strlcpy(sun.sun_path, basename(pathcopy), sizeof(sun.sun_path));
+ free(pathcopy);
+
+ if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0) {
+ error = -1;
+ goto out;
+ }
+
+ if (fcntl(s, F_SETFL, O_NONBLOCK) < 0) {
+ error = -1;
+ goto out;
+ }
+
+ if (listen(s, 1) < 0) {
+ error = -1;
+ goto out;
+ }
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE);
+ if (caph_rights_limit(s, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ sock->vss_port = pci_vtcon_port_add(sc, name, pci_vtcon_sock_tx, sock);
+ if (sock->vss_port == NULL) {
+ error = -1;
+ goto out;
+ }
+
+ sock->vss_open = false;
+ sock->vss_conn_fd = -1;
+ sock->vss_server_fd = s;
+ sock->vss_server_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_accept,
+ sock);
+
+ if (sock->vss_server_evp == NULL) {
+ error = -1;
+ goto out;
+ }
+
+out:
+ if (fd != -1)
+ close(fd);
+
+ if (error != 0) {
+ if (s != -1)
+ close(s);
+ free(sock);
+ }
+
+ return (error);
+}
+
+static void
+pci_vtcon_sock_accept(int fd __unused, enum ev_type t __unused, void *arg)
+{
+ struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg;
+ int s;
+
+ s = accept(sock->vss_server_fd, NULL, NULL);
+ if (s < 0)
+ return;
+
+ if (sock->vss_open) {
+ close(s);
+ return;
+ }
+
+ sock->vss_open = true;
+ sock->vss_conn_fd = s;
+ sock->vss_conn_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_rx, sock);
+
+ pci_vtcon_open_port(sock->vss_port, true);
+}
+
+static void
+pci_vtcon_sock_rx(int fd __unused, enum ev_type t __unused, void *arg)
+{
+ struct pci_vtcon_port *port;
+ struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg;
+ struct vqueue_info *vq;
+ struct iovec iov;
+ static char dummybuf[2048];
+ int len, n;
+ uint16_t idx;
+
+ port = sock->vss_port;
+ vq = pci_vtcon_port_to_vq(port, true);
+
+ if (!sock->vss_open || !port->vsp_rx_ready) {
+ len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf));
+ if (len == 0)
+ goto close;
+
+ return;
+ }
+
+ if (!vq_has_descs(vq)) {
+ len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf));
+ vq_endchains(vq, 1);
+ if (len == 0)
+ goto close;
+
+ return;
+ }
+
+ do {
+ n = vq_getchain(vq, &idx, &iov, 1, NULL);
+ len = readv(sock->vss_conn_fd, &iov, n);
+
+ if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) {
+ vq_retchains(vq, 1);
+ vq_endchains(vq, 0);
+ if (len == 0)
+ goto close;
+
+ return;
+ }
+
+ vq_relchain(vq, idx, len);
+ } while (vq_has_descs(vq));
+
+ vq_endchains(vq, 1);
+
+close:
+ mevent_delete_close(sock->vss_conn_evp);
+ sock->vss_conn_fd = -1;
+ sock->vss_open = false;
+}
+
+static void
+pci_vtcon_sock_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov,
+ int niov)
+{
+ struct pci_vtcon_sock *sock;
+ int i, ret;
+
+ sock = (struct pci_vtcon_sock *)arg;
+
+ if (sock->vss_conn_fd == -1)
+ return;
+
+ for (i = 0; i < niov; i++) {
+ ret = stream_write(sock->vss_conn_fd, iov[i].iov_base,
+ iov[i].iov_len);
+ if (ret <= 0)
+ break;
+ }
+
+ if (ret <= 0) {
+ mevent_delete_close(sock->vss_conn_evp);
+ sock->vss_conn_fd = -1;
+ sock->vss_open = false;
+ }
+}
+
+static void
+pci_vtcon_control_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov,
+ int niov)
+{
+ struct pci_vtcon_softc *sc;
+ struct pci_vtcon_port *tmp;
+ struct pci_vtcon_control resp, *ctrl;
+ int i;
+
+ assert(niov == 1);
+
+ sc = port->vsp_sc;
+ ctrl = (struct pci_vtcon_control *)iov->iov_base;
+
+ switch (ctrl->event) {
+ case VTCON_DEVICE_READY:
+ sc->vsc_ready = true;
+ /* set port ready events for registered ports */
+ for (i = 0; i < VTCON_MAXPORTS; i++) {
+ tmp = &sc->vsc_ports[i];
+ if (tmp->vsp_enabled)
+ pci_vtcon_announce_port(tmp);
+
+ if (tmp->vsp_open)
+ pci_vtcon_open_port(tmp, true);
+ }
+ break;
+
+ case VTCON_PORT_READY:
+ if (ctrl->id >= sc->vsc_nports) {
+ WPRINTF(("VTCON_PORT_READY event for unknown port %d",
+ ctrl->id));
+ return;
+ }
+
+ tmp = &sc->vsc_ports[ctrl->id];
+ if (tmp->vsp_console) {
+ resp.event = VTCON_CONSOLE_PORT;
+ resp.id = ctrl->id;
+ resp.value = 1;
+ pci_vtcon_control_send(sc, &resp, NULL, 0);
+ }
+ break;
+ }
+}
+
+static void
+pci_vtcon_announce_port(struct pci_vtcon_port *port)
+{
+ struct pci_vtcon_control event;
+
+ event.id = port->vsp_id;
+ event.event = VTCON_DEVICE_ADD;
+ event.value = 1;
+ pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0);
+
+ event.event = VTCON_PORT_NAME;
+ pci_vtcon_control_send(port->vsp_sc, &event, port->vsp_name,
+ strlen(port->vsp_name));
+}
+
+static void
+pci_vtcon_open_port(struct pci_vtcon_port *port, bool open)
+{
+ struct pci_vtcon_control event;
+
+ if (!port->vsp_sc->vsc_ready) {
+ port->vsp_open = true;
+ return;
+ }
+
+ event.id = port->vsp_id;
+ event.event = VTCON_PORT_OPEN;
+ event.value = (int)open;
+ pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0);
+}
+
+static void
+pci_vtcon_control_send(struct pci_vtcon_softc *sc,
+ struct pci_vtcon_control *ctrl, const void *payload, size_t len)
+{
+ struct vqueue_info *vq;
+ struct iovec iov;
+ uint16_t idx;
+ int n;
+
+ vq = pci_vtcon_port_to_vq(&sc->vsc_control_port, true);
+
+ if (!vq_has_descs(vq))
+ return;
+
+ n = vq_getchain(vq, &idx, &iov, 1, NULL);
+
+ assert(n == 1);
+
+ memcpy(iov.iov_base, ctrl, sizeof(struct pci_vtcon_control));
+ if (payload != NULL && len > 0)
+ memcpy(iov.iov_base + sizeof(struct pci_vtcon_control),
+ payload, len);
+
+ vq_relchain(vq, idx, sizeof(struct pci_vtcon_control) + len);
+ vq_endchains(vq, 1);
+}
+
+static void
+pci_vtcon_notify_tx(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtcon_softc *sc;
+ struct pci_vtcon_port *port;
+ struct iovec iov[1];
+ uint16_t idx, n;
+ uint16_t flags[8];
+
+ sc = vsc;
+ port = pci_vtcon_vq_to_port(sc, vq);
+
+ while (vq_has_descs(vq)) {
+ n = vq_getchain(vq, &idx, iov, 1, flags);
+ assert(n >= 1);
+ if (port != NULL)
+ port->vsp_cb(port, port->vsp_arg, iov, 1);
+
+ /*
+ * Release this chain and handle more
+ */
+ vq_relchain(vq, idx, 0);
+ }
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
+}
+
+static void
+pci_vtcon_notify_rx(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtcon_softc *sc;
+ struct pci_vtcon_port *port;
+
+ sc = vsc;
+ port = pci_vtcon_vq_to_port(sc, vq);
+
+ if (!port->vsp_rx_ready) {
+ port->vsp_rx_ready = 1;
+ vq_kick_disable(vq);
+ }
+}
+
+static int
+pci_vtcon_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtcon_softc *sc;
+ char *portname = NULL;
+ char *portpath = NULL;
+ char *opt;
+ int i;
+
+ sc = calloc(1, sizeof(struct pci_vtcon_softc));
+ sc->vsc_config = calloc(1, sizeof(struct pci_vtcon_config));
+ sc->vsc_config->max_nr_ports = VTCON_MAXPORTS;
+ sc->vsc_config->cols = 80;
+ sc->vsc_config->rows = 25;
+
+ vi_softc_linkup(&sc->vsc_vs, &vtcon_vi_consts, sc, pi, sc->vsc_queues);
+ sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+
+ for (i = 0; i < VTCON_MAXQ; i++) {
+ sc->vsc_queues[i].vq_qsize = VTCON_RINGSZ;
+ sc->vsc_queues[i].vq_notify = i % 2 == 0
+ ? pci_vtcon_notify_rx
+ : pci_vtcon_notify_tx;
+ }
+
+ /* initialize config space */
+ vi_devemu_init(pi, VIRTIO_TYPE_CONSOLE);
+
+ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
+ return (1);
+ vi_set_io_res(&sc->vsc_vs, 0);
+
+ /* create control port */
+ sc->vsc_control_port.vsp_sc = sc;
+ sc->vsc_control_port.vsp_txq = 2;
+ sc->vsc_control_port.vsp_rxq = 3;
+ sc->vsc_control_port.vsp_cb = pci_vtcon_control_tx;
+ sc->vsc_control_port.vsp_enabled = true;
+
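+	/*
+	 * Parse the option string as comma-separated name=path pairs, e.g.
+	 * (hypothetical) "port1=/tmp/port1.sock,port2=/tmp/port2.sock";
+	 * each entry becomes a socket-backed console port.
+	 */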
+ while ((opt = strsep(&opts, ",")) != NULL) {
+ portname = strsep(&opt, "=");
+ portpath = opt;
+
+ /* create port */
+ if (pci_vtcon_sock_add(sc, portname, portpath) < 0) {
+ EPRINTLN("cannot create port %s: %s",
+ portname, strerror(errno));
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
+struct mmio_devemu pci_de_vcon = {
+ .de_emu = "virtio-console",
+ .de_init = pci_vtcon_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vcon);
Index: usr.sbin/bhyve/mmio/mmio_virtio_net.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_net.c
@@ -0,0 +1,697 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <net/ethernet.h>
+#include <net/if.h> /* IFNAMSIZ */
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <dev/pci/pcireg.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#include "mevent.h"
+#include "net_utils.h"
+#include "net_backends.h"
+#include "iov.h"
+
+#define VTNET_RINGSZ 1024
+
+#define VTNET_MAXSEGS 256
+
+#define VTNET_MAX_PKT_LEN (65536 + 64)
+
+#define VTNET_S_HOSTCAPS \
+ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
+ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
+
+/*
+ * Config space "registers"
+ */
+struct virtio_net_config {
+ uint8_t mac[6];
+ uint16_t status;
+} __packed;
+
+/*
+ * Queue definitions.
+ */
+#define VTNET_RXQ 0
+#define VTNET_TXQ 1
+#define VTNET_CTLQ 2 /* NB: not yet supported */
+
+#define VTNET_MAXQ 3
+
+/*
+ * Debug printf
+ */
+static int pci_vtnet_debug;
+#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtnet_softc {
+ struct virtio_softc vsc_vs;
+ struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
+ pthread_mutex_t vsc_mtx;
+
+ net_backend_t *vsc_be;
+
+ int resetting; /* protected by tx_mtx */
+
+ uint64_t vsc_features; /* negotiated features */
+
+ pthread_mutex_t rx_mtx;
+ int rx_merge; /* merged rx bufs in use */
+
+ pthread_t tx_tid;
+ pthread_mutex_t tx_mtx;
+ pthread_cond_t tx_cond;
+ int tx_in_progress;
+
+ size_t vhdrlen;
+ size_t be_vhdrlen;
+
+ struct virtio_net_config vsc_config;
+ struct virtio_consts vsc_consts;
+};
+
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+static void pci_vtnet_neg_features(void *, uint64_t);
+
+static struct virtio_consts vtnet_vi_consts = {
+ "vtnet", /* our name */
+ VTNET_MAXQ - 1, /* we currently support 2 virtqueues */
+ sizeof(struct virtio_net_config), /* config reg size */
+ pci_vtnet_reset, /* reset */
+ NULL, /* device-wide qnotify -- not used */
+	pci_vtnet_cfgread,		/* read virtio config */
+	pci_vtnet_cfgwrite,		/* write virtio config */
+ pci_vtnet_neg_features, /* apply negotiated features */
+ VTNET_S_HOSTCAPS, /* our capabilities */
+};
+
+static void
+pci_vtnet_reset(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device reset requested !"));
+
+ /* Acquire the RX lock to block RX processing. */
+ pthread_mutex_lock(&sc->rx_mtx);
+
+ /*
+ * Make sure receive operation is disabled at least until we
+ * re-negotiate the features, since receive operation depends
+ * on the value of sc->rx_merge and the header length, which
+ * are both set in pci_vtnet_neg_features().
+ * Receive operation will be enabled again once the guest adds
+ * the first receive buffers and kicks us.
+ */
+ netbe_rx_disable(sc->vsc_be);
+
+ /* Set sc->resetting and give a chance to the TX thread to stop. */
+ pthread_mutex_lock(&sc->tx_mtx);
+ sc->resetting = 1;
+ while (sc->tx_in_progress) {
+ pthread_mutex_unlock(&sc->tx_mtx);
+ usleep(10000);
+ pthread_mutex_lock(&sc->tx_mtx);
+ }
+
+ /*
+ * Now reset rings, MSI-X vectors, and negotiated capabilities.
+ * Do that with the TX lock held, since we need to reset
+ * sc->resetting.
+ */
+ vi_reset_dev(&sc->vsc_vs);
+
+ sc->resetting = 0;
+ pthread_mutex_unlock(&sc->tx_mtx);
+ pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+static __inline struct iovec *
+iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
+{
+ struct iovec *riov;
+
+ if (iov[0].iov_len < hlen) {
+ /*
+ * Not enough header space in the first fragment.
+ * That's not ok for us.
+ */
+ return NULL;
+ }
+
+ iov[0].iov_len -= hlen;
+ if (iov[0].iov_len == 0) {
+ *iovcnt -= 1;
+ if (*iovcnt == 0) {
+ /*
+ * Only space for the header. That's not
+ * enough for us.
+ */
+ return NULL;
+ }
+ riov = &iov[1];
+ } else {
+ iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
+ riov = &iov[0];
+ }
+
+ return (riov);
+}
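+
+/*
+ * For example (illustrative): with a 10-byte virtio-net header negotiated by
+ * the guest and a backend that consumes no header, iov_trim_hdr() advances
+ * iov[0] past the header (or drops iov[0] entirely if it held only the
+ * header), so that only the frame data is handed to the backend.
+ */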
+
+struct virtio_mrg_rxbuf_info {
+ uint16_t idx;
+ uint16_t pad;
+ uint32_t len;
+};
+
+static void
+pci_vtnet_rx(struct pci_vtnet_softc *sc)
+{
+ int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
+ struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
+ struct iovec iov[VTNET_MAXSEGS + 1];
+ struct vqueue_info *vq;
+
+ vq = &sc->vsc_queues[VTNET_RXQ];
+ for (;;) {
+ struct virtio_net_rxhdr *hdr;
+ uint32_t riov_bytes;
+ struct iovec *riov;
+ uint32_t ulen;
+ int riov_len;
+ int n_chains;
+ ssize_t rlen;
+ ssize_t plen;
+
+ plen = netbe_peek_recvlen(sc->vsc_be);
+ if (plen <= 0) {
+ /*
+ * No more packets (plen == 0), or backend errored
+ * (plen < 0). Interrupt if needed and stop.
+ */
+ vq_endchains(vq, /*used_all_avail=*/0);
+ return;
+ }
+ plen += prepend_hdr_len;
+
+ /*
+ * Get a descriptor chain to store the next ingress
+ * packet. In case of mergeable rx buffers, get as
+ * many chains as necessary in order to make room
+ * for a maximum sized LRO packet.
+ */
+ riov_bytes = 0;
+ riov_len = 0;
+ riov = iov;
+ n_chains = 0;
+ do {
+ int n = vq_getchain(vq, &info[n_chains].idx, riov,
+ VTNET_MAXSEGS - riov_len, NULL);
+
+ if (n == 0) {
+ /*
+ * No rx buffers. Enable RX kicks and double
+ * check.
+ */
+ vq_kick_enable(vq);
+ if (!vq_has_descs(vq)) {
+ /*
+ * Still no buffers. Return the unused
+ * chains (if any), interrupt if needed
+ * (including for NOTIFY_ON_EMPTY), and
+ * disable the backend until the next
+ * kick.
+ */
+ vq_retchains(vq, n_chains);
+ vq_endchains(vq, /*used_all_avail=*/1);
+ netbe_rx_disable(sc->vsc_be);
+ return;
+ }
+
+ /* More rx buffers found, so keep going. */
+ vq_kick_disable(vq);
+ continue;
+ }
+ assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
+ riov_len += n;
+ if (!sc->rx_merge) {
+ n_chains = 1;
+ break;
+ }
+ info[n_chains].len = (uint32_t)count_iov(riov, n);
+ riov_bytes += info[n_chains].len;
+ riov += n;
+ n_chains++;
+ } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS);
+
+ riov = iov;
+ hdr = riov[0].iov_base;
+ if (prepend_hdr_len > 0) {
+ /*
+ * The frontend uses a virtio-net header, but the
+ * backend does not. We need to prepend a zeroed
+ * header.
+ */
+ riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
+ if (riov == NULL) {
+ /*
+ * The first collected chain is nonsensical,
+ * as it is not even enough to store the
+ * virtio-net header. Just drop it.
+ */
+ vq_relchain(vq, info[0].idx, 0);
+ vq_retchains(vq, n_chains - 1);
+ continue;
+ }
+ memset(hdr, 0, prepend_hdr_len);
+ }
+
+ rlen = netbe_recv(sc->vsc_be, riov, riov_len);
+
+ if (rlen != plen - prepend_hdr_len) {
+ /*
+ * No more packets (len == 0), or backend errored
+ * (err < 0). Return unused available buffers
+ * and stop.
+ */
+ vq_retchains(vq, n_chains);
+ /* Interrupt if needed/appropriate and stop. */
+ vq_endchains(vq, /*used_all_avail=*/0);
+ return;
+ }
+
+ ulen = (uint32_t)plen; /* avoid too many casts below */
+
+ /* Publish the used buffers to the guest. */
+ if (!sc->rx_merge) {
+ vq_relchain(vq, info[0].idx, ulen);
+ } else {
+ uint32_t iolen;
+ int i = 0;
+
+ do {
+ iolen = info[i].len;
+ if (iolen > ulen) {
+ iolen = ulen;
+ }
+ vq_relchain(vq, info[i].idx, iolen);
+ ulen -= iolen;
+ i++;
+ } while (ulen > 0);
+
+ hdr->vrh_bufs = i;
+ // TODO add publish for arm64
+ //vq_relchain_publish(vq);
+ vq_retchains(vq, n_chains - i);
+ }
+	}
+}
+/*
+ * Called when there is read activity on the backend file descriptor.
+ * Each buffer posted by the guest is assumed to be able to contain
+ * an entire ethernet frame + rx header.
+ */
+static void
+pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
+{
+ struct pci_vtnet_softc *sc = param;
+
+ pthread_mutex_lock(&sc->rx_mtx);
+ pci_vtnet_rx(sc);
+	pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+/* Called on RX kick. */
+static void
+pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ /*
+ * A qnotify means that the rx process can now begin.
+ */
+ pthread_mutex_lock(&sc->rx_mtx);
+ vq_kick_disable(vq);
+ netbe_rx_enable(sc->vsc_be);
+ pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+/* TX virtqueue processing, called by the TX thread. */
+static void
+pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
+{
+ struct iovec iov[VTNET_MAXSEGS + 1];
+ struct iovec *siov = iov;
+ uint16_t idx;
+ ssize_t len;
+ int n;
+
+ /*
+ * Obtain chain of descriptors. The first descriptor also
+ * contains the virtio-net header.
+ */
+ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+ assert(n >= 1 && n <= VTNET_MAXSEGS);
+
+ if (sc->vhdrlen != sc->be_vhdrlen) {
+ /*
+ * The frontend uses a virtio-net header, but the backend
+ * does not. We simply strip the header and ignore it, as
+ * it should be zero-filled.
+ */
+ siov = iov_trim_hdr(siov, &n, sc->vhdrlen);
+ }
+
+ if (siov == NULL) {
+ /* The chain is nonsensical. Just drop it. */
+ len = 0;
+ } else {
+ len = netbe_send(sc->vsc_be, siov, n);
+ if (len < 0) {
+ /*
+ * If send failed, report that 0 bytes
+ * were read.
+ */
+ len = 0;
+ }
+ }
+
+ /*
+ * Return the processed chain to the guest, reporting
+ * the number of bytes that we read.
+ */
+ vq_relchain(vq, idx, len > 0 ? len : 0);
+}
+
+/* Called on TX kick. */
+static void
+pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ /*
+ * Any ring entries to process?
+ */
+ if (!vq_has_descs(vq))
+ return;
+
+ /* Signal the tx thread for processing */
+ pthread_mutex_lock(&sc->tx_mtx);
+ vq_kick_disable(vq);
+ if (sc->tx_in_progress == 0)
+ pthread_cond_signal(&sc->tx_cond);
+ pthread_mutex_unlock(&sc->tx_mtx);
+}
+
+/*
+ * Thread which will handle processing of TX desc
+ */
+static void *
+pci_vtnet_tx_thread(void *param)
+{
+ struct pci_vtnet_softc *sc = param;
+ struct vqueue_info *vq;
+ int error;
+
+ vq = &sc->vsc_queues[VTNET_TXQ];
+
+ /*
+	 * Wait until the TX queue pointers are initialised and the
+	 * first TX is signaled.
+ */
+ pthread_mutex_lock(&sc->tx_mtx);
+ error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+ assert(error == 0);
+
+ for (;;) {
+ /* note - tx mutex is locked here */
+ while (sc->resetting || !vq_has_descs(vq)) {
+ vq_kick_enable(vq);
+ if (!sc->resetting && vq_has_descs(vq))
+ break;
+
+ sc->tx_in_progress = 0;
+ error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+ assert(error == 0);
+ }
+ vq_kick_disable(vq);
+ sc->tx_in_progress = 1;
+ pthread_mutex_unlock(&sc->tx_mtx);
+
+ do {
+ /*
+ * Run through entries, placing them into
+ * iovecs and sending when an end-of-packet
+ * is found
+ */
+ pci_vtnet_proctx(sc, vq);
+ } while (vq_has_descs(vq));
+
+ /*
+ * Generate an interrupt if needed.
+ */
+ vq_endchains(vq, /*used_all_avail=*/1);
+
+ pthread_mutex_lock(&sc->tx_mtx);
+ }
+}
+
+#ifdef notyet
+static void
+pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
+{
+
+ DPRINTF(("vtnet: control qnotify!"));
+}
+#endif
+
+static int
+pci_vtnet_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtnet_softc *sc;
+ char tname[MAXCOMLEN + 1];
+ int mac_provided;
+
+ /*
+ * Allocate data structures for further virtio initializations.
+ * sc also contains a copy of vtnet_vi_consts, since capabilities
+ * change depending on the backend.
+ */
+ sc = calloc(1, sizeof(struct pci_vtnet_softc));
+
+ sc->vsc_consts = vtnet_vi_consts;
+ pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+ sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
+ sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
+#ifdef notyet
+ sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
+#endif
+
+ /*
+ * Attempt to open the backend device and read the MAC address
+ * if specified.
+ */
+ mac_provided = 0;
+ if (opts != NULL) {
+ char *devname;
+ char *vtopts;
+ int err;
+
+ devname = vtopts = strdup(opts);
+ (void) strsep(&vtopts, ",");
+
+ if (vtopts != NULL) {
+ err = net_parsemac(vtopts, sc->vsc_config.mac);
+ if (err != 0) {
+ free(devname);
+ free(sc);
+ return (err);
+ }
+ mac_provided = 1;
+ }
+
+ err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback,
+ sc);
+ free(devname);
+ if (err) {
+ free(sc);
+ return (err);
+ }
+ sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be);
+ }
+
+ if (!mac_provided) {
+ net_genmac(pi, sc->vsc_config.mac);
+ }
+
+ /* initialize config space */
+ vi_devemu_init(pi, VIRTIO_TYPE_NET);
+
+ /* Link is up if we managed to open backend device. */
+ sc->vsc_config.status = (opts == NULL || sc->vsc_be);
+
+ vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
+ sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+
+ /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
+ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) {
+ free(sc);
+ return (1);
+ }
+
+ /* use BAR 0 to map config regs in IO space */
+ vi_set_io_res(&sc->vsc_vs, 0);
+
+ sc->resetting = 0;
+
+ sc->rx_merge = 0;
+ pthread_mutex_init(&sc->rx_mtx, NULL);
+
+ /*
+ * Initialize tx semaphore & spawn TX processing thread.
+ * As of now, only one thread for TX desc processing is
+ * spawned.
+ */
+ sc->tx_in_progress = 0;
+ pthread_mutex_init(&sc->tx_mtx, NULL);
+ pthread_cond_init(&sc->tx_cond, NULL);
+ pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
+ snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
+ pi->di_func);
+ pthread_set_name_np(sc->tx_tid, tname);
+
+ return (0);
+}
+
+static int
+pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+ struct pci_vtnet_softc *sc = vsc;
+ void *ptr;
+
+ if (offset < (int)sizeof(sc->vsc_config.mac)) {
+ assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
+ /*
+ * The driver is allowed to change the MAC address
+ */
+ ptr = &sc->vsc_config.mac[offset];
+ memcpy(ptr, &value, size);
+ } else {
+ /* silently ignore other writes */
+ DPRINTF(("vtnet: write to readonly reg %d", offset));
+ }
+
+ return (0);
+}
+
+static int
+pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtnet_softc *sc = vsc;
+ void *ptr;
+
+ ptr = (uint8_t *)&sc->vsc_config + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+static void
+pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ sc->vsc_features = negotiated_features;
+
+ if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
+ sc->vhdrlen = sizeof(struct virtio_net_rxhdr);
+ sc->rx_merge = 1;
+ } else {
+ /*
+ * Without mergeable rx buffers, virtio-net header is 2
+ * bytes shorter than sizeof(struct virtio_net_rxhdr).
+ */
+ sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
+ sc->rx_merge = 0;
+ }
+
+ /* Tell the backend to enable some capabilities it has advertised. */
+ netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen);
+ sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
+}
+
+static struct mmio_devemu pci_de_vnet = {
+ .de_emu = "virtio-net",
+ .de_init = pci_vtnet_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vnet);
Index: usr.sbin/bhyve/mmio/mmio_virtio_rnd.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_rnd.c
@@ -0,0 +1,208 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Nahanni Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * virtio entropy device emulation.
+ * Randomness is sourced from /dev/random which does not block
+ * once it has been seeded at bootup.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <dev/pci/pcireg.h>
+#include <sysexits.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#define VTRND_RINGSZ 64
+
+
+static int pci_vtrnd_debug;
+#define DPRINTF(params) if (pci_vtrnd_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtrnd_softc {
+ struct virtio_softc vrsc_vs;
+ struct vqueue_info vrsc_vq;
+ pthread_mutex_t vrsc_mtx;
+ uint64_t vrsc_cfg;
+ int vrsc_fd;
+};
+
+static void pci_vtrnd_reset(void *);
+static void pci_vtrnd_notify(void *, struct vqueue_info *);
+
+static struct virtio_consts vtrnd_vi_consts = {
+ "vtrnd", /* our name */
+ 1, /* we support 1 virtqueue */
+ 0, /* config reg size */
+ pci_vtrnd_reset, /* reset */
+ pci_vtrnd_notify, /* device-wide qnotify */
+ NULL, /* read virtio config */
+ NULL, /* write virtio config */
+ NULL, /* apply negotiated features */
+ 0, /* our capabilities */
+};
+
+
+static void
+pci_vtrnd_reset(void *vsc)
+{
+ struct pci_vtrnd_softc *sc;
+
+ sc = vsc;
+
+ DPRINTF(("vtrnd: device reset requested !"));
+ vi_reset_dev(&sc->vrsc_vs);
+}
+
+
+static void
+pci_vtrnd_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct iovec iov;
+ struct pci_vtrnd_softc *sc;
+ int len;
+ uint16_t idx;
+
+ sc = vsc;
+
+ if (sc->vrsc_fd < 0) {
+ vq_endchains(vq, 0);
+ return;
+ }
+
+ while (vq_has_descs(vq)) {
+ vq_getchain(vq, &idx, &iov, 1, NULL);
+
+ len = read(sc->vrsc_fd, iov.iov_base, iov.iov_len);
+
+ DPRINTF(("vtrnd: vtrnd_notify(): %d", len));
+
+ /* Catastrophe if unable to read from /dev/random */
+ assert(len > 0);
+
+ /*
+ * Release this chain and handle more
+ */
+ vq_relchain(vq, idx, len);
+ }
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
+}
+
+
+static int
+pci_vtrnd_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtrnd_softc *sc;
+ int fd;
+ int len;
+ uint8_t v;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+#endif
+
+ /*
+ * Should always be able to open /dev/random.
+ */
+ fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
+
+ assert(fd >= 0);
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_READ);
+ if (caph_rights_limit(fd, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ /*
+ * Check that device is seeded and non-blocking.
+ */
+ len = read(fd, &v, sizeof(v));
+ if (len <= 0) {
+ WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len));
+ close(fd);
+ return (1);
+ }
+
+ sc = calloc(1, sizeof(struct pci_vtrnd_softc));
+
+ vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq);
+ sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx;
+
+ sc->vrsc_vq.vq_qsize = VTRND_RINGSZ;
+
+ /* keep /dev/random opened while emulating */
+ sc->vrsc_fd = fd;
+
+ /* initialize config space */
+ vi_devemu_init(pi, VIRTIO_TYPE_ENTROPY);
+
+ if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix()))
+ return (1);
+ vi_set_io_res(&sc->vrsc_vs, 0);
+
+ return (0);
+}
+
+
+struct mmio_devemu pci_de_vrnd = {
+ .de_emu = "virtio-rnd",
+ .de_init = pci_vtrnd_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vrnd);
Index: usr.sbin/bhyve/mmio/mmio_virtio_scsi.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_scsi.c
@@ -0,0 +1,741 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>.
+ * Copyright (c) 2018 Marcelo Araujo <araujo@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/time.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_message.h>
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl_backend.h>
+#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_util.h>
+#include <cam/ctl/ctl_scsi_all.h>
+#include <camlib.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+#include "iov.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#define VTSCSI_RINGSZ 64
+#define VTSCSI_REQUESTQ 1
+#define VTSCSI_THR_PER_Q 16
+#define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2)
+#define VTSCSI_MAXSEG 64
+
+#define VTSCSI_IN_HEADER_LEN(_sc) \
+ (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size)
+
+#define VTSCSI_OUT_HEADER_LEN(_sc) \
+ (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size)
+
+#define VIRTIO_SCSI_MAX_CHANNEL 0
+#define VIRTIO_SCSI_MAX_TARGET 0
+#define VIRTIO_SCSI_MAX_LUN 16383
+
+#define VIRTIO_SCSI_F_INOUT (1 << 0)
+#define VIRTIO_SCSI_F_HOTPLUG (1 << 1)
+#define VIRTIO_SCSI_F_CHANGE (1 << 2)
+
+static int pci_vtscsi_debug = 0;
+#define DPRINTF(params) if (pci_vtscsi_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+struct pci_vtscsi_config {
+ uint32_t num_queues;
+ uint32_t seg_max;
+ uint32_t max_sectors;
+ uint32_t cmd_per_lun;
+ uint32_t event_info_size;
+ uint32_t sense_size;
+ uint32_t cdb_size;
+ uint16_t max_channel;
+ uint16_t max_target;
+ uint32_t max_lun;
+} __attribute__((packed));
+
+struct pci_vtscsi_queue {
+ struct pci_vtscsi_softc * vsq_sc;
+ struct vqueue_info * vsq_vq;
+ pthread_mutex_t vsq_mtx;
+ pthread_mutex_t vsq_qmtx;
+ pthread_cond_t vsq_cv;
+ STAILQ_HEAD(, pci_vtscsi_request) vsq_requests;
+ LIST_HEAD(, pci_vtscsi_worker) vsq_workers;
+};
+
+struct pci_vtscsi_worker {
+ struct pci_vtscsi_queue * vsw_queue;
+ pthread_t vsw_thread;
+ bool vsw_exiting;
+ LIST_ENTRY(pci_vtscsi_worker) vsw_link;
+};
+
+struct pci_vtscsi_request {
+ struct pci_vtscsi_queue * vsr_queue;
+ struct iovec vsr_iov_in[VTSCSI_MAXSEG];
+ int vsr_niov_in;
+ struct iovec vsr_iov_out[VTSCSI_MAXSEG];
+ int vsr_niov_out;
+ uint32_t vsr_idx;
+ STAILQ_ENTRY(pci_vtscsi_request) vsr_link;
+};
+
+/*
+ * Per-device softc
+ */
+struct pci_vtscsi_softc {
+ struct virtio_softc vss_vs;
+ struct vqueue_info vss_vq[VTSCSI_MAXQ];
+ struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ];
+ pthread_mutex_t vss_mtx;
+ int vss_iid;
+ int vss_ctl_fd;
+ uint32_t vss_features;
+ struct pci_vtscsi_config vss_config;
+};
+
+#define VIRTIO_SCSI_T_TMF 0
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
+
+/* command-specific response values */
+#define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0
+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
+
+struct pci_vtscsi_ctrl_tmf {
+ uint32_t type;
+ uint32_t subtype;
+ uint8_t lun[8];
+ uint64_t id;
+ uint8_t response;
+} __attribute__((packed));
+
+#define VIRTIO_SCSI_T_AN_QUERY 1
+#define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2
+#define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4
+#define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8
+#define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16
+#define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32
+#define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64
+
+struct pci_vtscsi_ctrl_an {
+ uint32_t type;
+ uint8_t lun[8];
+ uint32_t event_requested;
+ uint32_t event_actual;
+ uint8_t response;
+} __attribute__((packed));
+
+/* command-specific response values */
+#define VIRTIO_SCSI_S_OK 0
+#define VIRTIO_SCSI_S_OVERRUN 1
+#define VIRTIO_SCSI_S_ABORTED 2
+#define VIRTIO_SCSI_S_BAD_TARGET 3
+#define VIRTIO_SCSI_S_RESET 4
+#define VIRTIO_SCSI_S_BUSY 5
+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
+#define VIRTIO_SCSI_S_TARGET_FAILURE 7
+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
+#define VIRTIO_SCSI_S_FAILURE 9
+#define VIRTIO_SCSI_S_INCORRECT_LUN 12
+
+/* task_attr */
+#define VIRTIO_SCSI_S_SIMPLE 0
+#define VIRTIO_SCSI_S_ORDERED 1
+#define VIRTIO_SCSI_S_HEAD 2
+#define VIRTIO_SCSI_S_ACA 3
+
+struct pci_vtscsi_event {
+ uint32_t event;
+ uint8_t lun[8];
+ uint32_t reason;
+} __attribute__((packed));
+
+struct pci_vtscsi_req_cmd_rd {
+ uint8_t lun[8];
+ uint64_t id;
+ uint8_t task_attr;
+ uint8_t prio;
+ uint8_t crn;
+ uint8_t cdb[];
+} __attribute__((packed));
+
+struct pci_vtscsi_req_cmd_wr {
+ uint32_t sense_len;
+ uint32_t residual;
+ uint16_t status_qualifier;
+ uint8_t status;
+ uint8_t response;
+ uint8_t sense[];
+} __attribute__((packed));
+
+static void *pci_vtscsi_proc(void *);
+static void pci_vtscsi_reset(void *);
+static void pci_vtscsi_neg_features(void *, uint64_t);
+static int pci_vtscsi_cfgread(void *, int, int, uint32_t *);
+static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t);
+static inline int pci_vtscsi_get_lun(uint8_t *);
+static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t);
+static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_ctrl_tmf *);
+static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_ctrl_an *);
+static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *,
+ int, struct iovec *, int);
+static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *);
+static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *);
+static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *);
+static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_queue *, int);
+static int pci_vtscsi_init(struct vmctx *, struct mmio_devinst *, char *);
+
+static struct virtio_consts vtscsi_vi_consts = {
+ "vtscsi", /* our name */
+ VTSCSI_MAXQ, /* we support 2+n virtqueues */
+ sizeof(struct pci_vtscsi_config), /* config reg size */
+ pci_vtscsi_reset, /* reset */
+ NULL, /* device-wide qnotify */
+ pci_vtscsi_cfgread, /* read virtio config */
+ pci_vtscsi_cfgwrite, /* write virtio config */
+ pci_vtscsi_neg_features, /* apply negotiated features */
+ 0, /* our capabilities */
+};
+
+static void *
+pci_vtscsi_proc(void *arg)
+{
+ struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg;
+ struct pci_vtscsi_queue *q = worker->vsw_queue;
+ struct pci_vtscsi_request *req;
+ int iolen;
+
+ for (;;) {
+ pthread_mutex_lock(&q->vsq_mtx);
+
+ while (STAILQ_EMPTY(&q->vsq_requests)
+ && !worker->vsw_exiting)
+ pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx);
+
+ if (worker->vsw_exiting)
+ break;
+
+ req = STAILQ_FIRST(&q->vsq_requests);
+ STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link);
+
+ pthread_mutex_unlock(&q->vsq_mtx);
+ iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in,
+ req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out);
+
+ pthread_mutex_lock(&q->vsq_qmtx);
+ vq_relchain(q->vsq_vq, req->vsr_idx, iolen);
+ vq_endchains(q->vsq_vq, 0);
+ pthread_mutex_unlock(&q->vsq_qmtx);
+
+ DPRINTF(("virtio-scsi: request <idx=%d> completed",
+ req->vsr_idx));
+ free(req);
+ }
+
+ pthread_mutex_unlock(&q->vsq_mtx);
+ return (NULL);
+}
+
+static void
+pci_vtscsi_reset(void *vsc)
+{
+ struct pci_vtscsi_softc *sc;
+
+ sc = vsc;
+
+ DPRINTF(("vtscsi: device reset requested"));
+ vi_reset_dev(&sc->vss_vs);
+
+ /* initialize config structure */
+ sc->vss_config = (struct pci_vtscsi_config){
+ .num_queues = VTSCSI_REQUESTQ,
+ /* Leave room for the request and the response. */
+ .seg_max = VTSCSI_MAXSEG - 2,
+ .max_sectors = 2,
+ .cmd_per_lun = 1,
+ .event_info_size = sizeof(struct pci_vtscsi_event),
+ .sense_size = 96,
+ .cdb_size = 32,
+ .max_channel = VIRTIO_SCSI_MAX_CHANNEL,
+ .max_target = VIRTIO_SCSI_MAX_TARGET,
+ .max_lun = VIRTIO_SCSI_MAX_LUN
+ };
+}
+
+static void
+pci_vtscsi_neg_features(void *vsc, uint64_t negotiated_features)
+{
+ struct pci_vtscsi_softc *sc = vsc;
+
+ sc->vss_features = negotiated_features;
+}
+
+static int
+pci_vtscsi_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtscsi_softc *sc = vsc;
+ void *ptr;
+
+ ptr = (uint8_t *)&sc->vss_config + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+static int
+pci_vtscsi_cfgwrite(void *vsc, int offset, int size, uint32_t val)
+{
+
+ return (0);
+}
+
+static inline int
+pci_vtscsi_get_lun(uint8_t *lun)
+{
+
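+ /*
+ * Assuming the flat (single level) LUN format: bytes 2-3 hold
+ * 0x4000 | LUN, so mask off the addressing-method bits.
+ */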
+ return (((lun[2] << 8) | lun[3]) & 0x3fff);
+}
+
+static int
+pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf,
+ size_t bufsize)
+{
+ struct pci_vtscsi_ctrl_tmf *tmf;
+ struct pci_vtscsi_ctrl_an *an;
+ uint32_t type;
+
+ type = *(uint32_t *)buf;
+
+ if (type == VIRTIO_SCSI_T_TMF) {
+ tmf = (struct pci_vtscsi_ctrl_tmf *)buf;
+ return (pci_vtscsi_tmf_handle(sc, tmf));
+ }
+
+ if (type == VIRTIO_SCSI_T_AN_QUERY) {
+ an = (struct pci_vtscsi_ctrl_an *)buf;
+ return (pci_vtscsi_an_handle(sc, an));
+ }
+
+ return (0);
+}
+
+static int
+pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc,
+ struct pci_vtscsi_ctrl_tmf *tmf)
+{
+ union ctl_io *io;
+ int err;
+
+ io = ctl_scsi_alloc_io(sc->vss_iid);
+ ctl_scsi_zero_io(io);
+
+ io->io_hdr.io_type = CTL_IO_TASK;
+ io->io_hdr.nexus.initid = sc->vss_iid;
+ io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun);
+ io->taskio.tag_type = CTL_TAG_SIMPLE;
+ io->taskio.tag_num = (uint32_t)tmf->id;
+
+ switch (tmf->subtype) {
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+ io->taskio.task_action = CTL_TASK_ABORT_TASK;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+ io->taskio.task_action = CTL_TASK_ABORT_TASK_SET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
+ io->taskio.task_action = CTL_TASK_CLEAR_ACA;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+ io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+ io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+ io->taskio.task_action = CTL_TASK_LUN_RESET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+ io->taskio.task_action = CTL_TASK_QUERY_TASK;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
+ io->taskio.task_action = CTL_TASK_QUERY_TASK_SET;
+ break;
+ }
+
+ if (pci_vtscsi_debug) {
+ struct sbuf *sb = sbuf_new_auto();
+ ctl_io_sbuf(io, sb);
+ sbuf_finish(sb);
+ DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb)));
+ sbuf_delete(sb);
+ }
+
+ err = ioctl(sc->vss_ctl_fd, CTL_IO, io);
+ if (err != 0)
+ WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno)));
+
+ tmf->response = io->taskio.task_status;
+ ctl_scsi_free_io(io);
+ return (1);
+}
+
+static int
+pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc,
+ struct pci_vtscsi_ctrl_an *an)
+{
+
+ return (0);
+}
+
+static int
+pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
+ int niov_in, struct iovec *iov_out, int niov_out)
+{
+ struct pci_vtscsi_softc *sc = q->vsq_sc;
+ struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL;
+ struct pci_vtscsi_req_cmd_wr *cmd_wr;
+ struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG];
+ union ctl_io *io;
+ int data_niov_in, data_niov_out;
+ void *ext_data_ptr = NULL;
+ uint32_t ext_data_len = 0, ext_sg_entries = 0;
+ int err, nxferred;
+
+ seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in,
+ VTSCSI_IN_HEADER_LEN(sc));
+ seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out,
+ VTSCSI_OUT_HEADER_LEN(sc));
+
+ truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc));
+ truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc));
+ iov_to_buf(iov_in, niov_in, (void **)&cmd_rd);
+
+ cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc));
+ io = ctl_scsi_alloc_io(sc->vss_iid);
+ ctl_scsi_zero_io(io);
+
+ io->io_hdr.nexus.initid = sc->vss_iid;
+ io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun);
+
+ io->io_hdr.io_type = CTL_IO_SCSI;
+
+ if (data_niov_in > 0) {
+ ext_data_ptr = (void *)data_iov_in;
+ ext_sg_entries = data_niov_in;
+ ext_data_len = count_iov(data_iov_in, data_niov_in);
+ io->io_hdr.flags |= CTL_FLAG_DATA_OUT;
+ } else if (data_niov_out > 0) {
+ ext_data_ptr = (void *)data_iov_out;
+ ext_sg_entries = data_niov_out;
+ ext_data_len = count_iov(data_iov_out, data_niov_out);
+ io->io_hdr.flags |= CTL_FLAG_DATA_IN;
+ }
+
+ io->scsiio.sense_len = sc->vss_config.sense_size;
+ io->scsiio.tag_num = (uint32_t)cmd_rd->id;
+ switch (cmd_rd->task_attr) {
+ case VIRTIO_SCSI_S_ORDERED:
+ io->scsiio.tag_type = CTL_TAG_ORDERED;
+ break;
+ case VIRTIO_SCSI_S_HEAD:
+ io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE;
+ break;
+ case VIRTIO_SCSI_S_ACA:
+ io->scsiio.tag_type = CTL_TAG_ACA;
+ break;
+ case VIRTIO_SCSI_S_SIMPLE:
+ default:
+ io->scsiio.tag_type = CTL_TAG_SIMPLE;
+ break;
+ }
+ io->scsiio.ext_sg_entries = ext_sg_entries;
+ io->scsiio.ext_data_ptr = ext_data_ptr;
+ io->scsiio.ext_data_len = ext_data_len;
+ io->scsiio.ext_data_filled = 0;
+ io->scsiio.cdb_len = sc->vss_config.cdb_size;
+ memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size);
+
+ if (pci_vtscsi_debug) {
+ struct sbuf *sb = sbuf_new_auto();
+ ctl_io_sbuf(io, sb);
+ sbuf_finish(sb);
+ DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb)));
+ sbuf_delete(sb);
+ }
+
+ err = ioctl(sc->vss_ctl_fd, CTL_IO, io);
+ if (err != 0) {
+ WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno)));
+ cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+ } else {
+ cmd_wr->sense_len = MIN(io->scsiio.sense_len,
+ sc->vss_config.sense_size);
+ cmd_wr->residual = io->scsiio.residual;
+ cmd_wr->status = io->scsiio.scsi_status;
+ cmd_wr->response = VIRTIO_SCSI_S_OK;
+ memcpy(&cmd_wr->sense, &io->scsiio.sense_data,
+ cmd_wr->sense_len);
+ }
+
+ buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0);
+ nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled;
+ free(cmd_rd);
+ free(cmd_wr);
+ ctl_scsi_free_io(io);
+ return (nxferred);
+}
+
+static void
+pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtscsi_softc *sc;
+ struct iovec iov[VTSCSI_MAXSEG];
+ uint16_t idx, n;
+ void *buf = NULL;
+ size_t bufsize;
+ int iolen;
+
+ sc = vsc;
+
+ while (vq_has_descs(vq)) {
+ n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL);
+ bufsize = iov_to_buf(iov, n, &buf);
+ iolen = pci_vtscsi_control_handle(sc, buf, bufsize);
+ buf_to_iov(buf + bufsize - iolen, iolen, iov, n,
+ bufsize - iolen);
+
+ /*
+ * Release this chain and handle more
+ */
+ vq_relchain(vq, idx, iolen);
+ }
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
+ free(buf);
+}
+
+static void
+pci_vtscsi_eventq_notify(void *vsc, struct vqueue_info *vq)
+{
+
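+ /*
+ * No events are reported by this emulation; leave the posted
+ * buffers pending and suppress further kicks.
+ */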
+ vq_kick_disable(vq);
+}
+
+static void
+pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtscsi_softc *sc;
+ struct pci_vtscsi_queue *q;
+ struct pci_vtscsi_request *req;
+ struct iovec iov[VTSCSI_MAXSEG];
+ uint16_t flags[VTSCSI_MAXSEG];
+ uint16_t idx, n, i;
+ int readable;
+
+ sc = vsc;
+ q = &sc->vss_queues[vq->vq_num - 2];
+
+ while (vq_has_descs(vq)) {
+ readable = 0;
+ n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, flags);
+
+ /* Count readable descriptors */
+ for (i = 0; i < n; i++) {
+ if (flags[i] & VRING_DESC_F_WRITE)
+ break;
+
+ readable++;
+ }
+
+ req = calloc(1, sizeof(struct pci_vtscsi_request));
+ req->vsr_idx = idx;
+ req->vsr_queue = q;
+ req->vsr_niov_in = readable;
+ req->vsr_niov_out = n - readable;
+ memcpy(req->vsr_iov_in, iov,
+ req->vsr_niov_in * sizeof(struct iovec));
+ memcpy(req->vsr_iov_out, iov + readable,
+ req->vsr_niov_out * sizeof(struct iovec));
+
+ pthread_mutex_lock(&q->vsq_mtx);
+ STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);
+ pthread_cond_signal(&q->vsq_cv);
+ pthread_mutex_unlock(&q->vsq_mtx);
+
+ DPRINTF(("virtio-scsi: request <idx=%d> enqueued", idx));
+ }
+}
+
+static int
+pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc,
+ struct pci_vtscsi_queue *queue, int num)
+{
+ struct pci_vtscsi_worker *worker;
+ char tname[MAXCOMLEN + 1];
+ int i;
+
+ queue->vsq_sc = sc;
+ queue->vsq_vq = &sc->vss_vq[num + 2];
+
+ pthread_mutex_init(&queue->vsq_mtx, NULL);
+ pthread_mutex_init(&queue->vsq_qmtx, NULL);
+ pthread_cond_init(&queue->vsq_cv, NULL);
+ STAILQ_INIT(&queue->vsq_requests);
+ LIST_INIT(&queue->vsq_workers);
+
+ for (i = 0; i < VTSCSI_THR_PER_Q; i++) {
+ worker = calloc(1, sizeof(struct pci_vtscsi_worker));
+ worker->vsw_queue = queue;
+
+ pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc,
+ (void *)worker);
+
+ snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i);
+ pthread_set_name_np(worker->vsw_thread, tname);
+ LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link);
+ }
+
+ return (0);
+}
+
+static int
+pci_vtscsi_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtscsi_softc *sc;
+ char *opt, *optname;
+ const char *devname;
+ int i, optidx = 0;
+
+ sc = calloc(1, sizeof(struct pci_vtscsi_softc));
+ devname = "/dev/cam/ctl";
+ while ((opt = strsep(&opts, ",")) != NULL) {
+ optname = strsep(&opt, "=");
+ if (opt == NULL && optidx == 0) {
+ if (optname[0] != 0)
+ devname = optname;
+ } else if (strcmp(optname, "dev") == 0 && opt != NULL) {
+ devname = opt;
+ } else if (strcmp(optname, "iid") == 0 && opt != NULL) {
+ sc->vss_iid = strtoul(opt, NULL, 10);
+ } else {
+ EPRINTLN("Invalid option %s", optname);
+ free(sc);
+ return (1);
+ }
+ optidx++;
+ }
+
+ sc->vss_ctl_fd = open(devname, O_RDWR);
+ if (sc->vss_ctl_fd < 0) {
+ WPRINTF(("cannot open %s: %s", devname, strerror(errno)));
+ free(sc);
+ return (1);
+ }
+
+ vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq);
+ sc->vss_vs.vs_mtx = &sc->vss_mtx;
+
+ /* controlq */
+ sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ;
+ sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify;
+
+ /* eventq */
+ sc->vss_vq[1].vq_qsize = VTSCSI_RINGSZ;
+ sc->vss_vq[1].vq_notify = pci_vtscsi_eventq_notify;
+
+ /* request queues */
+ for (i = 2; i < VTSCSI_MAXQ; i++) {
+ sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ;
+ sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify;
+ pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2);
+ }
+
+ /* initialize config space */
+ mmio_set_cfgreg16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI);
+ mmio_set_cfgreg16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+ mmio_set_cfgreg8(pi, PCIR_CLASS, PCIC_STORAGE);
+ mmio_set_cfgreg16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_SCSI);
+ mmio_set_cfgreg16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+ if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix()))
+ return (1);
+ vi_set_io_res(&sc->vss_vs, 0);
+
+ return (0);
+}
+
+
+struct mmio_devemu pci_de_vscsi = {
+ .de_emu = "virtio-scsi",
+ .de_init = pci_vtscsi_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vscsi);
Index: usr.sbin/bhyve/mmio/net_utils.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/net_utils.h
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Vincenzo Maffione <v.maffione@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_UTILS_H_
+#define _NET_UTILS_H_
+
+#include <stdint.h>
+#include "mmio_emul.h"
+
+void net_genmac(struct mmio_devinst *pi, uint8_t *macaddr);
+int net_parsemac(char *mac_str, uint8_t *mac_addr);
+
+#endif /* _NET_UTILS_H_ */
Index: usr.sbin/bhyve/mmio/net_utils.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/net_utils.c
@@ -0,0 +1,90 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <net/ethernet.h>
+
+#include <errno.h>
+#include <md5.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+#include "net_utils.h"
+
+int
+net_parsemac(char *mac_str, uint8_t *mac_addr)
+{
+ struct ether_addr *ea;
+ char *tmpstr;
+ char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
+
+ tmpstr = strsep(&mac_str,"=");
+
+ if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
+ ea = ether_aton(mac_str);
+
+ if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+ memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+ EPRINTLN("Invalid MAC %s", mac_str);
+ return (EINVAL);
+ } else
+ memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
+ }
+
+ return (0);
+}
+
+void
+net_genmac(struct mmio_devinst *pi, uint8_t *macaddr)
+{
+ /*
+ * The default MAC address is the standard NetApp OUI of 00-a0-98,
+ * followed by an MD5 of the PCI slot/func number and dev name
+ */
+ MD5_CTX mdctx;
+ unsigned char digest[16];
+ char nstr[80];
+
+ snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
+ pi->di_func, vmname);
+
+ MD5Init(&mdctx);
+ MD5Update(&mdctx, nstr, (unsigned int)strlen(nstr));
+ MD5Final(digest, &mdctx);
+
+ macaddr[0] = 0x00;
+ macaddr[1] = 0xa0;
+ macaddr[2] = 0x98;
+ macaddr[3] = digest[0];
+ macaddr[4] = digest[1];
+ macaddr[5] = digest[2];
+}
Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -116,7 +116,7 @@
int resetting; /* protected by tx_mtx */
uint64_t vsc_features; /* negotiated features */
-
+
pthread_mutex_t rx_mtx;
int rx_merge; /* merged rx bufs in use */
Index: usr.sbin/bhyvectl/Makefile
===================================================================
--- usr.sbin/bhyvectl/Makefile
+++ usr.sbin/bhyvectl/Makefile
@@ -5,19 +5,17 @@
.include <src.opts.mk>
PROG= bhyvectl
-SRCS= bhyvectl.c
PACKAGE= bhyve
-MAN= bhyvectl.8
-
LIBADD= vmmapi util
WARNS?= 3
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm
.if ${MK_BHYVE_SNAPSHOT} != "no"
CFLAGS+= -DBHYVE_SNAPSHOT
.endif
+.include "${.CURDIR}/${MACHINE}/Makefile.inc"
.include <bsd.prog.mk>
Index: usr.sbin/bhyvectl/amd64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/amd64/Makefile.inc
@@ -0,0 +1,7 @@
+#
+# $FreeBSD$
+#
+.PATH: ${.CURDIR}/amd64
+
+SRCS= bhyvectl.c
+MAN= bhyvectl.8
Index: usr.sbin/bhyvectl/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/arm64/Makefile.inc
@@ -0,0 +1,7 @@
+#
+# $FreeBSD$
+#
+.PATH: ${.CURDIR}/arm64
+
+SRCS= bhyvectl.c
+MAN= bhyvectl.8
Index: usr.sbin/bhyvectl/arm64/bhyvectl.8
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/arm64/bhyvectl.8
@@ -0,0 +1,97 @@
+.\" Copyright (c) 2015 Christian Brueffer
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 13, 2016
+.Dt BHYVECTL 8
+.Os
+.Sh NAME
+.Nm bhyvectl
+.Nd "control utility for bhyve instances"
+.Sh SYNOPSIS
+.Nm
+.Fl -vm= Ns Ar <vmname>
+.Op Fl -create
+.Op Fl -destroy
+.Op Fl -get-stats
+.Op Fl -inject-nmi
+.Op Fl -force-reset
+.Op Fl -force-poweroff
+.Sh DESCRIPTION
+The
+.Nm
+command is a control utility for active
+.Xr bhyve 8
+virtual machine instances.
+.Pp
+.Em Note :
+Most
+.Nm
+flags are intended for querying and setting the state of an active instance.
+These commands are intended for development purposes, and are not documented here.
+A complete list can be obtained by executing
+.Nm
+without any arguments.
+.Pp
+The user-facing options are as follows:
+.Bl -tag -width ".Fl d Ar argument"
+.It Fl -vm= Ns Ar <vmname>
+Operate on the virtual machine
+.Ar <vmname> .
+.It Fl -create
+Create the specified VM.
+.It Fl -destroy
+Destroy the specified VM.
+.It Fl -get-stats
+Retrieve statistics for the specified VM.
+.It Fl -inject-nmi
+Inject a non-maskable interrupt (NMI) into the VM.
+.It Fl -force-reset
+Force the VM to reset.
+.It Fl -force-poweroff
+Force the VM to power off.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+Destroy the VM called fbsd10:
+.Pp
+.Dl "bhyvectl --vm=fbsd10 --destroy"
+.Sh SEE ALSO
+.Xr bhyve 8 ,
+.Xr bhyveload 8
+.Sh HISTORY
+The
+.Nm
+command first appeared in
+.Fx 10.1 .
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+utility was written by
+.An Peter Grehan
+and
+.An Neel Natu .
Index: usr.sbin/bhyvectl/arm64/bhyvectl.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/arm64/bhyvectl.c
@@ -0,0 +1,140 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/errno.h>
+#include <sys/mman.h>
+#include <sys/cpuset.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <libutil.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <time.h>
+#include <assert.h>
+#include <libutil.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include <vmmapi.h>
+
+#define MB (1UL << 20)
+#define GB (1UL << 30)
+
+#define REQ_ARG required_argument
+#define NO_ARG no_argument
+#define OPT_ARG optional_argument
+
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+static const char *progname;
+
+static void
+usage()
+{
+
+ (void)fprintf(stderr,
+ "Usage: %s --vm=<vmname>\n"
+ " %*s [--destroy]\n",
+ progname, (int)strlen(progname), "");
+ exit(1);
+}
+
+static int create;
+static int destroy;
+
+enum {
+ VMNAME = 1000, /* avoid collision with return values from getopt */
+};
+
+const struct option opts[] = {
+ { "vm", REQ_ARG, NULL, VMNAME },
+ { "destroy", NO_ARG, &destroy, 1 },
+ { NULL, 0, NULL, 1 },
+};
+
+int
+main(int argc, char *argv[])
+{
+ char *vmname;
+ int error, ch;
+ struct vmctx *ctx;
+
+ vmname = NULL;
+ progname = basename(argv[0]);
+
+ while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) {
+ switch (ch) {
+ case 0:
+ break;
+ case VMNAME:
+ vmname = optarg;
+ break;
+ default:
+ usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (vmname == NULL)
+ usage();
+
+ error = 0;
+ if (!error && create)
+ error = vm_create(vmname);
+ if (!error) {
+ ctx = vm_open(vmname);
+ if (ctx == NULL) {
+ printf("VM:%s is not created.\n", vmname);
+ exit(1);
+ }
+ }
+
+
+ if (error)
+ printf("errno = %d\n", errno);
+
+ if (!error && destroy)
+ vm_destroy(ctx);
+
+ exit(error);
+}
Index: usr.sbin/bhyveload/Makefile
===================================================================
--- usr.sbin/bhyveload/Makefile
+++ usr.sbin/bhyveload/Makefile
@@ -1,14 +1,17 @@
# $FreeBSD$
PROG= bhyveload
-SRCS= bhyveload.c
-MAN= bhyveload.8
PACKAGE= bhyve
+BHYVELOAD_SYSDIR?=${SRCTOP}
+BHYVELOAD_SRCTOP?=${.CURDIR}
+
LIBADD= vmmapi
WARNS?= 3
CFLAGS+=-I${SRCTOP}/stand/userboot
+.include "${BHYVELOAD_SRCTOP}/${MACHINE}/Makefile.inc"
+
.include <bsd.prog.mk>
Index: usr.sbin/bhyveload/amd64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/amd64/Makefile.inc
@@ -0,0 +1,7 @@
+# $FreeBSD$
+.PATH: ${BHYVELOAD_SRCTOP}/amd64/
+
+SRCS= bhyveload.c
+MAN= bhyveload.8
+
+CFLAGS+=-I${SRCTOP}/sys/boot/userboot
Index: usr.sbin/bhyveload/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/Makefile.inc
@@ -0,0 +1,13 @@
+# $FreeBSD$
+LIBADD+= util
+
+.PATH: ${BHYVELOAD_SRCTOP}/arm64/
+
+SRCS= bhyveload.c \
+ boot.c
+
+.PATH: ${.CURDIR}/../../sys/arm64/vmm
+
+CFLAGS += -I${.CURDIR}/../../stand/common
+
+MK_MAN=no
Index: usr.sbin/bhyveload/arm64/bhyveload.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/bhyveload.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/disk.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <err.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <termios.h>
+#include <unistd.h>
+#include <vmmapi.h>
+
+#include <libutil.h>
+
+#include "boot.h"
+
+#define gvatovm(addr) ((uint64_t)(addr) - KERNBASE + \
+ kernel_load_address - memory_base_address)
+#define overlap(x_start, x_end, y_start, y_end) \
+ ((x_start) >= (y_start) && (x_start) < (y_end) || \
+ (x_end) >= (y_start) && (x_end) < (y_end))
+
+#define MB (1024 * 1024UL)
+#define BSP 0
+#define KERNEL_IMAGE_NAME_LEN 32
+
+#define GIC_V3_DIST_START 0x2f000000UL
+#define GIC_V3_DIST_SIZE 0x10000UL
+#define GIC_V3_REDIST_START 0x2f100000UL
+#define GIC_V3_REDIST_SIZE 0x200000UL
+
+struct env {
+ const char *str;
+ SLIST_ENTRY(env) next;
+};
+static SLIST_HEAD(envhead, env) envhead;
+
+static uint64_t memory_base_address, kernel_load_address;
+
+static char *vmname, *progname;
+static struct vmctx *ctx;
+
+static int
+env_add(const char *str)
+{
+ struct env *env;
+
+ env = malloc(sizeof(*env));
+ if (env == NULL)
+ return (ENOMEM);
+ env->str = str;
+ SLIST_INSERT_HEAD(&envhead, env, next);
+
+ return (0);
+}
+
+static int
+env_tostr(char **envstrp, int *envlen)
+{
+ struct env *env;
+ int i;
+
+ *envlen = 0;
+ SLIST_FOREACH(env, &envhead, next)
+ *envlen = *envlen + strlen(env->str) + 1;
+ /* Make room for the two terminating zeroes */
+ if (*envlen == 0)
+ *envlen = 2;
+ else
+ (*envlen)++;
+
+ *envstrp = malloc(*envlen * sizeof(char));
+ if (*envstrp == NULL)
+ return (ENOMEM);
+
+ i = 0;
+ SLIST_FOREACH(env, &envhead, next) {
+ strncpy(*envstrp + i, env->str, strlen(env->str));
+ i += strlen(env->str);
+ (*envstrp)[i++] = 0;
+ }
+ (*envstrp)[i] = 0;
+
+ /*
+ * At this point we have envstr[0] == 0 if the environment is empty.
+ * Add the second 0 to properly terminate the environment string.
+ */
+ if (SLIST_EMPTY(&envhead))
+ (*envstrp)[1] = 0;
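+
+ /*
+ * For example, "-e foo=1 -e bar=2" yields "bar=2\0foo=1\0\0"
+ * (reverse insertion order due to SLIST_INSERT_HEAD), 13 bytes
+ * including the extra terminating NUL.
+ */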
+
+ /*
+ for (i = 0; i < *envlen; i++)
+ printf("%d ", (int)(*envstrp)[i]);
+ printf("\n");
+ */
+
+ return (0);
+}
+
+/*
+ * Guest virtual machine
+ */
+static int
+guest_copyin(const void *from, uint64_t to, size_t size)
+{
+ char *ptr;
+ ptr = vm_map_ipa(ctx, to, size);
+ if (ptr == NULL)
+ return (EFAULT);
+
+ memcpy(ptr, from, size);
+ return (0);
+}
+
+static int
+guest_copyout(uint64_t from, void *to, size_t size)
+{
+ char *ptr;
+
+ ptr = vm_map_ipa(ctx, from, size);
+ if (ptr == NULL)
+ return (EFAULT);
+
+ memcpy(to, ptr, size);
+ return (0);
+}
+
+static void
+guest_setreg(enum vm_reg_name vmreg, uint64_t v)
+{
+ int error;
+
+ error = vm_set_register(ctx, BSP, vmreg, v);
+ if (error)
+ perror("vm_set_register");
+}
+
+#if 0
+static int
+parse_memsize(const char *optarg, size_t *ret_memsize)
+{
+ char *endptr;
+ size_t optval;
+ int error;
+
+ optval = strtoul(optarg, &endptr, 0);
+ if (*optarg != '\0' && *endptr == '\0') {
+ /* Memory size must be at least one megabyte. */
+ if (optval < MB)
+ optval = optval * MB;
+ *ret_memsize = optval;
+ error = 0;
+ } else {
+ error = expand_number(optarg, ret_memsize);
+ }
+
+ return (error);
+}
+#endif
+
+static void
+usage(int code)
+{
+ fprintf(stderr,
+ "Usage: %s [-h] [-k <kernel-image>] [-e <name=value>] [-b base-address]\n"
+ " %*s [-m mem-size] [-l load-address] <vmname>\n"
+ " -k: path to guest kernel image\n"
+ " -e: guest boot environment\n"
+ " -b: memory base address\n"
+ " -m: memory size\n"
+ " -l: kernel load address in the guest physical memory\n"
+ " -h: help\n",
+ progname, (int)strlen(progname), "");
+ exit(code);
+}
+
+int
+main(int argc, char** argv)
+{
+ struct vm_bootparams bootparams;
+ uint64_t mem_size;
+ int opt, error;
+ int kernel_image_fd;
+ uint64_t periphbase;
+ char kernel_image_name[KERNEL_IMAGE_NAME_LEN];
+ struct stat st;
+ void *addr;
+ char *envstr;
+ int envlen;
+
+ progname = basename(argv[0]);
+
+ mem_size = 128 * MB;
+ memory_base_address = VM_GUEST_BASE_IPA;
+ kernel_load_address = memory_base_address;
+ periphbase = 0x2c000000UL;
+ strncpy(kernel_image_name, "kernel.bin", KERNEL_IMAGE_NAME_LEN);
+ memset(&bootparams, 0, sizeof(struct vm_bootparams));
+
+ while ((opt = getopt(argc, argv, "hk:l:b:m:e:")) != -1) {
+ switch (opt) {
+ case 'k':
+ strncpy(kernel_image_name, optarg, KERNEL_IMAGE_NAME_LEN);
+ break;
+ case 'l':
+ kernel_load_address = strtoul(optarg, NULL, 0);
+ break;
+ case 'b':
+ memory_base_address = strtoul(optarg, NULL, 0);
+ break;
+ case 'm':
+ error = vm_parse_memsize(optarg, &mem_size);
+ if (error) {
+ fprintf(stderr, "Invalid memsize '%s'\n", optarg);
+ exit(1);
+ }
+ break;
+ case 'e':
+ error = env_add(optarg);
+ if (error) {
+ perror("env_add");
+ exit(1);
+ }
+ break;
+ case 'h':
+ usage(0);
+ default:
+ fprintf(stderr, "Unknown argument '%c'\n", opt);
+ usage(1);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1) {
+ fprintf(stderr, "Missing or unknown arguments\n");
+ usage(1);
+ }
+
+ if (kernel_load_address < memory_base_address) {
+ fprintf(stderr, "Kernel load address is below memory base address\n");
+ exit(1);
+ }
+
+ vmname = argv[0];
+
+ kernel_image_fd = open(kernel_image_name, O_RDONLY);
+ if (kernel_image_fd == -1) {
+ perror("open kernel_image_name");
+ exit(1);
+ }
+
+ error = vm_create(vmname);
+ if (error) {
+ perror("vm_create");
+ exit(1);
+ }
+
+ ctx = vm_open(vmname);
+ if (ctx == NULL) {
+ perror("vm_open");
+ exit(1);
+ }
+
+ error = vm_setup_memory(ctx, memory_base_address, mem_size, VM_MMAP_ALL);
+ if (error) {
+ perror("vm_setup_memory");
+ exit(1);
+ }
+
+ error = fstat(kernel_image_fd, &st);
+ if (error) {
+ perror("fstat");
+ exit(1);
+ }
+
+ if ((uint64_t)st.st_size > mem_size) {
+ fprintf(stderr, "Kernel image larger than memory size\n");
+ exit(1);
+ }
+ if (kernel_load_address + st.st_size >= memory_base_address + mem_size) {
+ fprintf(stderr, "Kernel image out of bounds of guest memory\n");
+ exit(1);
+ }
+
+ addr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, kernel_image_fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap kernel_image_fd");
+ exit(1);
+ }
+
+ if (guest_copyin(addr, kernel_load_address - memory_base_address, st.st_size) != 0) {
+ perror("guest_copyin");
+ exit(1);
+ }
+
+ error = env_tostr(&envstr, &envlen);
+ if (error) {
+ perror("parse boot environment\n");
+ exit(1);
+ }
+
+ bootparams.envstr = envstr;
+ bootparams.envlen = envlen;
+ error = parse_kernel(addr, st.st_size, ctx, &bootparams);
+ if (error) {
+ fprintf(stderr, "Error parsing image\n");
+ exit(1);
+ }
+
+ /*
+ fprintf(stderr, "bootparams.envp_gva = 0x%016lx\n", bootparams.envp_gva);
+ fprintf(stderr, "gvatom(bootparams.envp_gva) = 0x%016lx\n", gvatovm(bootparams.envp_gva));
+ fprintf(stderr, "vm_map_ipa() = 0x%016lx\n", (uint64_t)vm_map_ipa(ctx, gvatovm(bootparams.envp_gva), PAGE_SIZE));
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, "bootparams.mudulep_gva = 0x%016lx\n", bootparams.modulep_gva);
+ fprintf(stderr, "gvatom(bootparams.modulep_gva) = 0x%016lx\n", gvatovm(bootparams.modulep_gva));
+ fprintf(stderr, "vm_map_ipa() = 0x%016lx\n", (uint64_t)vm_map_ipa(ctx, gvatovm(bootparams.modulep_gva), PAGE_SIZE));
+ fprintf(stderr, "\n");
+ */
+
+ /* Copy the environment string in the guest memory */
+ if (guest_copyin((void *)envstr, gvatovm(bootparams.envp_gva), envlen) != 0) {
+ perror("guest_copyin");
+ exit(1);
+ }
+
+ /* Copy the module data in the guest memory */
+ if (guest_copyin(bootparams.modulep, gvatovm(bootparams.modulep_gva), bootparams.module_len) != 0) {
+ perror("guest_copyin");
+ exit(1);
+ }
+
+ uint64_t mem_end = memory_base_address + mem_size;
+ uint64_t dist_end = GIC_V3_DIST_START + GIC_V3_DIST_SIZE;
+ uint64_t redist_end = GIC_V3_REDIST_START + GIC_V3_REDIST_SIZE;
+
+ if (overlap(GIC_V3_DIST_START, dist_end, memory_base_address, mem_end)) {
+ fprintf(stderr, "Guest memory overlaps with VGIC Distributor\n");
+ exit(1);
+ }
+
+ if (overlap(GIC_V3_REDIST_START, redist_end, memory_base_address, mem_end)) {
+ fprintf(stderr, "Guest memory overlaps with VGIC Redistributor\n");
+ exit(1);
+ }
+
+ error = vm_attach_vgic(ctx, GIC_V3_DIST_START, GIC_V3_DIST_SIZE,
+ GIC_V3_REDIST_START, GIC_V3_REDIST_SIZE);
+ if (error) {
+ fprintf(stderr, "Error attaching VGIC to the virtual machine\n");
+ exit(1);
+ }
+
+ munmap(addr, st.st_size);
+
+ guest_setreg(VM_REG_ELR_EL2, kernel_load_address + bootparams.entry_off);
+ guest_setreg(VM_REG_GUEST_X0, bootparams.modulep_gva);
+
+ return 0;
+}
Index: usr.sbin/bhyveload/arm64/boot.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/boot.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _BOOT_H_
+#define _BOOT_H_
+
+
+
+struct vm_bootparams {
+ uint64_t entry_off;
+ uint64_t modulep_gva; /* Guest virtual address of modulep data */
+ uint64_t envp_gva; /* Guest virtual address for env */
+ char *envstr;
+ int envlen;
+ int module_len;
+ void *modulep; /* Bhyveload address of modulep data */
+};
+
+int parse_kernel(void *addr, size_t img_size, struct vmctx *ctx,
+ struct vm_bootparams *bootparams);
+
+#endif
Index: usr.sbin/bhyveload/arm64/boot.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/boot.c
@@ -0,0 +1,618 @@
+/*-
+ * Copyright (c) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/elf.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/linker.h>
+#include <sys/elf_generic.h>
+#include <sys/module.h>
+#include <sys/errno.h>
+
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+#include <bootstrap.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <vmmapi.h>
+
+#include "boot.h"
+
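+/*
+ * Translate a guest kernel virtual address into the corresponding address
+ * inside the mmap'ed kernel image in the bhyveload process.
+ */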
+#define gvatou(gva, addr) ((vm_offset_t)(gva) - KERNBASE + (vm_offset_t)(addr))
+
+struct elf_file {
+ Elf_Phdr *ph;
+ Elf_Ehdr *ehdr;
+ Elf_Sym *symtab;
+ Elf_Hashelt *hashtab;
+ Elf_Hashelt nbuckets;
+ Elf_Hashelt nchains;
+ Elf_Hashelt *buckets;
+ Elf_Hashelt *chains;
+ Elf_Rel *rel;
+ size_t relsz;
+ Elf_Rela *rela;
+ size_t relasz;
+ char *strtab;
+ size_t strsz;
+ caddr_t firstpage_u; /* Userspace address of mmap'ed guest kernel */
+};
+
+static uint64_t parse_image(struct preloaded_file *img, struct elf_file *ef);
+static void image_addmetadata(struct preloaded_file *img, int type,
+ size_t size, void *addr);
+static int image_addmodule(struct preloaded_file *img, char *modname, int version);
+static void parse_metadata(struct preloaded_file *img, struct elf_file *ef,
+ Elf_Addr p_startu, Elf_Addr p_endu);
+static int lookup_symbol(struct elf_file *ef, const char *name, Elf_Sym *symp);
+static struct kernel_module *image_findmodule(struct preloaded_file *img, char *modname,
+ struct mod_depend *verinfo);
+static uint64_t moddata_len(struct preloaded_file *img);
+static void moddata_copy(vm_offset_t dest, struct preloaded_file *img);
+
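+/*
+ * Basic sanity checks on the ELF identification of the mmap'ed kernel image.
+ */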
+static int
+load_elf_header(struct elf_file *ef)
+{
+ Elf_Ehdr *ehdr;
+
+ ehdr = ef->ehdr = (Elf_Ehdr *)ef->firstpage_u;
+ /* Is it ELF? */
+ if (!IS_ELF(*ehdr))
+ return (EFTYPE);
+
+ if (ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||/* Layout ? */
+ ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
+ ehdr->e_ident[EI_VERSION] != EV_CURRENT || /* Version ? */
+ ehdr->e_version != EV_CURRENT ||
+ ehdr->e_machine != ELF_TARG_MACH) /* Machine ? */
+ return (EFTYPE);
+
+ return (0);
+}
+
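+/*
+ * Walk a chain of preloaded module metadata records looking for a
+ * MODINFO_TYPE field matching 'type' and return the MODINFO_NAME record
+ * that starts the containing entry, or NULL if none is found.
+ */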
+static caddr_t
+preload_search_by_type(const char *type, caddr_t preload_metadata)
+{
+ caddr_t curp, lname;
+ uint32_t *hdr;
+ int next;
+
+ if (preload_metadata != NULL) {
+
+ curp = preload_metadata;
+ lname = NULL;
+ for (;;) {
+ hdr = (uint32_t *)curp;
+ if (hdr[0] == 0 && hdr[1] == 0)
+ break;
+
+ /* remember the start of each record */
+ if (hdr[0] == MODINFO_NAME)
+ lname = curp;
+
+ /* Search for a MODINFO_TYPE field */
+ if ((hdr[0] == MODINFO_TYPE) &&
+ !strcmp(type, curp + sizeof(uint32_t) * 2))
+ return(lname);
+
+ /* skip to next field */
+ next = sizeof(uint32_t) * 2 + hdr[1];
+ next = roundup(next, sizeof(u_long));
+ curp += next;
+ }
+ }
+ return(NULL);
+}
+
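+/*
+ * Parse the mmap'ed kernel image: validate the ELF header, gather the
+ * metadata a FreeBSD kernel expects from loader(8) (ELF and section headers,
+ * symbol ranges, environment and module pointers) and build the module data
+ * blob that the caller copies into guest memory.
+ */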
+int
+parse_kernel(void *addr, size_t img_size, struct vmctx *ctx,
+ struct vm_bootparams *bootparams)
+{
+ struct elf_file ef;
+ struct preloaded_file img;
+ Elf_Ehdr *ehdr_u;
+ int err;
+ vm_offset_t lastaddr_gva;
+ uint64_t kernend;
+ uint64_t size;
+ uint64_t modlen;
+ int boothowto;
+
+ memset(&ef, 0, sizeof(struct elf_file));
+ memset(&img, 0, sizeof(struct preloaded_file));
+
+ ef.firstpage_u = (caddr_t)addr;
+ err = load_elf_header(&ef);
+ if (err != 0)
+ return (err);
+
+ ehdr_u = ef.ehdr;
+ if (ehdr_u->e_type != ET_EXEC) {
+ fprintf(stderr, "Image not a kernel\n");
+ return (EPERM);
+ }
+ img.f_name = "elf kernel";
+ img.f_type = "elf kernel";
+ img.f_size = img_size;
+
+ size = parse_image(&img, &ef);
+ if (size == 0)
+ return (ENOEXEC);
+ bootparams->entry_off = ehdr_u->e_entry - KERNBASE;
+
+ image_addmetadata(&img, MODINFOMD_ELFHDR, sizeof(*ehdr_u), ehdr_u);
+
+ /* XXX: Add boothowto options? */
+ boothowto = 0;
+ image_addmetadata(&img, MODINFOMD_HOWTO, sizeof(boothowto), &boothowto);
+
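+	/*
+	 * Lay out the boot data after the kernel image: a fixed gap follows
+	 * the loaded image, then the environment strings and the module data,
+	 * each starting on a page boundary.
+	 */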
+ lastaddr_gva = roundup(img.f_addr + img.f_size + 0x3fd000, PAGE_SIZE);
+ image_addmetadata(&img, MODINFOMD_ENVP, sizeof(lastaddr_gva), &lastaddr_gva);
+ bootparams->envp_gva = lastaddr_gva;
+
+ lastaddr_gva = roundup(lastaddr_gva + bootparams->envlen, PAGE_SIZE);
+ /* Module data start in the guest kernel virtual address space */
+ bootparams->modulep_gva = lastaddr_gva;
+
+ modlen = moddata_len(&img);
+ kernend = roundup(bootparams->modulep_gva + modlen, PAGE_SIZE);
+ image_addmetadata(&img, MODINFOMD_KERNEND, sizeof(kernend), &kernend);
+
+ bootparams->module_len = roundup(modlen, PAGE_SIZE);
+ bootparams->modulep = calloc(1, bootparams->module_len);
+ if (bootparams->modulep == NULL) {
+ perror("calloc");
+ return (ENOMEM);
+ }
+
+ moddata_copy((vm_offset_t)bootparams->modulep, &img);
+
+ return (0);
+}
+
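+/*
+ * Walk the program and section headers of the kernel image, record the
+ * metadata loader(8) would normally pass in (.ctors, section headers, symbol
+ * table bounds, the DYNAMIC section) and scan the module metadata linker set.
+ * Returns the size of the loaded image; 0 is treated as failure by the caller.
+ */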
+static uint64_t
+parse_image(struct preloaded_file *img, struct elf_file *ef)
+{
+ Elf_Ehdr *ehdr;
+ Elf_Phdr *phdr;
+ Elf_Phdr *php;
+ Elf_Shdr *shdr;
+ Elf_Dyn *dp;
+ Elf_Addr adp;
+ Elf_Addr ctors;
+ Elf_Addr ssym, esym;
+ Elf_Addr p_start, p_end;
+ Elf_Size size;
+ Elf_Sym sym;
+ vm_offset_t firstaddr, lastaddr;
+ vm_offset_t shstr_addr;
+ char *shstr;
+ int symstrindex;
+ int symtabindex;
+ size_t chunk_len;
+ uint64_t ret;
+ int ndp;
+ int i;
+ unsigned int j;
+
+ dp = NULL;
+ shdr = NULL;
+ ret = 0;
+
+ ehdr = ef->ehdr;
+ phdr = (Elf_Phdr *)(ef->firstpage_u + ehdr->e_phoff);
+
+ firstaddr = lastaddr = 0;
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ if (phdr[i].p_type != PT_LOAD)
+ continue;
+ if (firstaddr == 0 || firstaddr > phdr[i].p_vaddr)
+ firstaddr = phdr[i].p_vaddr;
+ /* We mmap'ed the kernel, so p_memsz == p_filesz. */
+ if (lastaddr == 0 || lastaddr < (phdr[i].p_vaddr + phdr[i].p_filesz))
+ lastaddr = phdr[i].p_vaddr + phdr[i].p_filesz;
+ }
+ lastaddr = roundup(lastaddr, sizeof(long));
+
+ /*
+ * Get the section headers. We need this for finding the .ctors
+ * section as well as for loading any symbols. Both may be hard
+ * to do if reading from a .gz file as it involves seeking. I
+ * think the rule is going to have to be that you must strip a
+ * file to remove symbols before gzipping it.
+ */
+ chunk_len = ehdr->e_shnum * ehdr->e_shentsize;
+ if (chunk_len == 0 || ehdr->e_shoff == 0)
+ goto nosyms;
+ shdr = (Elf_Shdr *)(ef->firstpage_u + ehdr->e_shoff);
+ image_addmetadata(img, MODINFOMD_SHDR, chunk_len, shdr);
+
+ /*
+ * Read the section string table and look for the .ctors section.
+ * We need to tell the kernel where it is so that it can call the
+ * ctors.
+ */
+ chunk_len = shdr[ehdr->e_shstrndx].sh_size;
+ if (chunk_len > 0) {
+		shstr_addr = (vm_offset_t)(ef->firstpage_u +
+		    shdr[ehdr->e_shstrndx].sh_offset);
+ shstr = malloc(chunk_len);
+ memcpy(shstr, (void *)shstr_addr, chunk_len);
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (strcmp(shstr + shdr[i].sh_name, ".ctors") != 0)
+ continue;
+ ctors = shdr[i].sh_addr;
+ image_addmetadata(img, MODINFOMD_CTORS_ADDR,
+ sizeof(ctors), &ctors);
+ size = shdr[i].sh_size;
+ image_addmetadata(img, MODINFOMD_CTORS_SIZE,
+ sizeof(size), &size);
+ break;
+ }
+ free(shstr);
+ }
+
+ /*
+ * Now load any symbols.
+ */
+ symtabindex = -1;
+ symstrindex = -1;
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (shdr[i].sh_type != SHT_SYMTAB)
+ continue;
+ for (j = 0; j < ehdr->e_phnum; j++) {
+ if (phdr[j].p_type != PT_LOAD)
+ continue;
+ if (shdr[i].sh_offset >= phdr[j].p_offset &&
+ (shdr[i].sh_offset + shdr[i].sh_size <=
+ phdr[j].p_offset + phdr[j].p_filesz)) {
+ shdr[i].sh_offset = 0;
+ shdr[i].sh_size = 0;
+ break;
+ }
+ }
+ if (shdr[i].sh_offset == 0 || shdr[i].sh_size == 0)
+			continue; /* already loaded in a PT_LOAD above */
+ /* Save it for loading below */
+ symtabindex = i;
+ symstrindex = shdr[i].sh_link;
+ }
+ if (symtabindex < 0 || symstrindex < 0)
+ goto nosyms;
+
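+	/*
+	 * Compute where a size-prefixed copy of the symbol table and its
+	 * string table would sit after the loaded segments; ssym and esym
+	 * bracket that region and are handed to the kernel.
+	 */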
+ ssym = lastaddr;
+ i = symtabindex;
+ for (;;) {
+ size = shdr[i].sh_size;
+ lastaddr += sizeof(size);
+ lastaddr += shdr[i].sh_size;
+ lastaddr = roundup(lastaddr, sizeof(size));
+
+ if (i == symtabindex)
+ i = symstrindex;
+ else if (i == symstrindex)
+ break;
+ }
+ esym = lastaddr;
+
+ image_addmetadata(img, MODINFOMD_SSYM, sizeof(ssym), &ssym);
+ image_addmetadata(img, MODINFOMD_ESYM, sizeof(esym), &esym);
+
+nosyms:
+ ret = lastaddr - firstaddr;
+ img->f_addr = firstaddr;
+
+ php = NULL;
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ if (phdr[i].p_type == PT_DYNAMIC) {
+ php = &phdr[i];
+ adp = php->p_vaddr;
+ image_addmetadata(img, MODINFOMD_DYNAMIC,
+ sizeof(adp), &adp);
+ break;
+ }
+ }
+ if (php == NULL)
+ goto out;
+ ndp = php->p_filesz / sizeof(Elf_Dyn);
+ if (ndp == 0)
+ goto out;
+
+ ef->strsz = 0;
+ dp = (Elf_Dyn *)(ef->firstpage_u + php->p_offset);
+ for (i = 0; i < ndp; i++) {
+ if (dp[i].d_tag == 0)
+ break;
+ switch(dp[i].d_tag) {
+ case DT_HASH:
+ ef->hashtab = (Elf_Hashelt *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_STRTAB:
+			ef->strtab = (char *)(uintptr_t)dp[i].d_un.d_ptr;
+			break;
+ case DT_STRSZ:
+ ef->strsz = dp[i].d_un.d_val;
+ break;
+ case DT_SYMTAB:
+ ef->symtab = (Elf_Sym *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_REL:
+ ef->rel = (Elf_Rel *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_RELSZ:
+ ef->relsz = dp[i].d_un.d_val;
+ break;
+ case DT_RELA:
+ ef->rela = (Elf_Rela *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_RELASZ:
+ ef->relasz = dp[i].d_un.d_val;
+ break;
+ }
+ }
+ if (ef->hashtab == NULL || ef->symtab == NULL ||
+ ef->strtab == NULL || ef->strsz == 0)
+ goto out;
+
+ memcpy(&ef->nbuckets, (void *)gvatou(ef->hashtab, ef->firstpage_u), sizeof(ef->nbuckets));
+ memcpy(&ef->nchains, (void *)gvatou(ef->hashtab + 1, ef->firstpage_u), sizeof(ef->nchains));
+ ef->buckets = (Elf_Hashelt *)gvatou(ef->hashtab + 2, ef->firstpage_u);
+ ef->chains = ef->buckets + ef->nbuckets;
+
+ if (lookup_symbol(ef, "__start_set_modmetadata_set", &sym) != 0) {
+ ret = 0;
+ goto out;
+ }
+ p_start = gvatou(sym.st_value, ef->firstpage_u);
+ if (lookup_symbol(ef, "__stop_set_modmetadata_set", &sym) != 0) {
+ ret = ENOENT;
+ goto out;
+ }
+ p_end = gvatou(sym.st_value, ef->firstpage_u);
+ parse_metadata(img, ef, p_start, p_end);
+
+out:
+ return ret;
+}
+
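+/*
+ * Size of the serialized module data: each record is two 32-bit words (type
+ * and length) followed by the payload rounded up to an 8-byte boundary.
+ */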
+static uint64_t
+moddata_len(struct preloaded_file *img)
+{
+ struct file_metadata *md;
+ uint64_t len;
+
+ /* Count the kernel image name */
+ len = 8 + roundup(strlen(img->f_name) + 1, sizeof(uint64_t));
+ /* Count the kernel's type */
+ len += 8 + roundup(strlen(img->f_type) + 1, sizeof(uint64_t));
+ /* Count the kernel's virtual address */
+ len += 8 + roundup(sizeof(img->f_addr), sizeof(uint64_t));
+ /* Count the kernel's size */
+ len += 8 + roundup(sizeof(img->f_size), sizeof(uint64_t));
+ /* Count the metadata size */
+ for (md = img->f_metadata; md != NULL; md = md->md_next)
+ len += 8 + roundup(md->md_size, sizeof(uint64_t));
+
+ return len;
+}
+
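+/*
+ * Helpers that emit module metadata records in the layout the kernel's
+ * preload_search_*() routines expect: a 32-bit type, a 32-bit length and the
+ * payload padded to an 8-byte boundary.
+ */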
+#define COPY32(dest, what) \
+ do { \
+ uint32_t w = (what); \
+ memcpy((void *)dest, &w, sizeof(w)); \
+ dest += sizeof(w); \
+ } while (0)
+
+#define COPY_MODINFO(modinfo, dest, val, len) \
+ do { \
+ COPY32(dest, modinfo); \
+ COPY32(dest, len); \
+ memcpy((void *)dest, val, len); \
+ dest += roundup(len, sizeof(uint64_t)); \
+ } while (0)
+
+#define COPY_MODEND(dest) \
+ do { \
+ COPY32(dest, MODINFO_END); \
+ COPY32(dest, 0); \
+	} while (0)
+
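+/*
+ * Serialize the kernel image's name, type, load address, size and metadata
+ * records into the buffer that becomes the guest's module data.
+ */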
+static void
+moddata_copy(vm_offset_t dest, struct preloaded_file *img)
+{
+ struct file_metadata *md;
+
+ COPY_MODINFO(MODINFO_NAME, dest, img->f_name, strlen(img->f_name) + 1);
+ COPY_MODINFO(MODINFO_TYPE, dest, img->f_type, strlen(img->f_type) + 1);
+ COPY_MODINFO(MODINFO_ADDR, dest, &img->f_addr, sizeof(img->f_addr));
+ COPY_MODINFO(MODINFO_SIZE, dest, &img->f_size, sizeof(img->f_size));
+
+ for (md = img->f_metadata; md != NULL; md = md->md_next)
+ COPY_MODINFO(MODINFO_METADATA | md->md_type, dest,
+ md->md_data, md->md_size);
+
+ COPY_MODEND(dest);
+}
+
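+/* Prepend a metadata record of the given type to the image's metadata list. */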
+static void
+image_addmetadata(struct preloaded_file *img, int type,
+ size_t size, void *addr)
+{
+ struct file_metadata *md;
+
+ md = malloc(sizeof(struct file_metadata) - sizeof(md->md_data) + size);
+ md->md_size = size;
+ md->md_type = type;
+ memcpy(md->md_data, addr, size);
+ md->md_next = img->f_metadata;
+ img->f_metadata = md;
+}
+
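+/* The standard SysV ELF hash function, as used for DT_HASH lookups. */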
+static uint64_t
+elf_hash(const char *name)
+{
+ const unsigned char *p = (const unsigned char *)name;
+ uint64_t h;
+ uint64_t g;
+
+ h = 0;
+ while (*p != '\0') {
+ h = (h << 4) + *p++;
+ if ((g = h & 0xf0000000) != 0)
+ h ^= g >> 24;
+ h &= ~g;
+ }
+
+ return h;
+}
+
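+/*
+ * Look up a symbol by name in the kernel's DT_HASH table.  The pointers
+ * recorded in the dynamic section are guest kernel virtual addresses, so
+ * every access goes through gvatou().
+ */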
+static int
+lookup_symbol(struct elf_file *ef, const char *name, Elf_Sym *symp)
+{
+ Elf_Hashelt symnum;
+ Elf_Sym sym;
+ char *strp;
+ uint64_t hash;
+
+ hash = elf_hash(name);
+ memcpy(&symnum, &ef->buckets[hash % ef->nbuckets], sizeof(symnum));
+
+ while (symnum != STN_UNDEF) {
+ if (symnum >= ef->nchains) {
+ fprintf(stderr, "lookup_symbol: corrupt symbol table\n");
+ return ENOENT;
+ }
+
+ memcpy(&sym, (void *)gvatou(ef->symtab + symnum, ef->firstpage_u), sizeof(sym));
+ if (sym.st_name == 0) {
+ fprintf(stderr, "lookup_symbol: corrupt symbol table\n");
+ return ENOENT;
+ }
+
+ strp = strdup((char *)gvatou(ef->strtab + sym.st_name, ef->firstpage_u));
+ if (strcmp(name, strp) == 0) {
+ free(strp);
+ if (sym.st_shndx != SHN_UNDEF ||
+ (sym.st_value != 0 &&
+ ELF_ST_TYPE(sym.st_info) == STT_FUNC)) {
+ *symp = sym;
+ return 0;
+ }
+ return ENOENT;
+ }
+ free(strp);
+ memcpy(&symnum, &ef->chains[symnum], sizeof(symnum));
+ }
+
+ return ENOENT;
+}
+
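+/*
+ * Scan the kernel's set_modmetadata_set linker set and register every
+ * MDT_VERSION record as a module provided by the image.
+ */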
+static void
+parse_metadata(struct preloaded_file *img, struct elf_file *ef,
+ Elf_Addr p_startu, Elf_Addr p_endu)
+{
+ struct mod_metadata md;
+ struct mod_version mver;
+ char *s;
+ int modcnt;
+ Elf_Addr v, p;
+
+ modcnt = 0;
+ for (p = p_startu; p < p_endu; p += sizeof(Elf_Addr)) {
+ memcpy(&v, (void *)p, sizeof(v));
+ memcpy(&md, (void *)gvatou(v, ef->firstpage_u), sizeof(md));
+ if (md.md_type == MDT_VERSION) {
+ s = strdup((char *)gvatou(md.md_cval, ef->firstpage_u));
+ memcpy(&mver,
+ (void *)gvatou(md.md_data, ef->firstpage_u),
+ sizeof(mver));
+ image_addmodule(img, s, mver.mv_version);
+ free(s);
+ modcnt++;
+ }
+ }
+
+	if (modcnt == 0)
+		image_addmodule(img, "kernel", 1);
+}
+
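+/*
+ * Register a module provided by the image unless one with the same name and
+ * version has already been recorded.
+ */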
+static int
+image_addmodule(struct preloaded_file *img, char *modname, int version)
+{
+ struct kernel_module *mp;
+ struct mod_depend mdepend;
+
+ bzero(&mdepend, sizeof(mdepend));
+ mdepend.md_ver_preferred = version;
+
+ mp = image_findmodule(img, modname, &mdepend);
+ if (mp)
+ return (EEXIST);
+ mp = malloc(sizeof(struct kernel_module));
+ if (mp == NULL)
+ return (ENOMEM);
+
+ bzero(mp, sizeof(struct kernel_module));
+ mp->m_name = strdup(modname);
+ mp->m_version = version;
+ mp->m_fp = img;
+ mp->m_next = img->f_modules;
+ img->f_modules = mp;
+
+ return (0);
+}
+
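+/*
+ * Find a module by name, preferring an exact version match and otherwise the
+ * highest version within the range described by 'verinfo'.
+ */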
+static struct kernel_module *
+image_findmodule(struct preloaded_file *img, char *modname,
+ struct mod_depend *verinfo)
+{
+ struct kernel_module *mp, *best;
+ int bestver, mver;
+
+ best = NULL;
+ bestver = 0;
+ for (mp = img->f_modules; mp != NULL; mp = mp->m_next) {
+ if (strcmp(modname, mp->m_name) == 0) {
+ if (verinfo == NULL)
+ return (mp);
+ mver = mp->m_version;
+ if (mver == verinfo->md_ver_preferred)
+ return (mp);
+ if (mver >= verinfo->md_ver_minimum &&
+ mver <= verinfo->md_ver_maximum &&
+ mver > bestver) {
+ best = mp;
+ bestver = mver;
+ }
+ }
+ }
+
+ return (best);
+}