Page MenuHomeFreeBSD

D37428.id113252.diff
No OneTemporary

D37428.id113252.diff

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c
--- a/sys/arm64/arm64/genassym.c
+++ b/sys/arm64/arm64/genassym.c
@@ -73,6 +73,7 @@
ASSYM(TF_SIZE, sizeof(struct trapframe));
ASSYM(TF_SP, offsetof(struct trapframe, tf_sp));
+ASSYM(TF_LR, offsetof(struct trapframe, tf_lr));
ASSYM(TF_ELR, offsetof(struct trapframe, tf_elr));
ASSYM(TF_SPSR, offsetof(struct trapframe, tf_spsr));
ASSYM(TF_X, offsetof(struct trapframe, tf_x));
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -104,8 +104,6 @@
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD,
cpu_model, sizeof(cpu_model), "Machine model");
-#define MAX_CACHES 8 /* Maximum number of caches supported
- architecturally. */
/*
* Per-CPU affinity as provided in MPIDR_EL1
* Indexed by CPU number in logical order selected by the system.
@@ -119,32 +117,6 @@
uint64_t __cpu_affinity[MAXCPU];
static u_int cpu_aff_levels;
-struct cpu_desc {
- uint64_t mpidr;
- uint64_t id_aa64afr0;
- uint64_t id_aa64afr1;
- uint64_t id_aa64dfr0;
- uint64_t id_aa64dfr1;
- uint64_t id_aa64isar0;
- uint64_t id_aa64isar1;
- uint64_t id_aa64isar2;
- uint64_t id_aa64mmfr0;
- uint64_t id_aa64mmfr1;
- uint64_t id_aa64mmfr2;
- uint64_t id_aa64pfr0;
- uint64_t id_aa64pfr1;
- uint64_t id_aa64zfr0;
- uint64_t ctr;
-#ifdef COMPAT_FREEBSD32
- uint64_t id_isar5;
- uint64_t mvfr0;
- uint64_t mvfr1;
-#endif
- uint64_t clidr;
- uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */
- bool have_sve;
-};
-
static struct cpu_desc cpu_desc[MAXCPU];
static struct cpu_desc kern_cpu_desc;
static struct cpu_desc user_cpu_desc;
@@ -1824,6 +1796,27 @@
}
}
+void
+update_cpu_desc(struct cpu_desc *desc)
+{
+ struct mrs_field *fields;
+ uint64_t desc_val, kern_val;
+ int i, j;
+
+ for (i = 0; i < nitems(user_regs); i++) {
+ kern_val = CPU_DESC_FIELD(kern_cpu_desc, i);
+ desc_val = CPU_DESC_FIELD(*desc, i);
+
+ fields = user_regs[i].fields;
+ for (j = 0; fields[j].type != 0; j++) {
+ desc_val = update_lower_register(desc_val, kern_val,
+ fields[j].shift, 4, fields[j].sign);
+ }
+
+ CPU_DESC_FIELD(*desc, i) = desc_val;
+ }
+}
+
/* HWCAP */
bool __read_frequently lse_supported = false;
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -503,6 +503,14 @@
#define ID_AA64DFR0_TraceFilt_NONE (UL(0x0) << ID_AA64DFR0_TraceFilt_SHIFT)
#define ID_AA64DFR0_TraceFilt_8_4 (UL(0x1) << ID_AA64DFR0_TraceFilt_SHIFT)
+/* ID_AA64DFR1_EL1 */
+#define	ID_AA64DFR1_EL1			MRS_REG(ID_AA64DFR1_EL1)
+#define ID_AA64DFR1_EL1_op0 3
+#define ID_AA64DFR1_EL1_op1 0
+#define ID_AA64DFR1_EL1_CRn 0
+#define ID_AA64DFR1_EL1_CRm 5
+#define ID_AA64DFR1_EL1_op2 1
+
/* ID_AA64ISAR0_EL1 */
#define ID_AA64ISAR0_EL1 MRS_REG(ID_AA64ISAR0_EL1)
#define ID_AA64ISAR0_EL1_op0 0x3
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -178,6 +178,36 @@
#define CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1 0
#endif
+#define MAX_CACHES 8 /* Maximum number of caches supported
+ architecturally. */
+
+struct cpu_desc {
+ uint64_t mpidr;
+ uint64_t id_aa64afr0;
+ uint64_t id_aa64afr1;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
+ uint64_t id_aa64isar0;
+ uint64_t id_aa64isar1;
+ uint64_t id_aa64isar2;
+ uint64_t id_aa64mmfr0;
+ uint64_t id_aa64mmfr1;
+ uint64_t id_aa64mmfr2;
+ uint64_t id_aa64pfr0;
+ uint64_t id_aa64pfr1;
+ uint64_t id_aa64zfr0;
+ uint64_t ctr;
+#ifdef COMPAT_FREEBSD32
+ uint64_t id_isar5;
+ uint64_t mvfr0;
+ uint64_t mvfr1;
+#endif
+ uint64_t clidr;
+ uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */
+ bool have_sve;
+};
+
+
extern char btext[];
extern char etext[];
@@ -217,6 +247,7 @@
/* Functions to read the sanitised view of the special registers */
void update_special_regs(u_int);
+void update_cpu_desc(struct cpu_desc *desc);
bool extract_user_id_field(u_int, u_int, uint8_t *);
bool get_kernel_reg(u_int, uint64_t *);
diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h
--- a/sys/arm64/include/pcpu.h
+++ b/sys/arm64/include/pcpu.h
@@ -47,6 +47,7 @@
pcpu_ssbd pc_ssbd; \
struct pmap *pc_curpmap; \
struct pmap *pc_curvmpmap; \
+ void *pc_vcpu; \
u_int pc_bcast_tlbi_workaround; \
/* Store as two u_int values to preserve KBI */ \
u_int pc_mpidr_low; /* lower MPIDR 32 bits */ \
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm.h
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_TRIPLEFAULT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_X0,
+ VM_REG_GUEST_X1,
+ VM_REG_GUEST_X2,
+ VM_REG_GUEST_X3,
+ VM_REG_GUEST_X4,
+ VM_REG_GUEST_X5,
+ VM_REG_GUEST_X6,
+ VM_REG_GUEST_X7,
+ VM_REG_GUEST_X8,
+ VM_REG_GUEST_X9,
+ VM_REG_GUEST_X10,
+ VM_REG_GUEST_X11,
+ VM_REG_GUEST_X12,
+ VM_REG_GUEST_X13,
+ VM_REG_GUEST_X14,
+ VM_REG_GUEST_X15,
+ VM_REG_GUEST_X16,
+ VM_REG_GUEST_X17,
+ VM_REG_GUEST_X18,
+ VM_REG_GUEST_X19,
+ VM_REG_GUEST_X20,
+ VM_REG_GUEST_X21,
+ VM_REG_GUEST_X22,
+ VM_REG_GUEST_X23,
+ VM_REG_GUEST_X24,
+ VM_REG_GUEST_X25,
+ VM_REG_GUEST_X26,
+ VM_REG_GUEST_X27,
+ VM_REG_GUEST_X28,
+ VM_REG_GUEST_X29,
+ VM_REG_GUEST_LR,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_ELR,
+ VM_REG_GUEST_SPSR,
+ VM_REG_ELR_EL2,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_MAX_SUFFIXLEN 15
+
+#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct pmap;
+
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
+typedef int (*vmm_init_func_t)(int ipinum);
+typedef int (*vmm_cleanup_func_t)(void);
+typedef void (*vmm_resume_func_t)(void);
+typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
+typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
+ struct pmap *pmap, struct vm_eventinfo *evinfo);
+typedef void (*vmi_cleanup_func_t)(void *vmi);
+typedef void (*vmi_mmap_set_func_t)(void *arg, vm_offset_t va,
+ vm_offset_t pa, size_t len,
+ vm_prot_t prot);
+typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *arg, vm_offset_t va);
+typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
+ uint64_t *retval);
+typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
+ uint64_t val);
+typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
+typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
+typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
+typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+
+struct vmm_ops {
+ /* Module-wide functions */
+ vmm_init_func_t init;
+ vmm_cleanup_func_t cleanup;
+ vmm_resume_func_t resume;
+ /* VM specific functions */
+ vmi_init_func_t vminit;
+ vmi_run_func_t vmrun;
+ vmi_cleanup_func_t vmcleanup;
+ vmi_get_register_t vmgetreg;
+ vmi_set_register_t vmsetreg;
+ vmi_get_cap_t vmgetcap;
+ vmi_set_cap_t vmsetcap;
+ vmi_vmspace_alloc vmspace_alloc;
+ vmi_vmspace_free vmspace_free;
+};
+
+extern struct vmm_ops vmm_ops_arm;
+
+int vm_create(const char *name, struct vm **retvm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
+int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vmm_map_gpa(struct vm *vm, vm_offset_t va, vm_paddr_t gpa, int pages,
+ vm_page_t *ma);
+void vmm_unmap_gpa(struct vm *vm, vm_offset_t va, size_t pages, vm_page_t *ma);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
+int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+void* vm_get_cookie(struct vm *vm);
+int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
+int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
+int vm_activate_cpu(struct vm *vm, int vcpu);
+int vm_suspend_cpu(struct vm *vm, int vcpu);
+int vm_resume_cpu(struct vm *vm, int vcpu);
+int vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func);
+struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+
+#ifdef _SYS__CPUSET_H_
+/*
+ * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
+ * The rendezvous 'func(arg)' is not allowed to do anything that will
+ * cause the thread to be put to sleep.
+ *
+ * If the rendezvous is being initiated from a vcpu context then the
+ * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
+ *
+ * The caller cannot hold any locks when initiating the rendezvous.
+ *
+ * The implementation of this API may cause vcpus other than those specified
+ * by 'dest' to be stalled. The caller should not rely on any vcpus making
+ * forward progress when the rendezvous is in progress.
+ */
+typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
+void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
+ vm_rendezvous_func_t func, void *arg);
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+#endif /* _SYS__CPUSET_H_ */
+
+static __inline bool
+virt_enabled(void)
+{
+
+ return (has_hyp());
+}
+
+static __inline int
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+ return (*info->sptr);
+}
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
+ bool from_idle);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vm *vm, int vcpu)
+{
+ struct thread *td;
+
+ td = curthread;
+ return (td->td_ast != 0 || td->td_owepreempt != 0);
+}
+#endif
+
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+
+/*
+ * This function is called after a VM-exit that occurred during exception or
+ * interrupt delivery through the IDT. The format of 'intinfo' is described
+ * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
+ *
+ * If a VM-exit handler completes the event delivery successfully then it
+ * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
+ * if the task switch emulation is triggered via a task gate then it should
+ * call this function with 'intinfo=0' to indicate that the external event
+ * is not pending anymore.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
+
+/*
+ * This function is called before every VM-entry to retrieve a pending
+ * event that should be injected into the guest. This function combines
+ * nested events into a double or triple fault.
+ *
+ * Returns 0 if there are no events that need to be injected into the guest
+ * and non-zero otherwise.
+ */
+int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
+
+int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+ uint64_t gpa;
+ size_t len;
+ void *hva;
+ void *cookie;
+};
+
+int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
+
+#endif /* _KERNEL */
+
+#define VM_MAXCPU 4
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+#define VM_GP_M_MASK 0x1f
+#define VM_GP_MMU_ENABLED (1 << 5)
+
+struct vm_guest_paging {
+ uint64_t far;
+ uint64_t ttbr0_el1;
+ uint64_t ttbr1_el1;
+ int flags;
+ int padding;
+};
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_REG_EMUL,
+ VM_EXITCODE_HVC,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_HYP,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SMCCC,
+ VM_EXITCODE_MAX
+};
+
+enum task_switch_reason {
+ TSR_CALL,
+ TSR_IRET,
+ TSR_JMP,
+ TSR_IDT_GATE, /* task gate in IDT */
+};
+
+struct vm_task_switch {
+ uint16_t tsssel; /* new TSS selector */
+ int ext; /* task switch due to external event */
+ uint32_t errcode;
+ int errcode_valid; /* push 'errcode' on the new stack */
+ enum task_switch_reason reason;
+};
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ /*
+ * ARM specific payload.
+ */
+ struct {
+ uint32_t exception_nr;
+ uint32_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } hyp;
+ struct {
+ struct vre vre;
+ } reg_emul;
+ struct {
+ uint64_t gpa;
+ uint64_t esr;
+ } paging;
+ struct {
+ uint64_t gpa;
+ struct vm_guest_paging paging;
+ struct vie vie;
+ } inst_emul;
+
+ /*
+ * A SMCCC call, e.g. starting a core via PSCI.
+ * Further arguments can be read by asking the kernel for
+ * all register values.
+ */
+ struct {
+ uint64_t func_id;
+ uint64_t args[3];
+ } smccc_call;
+
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ } u;
+};
+
+#endif /* _VMM_H_ */
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_dev.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_run {
+ int cpuid;
+ uint64_t pc;
+ struct vm_exit vm_exit;
+
+};
+
+struct vm_exception {
+ int cpuid;
+ int vector;
+ uint32_t error_code;
+ int error_code_valid;
+ int restart_instruction;
+};
+
+struct vm_msi {
+ uint64_t msg;
+ uint64_t addr;
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int index; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+ cpuset_t *cpus;
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_attach_vgic {
+ uint64_t dist_start;
+ size_t dist_size;
+ uint64_t redist_start;
+ size_t redist_size;
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+ IOCNUM_SET_REGISTER_SET = 24,
+ IOCNUM_GET_REGISTER_SET = 25,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+ IOCNUM_RAISE_MSI = 82,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
+
+ /* vm_attach_vgic */
+ IOCNUM_ATTACH_VGIC = 110,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_REGISTER_SET \
+ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define VM_GET_REGISTER_SET \
+ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_RAISE_MSI \
+ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GLA2GPA \
+ _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define VM_ATTACH_VGIC \
+ _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_attach_vgic)
+#endif
diff --git a/sys/arm64/include/vmm_instruction_emul.h b/sys/arm64/include/vmm_instruction_emul.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_instruction_emul.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(void *vm, int cpuid, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(void *vm, int cpuid, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t mrr,
+ mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vie'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/sys/arm64/include/vmm_snapshot.h b/sys/arm64/include/vmm_snapshot.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/arm64.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_ARM64_H_
+#define _VMM_ARM64_H_
+
+#include <machine/reg.h>
+#include <machine/vfp.h>
+#include <machine/hypervisor.h>
+#include <machine/pcpu.h>
+
+#include "mmu.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+struct hypctx {
+ struct trapframe tf;
+
+ /*
+ * EL1 control registers.
+ * Be careful changing the layout of these as we access them from
+ * assembly when switching between the host and guest.
+ */
+ uint64_t elr_el1; /* Exception Link Register */
+ uint64_t sp_el0; /* Stack pointer */
+ uint64_t tpidr_el0; /* EL0 Software ID Register */
+ uint64_t tpidrro_el0; /* Read-only Thread ID Register */
+ uint64_t tpidr_el1; /* EL1 Software ID Register */
+ uint64_t vbar_el1; /* Vector Base Address Register */
+
+ uint64_t actlr_el1; /* Auxiliary Control Register */
+ uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */
+ uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */
+ uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */
+ uint64_t contextidr_el1; /* Current Process Identifier */
+ uint64_t cpacr_el1; /* Architectural Feature Access Control Register */
+ uint64_t csselr_el1; /* Cache Size Selection Register */
+ uint64_t esr_el1; /* Exception Syndrome Register */
+ uint64_t far_el1; /* Fault Address Register */
+ uint64_t mair_el1; /* Memory Attribute Indirection Register */
+ uint64_t mdccint_el1; /* Monitor DCC Interrupt Enable Register */
+ uint64_t mdscr_el1; /* Monitor Debug System Control Register */
+ uint64_t par_el1; /* Physical Address Register */
+ uint64_t sctlr_el1; /* System Control Register */
+ uint64_t tcr_el1; /* Translation Control Register */
+ uint64_t ttbr0_el1; /* Translation Table Base Register 0 */
+ uint64_t ttbr1_el1; /* Translation Table Base Register 1 */
+ uint64_t spsr_el1; /* Saved Program Status Register */
+
+ uint64_t pmcr_el0; /* Performance Monitors Control Register */
+ uint64_t pmccntr_el0;
+ uint64_t pmccfiltr_el0;
+ uint64_t pmcntenset_el0;
+ uint64_t pmintenset_el1;
+ uint64_t pmovsset_el0;
+ uint64_t pmselr_el0;
+ uint64_t pmuserenr_el0;
+ uint64_t pmevcntr_el0[31];
+ uint64_t pmevtyper_el0[31];
+
+ uint64_t dbgbcr_el1[16]; /* Debug Breakpoint Control Registers */
+ uint64_t dbgbvr_el1[16]; /* Debug Breakpoint Value Registers */
+ uint64_t dbgwcr_el1[16]; /* Debug Watchpoint Control Registers */
+ uint64_t dbgwvr_el1[16]; /* Debug Watchpoint Value Registers */
+
+ /* EL2 control registers */
+ uint64_t cptr_el2; /* Architectural Feature Trap Register */
+ uint64_t hcr_el2; /* Hypervisor Configuration Register */
+ uint64_t mdcr_el2; /* Monitor Debug Configuration Register */
+ uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint32_t vcpu;
+ struct hyp *hyp;
+ struct {
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } exit_info;
+
+ struct vtimer_cpu vtimer_cpu;
+ struct vgic_v3_cpu_if vgic_cpu_if;
+ struct vgic_v3_redist vgic_redist;
+#ifdef VFP
+ struct vfpstate vfpstate;
+#endif
+};
+
+struct hyp {
+ struct hypctx ctx[VM_MAXCPU];
+ struct vgic_v3_dist vgic_dist;
+ struct vm *vm;
+ struct vtimer vtimer;
+ uint64_t vmid_generation;
+ uint64_t vttbr_el2;
+ uint64_t el2_addr; /* The address of this in el2 space */
+ bool vgic_attached;
+};
+
+uint64_t vmm_call_hyp(uint64_t, ...);
+void vmm_cleanup(void *hyp_stub_vectors);
+uint64_t vmm_enter_guest(struct hypctx *hypctx);
+uint64_t vmm_read_ich_vtr_el2(void);
+uint64_t vmm_read_cnthctl_el2(void);
+uint64_t vmm_read_tcr_el2(void);
+
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+//#define eprintf(fmt, ...) do {} while(0)
+
+#define VMID_GENERATION_MASK ((1UL<<8) - 1)
+#define build_vttbr(vmid, ptaddr) \
+ ((((vmid) & VMID_GENERATION_MASK) << VTTBR_VMID_SHIFT) | \
+ (uint64_t)(ptaddr))
+
+#define MPIDR_SMP_MASK (0x3 << 30)
+#define MPIDR_AFF1_LEVEL(x) (((x) >> 2) << 8)
+#define MPIDR_AFF0_LEVEL(x) (((x) & 0x3) << 0)
+
+/*
+ * Return true if the exception was caused by a translation fault in the stage 2
+ * translation regime. The DFSC encoding for a translation fault has the format
+ * 0b0001LL, where LL (bits [1:0]) represents the level where the fault occured
+ * (page D7-2280 of the ARMv8 Architecture Manual).
+ */
+#define ISS_DATA_DFSC_TF(esr_iss) \
+ (!((esr_iss) & 0b111000) && ((esr_iss) & 0b000100))
+#define FAR_EL2_PAGE_OFFSET(x) ((x) & PAGE_MASK)
+
+#define DEBUG_ME 0
+
+#define arm64_get_active_vcpu() ((struct hypctx *)PCPU_GET(vcpu))
+
+#endif /* !_VMM_ARM64_H_ */
diff --git a/sys/arm64/vmm/hyp.h b/sys/arm64/vmm/hyp.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/hyp.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_H_
+#define _VMM_HYP_H_
+
+/*
+ * The translation tables for the hypervisor mode will hold mappings for kernel
+ * virtual addresses and an identity mapping (VA == PA) necessary when
+ * enabling/disabling the MMU.
+ *
+ * When in EL2 exception level the translation table base register is TTBR0_EL2
+ * and the virtual addresses generated by the CPU must be at the bottom of the
+ * memory, with the first 16 bits all set to zero:
+ *
+ * 0x0000ffffffffffff End hyp address space
+ * 0x0000000000000000 Start of hyp address space
+ *
+ * To run code in hyp mode we need to convert kernel virtual addresses to
+ * addresses that fit into this address space.
+ *
+ * The kernel virtual address range is:
+ *
+ * 0xffff007fffffffff End of KVA
+ * 0xffff000000000000 Kernel base address & start of KVA
+ *
+ * (see /sys/arm64/include/vmparam.h).
+ *
+ * We could convert the kernel virtual addresses to valid EL2 addresses by
+ * setting the first 16 bits to zero and thus mapping the kernel addresses in
+ * the bottom half of the EL2 address space, but then they might clash with the
+ * identity mapping addresses. Instead we map the kernel addresses in the upper
+ * half of the EL2 address space.
+ *
+ * The hypervisor address space will look like this:
+ *
+ * 0x0000807fffffffff End of KVA mapping
+ * 0x0000800000000000 Start of KVA mapping
+ *
+ * 0x00007fffffffffff End of identity mapping
+ * 0x0000000000000000 Start of identity mapping
+ *
+ * With this scheme we have 47 bits at our disposal for the identity map and
+ * another 47 bits for the kernel virtual addresses. For a maximum physical
+ * memory size of 128TB we are guaranteed to not have any clashes between
+ * addresses.
+ */
+#define HYP_VM_MIN_ADDRESS 0x0000000000000000
+#define HYP_VM_MAX_ADDRESS 0x0001000000000000
+
+/*
+ * When the vmm code is installed the following handles can be used by
+ * the host to call into EL2.
+ */
+#define HYP_CLEANUP 0x00000001
+#define HYP_ENTER_GUEST 0x00000002
+#define HYP_READ_REGISTER 0x00000003
+#define HYP_REG_ICH_VTR 0x1
+#define HYP_REG_CNTHCTL 0x2
+#define HYP_CLEAN_S2_TLBI 0x00000004
+#define HYP_DC_CIVAC 0x00000005
+#define HYP_EL2_TLBI 0x00000006
+#define HYP_EL2_TLBI_ALL 0x1
+#define HYP_EL2_TLBI_VA 0x2
+#define HYP_S2_TLBI_RANGE 0x00000010
+#define HYP_S2_TLBI_ALL 0x00000011
+
+/*
+ * When taking asynchronous exceptions, or interrupts, with the exception of the
+ * SError interrupt, the exception syndrome register is not updated with the
+ * exception code. We need to differentiate between the different exception
+ * types taken to EL2.
+ */
+#define EXCP_TYPE_EL1_SYNC 0
+#define EXCP_TYPE_EL1_IRQ 1
+#define EXCP_TYPE_EL1_FIQ 2
+#define EXCP_TYPE_EL1_ERROR 3
+
+#define EXCP_TYPE_EL2_SYNC 4
+#define EXCP_TYPE_EL2_IRQ 5
+#define EXCP_TYPE_EL2_FIQ 6
+#define EXCP_TYPE_EL2_ERROR 7
+
+#define EXCP_TYPE_MAINT_IRQ 8
+/* Used internally in vmm_hyp.c */
+#define EXCP_TYPE_REENTER 9
+
+#define HYP_GET_VECTOR_TABLE -1
+
+#endif /* !_VMM_HYP_H_ */
diff --git a/sys/arm64/vmm/io/vgic_v3.h b/sys/arm64/vmm/io/vgic_v3.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VGIC_V3_H_
+#define _VMM_VGIC_V3_H_
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm/arm/gic_common.h>
+
+struct hypctx;
+
+int vgic_v3_icc_sgi1r_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vgic_v3_icc_sgi1r_write(void *vm, int vcpuid, uint64_t rval, void *arg);
+
+#define VGIC_SGI_NUM (GIC_LAST_SGI - GIC_FIRST_SGI + 1)
+#define VGIC_PPI_NUM (GIC_LAST_PPI - GIC_FIRST_PPI + 1)
+#define VGIC_SPI_NUM (GIC_LAST_SPI - GIC_FIRST_SPI + 1)
+#define VGIC_PRV_I_NUM (VGIC_SGI_NUM + VGIC_PPI_NUM)
+#define VGIC_SHR_I_NUM (VGIC_SPI_NUM)
+
+#define VGIC_ICH_LR_NUM_MAX 16
+#define VGIC_ICH_APR_NUM_MAX 4
+
+struct vgic_v3_irq {
+ /* List of IRQs that are active or pending */
+ TAILQ_ENTRY(vgic_v3_irq) act_pend_list;
+ struct mtx irq_spinmtx;
+ uint64_t mpidr;
+ int target_vcpu;
+ uint32_t irq;
+ bool active;
+ bool pending;
+ bool enabled;
+ bool level;
+ bool on_aplist;
+ uint8_t priority;
+ uint8_t config;
+#define VGIC_CONFIG_MASK 0x2
+#define VGIC_CONFIG_LEVEL 0x0
+#define VGIC_CONFIG_EDGE 0x2
+};
+
+struct vgic_v3_lpi {
+ struct vgic_v3_irq irq;
+ SLIST_ENTRY(vgic_v3_lpi) next;
+};
+
+struct vgic_mmio_region {
+ vm_offset_t start;
+ vm_offset_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+
+struct vm;
+struct vm_exit;
+struct hyp;
+
+struct vgic_v3_dist {
+ struct mtx dist_mtx;
+
+ uint64_t start;
+ size_t end;
+
+ uint32_t gicd_ctlr; /* Distributor Control Register */
+
+ struct vgic_v3_irq *irqs;
+ SLIST_HEAD(, vgic_v3_lpi) lpis;
+};
+
+#define aff_routing_en(distp) (distp->gicd_ctlr & GICD_CTLR_ARE_NS)
+
+struct vgic_v3_redist {
+ uint64_t start;
+ uint64_t end;
+
+ uint64_t gicr_typer; /* Redistributor Type Register */
+};
+
+struct vgic_v3_irq;
+
+struct vgic_v3_cpu_if {
+ uint32_t ich_eisr_el2; /* End of Interrupt Status Register */
+ uint32_t ich_elrsr_el2; /* Empty List register Status Register (ICH_ELRSR_EL2) */
+ uint32_t ich_hcr_el2; /* Hyp Control Register */
+ uint32_t ich_misr_el2; /* Maintenance Interrupt State Register */
+ uint32_t ich_vmcr_el2; /* Virtual Machine Control Register */
+
+ /*
+ * The List Registers are part of the VM context and are modified on a
+ * world switch. They need to be allocated statically so they are
+ * mapped in the EL2 translation tables when struct hypctx is mapped.
+ */
+ uint64_t ich_lr_el2[VGIC_ICH_LR_NUM_MAX];
+ size_t ich_lr_num;
+
+ /*
+ * We need a mutex for accessing the list registers because they are
+ * modified asynchronously by the virtual timer.
+ *
+ * Note that the mutex *MUST* be a spin mutex because an interrupt can
+ * be injected by a callout callback function, thereby modifying the
+ * list registers from a context where sleeping is forbidden.
+ */
+ struct mtx lr_mtx;
+
+ /* Active Priorities Registers for Group 0 and 1 interrupts */
+ size_t ich_apr_num;
+ uint32_t ich_ap0r_el2[VGIC_ICH_APR_NUM_MAX];
+ uint32_t ich_ap1r_el2[VGIC_ICH_APR_NUM_MAX];
+
+ struct vgic_v3_irq private_irqs[VGIC_PRV_I_NUM];
+ TAILQ_HEAD(, vgic_v3_irq) irq_act_pend;
+ u_int ich_lr_used;
+};
+
+int vgic_v3_attach_to_vm(struct vm *vm, uint64_t dist_start,
+ size_t dist_size, uint64_t redist_start, size_t redist_size);
+void vgic_v3_detach_from_vm(struct vm *vm);
+
+bool vgic_present(void);
+void vgic_v3_init(uint64_t ich_vtr_el2);
+void vgic_v3_vminit(struct hyp *);
+void vgic_v3_cpuinit(struct hypctx *, bool last_vcpu);
+void vgic_v3_cpucleanup(struct hypctx *);
+void vgic_v3_vmcleanup(struct hyp *);
+void vgic_v3_flush_hwstate(void *arg);
+void vgic_v3_sync_hwstate(void *arg);
+
+bool vgic_v3_vcpu_pending_irq(struct hypctx *hypctx);
+int vgic_v3_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid,
+ bool level);
+int vgic_v3_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr);
+
+void vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp);
+int vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled,
+ struct hyp *hyp, int vcpuid);
+
+DECLARE_CLASS(arm_vgic_driver);
+
+#endif /* !_VMM_VGIC_V3_H_ */
diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.c
@@ -0,0 +1,2033 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/bitstring.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/param.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/intr.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#include <arm64/vmm/hyp.h>
+#include <arm64/vmm/mmu.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vgic_v3_reg.h"
+
+MALLOC_DEFINE(M_VGIC_V3, "ARM VMM VGIC V3", "ARM VMM VGIC V3");
+
+static bool have_vgic = false;
+
+struct vgic_v3_virt_features {
+ uint8_t min_prio;
+ size_t ich_lr_num;
+ size_t ich_apr_num;
+};
+
+/* How many IRQs we support (SGIs + PPIs + SPIs). Not including LPIs */
+#define VGIC_NIRQS 1023
+/* Pretend to be an Arm design */
+#define VGIC_IIDR 0x43b
+
+typedef void (register_read)(struct hyp *, int, u_int, uint64_t *, void *);
+typedef void (register_write)(struct hyp *, int, u_int, u_int, u_int, uint64_t,
+ void *);
+
+#define VGIC_8_BIT (1 << 0)
+/* (1 << 1) is reserved for 16 bit accesses */
+#define VGIC_32_BIT (1 << 2)
+#define VGIC_64_BIT (1 << 3)
+
+struct vgic_register {
+ u_int start; /* Start within a memory region */
+ u_int end;
+ u_int size;
+ u_int flags;
+ register_read *read;
+ register_write *write;
+};
+
+#define VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, readf, \
+ writef) \
+{ \
+ .start = (reg_start), \
+ .end = (reg_end), \
+ .size = (reg_size), \
+ .flags = (reg_flags), \
+ .read = (readf), \
+ .write = (writef), \
+}
+
+#define VGIC_REGISTER_RANGE_RAZ_WI(reg_start, reg_end, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, \
+ gic_zero_read, gic_ignore_write)
+
+#define VGIC_REGISTER(start_addr, reg_size, reg_flags, readf, writef) \
+ VGIC_REGISTER_RANGE(start_addr, (start_addr) + (reg_size), \
+ reg_size, reg_flags, readf, writef)
+
+#define VGIC_REGISTER_RAZ_WI(start_addr, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE_RAZ_WI(start_addr, \
+ (start_addr) + (reg_size), reg_size, reg_flags)
+
+static register_read gic_pidr2_read;
+static register_read gic_zero_read;
+static register_write gic_ignore_write;
+
+/* GICD_CTLR */
+static register_read dist_ctlr_read;
+static register_write dist_ctlr_write;
+/* GICD_TYPER */
+static register_read dist_typer_read;
+/* GICD_IIDR */
+static register_read dist_iidr_read;
+/* GICD_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static register_write dist_setclrspi_nsr_write;
+/* GICD_SETSPI_SR - RAZ/WI */
+/* GICD_CLRSPI_SR - RAZ/WI */
+/* GICD_IGROUPR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ISENABLER */
+static register_read dist_isenabler_read;
+static register_write dist_isenabler_write;
+/* GICD_ICENABLER */
+static register_read dist_icenabler_read;
+static register_write dist_icenabler_write;
+/* GICD_ISPENDR */
+static register_read dist_ispendr_read;
+static register_write dist_ispendr_write;
+/* GICD_ICPENDR */
+static register_read dist_icpendr_read;
+static register_write dist_icpendr_write;
+/* GICD_ISACTIVER */
+static register_read dist_isactiver_read;
+static register_write dist_isactiver_write;
+/* GICD_ICACTIVER */
+static register_read dist_icactiver_read;
+static register_write dist_icactiver_write;
+/* GICD_IPRIORITYR */
+static register_read dist_ipriorityr_read;
+static register_write dist_ipriorityr_write;
+/* GICD_ITARGETSR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ICFGR */
+static register_read dist_icfgr_read;
+static register_write dist_icfgr_write;
+/* GICD_IGRPMODR - RAZ/WI from non-secure mode */
+/* GICD_NSACR - RAZ/WI from non-secure mode */
+/* GICD_SGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_CPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_SPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_IROUTER */
+static register_read dist_irouter_read;
+static register_write dist_irouter_write;
+
+static struct vgic_register dist_registers[] = {
+ VGIC_REGISTER(GICD_CTLR, 4, VGIC_32_BIT, dist_ctlr_read,
+ dist_ctlr_write),
+ VGIC_REGISTER(GICD_TYPER, 4, VGIC_32_BIT, dist_typer_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICD_IIDR, 4, VGIC_32_BIT, dist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICD_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_SETSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER(GICD_CLRSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER_RAZ_WI(GICD_SETSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICD_CLRSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IGROUPR(0), GICD_IGROUPR(1024), 4,
+ VGIC_32_BIT),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISENABLER(32), GICD_ISENABLER(1024), 4,
+ VGIC_32_BIT, dist_isenabler_read, dist_isenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICENABLER(32), GICD_ICENABLER(1024), 4,
+ VGIC_32_BIT, dist_icenabler_read, dist_icenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISPENDR(32), GICD_ISPENDR(1024), 4,
+ VGIC_32_BIT, dist_ispendr_read, dist_ispendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICPENDR(32), GICD_ICPENDR(1024), 4,
+ VGIC_32_BIT, dist_icpendr_read, dist_icpendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISACTIVER(32), GICD_ISACTIVER(1024), 4,
+ VGIC_32_BIT, dist_isactiver_read, dist_isactiver_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICACTIVER(32), GICD_ICACTIVER(1024), 4,
+ VGIC_32_BIT, dist_icactiver_read, dist_icactiver_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IPRIORITYR(0), GICD_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+ VGIC_REGISTER_RANGE(GICD_IPRIORITYR(32), GICD_IPRIORITYR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_ipriorityr_read,
+ dist_ipriorityr_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ITARGETSR(0), GICD_ITARGETSR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ICFGR(0), GICD_ICFGR(32), 4,
+ VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICFGR(32), GICD_ICFGR(1024), 4,
+ VGIC_32_BIT, dist_icfgr_read, dist_icfgr_write),
+/*
+ VGIC_REGISTER_RANGE(GICD_IGRPMODR(0), GICD_IGRPMODR(1024), 4,
+ VGIC_32_BIT, dist_igrpmodr_read, dist_igrpmodr_write),
+ VGIC_REGISTER_RANGE(GICD_NSACR(0), GICD_NSACR(1024), 4,
+ VGIC_32_BIT, dist_nsacr_read, dist_nsacr_write),
+*/
+ VGIC_REGISTER_RAZ_WI(GICD_SGIR, 4, VGIC_32_BIT),
+/*
+ VGIC_REGISTER_RANGE(GICD_CPENDSGIR(0), GICD_CPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_cpendsgir_read,
+ dist_cpendsgir_write),
+ VGIC_REGISTER_RANGE(GICD_SPENDSGIR(0), GICD_SPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_spendsgir_read,
+ dist_spendsgir_write),
+*/
+ VGIC_REGISTER_RANGE(GICD_IROUTER(32), GICD_IROUTER(1024), 8,
+ VGIC_64_BIT | VGIC_32_BIT, dist_irouter_read, dist_irouter_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4, VGIC_32_BIT),
+};
+
+/* GICR_CTLR - Ignore writes as no bits can be set */
+static register_read redist_ctlr_read;
+/* GICR_IIDR */
+static register_read redist_iidr_read;
+/* GICR_TYPER */
+static register_read redist_typer_read;
+/* GICR_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICR_WAKER - RAZ/WI from non-secure mode */
+/* GICR_SETLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_CLRLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_PROPBASER - RAZ/WI as no LPIs are supported */
+/* GICR_PENDBASER - RAZ/WI as no LPIs are supported */
+/* GICR_INVLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_INVALLR - RAZ/WI as no LPIs are supported */
+/* GICR_SYNCR - RAZ/WI as no LPIs are supported */
+
+static struct vgic_register redist_rd_registers[] = {
+ VGIC_REGISTER(GICR_CTLR, 4, VGIC_32_BIT, redist_ctlr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_IIDR, 4, VGIC_32_BIT, redist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_TYPER, 8, VGIC_64_BIT | VGIC_32_BIT,
+ redist_typer_read, gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICR_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_WAKER, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SETLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_CLRLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PROPBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PENDBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVALLR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SYNCR, 4, VGIC_32_BIT),
+
+ /* These are identical to the dist registers */
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4,
+ VGIC_32_BIT),
+};
+
+/* GICR_IGROUPR0 - RAZ/WI from non-secure mode */
+/* GICR_ISENABLER0 */
+static register_read redist_ienabler0_read;
+static register_write redist_isenabler0_write;
+/* GICR_ICENABLER0 */
+static register_write redist_icenabler0_write;
+/* GICR_ISPENDR0 */
+static register_read redist_ipendr0_read;
+static register_write redist_ispendr0_write;
+/* GICR_ICPENDR0 */
+static register_write redist_icpendr0_write;
+/* GICR_ISACTIVER0 */
+static register_read redist_iactiver0_read;
+static register_write redist_isactiver0_write;
+/* GICR_ICACTIVER0 */
+static register_write redist_icactiver0_write;
+/* GICR_IPRIORITYR */
+static register_read redist_ipriorityr_read;
+static register_write redist_ipriorityr_write;
+/* GICR_ICFGR0 - RAZ/WI from non-secure mode */
+/* GICR_ICFGR1 */
+static register_read redist_icfgr1_read;
+static register_write redist_icfgr1_write;
+/* GICR_IGRPMODR0 - RAZ/WI from non-secure mode */
+/* GICR_NSACR - RAZ/WI from non-secure mode */
+
+static struct vgic_register redist_sgi_registers[] = {
+ VGIC_REGISTER_RAZ_WI(GICR_IGROUPR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ISENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_isenabler0_write),
+ VGIC_REGISTER(GICR_ICENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_icenabler0_write),
+ VGIC_REGISTER(GICR_ISPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_ispendr0_write),
+ VGIC_REGISTER(GICR_ICPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_icpendr0_write),
+ VGIC_REGISTER(GICR_ISACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_isactiver0_write),
+ VGIC_REGISTER(GICR_ICACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_icactiver0_write),
+ VGIC_REGISTER_RANGE(GICR_IPRIORITYR(0), GICR_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT, redist_ipriorityr_read,
+ redist_ipriorityr_write),
+ VGIC_REGISTER_RAZ_WI(GICR_ICFGR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ICFGR1, 4, VGIC_32_BIT, redist_icfgr1_read,
+ redist_icfgr1_write),
+ VGIC_REGISTER_RAZ_WI(GICR_IGRPMODR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_NSACR, 4, VGIC_32_BIT),
+};
+
+static struct vgic_v3_virt_features virt_features;
+
+static struct vgic_v3_irq *vgic_v3_get_irq(struct hyp *, int, uint32_t);
+static void vgic_v3_release_irq(struct vgic_v3_irq *);
+
+/* TODO: Move to a common file */
+static int
+mpidr_to_vcpu(struct hyp *hyp, uint64_t mpidr)
+{
+ struct vm *vm;
+
+ vm = hyp->vm;
+ for (int i = 0; i < vm_get_maxcpus(vm); i++) {
+ if ((hyp->ctx[i].vmpidr_el2 & GICD_AFF) == mpidr)
+ return (i);
+ }
+ return (-1);
+}
+
+void
+vgic_v3_vminit(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ /*
+ * Configure the Distributor control register. The register resets to an
+ * architecturally UNKNOWN value, so we reset to 0 to disable all
+ * functionality controlled by the register.
+ *
+ * The exception is GICD_CTLR.DS, which is RAO/WI when the Distributor
+ * supports one security state (ARM GIC Architecture Specification for
+ * GICv3 and GICv4, p. 4-464)
+ */
+ dist->gicd_ctlr = 0;
+
+ mtx_init(&dist->dist_mtx, "VGICv3 Distributor lock", NULL, MTX_SPIN);
+}
+
+void
+vgic_v3_cpuinit(struct hypctx *hypctx, bool last_vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ struct vgic_v3_irq *irq;
+ uint64_t aff, vmpidr_el2;
+ int i, irqid;
+
+ vmpidr_el2 = hypctx->vmpidr_el2;
+ KASSERT(vmpidr_el2 != 0,
+ ("Trying to init this CPU's vGIC before the vCPU"));
+ /*
+ * Get affinity for the current CPU. The guest CPU affinity is taken
+ * from VMPIDR_EL2. The Redistributor corresponding to this CPU is
+ * the Redistributor with the same affinity from GICR_TYPER.
+ */
+ aff = (CPU_AFF3(vmpidr_el2) << 24) | (CPU_AFF2(vmpidr_el2) << 16) |
+ (CPU_AFF1(vmpidr_el2) << 8) | CPU_AFF0(vmpidr_el2);
+
+ /* Set up GICR_TYPER. */
+ redist->gicr_typer = aff << GICR_TYPER_AFF_SHIFT;
+	/* Set the vcpu as the processor ID */
+ redist->gicr_typer |= hypctx->vcpu << GICR_TYPER_CPUNUM_SHIFT;
+
+ if (last_vcpu)
+ /* Mark the last Redistributor */
+ redist->gicr_typer |= GICR_TYPER_LAST;
+
+ mtx_init(&cpu_if->lr_mtx, "VGICv3 ICH_LR_EL2 lock", NULL, MTX_SPIN);
+
+ /* Set the SGI and PPI state */
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &cpu_if->private_irqs[irqid];
+
+ mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+ MTX_SPIN);
+ irq->irq = irqid;
+ irq->mpidr = hypctx->vmpidr_el2 & GICD_AFF;
+ irq->target_vcpu = mpidr_to_vcpu(hypctx->hyp, irq->mpidr);
+ if (irqid < VGIC_SGI_NUM) {
+ /* SGIs */
+ irq->enabled = true;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ irq->priority = 0;
+ }
+
+ /*
+ * Configure the Interrupt Controller Hyp Control Register.
+ *
+ * ICH_HCR_EL2_En: enable virtual CPU interface.
+ *
+ * Maintenance interrupts are disabled.
+ */
+ cpu_if->ich_hcr_el2 = ICH_HCR_EL2_En;
+
+ /*
+ * Configure the Interrupt Controller Virtual Machine Control Register.
+ *
+ * ICH_VMCR_EL2_VPMR: lowest priority mask for the VCPU interface
+ * ICH_VMCR_EL2_VBPR1_NO_PREEMPTION: disable interrupt preemption for
+ * Group 1 interrupts
+ * ICH_VMCR_EL2_VBPR0_NO_PREEMPTION: disable interrupt preemption for
+ * Group 0 interrupts
+ * ~ICH_VMCR_EL2_VEOIM: writes to EOI registers perform priority drop
+ * and interrupt deactivation.
+ * ICH_VMCR_EL2_VENG0: virtual Group 0 interrupts enabled.
+ * ICH_VMCR_EL2_VENG1: virtual Group 1 interrupts enabled.
+ */
+ cpu_if->ich_vmcr_el2 = \
+ (virt_features.min_prio << ICH_VMCR_EL2_VPMR_SHIFT) | \
+ ICH_VMCR_EL2_VBPR1_NO_PREEMPTION | ICH_VMCR_EL2_VBPR0_NO_PREEMPTION;
+ cpu_if->ich_vmcr_el2 &= ~ICH_VMCR_EL2_VEOIM;
+ cpu_if->ich_vmcr_el2 |= ICH_VMCR_EL2_VENG0 | ICH_VMCR_EL2_VENG1;
+
+ cpu_if->ich_lr_num = virt_features.ich_lr_num;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ cpu_if->ich_lr_el2[i] = 0UL;
+ cpu_if->ich_lr_used = 0;
+ TAILQ_INIT(&cpu_if->irq_act_pend);
+
+ cpu_if->ich_apr_num = virt_features.ich_apr_num;
+}
+
+void
+vgic_v3_cpucleanup(struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ struct vgic_v3_irq *irq;
+ int irqid;
+
+ cpu_if = &hypctx->vgic_cpu_if;
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &cpu_if->private_irqs[irqid];
+ mtx_destroy(&irq->irq_spinmtx);
+ }
+
+ mtx_destroy(&cpu_if->lr_mtx);
+}
+
+void
+vgic_v3_vmcleanup(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ mtx_destroy(&dist->dist_mtx);
+}
+
+static bool
+vgic_v3_irq_pending(struct vgic_v3_irq *irq)
+{
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL) {
+ return (irq->pending || irq->level);
+ } else {
+ return (irq->pending);
+ }
+}
+
+static bool
+vgic_v3_queue_irq(struct hyp *hyp, struct vgic_v3_cpu_if *cpu_if,
+ int vcpuid, struct vgic_v3_irq *irq)
+{
+ MPASS(vcpuid >= 0);
+ MPASS(vcpuid < VM_MAXCPU);
+
+ mtx_assert(&cpu_if->lr_mtx, MA_OWNED);
+ mtx_assert(&irq->irq_spinmtx, MA_OWNED);
+
+ /* No need to queue the IRQ */
+ if (!irq->level && !irq->pending)
+ return (false);
+
+ if (!irq->on_aplist) {
+ irq->on_aplist = true;
+ TAILQ_INSERT_TAIL(&cpu_if->irq_act_pend, irq, act_pend_list);
+ }
+ return (true);
+}
+
+static uint64_t
+gic_reg_value_64(uint64_t field, uint64_t val, u_int offset, u_int size)
+{
+ uint32_t mask;
+
+ if (offset != 0 || size != 8) {
+ mask = ((1ul << (size * 8)) - 1) << (offset * 8);
+ /* Shift the new bits to the correct place */
+ val <<= (offset * 8);
+ /* Keep only the interesting bits */
+ val &= mask;
+ /* Add the bits we are keeping from the old value */
+ val |= field & ~mask;
+ }
+
+ return (val);
+}
+
+static void
+gic_pidr2_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = GICR_PIDR2_ARCH_GICv3 << GICR_PIDR2_ARCH_SHIFT;
+}
+
+/* Common read-only/write-ignored helpers */
+static void
+gic_zero_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = 0;
+}
+
+static void
+gic_ignore_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ /* Nothing to do */
+}
+
+static uint64_t
+read_enabler(struct hyp *hyp, int vcpuid, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (!irq->enabled)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_enabler(struct hyp *hyp, int vcpuid, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ uint32_t irq_base;
+ int i;
+
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ /* Find the interrupt this bit represents */
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ irq->enabled = set;
+ vgic_v3_release_irq(irq);
+ }
+}
+
+static uint64_t
+read_pendr(struct hyp *hyp, int vcpuid, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (vgic_v3_irq_pending(irq))
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static uint64_t
+write_pendr(struct hyp *hyp, int vcpuid, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int target_vcpu, i;
+ bool notify;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ notify = false;
+ target_vcpu = irq->target_vcpu;
+ if (target_vcpu < 0)
+ goto next_irq;
+ cpu_if = &hyp->ctx[target_vcpu].vgic_cpu_if;
+
+ if (!set) {
+ /* pending -> not pending */
+ irq->pending = false;
+ } else {
+ irq->pending = true;
+ mtx_lock_spin(&cpu_if->lr_mtx);
+ notify = vgic_v3_queue_irq(hyp, cpu_if, target_vcpu,
+ irq);
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+ }
+next_irq:
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(hyp->vm, target_vcpu, false);
+ }
+
+ return (ret);
+}
+
+static uint64_t
+read_activer(struct hyp *hyp, int vcpuid, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (irq->active)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+/*
+ * Update the active state of the 32 interrupts in group n whose bit in
+ * val is set; `set` selects between set-active and clear-active
+ * behaviour. Newly activated interrupts are queued on their target
+ * vcpu, which is then notified after the IRQ lock is dropped.
+ */
+static void
+write_activer(struct hyp *hyp, int vcpuid, u_int n, bool set, uint64_t val)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	uint32_t irq_base;
+	int target_vcpu, i;
+	bool notify;
+
+	irq_base = n * 32;
+	for (i = 0; i < 32; i++) {
+		/* We only change interrupts when the appropriate bit is set */
+		if ((val & (1u << i)) == 0)
+			continue;
+
+		irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+		if (irq == NULL)
+			continue;
+
+		notify = false;
+		target_vcpu = irq->target_vcpu;
+		/* Skip interrupts not yet routed to a vcpu */
+		if (target_vcpu < 0)
+			goto next_irq;
+		cpu_if = &hyp->ctx[target_vcpu].vgic_cpu_if;
+
+		if (!set) {
+			/* active -> not active */
+			irq->active = false;
+		} else {
+			/* not active -> active */
+			irq->active = true;
+			mtx_lock_spin(&cpu_if->lr_mtx);
+			notify = vgic_v3_queue_irq(hyp, cpu_if, target_vcpu,
+			    irq);
+			mtx_unlock_spin(&cpu_if->lr_mtx);
+		}
+next_irq:
+		vgic_v3_release_irq(irq);
+
+		/* Wake the target vcpu after dropping the IRQ spinlock */
+		if (notify)
+			vcpu_notify_event(hyp->vm, target_vcpu, false);
+	}
+}
+
+/*
+ * Pack the 8-bit priority fields of the four interrupts covered by
+ * GIC[DR]_IPRIORITYRn into a single 32-bit value.
+ */
+static uint64_t
+read_priorityr(struct hyp *hyp, int vcpuid, int n)
+{
+	struct vgic_v3_irq *irq;
+	uint64_t val;
+	uint32_t first_irq;
+	int slot;
+
+	val = 0;
+	first_irq = n * 4;
+	for (slot = 0; slot < 4; slot++) {
+		irq = vgic_v3_get_irq(hyp, vcpuid, first_irq + slot);
+		if (irq == NULL)
+			continue;
+
+		val |= ((uint64_t)irq->priority) << (slot * 8);
+		vgic_v3_release_irq(irq);
+	}
+
+	return (val);
+}
+
+/*
+ * Store one 8-bit priority field per byte of val for `size` consecutive
+ * interrupts starting at irq_base.
+ */
+static void
+write_priorityr(struct hyp *hyp, int vcpuid, u_int irq_base, u_int size,
+    uint64_t val)
+{
+	struct vgic_v3_irq *irq;
+	int slot;
+
+	for (slot = 0; slot < size; slot++) {
+		irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + slot);
+		if (irq == NULL)
+			continue;
+
+		/* Set the priority. We support 32 priority steps (5 bits) */
+		irq->priority = (val >> (slot * 8)) & 0xf8;
+		vgic_v3_release_irq(irq);
+	}
+}
+
+/*
+ * Pack the 2-bit trigger configuration of the 16 interrupts covered by
+ * GIC[DR]_ICFGRn into a single 32-bit value.
+ */
+static uint64_t
+read_config(struct hyp *hyp, int vcpuid, int n)
+{
+	struct vgic_v3_irq *irq;
+	uint64_t val;
+	uint32_t first_irq;
+	int slot;
+
+	val = 0;
+	first_irq = n * 16;
+	for (slot = 0; slot < 16; slot++) {
+		irq = vgic_v3_get_irq(hyp, vcpuid, first_irq + slot);
+		if (irq == NULL)
+			continue;
+
+		val |= ((uint64_t)irq->config) << (slot * 2);
+		vgic_v3_release_irq(irq);
+	}
+
+	return (val);
+}
+
+/*
+ * Update the 2-bit trigger configuration of the 16 interrupts covered
+ * by GIC[DR]_ICFGRn from val. SGIs and PPIs are left untouched.
+ */
+static void
+write_config(struct hyp *hyp, int vcpuid, int n, uint64_t val)
+{
+	struct vgic_v3_irq *irq;
+	uint32_t first_irq;
+	int slot;
+
+	first_irq = n * 16;
+	for (slot = 0; slot < 16; slot++) {
+		/*
+		 * The config can't be changed for SGIs and PPIs. SGIs have
+		 * an edge-triggered behaviour, and the register is
+		 * implementation defined to be read-only for PPIs.
+		 */
+		if (first_irq + slot < VGIC_PRV_I_NUM)
+			continue;
+
+		irq = vgic_v3_get_irq(hyp, vcpuid, first_irq + slot);
+		if (irq == NULL)
+			continue;
+
+		/* Bit 0 is RES0 */
+		irq->config = (val >> (slot * 2)) & VGIC_CONFIG_MASK;
+		vgic_v3_release_irq(irq);
+	}
+}
+
+/*
+ * Return the routing affinity (GICD_IROUTERn) of interrupt n, or 0 if
+ * the INTID is invalid.
+ */
+static uint64_t
+read_route(struct hyp *hyp, int vcpuid, int n)
+{
+	struct vgic_v3_irq *irq;
+	uint64_t aff;
+
+	irq = vgic_v3_get_irq(hyp, vcpuid, n);
+	if (irq == NULL)
+		return (0);
+
+	aff = irq->mpidr;
+	vgic_v3_release_irq(irq);
+
+	return (aff);
+}
+
+/*
+ * Update the routing affinity (GICD_IROUTERn) of interrupt n and
+ * recompute its target vcpu. Supports partial-width accesses via
+ * offset/size.
+ */
+static void
+write_route(struct hyp *hyp, int vcpuid, int n, uint64_t val, u_int offset,
+    u_int size)
+{
+	struct vgic_v3_irq *irq;
+
+	irq = vgic_v3_get_irq(hyp, vcpuid, n);
+	if (irq == NULL)
+		return;
+
+	irq->mpidr = gic_reg_value_64(irq->mpidr, val, offset, size) & GICD_AFF;
+	irq->target_vcpu = mpidr_to_vcpu(hyp, irq->mpidr);
+	/*
+	 * If the interrupt is pending we can either use the old mpidr, or
+	 * the new mpidr. To simplify this code we use the old value so we
+	 * don't need to move the interrupt until the next time it is
+	 * moved to the pending state.
+	 */
+	vgic_v3_release_irq(irq);
+}
+
+/*
+ * Distributor register handlers.
+ */
+/* GICD_CTLR */
+static void
+dist_ctlr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	struct vgic_v3_dist *dist;
+
+	dist = &hyp->vgic_dist;
+	/* gicd_ctlr is shared between vcpus; read it under the dist lock */
+	mtx_lock_spin(&dist->dist_mtx);
+	*rval = dist->gicd_ctlr;
+	mtx_unlock_spin(&dist->dist_mtx);
+
+	/* Writes are never pending */
+	*rval &= ~GICD_CTLR_RWP;
+}
+
+static void
+dist_ctlr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	struct vgic_v3_dist *dist;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	dist = &hyp->vgic_dist;
+
+	/*
+	 * GICv2 backwards compatibility is not implemented so
+	 * ARE_NS is RAO/WI. This means EnableGrp1 is RES0.
+	 *
+	 * EnableGrp1A is supported, and RWP is read-only.
+	 *
+	 * All other bits are RES0 from non-secure mode as we
+	 * implement as if we are in a system with two security
+	 * states.
+	 */
+	wval &= GICD_CTLR_G1A;
+	wval |= GICD_CTLR_ARE_NS;
+	mtx_lock_spin(&dist->dist_mtx);
+	dist->gicd_ctlr = wval;
+	/* TODO: Wake any vcpus that have interrupts pending */
+	mtx_unlock_spin(&dist->dist_mtx);
+}
+
+/* GICD_TYPER */
+static void
+dist_typer_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	uint32_t typer;
+
+	/* Advertise 10 interrupt ID bits (IDbits is encoded as bits - 1) */
+	typer = (10 - 1) << GICD_TYPER_IDBITS_SHIFT;
+	/* Message-based interrupts (GICD_SETSPI_NSR) are supported */
+	typer |= GICD_TYPER_MBIS;
+	/* ITLinesNumber: */
+	typer |= howmany(VGIC_NIRQS + 1, 32) - 1;
+
+	*rval = typer;
+}
+
+/* GICD_IIDR */
+static void
+dist_iidr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = VGIC_IIDR;
+}
+
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static void
+dist_setclrspi_nsr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	uint32_t irqid;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	irqid = wval & GICD_SPI_INTID_MASK;
+	/* SETSPI raises the SPI, CLRSPI lowers it */
+	vgic_v3_inject_irq(hyp, vcpuid, irqid, reg == GICD_SETSPI_NSR);
+}
+
+/* GICD_ISENABLER */
+static void
+dist_isenabler_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ISENABLER(0)) / 4;
+	/* GICD_ISENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_enabler(hyp, vcpuid, n);
+}
+
+static void
+dist_isenabler_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ISENABLER(0)) / 4;
+	/* GICD_ISENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Set-enable: only bits set in wval enable their interrupt */
+	write_enabler(hyp, vcpuid, n, true, wval);
+}
+
+/* GICD_ICENABLER */
+static void
+dist_icenabler_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICENABLER(0)) / 4;
+	/* GICD_ICENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_enabler(hyp, vcpuid, n);
+}
+
+static void
+dist_icenabler_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	/*
+	 * reg is the offset of a GICD_ICENABLERn register, so the index
+	 * must be computed from the ICENABLER base, not the ISENABLER
+	 * base used previously.
+	 */
+	n = (reg - GICD_ICENABLER(0)) / 4;
+	/* GICD_ICENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Clear-enable: only bits set in wval disable their interrupt */
+	write_enabler(hyp, vcpuid, n, false, wval);
+}
+
+/* GICD_ISPENDR */
+static void
+dist_ispendr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ISPENDR(0)) / 4;
+	/* GICD_ISPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_pendr(hyp, vcpuid, n);
+}
+
+static void
+dist_ispendr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ISPENDR(0)) / 4;
+	/* GICD_ISPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Set-pending: only bits set in wval mark their interrupt pending */
+	write_pendr(hyp, vcpuid, n, true, wval);
+}
+
+/* GICD_ICPENDR */
+static void
+dist_icpendr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICPENDR(0)) / 4;
+	/* GICD_ICPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_pendr(hyp, vcpuid, n);
+}
+
+static void
+dist_icpendr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ICPENDR(0)) / 4;
+	/* GICD_ICPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Clear-pending: only bits set in wval clear their interrupt */
+	write_pendr(hyp, vcpuid, n, false, wval);
+}
+
+/* GICD_ISACTIVER */
+/* Affinity routing is enabled so isactiver0 is RAZ/WI */
+static void
+dist_isactiver_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ISACTIVER(0)) / 4;
+	/* GICD_ISACTIVER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_activer(hyp, vcpuid, n);
+}
+
+static void
+dist_isactiver_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ISACTIVER(0)) / 4;
+	/* GICD_ISACTIVE0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Set-active: only bits set in wval mark their interrupt active */
+	write_activer(hyp, vcpuid, n, true, wval);
+}
+
+/* GICD_ICACTIVER */
+static void
+dist_icactiver_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICACTIVER(0)) / 4;
+	/* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_activer(hyp, vcpuid, n);
+}
+
+static void
+dist_icactiver_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ICACTIVER(0)) / 4;
+	/* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Clear-active: only bits set in wval deactivate their interrupt */
+	write_activer(hyp, vcpuid, n, false, wval);
+}
+
+/* GICD_IPRIORITYR */
+/* Affinity routing is enabled so ipriorityr0-7 is RAZ/WI */
+static void
+dist_ipriorityr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_IPRIORITYR(0)) / 4;
+	/* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+	MPASS(n > 7);
+	*rval = read_priorityr(hyp, vcpuid, n);
+}
+
+static void
+dist_ipriorityr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	u_int irq_base;
+
+	/* Byte-addressable: each byte of the register is one interrupt */
+	irq_base = (reg - GICD_IPRIORITYR(0)) + offset;
+	/* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+	MPASS(irq_base > 31);
+	write_priorityr(hyp, vcpuid, irq_base, size, wval);
+}
+
+/* GICD_ICFGR */
+static void
+dist_icfgr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICFGR(0)) / 4;
+	/* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+	MPASS(n > 1);
+	*rval = read_config(hyp, vcpuid, n);
+}
+
+static void
+dist_icfgr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ICFGR(0)) / 4;
+	/* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+	MPASS(n > 1);
+	write_config(hyp, vcpuid, n, wval);
+}
+
+/* GICD_IROUTER */
+static void
+dist_irouter_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	/* 64-bit registers, so stride is 8 bytes */
+	n = (reg - GICD_IROUTER(0)) / 8;
+	/* GICD_IROUTER0-31 don't exist */
+	MPASS(n > 31);
+	*rval = read_route(hyp, vcpuid, n);
+}
+
+static void
+dist_irouter_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	n = (reg - GICD_IROUTER(0)) / 8;
+	/* GICD_IROUTER0-31 don't exist */
+	MPASS(n > 31);
+	write_route(hyp, vcpuid, n, wval, offset, size);
+}
+
+/*
+ * Find the entry in reg_list covering `reg` and dispatch to its read
+ * handler, extracting the requested sub-field for accesses narrower
+ * than the register. Returns false if no entry covers the access.
+ */
+static bool
+vgic_register_read(struct hyp *hyp, struct vgic_register *reg_list,
+    u_int reg_list_size, int vcpuid, u_int reg, u_int size,
+    uint64_t *rval, void *arg)
+{
+	u_int idx, offset;
+
+	for (idx = 0; idx < reg_list_size; idx++) {
+		if (reg < reg_list[idx].start ||
+		    reg + size > reg_list[idx].end)
+			continue;
+
+		/* Align the access down to the register's base */
+		offset = reg & (reg_list[idx].size - 1);
+		reg -= offset;
+		if ((reg_list[idx].flags & size) == 0) {
+			panic("TODO: Handle invalid register size: "
+			    "reg %x size %d", reg, size);
+		}
+		reg_list[idx].read(hyp, vcpuid, reg, rval, NULL);
+
+		/* Move the bits into the correct place */
+		*rval >>= (offset * 8);
+		if (size < 8)
+			*rval &= (1ul << (size * 8)) - 1;
+		return (true);
+	}
+	return (false);
+}
+
+/*
+ * Find the entry in reg_list covering `reg` and dispatch to its write
+ * handler with the offset of the access within the register. Returns
+ * false if no entry covers the access.
+ */
+static bool
+vgic_register_write(struct hyp *hyp, struct vgic_register *reg_list,
+    u_int reg_list_size, int vcpuid, u_int reg, u_int size,
+    uint64_t wval, void *arg)
+{
+	u_int idx, offset;
+
+	for (idx = 0; idx < reg_list_size; idx++) {
+		if (reg < reg_list[idx].start ||
+		    reg + size > reg_list[idx].end)
+			continue;
+
+		/* Align the access down to the register's base */
+		offset = reg & (reg_list[idx].size - 1);
+		reg -= offset;
+		if ((reg_list[idx].flags & size) == 0) {
+			panic("TODO: Handle invalid register size: "
+			    "reg %x size %d", reg, size);
+		}
+		reg_list[idx].write(hyp, vcpuid, reg, offset,
+		    size, wval, NULL);
+		return (true);
+	}
+	return (false);
+}
+
+/*
+ * MMIO read handler for the distributor register range. Validates the
+ * faulting address and alignment, then dispatches via dist_registers.
+ * Unknown registers currently read as zero.
+ */
+static int
+dist_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < dist->start || fault_ipa + size > dist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - dist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (vgic_register_read(hyp, dist_registers, nitems(dist_registers),
+	    vcpuid, reg, size, rval, NULL))
+		return (0);
+
+	/* TODO: Check the correct behaviour */
+	printf("%s: %lx\n", __func__, fault_ipa - dist->start);
+	*rval = 0;
+
+	return (0);
+}
+
+/*
+ * MMIO write handler for the distributor register range. Validates the
+ * faulting address and alignment, then dispatches via dist_registers.
+ * Writes to unknown registers panic (development aid).
+ */
+static int
+dist_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < dist->start || fault_ipa + size > dist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - dist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (vgic_register_write(hyp, dist_registers, nitems(dist_registers),
+	    vcpuid, reg, size, wval, NULL))
+		return (0);
+
+	panic("%s: %lx\n", __func__, fault_ipa - dist->start);
+	return (0);
+}
+
+/*
+ * Redistributor register handlers.
+ *
+ * RD_base:
+ */
+/* GICR_CTLR */
+static void
+redist_ctlr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	/* LPIs not supported */
+	*rval = 0;
+}
+
+/* GICR_IIDR */
+static void
+redist_iidr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = VGIC_IIDR;
+}
+
+/* GICR_TYPER */
+static void
+redist_typer_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	struct vgic_v3_redist *redist;
+
+	/* Return the value precomputed for this vcpu's redistributor */
+	redist = &hyp->ctx[vcpuid].vgic_redist;
+	*rval = redist->gicr_typer;
+}
+
+/*
+ * SGI_base:
+ */
+/* GICR_ISENABLER0 */
+static void
+redist_ienabler0_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	/* Group 0 covers the private (SGI/PPI) interrupts */
+	*rval = read_enabler(hyp, vcpuid, 0);
+}
+
+static void
+redist_isenabler0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_enabler(hyp, vcpuid, 0, true, wval);
+}
+
+/* GICR_ICENABLER0 */
+static void
+redist_icenabler0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_enabler(hyp, vcpuid, 0, false, wval);
+}
+
+/* GICR_ISPENDR0 */
+static void
+redist_ipendr0_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = read_pendr(hyp, vcpuid, 0);
+}
+
+static void
+redist_ispendr0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_pendr(hyp, vcpuid, 0, true, wval);
+}
+
+/* GICR_ICPENDR0 */
+static void
+redist_icpendr0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_pendr(hyp, vcpuid, 0, false, wval);
+}
+
+/* GICR_ISACTIVER0 */
+static void
+redist_iactiver0_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = read_activer(hyp, vcpuid, 0);
+}
+
+static void
+redist_isactiver0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	/* NOTE(review): no MPASS on offset/size here, unlike the siblings */
+	write_activer(hyp, vcpuid, 0, true, wval);
+}
+
+/* GICR_ICACTIVER0 */
+static void
+redist_icactiver0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	write_activer(hyp, vcpuid, 0, false, wval);
+}
+
+/* GICR_IPRIORITYR */
+static void
+redist_ipriorityr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICR_IPRIORITYR(0)) / 4;
+	*rval = read_priorityr(hyp, vcpuid, n);
+}
+
+static void
+redist_ipriorityr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	u_int irq_base;
+
+	/* Byte-addressable: each byte of the register is one interrupt */
+	irq_base = (reg - GICR_IPRIORITYR(0)) + offset;
+	write_priorityr(hyp, vcpuid, irq_base, size, wval);
+}
+
+/* GICR_ICFGR1 */
+static void
+redist_icfgr1_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = read_config(hyp, vcpuid, 0);
+}
+
+static void
+redist_icfgr1_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_config(hyp, vcpuid, 0, wval);
+}
+
+/*
+ * MMIO read handler for this vcpu's redistributor range. Dispatches to
+ * the RD_base or SGI_base register list depending on the offset.
+ * Unknown registers panic (development aid).
+ */
+static int
+redist_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < redist->start || fault_ipa + size > redist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - redist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (reg < GICR_RD_BASE_SIZE) {
+		if (vgic_register_read(hyp, redist_rd_registers,
+		    nitems(redist_rd_registers), vcpuid, reg, size, rval, NULL))
+			return (0);
+	} else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+		if (vgic_register_read(hyp, redist_sgi_registers,
+		    nitems(redist_sgi_registers), vcpuid,
+		    reg - GICR_SGI_BASE, size, rval, NULL))
+			return (0);
+	}
+
+	panic("%s: %lx", __func__, reg);
+}
+
+/*
+ * MMIO write handler for this vcpu's redistributor range. Dispatches to
+ * the RD_base or SGI_base register list depending on the offset.
+ * Unknown registers panic (development aid).
+ */
+static int
+redist_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < redist->start || fault_ipa + size > redist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - redist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (reg < GICR_RD_BASE_SIZE) {
+		if (vgic_register_write(hyp, redist_rd_registers,
+		    nitems(redist_rd_registers), vcpuid, reg, size, wval, NULL))
+			return (0);
+	} else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+		if (vgic_register_write(hyp, redist_sgi_registers,
+		    nitems(redist_sgi_registers), vcpuid,
+		    reg - GICR_SGI_BASE, size, wval, NULL))
+			return (0);
+	}
+
+	panic("%s: %lx", __func__, reg);
+}
+
+/*
+ * Trapped read of ICC_SGI1R_EL1. The register is write-only, so a read
+ * should fault; for now it reads as zero.
+ */
+int
+vgic_v3_icc_sgi1r_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+	/*
+	 * TODO: Inject an unknown exception.
+	 */
+	*rval = 0;
+	return (0);
+}
+
+/* vgic_v3_icc_sgi1r_write currently only handles 16 CPUs */
+CTASSERT(VM_MAXCPU <= 16);
+/*
+ * Trapped write of ICC_SGI1R_EL1: inject the requested SGI either into
+ * the vcpus named by the target list or, when IRM is set, into every
+ * active vcpu except the sender.
+ */
+int
+vgic_v3_icc_sgi1r_write(void *vm, int vcpuid, uint64_t rval, void *arg)
+{
+	struct hyp *hyp;
+	cpuset_t active_cpus;
+	uint32_t irqid;
+	int cpus, vcpu;
+
+	hyp = vm_get_cookie(vm);
+	active_cpus = vm_active_cpus(vm);
+	irqid = (rval >> ICC_SGI1R_EL1_SGIID_SHIFT) & ICC_SGI1R_EL1_SGIID_MASK;
+	if ((rval & ICC_SGI1R_EL1_IRM) == 0) {
+		/*
+		 * TODO: Support on more than 16 CPUs. This is the mask for the
+		 * affinity bits. These should be 0.
+		 */
+		if ((rval & 0xff00ff00ff000ul) != 0)
+			return (0);
+		/* TargetList: one bit per vcpu within affinity level 0 */
+		cpus = rval & 0xff;
+		vcpu = 0;
+		while (cpus > 0) {
+			if (CPU_ISSET(vcpu, &active_cpus) && vcpu != vcpuid) {
+				vgic_v3_inject_irq(hyp, vcpu, irqid, true);
+			}
+			vcpu++;
+			cpus >>= 1;
+		}
+	} else {
+		/* Send an IPI to all CPUs other than the current CPU */
+		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+			if (CPU_ISSET(vcpu, &active_cpus) && vcpu != vcpuid) {
+				vgic_v3_inject_irq(hyp, vcpu, irqid, true);
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate and initialise the shared-interrupt (SPI) state for a VM.
+ * The private (SGI/PPI) interrupts live per-vcpu and are not touched
+ * here.
+ */
+static void
+vgic_v3_mmio_init(struct hyp *hyp)
+{
+	struct vgic_v3_dist *dist;
+	struct vgic_v3_irq *irq;
+	int idx;
+
+	/* Allocate memory for the SPIs */
+	dist = &hyp->vgic_dist;
+	dist->irqs = malloc((VGIC_NIRQS - VGIC_PRV_I_NUM) *
+	    sizeof(*dist->irqs), M_VGIC_V3, M_WAITOK | M_ZERO);
+
+	for (idx = 0; idx < VGIC_NIRQS - VGIC_PRV_I_NUM; idx++) {
+		irq = &dist->irqs[idx];
+		mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+		    MTX_SPIN);
+		/* SPI INTIDs start after the private interrupts */
+		irq->irq = idx + VGIC_PRV_I_NUM;
+	}
+}
+
+/*
+ * Tear down the SPI state created by vgic_v3_mmio_init().
+ */
+static void
+vgic_v3_mmio_destroy(struct hyp *hyp)
+{
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	int idx;
+
+	for (idx = 0; idx < VGIC_NIRQS - VGIC_PRV_I_NUM; idx++)
+		mtx_destroy(&dist->irqs[idx].irq_spinmtx);
+
+	free(dist->irqs, M_VGIC_V3);
+}
+
+/*
+ * Attach the virtual GIC to a VM: validate the guest-physical register
+ * windows, record them, register the MMIO fault handlers and allocate
+ * the SPI state. Returns EINVAL on a misaligned or mis-sized window.
+ */
+int
+vgic_v3_attach_to_vm(struct vm *vm, uint64_t dist_start, size_t dist_size,
+    uint64_t redist_start, size_t redist_size)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	struct vgic_v3_redist *redist;
+	int i;
+
+	/* The register bases need to be 64k aligned */
+	if (!__is_aligned(dist_start, PAGE_SIZE_64K) ||
+	    !__is_aligned(redist_start, PAGE_SIZE_64K))
+		return (EINVAL);
+
+	/* The dist register space is 1 64k block */
+	if (dist_size != PAGE_SIZE_64K)
+		return (EINVAL);
+
+	/* The redist register space is 2 64k blocks */
+	if (redist_size != PAGE_SIZE_64K * 2)
+		return (EINVAL);
+
+	/* Set the distributor address and size for trapping guest access. */
+	dist->start = dist_start;
+	dist->end = dist_start + dist_size;
+
+	/*
+	 * NOTE(review): every vcpu records the same redistributor window;
+	 * redist_read/redist_write select the per-vcpu state by vcpuid.
+	 */
+	for (i = 0; i < VM_MAXCPU; i++) {
+		redist = &hyp->ctx[i].vgic_redist;
+		/* Set the redistributor address and size. */
+		redist->start = redist_start;
+		redist->end = redist_start + redist_size;
+	}
+
+	vm_register_inst_handler(vm, dist_start, dist_size, dist_read,
+	    dist_write);
+	vm_register_inst_handler(vm, redist_start, redist_size, redist_read,
+	    redist_write);
+
+	vgic_v3_mmio_init(hyp);
+
+	hyp->vgic_attached = true;
+
+	return (0);
+}
+
+/*
+ * Detach the virtual GIC from a VM, releasing the SPI state. Safe to
+ * call when no vgic is attached.
+ */
+void
+vgic_v3_detach_from_vm(struct vm *vm)
+{
+	struct hyp *hyp;
+
+	hyp = vm_get_cookie(vm);
+	if (!hyp->vgic_attached)
+		return;
+
+	hyp->vgic_attached = false;
+	vgic_v3_mmio_destroy(hyp);
+}
+
+/*
+ * Look up the vgic_v3_irq for the given INTID and return it with its
+ * spinlock held; the caller must drop it with vgic_v3_release_irq().
+ * Returns NULL for out-of-range INTIDs and unsupported LPIs.
+ */
+static struct vgic_v3_irq *
+vgic_v3_get_irq(struct hyp *hyp, int vcpuid, uint32_t irqid)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_dist *dist;
+	struct vgic_v3_irq *irq;
+
+	if (irqid < VGIC_PRV_I_NUM) {
+		/* Private (SGI/PPI) interrupts are banked per-vcpu */
+		if (vcpuid < 0 || vcpuid >= nitems(hyp->ctx))
+			return (NULL);
+
+		cpu_if = &hyp->ctx[vcpuid].vgic_cpu_if;
+		irq = &cpu_if->private_irqs[irqid];
+	} else if (irqid <= GIC_LAST_SPI) {
+		dist = &hyp->vgic_dist;
+		/*
+		 * dist->irqs has (VGIC_NIRQS - VGIC_PRV_I_NUM) entries, so
+		 * bound-check the INTID against the total interrupt count
+		 * before rebasing it to an array index. Checking the
+		 * rebased index against VGIC_NIRQS would allow reads past
+		 * the end of the array.
+		 */
+		if (irqid >= VGIC_NIRQS)
+			return (NULL);
+		irq = &dist->irqs[irqid - VGIC_PRV_I_NUM];
+	} else if (irqid < GIC_FIRST_LPI) {
+		/* Special INTIDs (1020-1023) */
+		return (NULL);
+	} else {
+		/* No support for LPIs */
+		return (NULL);
+	}
+
+	mtx_lock_spin(&irq->irq_spinmtx);
+	return (irq);
+}
+
+/* Drop the per-interrupt spinlock taken by vgic_v3_get_irq(). */
+static void
+vgic_v3_release_irq(struct vgic_v3_irq *irq)
+{
+
+	mtx_unlock_spin(&irq->irq_spinmtx);
+}
+
+/*
+ * Return true if this vcpu has any interrupts buffered on its
+ * active/pending list.
+ */
+bool
+vgic_v3_vcpu_pending_irq(struct hypctx *hypctx)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	bool has_work;
+
+	cpu_if = &hypctx->vgic_cpu_if;
+	mtx_lock_spin(&cpu_if->lr_mtx);
+	has_work = !TAILQ_EMPTY(&cpu_if->irq_act_pend);
+	mtx_unlock_spin(&cpu_if->lr_mtx);
+
+	return (has_work);
+}
+
+/*
+ * Decide whether an injection should take effect:
+ * - Level-triggered IRQ: only when the level changes
+ * - Edge-triggered IRQ: only on a high level
+ */
+static bool
+vgic_v3_check_irq(struct vgic_v3_irq *irq, bool level)
+{
+	uint32_t cfg;
+
+	cfg = irq->config & VGIC_CONFIG_MASK;
+	if (cfg == VGIC_CONFIG_LEVEL)
+		return (level != irq->level);
+	if (cfg == VGIC_CONFIG_EDGE)
+		return (level);
+
+	return (false);
+}
+
+/*
+ * Inject an interrupt into the VM. vcpuid may be -1 for SPIs, in which
+ * case the interrupt's routed target vcpu is used. Returns 0 on
+ * success, 1 on a malformed INTID or invalid target.
+ */
+int
+vgic_v3_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	int target_vcpu;
+	bool notify;
+
+	KASSERT(vcpuid == -1 || irqid < VGIC_PRV_I_NUM,
+	    ("%s: SPI/LPI with vcpuid set: irq %u vcpuid %u", __func__, irqid,
+	    vcpuid));
+
+	irq = vgic_v3_get_irq(hyp, vcpuid, irqid);
+	if (irq == NULL) {
+		eprintf("Malformed IRQ %u.\n", irqid);
+		return (1);
+	}
+
+	target_vcpu = irq->target_vcpu;
+	KASSERT(vcpuid == -1 || vcpuid == target_vcpu,
+	    ("%s: Interrupt %u has bad cpu affinity: vcpu %d target vcpu %d",
+	    __func__, irqid, vcpuid, target_vcpu));
+	KASSERT(target_vcpu >= 0 && target_vcpu < VM_MAXCPU,
+	    ("%s: Interrupt %u sent to invalid vcpu %d", __func__, irqid,
+	    target_vcpu));
+
+	/* SPIs are delivered to the vcpu they are routed to */
+	if (vcpuid == -1)
+		vcpuid = target_vcpu;
+	/* TODO: Check from 0 to vm->maxcpus */
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU) {
+		vgic_v3_release_irq(irq);
+		return (1);
+	}
+
+	notify = false;
+	cpu_if = &hyp->ctx[vcpuid].vgic_cpu_if;
+
+	mtx_lock_spin(&cpu_if->lr_mtx);
+
+	/* Only act on a level change (level) or a high level (edge) */
+	if (!vgic_v3_check_irq(irq, level)) {
+		goto out;
+	}
+
+	if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL)
+		irq->level = level;
+	else /* VGIC_CONFIG_EDGE */
+		irq->pending = true;
+
+	notify = vgic_v3_queue_irq(hyp, cpu_if, vcpuid, irq);
+
+out:
+	mtx_unlock_spin(&cpu_if->lr_mtx);
+	vgic_v3_release_irq(irq);
+
+	/* Wake the target vcpu after dropping the locks */
+	if (notify)
+		vcpu_notify_event(hyp->vm, vcpuid, false);
+
+	return (0);
+}
+
+/*
+ * Deliver an MSI by emulating a write of msg to GICD_SETSPI_NSR;
+ * any other address within the distributor is rejected with EINVAL.
+ */
+int
+vgic_v3_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	uint64_t reg;
+
+	/* This is a 4 byte register */
+	if (addr < dist->start || addr + 4 > dist->end) {
+		return (EINVAL);
+	}
+
+	reg = addr - dist->start;
+	if (reg != GICD_SETSPI_NSR)
+		return (EINVAL);
+
+	/* vcpuid -1: route by the SPI's configured affinity */
+	return (vgic_v3_inject_irq(hyp, -1, msg, true));
+}
+
+/*
+ * Called before entering the guest: load as many buffered
+ * active/pending interrupts as possible into the hardware list
+ * registers (ICH_LR<n>_EL2) for this vcpu.
+ */
+void
+vgic_v3_flush_hwstate(void *arg)
+{
+	struct hypctx *hypctx;
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	int i;
+
+	hypctx = arg;
+	cpu_if = &hypctx->vgic_cpu_if;
+
+	/*
+	 * All Distributor writes have been executed at this point, do not
+	 * protect Distributor reads with a mutex.
+	 *
+	 * This is called with all interrupts disabled, so there is no need
+	 * for a List Register spinlock either.
+	 */
+	mtx_lock_spin(&cpu_if->lr_mtx);
+
+	cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+
+	/* Exit early if there are no buffered interrupts */
+	if (TAILQ_EMPTY(&cpu_if->irq_act_pend))
+		goto out;
+
+	KASSERT(cpu_if->ich_lr_used == 0, ("%s: Used LR count not zero %u",
+	    __func__, cpu_if->ich_lr_used));
+
+	i = 0;
+	/* Mark every list register as empty before refilling */
+	cpu_if->ich_elrsr_el2 = (1 << cpu_if->ich_lr_num) - 1;
+	TAILQ_FOREACH(irq, &cpu_if->irq_act_pend, act_pend_list) {
+		/* No free list register, stop searching for IRQs */
+		if (i == cpu_if->ich_lr_num)
+			break;
+
+		/* Disabled IRQs stay buffered but are not presented */
+		if (!irq->enabled)
+			continue;
+
+		cpu_if->ich_lr_el2[i] = ICH_LR_EL2_GROUP1 |
+		    ((uint64_t)irq->priority << ICH_LR_EL2_PRIO_SHIFT) |
+		    irq->irq;
+
+		if (irq->active) {
+			cpu_if->ich_lr_el2[i] |= ICH_LR_EL2_STATE_ACTIVE;
+		}
+
+#ifdef notyet
+		/* TODO: Check why this is needed */
+		if ((irq->config & _MASK) == LEVEL)
+			cpu_if->ich_lr_el2[i] |= ICH_LR_EL2_EOI;
+#endif
+
+		if (!irq->active && vgic_v3_irq_pending(irq)) {
+			cpu_if->ich_lr_el2[i] |= ICH_LR_EL2_STATE_PENDING;
+
+			/*
+			 * This IRQ is now pending on the guest. Allow for
+			 * another edge that could cause the interrupt to
+			 * be raised again.
+			 */
+			if ((irq->config & VGIC_CONFIG_MASK) ==
+			    VGIC_CONFIG_EDGE) {
+				irq->pending = false;
+			}
+		}
+
+		i++;
+	}
+	cpu_if->ich_lr_used = i;
+
+out:
+	mtx_unlock_spin(&cpu_if->lr_mtx);
+}
+
+/*
+ * Called after returning from the guest: read back the hardware list
+ * registers, fold the guest's EOI/activation activity into the software
+ * interrupt state and prune the active/pending list.
+ */
+void
+vgic_v3_sync_hwstate(void *arg)
+{
+	struct hypctx *hypctx;
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	uint64_t lr;
+	int i;
+
+	hypctx = arg;
+	cpu_if = &hypctx->vgic_cpu_if;
+
+	/* Exit early if there are no buffered interrupts */
+	if (cpu_if->ich_lr_used == 0)
+		return;
+
+	/*
+	 * Check on the IRQ state after running the guest. ich_lr_used and
+	 * ich_lr_el2 are only ever used within this thread so is safe to
+	 * access unlocked.
+	 */
+	for (i = 0; i < cpu_if->ich_lr_used; i++) {
+		lr = cpu_if->ich_lr_el2[i];
+		cpu_if->ich_lr_el2[i] = 0;
+
+		irq = vgic_v3_get_irq(hypctx->hyp, hypctx->vcpu,
+		    ICH_LR_EL2_VINTID(lr));
+		if (irq == NULL)
+			continue;
+
+		irq->active = (lr & ICH_LR_EL2_STATE_ACTIVE) != 0;
+
+		if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_EDGE) {
+			/*
+			 * If we have an edge triggered IRQ preserve the
+			 * pending bit until the IRQ has been handled.
+			 */
+			if ((lr & ICH_LR_EL2_STATE_PENDING) != 0) {
+				irq->pending = true;
+			}
+		} else {
+			/*
+			 * If we have a level triggered IRQ remove the
+			 * pending bit if the IRQ has been handled.
+			 * The level is separate, so may still be high
+			 * triggering another IRQ.
+			 */
+			if ((lr & ICH_LR_EL2_STATE_PENDING) == 0) {
+				irq->pending = false;
+			}
+		}
+
+		/* Lock to update irq_act_pend */
+		mtx_lock_spin(&cpu_if->lr_mtx);
+		if (irq->active) {
+			/* Ensure the active IRQ is at the head of the list */
+			TAILQ_REMOVE(&cpu_if->irq_act_pend, irq, act_pend_list);
+			TAILQ_INSERT_HEAD(&cpu_if->irq_act_pend, irq,
+			    act_pend_list);
+		} else if (!vgic_v3_irq_pending(irq)) {
+			/* If pending or active remove from the list */
+			TAILQ_REMOVE(&cpu_if->irq_act_pend, irq, act_pend_list);
+			irq->on_aplist = false;
+		}
+		mtx_unlock_spin(&cpu_if->lr_mtx);
+		vgic_v3_release_irq(irq);
+	}
+
+	/* Clear the EOI count; the list registers are now all free */
+	cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_EOICOUNT_MASK;
+	cpu_if->ich_lr_used = 0;
+}
+
+/*
+ * newbus glue: probe succeeds only when the parent GIC has a GICv3+
+ * virtualization extension; attach just records its presence.
+ */
+static int
+vgic_probe(device_t dev)
+{
+	if (!gic_get_vgic(dev))
+		return (EINVAL);
+
+	/* We currently only support the GICv3 */
+	if (gic_get_hw_rev(dev) < 3)
+		return (EINVAL);
+
+	device_set_desc(dev, "Virtual GIC");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+vgic_attach(device_t dev)
+{
+	have_vgic = true;
+	return (0);
+}
+
+static device_method_t vgic_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, vgic_probe),
+	DEVMETHOD(device_attach, vgic_attach),
+
+	/* End */
+	DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(vgic, vgic_driver, vgic_methods, 0);
+
+DRIVER_MODULE(vgic, gic, vgic_driver, 0, 0);
+
+/* Report whether a virtual GIC was found during attach. */
+bool
+vgic_present(void)
+{
+	return (have_vgic);
+}
+
+/*
+ * Record the virtual GIC features advertised by ICH_VTR_EL2: the
+ * minimum (numerically largest) supported virtual priority, the number
+ * of active-priority registers and the number of list registers.
+ */
+void
+vgic_v3_init(uint64_t ich_vtr_el2)
+{
+	uint32_t pribits, prebits;
+
+	MPASS(have_vgic);
+
+	/*
+	 * Each case must break: without the breaks every case fell
+	 * through to the last, leaving min_prio/ich_apr_num at the
+	 * maximum regardless of the hardware's reported width.
+	 */
+	pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2);
+	switch (pribits) {
+	case 5:
+		virt_features.min_prio = 0xf8;
+		break;
+	case 6:
+		virt_features.min_prio = 0xfc;
+		break;
+	case 7:
+		virt_features.min_prio = 0xfe;
+		break;
+	case 8:
+		virt_features.min_prio = 0xff;
+		break;
+	}
+
+	prebits = ICH_VTR_EL2_PREBITS(ich_vtr_el2);
+	switch (prebits) {
+	case 5:
+		virt_features.ich_apr_num = 1;
+		break;
+	case 6:
+		virt_features.ich_apr_num = 2;
+		break;
+	case 7:
+		virt_features.ich_apr_num = 4;
+		break;
+	}
+
+	virt_features.ich_lr_num = ICH_VTR_EL2_LISTREGS(ich_vtr_el2);
+}
diff --git a/sys/arm64/vmm/io/vgic_v3_reg.h b/sys/arm64/vmm/io/vgic_v3_reg.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3_reg.h
@@ -0,0 +1,99 @@
+#ifndef _VGIC_V3_REG_H_
+#define _VGIC_V3_REG_H_
+
+/* Interrupt Controller End of Interrupt Status Register */
+#define ICH_EISR_EL2_STATUS_MASK 0xffff
+#define ICH_EISR_EL2_EOI_NOT_HANDLED(lr) ((1 << lr) & ICH_EISR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Empty List Register Status Register */
+#define ICH_ELSR_EL2_STATUS_MASK 0xffff
+#define ICH_ELSR_EL2_LR_EMPTY(x) ((1 << x) & ICH_ELSR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Hyp Control Register */
+#define ICH_HCR_EL2_EOICOUNT_SHIFT 27
+#define ICH_HCR_EL2_EOICOUNT_MASK (0x1f << ICH_HCR_EL2_EOICOUNT_SHIFT)
+#define ICH_HCR_EL2_TDIR (1 << 14) /* Trap non-secure EL1 writes to IC{C, V}_DIR_EL1 */
+#define ICH_HCR_EL2_TSEI (1 << 14) /* Trap System Error Interupts (SEI) to EL2 */
+#define ICH_HCR_EL2_TALL1 (1 << 12) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 1 interrupts */
+#define ICH_HCR_EL2_TALL0 (1 << 11) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 0 interrupts */
+#define ICH_HCR_EL2_TC (1 << 10) /* Trap non-secure EL1 accesses to common IC{C, V}_* registers */
+#define ICH_HCR_EL2_VGRP1DIE (1 << 7) /* VM Group 1 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP1EIE (1 << 6) /* VM Group 1 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0DIE (1 << 5) /* VM Group 0 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0EIE (1 << 4) /* VM Group 0 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_NPIE (1 << 3) /* No Pending Interrupt Enable */
+#define ICH_HCR_EL2_LRENPIE (1 << 2) /* List Register Entry Not Present Interrupt Enable */
+#define ICH_HCR_EL2_UIE (1 << 1) /* Underflow Interrupt Enable */
+#define ICH_HCR_EL2_En (1 << 0) /* Global enable for the virtual CPU interface */
+
+/* Interrupt Controller List Registers */
+#define ICH_LR_EL2_VINTID_MASK 0xffffffff
+#define ICH_LR_EL2_VINTID(x) ((x) & ICH_LR_EL2_VINTID_MASK)
+#define ICH_LR_EL2_PINTID_SHIFT 32
+#define ICH_LR_EL2_PINTID_MASK (0x3fUL << ICH_LR_EL2_PINTID_SHIFT)
+/* Raise a maintanance IRQ when deactivated (only non-HW virqs) */
+#define ICH_LR_EL2_EOI (1UL << 41)
+#define ICH_LR_EL2_PRIO_SHIFT 48
+#define ICH_LR_EL2_PRIO_MASK (0xffUL << ICH_LR_EL2_PRIO_SHIFT)
+#define ICH_LR_EL2_GROUP_SHIFT 60
+#define ICH_LR_EL2_GROUP1 (1UL << ICH_LR_EL2_GROUP_SHIFT)
+#define ICH_LR_EL2_HW (1UL << 61)
+#define ICH_LR_EL2_STATE_SHIFT 62
+#define ICH_LR_EL2_STATE_MASK (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE(x) ((x) & ICH_LR_EL2_STATE_MASK)
+#define ICH_LR_EL2_STATE_INACTIVE (0x0UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING (0x1UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_ACTIVE (0x2UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING_ACTIVE (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+
+/* Interrupt Controller Maintenance Interrupt State Register */
+#define ICH_MISR_EL2_VGRP1D (1 << 7) /* vPE Group 1 Disabled */
+#define ICH_MISR_EL2_VGRP1E (1 << 6) /* vPE Group 1 Enabled */
+#define ICH_MISR_EL2_VGRP0D (1 << 5) /* vPE Group 0 Disabled */
+#define ICH_MISR_EL2_VGRP0E (1 << 4) /* vPE Group 0 Enabled */
+#define ICH_MISR_EL2_NP (1 << 3) /* No Pending */
+#define ICH_MISR_EL2_LRENP (1 << 2) /* List Register Entry Not Present */
+#define ICH_MISR_EL2_U (1 << 1) /* Underflow */
+#define ICH_MISR_EL2_EOI (1 << 0) /* End Of Interrupt */
+
+/* Interrupt Controller Virtual Machine Control Register */
+#define ICH_VMCR_EL2_VPMR_SHIFT 24
+#define ICH_VMCR_EL2_VPMR_MASK (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_LOWEST (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_HIGHEST (0x00 << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_SHIFT 21
+#define ICH_VMCR_EL2_VBPR0_MASK (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_SHIFT 18
+#define ICH_VMCR_EL2_VBPR1_MASK (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VEOIM (1 << 9) /* Virtual EOI mode */
+#define ICH_VMCR_EL2_VCBPR (1 << 4) /* Virtual Common binary Point Register */
+#define ICH_VMCR_EL2_VFIQEN (1 << 3) /* Virtual FIQ enable */
+#define ICH_VMCR_EL2_VACKCTL (1 << 2) /* Virtual AckCtl */
+#define ICH_VMCR_EL2_VENG1 (1 << 1) /* Virtual Group 1 Interrupt Enable */
+#define ICH_VMCR_EL2_VENG0 (1 << 0) /* Virtual Group 0 Interrupt Enable */
+
+/* Interrupt Controller VGIC Type Register */
+#define ICH_VTR_EL2_PRIBITS_SHIFT 29
+#define ICH_VTR_EL2_PRIBITS_MASK (0x7 << ICH_VTR_EL2_PRIBITS_SHIFT)
+#define ICH_VTR_EL2_PRIBITS(x) \
+ ((((x) & ICH_VTR_EL2_PRIBITS_MASK) >> ICH_VTR_EL2_PRIBITS_SHIFT) + 1)
+#define ICH_VTR_EL2_PREBITS_SHIFT 26
+#define ICH_VTR_EL2_PREBITS_MASK (0x7 << ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_PREBITS(x) \
+ (((x) & ICH_VTR_EL2_PREBITS_MASK) >> ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_SEIS (1 << 22) /* System Error Interrupt (SEI) Support */
+#define ICH_VTR_EL2_A3V (1 << 21) /* Affinity 3 Valid */
+#define ICH_VTR_EL2_NV4 (1 << 20) /* Direct injection of virtual interrupts. RES1 for GICv3 */
+#define ICH_VTR_EL2_TDS (1 << 19) /* Implementation supports ICH_HCR_EL2.TDIR */
+#define ICH_VTR_EL2_LISTREGS_MASK 0x1f
+/*
+ * ICH_VTR_EL2.ListRegs holds the number of list registers, minus one. Add one
+ * to get the actual number of list registers.
+ */
+#define ICH_VTR_EL2_LISTREGS(x) (((x) & ICH_VTR_EL2_LISTREGS_MASK) + 1)
+
+#endif /* !_VGIC_V3_REG_H_ */
diff --git a/sys/arm64/vmm/io/vtimer.h b/sys/arm64/vmm/io/vtimer.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VTIMER_H_
+#define _VMM_VTIMER_H_
+
+#define GT_PHYS_NS_IRQ 30
+#define GT_VIRT_IRQ 27
+
+/*
+ * Per-VM timer state: the VM's Counter-timer Hypervisor Control Register
+ * value and its virtual counter offset (CNTVOFF_EL2).
+ */
+struct vtimer
+{
+ uint64_t cnthctl_el2;
+ uint64_t cntvoff_el2;
+};
+
+/* State for one guest timer (physical or virtual) on a vCPU. */
+struct vtimer_timer
+{
+ struct callout callout;
+ struct mtx mtx;
+
+ uint32_t irqid;
+
+ /*
+ * These registers are either emulated for the physical timer, or
+ * the guest has full access to them for the virtual timer.
+ *
+ * CNTx_CTL_EL0: Counter-timer Timer Control Register
+ * CNTx_CVAL_EL0: Counter-timer Timer CompareValue Register
+ */
+ uint64_t cntx_cval_el0; /* CVAL is a 64-bit compare value */
+ uint32_t cntx_ctl_el0;
+};
+
+/* Per-vCPU timer state: the emulated physical and the virtual timer. */
+struct vtimer_cpu
+{
+ struct vtimer_timer phys_timer;
+ struct vtimer_timer virt_timer;
+
+ uint32_t cntkctl_el1;
+};
+
+/* Module / VM / vCPU lifecycle hooks. */
+int vtimer_init(uint64_t cnthctl_el2);
+void vtimer_vminit(struct hyp *);
+void vtimer_cpuinit(struct hypctx *);
+void vtimer_cpucleanup(struct hypctx *);
+void vtimer_vmcleanup(struct hyp *);
+void vtimer_cleanup(void);
+
+/* Emulated CNTP_* register accessors; 'arg' is unused by the handlers. */
+int vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_cnt_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_cnt_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+#endif
diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.c
@@ -0,0 +1,456 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/bus.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/timeet.h>
+#include <sys/timetc.h>
+
+#include <machine/bus.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/armreg.h>
+
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vtimer.h"
+
+#define RES1 0xffffffffffffffffUL
+
+#define timer_enabled(ctl) \
+ (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE))
+
+static uint64_t cnthctl_el2_reg;
+static uint32_t tmr_frq;
+static bool have_vtimer = false;
+
+#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS)
+
+/*
+ * Interrupt filter for the host virtual timer IRQ.  Forwards the timer
+ * interrupt to the active guest vCPU via the vGIC, then disables the
+ * timer so the line is not immediately reasserted on return.
+ */
+static int
+vtimer_virtual_timer_intr(void *arg)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ /*
+ * TODO everything here is very strange. The relationship between the
+ * hardware value and the value in memory is not clear at all.
+ */
+
+ hypctx = arm64_get_active_vcpu();
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+
+ if (!hypctx) {
+ /* vm_destroy() was called. */
+ eprintf("No active vcpu\n");
+ /* NOTE(review): cntv_ctl was already read above; this re-read
+ * looks redundant — confirm before removing. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ goto out;
+ }
+ if (!timer_enabled(cntv_ctl)) {
+ eprintf("Timer not enabled\n");
+ goto out;
+ }
+ if (!timer_condition_met(cntv_ctl)) {
+ eprintf("Timer condition not met\n");
+ goto out;
+ }
+
+ vgic_v3_inject_irq(hypctx->hyp, hypctx->vcpu, GT_VIRT_IRQ, true);
+
+ /* Mirror the disable into the shadow copy of the guest's CTL. */
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 &= ~CNTP_CTL_ENABLE;
+ cntv_ctl = hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0;
+
+out:
+ /*
+ * Disable the timer interrupt. This will prevent the interrupt from
+ * being reasserted as soon as we exit the handler and getting stuck
+ * in an infinite loop.
+ *
+ * This is safe to do because the guest disabled the timer, and then
+ * enables it as part of the interrupt handling routine.
+ */
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+
+ return (FILTER_HANDLED);
+}
+
+/*
+ * One-time module initialisation: remember the host CNTHCTL_EL2 value
+ * (used as the template for each VM in vtimer_vminit()) and the host
+ * timer frequency.  Returns 0.
+ */
+int
+vtimer_init(uint64_t cnthctl_el2)
+{
+ cnthctl_el2_reg = cnthctl_el2;
+ /*
+ * The guest *MUST* use the same timer frequency as the host. The
+ * register CNTFRQ_EL0 is accessible to the guest and a different value
+ * in the guest dts file might have unforeseen consequences.
+ */
+ tmr_frq = READ_SPECIALREG(cntfrq_el0);
+
+ return (0);
+}
+
+/*
+ * Per-VM initialisation: build the VM's CNTHCTL_EL2 value and record the
+ * current physical count as the virtual counter offset, so the guest's
+ * virtual counter (CNTVCT = CNTPCT - CNTVOFF) starts near zero.
+ */
+void
+vtimer_vminit(struct hyp *hyp)
+{
+ /*
+ * Configure the Counter-timer Hypervisor Control Register for the VM.
+ *
+ * ~CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 from EL1
+ * CNTHCTL_EL1PCTEN: don't trap access to CNTPCT_EL0
+ */
+ hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN;
+ hyp->vtimer.cnthctl_el2 |= CNTHCTL_EL1PCTEN;
+
+ hyp->vtimer.cntvoff_el2 = READ_SPECIALREG(cntpct_el0);
+}
+
+/*
+ * Per-vCPU initialisation: set up the emulated physical timer (masked
+ * and disabled) and the virtual timer, each with its callout, mutex
+ * and IRQ number.
+ */
+void
+vtimer_cpuinit(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ /*
+ * Configure physical timer interrupts for the VCPU.
+ *
+ * CNTP_CTL_IMASK: mask interrupts
+ * ~CNTP_CTL_ENABLE: disable the timer
+ */
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = CNTP_CTL_IMASK & ~CNTP_CTL_ENABLE;
+
+ mtx_init(&vtimer_cpu->phys_timer.mtx, "vtimer phys callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->phys_timer.callout,
+ &vtimer_cpu->phys_timer.mtx, 0);
+ vtimer_cpu->phys_timer.irqid = GT_PHYS_NS_IRQ;
+
+ mtx_init(&vtimer_cpu->virt_timer.mtx, "vtimer virt callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->virt_timer.callout,
+ &vtimer_cpu->virt_timer.mtx, 0);
+ vtimer_cpu->virt_timer.irqid = GT_VIRT_IRQ;
+}
+
+/* Per-vCPU teardown: drain the callouts, then destroy their mutexes. */
+void
+vtimer_cpucleanup(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ callout_drain(&vtimer_cpu->phys_timer.callout);
+ callout_drain(&vtimer_cpu->virt_timer.callout);
+ mtx_destroy(&vtimer_cpu->phys_timer.mtx);
+ mtx_destroy(&vtimer_cpu->virt_timer.mtx);
+}
+
+/* Per-VM teardown: if no vCPU is active any more, stop the hardware
+ * virtual timer so it cannot keep firing. */
+void
+vtimer_vmcleanup(struct hyp *hyp)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ hypctx = arm64_get_active_vcpu();
+ if (!hypctx) {
+ /* The active VM was destroyed, stop the timer. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+ }
+}
+
+/* Module teardown: nothing to undo from vtimer_init(). */
+void
+vtimer_cleanup(void)
+{
+}
+
+/* Callout handler: inject the emulated physical timer IRQ into the
+ * guest when the scheduled deadline expires. */
+static void
+vtimer_inject_irq_callout_func(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_v3_inject_irq(hypctx->hyp, hypctx->vcpu,
+ hypctx->vtimer_cpu.phys_timer.irqid, true);
+}
+
+
+/*
+ * Arm the emulated physical timer: if the compare value is already in
+ * the past, inject the IRQ immediately; otherwise schedule a callout
+ * for the remaining time.
+ *
+ * NOTE(review): the callout was initialised with callout_init_mtx();
+ * verify the associated mutex is held (or intended not to be) around
+ * callout_reset_sbt() here.
+ */
+static void
+vtimer_schedule_irq(struct vtimer_cpu *vtimer_cpu, struct hyp *hyp, int vcpuid)
+{
+ sbintime_t time;
+ struct vtimer_timer *timer;
+ uint64_t cntpct_el0;
+ uint64_t diff;
+
+ timer = &vtimer_cpu->phys_timer;
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ if (timer->cntx_cval_el0 < cntpct_el0) {
+ /* Timer set in the past, trigger interrupt */
+ vgic_v3_inject_irq(hyp, vcpuid, timer->irqid, true);
+ } else {
+ /* Convert remaining ticks to sbintime using the host freq. */
+ diff = timer->cntx_cval_el0 - cntpct_el0;
+ time = diff * SBT_1S / tmr_frq;
+ callout_reset_sbt(&timer->callout, time, 0,
+ vtimer_inject_irq_callout_func, &hyp->ctx[vcpuid], 0);
+ }
+}
+
+/* Cancel a pending emulated physical timer IRQ: stop the callout and
+ * retract the interrupt from the vGIC. */
+static void
+vtimer_remove_irq(struct hypctx *hypctx, int vcpuid)
+{
+ struct vtimer_cpu *vtimer_cpu;
+ struct vtimer_timer *timer;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ timer = &vtimer_cpu->phys_timer;
+
+ callout_drain(&timer->callout);
+ /*
+ * The interrupt needs to be deactivated here regardless of the callout
+ * function having been executed. The timer interrupt can be masked with
+ * the CNTP_CTL_EL0.IMASK bit instead of reading the IAR register.
+ * Masking the interrupt doesn't remove it from the list registers.
+ */
+ vgic_v3_inject_irq(hypctx->hyp, vcpuid, timer->irqid, false);
+}
+
+/*
+ * Timer emulation functions.
+ *
+ * The guest should use the virtual timer, however some software, e.g. u-boot,
+ * used the physical timer. Emulate this in software for the guest to use.
+ *
+ * Adjust for cntvoff_el2 so the physical and virtual timers are at similar
+ * times. This simplifies interrupt handling in the virtual timer as the
+ * adjustment will have already happened.
+ */
+
+/*
+ * Read of the emulated CNTP_CTL_EL0: return the shadow CTL value with
+ * ISTATUS synthesised from comparing CVAL against the adjusted counter.
+ */
+int
+vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ if (vtimer_cpu->phys_timer.cntx_cval_el0 < cntpct_el0)
+ /* Timer condition met */
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 | CNTP_CTL_ISTATUS;
+ else
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 & ~CNTP_CTL_ISTATUS;
+
+ return (0);
+}
+
+/*
+ * Write of the emulated CNTP_CTL_EL0: store the new value and, if the
+ * write transitions the timer from disabled/masked to enabled, schedule
+ * the interrupt.
+ */
+int
+vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t ctl_el0;
+ bool timer_toggled_on;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ timer_toggled_on = false;
+ ctl_el0 = vtimer_cpu->phys_timer.cntx_ctl_el0;
+
+ if (!timer_enabled(ctl_el0) && timer_enabled(wval))
+ timer_toggled_on = true;
+
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = wval;
+
+ if (timer_toggled_on)
+ vtimer_schedule_irq(vtimer_cpu, hyp, vcpuid);
+
+ return (0);
+}
+
+/* Read of the emulated CNTPCT_EL0: host count minus the VM's offset. */
+int
+vtimer_phys_cnt_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+
+ hyp = vm_get_cookie(vm);
+ *rval = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ return (0);
+}
+
+/* Writes to the counter are ignored (the count is read-only). */
+int
+vtimer_phys_cnt_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ return (0);
+}
+
+/* Read of the emulated CNTP_CVAL_EL0: return the shadow compare value. */
+int
+vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ *rval = vtimer_cpu->phys_timer.cntx_cval_el0;
+
+ return (0);
+}
+
+/*
+ * Write of the emulated CNTP_CVAL_EL0: store the new compare value and,
+ * if the timer is enabled, re-arm the interrupt against the new deadline.
+ */
+int
+vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ vtimer_cpu->phys_timer.cntx_cval_el0 = wval;
+
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_remove_irq(hypctx, vcpuid);
+ vtimer_schedule_irq(vtimer_cpu, hyp, vcpuid);
+ }
+
+ return (0);
+}
+
+/*
+ * Read of the emulated CNTP_TVAL_EL0.
+ *
+ * TVAL is the 32-bit difference between the compare value and the
+ * (offset-adjusted) counter.  Keep the counter in 64 bits and truncate
+ * only the final result; the original code truncated the counter itself
+ * to a uint32_t before subtracting.
+ */
+int
+vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ if (!(vtimer_cpu->phys_timer.cntx_ctl_el0 & CNTP_CTL_ENABLE)) {
+ /*
+ * ARMv8 Architecture Manual, p. D7-2702: the result of reading
+ * TVAL when the timer is disabled is UNKNOWN. I have chosen to
+ * return the maximum value possible on 32 bits which means the
+ * timer will fire very far into the future.
+ */
+ *rval = (uint32_t)RES1;
+ } else {
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hyp->vtimer.cntvoff_el2;
+ *rval = (uint32_t)(vtimer_cpu->phys_timer.cntx_cval_el0 -
+ cntpct_el0);
+ }
+
+ return (0);
+}
+
+/*
+ * Write of the emulated CNTP_TVAL_EL0: convert the (signed 32-bit)
+ * timer value to an absolute compare value against the adjusted
+ * counter, then re-arm the interrupt if the timer is enabled.
+ */
+int
+vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ /* (int32_t) sign-extends TVAL so negative values move CVAL back. */
+ vtimer_cpu->phys_timer.cntx_cval_el0 = (int32_t)wval + cntpct_el0;
+
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_remove_irq(hypctx, vcpuid);
+ vtimer_schedule_irq(vtimer_cpu, hyp, vcpuid);
+ }
+
+ return (0);
+}
+
+/* Driver softc: the virtual timer IRQ resource and its handler cookie. */
+struct vtimer_softc {
+ struct resource *res;
+ void *ihl;
+ int rid;
+};
+
+/* Probe always succeeds; the parent (generic_timer) decides attachment. */
+static int
+vtimer_probe(device_t dev)
+{
+ device_set_desc(dev, "Virtual timer");
+ return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Attach: allocate the virtual timer IRQ and install the interrupt
+ * filter.  The original code ignored the bus_setup_intr() return value,
+ * leaking the IRQ resource and reporting a working vtimer on failure.
+ */
+static int
+vtimer_attach(device_t dev)
+{
+ struct vtimer_softc *sc;
+ int error;
+
+ sc = device_get_softc(dev);
+
+ sc->rid = 0;
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->rid, RF_ACTIVE);
+ if (sc->res == NULL)
+ return (ENXIO);
+
+ error = bus_setup_intr(dev, sc->res, INTR_TYPE_CLK,
+ vtimer_virtual_timer_intr, NULL, NULL, &sc->ihl);
+ if (error != 0) {
+ bus_release_resource(dev, SYS_RES_IRQ, sc->rid, sc->res);
+ sc->res = NULL;
+ return (error);
+ }
+
+ have_vtimer = true;
+ return (0);
+}
+
+static device_method_t vtimer_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vtimer_probe),
+ DEVMETHOD(device_attach, vtimer_attach),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(vtimer, vtimer_driver, vtimer_methods,
+ sizeof(struct vtimer_softc));
+
+/* Attach as a child of the generic_timer driver. */
+DRIVER_MODULE(vtimer, generic_timer, vtimer_driver, 0, 0);
diff --git a/sys/arm64/vmm/mmu.h b/sys/arm64/vmm/mmu.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/mmu.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MMU_H_
+#define _VMM_MMU_H_
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "hyp.h"
+
+/* Linker-provided bounds of the hypervisor (EL2) code and image. */
+extern char vmm_hyp_code;
+extern char vmm_hyp_code_end;
+
+extern char _vmm_start;
+extern char _vmm_end;
+
+/* Hypervisor (EL2) page-table management. */
+bool vmmpmap_init(void);
+void vmmpmap_fini(void);
+uint64_t vmmpmap_to_ttbr0(void);
+bool vmmpmap_enter(vm_offset_t, vm_size_t, vm_paddr_t, vm_prot_t);
+void vmmpmap_remove(vm_offset_t, vm_size_t, bool);
+
+#endif
diff --git a/sys/arm64/vmm/psci.h b/sys/arm64/vmm/psci.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/psci.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _PSCI_H_
+#define _PSCI_H_
+
+#include "arm64.h"
+
+/* Handle a PSCI call made by the guest on the given vCPU. */
+int psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme,
+ bool *retu);
+
+#endif
diff --git a/sys/arm64/vmm/reset.h b/sys/arm64/vmm/reset.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/reset.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_RESET_H_
+#define _VMM_RESET_H_
+
+/* Reset a vCPU's EL0/EL1 and EL2 register state to architectural defaults. */
+void reset_vm_el01_regs(void *vcpu);
+void reset_vm_el2_regs(void *vcpu);
+
+#endif
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm.c
@@ -0,0 +1,1599 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/cpuset.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vm.h>
+#include <machine/pcb.h>
+#include <machine/param.h>
+#include <machine/smp.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/armreg.h>
+
+#include <dev/pci/pcireg.h>
+
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+#include "vmm_mem.h"
+#include "arm64.h"
+#include "mmu.h"
+#include "psci.h"
+
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define BSP 0 /* the boostrap processor */
+
+/*
+ * Per-virtual-CPU software state.  'mtx' is a spin lock guarding 'state'
+ * and 'hostcpu'; see vcpu_set_state_locked() for the legal transitions.
+ */
+struct vcpu {
+	int		flags;
+	enum vcpu_state	state;		/* protected by 'mtx' */
+	struct mtx	mtx;		/* spin lock */
+	int		hostcpu;	/* host cpuid this vcpu last ran on */
+	int		vcpuid;		/* index into vm->vcpu[] */
+	void		*stats;		/* statistics buffer */
+	struct vm_exit	exitinfo;	/* exit information (see vm_exitinfo()) */
+	uint64_t	nextpc;		/* (x) next instruction to execute */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ bool sysmem;
+ vm_object_t object;
+};
+#define VM_MAX_MEMSEGS 3
+
+struct mem_map {
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
+};
+#define VM_MAX_MEMMAPS 4
+
+struct vmm_mmio_region {
+ uint64_t start;
+ uint64_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+#define VM_MAX_MMIO_REGIONS 4
+
+/*
+ * Initialization:
+ * (o) initialized the first time the VM is created
+ * (i) initialized when VM is created and when it is reinitialized
+ * (x) initialized before use
+ */
+struct vm {
+ void *cookie; /* (i) cpu-specific data */
+ volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
+ int suspend; /* (i) stop VM execution */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+ struct vmspace *vmspace; /* (o) guest's address space */
+ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
+ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
+ /* (o) guest MMIO regions */
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
+};
+
+static bool vmm_initialized = false;
+
+static struct vmm_ops *ops = NULL;
+
+#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0)
+#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
+
+#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
+#define VMRUN(vmi, vcpu, pc, pmap, evinfo) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, pc, pmap, evinfo) : ENXIO)
+#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
+#define VMSPACE_ALLOC(min, max) \
+ (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
+#define VMSPACE_FREE(vmspace) \
+ (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
+#define VMGETREG(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETREG(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
+#define VMGETCAP(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETCAP(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+
+#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
+#define fpu_stop_emulating() clts()
+
+static int vm_handle_wfi(struct vm *vm, int vcpuid,
+ struct vm_exit *vme, bool *retu);
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+/*
+ * Halt the guest if all vcpus are executing a HLT instruction with
+ * interrupts disabled.
+ */
+static int halt_detection_enabled = 1;
+SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
+ &halt_detection_enabled, 0,
+ "Halt VM if all vcpus execute HLT with interrupts disabled");
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+static int trace_guest_exceptions;
+SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
+ &trace_guest_exceptions, 0,
+ "Trap into hypervisor on all guest exceptions and reflect them back");
+
+/*
+ * Synthetic CPU ID-register values advertised to guests.  vmm_init()
+ * passes this descriptor through update_cpu_desc() before use, and the
+ * ID_SPECIAL_REG() entries in vmm_special_regs[] serve these fields to
+ * the guest when it traps on an ID register read.
+ */
+static struct cpu_desc vmm_desc = {
+	.id_aa64afr0 = 0,
+	.id_aa64afr1 = 0,
+	.id_aa64dfr0 =
+	    (0xful << ID_AA64DFR0_CTX_CMPs_SHIFT) |
+	    (0xful << ID_AA64DFR0_WRPs_SHIFT) |
+	    (0xful << ID_AA64DFR0_BRPs_SHIFT) |
+	    ID_AA64DFR0_PMUVer_3 |
+	    ID_AA64DFR0_DebugVer_8,
+	.id_aa64dfr1 = 0,
+	.id_aa64isar0 =
+	    ID_AA64ISAR0_TLB_TLBIOSR |
+	    ID_AA64ISAR0_SHA3_IMPL |
+	    ID_AA64ISAR0_RDM_IMPL |
+	    ID_AA64ISAR0_Atomic_IMPL |
+	    ID_AA64ISAR0_CRC32_BASE |
+	    ID_AA64ISAR0_SHA2_512 |
+	    ID_AA64ISAR0_SHA1_BASE |
+	    ID_AA64ISAR0_AES_PMULL,
+	.id_aa64isar1 = 0,
+	.id_aa64mmfr0 =
+	    ID_AA64MMFR0_TGran4_IMPL |
+	    ID_AA64MMFR0_TGran64_IMPL |
+	    ID_AA64MMFR0_TGran16_IMPL |
+	    ID_AA64MMFR0_ASIDBits_16 |
+	    ID_AA64MMFR0_PARange_4P,
+	.id_aa64mmfr1 =
+	    ID_AA64MMFR1_SpecSEI_IMPL |
+	    ID_AA64MMFR1_PAN_ATS1E1 |
+	    ID_AA64MMFR1_HAFDBS_AF,
+	.id_aa64mmfr2 = 0,
+	.id_aa64pfr0 =
+	    ID_AA64PFR0_GIC_CPUIF_NONE |	/* no GIC system-register CPU interface advertised */
+	    ID_AA64PFR0_AdvSIMD_HP |
+	    ID_AA64PFR0_FP_HP |
+	    ID_AA64PFR0_EL3_64 |
+	    ID_AA64PFR0_EL2_64 |
+	    ID_AA64PFR0_EL1_64 |
+	    ID_AA64PFR0_EL0_64,
+	.id_aa64pfr1 = 0,
+};
+
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
+static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+
+/*
+ * Tear down the per-vCPU state created by vcpu_init().  Nothing to do
+ * yet; the commented-out local has been removed until this gains a body.
+ */
+static void
+vcpu_cleanup(struct vm *vm, int i, bool destroy)
+{
+}
+
+/*
+ * Initialize the software state of a vCPU.  The lock and identifiers are
+ * set up only on first creation ('create' == true); reinitialization of
+ * an existing VM leaves them intact.
+ */
+static void
+vcpu_init(struct vm *vm, uint32_t vcpu_id, bool create)
+{
+	struct vcpu *vcpu;
+
+	vcpu = &vm->vcpu[vcpu_id];
+
+	if (create) {
+		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
+		    "initialized", vcpu_id));
+		vcpu_lock_init(vcpu);
+		vcpu->hostcpu = NOCPU;	/* not running anywhere yet */
+		vcpu->vcpuid = vcpu_id;
+	}
+}
+
+/*
+ * Return a pointer to the vCPU's exit-information structure.  Panics on
+ * an out-of-range cpuid since callers pass only ids of active vcpus.
+ */
+struct vm_exit *
+vm_exitinfo(struct vm *vm, int cpuid)
+{
+	struct vcpu *vcpu;
+
+	if (cpuid < 0 || cpuid >= vm->maxcpus)
+		panic("vm_exitinfo: invalid cpuid %d", cpuid);
+
+	vcpu = &vm->vcpu[cpuid];
+
+	return (&vcpu->exitinfo);
+}
+
+/*
+ * One-time module initialization: select the arm64 backend, sanitize the
+ * synthetic guest ID registers against the host, then initialize the
+ * hypervisor mode via the backend's init hook.
+ */
+static int
+vmm_init(void)
+{
+	ops = &vmm_ops_arm;
+
+	/* Mask vmm_desc's advertised features against host capabilities. */
+	update_cpu_desc(&vmm_desc);
+
+	return (VMM_INIT(0));
+}
+
+/*
+ * Module load/unload event handler for the vmm(4) kernel module.
+ */
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+	int error;
+
+	switch (what) {
+	case MOD_LOAD:
+		vmmdev_init();
+		error = vmm_init();
+		if (error == 0)
+			vmm_initialized = true;
+		break;
+	case MOD_UNLOAD:
+		error = vmmdev_cleanup();
+		if (error == 0 && vmm_initialized) {
+			error = VMM_CLEANUP();
+			/*
+			 * Something bad happened during cleanup - prevent
+			 * new VMs from being created while partially torn
+			 * down (mirrors the amd64 vmm module handler).
+			 */
+			if (error)
+				vmm_initialized = false;
+		}
+		break;
+	default:
+		error = 0;
+		break;
+	}
+	return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+/*
+ * (Re)initialize the runtime state of a VM.  Called with 'create' true
+ * from vm_create() and false from vm_reinit(); backend state, CPU sets
+ * and MMIO handler table are reset either way.
+ */
+static void
+vm_init(struct vm *vm, bool create)
+{
+	int i;
+
+	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
+
+	CPU_ZERO(&vm->active_cpus);
+	CPU_ZERO(&vm->debug_cpus);
+
+	vm->suspend = 0;
+	CPU_ZERO(&vm->suspended_cpus);
+
+	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
+
+	for (i = 0; i < vm->maxcpus; i++)
+		vcpu_init(vm, i, create);
+}
+
+/*
+ * Create a new virtual machine named 'name'.  Allocates the guest
+ * address space and the VM structure; on success *retvm points at the
+ * fully initialized VM.  Returns ENXIO if the module failed to
+ * initialize, EINVAL on a bad name, ENOMEM if no vmspace is available.
+ */
+int
+vm_create(const char *name, struct vm **retvm)
+{
+	struct vm *vm;
+	struct vmspace *vmspace;
+
+	/*
+	 * If vmm.ko could not be successfully initialized then don't attempt
+	 * to create the virtual machine.
+	 */
+	if (!vmm_initialized)
+		return (ENXIO);
+
+	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+		return (EINVAL);
+
+	/* 1ul << 39: 39-bit guest physical address space - TODO confirm
+	 * this matches the stage 2 translation configuration. */
+	vmspace = VMSPACE_ALLOC(0, 1ul << 39);
+	if (vmspace == NULL)
+		return (ENOMEM);
+
+	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+	strcpy(vm->name, name);	/* length checked above */
+	vm->vmspace = vmspace;
+
+	vm->sockets = 1;
+	vm->cores = 1;			/* XXX backwards compatibility */
+	vm->threads = 1;		/* XXX backwards compatibility */
+	vm->maxcpus = VM_MAXCPU;	/* XXX temp to keep code working */
+
+	vm_init(vm, true);
+
+	*retvm = vm;
+	return (0);
+}
+
+/* Report the VM's CPU topology as set by vm_set_topology(). */
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus)
+{
+	*sockets = vm->sockets;
+	*cores = vm->cores;
+	*threads = vm->threads;
+	*maxcpus = vm->maxcpus;
+}
+
+/* Return the maximum number of vCPUs this VM supports. */
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+	return (vm->maxcpus);
+}
+
+/*
+ * Set the VM's CPU topology.  'maxcpus' must currently be 0 (changing
+ * it is unsupported) and sockets*cores*threads may not exceed maxcpus.
+ */
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus)
+{
+	if (maxcpus != 0)
+		return (EINVAL);	/* XXX remove when supported */
+	if ((sockets * cores * threads) > vm->maxcpus)
+		return (EINVAL);
+	/* XXX need to check sockets * cores * threads == vCPU, how? */
+	vm->sockets = sockets;
+	vm->cores = cores;
+	vm->threads = threads;
+	vm->maxcpus = VM_MAXCPU;	/* XXX temp to keep code working */
+	return(0);
+}
+
+/*
+ * Tear down VM state.  With 'destroy' false this is a reset: only
+ * device (non-sysmem) mappings are removed.  With 'destroy' true the
+ * whole guest address space is released via VMSPACE_FREE(), which also
+ * disposes of the remaining mappings.
+ */
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+	struct mem_map *mm;
+	pmap_t pmap;
+	int i;
+
+	if (destroy) {
+		/*
+		 * Make sure no host CPU still caches this VM's pmap as its
+		 * current VM pmap before the vmspace goes away.
+		 */
+		pmap = vmspace_pmap(vm->vmspace);
+		sched_pin();
+		PCPU_SET(curvmpmap, NULL);
+		sched_unpin();
+		CPU_FOREACH(i) {
+			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
+		}
+	}
+
+	vgic_v3_detach_from_vm(vm);
+
+	for (i = 0; i < vm->maxcpus; i++)
+		vcpu_cleanup(vm, i, destroy);
+
+	VMCLEANUP(vm->cookie);
+
+	/*
+	 * System memory is removed from the guest address space only when
+	 * the VM is destroyed. This is because the mapping remains the same
+	 * across VM reset.
+	 *
+	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
+	 * so those mappings are removed on a VM reset.
+	 */
+	if (!destroy) {
+		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+			mm = &vm->mem_maps[i];
+			/*
+			 * 'destroy' is always false in this branch, so the
+			 * redundant "destroy ||" test was dropped: only
+			 * device mappings are torn down on reset.
+			 */
+			if (!sysmem_mapping(vm, mm))
+				vm_free_memmap(vm, i);
+		}
+	}
+
+	if (destroy) {
+		for (i = 0; i < VM_MAX_MEMSEGS; i++)
+			vm_free_memseg(vm, i);
+
+		/* Frees all remaining guest mappings along with the space. */
+		VMSPACE_FREE(vm->vmspace);
+		vm->vmspace = NULL;
+	}
+}
+
+/* Destroy the VM and free its structure. */
+void
+vm_destroy(struct vm *vm)
+{
+	vm_cleanup(vm, true);
+	free(vm, M_VMM);
+}
+
+/*
+ * Reset the VM in place.  Only allowed when every active vCPU is
+ * suspended; returns EBUSY otherwise.
+ */
+int
+vm_reinit(struct vm *vm)
+{
+	int error;
+
+	/*
+	 * A virtual machine can be reset only if all vcpus are suspended.
+	 */
+	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+		vm_cleanup(vm, false);
+		vm_init(vm, false);
+		error = 0;
+	} else {
+		error = EBUSY;
+	}
+
+	return (error);
+}
+
+/* Return the VM's name as given to vm_create(). */
+const char *
+vm_name(struct vm *vm)
+{
+	return (vm->name);
+}
+
+/*
+ * Map host physical MMIO range 'hpa' into the guest at 'gpa'.
+ * Returns ENOMEM if the mapping object could not be allocated.
+ */
+int
+vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
+{
+	vm_object_t obj;
+
+	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
+		return (ENOMEM);
+	else
+		return (0);
+}
+
+/* Remove an MMIO mapping previously created by vm_map_mmio(). */
+int
+vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+
+	vmm_mmio_free(vm->vmspace, gpa, len);
+	return (0);
+}
+
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
+{
+	struct mem_map *mm;
+	int i;
+
+#ifdef INVARIANTS
+	/* The running-vcpu requirement is what makes mem_maps[] stable. */
+	int hostcpu, state;
+	state = vcpu_get_state(vm, vcpuid, &hostcpu);
+	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+			return (true);		/* 'gpa' is sysmem or devmem */
+	}
+
+#if 0
+	/* PCI passthru is not implemented on arm64 yet. */
+	if (ppt_is_mmio(vm, gpa))
+		return (true);			/* 'gpa' is pci passthru mmio */
+#endif
+
+	return (false);
+}
+
+/*
+ * Allocate the backing VM object for memory segment 'ident'.  Returns
+ * EEXIST if an identical segment already exists, EINVAL on a mismatched
+ * or malformed request, ENOMEM if the object cannot be allocated.
+ */
+int
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+{
+	struct mem_seg *seg;
+	vm_object_t obj;
+
+	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+		return (EINVAL);
+
+	if (len == 0 || (len & PAGE_MASK))
+		return (EINVAL);	/* must be a non-zero page multiple */
+
+	seg = &vm->mem_segs[ident];
+	if (seg->object != NULL) {
+		if (seg->len == len && seg->sysmem == sysmem)
+			return (EEXIST);
+		else
+			return (EINVAL);
+	}
+
+	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+	if (obj == NULL)
+		return (ENOMEM);
+
+	seg->len = len;
+	seg->object = obj;
+	seg->sysmem = sysmem;
+	return (0);
+}
+
+/* Look up memory segment 'ident'; each out-pointer may be NULL. */
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+    vm_object_t *objptr)
+{
+	struct mem_seg *seg;
+
+	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+		return (EINVAL);
+
+	seg = &vm->mem_segs[ident];
+	if (len)
+		*len = seg->len;
+	if (sysmem)
+		*sysmem = seg->sysmem;
+	if (objptr)
+		*objptr = seg->object;
+	return (0);
+}
+
+/* Release segment 'ident' and drop its object reference, if any. */
+void
+vm_free_memseg(struct vm *vm, int ident)
+{
+	struct mem_seg *seg;
+
+	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+	    ("%s: invalid memseg ident %d", __func__, ident));
+
+	seg = &vm->mem_segs[ident];
+	if (seg->object != NULL) {
+		vm_object_deallocate(seg->object);
+		bzero(seg, sizeof(struct mem_seg));
+	}
+}
+
+/*
+ * Map [first, first+len) of memory segment 'segid' into the guest
+ * address space at 'gpa'.  All boundaries must be page-aligned and the
+ * range must lie inside the segment.  Records the mapping in a free
+ * mem_maps[] slot (ENOSPC if none) and optionally wires the pages.
+ */
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+    size_t len, int prot, int flags)
+{
+	struct mem_seg *seg;
+	struct mem_map *m, *map;
+	vm_ooffset_t last;
+	int i, error;
+
+	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+		return (EINVAL);
+
+	if (flags & ~VM_MEMMAP_F_WIRED)
+		return (EINVAL);
+
+	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+		return (EINVAL);
+
+	seg = &vm->mem_segs[segid];
+	if (seg->object == NULL)
+		return (EINVAL);	/* segment not allocated yet */
+
+	last = first + len;
+	if (first < 0 || first >= last || last > seg->len)
+		return (EINVAL);
+
+	if ((gpa | first | last) & PAGE_MASK)
+		return (EINVAL);	/* everything must be page-aligned */
+
+	/* Find a free mapping slot. */
+	map = NULL;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		m = &vm->mem_maps[i];
+		if (m->len == 0) {
+			map = m;
+			break;
+		}
+	}
+
+	if (map == NULL)
+		return (ENOSPC);
+
+	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
+	if (error != KERN_SUCCESS)
+		return (EFAULT);
+
+	/* The map now holds its own reference on the segment's object. */
+	vm_object_reference(seg->object);
+
+	if (flags & VM_MEMMAP_F_WIRED) {
+		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+		if (error != KERN_SUCCESS) {
+			/* Undo the mapping established above. */
+			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
+			    EFAULT);
+		}
+	}
+
+	map->gpa = gpa;
+	map->len = len;
+	map->segoff = first;
+	map->segid = segid;
+	map->prot = prot;
+	map->flags = flags;
+	return (0);
+}
+
+/*
+ * Iterator over guest memory mappings: find the mapping with the lowest
+ * starting address >= *gpa and return its parameters (each out-pointer
+ * other than 'gpa' may be NULL).  Returns ENOENT when no such mapping.
+ */
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+	struct mem_map *mm, *mmnext;
+	int i;
+
+	mmnext = NULL;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (mm->len == 0 || mm->gpa < *gpa)
+			continue;
+		if (mmnext == NULL || mm->gpa < mmnext->gpa)
+			mmnext = mm;
+	}
+
+	if (mmnext != NULL) {
+		*gpa = mmnext->gpa;
+		if (segid)
+			*segid = mmnext->segid;
+		if (segoff)
+			*segoff = mmnext->segoff;
+		if (len)
+			*len = mmnext->len;
+		if (prot)
+			*prot = mmnext->prot;
+		if (flags)
+			*flags = mmnext->flags;
+		return (0);
+	} else {
+		return (ENOENT);
+	}
+}
+
+/* Remove mapping 'ident' from the guest map and clear its slot. */
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+	struct mem_map *mm;
+	int error __diagused;
+
+	mm = &vm->mem_maps[ident];
+	if (mm->len) {
+		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+		    mm->gpa + mm->len);
+		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+		    __func__, error));
+		bzero(mm, sizeof(struct mem_map));
+	}
+}
+
+/* True if 'mm' is an in-use mapping backed by a system-memory segment. */
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+		return (true);
+	else
+		return (false);
+}
+
+/* Highest guest physical address covered by any sysmem mapping. */
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
+{
+	struct mem_map *mm;
+	vm_paddr_t maxaddr;
+	int i;
+
+	maxaddr = 0;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (sysmem_mapping(vm, mm)) {
+			if (maxaddr < mm->gpa + mm->len)
+				maxaddr = mm->gpa + mm->len;
+		}
+	}
+	return (maxaddr);
+}
+
+/* Register-emulation read handler: read-as-zero. */
+static int
+vmm_reg_raz(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+	*rval = 0;
+	return (0);
+}
+
+/* Read handler returning the fixed value 'arg' points at. */
+static int
+vmm_reg_read_arg(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+	*rval = *(uint64_t *)arg;
+	return (0);
+}
+
+/* Write handler: writes-ignored. */
+static int
+vmm_reg_wi(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+	return (0);
+}
+
+
+/*
+ * XXX(review): these two includes sit in the middle of the file and
+ * nothing below appears to use them; they likely belong at the top of
+ * the file or should be dropped - confirm before committing.
+ */
+#include <sys/queue.h>
+#include <sys/linker.h>
+
+/*
+ * Table of system registers that trap to the hypervisor and are emulated
+ * in the kernel.  Entries are matched against the ESR ISS of the trapped
+ * MSR/MRS instruction in vm_handle_reg_emul().
+ */
+static struct {
+	uint32_t	esr_iss;	/* encoded op0/op1/CRn/CRm/op2 */
+	uint32_t	esr_mask;	/* which ISS bits must match */
+	reg_read_t	reg_read;
+	reg_write_t	reg_write;
+	void		*arg;		/* passed to the handlers */
+} vmm_special_regs[] = {
+/* Entry with explicit read/write handlers. */
+#define	SPECIAL_REG(_reg, _read, _write)				\
+	{								\
+		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
+		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
+		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
+		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
+		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
+		.esr_mask = ISS_MSR_REG_MASK,				\
+		.reg_read = (_read),					\
+		.reg_write = (_write),					\
+		.arg = NULL,						\
+	}
+/* ID register: reads return the named vmm_desc field, writes ignored. */
+#define	ID_SPECIAL_REG(_reg, _name)					\
+	{								\
+		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
+		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
+		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
+		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
+		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
+		.esr_mask = ISS_MSR_REG_MASK,				\
+		.reg_read = vmm_reg_read_arg,				\
+		.reg_write = vmm_reg_wi,				\
+		.arg = &(vmm_desc._name),				\
+	}
+
+	/* ID registers */
+	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
+	ID_SPECIAL_REG(ID_AA64PFR1_EL1, id_aa64pfr1),
+
+	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
+	ID_SPECIAL_REG(ID_AA64DFR1_EL1, id_aa64dfr1),
+
+	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
+	ID_SPECIAL_REG(ID_AA64ISAR1_EL1, id_aa64isar1),
+
+	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
+	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),
+
+	/*
+	 * All other ID registers are read as zero.
+	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
+	 * Only the top bit of CRm is in the mask, so CRm values 0-7 match.
+	 */
+	{
+		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
+		    (0 << ISS_MSR_OP1_SHIFT) |
+		    (0 << ISS_MSR_CRn_SHIFT) |
+		    (0 << ISS_MSR_CRm_SHIFT),
+		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
+		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
+		.reg_read = vmm_reg_raz,
+		.reg_write = vmm_reg_wi,
+		.arg = NULL,
+	},
+
+	/* Counter physical registers */
+	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
+	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
+	    vtimer_phys_cval_write),
+	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
+	    vtimer_phys_tval_write),
+	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
+
+	/* GICv3 registers */
+	SPECIAL_REG(ICC_SGI1R_EL1, vgic_v3_icc_sgi1r_read,
+	    vgic_v3_icc_sgi1r_write),
+#undef SPECIAL_REG
+};
+
+/*
+ * Handle a trapped system-register access by searching vmm_special_regs[]
+ * for a matching entry.  On a match, emulate in the kernel and clear
+ * *retu; otherwise set *retu so userland handles the exit.
+ */
+static int
+vm_handle_reg_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vm_exit *vme;
+	struct vre *vre;
+	int i, rv;
+
+	vme = vm_exitinfo(vm, vcpuid);
+	vre = &vme->u.reg_emul.vre;
+
+	for (i = 0; i < nitems(vmm_special_regs); i++) {
+		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
+		    vmm_special_regs[i].esr_iss) {
+			rv = vmm_emulate_register(vm, vcpuid, vre,
+			    vmm_special_regs[i].reg_read,
+			    vmm_special_regs[i].reg_write,
+			    vmm_special_regs[i].arg);
+			if (rv == 0) {
+				*retu = false;
+			}
+			return (rv);
+		}
+	}
+
+	/* No kernel emulation available; punt to userland. */
+	*retu = true;
+	return (0);
+}
+
+/*
+ * Register in-kernel MMIO emulation handlers for the guest physical
+ * range [start, start + size).  Panics if all VM_MAX_MMIO_REGIONS slots
+ * are taken.
+ */
+void
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
+{
+	int i;
+
+	for (i = 0; i < nitems(vm->mmio_region); i++) {
+		/* A zeroed start and end marks a free slot. */
+		if (vm->mmio_region[i].start == 0 &&
+		    vm->mmio_region[i].end == 0) {
+			vm->mmio_region[i].start = start;
+			vm->mmio_region[i].end = start + size;
+			vm->mmio_region[i].read = mmio_read;
+			vm->mmio_region[i].write = mmio_write;
+			return;
+		}
+	}
+
+	panic("%s: No free MMIO region", __func__);
+}
+
+/*
+ * Remove a handler registered with vm_register_inst_handler().  The
+ * range must match exactly; panics otherwise.
+ */
+void
+vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
+{
+	int i;
+
+	for (i = 0; i < nitems(vm->mmio_region); i++) {
+		if (vm->mmio_region[i].start == start &&
+		    vm->mmio_region[i].end == start + size) {
+			memset(&vm->mmio_region[i], 0,
+			    sizeof(vm->mmio_region[i]));
+			return;
+		}
+	}
+
+	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
+	    start + size);
+}
+
+/*
+ * Handle a data-abort exit by emulating the faulting access against a
+ * registered MMIO region.  Falls through to userland (*retu = true)
+ * when no vgic is attached or no region covers the fault address.
+ */
+static int
+vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vm_exit *vme;
+	struct vie *vie;
+	struct hyp *hyp = vm->cookie;
+	uint64_t fault_ipa;
+	struct vm_guest_paging *paging;
+	struct vmm_mmio_region *vmr;
+	int error, i;
+
+	if (!hyp->vgic_attached)
+		goto out_user;
+
+	vme = vm_exitinfo(vm, vcpuid);
+	vie = &vme->u.inst_emul.vie;
+	paging = &vme->u.inst_emul.paging;
+
+	fault_ipa = vme->u.inst_emul.gpa;
+
+	/* Find the MMIO region containing the faulting IPA, if any. */
+	vmr = NULL;
+	for (i = 0; i < nitems(vm->mmio_region); i++) {
+		if (vm->mmio_region[i].start <= fault_ipa &&
+		    vm->mmio_region[i].end > fault_ipa) {
+			vmr = &vm->mmio_region[i];
+			break;
+		}
+	}
+	if (vmr == NULL)
+		goto out_user;
+
+	error = vmm_emulate_instruction(vm, vcpuid, fault_ipa, vie,
+	    paging, vmr->read, vmr->write, retu);
+	return (error);
+
+out_user:
+	*retu = true;
+	return (0);
+}
+
+/*
+ * Initiate VM suspension with reason 'how'.  Returns EALREADY if a
+ * suspend is already in progress; otherwise kicks every active vCPU so
+ * it notices the pending suspend.
+ */
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+	int i;
+
+	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+		return (EINVAL);
+
+	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+		VM_CTR2(vm, "virtual machine already suspended %d/%d",
+		    vm->suspend, how);
+		return (EALREADY);
+	}
+
+	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+	/*
+	 * Notify all active vcpus that they are now suspended.
+	 */
+	for (i = 0; i < vm->maxcpus; i++) {
+		if (CPU_ISSET(i, &vm->active_cpus))
+			vcpu_notify_event(vm, i, false);
+	}
+
+	return (0);
+}
+
+/* Fill in a VM_EXITCODE_SUSPENDED exit for the vCPU at 'pc'. */
+void
+vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t pc)
+{
+	struct vm_exit *vmexit;
+
+	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+	vmexit = vm_exitinfo(vm, vcpuid);
+	vmexit->pc = pc;
+	vmexit->inst_length = 4;	/* all AArch64 instructions are 4 bytes */
+	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+	vmexit->u.suspended.how = vm->suspend;
+}
+
+/* Mark a vCPU active; EBUSY if it already is. */
+int
+vm_activate_cpu(struct vm *vm, int vcpuid)
+{
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (CPU_ISSET(vcpuid, &vm->active_cpus))
+		return (EBUSY);
+
+	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
+	return (0);
+
+}
+
+/*
+ * Place one vCPU (or all active vCPUs when vcpuid == -1) into the debug
+ * set and notify them so they stop for the debugger.
+ */
+int
+vm_suspend_cpu(struct vm *vm, int vcpuid)
+{
+	int i;
+
+	if (vcpuid < -1 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (vcpuid == -1) {
+		vm->debug_cpus = vm->active_cpus;
+		for (i = 0; i < vm->maxcpus; i++) {
+			if (CPU_ISSET(i, &vm->active_cpus))
+				vcpu_notify_event(vm, i, false);
+		}
+	} else {
+		if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+			return (EINVAL);
+
+		CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
+		vcpu_notify_event(vm, vcpuid, false);
+	}
+	return (0);
+}
+
+/* Remove one vCPU (or all when vcpuid == -1) from the debug set. */
+int
+vm_resume_cpu(struct vm *vm, int vcpuid)
+{
+
+	if (vcpuid < -1 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (vcpuid == -1) {
+		CPU_ZERO(&vm->debug_cpus);
+	} else {
+		if (!CPU_ISSET(vcpuid, &vm->debug_cpus))
+			return (EINVAL);
+
+		CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
+	}
+	return (0);
+}
+
+
+/* Snapshot of the set of active vCPUs. */
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+	return (vm->active_cpus);
+}
+
+/* Snapshot of the set of vCPUs stopped for the debugger. */
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+	return (vm->debug_cpus);
+}
+
+/* Snapshot of the set of suspended vCPUs. */
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+	return (vm->suspended_cpus);
+}
+
+/* Return the vCPU's statistics buffer. */
+void *
+vcpu_stats(struct vm *vm, int vcpuid)
+{
+
+	return (vm->vcpu[vcpuid].stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ * to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+/*
+ * Kick a vCPU so it observes a pending event: IPI it if running on
+ * another host CPU, wake it if sleeping.  Caller holds the vcpu lock.
+ * 'lapic_intr' is an x86 carry-over and must be false on arm64.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
+{
+	int hostcpu;
+
+	KASSERT(lapic_intr == false, ("%s: lapic_intr != false", __func__));
+	hostcpu = vcpu->hostcpu;
+	if (vcpu->state == VCPU_RUNNING) {
+		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+		if (hostcpu != curcpu) {
+#if 0
+			/* x86 vlapic path, not applicable on arm64. */
+			if (lapic_intr) {
+				vlapic_post_intr(vcpu->vlapic, hostcpu,
+				    vmm_ipinum);
+			} else
+#endif
+			{
+				ipi_cpu(hostcpu, vmm_ipinum);
+			}
+		} else {
+			/*
+			 * If the 'vcpu' is running on 'curcpu' then it must
+			 * be sending a notification to itself (e.g. SELF_IPI).
+			 * The pending event will be picked up when the vcpu
+			 * transitions back to guest context.
+			 */
+		}
+	} else {
+		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+		    "with hostcpu %d", vcpu->state, hostcpu));
+		if (vcpu->state == VCPU_SLEEPING)
+			wakeup_one(vcpu);
+	}
+}
+
+/* Locked wrapper around vcpu_notify_event_locked(). */
+void
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+{
+	struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	vcpu_notify_event_locked(vcpu, lapic_intr);
+	vcpu_unlock(vcpu);
+}
+
+/*
+ * Transition the vCPU state machine with the vcpu lock held.  Returns
+ * EBUSY when the transition is not one of the legal ones listed below;
+ * wakes any vcpu_set_state() waiters when the vcpu goes back to IDLE.
+ */
+static int
+vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
+    bool from_idle)
+{
+	struct vcpu *vcpu;
+	int error;
+
+	vcpu = &vm->vcpu[vcpuid];
+	vcpu_assert_locked(vcpu);
+
+	/*
+	 * State transitions from the vmmdev_ioctl() must always begin from
+	 * the VCPU_IDLE state. This guarantees that there is only a single
+	 * ioctl() operating on a vcpu at any point.
+	 */
+	if (from_idle) {
+		while (vcpu->state != VCPU_IDLE) {
+			/* Nudge the current owner and wait for IDLE. */
+			vcpu_notify_event_locked(vcpu, false);
+			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+		}
+	} else {
+		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+		    "vcpu idle state"));
+	}
+
+	if (vcpu->state == VCPU_RUNNING) {
+		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+	} else {
+		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+		    "vcpu that is not running", vcpu->hostcpu));
+	}
+
+	/*
+	 * The following state transitions are allowed:
+	 * IDLE -> FROZEN -> IDLE
+	 * FROZEN -> RUNNING -> FROZEN
+	 * FROZEN -> SLEEPING -> FROZEN
+	 */
+	switch (vcpu->state) {
+	case VCPU_IDLE:
+	case VCPU_RUNNING:
+	case VCPU_SLEEPING:
+		error = (newstate != VCPU_FROZEN);
+		break;
+	case VCPU_FROZEN:
+		error = (newstate == VCPU_FROZEN);
+		break;
+	default:
+		error = 1;
+		break;
+	}
+
+	if (error)
+		return (EBUSY);
+
+	vcpu->state = newstate;
+	if (newstate == VCPU_RUNNING)
+		vcpu->hostcpu = curcpu;
+	else
+		vcpu->hostcpu = NOCPU;
+
+	if (newstate == VCPU_IDLE)
+		wakeup(&vcpu->state);
+
+	return (0);
+}
+
+/* Like vcpu_set_state() but the transition must succeed; panics if not. */
+static void
+vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+{
+	int error;
+
+	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
+		panic("Error %d setting state to %d\n", error, newstate);
+}
+
+/* Locked variant of vcpu_require_state(). */
+static void
+vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+{
+	int error;
+
+	if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
+		panic("Error %d setting state to %d", error, newstate);
+}
+
+/* Query a per-vCPU capability via the backend. */
+int
+vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
+{
+	if (vcpu < 0 || vcpu >= vm->maxcpus)
+		return (EINVAL);
+
+	if (type < 0 || type >= VM_CAP_MAX)
+		return (EINVAL);
+
+	return (VMGETCAP(vm->cookie, vcpu, type, retval));
+}
+
+/* Set a per-vCPU capability via the backend. */
+int
+vm_set_capability(struct vm *vm, int vcpu, int type, int val)
+{
+	if (vcpu < 0 || vcpu >= vm->maxcpus)
+		return (EINVAL);
+
+	if (type < 0 || type >= VM_CAP_MAX)
+		return (EINVAL);
+
+	return (VMSETCAP(vm->cookie, vcpu, type, val));
+}
+
+/* Public entry for vCPU state transitions; takes the vcpu lock. */
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
+    bool from_idle)
+{
+	int error;
+	struct vcpu *vcpu;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
+	vcpu_unlock(vcpu);
+
+	return (error);
+}
+
+/*
+ * Return the vCPU's current state and, if 'hostcpu' is non-NULL, the
+ * host CPU it is running on (NOCPU when not running).
+ */
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
+{
+	struct vcpu *vcpu;
+	enum vcpu_state state;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	state = vcpu->state;
+	if (hostcpu != NULL)
+		*hostcpu = vcpu->hostcpu;
+	vcpu_unlock(vcpu);
+
+	return (state);
+}
+
+/*
+ * Wire the guest page containing 'gpa' and return a host virtual
+ * address for it (the range may not cross a page boundary).  On success
+ * *cookie holds the page for a later vm_gpa_release(); on failure the
+ * return value and *cookie are NULL.
+ */
+void *
+vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
+    void **cookie)
+{
+	int i, count, pageoff;
+	struct mem_map *mm;
+	vm_page_t m;
+#ifdef INVARIANTS
+	/*
+	 * All vcpus are frozen by ioctls that modify the memory map
+	 * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
+	 * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
+	 */
+	int state;
+	KASSERT(vcpuid >= -1 && vcpuid < vm->maxcpus, ("%s: invalid vcpuid %d",
+	    __func__, vcpuid));
+	for (i = 0; i < vm->maxcpus; i++) {
+		if (vcpuid != -1 && vcpuid != i)
+			continue;
+		state = vcpu_get_state(vm, i, NULL);
+		KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+		    __func__, state));
+	}
+#endif
+	pageoff = gpa & PAGE_MASK;
+	if (len > PAGE_SIZE - pageoff)
+		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+	count = 0;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+		    gpa < mm->gpa + mm->len) {
+			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+			break;
+		}
+	}
+
+	if (count == 1) {
+		*cookie = m;
+		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+	} else {
+		*cookie = NULL;
+		return (NULL);
+	}
+}
+
+/* Release a page held by vm_gpa_hold(). */
+void
+vm_gpa_release(void *cookie)
+{
+	vm_page_t m = cookie;
+
+	vm_page_unwire(m, PQ_ACTIVE);
+}
+
+int
+vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
+{
+
+ if (vcpu < 0 || vcpu >= vm->maxcpus)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (VMGETREG(vm->cookie, vcpu, reg, retval));
+}
+
+/*
+ * Write a guest register.  A successful write to VM_REG_ELR_EL2 also
+ * updates the vcpu's saved next program counter so the guest resumes
+ * from the new address on the next vm_run().
+ */
+int
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
+{
+	struct vcpu *vcpu;
+	int error;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (reg >= VM_REG_LAST)
+		return (EINVAL);
+	error = VMSETREG(vm->cookie, vcpuid, reg, val);
+	if (error || reg != VM_REG_ELR_EL2)
+		return (error);
+
+	/* Resume the guest at the newly written program counter. */
+	vcpu = &vm->vcpu[vcpuid];
+	vcpu->nextpc = val;
+
+	return (0);
+}
+
+/*
+ * Return the architecture-private cookie for the VM (cast to struct hyp *
+ * by the callers in this file).
+ */
+void *
+vm_get_cookie(struct vm *vm)
+{
+
+	return (vm->cookie);
+}
+
+/*
+ * Attach the virtual GICv3 distributor and redistributor regions to the
+ * VM at the given guest physical addresses.
+ */
+int
+vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+    uint64_t redist_start, size_t redist_size)
+{
+
+	return (vgic_v3_attach_to_vm(vm, dist_start, dist_size, redist_start,
+	    redist_size));
+}
+
+/* Assert (raise) interrupt 'irq' on the VM's virtual GIC. */
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+	struct hyp *hyp;
+
+	hyp = (struct hyp *)vm->cookie;
+	return (vgic_v3_inject_irq(hyp, -1, irq, true));
+}
+
+/* Deassert (lower) interrupt 'irq' on the VM's virtual GIC. */
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+	struct hyp *hyp;
+
+	hyp = (struct hyp *)vm->cookie;
+	return (vgic_v3_inject_irq(hyp, -1, irq, false));
+}
+
+/*
+ * Deliver an MSI write to the VM.  Only addresses that fall inside the
+ * virtual GIC distributor range are accepted; anything else fails with
+ * EINVAL.  'bus', 'slot' and 'func' identify the source device but are
+ * not used here.
+ */
+int
+vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+    int func)
+{
+	struct hyp *hyp = (struct hyp *)vm->cookie;
+	int error;
+
+	if (addr >= hyp->vgic_dist.start && addr < hyp->vgic_dist.end) {
+		error = vgic_v3_inject_msi(hyp, msg, addr);
+		if (error == 0)
+			return (0);
+	}
+
+	/* TODO: Should we raise an SError? */
+	return (EINVAL);
+}
+
+/*
+ * Handle a guest WFI (wait-for-interrupt) exit: sleep the vcpu until a
+ * virtual interrupt becomes pending or the scheduler wants the CPU back.
+ * Always resumes in the kernel (*retu = false).  'vme' is currently
+ * unused.
+ */
+static int
+vm_handle_wfi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+	struct hyp *hyp;
+	struct vcpu *vcpu;
+	struct hypctx *hypctx;
+
+	vcpu = &vm->vcpu[vcpuid];
+	hyp = vm->cookie;
+	hypctx = &hyp->ctx[vcpuid];
+
+	vcpu_lock(vcpu);
+	while (1) {
+		if (vgic_v3_vcpu_pending_irq(hypctx))
+			break;
+
+		if (vcpu_should_yield(vm, vcpuid))
+			break;
+
+		vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
+		/*
+		 * XXX msleep_spin() cannot be interrupted by signals so
+		 * wake up periodically to check pending signals.
+		 */
+		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
+		vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
+	}
+	vcpu_unlock(vcpu);
+
+	*retu = false;
+	return (0);
+}
+
+/*
+ * Handle a stage 2 translation fault by first letting the pmap update
+ * access/dirty state and, failing that, faulting the page in through the
+ * VM map.  Returns 0 on success or EFAULT if the page cannot be mapped.
+ */
+static int
+vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vm_exit *vme;
+	struct vm_map *map;
+	uint64_t addr, esr;
+	pmap_t pmap;
+	int ftype, rv;
+
+	vme = vm_exitinfo(vm, vcpuid);
+	pmap = vmspace_pmap(vm->vmspace);
+	addr = vme->u.paging.gpa;
+	esr = vme->u.paging.esr;
+
+	/* The page exists, but the page table needs to be updated */
+	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
+		return (0);
+
+	switch (ESR_ELx_EXCEPTION(esr)) {
+	case EXCP_INSN_ABORT_L:
+	case EXCP_DATA_ABORT_L:
+		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
+		break;
+	default:
+		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
+	}
+
+	map = &vm->vmspace->vm_map;
+	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
+	if (rv != KERN_SUCCESS)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Run a vcpu until an exit that must be handled in userland occurs.
+ *
+ * Exits the backend can resolve in the kernel (paging faults, WFI, PSCI
+ * calls, register/instruction emulation) are handled here and the guest
+ * is re-entered; any other exit is copied into 'vmrun->vm_exit' for the
+ * userland monitor.
+ */
+int
+vm_run(struct vm *vm, struct vm_run *vmrun)
+{
+	struct vm_eventinfo evinfo;
+	int error, vcpuid;
+	struct vcpu *vcpu;
+	struct vm_exit *vme;
+	bool retu;
+	pmap_t pmap;
+
+	vcpuid = vmrun->cpuid;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+		return (EINVAL);
+
+	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
+		return (EINVAL);
+
+	pmap = vmspace_pmap(vm->vmspace);
+	vcpu = &vm->vcpu[vcpuid];
+	evinfo.rptr = NULL;
+	evinfo.sptr = &vm->suspend;
+	evinfo.iptr = NULL;
+restart:
+	/* The vcpu must not migrate between CPUs while in the guest. */
+	critical_enter();
+	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
+	error = VMRUN(vm->cookie, vcpuid, vcpu->nextpc, pmap, &evinfo);
+	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
+	critical_exit();
+
+	vme = vm_exitinfo(vm, vcpuid);
+	if (error == 0) {
+		retu = false;
+		switch (vme->exitcode) {
+		case VM_EXITCODE_INST_EMUL:
+			vcpu->nextpc = vme->pc + vme->inst_length;
+			error = vm_handle_inst_emul(vm, vcpuid, &retu);
+			break;
+
+		case VM_EXITCODE_REG_EMUL:
+			vcpu->nextpc = vme->pc + vme->inst_length;
+			error = vm_handle_reg_emul(vm, vcpuid, &retu);
+			break;
+
+		case VM_EXITCODE_HVC:
+			/*
+			 * The HVC instruction saves the address for the
+			 * next instruction as the return address.
+			 */
+			vcpu->nextpc = vme->pc;
+			/*
+			 * The PSCI call can change the exit information in the
+			 * case of suspend/reset/poweroff/cpu off/cpu on.
+			 */
+			error = psci_handle_call(vm, vcpuid, vme, &retu);
+			break;
+
+		case VM_EXITCODE_WFI:
+			vcpu->nextpc = vme->pc + vme->inst_length;
+			error = vm_handle_wfi(vm, vcpuid, vme, &retu);
+			break;
+
+		case VM_EXITCODE_PAGING:
+			vcpu->nextpc = vme->pc;
+			error = vm_handle_paging(vm, vcpuid, &retu);
+			break;
+
+		default:
+			/* Handle in userland */
+			vcpu->nextpc = vme->pc;
+			retu = true;
+			break;
+		}
+	}
+
+	if (error == 0 && retu == false)
+		goto restart;
+
+	/* Copy the exit information */
+	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
+
+	return (error);
+}
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -0,0 +1,1076 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/vmem.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/hypervisor.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+#include "hyp.h"
+#include "reset.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define HANDLED 1
+#define UNHANDLED 0
+
+#define UNUSED 0
+
+/* Number of bits in an EL2 virtual address */
+#define EL2_VIRT_BITS 48
+CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);
+
+/* TODO: Move the host hypctx off the stack */
+#define VMM_STACK_PAGES 4
+#define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE)
+
+static int vmm_pmap_levels, vmm_virt_bits;
+
+/* Register values passed to arm_setup_vectors to set in the hypervisor */
+struct vmm_init_regs {
+ uint64_t tcr_el2;
+ uint64_t vtcr_el2;
+};
+
+MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");
+
+extern char hyp_init_vectors[];
+extern char hyp_vectors[];
+extern char hyp_stub_vectors[];
+
+static vm_paddr_t hyp_code_base;
+static size_t hyp_code_len;
+
+static char *stack[MAXCPU];
+static vm_offset_t stack_hyp_va[MAXCPU];
+
+static vmem_t *el2_mem_alloc;
+
+static void arm_setup_vectors(void *arg);
+static void vmm_pmap_clean_stage2_tlbi(void);
+static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool);
+static void vmm_pmap_invalidate_all(uint64_t);
+
+/* Record the vcpu currently active on this physical CPU (NULL for none). */
+static inline void
+arm64_set_active_vcpu(struct hypctx *hypctx)
+{
+
+	PCPU_SET(vcpu, hypctx);
+}
+
+/*
+ * Per-CPU EL2 bring-up, run from smp_rendezvous() on every CPU.
+ *
+ * Installs the initialization vectors, then makes the special hypervisor
+ * call that enables the EL2 MMU with the register values in 'arg' (a
+ * struct vmm_init_regs).
+ */
+static void
+arm_setup_vectors(void *arg)
+{
+	struct vmm_init_regs *el2_regs;
+	char *stack_top;
+	uint32_t sctlr_el2;
+	register_t daif;
+
+	el2_regs = arg;
+	arm64_set_active_vcpu(NULL);
+
+	daif = intr_disable();
+
+	/*
+	 * Install the temporary vectors which will be responsible for
+	 * initializing the VMM when we next trap into EL2.
+	 *
+	 * x0: the exception vector table responsible for hypervisor
+	 * initialization on the next call.
+	 */
+	vmm_call_hyp(vtophys(&vmm_hyp_code));
+
+	/* Top of the per-CPU hypervisor stack mapped by arm_init() */
+	stack_top = (char *)stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
+
+	/*
+	 * Configure the system control register for EL2:
+	 *
+	 * SCTLR_EL2_M: MMU on
+	 * SCTLR_EL2_C: Data cacheability not affected
+	 * SCTLR_EL2_I: Instruction cacheability not affected
+	 * SCTLR_EL2_A: Instruction alignment check
+	 * SCTLR_EL2_SA: Stack pointer alignment check
+	 * SCTLR_EL2_WXN: Treat writable memory as execute never
+	 * ~SCTLR_EL2_EE: Data accesses are little-endian
+	 */
+	sctlr_el2 = SCTLR_EL2_RES1;
+	sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
+	sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
+	sctlr_el2 |= SCTLR_EL2_WXN;
+	sctlr_el2 &= ~SCTLR_EL2_EE;
+
+	/* Special call to initialize EL2 */
+	vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
+	    sctlr_el2, el2_regs->vtcr_el2);
+
+	intr_restore(daif);
+}
+
+/*
+ * Per-CPU EL2 teardown, run from smp_rendezvous().  Disables the EL2 MMU
+ * and reinstalls the stub vectors.
+ */
+static void
+arm_teardown_vectors(void *arg)
+{
+	register_t daif;
+
+	/*
+	 * vmm_cleanup() will disable the MMU. For the next few instructions,
+	 * before the hardware disables the MMU, one of the following is
+	 * possible:
+	 *
+	 * a. The instruction addresses are fetched with the MMU disabled,
+	 * and they must represent the actual physical addresses. This will work
+	 * because we call the vmm_cleanup() function by its physical address.
+	 *
+	 * b. The instruction addresses are fetched using the old translation
+	 * tables. This will work because we have an identity mapping in place
+	 * in the translation tables and vmm_cleanup() is called by its physical
+	 * address.
+	 */
+	daif = intr_disable();
+	/* TODO: Invalidate the cache */
+	vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
+	intr_restore(daif);
+
+	arm64_set_active_vcpu(NULL);
+}
+
+/*
+ * Translate a stage 2 page table level count into the VTCR_EL2.SL0
+ * (starting level) field value for the kernel's page size.  Panics on an
+ * unsupported level count.
+ */
+static uint64_t
+vmm_vtcr_el2_sl(u_int levels)
+{
+#if PAGE_SIZE == PAGE_SIZE_4K
+	if (levels == 2)
+		return (VTCR_EL2_SL0_4K_LVL2);
+	if (levels == 3)
+		return (VTCR_EL2_SL0_4K_LVL1);
+	if (levels == 4)
+		return (VTCR_EL2_SL0_4K_LVL0);
+	panic("%s: Invalid number of page table levels %u", __func__,
+	    levels);
+#elif PAGE_SIZE == PAGE_SIZE_16K
+	if (levels == 2)
+		return (VTCR_EL2_SL0_16K_LVL2);
+	if (levels == 3)
+		return (VTCR_EL2_SL0_16K_LVL1);
+	if (levels == 4)
+		return (VTCR_EL2_SL0_16K_LVL0);
+	panic("%s: Invalid number of page table levels %u", __func__,
+	    levels);
+#else
+#error Unsupported page size
+#endif
+}
+
+/*
+ * Global one-time initialization of the arm64 VMM backend.
+ *
+ * Verifies hardware support, initializes the EL2 MMU, maps the
+ * hypervisor code and a per-CPU stack into EL2, installs the vectors on
+ * every CPU and configures the EL2 translation control registers.
+ * Returns 0 on success or an errno value on failure.
+ */
+static int
+arm_init(int ipinum)
+{
+	struct vmm_init_regs el2_regs;
+	vm_offset_t next_hyp_va;
+	vm_paddr_t vmm_base;
+	uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
+	uint64_t ich_vtr_el2;
+	uint64_t cnthctl_el2;
+	register_t daif;
+	int cpu, i;
+	bool rv __diagused;
+
+	if (!virt_enabled()) {
+		printf("arm_init: Processor doesn't have support for virtualization.\n");
+		return (ENXIO);
+	}
+
+	if (!vgic_present()) {
+		printf("arm_init: No GICv3 found\n");
+		return (ENODEV);
+	}
+
+	if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
+		printf("arm_init: Unable to read ID_AA64MMFR0_EL1\n");
+		return (ENXIO);
+	}
+	pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
+	/*
+	 * Use 3 levels to give us up to 39 bits with 4k pages, or
+	 * 47 bits with 16k pages.
+	 */
+	/* TODO: Check the number of levels for 64k pages */
+	vmm_pmap_levels = 3;
+	switch (pa_range_field) {
+	case ID_AA64MMFR0_PARange_4G:
+		printf("arm_init: Not enough physical address bits\n");
+		return (ENXIO);
+	case ID_AA64MMFR0_PARange_64G:
+		vmm_virt_bits = 36;
+#if PAGE_SIZE == PAGE_SIZE_16K
+		/* TODO: Test */
+		vmm_pmap_levels = 2;
+#endif
+		break;
+	default:
+		vmm_virt_bits = 39;
+		break;
+	}
+	pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;
+
+	/* Initialise the EL2 MMU */
+	if (!vmmpmap_init()) {
+		printf("arm_init: Failed to init the EL2 MMU\n");
+		return (ENOMEM);
+	}
+
+	/* Set up the stage 2 pmap callbacks */
+	MPASS(pmap_clean_stage2_tlbi == NULL);
+	pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi;
+	pmap_stage2_invalidate_range = vmm_pmap_invalidate_range;
+	pmap_stage2_invalidate_all = vmm_pmap_invalidate_all;
+
+	/* Create the vmem allocator */
+	el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+
+	/* Create the mappings for the hypervisor translation table. */
+	hyp_code_len = roundup2(&vmm_hyp_code_end - &vmm_hyp_code, PAGE_SIZE);
+
+	/* We need a physical identity mapping for when we activate the MMU */
+	hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
+	rv = vmmpmap_enter(vmm_base, hyp_code_len, vtophys(&vmm_hyp_code),
+	    VM_PROT_READ | VM_PROT_EXECUTE);
+	MPASS(rv);
+
+	next_hyp_va = roundup2(vtophys(&vmm_hyp_code) + hyp_code_len, L2_SIZE);
+
+	/* Create a per-CPU hypervisor stack */
+	CPU_FOREACH(cpu) {
+		stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
+		stack_hyp_va[cpu] = next_hyp_va;
+
+		for (i = 0; i < VMM_STACK_PAGES; i++) {
+			rv = vmmpmap_enter(stack_hyp_va[cpu] + (i * PAGE_SIZE),
+			    PAGE_SIZE, vtophys(stack[cpu] + (i * PAGE_SIZE)),
+			    VM_PROT_READ | VM_PROT_WRITE);
+			MPASS(rv);
+		}
+		next_hyp_va += L2_SIZE;
+	}
+
+	el2_regs.tcr_el2 = TCR_EL2_RES1;
+	el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
+	    TCR_EL2_PS_52BITS);
+	el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
+	el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
+#if PAGE_SIZE == PAGE_SIZE_4K
+	el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+	el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+	el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
+#endif
+
+	/*
+	 * Configure the Stage 2 translation control register:
+	 *
+	 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
+	 * normal memory
+	 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
+	 * normal memory
+	 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
+	 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
+	 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
+	 * shareable
+	 */
+	el2_regs.vtcr_el2 = VTCR_EL2_RES1;
+	el2_regs.vtcr_el2 |=
+	    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
+	el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
+	el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
+	el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
+#if PAGE_SIZE == PAGE_SIZE_4K
+	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+	el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
+#endif
+
+	smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);
+
+	/* Add memory to the vmem allocator (checking there is space) */
+	if (vmm_base > L2_SIZE) {
+		/*
+		 * Ensure there is an L2 block before the vmm code to check
+		 * for buffer overflows on earlier data. Include the PAGE_SIZE
+		 * of the minimum we can allocate.
+		 */
+		vmm_base -= L2_SIZE + PAGE_SIZE;
+		vmm_base = rounddown2(vmm_base, L2_SIZE);
+
+		/*
+		 * Check there is memory before the vmm code to add.
+		 *
+		 * Reserve the L2 block at address 0 so NULL dereference will
+		 * raise an exception
+		 */
+		if (vmm_base > L2_SIZE)
+			/*
+			 * NOTE(review): the span [L2_SIZE, next_hyp_va)
+			 * appears to overlap the hyp code/stack mappings
+			 * created above; verify the intended size isn't
+			 * vmm_base - L2_SIZE.
+			 */
+			vmem_add(el2_mem_alloc, L2_SIZE, next_hyp_va - L2_SIZE,
+			    M_WAITOK);
+	}
+
+	/*
+	 * Add the memory after the stacks. There is most of an L2 block
+	 * between the last stack and the first allocation so this should
+	 * be safe without adding more padding.
+	 */
+	if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
+		vmem_add(el2_mem_alloc, next_hyp_va,
+		    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
+
+	/* Read EL2-only registers needed by the GIC and timer code */
+	daif = intr_disable();
+	ich_vtr_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_ICH_VTR);
+	cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL);
+	intr_restore(daif);
+
+	vgic_v3_init(ich_vtr_el2);
+	vtimer_init(cnthctl_el2);
+
+	return (0);
+}
+
+/*
+ * Undo arm_init(): tear down the EL2 vectors on every CPU, unhook the
+ * stage 2 pmap callbacks and release the hypervisor stacks and EL2 MMU
+ * state.
+ */
+static int
+arm_cleanup(void)
+{
+	int cpu;
+
+	smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
+
+#ifdef INVARIANTS
+	/* Only needed so vmmpmap_fini() sees no leftover mappings */
+	CPU_FOREACH(cpu) {
+		vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE,
+		    false);
+	}
+
+	vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+#endif
+
+	vtimer_cleanup();
+
+	vmmpmap_fini();
+	for (cpu = 0; cpu < nitems(stack); cpu++)
+		free(stack[cpu], M_HYP);
+
+	pmap_clean_stage2_tlbi = NULL;
+
+	return (0);
+}
+
+/*
+ * Create the architecture-private state (struct hyp) for a new VM,
+ * reset every vcpu, initialize the virtual timer and GIC, and map the
+ * structure into the EL2 address space.
+ */
+static void *
+arm_vminit(struct vm *vm, pmap_t pmap)
+{
+	struct hyp *hyp;
+	struct hypctx *hypctx;
+	vmem_addr_t vm_addr;
+	vm_size_t size;
+	bool last_vcpu, rv __diagused;
+	int err __diagused, i, maxcpus;
+
+	/* Ensure this is the only data on the page */
+	size = roundup2(sizeof(struct hyp), PAGE_SIZE);
+	hyp = malloc(size, M_HYP, M_WAITOK | M_ZERO);
+	MPASS(((vm_offset_t)hyp & PAGE_MASK) == 0);
+
+	hyp->vm = vm;
+	hyp->vgic_attached = false;
+
+	maxcpus = vm_get_maxcpus(vm);
+	for (i = 0; i < maxcpus; i++) {
+		hypctx = &hyp->ctx[i];
+		hypctx->vcpu = i;
+		hypctx->hyp = hyp;
+
+		reset_vm_el01_regs(hypctx);
+		reset_vm_el2_regs(hypctx);
+	}
+
+	vtimer_vminit(hyp);
+	vgic_v3_vminit(hyp);
+	/*
+	 * NOTE(review): this loop uses VM_MAXCPU while the reset loop above
+	 * uses vm_get_maxcpus(); confirm the two bounds are meant to differ.
+	 */
+	for (i = 0; i < VM_MAXCPU; i++) {
+		hypctx = &hyp->ctx[i];
+		vtimer_cpuinit(hypctx);
+		last_vcpu = (i == VM_MAXCPU - 1);
+		vgic_v3_cpuinit(hypctx, last_vcpu);
+	}
+
+	/* XXX: Can this fail? */
+	err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK,
+	    &vm_addr);
+	MPASS(err == 0);
+	MPASS((vm_addr & PAGE_MASK) == 0);
+	hyp->el2_addr = vm_addr;
+
+	rv = vmmpmap_enter(hyp->el2_addr, size, vtophys(hyp),
+	    VM_PROT_READ | VM_PROT_WRITE);
+	MPASS(rv);
+
+	return (hyp);
+}
+
+/* pmap init callback: configure the pmap for stage 2 translation. */
+static int
+arm_vmm_pinit(pmap_t pmap)
+{
+
+	pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
+	return (1);
+}
+
+/* Allocate a guest vmspace whose pmap is initialized for stage 2. */
+static struct vmspace *
+arm_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+	return (vmspace_alloc(min, max, arm_vmm_pinit));
+}
+
+/* Release a guest vmspace, removing its pages first. */
+static void
+arm_vmspace_free(struct vmspace *vmspace)
+{
+
+	pmap_remove_pages(vmspace_pmap(vmspace));
+	vmspace_free(vmspace);
+}
+
+/* pmap callback: invalidate all stage 2 TLB entries via EL2. */
+static void
+vmm_pmap_clean_stage2_tlbi(void)
+{
+	vmm_call_hyp(HYP_CLEAN_S2_TLBI);
+}
+
+/*
+ * pmap callback: invalidate stage 2 TLB entries for [sva, eva) in the
+ * address space identified by 'vttbr'.  'final_only' restricts the
+ * invalidation to last-level entries.
+ */
+static void
+vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva,
+    bool final_only)
+{
+	MPASS(eva > sva);
+	vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only);
+}
+
+/* pmap callback: invalidate every stage 2 TLB entry for 'vttbr'. */
+static void
+vmm_pmap_invalidate_all(uint64_t vttbr)
+{
+	vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr);
+}
+
+/*
+ * Map a register number decoded from an exception syndrome to the
+ * corresponding vm_reg_name: 0-29 are X0-X29, then LR, SP, ELR, SPSR and
+ * ELR_EL2.  Returns VM_REG_LAST for anything out of range.  'mode' is
+ * currently unused.
+ */
+static enum vm_reg_name
+get_vm_reg_name(uint32_t reg_nr, uint32_t mode __attribute__((unused)))
+{
+	static const enum vm_reg_name reg_names[] = {
+		VM_REG_GUEST_X0,  VM_REG_GUEST_X1,  VM_REG_GUEST_X2,
+		VM_REG_GUEST_X3,  VM_REG_GUEST_X4,  VM_REG_GUEST_X5,
+		VM_REG_GUEST_X6,  VM_REG_GUEST_X7,  VM_REG_GUEST_X8,
+		VM_REG_GUEST_X9,  VM_REG_GUEST_X10, VM_REG_GUEST_X11,
+		VM_REG_GUEST_X12, VM_REG_GUEST_X13, VM_REG_GUEST_X14,
+		VM_REG_GUEST_X15, VM_REG_GUEST_X16, VM_REG_GUEST_X17,
+		VM_REG_GUEST_X18, VM_REG_GUEST_X19, VM_REG_GUEST_X20,
+		VM_REG_GUEST_X21, VM_REG_GUEST_X22, VM_REG_GUEST_X23,
+		VM_REG_GUEST_X24, VM_REG_GUEST_X25, VM_REG_GUEST_X26,
+		VM_REG_GUEST_X27, VM_REG_GUEST_X28, VM_REG_GUEST_X29,
+		VM_REG_GUEST_LR,  VM_REG_GUEST_SP,  VM_REG_GUEST_ELR,
+		VM_REG_GUEST_SPSR, VM_REG_ELR_EL2,
+	};
+
+	if (reg_nr < nitems(reg_names))
+		return (reg_names[reg_nr]);
+
+	return (VM_REG_LAST);
+}
+
+/* Print the EL2 exit registers from a vm_exit for diagnostics. */
+static inline void
+arm64_print_hyp_regs(struct vm_exit *vme)
+{
+	printf("esr_el2:   0x%08x\n", vme->u.hyp.esr_el2);
+	printf("far_el2:   0x%016lx\n", vme->u.hyp.far_el2);
+	printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
+}
+
+/*
+ * Fill in the instruction-emulation exit information (faulting GPA,
+ * access size/direction/register and guest paging state) from the data
+ * abort syndrome in 'esr_iss' and the saved EL2 exit registers.
+ */
+static void
+arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
+    struct vm_exit *vme_ret)
+{
+	struct vm_guest_paging *paging;
+	struct vie *vie;
+	uint32_t esr_sas, reg_num;
+	uint64_t page_off;
+
+	/*
+	 * Get the page address from HPFAR_EL2.
+	 */
+	vme_ret->u.inst_emul.gpa =
+	    HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+	/* Bits [11:0] are the same as bits [11:0] from the virtual address. */
+	page_off = FAR_EL2_PAGE_OFFSET(hypctx->exit_info.far_el2);
+	vme_ret->u.inst_emul.gpa += page_off;
+
+	esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
+	reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
+
+	vie = &vme_ret->u.inst_emul.vie;
+	/* SAS encodes the access size as a power of two (1/2/4/8 bytes) */
+	vie->access_size = 1 << esr_sas;
+	vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
+	vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
+	vie->reg = get_vm_reg_name(reg_num, UNUSED);
+
+	paging = &vme_ret->u.inst_emul.paging;
+	paging->far = hypctx->exit_info.far_el2;
+	paging->ttbr0_el1 = hypctx->ttbr0_el1;
+	paging->ttbr1_el1 = hypctx->ttbr1_el1;
+	paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
+	if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
+		paging->flags |= VM_GP_MMU_ENABLED;
+}
+
+/*
+ * Fill in the register-emulation exit information (syndrome, direction
+ * and target register) from a trapped MSR/MRS instruction.
+ */
+static void
+arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+	uint32_t reg_num;
+	struct vre *vre;
+
+	/* u.hyp member will be replaced by u.reg_emul */
+	vre = &vme_ret->u.reg_emul.vre;
+
+	vre->inst_syndrome = esr_iss;
+	/* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
+	vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
+	reg_num = ISS_MSR_Rt(esr_iss);
+	vre->reg = get_vm_reg_name(reg_num, UNUSED);
+}
+
+/*
+ * Decode a synchronous exception taken from the guest at EL1 and set the
+ * matching exit code in 'vme_ret'.  Always returns UNHANDLED: nothing is
+ * emulated here, the exit code tells the caller how to proceed.
+ */
+static int
+handle_el1_sync_excp(struct hyp *hyp, int vcpu, struct vm_exit *vme_ret,
+    pmap_t pmap)
+{
+	struct hypctx *hypctx;
+	uint64_t gpa;
+	uint32_t esr_ec, esr_iss;
+
+	hypctx = &hyp->ctx[vcpu];
+	esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
+	esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;
+
+	switch(esr_ec) {
+	case EXCP_UNKNOWN:
+		eprintf("Unknown exception from guest\n");
+		arm64_print_hyp_regs(vme_ret);
+		vme_ret->exitcode = VM_EXITCODE_HYP;
+		break;
+	case EXCP_TRAP_WFI_WFE:
+		if ((hypctx->tf.tf_esr & 0x3) == 0) /* WFI */
+			vme_ret->exitcode = VM_EXITCODE_WFI;
+		else
+			vme_ret->exitcode = VM_EXITCODE_HYP;
+		break;
+	case EXCP_HVC:
+		vme_ret->exitcode = VM_EXITCODE_HVC;
+		break;
+	case EXCP_MSR:
+		arm64_gen_reg_emul_data(esr_iss, vme_ret);
+		vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
+		break;
+
+	case EXCP_INSN_ABORT_L:
+	case EXCP_DATA_ABORT_L:
+		switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+		case ISS_DATA_DFSC_TF_L0:
+		case ISS_DATA_DFSC_TF_L1:
+		case ISS_DATA_DFSC_TF_L2:
+		case ISS_DATA_DFSC_TF_L3:
+		case ISS_DATA_DFSC_AFF_L1:
+		case ISS_DATA_DFSC_AFF_L2:
+		case ISS_DATA_DFSC_AFF_L3:
+		case ISS_DATA_DFSC_PF_L1:
+		case ISS_DATA_DFSC_PF_L2:
+		case ISS_DATA_DFSC_PF_L3:
+			/* 'hypctx' already points at this vcpu's context. */
+			gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+			if (vm_mem_allocated(hyp->vm, vcpu, gpa)) {
+				vme_ret->exitcode = VM_EXITCODE_PAGING;
+				vme_ret->inst_length = 0;
+				vme_ret->u.paging.esr = hypctx->tf.tf_esr;
+				vme_ret->u.paging.gpa = gpa;
+			} else if (esr_ec == EXCP_DATA_ABORT_L) {
+				arm64_gen_inst_emul_data(hypctx, esr_iss,
+				    vme_ret);
+				vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
+			} else {
+				eprintf(
+				    "Unsupported instruction fault from guest\n");
+				arm64_print_hyp_regs(vme_ret);
+				vme_ret->exitcode = VM_EXITCODE_HYP;
+			}
+			break;
+		default:
+			eprintf(
+			    "Unsupported data/instruction fault from guest\n");
+			arm64_print_hyp_regs(vme_ret);
+			vme_ret->exitcode = VM_EXITCODE_HYP;
+			break;
+		}
+
+		break;
+
+	default:
+		eprintf("Unsupported synchronous exception from guest: 0x%x\n",
+		    esr_ec);
+		arm64_print_hyp_regs(vme_ret);
+		vme_ret->exitcode = VM_EXITCODE_HYP;
+		break;
+	}
+
+	/* We don't do any instruction emulation here */
+	return (UNHANDLED);
+}
+
+/*
+ * Dispatch on the EL2 world-switch exception type.  Returns HANDLED when
+ * the exit was resolved in the kernel, otherwise UNHANDLED with 'vme'
+ * describing the exit for emulation or userland.
+ */
+static int
+arm64_handle_world_switch(struct hyp *hyp, int vcpu, int excp_type,
+    struct vm_exit *vme, pmap_t pmap)
+{
+	int handled;
+
+	switch (excp_type) {
+	case EXCP_TYPE_EL1_SYNC:
+		/* The exit code will be set by handle_el1_sync_excp(). */
+		handled = handle_el1_sync_excp(hyp, vcpu, vme, pmap);
+		break;
+
+	case EXCP_TYPE_EL1_IRQ:
+	case EXCP_TYPE_EL1_FIQ:
+		/* The host kernel will handle IRQs and FIQs. */
+		vme->exitcode = VM_EXITCODE_BOGUS;
+		handled = UNHANDLED;
+		break;
+
+	case EXCP_TYPE_EL1_ERROR:
+	case EXCP_TYPE_EL2_SYNC:
+	case EXCP_TYPE_EL2_IRQ:
+	case EXCP_TYPE_EL2_FIQ:
+	case EXCP_TYPE_EL2_ERROR:
+		/*
+		 * Print the numeric value; __STRING(excp_type) would only
+		 * ever print the literal string "excp_type".
+		 */
+		eprintf("Unhandled exception type: %d\n", excp_type);
+		vme->exitcode = VM_EXITCODE_BOGUS;
+		handled = UNHANDLED;
+		break;
+
+	default:
+		eprintf("Unknown exception type: %d\n", excp_type);
+		vme->exitcode = VM_EXITCODE_BOGUS;
+		handled = UNHANDLED;
+		break;
+	}
+
+	return (handled);
+}
+
+/*
+ * Backend VMRUN entry point: repeatedly enter the guest vcpu at EL2 and
+ * handle world-switch exits, returning when an exit must be serviced
+ * outside this loop (by vm_run() or userland).
+ */
+static int
+arm_vmrun(void *arg, int vcpu, register_t pc, pmap_t pmap,
+    struct vm_eventinfo *evinfo)
+{
+	uint64_t excp_type;
+	int handled;
+	register_t daif;
+	struct hyp *hyp;
+	struct hypctx *hypctx;
+	struct vm *vm;
+	struct vm_exit *vme;
+
+	hyp = (struct hyp *)arg;
+	vm = hyp->vm;
+	vme = vm_exitinfo(vm, vcpu);
+
+	hypctx = &hyp->ctx[vcpu];
+	hypctx->tf.tf_elr = (uint64_t)pc;
+
+	for (;;) {
+		/* Interrupts stay disabled across the world switch */
+		daif = intr_disable();
+
+		/* Check if the vcpu is suspended */
+		if (vcpu_suspended(evinfo)) {
+			intr_restore(daif);
+			vm_exit_suspended(vm, vcpu, pc);
+			break;
+		}
+
+		/* Activate the stage2 pmap so the vmid is valid */
+		pmap_activate_vm(pmap);
+		hyp->vttbr_el2 = pmap_to_ttbr0(pmap);
+
+		/*
+		 * TODO: What happens if a timer interrupt is asserted exactly
+		 * here, but for the previous VM?
+		 */
+		arm64_set_active_vcpu(hypctx);
+		vgic_v3_flush_hwstate(hypctx);
+
+		/* Call into EL2 to switch to the guest */
+		excp_type = vmm_call_hyp(HYP_ENTER_GUEST,
+		    hyp->el2_addr, vcpu);
+
+		vgic_v3_sync_hwstate(hypctx);
+
+		/*
+		 * Deactivate the stage2 pmap. vmm_pmap_clean_stage2_tlbi
+		 * depends on this meaning we activate the VM before entering
+		 * the vm again
+		 */
+		PCPU_SET(curvmpmap, NULL);
+		intr_restore(daif);
+
+		/* Maintenance interrupts are fully handled; re-enter */
+		if (excp_type == EXCP_TYPE_MAINT_IRQ)
+			continue;
+
+		vme->pc = hypctx->tf.tf_elr;
+		vme->inst_length = INSN_SIZE;
+		vme->u.hyp.exception_nr = excp_type;
+		vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
+		vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
+		vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
+
+		handled = arm64_handle_world_switch(hyp, vcpu, excp_type, vme,
+		    pmap);
+		if (handled == UNHANDLED)
+			/* Exit loop to emulate instruction. */
+			break;
+		else
+			/* Resume guest execution from the next instruction. */
+			hypctx->tf.tf_elr += vme->inst_length;
+	}
+
+	return (0);
+}
+
+/*
+ * Per-CPU VM cleanup (smp_rendezvous callback): clear the active-vcpu
+ * pointer if it refers to a vcpu of the VM being destroyed.
+ */
+static void
+arm_pcpu_vmcleanup(void *arg)
+{
+	struct hyp *hyp;
+	int i, maxcpus;
+
+	hyp = arg;
+	maxcpus = vm_get_maxcpus(hyp->vm);
+	for (i = 0; i < maxcpus; i++) {
+		if (arm64_get_active_vcpu() == &hyp->ctx[i]) {
+			arm64_set_active_vcpu(NULL);
+			break;
+		}
+	}
+}
+
+/*
+ * Destroy the architecture-private state created by arm_vminit():
+ * tear down the per-vcpu timer/GIC state, clear stale per-CPU vcpu
+ * pointers, unmap the struct hyp from EL2 and free it.
+ */
+static void
+arm_vmcleanup(void *arg)
+{
+	struct hyp *hyp = arg;
+	struct hypctx *hypctx;
+	int i;
+
+	/* NOTE(review): loop bound is VM_MAXCPU, matching arm_vminit() */
+	for (i = 0; i < VM_MAXCPU; i++) {
+		hypctx = &hyp->ctx[i];
+		vtimer_cpucleanup(hypctx);
+		vgic_v3_cpucleanup(hypctx);
+	}
+
+	vtimer_vmcleanup(hyp);
+	vgic_v3_vmcleanup(hyp);
+
+	smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
+
+	/* Unmap the VM hyp struct from the hyp mode translation table */
+	vmmpmap_remove(hyp->el2_addr, roundup2(sizeof(*hyp), PAGE_SIZE),
+	    true);
+
+	free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer into the vcpu context for the given register, or NULL
+ * if the register is unknown.  Registers have different sizes and the
+ * caller must cast the pointer appropriately (see VM_REG_GUEST_SPSR
+ * handling in arm_getreg()/arm_setreg()).
+ */
+static void *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+	switch (reg) {
+	case VM_REG_GUEST_X0:
+		return (&hypctx->tf.tf_x[0]);
+	case VM_REG_GUEST_X1:
+		return (&hypctx->tf.tf_x[1]);
+	case VM_REG_GUEST_X2:
+		return (&hypctx->tf.tf_x[2]);
+	case VM_REG_GUEST_X3:
+		return (&hypctx->tf.tf_x[3]);
+	case VM_REG_GUEST_X4:
+		return (&hypctx->tf.tf_x[4]);
+	case VM_REG_GUEST_X5:
+		return (&hypctx->tf.tf_x[5]);
+	case VM_REG_GUEST_X6:
+		return (&hypctx->tf.tf_x[6]);
+	case VM_REG_GUEST_X7:
+		return (&hypctx->tf.tf_x[7]);
+	case VM_REG_GUEST_X8:
+		return (&hypctx->tf.tf_x[8]);
+	case VM_REG_GUEST_X9:
+		return (&hypctx->tf.tf_x[9]);
+	case VM_REG_GUEST_X10:
+		return (&hypctx->tf.tf_x[10]);
+	case VM_REG_GUEST_X11:
+		return (&hypctx->tf.tf_x[11]);
+	case VM_REG_GUEST_X12:
+		return (&hypctx->tf.tf_x[12]);
+	case VM_REG_GUEST_X13:
+		return (&hypctx->tf.tf_x[13]);
+	case VM_REG_GUEST_X14:
+		return (&hypctx->tf.tf_x[14]);
+	case VM_REG_GUEST_X15:
+		return (&hypctx->tf.tf_x[15]);
+	case VM_REG_GUEST_X16:
+		return (&hypctx->tf.tf_x[16]);
+	case VM_REG_GUEST_X17:
+		return (&hypctx->tf.tf_x[17]);
+	case VM_REG_GUEST_X18:
+		return (&hypctx->tf.tf_x[18]);
+	case VM_REG_GUEST_X19:
+		return (&hypctx->tf.tf_x[19]);
+	case VM_REG_GUEST_X20:
+		return (&hypctx->tf.tf_x[20]);
+	case VM_REG_GUEST_X21:
+		return (&hypctx->tf.tf_x[21]);
+	case VM_REG_GUEST_X22:
+		return (&hypctx->tf.tf_x[22]);
+	case VM_REG_GUEST_X23:
+		return (&hypctx->tf.tf_x[23]);
+	case VM_REG_GUEST_X24:
+		return (&hypctx->tf.tf_x[24]);
+	case VM_REG_GUEST_X25:
+		return (&hypctx->tf.tf_x[25]);
+	case VM_REG_GUEST_X26:
+		return (&hypctx->tf.tf_x[26]);
+	case VM_REG_GUEST_X27:
+		return (&hypctx->tf.tf_x[27]);
+	case VM_REG_GUEST_X28:
+		return (&hypctx->tf.tf_x[28]);
+	case VM_REG_GUEST_X29:
+		return (&hypctx->tf.tf_x[29]);
+	case VM_REG_GUEST_LR:
+		return (&hypctx->tf.tf_lr);
+	case VM_REG_GUEST_SP:
+		return (&hypctx->tf.tf_sp);
+	case VM_REG_GUEST_ELR: /* This is bogus */
+		return (&hypctx->tf.tf_elr);
+	case VM_REG_GUEST_SPSR: /* This is bogus */
+		return (&hypctx->tf.tf_spsr);
+	case VM_REG_ELR_EL2:
+		return (&hypctx->tf.tf_elr);
+	default:
+		break;
+	}
+	return (NULL);
+}
+
+/*
+ * Backend register read.  SPSR is stored as 32 bits and is widened here;
+ * every other register is read as a full 64-bit value.
+ */
+static int
+arm_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
+{
+	void *regp;
+	int running, hostcpu;
+	struct hyp *hyp = arg;
+
+	running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu)
+		panic("arm_getreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+	if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+		if (reg == VM_REG_GUEST_SPSR)
+			*retval = *(uint32_t *)regp;
+		else
+			*retval = *(uint64_t *)regp;
+		return (0);
+	} else {
+		return (EINVAL);
+	}
+}
+
+/*
+ * Backend register write.  SPSR is stored as 32 bits and is truncated
+ * here; every other register is written as a full 64-bit value.
+ */
+static int
+arm_setreg(void *arg, int vcpu, int reg, uint64_t val)
+{
+	void *regp;
+	struct hyp *hyp = arg;
+	int running, hostcpu;
+
+	running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu)
+		/* Name the real function, matching arm_getreg()'s panic. */
+		panic("arm_setreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+	if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+		if (reg == VM_REG_GUEST_SPSR)
+			*(uint32_t *)regp = (uint32_t)val;
+		else
+			*(uint64_t *)regp = val;
+		return (0);
+	} else {
+		return (EINVAL);
+	}
+}
+
+/*
+ * Read a virtual machine capability.  Only VM_CAP_UNRESTRICTED_GUEST is
+ * supported (always enabled); everything else returns ENOENT.
+ */
+static int
+arm_getcap(void *arg, int vcpu, int type, int *retval)
+{
+
+	if (type == VM_CAP_UNRESTRICTED_GUEST) {
+		*retval = 1;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/* No capabilities are settable on arm64. */
+static int
+arm_setcap(void *arg, int vcpu, int type, int val)
+{
+
+	return (ENOENT);
+}
+
+/* Nothing to do when the host resumes; required by struct vmm_ops. */
+static void
+arm_restore(void)
+{
+
+}
+
+/* arm64 backend operations consumed by the machine-independent VMM code */
+struct vmm_ops vmm_ops_arm = {
+	.init		= arm_init,
+	.cleanup	= arm_cleanup,
+	.resume		= arm_restore,
+	.vminit		= arm_vminit,
+	.vmrun		= arm_vmrun,
+	.vmcleanup	= arm_vmcleanup,
+	.vmgetreg	= arm_getreg,
+	.vmsetreg	= arm_setreg,
+	.vmgetcap	= arm_getcap,
+	.vmsetcap	= arm_setcap,
+	.vmspace_alloc	= arm_vmspace_alloc,
+	.vmspace_free	= arm_vmspace_free,
+};
diff --git a/sys/arm64/vmm/vmm_call.S b/sys/arm64/vmm/vmm_call.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_call.S
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+
+	.text
+
+/*
+ * Trap into the EL2 hypervisor with a hypervisor call; arguments and the
+ * return value are carried in the general purpose registers per the
+ * standard procedure call convention.
+ */
+ENTRY(vmm_call_hyp)
+	hvc #0
+	ret
+END(vmm_call_hyp)
diff --git a/sys/arm64/vmm/vmm_dev.c b/sys/arm64/vmm/vmm_dev.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_dev.c
@@ -0,0 +1,970 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmm_stat.h"
+
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static unsigned pr_allow_flag;
+static struct mtx vmmdev_mtx;
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static int vmm_priv_check(struct ucred *ucred);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+/*
+ * Deny access from within a jail unless the prison carries the
+ * "allow.vmm" permission bit.
+ */
+static int
+vmm_priv_check(struct ucred *ucred)
+{
+
+	if (jailed(ucred) &&
+	    (ucred->cr_prison->pr_allow & pr_allow_flag) == 0)
+		return (EPERM);
+
+	return (0);
+}
+
+/*
+ * Freeze a single vcpu (VCPU_FROZEN) so that its state can be examined
+ * or modified.  Returns EINVAL for an out-of-range vcpu id, otherwise
+ * the vcpu_set_state() result.
+ */
+static int
+vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
+{
+	int error;
+
+	if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm))
+		return (EINVAL);
+
+	error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+	return (error);
+}
+
+/*
+ * Return a vcpu previously frozen by vcpu_lock_one() to the idle state.
+ * Panics if the vcpu is not actually frozen.
+ */
+static void
+vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
+{
+	enum vcpu_state state;
+
+	state = vcpu_get_state(sc->vm, vcpu, NULL);
+	if (state != VCPU_FROZEN) {
+		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
+		    vcpu, state);
+	}
+
+	vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+}
+
+/*
+ * Freeze every vcpu of the VM.  On failure, the vcpus that were already
+ * frozen are unlocked again before the error is returned.
+ */
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+	int error, vcpu;
+	uint16_t maxcpus;
+
+	/*
+	 * Initialize 'error'; it would otherwise be read uninitialized
+	 * below if the loop never executed.
+	 */
+	error = 0;
+	maxcpus = vm_get_maxcpus(sc->vm);
+	for (vcpu = 0; vcpu < maxcpus; vcpu++) {
+		error = vcpu_lock_one(sc, vcpu);
+		if (error)
+			break;
+	}
+
+	if (error) {
+		/* Roll back the vcpus locked so far. */
+		while (--vcpu >= 0)
+			vcpu_unlock_one(sc, vcpu);
+	}
+
+	return (error);
+}
+
+/* Unfreeze every vcpu frozen by vcpu_lock_all(). */
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+	int vcpu;
+	uint16_t maxcpus;
+
+	maxcpus = vm_get_maxcpus(sc->vm);
+	for (vcpu = 0; vcpu < maxcpus; vcpu++)
+		vcpu_unlock_one(sc, vcpu);
+}
+
+/*
+ * Find the softc for the VM with the given name, or NULL if none
+ * exists.  The caller is expected to hold vmmdev_mtx.
+ */
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+	struct vmmdev_softc *sc;
+
+#ifdef notyet	/* XXX kernel is not compiled with invariants */
+	mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+	SLIST_FOREACH(sc, &head, link) {
+		if (strcmp(name, vm_name(sc->vm)) == 0)
+			break;
+	}
+
+	return (sc);
+}
+
+/* Map a cdev back to its softc; si_drv1 is set when the cdev is created. */
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+	return (cdev->si_drv1);
+}
+
+/*
+ * read(2)/write(2) handler for the VM cdev.  uio_offset is interpreted
+ * as a guest physical address; data is copied page-at-a-time between
+ * the caller's buffer and guest memory.
+ */
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+	int error, off, c, prot;
+	vm_paddr_t gpa, maxaddr;
+	void *hpa, *cookie;
+	struct vmmdev_softc *sc;
+	uint16_t lastcpu;
+
+	error = vmm_priv_check(curthread->td_ucred);
+	if (error)
+		return (error);
+
+	sc = vmmdev_lookup2(cdev);
+	if (sc == NULL)
+		return (ENXIO);
+
+	/*
+	 * Get a read lock on the guest memory map by freezing any vcpu.
+	 */
+	lastcpu = vm_get_maxcpus(sc->vm) - 1;
+	error = vcpu_lock_one(sc, lastcpu);
+	if (error)
+		return (error);
+
+	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
+	maxaddr = vmm_sysmem_maxaddr(sc->vm);
+	while (uio->uio_resid > 0 && error == 0) {
+		gpa = uio->uio_offset;
+		off = gpa & PAGE_MASK;
+		/* Never cross a page boundary in a single copy. */
+		c = min(uio->uio_resid, PAGE_SIZE - off);
+
+		/*
+		 * The VM has a hole in its physical memory map. If we want to
+		 * use 'dd' to inspect memory beyond the hole we need to
+		 * provide bogus data for memory that lies in the hole.
+		 *
+		 * Since this device does not support lseek(2), dd(1) will
+		 * read(2) blocks of data to simulate the lseek(2).
+		 */
+		hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c,
+		    prot, &cookie);
+		if (hpa == NULL) {
+			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
+				error = uiomove(__DECONST(void *, zero_region),
+				    c, uio);
+			else
+				error = EFAULT;
+		} else {
+			error = uiomove(hpa, c, uio);
+			vm_gpa_release(cookie);
+		}
+	}
+	vcpu_unlock_one(sc, lastcpu);
+	return (error);
+}
+
+/*
+ * Fill in the length and, for a device memory segment, the cdev name of
+ * the memory segment identified by mseg->segid.  System memory segments
+ * get an empty name.
+ */
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+	struct devmem_softc *dsc;
+	int error;
+	bool sysmem;
+
+	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+	if (error || mseg->len == 0)
+		return (error);
+
+	if (!sysmem) {
+		/* Device memory: look up the cdev created for the segment. */
+		SLIST_FOREACH(dsc, &sc->devmem, link) {
+			if (dsc->segid == mseg->segid)
+				break;
+		}
+		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+		    __func__, mseg->segid));
+		error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
+		    NULL);
+	} else {
+		bzero(mseg->name, sizeof(mseg->name));
+	}
+
+	return (error);
+}
+
+/*
+ * Allocate a memory segment for the VM.  A named segment is device
+ * memory backed by a /dev/vmm.io cdev; an unnamed segment is system
+ * memory.
+ */
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+	char *name;
+	int error;
+	bool sysmem;
+
+	error = 0;
+	name = NULL;
+	sysmem = true;
+
+	/*
+	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
+	 * be stripped off when devfs processes the full string.
+	 */
+	if (VM_MEMSEG_NAME(mseg)) {
+		sysmem = false;
+		name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
+		error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
+		if (error)
+			goto done;
+	}
+
+	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+	if (error)
+		goto done;
+
+	if (VM_MEMSEG_NAME(mseg)) {
+		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+		if (error)
+			vm_free_memseg(sc->vm, mseg->segid);
+		else
+			name = NULL;	/* freed when 'cdev' is destroyed */
+	}
+done:
+	free(name, M_VMMDEV);
+	return (error);
+}
+
+/*
+ * Read 'count' guest registers, stopping at the first failure and
+ * returning its error.
+ */
+static int
+vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
+    uint64_t *regval)
+{
+	unsigned int idx;
+	int error;
+
+	error = 0;
+	for (idx = 0; idx < count && error == 0; idx++)
+		error = vm_get_register(vm, vcpu, regnum[idx], &regval[idx]);
+	return (error);
+}
+
+/*
+ * Write 'count' guest registers, stopping at the first failure and
+ * returning its error.
+ */
+static int
+vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
+    uint64_t *regval)
+{
+	unsigned int idx;
+	int error;
+
+	error = 0;
+	for (idx = 0; idx < count && error == 0; idx++)
+		error = vm_set_register(vm, vcpu, regnum[idx], regval[idx]);
+	return (error);
+}
+
+/*
+ * Main VM control ioctl handler.  The first switch freezes the vcpu(s)
+ * an ioctl operates on; the second switch dispatches the ioctl itself.
+ * Frozen vcpus are unlocked again before returning.
+ */
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+    struct thread *td)
+{
+	int error, vcpu, state_changed, size;
+	cpuset_t *cpuset;
+	struct vmmdev_softc *sc;
+	struct vm_register *vmreg;
+	struct vm_register_set *vmregset;
+	struct vm_run *vmrun;
+	struct vm_activate_cpu *vac;
+	struct vm_attach_vgic *vav;
+	struct vm_cpuset *vm_cpuset;
+	struct vm_irq *vi;
+	struct vm_capability *vmcap;
+	struct vm_stats *vmstats;
+	struct vm_stat_desc *statdesc;
+	struct vm_suspend *vmsuspend;
+	struct vm_memmap *mm;
+	struct vm_msi *vmsi;
+	struct vm_cpu_topology *topology;
+	uint64_t *regvals;
+	int *regnums;
+
+	error = vmm_priv_check(curthread->td_ucred);
+	if (error)
+		return (error);
+
+	sc = vmmdev_lookup2(cdev);
+	if (sc == NULL)
+		return (ENXIO);
+
+	error = 0;
+	vcpu = -1;
+	state_changed = 0;
+
+	/*
+	 * Some VMM ioctls can operate only on vcpus that are not running.
+	 */
+	switch (cmd) {
+	case VM_RUN:
+	case VM_GET_REGISTER:
+	case VM_SET_REGISTER:
+	case VM_GET_REGISTER_SET:
+	case VM_SET_REGISTER_SET:
+	case VM_GET_CAPABILITY:
+	case VM_SET_CAPABILITY:
+	case VM_ACTIVATE_CPU:
+		/*
+		 * XXX fragile, handle with care
+		 * Assumes that the first field of the ioctl data is the vcpu.
+		 */
+		vcpu = *(int *)data;
+		error = vcpu_lock_one(sc, vcpu);
+		if (error)
+			goto done;
+		state_changed = 1;
+		break;
+
+	case VM_ALLOC_MEMSEG:
+	case VM_MMAP_MEMSEG:
+	case VM_REINIT:
+	case VM_ATTACH_VGIC:
+		/*
+		 * ioctls that operate on the entire virtual machine must
+		 * prevent all vcpus from running.
+		 */
+		error = vcpu_lock_all(sc);
+		if (error)
+			goto done;
+		state_changed = 2;
+		break;
+	case VM_GET_MEMSEG:
+	case VM_MMAP_GETNEXT:
+		/*
+		 * Lock a vcpu to make sure that the memory map cannot be
+		 * modified while it is being inspected.
+		 */
+		vcpu = vm_get_maxcpus(sc->vm) - 1;
+		error = vcpu_lock_one(sc, vcpu);
+		if (error)
+			goto done;
+		state_changed = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (cmd) {
+	case VM_RUN:
+		vmrun = (struct vm_run *)data;
+		error = vm_run(sc->vm, vmrun);
+		break;
+	case VM_SUSPEND:
+		vmsuspend = (struct vm_suspend *)data;
+		error = vm_suspend(sc->vm, vmsuspend->how);
+		break;
+	case VM_REINIT:
+		error = vm_reinit(sc->vm);
+		break;
+	case VM_STAT_DESC: {
+		statdesc = (struct vm_stat_desc *)data;
+		error = vmm_stat_desc_copy(statdesc->index,
+		    statdesc->desc, sizeof(statdesc->desc));
+		break;
+	}
+	case VM_STATS: {
+		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
+		vmstats = (struct vm_stats *)data;
+		getmicrotime(&vmstats->tv);
+		error = vmm_stat_copy(sc->vm, vmstats->cpuid, vmstats->index,
+		    nitems(vmstats->statbuf),
+		    &vmstats->num_entries, vmstats->statbuf);
+		break;
+	}
+	case VM_MMAP_GETNEXT:
+		mm = (struct vm_memmap *)data;
+		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
+		break;
+	case VM_MMAP_MEMSEG:
+		mm = (struct vm_memmap *)data;
+		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+		    mm->len, mm->prot, mm->flags);
+		break;
+	case VM_ALLOC_MEMSEG:
+		error = alloc_memseg(sc, (struct vm_memseg *)data);
+		break;
+	case VM_GET_MEMSEG:
+		error = get_memseg(sc, (struct vm_memseg *)data);
+		break;
+	case VM_GET_REGISTER:
+		vmreg = (struct vm_register *)data;
+		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+		    &vmreg->regval);
+		break;
+	case VM_SET_REGISTER:
+		vmreg = (struct vm_register *)data;
+		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+		    vmreg->regval);
+		break;
+	case VM_GET_REGISTER_SET:
+		vmregset = (struct vm_register_set *)data;
+		if (vmregset->count > VM_REG_LAST) {
+			error = EINVAL;
+			break;
+		}
+		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+		    vmregset->count);
+		if (error == 0)
+			error = vm_get_register_set(sc->vm, vmregset->cpuid,
+			    vmregset->count, regnums, regvals);
+		if (error == 0)
+			error = copyout(regvals, vmregset->regvals,
+			    sizeof(regvals[0]) * vmregset->count);
+		free(regvals, M_VMMDEV);
+		free(regnums, M_VMMDEV);
+		break;
+	case VM_SET_REGISTER_SET:
+		vmregset = (struct vm_register_set *)data;
+		if (vmregset->count > VM_REG_LAST) {
+			error = EINVAL;
+			break;
+		}
+		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+		    vmregset->count);
+		if (error == 0)
+			error = copyin(vmregset->regvals, regvals,
+			    sizeof(regvals[0]) * vmregset->count);
+		if (error == 0)
+			error = vm_set_register_set(sc->vm, vmregset->cpuid,
+			    vmregset->count, regnums, regvals);
+		free(regvals, M_VMMDEV);
+		free(regnums, M_VMMDEV);
+		break;
+	case VM_GET_CAPABILITY:
+		vmcap = (struct vm_capability *)data;
+		error = vm_get_capability(sc->vm, vmcap->cpuid,
+		    vmcap->captype,
+		    &vmcap->capval);
+		break;
+	case VM_SET_CAPABILITY:
+		vmcap = (struct vm_capability *)data;
+		error = vm_set_capability(sc->vm, vmcap->cpuid,
+		    vmcap->captype,
+		    vmcap->capval);
+		break;
+	case VM_ACTIVATE_CPU:
+		vac = (struct vm_activate_cpu *)data;
+		error = vm_activate_cpu(sc->vm, vac->vcpuid);
+		break;
+	case VM_GET_CPUS:
+		error = 0;
+		vm_cpuset = (struct vm_cpuset *)data;
+		size = vm_cpuset->cpusetsize;
+		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
+			error = ERANGE;
+			break;
+		}
+		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+		if (vm_cpuset->which == VM_ACTIVE_CPUS)
+			*cpuset = vm_active_cpus(sc->vm);
+		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
+			*cpuset = vm_suspended_cpus(sc->vm);
+		else if (vm_cpuset->which == VM_DEBUG_CPUS)
+			*cpuset = vm_debug_cpus(sc->vm);
+		else
+			error = EINVAL;
+		if (error == 0)
+			error = copyout(cpuset, vm_cpuset->cpus, size);
+		free(cpuset, M_TEMP);
+		break;
+	case VM_SUSPEND_CPU:
+		vac = (struct vm_activate_cpu *)data;
+		error = vm_suspend_cpu(sc->vm, vac->vcpuid);
+		break;
+	case VM_RESUME_CPU:
+		vac = (struct vm_activate_cpu *)data;
+		error = vm_resume_cpu(sc->vm, vac->vcpuid);
+		break;
+	/*
+	 * The IRQ ioctls are dispatched here, not in the locking switch
+	 * above, so that the second switch's default case cannot clobber
+	 * their result with ENOTTY.
+	 */
+	case VM_ASSERT_IRQ:
+		vi = (struct vm_irq *)data;
+		error = vm_assert_irq(sc->vm, vi->irq);
+		break;
+	case VM_DEASSERT_IRQ:
+		vi = (struct vm_irq *)data;
+		error = vm_deassert_irq(sc->vm, vi->irq);
+		break;
+	case VM_ATTACH_VGIC:
+		vav = (struct vm_attach_vgic *)data;
+		error = vm_attach_vgic(sc->vm, vav->dist_start, vav->dist_size,
+		    vav->redist_start, vav->redist_size);
+		break;
+	case VM_RAISE_MSI:
+		vmsi = (struct vm_msi *)data;
+		error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus,
+		    vmsi->slot, vmsi->func);
+		break;
+	case VM_SET_TOPOLOGY:
+		topology = (struct vm_cpu_topology *)data;
+		error = vm_set_topology(sc->vm, topology->sockets,
+		    topology->cores, topology->threads, topology->maxcpus);
+		break;
+	case VM_GET_TOPOLOGY:
+		topology = (struct vm_cpu_topology *)data;
+		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
+		    &topology->threads, &topology->maxcpus);
+		error = 0;
+		break;
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+	if (state_changed == 1)
+		vcpu_unlock_one(sc, vcpu);
+	else if (state_changed == 2)
+		vcpu_unlock_all(sc);
+
+done:
+	/*
+	 * Make sure that no handler returns a kernel-internal
+	 * error value to userspace.
+	 */
+	KASSERT(error == ERESTART || error >= 0,
+	    ("vmmdev_ioctl: invalid error return %d", error));
+	return (error);
+}
+
+/*
+ * mmap(2) handler for the VM cdev.  Translates a range of guest
+ * physical addresses to the backing VM object of the system memory
+ * segment that contains it.  Device memory segments must be mapped via
+ * their own cdev instead.
+ */
+static int
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+    struct vm_object **objp, int nprot)
+{
+	struct vmmdev_softc *sc;
+	vm_paddr_t gpa;
+	size_t len;
+	vm_ooffset_t segoff, first, last;
+	int error, found, segid;
+	uint16_t lastcpu;
+	bool sysmem;
+
+	error = vmm_priv_check(curthread->td_ucred);
+	if (error)
+		return (error);
+
+	first = *offset;
+	last = first + mapsize;
+	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+		return (EINVAL);
+
+	sc = vmmdev_lookup2(cdev);
+	if (sc == NULL) {
+		/* virtual machine is in the process of being created */
+		return (EINVAL);
+	}
+
+	/*
+	 * Get a read lock on the guest memory map by freezing any vcpu.
+	 */
+	lastcpu = vm_get_maxcpus(sc->vm) - 1;
+	error = vcpu_lock_one(sc, lastcpu);
+	if (error)
+		return (error);
+
+	/* Walk the memory map looking for a segment covering the range. */
+	gpa = 0;
+	found = 0;
+	while (!found) {
+		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+		    NULL, NULL);
+		if (error)
+			break;
+
+		if (first >= gpa && last <= gpa + len)
+			found = 1;
+		else
+			gpa += len;
+	}
+
+	if (found) {
+		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+		KASSERT(error == 0 && *objp != NULL,
+		    ("%s: invalid memory segment %d", __func__, segid));
+		if (sysmem) {
+			vm_object_reference(*objp);
+			*offset = segoff + (first - gpa);
+		} else {
+			error = EINVAL;
+		}
+	}
+	vcpu_unlock_one(sc, lastcpu);
+	return (error);
+}
+
+/*
+ * Final teardown of a VM softc: free any devmem bookkeeping, destroy
+ * the cdev and the vm instance, unlink from the global list and free
+ * the softc.  Runs from destroy_dev_sched_cb() or on a failed create.
+ */
+static void
+vmmdev_destroy(void *arg)
+{
+	struct vmmdev_softc *sc = arg;
+	struct devmem_softc *dsc;
+	int error __diagused;
+
+	error = vcpu_lock_all(sc);
+	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+
+	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+		/* The devmem cdevs were destroyed before this callback ran. */
+		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+		SLIST_REMOVE_HEAD(&sc->devmem, link);
+		free(dsc->name, M_VMMDEV);
+		free(dsc, M_VMMDEV);
+	}
+
+	if (sc->cdev != NULL)
+		destroy_dev(sc->cdev);
+
+	if (sc->vm != NULL)
+		vm_destroy(sc->vm);
+
+	if ((sc->flags & VSC_LINKED) != 0) {
+		mtx_lock(&vmmdev_mtx);
+		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+		mtx_unlock(&vmmdev_mtx);
+	}
+
+	free(sc, M_VMMDEV);
+}
+
+/*
+ * hw.vmm.destroy sysctl handler: schedules the named VM and all its
+ * devmem cdevs for asynchronous destruction.
+ */
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+	struct devmem_softc *dsc;
+	struct vmmdev_softc *sc;
+	struct cdev *cdev;
+	char *buf;
+	int error, buflen;
+
+	error = vmm_priv_check(req->td->td_ucred);
+	if (error)
+		return (error);
+
+	buflen = VM_MAX_NAMELEN + 1;
+	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+	strlcpy(buf, "beavis", buflen);
+	error = sysctl_handle_string(oidp, buf, buflen, req);
+	if (error != 0 || req->newptr == NULL)
+		goto out;
+
+	mtx_lock(&vmmdev_mtx);
+	sc = vmmdev_lookup(buf);
+	if (sc == NULL || sc->cdev == NULL) {
+		mtx_unlock(&vmmdev_mtx);
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
+	 * goes down to 0 so we should not do it again in the callback.
+	 *
+	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
+	 * is scheduled for destruction.
+	 */
+	cdev = sc->cdev;
+	sc->cdev = NULL;
+	mtx_unlock(&vmmdev_mtx);
+
+	/*
+	 * Schedule all cdevs to be destroyed:
+	 *
+	 * - any new operations on the 'cdev' will return an error (ENXIO).
+	 *
+	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
+	 *   be destroyed and the callback will be invoked in a taskqueue
+	 *   context.
+	 *
+	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
+	 */
+	SLIST_FOREACH(dsc, &sc->devmem, link) {
+		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
+	}
+	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
+	error = 0;
+
+out:
+	free(buf, M_VMMDEV);
+	return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
+    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_vmm_destroy, "A",
+    NULL);
+
+/* Character device switch for the per-VM /dev/vmm/<name> node. */
+static struct cdevsw vmmdevsw = {
+	.d_name		= "vmmdev",
+	.d_version	= D_VERSION,
+	.d_ioctl	= vmmdev_ioctl,
+	.d_mmap_single	= vmmdev_mmap_single,
+	.d_read		= vmmdev_rw,
+	.d_write	= vmmdev_rw,
+};
+
+/*
+ * hw.vmm.create sysctl handler: creates a new VM with the given name
+ * and its /dev/vmm/<name> cdev.  Fails with EEXIST if a VM with that
+ * name already exists.
+ */
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+	struct vm *vm;
+	struct cdev *cdev;
+	struct vmmdev_softc *sc, *sc2;
+	char *buf;
+	int error, buflen;
+
+	error = vmm_priv_check(req->td->td_ucred);
+	if (error)
+		return (error);
+
+	buflen = VM_MAX_NAMELEN + 1;
+	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+	strlcpy(buf, "beavis", buflen);
+	error = sysctl_handle_string(oidp, buf, buflen, req);
+	if (error != 0 || req->newptr == NULL)
+		goto out;
+
+	mtx_lock(&vmmdev_mtx);
+	sc = vmmdev_lookup(buf);
+	mtx_unlock(&vmmdev_mtx);
+	if (sc != NULL) {
+		error = EEXIST;
+		goto out;
+	}
+
+	/* vm_create() may sleep, so it runs with the mutex dropped. */
+	error = vm_create(buf, &vm);
+	if (error != 0)
+		goto out;
+
+	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+	sc->vm = vm;
+	SLIST_INIT(&sc->devmem);
+
+	/*
+	 * Lookup the name again just in case somebody sneaked in when we
+	 * dropped the lock.
+	 */
+	mtx_lock(&vmmdev_mtx);
+	sc2 = vmmdev_lookup(buf);
+	if (sc2 == NULL) {
+		SLIST_INSERT_HEAD(&head, sc, link);
+		sc->flags |= VSC_LINKED;
+	}
+	mtx_unlock(&vmmdev_mtx);
+
+	if (sc2 != NULL) {
+		vmmdev_destroy(sc);
+		error = EEXIST;
+		goto out;
+	}
+
+	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
+	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+	if (error != 0) {
+		vmmdev_destroy(sc);
+		goto out;
+	}
+
+	mtx_lock(&vmmdev_mtx);
+	sc->cdev = cdev;
+	sc->cdev->si_drv1 = sc;
+	mtx_unlock(&vmmdev_mtx);
+
+out:
+	free(buf, M_VMMDEV);
+	return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
+    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_vmm_create, "A",
+    NULL);
+
+/* Module load-time initialization of the vmm device interface. */
+void
+vmmdev_init(void)
+{
+	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
+	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
+	    "Allow use of vmm in a jail.");
+}
+
+/*
+ * Module unload check: succeeds only when no VM devices remain,
+ * otherwise fails with EBUSY.
+ */
+int
+vmmdev_cleanup(void)
+{
+
+	return (SLIST_EMPTY(&head) ? 0 : EBUSY);
+}
+
+/*
+ * mmap(2) handler for a devmem cdev: hands back the VM object of the
+ * device memory segment, provided the requested range lies within it.
+ */
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+    struct vm_object **objp, int nprot)
+{
+	struct devmem_softc *dsc;
+	vm_ooffset_t first, last;
+	size_t seglen;
+	int error;
+	uint16_t lastcpu;
+	bool sysmem;
+
+	dsc = cdev->si_drv1;
+	if (dsc == NULL) {
+		/* 'cdev' has been created but is not ready for use */
+		return (ENXIO);
+	}
+
+	first = *offset;
+	last = *offset + len;
+	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+		return (EINVAL);
+
+	/* Freeze a vcpu to get a read lock on the guest memory map. */
+	lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1;
+	error = vcpu_lock_one(dsc->sc, lastcpu);
+	if (error)
+		return (error);
+
+	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+	KASSERT(error == 0 && !sysmem && *objp != NULL,
+	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+	vcpu_unlock_one(dsc->sc, lastcpu);
+
+	if (seglen >= last) {
+		vm_object_reference(*objp);
+		return (0);
+	} else {
+		return (EINVAL);
+	}
+}
+
+/* Switch table for devmem cdevs; only mmap(2) is supported. */
+static struct cdevsw devmemsw = {
+	.d_name		= "devmem",
+	.d_version	= D_VERSION,
+	.d_mmap_single	= devmem_mmap_single,
+};
+
+/*
+ * Create the /dev/vmm.io/<vmname>.<devname> cdev backing a device
+ * memory segment.  Takes ownership of 'devname' on success (it is
+ * freed when the VM softc is destroyed).
+ */
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+	struct devmem_softc *dsc;
+	struct vmmdev_softc *sc;
+	struct cdev *cdev;
+	int error;
+
+	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+	if (error)
+		return (error);
+
+	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+	mtx_lock(&vmmdev_mtx);
+	sc = vmmdev_lookup(vmname);
+	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+	if (sc->cdev == NULL) {
+		/* virtual machine is being created or destroyed */
+		mtx_unlock(&vmmdev_mtx);
+		free(dsc, M_VMMDEV);
+		/* Pass NULL (not 0) for the pointer callback arguments. */
+		destroy_dev_sched_cb(cdev, NULL, NULL);
+		return (ENODEV);
+	}
+
+	dsc->segid = segid;
+	dsc->name = devname;
+	dsc->cdev = cdev;
+	dsc->sc = sc;
+	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+	mtx_unlock(&vmmdev_mtx);
+
+	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
+	cdev->si_drv1 = dsc;
+	return (0);
+}
+
+/*
+ * Callback run when a devmem cdev has been destroyed.  Only clears the
+ * pointers; the devmem_softc itself is freed later by vmmdev_destroy().
+ */
+static void
+devmem_destroy(void *arg)
+{
+	struct devmem_softc *dsc = arg;
+
+	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+	dsc->cdev = NULL;
+	dsc->sc = NULL;
+}
diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp.c
@@ -0,0 +1,822 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+
+#include <machine/armreg.h>
+
+#include "arm64.h"
+#include "hyp.h"
+
+struct hypctx;
+
+uint64_t vmm_hyp_enter(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
+ uint64_t, uint64_t, uint64_t);
+uint64_t vmm_enter_guest(struct hypctx *);
+
+/* TODO: Make this common between this & vfp.h */
+static void
+vfp_store(struct vfpstate *state)
+{
+ __uint128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ __asm __volatile(
+ "mrs %0, fpcr \n"
+ "mrs %1, fpsr \n"
+ "stp q0, q1, [%2, #16 * 0]\n"
+ "stp q2, q3, [%2, #16 * 2]\n"
+ "stp q4, q5, [%2, #16 * 4]\n"
+ "stp q6, q7, [%2, #16 * 6]\n"
+ "stp q8, q9, [%2, #16 * 8]\n"
+ "stp q10, q11, [%2, #16 * 10]\n"
+ "stp q12, q13, [%2, #16 * 12]\n"
+ "stp q14, q15, [%2, #16 * 14]\n"
+ "stp q16, q17, [%2, #16 * 16]\n"
+ "stp q18, q19, [%2, #16 * 18]\n"
+ "stp q20, q21, [%2, #16 * 20]\n"
+ "stp q22, q23, [%2, #16 * 22]\n"
+ "stp q24, q25, [%2, #16 * 24]\n"
+ "stp q26, q27, [%2, #16 * 26]\n"
+ "stp q28, q29, [%2, #16 * 28]\n"
+ "stp q30, q31, [%2, #16 * 30]\n"
+ : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));
+
+ state->vfp_fpcr = fpcr;
+ state->vfp_fpsr = fpsr;
+}
+
+static void
+vfp_restore(struct vfpstate *state)
+{
+ __uint128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ fpcr = state->vfp_fpcr;
+ fpsr = state->vfp_fpsr;
+
+ __asm __volatile(
+ "ldp q0, q1, [%2, #16 * 0]\n"
+ "ldp q2, q3, [%2, #16 * 2]\n"
+ "ldp q4, q5, [%2, #16 * 4]\n"
+ "ldp q6, q7, [%2, #16 * 6]\n"
+ "ldp q8, q9, [%2, #16 * 8]\n"
+ "ldp q10, q11, [%2, #16 * 10]\n"
+ "ldp q12, q13, [%2, #16 * 12]\n"
+ "ldp q14, q15, [%2, #16 * 14]\n"
+ "ldp q16, q17, [%2, #16 * 16]\n"
+ "ldp q18, q19, [%2, #16 * 18]\n"
+ "ldp q20, q21, [%2, #16 * 20]\n"
+ "ldp q22, q23, [%2, #16 * 22]\n"
+ "ldp q24, q25, [%2, #16 * 24]\n"
+ "ldp q26, q27, [%2, #16 * 26]\n"
+ "ldp q28, q29, [%2, #16 * 28]\n"
+ "ldp q30, q31, [%2, #16 * 30]\n"
+ "msr fpcr, %0 \n"
+ "msr fpsr, %1 \n"
+ : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
+}
+
+static void
+vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+ /* Store the guest VFP registers */
+ if (guest) {
+ vfp_store(&hypctx->vfpstate);
+
+ /* Store the timer registers */
+ hypctx->vtimer_cpu.cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0 =
+ READ_SPECIALREG(cntv_cval_el0);
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 =
+ READ_SPECIALREG(cntv_ctl_el0);
+
+ /* Store the GICv3 registers */
+ hypctx->vgic_cpu_if.ich_eisr_el2 =
+ READ_SPECIALREG(ich_eisr_el2);
+ hypctx->vgic_cpu_if.ich_elrsr_el2 =
+ READ_SPECIALREG(ich_elrsr_el2);
+ hypctx->vgic_cpu_if.ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
+ hypctx->vgic_cpu_if.ich_misr_el2 =
+ READ_SPECIALREG(ich_misr_el2);
+ hypctx->vgic_cpu_if.ich_vmcr_el2 =
+ READ_SPECIALREG(ich_vmcr_el2);
+ switch(hypctx->vgic_cpu_if.ich_lr_num - 1) {
+#define STORE_LR(x) \
+ case x: \
+ hypctx->vgic_cpu_if.ich_lr_el2[x] = \
+ READ_SPECIALREG(ich_lr ## x ##_el2)
+ STORE_LR(15);
+ STORE_LR(14);
+ STORE_LR(13);
+ STORE_LR(12);
+ STORE_LR(11);
+ STORE_LR(10);
+ STORE_LR(9);
+ STORE_LR(8);
+ STORE_LR(7);
+ STORE_LR(6);
+ STORE_LR(5);
+ STORE_LR(4);
+ STORE_LR(3);
+ STORE_LR(2);
+ STORE_LR(1);
+ default:
+ STORE_LR(0);
+#undef STORE_LR
+ }
+
+ switch(hypctx->vgic_cpu_if.ich_apr_num - 1) {
+#define STORE_APR(x) \
+ case x: \
+ hypctx->vgic_cpu_if.ich_ap0r_el2[x] = \
+ READ_SPECIALREG(ich_ap0r ## x ##_el2); \
+ hypctx->vgic_cpu_if.ich_ap1r_el2[x] = \
+ READ_SPECIALREG(ich_ap1r ## x ##_el2)
+ STORE_APR(3);
+ STORE_APR(2);
+ STORE_APR(1);
+ default:
+ STORE_APR(0);
+#undef STORE_APR
+ }
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch(ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_BRP(x) \
+ case x: \
+ hypctx->dbgbcr_el1[x] = \
+ READ_SPECIALREG(dbgbcr ## x ## _el1); \
+ hypctx->dbgbvr_el1[x] = \
+ READ_SPECIALREG(dbgbvr ## x ## _el1)
+ STORE_DBG_BRP(15);
+ STORE_DBG_BRP(14);
+ STORE_DBG_BRP(13);
+ STORE_DBG_BRP(12);
+ STORE_DBG_BRP(11);
+ STORE_DBG_BRP(10);
+ STORE_DBG_BRP(9);
+ STORE_DBG_BRP(8);
+ STORE_DBG_BRP(7);
+ STORE_DBG_BRP(6);
+ STORE_DBG_BRP(5);
+ STORE_DBG_BRP(4);
+ STORE_DBG_BRP(3);
+ STORE_DBG_BRP(2);
+ STORE_DBG_BRP(1);
+ default:
+ STORE_DBG_BRP(0);
+#undef STORE_DBG_BRP
+ }
+
+ switch(ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_WRP(x) \
+ case x: \
+ hypctx->dbgwcr_el1[x] = \
+ READ_SPECIALREG(dbgwcr ## x ## _el1); \
+ hypctx->dbgwvr_el1[x] = \
+ READ_SPECIALREG(dbgwvr ## x ## _el1)
+ STORE_DBG_WRP(15);
+ STORE_DBG_WRP(14);
+ STORE_DBG_WRP(13);
+ STORE_DBG_WRP(12);
+ STORE_DBG_WRP(11);
+ STORE_DBG_WRP(10);
+ STORE_DBG_WRP(9);
+ STORE_DBG_WRP(8);
+ STORE_DBG_WRP(7);
+ STORE_DBG_WRP(6);
+ STORE_DBG_WRP(5);
+ STORE_DBG_WRP(4);
+ STORE_DBG_WRP(3);
+ STORE_DBG_WRP(2);
+ STORE_DBG_WRP(1);
+ default:
+ STORE_DBG_WRP(0);
+#undef STORE_DBG_WRP
+ }
+
+ /* Store the PMU registers */
+ hypctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0);
+ hypctx->pmccntr_el0 = READ_SPECIALREG(pmccntr_el0);
+ hypctx->pmccfiltr_el0 = READ_SPECIALREG(pmccfiltr_el0);
+ hypctx->pmcntenset_el0 = READ_SPECIALREG(pmcntenset_el0);
+ hypctx->pmintenset_el1 = READ_SPECIALREG(pmintenset_el1);
+ hypctx->pmovsset_el0 = READ_SPECIALREG(pmovsset_el0);
+ hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define STORE_PMU(x) \
+ case (x + 1): \
+ hypctx->pmevcntr_el0[x] = \
+ READ_SPECIALREG(pmevcntr ## x ## _el0); \
+ hypctx->pmevtyper_el0[x] = \
+ READ_SPECIALREG(pmevtyper ## x ## _el0)
+ STORE_PMU(30);
+ STORE_PMU(29);
+ STORE_PMU(28);
+ STORE_PMU(27);
+ STORE_PMU(26);
+ STORE_PMU(25);
+ STORE_PMU(24);
+ STORE_PMU(23);
+ STORE_PMU(22);
+ STORE_PMU(21);
+ STORE_PMU(20);
+ STORE_PMU(19);
+ STORE_PMU(18);
+ STORE_PMU(17);
+ STORE_PMU(16);
+ STORE_PMU(15);
+ STORE_PMU(14);
+ STORE_PMU(13);
+ STORE_PMU(12);
+ STORE_PMU(11);
+ STORE_PMU(10);
+ STORE_PMU(9);
+ STORE_PMU(8);
+ STORE_PMU(7);
+ STORE_PMU(6);
+ STORE_PMU(5);
+ STORE_PMU(4);
+ STORE_PMU(3);
+ STORE_PMU(2);
+ STORE_PMU(1);
+ STORE_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef STORE_PMU
+ }
+
+	/* Store the special registers to the trapframe */
+ hypctx->tf.tf_sp = READ_SPECIALREG(sp_el1);
+ hypctx->tf.tf_elr = READ_SPECIALREG(elr_el2);
+ hypctx->tf.tf_spsr = READ_SPECIALREG(spsr_el2);
+ if (guest) {
+ hypctx->tf.tf_esr = READ_SPECIALREG(esr_el2);
+ }
+
+ /* Store the guest special registers */
+ hypctx->elr_el1 = READ_SPECIALREG(elr_el1);
+ hypctx->sp_el0 = READ_SPECIALREG(sp_el0);
+ hypctx->tpidr_el0 = READ_SPECIALREG(tpidr_el0);
+ hypctx->tpidrro_el0 = READ_SPECIALREG(tpidrro_el0);
+ hypctx->tpidr_el1 = READ_SPECIALREG(tpidr_el1);
+ hypctx->vbar_el1 = READ_SPECIALREG(vbar_el1);
+
+ hypctx->actlr_el1 = READ_SPECIALREG(actlr_el1);
+ hypctx->afsr0_el1 = READ_SPECIALREG(afsr0_el1);
+ hypctx->afsr1_el1 = READ_SPECIALREG(afsr1_el1);
+ hypctx->amair_el1 = READ_SPECIALREG(amair_el1);
+ hypctx->contextidr_el1 = READ_SPECIALREG(contextidr_el1);
+ hypctx->cpacr_el1 = READ_SPECIALREG(cpacr_el1);
+ hypctx->csselr_el1 = READ_SPECIALREG(csselr_el1);
+ hypctx->esr_el1 = READ_SPECIALREG(esr_el1);
+ hypctx->far_el1 = READ_SPECIALREG(far_el1);
+ hypctx->mair_el1 = READ_SPECIALREG(mair_el1);
+ hypctx->mdccint_el1 = READ_SPECIALREG(mdccint_el1);
+ hypctx->mdscr_el1 = READ_SPECIALREG(mdscr_el1);
+ hypctx->par_el1 = READ_SPECIALREG(par_el1);
+ hypctx->sctlr_el1 = READ_SPECIALREG(sctlr_el1);
+ hypctx->spsr_el1 = READ_SPECIALREG(spsr_el1);
+ hypctx->tcr_el1 = READ_SPECIALREG(tcr_el1);
+ hypctx->ttbr0_el1 = READ_SPECIALREG(ttbr0_el1);
+ hypctx->ttbr1_el1 = READ_SPECIALREG(ttbr1_el1);
+
+ hypctx->cptr_el2 = READ_SPECIALREG(cptr_el2);
+ hypctx->hcr_el2 = READ_SPECIALREG(hcr_el2);
+ hypctx->vpidr_el2 = READ_SPECIALREG(vpidr_el2);
+ hypctx->vmpidr_el2 = READ_SPECIALREG(vmpidr_el2);
+}
+
+static void
+vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+ /* Restore the special registers */
+ WRITE_SPECIALREG(elr_el1, hypctx->elr_el1);
+ WRITE_SPECIALREG(sp_el0, hypctx->sp_el0);
+ WRITE_SPECIALREG(tpidr_el0, hypctx->tpidr_el0);
+ WRITE_SPECIALREG(tpidrro_el0, hypctx->tpidrro_el0);
+ WRITE_SPECIALREG(tpidr_el1, hypctx->tpidr_el1);
+ WRITE_SPECIALREG(vbar_el1, hypctx->vbar_el1);
+
+ WRITE_SPECIALREG(actlr_el1, hypctx->actlr_el1);
+ WRITE_SPECIALREG(afsr0_el1, hypctx->afsr0_el1);
+ WRITE_SPECIALREG(afsr1_el1, hypctx->afsr1_el1);
+ WRITE_SPECIALREG(amair_el1, hypctx->amair_el1);
+ WRITE_SPECIALREG(contextidr_el1, hypctx->contextidr_el1);
+ WRITE_SPECIALREG(cpacr_el1, hypctx->cpacr_el1);
+ WRITE_SPECIALREG(csselr_el1, hypctx->csselr_el1);
+ WRITE_SPECIALREG(esr_el1, hypctx->esr_el1);
+ WRITE_SPECIALREG(far_el1, hypctx->far_el1);
+ WRITE_SPECIALREG(mdccint_el1, hypctx->mdccint_el1);
+ WRITE_SPECIALREG(mdscr_el1, hypctx->mdscr_el1);
+ WRITE_SPECIALREG(mair_el1, hypctx->mair_el1);
+ WRITE_SPECIALREG(par_el1, hypctx->par_el1);
+ WRITE_SPECIALREG(sctlr_el1, hypctx->sctlr_el1);
+ WRITE_SPECIALREG(tcr_el1, hypctx->tcr_el1);
+ WRITE_SPECIALREG(ttbr0_el1, hypctx->ttbr0_el1);
+ WRITE_SPECIALREG(ttbr1_el1, hypctx->ttbr1_el1);
+ WRITE_SPECIALREG(spsr_el1, hypctx->spsr_el1);
+
+ WRITE_SPECIALREG(cptr_el2, hypctx->cptr_el2);
+ WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2);
+ WRITE_SPECIALREG(vpidr_el2, hypctx->vpidr_el2);
+ WRITE_SPECIALREG(vmpidr_el2, hypctx->vmpidr_el2);
+
+ /* Load the special regs from the trapframe */
+ WRITE_SPECIALREG(sp_el1, hypctx->tf.tf_sp);
+ WRITE_SPECIALREG(elr_el2, hypctx->tf.tf_elr);
+ WRITE_SPECIALREG(spsr_el2, hypctx->tf.tf_spsr);
+
+ /* Restore the PMU registers */
+ WRITE_SPECIALREG(pmcr_el0, hypctx->pmcr_el0);
+ WRITE_SPECIALREG(pmccntr_el0, hypctx->pmccntr_el0);
+ WRITE_SPECIALREG(pmccfiltr_el0, hypctx->pmccfiltr_el0);
+ /* Clear all events/interrupts then enable them */
+ WRITE_SPECIALREG(pmcntenclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmcntenset_el0, hypctx->pmcntenset_el0);
+ WRITE_SPECIALREG(pmintenclr_el1, 0xfffffffful);
+ WRITE_SPECIALREG(pmintenset_el1, hypctx->pmintenset_el1);
+ WRITE_SPECIALREG(pmovsclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmovsset_el0, hypctx->pmovsset_el0);
+
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define LOAD_PMU(x) \
+ case (x + 1): \
+ WRITE_SPECIALREG(pmevcntr ## x ## _el0, \
+ hypctx->pmevcntr_el0[x]); \
+ WRITE_SPECIALREG(pmevtyper ## x ## _el0, \
+ hypctx->pmevtyper_el0[x])
+ LOAD_PMU(30);
+ LOAD_PMU(29);
+ LOAD_PMU(28);
+ LOAD_PMU(27);
+ LOAD_PMU(26);
+ LOAD_PMU(25);
+ LOAD_PMU(24);
+ LOAD_PMU(23);
+ LOAD_PMU(22);
+ LOAD_PMU(21);
+ LOAD_PMU(20);
+ LOAD_PMU(19);
+ LOAD_PMU(18);
+ LOAD_PMU(17);
+ LOAD_PMU(16);
+ LOAD_PMU(15);
+ LOAD_PMU(14);
+ LOAD_PMU(13);
+ LOAD_PMU(12);
+ LOAD_PMU(11);
+ LOAD_PMU(10);
+ LOAD_PMU(9);
+ LOAD_PMU(8);
+ LOAD_PMU(7);
+ LOAD_PMU(6);
+ LOAD_PMU(5);
+ LOAD_PMU(4);
+ LOAD_PMU(3);
+ LOAD_PMU(2);
+ LOAD_PMU(1);
+ LOAD_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef LOAD_PMU
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch(ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_BRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgbcr ## x ## _el1, \
+ hypctx->dbgbcr_el1[x]); \
+ WRITE_SPECIALREG(dbgbvr ## x ## _el1, \
+ hypctx->dbgbvr_el1[x])
+ LOAD_DBG_BRP(15);
+ LOAD_DBG_BRP(14);
+ LOAD_DBG_BRP(13);
+ LOAD_DBG_BRP(12);
+ LOAD_DBG_BRP(11);
+ LOAD_DBG_BRP(10);
+ LOAD_DBG_BRP(9);
+ LOAD_DBG_BRP(8);
+ LOAD_DBG_BRP(7);
+ LOAD_DBG_BRP(6);
+ LOAD_DBG_BRP(5);
+ LOAD_DBG_BRP(4);
+ LOAD_DBG_BRP(3);
+ LOAD_DBG_BRP(2);
+ LOAD_DBG_BRP(1);
+ default:
+ LOAD_DBG_BRP(0);
+#undef LOAD_DBG_BRP
+ }
+
+ switch(ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_WRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgwcr ## x ## _el1, \
+ hypctx->dbgwcr_el1[x]); \
+ WRITE_SPECIALREG(dbgwvr ## x ## _el1, \
+ hypctx->dbgwvr_el1[x])
+ LOAD_DBG_WRP(15);
+ LOAD_DBG_WRP(14);
+ LOAD_DBG_WRP(13);
+ LOAD_DBG_WRP(12);
+ LOAD_DBG_WRP(11);
+ LOAD_DBG_WRP(10);
+ LOAD_DBG_WRP(9);
+ LOAD_DBG_WRP(8);
+ LOAD_DBG_WRP(7);
+ LOAD_DBG_WRP(6);
+ LOAD_DBG_WRP(5);
+ LOAD_DBG_WRP(4);
+ LOAD_DBG_WRP(3);
+ LOAD_DBG_WRP(2);
+ LOAD_DBG_WRP(1);
+ default:
+ LOAD_DBG_WRP(0);
+#undef LOAD_DBG_WRP
+ }
+
+ if (guest) {
+ /* Load the timer registers */
+ WRITE_SPECIALREG(cntkctl_el1, hypctx->vtimer_cpu.cntkctl_el1);
+ WRITE_SPECIALREG(cntv_cval_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0);
+ WRITE_SPECIALREG(cntv_ctl_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0);
+ WRITE_SPECIALREG(cnthctl_el2, hyp->vtimer.cnthctl_el2);
+ WRITE_SPECIALREG(cntvoff_el2, hyp->vtimer.cntvoff_el2);
+
+ /* Load the GICv3 registers */
+ WRITE_SPECIALREG(ich_hcr_el2, hypctx->vgic_cpu_if.ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2,
+ hypctx->vgic_cpu_if.ich_vmcr_el2);
+ switch(hypctx->vgic_cpu_if.ich_lr_num - 1) {
+#define LOAD_LR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_lr ## x ##_el2, \
+ hypctx->vgic_cpu_if.ich_lr_el2[x])
+ LOAD_LR(15);
+ LOAD_LR(14);
+ LOAD_LR(13);
+ LOAD_LR(12);
+ LOAD_LR(11);
+ LOAD_LR(10);
+ LOAD_LR(9);
+ LOAD_LR(8);
+ LOAD_LR(7);
+ LOAD_LR(6);
+ LOAD_LR(5);
+ LOAD_LR(4);
+ LOAD_LR(3);
+ LOAD_LR(2);
+ LOAD_LR(1);
+ default:
+ LOAD_LR(0);
+#undef LOAD_LR
+ }
+
+ switch(hypctx->vgic_cpu_if.ich_apr_num - 1) {
+#define LOAD_APR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_ap0r ## x ##_el2, \
+ hypctx->vgic_cpu_if.ich_ap0r_el2[x]); \
+ WRITE_SPECIALREG(ich_ap1r ## x ##_el2, \
+ hypctx->vgic_cpu_if.ich_ap1r_el2[x])
+ LOAD_APR(3);
+ LOAD_APR(2);
+ LOAD_APR(1);
+ default:
+ LOAD_APR(0);
+#undef LOAD_APR
+ }
+
+ /* Load the guest VFP registers */
+ vfp_restore(&hypctx->vfpstate);
+ }
+}
+
+static uint64_t
+vmm_hyp_call_guest(struct hyp *hyp, int vcpu)
+{
+ struct hypctx host_hypctx;
+ struct hypctx *hypctx;
+ uint64_t cntvoff_el2;
+ uint64_t ich_hcr_el2, ich_vmcr_el2, cnthctl_el2, cntkctl_el1;
+ uint64_t ret;
+ uint64_t s1e1r, hpfar_el2;
+ bool hpfar_valid;
+
+ vmm_hyp_reg_store(&host_hypctx, NULL, false);
+
+ /* TODO: Check cpuid is valid */
+ hypctx = &hyp->ctx[vcpu];
+
+ /* Save the host special registers */
+ cnthctl_el2 = READ_SPECIALREG(cnthctl_el2);
+ cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ cntvoff_el2 = READ_SPECIALREG(cntvoff_el2);
+
+ ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
+ ich_vmcr_el2 = READ_SPECIALREG(ich_vmcr_el2);
+
+ vmm_hyp_reg_restore(hypctx, hyp, true);
+
+ /* Load the common hypervisor registers */
+ WRITE_SPECIALREG(vttbr_el2, hyp->vttbr_el2);
+
+ host_hypctx.mdcr_el2 = READ_SPECIALREG(mdcr_el2);
+ WRITE_SPECIALREG(mdcr_el2, hypctx->mdcr_el2);
+
+ /* Call into the guest */
+ ret = vmm_enter_guest(hypctx);
+
+ WRITE_SPECIALREG(mdcr_el2, host_hypctx.mdcr_el2);
+ isb();
+
+ /* Store the exit info */
+ hypctx->exit_info.far_el2 = READ_SPECIALREG(far_el2);
+ hpfar_valid = true;
+ if (ret == EXCP_TYPE_EL1_SYNC) {
+ switch(ESR_ELx_EXCEPTION(hypctx->tf.tf_esr)) {
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ /*
+ * The hpfar_el2 register is valid for:
+			 * - Translation and Access faults.
+			 * - Translation, Access, and permission faults on
+ * the translation table walk on the stage 1 tables.
+ * - A stage 2 Address size fault.
+ *
+ * As we only need it in the first 2 cases we can just
+ * exclude it on permission faults that are not from
+ * the stage 1 table walk.
+ *
+ * TODO: Add a case for Arm erratum 834220.
+ */
+ if ((hypctx->tf.tf_esr & ISS_DATA_S1PTW) != 0)
+ break;
+ switch(hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ hpfar_valid = false;
+ break;
+ }
+ break;
+ }
+ }
+ if (hpfar_valid) {
+ hypctx->exit_info.hpfar_el2 = READ_SPECIALREG(hpfar_el2);
+ } else {
+ /*
+ * TODO: There is a risk the at instruction could cause an
+ * exception here. We should handle it & return a failure.
+ */
+ s1e1r =
+ arm64_address_translate_s1e1r(hypctx->exit_info.far_el2);
+ if (PAR_SUCCESS(s1e1r)) {
+ hpfar_el2 = (s1e1r & PAR_PA_MASK) >> PAR_PA_SHIFT;
+ hpfar_el2 <<= HPFAR_EL2_FIPA_SHIFT;
+ hypctx->exit_info.hpfar_el2 = hpfar_el2;
+ } else {
+ ret = EXCP_TYPE_REENTER;
+ }
+ }
+
+ vmm_hyp_reg_store(hypctx, hyp, true);
+
+ vmm_hyp_reg_restore(&host_hypctx, NULL, false);
+
+ /* Restore the host special registers */
+ WRITE_SPECIALREG(ich_hcr_el2, ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2, ich_vmcr_el2);
+
+ WRITE_SPECIALREG(cnthctl_el2, cnthctl_el2);
+ WRITE_SPECIALREG(cntkctl_el1, cntkctl_el1);
+ WRITE_SPECIALREG(cntvoff_el2, cntvoff_el2);
+
+ return (ret);
+}
+
+static uint64_t
+vmm_hyp_read_reg(uint64_t reg)
+{
+ switch(reg) {
+ case HYP_REG_ICH_VTR:
+ return (READ_SPECIALREG(ich_vtr_el2));
+ case HYP_REG_CNTHCTL:
+ return (READ_SPECIALREG(cnthctl_el2));
+ }
+
+ return (0);
+}
+
+static bool
+vmm_is_vpipt_cache(void)
+{
+	/* TODO: Implement. Assume a non-VPIPT icache until then. */
+	return (false);
+}
+
+static int
+vmm_clean_s2_tlbi(void)
+{
+ dsb(ishst);
+ __asm __volatile("tlbi alle1is");
+
+ /*
+ * If we have a VPIPT icache it will use the VMID to tag cachelines.
+ * As we are changing the allocated VMIDs we need to invalidate the
+ * icache lines containing all old values.
+ */
+ if (vmm_is_vpipt_cache())
+ __asm __volatile("ic ialluis");
+ dsb(ish);
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_size_t eva,
+ bool final_only)
+{
+ uint64_t end, r, start;
+ uint64_t host_vttbr;
+
+#define TLBI_VA_SHIFT 12
+#define TLBI_VA_MASK ((1ul << 44) - 1)
+#define TLBI_VA(addr) (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK)
+#define TLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ /*
+ * The CPU can cache the stage 1 + 2 combination so we need to ensure
+ * the stage 2 is invalidated first, then when this has completed we
+ * invalidate the stage 1 TLB. As we don't know which stage 1 virtual
+ * addresses point at the stage 2 IPA we need to invalidate the entire
+ * stage 1 TLB.
+ */
+
+ start = TLBI_VA(sva);
+ end = TLBI_VA(eva);
+ for (r = start; r < end; r += TLBI_VA_L3_INCR) {
+ /* Invalidate the stage 2 TLB entry */
+ if (final_only)
+ __asm __volatile("tlbi ipas2le1is, %0" : : "r"(r));
+ else
+ __asm __volatile("tlbi ipas2e1is, %0" : : "r"(r));
+ }
+ /* Ensure the entry has been invalidated */
+ dsb(ish);
+ /* Invalidate the stage 1 TLB. */
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_all(uint64_t vttbr)
+{
+ uint64_t host_vttbr;
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ __asm __volatile("tlbi vmalls12e1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vmm_dc_civac(uint64_t start, uint64_t len)
+{
+ size_t line_size, end;
+ uint64_t ctr;
+
+ ctr = READ_SPECIALREG(ctr_el0);
+ line_size = sizeof(int) << CTR_DLINE_SIZE(ctr);
+ end = start + len;
+ dsb(ishst);
+ /* Clean and Invalidate the D-cache */
+ for (; start < end; start += line_size)
+ __asm __volatile("dc civac, %0" :: "r" (start) : "memory");
+ dsb(ish);
+ return (0);
+}
+
+static int
+vmm_el2_tlbi(uint64_t type, uint64_t start, uint64_t len)
+{
+	uint64_t end, r;
+
+	dsb(ishst);
+	switch (type) {
+	default:
+	case HYP_EL2_TLBI_ALL:
+		__asm __volatile("tlbi alle2" ::: "memory");
+		break;
+	case HYP_EL2_TLBI_VA:
+		/* Convert the VA range to 4KiB page numbers */
+		end = (start + len) >> 12;
+		start >>= 12;
+		while (start < end) {
+			/* TODO: Use new macros when merged past them */
+			r = start & 0xffffffffffful;
+			__asm __volatile("tlbi vae2is, %0" :: "r"(r));
+			/* 'start' is in page units, so advance one page */
+			start++;
+		}
+		break;
+	}
+	dsb(ish);
+
+	return (0);
+}
+
+uint64_t
+vmm_hyp_enter(uint64_t handle, uint64_t x1, uint64_t x2, uint64_t x3,
+ uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
+{
+ uint64_t ret;
+
+ switch (handle) {
+ case HYP_ENTER_GUEST:
+ do {
+ ret = vmm_hyp_call_guest((struct hyp *)x1, x2);
+ } while (ret == EXCP_TYPE_REENTER);
+ return (ret);
+ case HYP_READ_REGISTER:
+ return (vmm_hyp_read_reg(x1));
+ case HYP_CLEAN_S2_TLBI:
+ return (vmm_clean_s2_tlbi());
+ case HYP_DC_CIVAC:
+ return (vmm_dc_civac(x1, x2));
+ case HYP_EL2_TLBI:
+ return (vmm_el2_tlbi(x1, x2, x3));
+ case HYP_S2_TLBI_RANGE:
+ return (vm_s2_tlbi_range(x1, x2, x3, x4));
+ case HYP_S2_TLBI_ALL:
+ return (vm_s2_tlbi_all(x1));
+ case HYP_CLEANUP: /* Handled in vmm_hyp_exception.S */
+ default:
+ break;
+ }
+
+ return (0);
+}
diff --git a/sys/arm64/vmm/vmm_hyp_el2.S b/sys/arm64/vmm/vmm_hyp_el2.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_el2.S
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/param.h>
+
+ .rodata
+ .align PAGE_SHIFT
+ .globl vmm_hyp_code
+vmm_hyp_code:
+ .incbin "vmm_hyp_blob.bin"
+ .globl vmm_hyp_code_end
+vmm_hyp_code_end:
diff --git a/sys/arm64/vmm/vmm_hyp_exception.S b/sys/arm64/vmm/vmm_hyp_exception.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_exception.S
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+#include <machine/hypervisor.h>
+
+#include "assym.inc"
+#include "hyp.h"
+
+.macro save_host_registers
+ /* TODO: Only store callee saved registers */
+ sub sp, sp, #(32 * 8)
+ str x30, [sp, #(30 * 8)]
+ stp x28, x29, [sp, #(28 * 8)]
+ stp x26, x27, [sp, #(26 * 8)]
+ stp x24, x25, [sp, #(24 * 8)]
+ stp x22, x23, [sp, #(22 * 8)]
+ stp x20, x21, [sp, #(20 * 8)]
+ stp x18, x19, [sp, #(18 * 8)]
+ stp x16, x17, [sp, #(16 * 8)]
+ stp x14, x15, [sp, #(14 * 8)]
+ stp x12, x13, [sp, #(12 * 8)]
+ stp x10, x11, [sp, #(10 * 8)]
+ stp x8, x9, [sp, #(8 * 8)]
+ stp x6, x7, [sp, #(6 * 8)]
+ stp x4, x5, [sp, #(4 * 8)]
+ stp x2, x3, [sp, #(2 * 8)]
+ stp x0, x1, [sp, #(0 * 8)]
+.endm
+
+.macro restore_host_registers
+ /* TODO: Only restore callee saved registers */
+ ldp x0, x1, [sp, #(0 * 8)]
+ ldp x2, x3, [sp, #(2 * 8)]
+ ldp x4, x5, [sp, #(4 * 8)]
+ ldp x6, x7, [sp, #(6 * 8)]
+ ldp x8, x9, [sp, #(8 * 8)]
+ ldp x10, x11, [sp, #(10 * 8)]
+ ldp x12, x13, [sp, #(12 * 8)]
+ ldp x14, x15, [sp, #(14 * 8)]
+ ldp x16, x17, [sp, #(16 * 8)]
+ ldp x18, x19, [sp, #(18 * 8)]
+ ldp x20, x21, [sp, #(20 * 8)]
+ ldp x22, x23, [sp, #(22 * 8)]
+ ldp x24, x25, [sp, #(24 * 8)]
+ ldp x26, x27, [sp, #(26 * 8)]
+ ldp x28, x29, [sp, #(28 * 8)]
+ ldr x30, [sp, #(30 * 8)]
+ add sp, sp, #(32 * 8)
+.endm
+
+.macro save_guest_registers
+ /* Back up x0 so we can use it as a temporary register */
+ stp x0, x1, [sp, #-(2 * 8)]!
+
+ /* Restore the hypctx pointer */
+ mrs x0, tpidr_el2
+
+ stp x2, x3, [x0, #(TF_X + 2 * 8)]
+ stp x4, x5, [x0, #(TF_X + 4 * 8)]
+ stp x6, x7, [x0, #(TF_X + 6 * 8)]
+ stp x8, x9, [x0, #(TF_X + 8 * 8)]
+ stp x10, x11, [x0, #(TF_X + 10 * 8)]
+ stp x12, x13, [x0, #(TF_X + 12 * 8)]
+ stp x14, x15, [x0, #(TF_X + 14 * 8)]
+ stp x16, x17, [x0, #(TF_X + 16 * 8)]
+ stp x18, x19, [x0, #(TF_X + 18 * 8)]
+ stp x20, x21, [x0, #(TF_X + 20 * 8)]
+ stp x22, x23, [x0, #(TF_X + 22 * 8)]
+ stp x24, x25, [x0, #(TF_X + 24 * 8)]
+ stp x26, x27, [x0, #(TF_X + 26 * 8)]
+ stp x28, x29, [x0, #(TF_X + 28 * 8)]
+
+ str lr, [x0, #(TF_LR)]
+
+ /* Restore the saved x0 & x1 and save them */
+ ldp x2, x3, [sp], #(2 * 8)
+ stp x2, x3, [x0, #(TF_X + 0 * 8)]
+.endm
+
+.macro restore_guest_registers
+ /*
+ * Copy the guest x0 and x1 to the stack so we can restore them
+ * after loading the other registers.
+ */
+ ldp x2, x3, [x0, #(TF_X + 0 * 8)]
+ stp x2, x3, [sp, #-(2 * 8)]!
+
+ ldr lr, [x0, #(TF_LR)]
+
+ ldp x28, x29, [x0, #(TF_X + 28 * 8)]
+ ldp x26, x27, [x0, #(TF_X + 26 * 8)]
+ ldp x24, x25, [x0, #(TF_X + 24 * 8)]
+ ldp x22, x23, [x0, #(TF_X + 22 * 8)]
+ ldp x20, x21, [x0, #(TF_X + 20 * 8)]
+ ldp x18, x19, [x0, #(TF_X + 18 * 8)]
+ ldp x16, x17, [x0, #(TF_X + 16 * 8)]
+ ldp x14, x15, [x0, #(TF_X + 14 * 8)]
+ ldp x12, x13, [x0, #(TF_X + 12 * 8)]
+ ldp x10, x11, [x0, #(TF_X + 10 * 8)]
+ ldp x8, x9, [x0, #(TF_X + 8 * 8)]
+ ldp x6, x7, [x0, #(TF_X + 6 * 8)]
+ ldp x4, x5, [x0, #(TF_X + 4 * 8)]
+ ldp x2, x3, [x0, #(TF_X + 2 * 8)]
+
+ ldp x0, x1, [sp], #(2 * 8)
+.endm
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .section ".vmm_vectors","ax"
+ .align 11
+hyp_init_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* Error EL2h */
+
+ vector hyp_init /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+ .text
+ .align 11
+hyp_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vector el2_el2h_sync /* Synchronous EL2h */
+ vector el2_el2h_irq /* IRQ EL2h */
+ vector el2_el2h_fiq /* FIQ EL2h */
+ vector el2_el2h_error /* Error EL2h */
+
+ vector el2_el1_sync64 /* Synchronous 64-bit EL1 */
+ vector el2_el1_irq64 /* IRQ 64-bit EL1 */
+ vector el2_el1_fiq64 /* FIQ 64-bit EL1 */
+ vector el2_el1_error64 /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+/*
+ * Initialize the hypervisor mode with a new exception vector table, translation
+ * table and stack.
+ *
+ * Expecting:
+ * x0 - translation tables physical address
+ * x1 - stack top virtual address
+ * x2 - TCR_EL2 value
+ * x3 - SCTLR_EL2 value
+ * x4 - VTCR_EL2 value
+ */
+LENTRY(handle_hyp_init)
+ /* Install the new exception vectors */
+ adrp x6, hyp_vectors
+ add x6, x6, :lo12:hyp_vectors
+ msr vbar_el2, x6
+ /* Set the stack top address */
+ mov sp, x1
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+ /* Load the base address for the translation tables */
+ msr ttbr0_el2, x0
+ /* Invalidate the TLB */
+ tlbi alle2
+ /* Use the same memory attributes as EL1 */
+ mrs x9, mair_el1
+ msr mair_el2, x9
+ /* Configure address translation */
+ msr tcr_el2, x2
+ isb
+ /* Set the system control register for EL2 */
+ msr sctlr_el2, x3
+ /* Set the Stage 2 translation control register */
+ msr vtcr_el2, x4
+ /* Return success */
+ mov x0, #0
+ /* MMU is up and running */
+ ERET
+LEND(handle_hyp_init)
+
+.macro do_world_switch_to_host
+ save_guest_registers
+ restore_host_registers
+
+ /* Restore host VTTBR */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+.endm
+
+
+.macro handle_el2_excp type
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Test if the exception happened when the host was running */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* We got the exception while the guest was running */
+ ldr x9, [sp], #16
+ do_world_switch_to_host
+ mov x0, \type
+ ret
+
+1:
+ /* We got the exception while the host was running */
+ ldr x9, [sp], #16
+ mov x0, \type
+ eret
+.endm
+
+
+LENTRY(handle_el2_el2h_sync)
+ handle_el2_excp #EXCP_TYPE_EL2_SYNC
+LEND(handle_el2_el2h_sync)
+
+LENTRY(handle_el2_el2h_irq)
+ handle_el2_excp #EXCP_TYPE_EL2_IRQ
+LEND(handle_el2_el2h_irq)
+
+LENTRY(handle_el2_el2h_fiq)
+ handle_el2_excp #EXCP_TYPE_EL2_FIQ
+LEND(handle_el2_el2h_fiq)
+
+LENTRY(handle_el2_el2h_error)
+ handle_el2_excp #EXCP_TYPE_EL2_ERROR
+LEND(handle_el2_el2h_error)
+
+
+LENTRY(handle_el2_el1_sync64)
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Check for host hypervisor call */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ ldr x9, [sp], #16 /* Restore the temp register */
+ bne 1f
+
+ /*
+ * Called from the host
+ */
+
+ /* Check if this is a cleanup call and handle in a controlled state */
+ cmp x0, #(HYP_CLEANUP)
+ b.eq vmm_cleanup
+
+ str lr, [sp, #-16]!
+ bl vmm_hyp_enter
+ ldr lr, [sp], #16
+ ERET
+
+1: /* Guest exception taken to EL2 */
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_SYNC
+ ret
+LEND(handle_el2_el1_sync64)
+
+/*
+ * We only trap IRQ, FIQ and SError exceptions when a guest is running. Do a
+ * world switch to host to handle these exceptions.
+ */
+
+LENTRY(handle_el2_el1_irq64)
+	do_world_switch_to_host
+	str	x9, [sp, #-16]!
+	/* A non-zero maintenance interrupt status means a GIC maint IRQ */
+	mrs	x9, ich_misr_el2
+	cmp	x9, xzr
+	beq	1f
+	mov	x0, #EXCP_TYPE_MAINT_IRQ
+	b	2f
+1:
+	mov	x0, #EXCP_TYPE_EL1_IRQ
+2:
+	ldr	x9, [sp], #16
+	ret
+LEND(handle_el2_el1_irq64)
+
+LENTRY(handle_el2_el1_fiq64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_FIQ
+ ret
+LEND(handle_el2_el1_fiq64)
+
+LENTRY(handle_el2_el1_error64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_ERROR
+ ret
+LEND(handle_el2_el1_error64)
+
+
+/*
+ * Usage:
+ * uint64_t vmm_enter_guest(struct hypctx *hypctx)
+ *
+ * Expecting:
+ * x0 - hypctx address
+ */
+ENTRY(vmm_enter_guest)
+ /* Save hypctx address */
+ msr tpidr_el2, x0
+
+ save_host_registers
+ restore_guest_registers
+
+ /* Enter guest */
+ eret
+END(vmm_enter_guest)
+
+/*
+ * Usage:
+ * void vmm_cleanup(uint64_t handle, void *hyp_stub_vectors)
+ *
+ * Expecting:
+ * x1 - physical address of hyp_stub_vectors
+ */
+LENTRY(vmm_cleanup)
+ /* Restore the stub vectors */
+ msr vbar_el2, x1
+
+ /* Disable the MMU */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, #SCTLR_EL2_M
+ msr sctlr_el2, x2
+ isb
+
+ ERET
+LEND(vmm_cleanup)
diff --git a/sys/arm64/vmm/vmm_instruction_emul.c b/sys/arm64/vmm/vmm_instruction_emul.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
+/*
+ * Emulate a decoded guest load or store ('vie') that faulted on guest
+ * physical address 'gpa'.
+ *
+ * A read fetches the value from the device via 'memread' and stores it in
+ * the decoded target register; a write fetches the source register and
+ * passes it to 'memwrite'.  Returns 0 on success, otherwise the first
+ * error from a callback or register accessor.
+ *
+ * NOTE(review): 'paging' is accepted for interface compatibility but is
+ * unused in this implementation.
+ */
+int
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *memarg)
+{
+	uint64_t val;
+	int error;
+
+	if (vie->dir == VM_DIR_READ) {
+		error = memread(vm, vcpuid, gpa, &val, vie->access_size, memarg);
+		if (error)
+			goto out;
+		error = vm_set_register(vm, vcpuid, vie->reg, val);
+	} else {
+		error = vm_get_register(vm, vcpuid, vie->reg, &val);
+		if (error)
+			goto out;
+		error = memwrite(vm, vcpuid, gpa, val, vie->access_size, memarg);
+	}
+
+out:
+	return (error);
+}
+
+/*
+ * Emulate a trapped guest system-register access described by 'vre'.
+ *
+ * A read obtains the value from 'regread' and stores it in the guest's
+ * target general-purpose register; a write fetches that register and
+ * hands the value to 'regwrite'.  Returns 0 on success, otherwise the
+ * first error from a callback or register accessor.
+ */
+int
+vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+    reg_write_t regwrite, void *regarg)
+{
+	uint64_t val;
+	int error;
+
+	if (vre->dir == VM_DIR_READ) {
+		error = regread(vm, vcpuid, &val, regarg);
+		if (error)
+			goto out;
+		error = vm_set_register(vm, vcpuid, vre->reg, val);
+	} else {
+		error = vm_get_register(vm, vcpuid, vre->reg, &val);
+		if (error)
+			goto out;
+		error = regwrite(vm, vcpuid, val, regarg);
+	}
+
+out:
+	return (error);
+}
diff --git a/sys/arm64/vmm/vmm_ktr.h b/sys/arm64/vmm/vmm_ktr.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_ktr.h
@@ -0,0 +1,71 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_KTR_H_
+#define _VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+/* Allow the KTR trace class to be overridden at build time */
+#ifndef KTR_VMM
+#define KTR_VMM KTR_GEN
+#endif
+
+/*
+ * Tracing helpers: VCPU_CTRn prefixes the event with "vm name[vcpuid]"
+ * and VM_CTRn with "vm name".  Each wrapper therefore uses a CTR macro
+ * with extra argument slots for the vm_name()/vcpuid values.
+ */
+#define VCPU_CTR0(vm, vcpuid, format) \
+CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid))
+
+#define VCPU_CTR1(vm, vcpuid, format, p1) \
+CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1))
+
+#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \
+CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2))
+
+#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \
+CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
+
+#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), \
+	(p1), (p2), (p3), (p4))
+
+#define VM_CTR0(vm, format) \
+CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
+
+#define VM_CTR1(vm, format, p1) \
+CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1))
+
+#define VM_CTR2(vm, format, p1, p2) \
+CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2))
+
+#define VM_CTR3(vm, format, p1, p2, p3) \
+CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3))
+
+#define VM_CTR4(vm, format, p1, p2, p3, p4) \
+CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4))
+#endif /* !_VMM_KTR_H_ */
diff --git a/sys/arm64/vmm/vmm_mem.h b/sys/arm64/vmm/vmm_mem.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mem.h
@@ -0,0 +1,43 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_MEM_H_
+#define _VMM_MEM_H_
+
+struct vmspace;
+struct vm_object;
+
+int vmm_mem_init(void);
+struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
+ vm_paddr_t hpa);
+void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);
+vm_paddr_t vmm_mem_maxaddr(void);
+
+#endif
diff --git a/sys/arm64/vmm/vmm_mem.c b/sys/arm64/vmm/vmm_mem.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mem.c
@@ -0,0 +1,124 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/sglist.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#include <machine/md_var.h>
+
+#include "vmm_mem.h"
+
+/* No arm64-specific VM memory initialization is currently required. */
+int
+vmm_mem_init(void)
+{
+
+	return (0);
+}
+
+/*
+ * Create a VM object backed by the host physical range [hpa, hpa + len)
+ * and map it into the guest physical address space at 'gpa'.  Returns the
+ * object on success, or NULL if the pager allocation or the map insertion
+ * failed.
+ */
+vm_object_t
+vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
+    vm_paddr_t hpa)
+{
+	int error;
+	vm_object_t obj;
+	struct sglist *sg;
+
+	sg = sglist_alloc(1, M_WAITOK);
+	error = sglist_append_phys(sg, hpa, len);
+	KASSERT(error == 0, ("error %d appending physaddr to sglist", error));
+
+	obj = vm_pager_allocate(OBJT_SG, sg, len, VM_PROT_RW, 0, NULL);
+	if (obj != NULL) {
+		/*
+		 * Force the pages provided by this object to be mapped
+		 * uncacheable: they alias a device's MMIO space.
+		 * (The previous comment here referred to VT-x, EPT and
+		 * MTRRs; those are x86 concepts that do not apply to
+		 * this arm64 port.)
+		 */
+		VM_OBJECT_WLOCK(obj);
+		error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE);
+		VM_OBJECT_WUNLOCK(obj);
+		if (error != KERN_SUCCESS) {
+			panic("vmm_mmio_alloc: vm_object_set_memattr error %d",
+			    error);
+		}
+		error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
+		    VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
+		if (error != KERN_SUCCESS) {
+			vm_object_deallocate(obj);
+			obj = NULL;
+		}
+	}
+
+	/*
+	 * Drop the reference on the sglist.
+	 *
+	 * If the scatter/gather object was successfully allocated then it
+	 * has incremented the reference count on the sglist. Dropping the
+	 * initial reference count ensures that the sglist will be freed
+	 * when the object is deallocated.
+	 *
+	 * If the object could not be allocated then we end up freeing the
+	 * sglist.
+	 */
+	sglist_free(sg);
+
+	return (obj);
+}
+
+/* Remove a guest-physical MMIO mapping created by vmm_mmio_alloc(). */
+void
+vmm_mmio_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
+{
+
+	vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
+}
+
+/* Highest host physical address, as a byte address derived from Maxmem. */
+vm_paddr_t
+vmm_mem_maxaddr(void)
+{
+
+	return (ptoa(Maxmem));
+}
diff --git a/sys/arm64/vmm/vmm_mmu.c b/sys/arm64/vmm/vmm_mmu.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mmu.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+
+#include <machine/atomic.h>
+#include <machine/machdep.h>
+#include <machine/vm.h>
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+
+#include "mmu.h"
+#include "arm64.h"
+
+MALLOC_DECLARE(M_HYP);
+
+static struct mtx vmmpmap_mtx;
+static pt_entry_t *l0;
+static vm_paddr_t l0_paddr;
+
+/*
+ * Allocate the root (level 0) page of the EL2 translation table and
+ * initialize the mutex that protects updates to the table hierarchy.
+ * Returns false if the root page could not be allocated.
+ */
+bool
+vmmpmap_init(void)
+{
+	vm_page_t m;
+
+	m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+	if (m == NULL)
+		return (false);
+
+	l0_paddr = VM_PAGE_TO_PHYS(m);
+	/* Cast matches l0's declared type (was a stray pd_entry_t * cast) */
+	l0 = (pt_entry_t *)PHYS_TO_DMAP(l0_paddr);
+	memset(l0, 0, PAGE_SIZE);
+
+	mtx_init(&vmmpmap_mtx, "vmm pmap", NULL, MTX_DEF);
+
+	return (true);
+}
+
+/*
+ * Free the level 3 table referenced by level 2 entry 'l2e'.  All leaf
+ * entries must already have been cleared (asserted under INVARIANTS).
+ */
+static void
+vmmpmap_release_l3(pd_entry_t l2e)
+{
+	pt_entry_t *l3 __diagused;
+	vm_page_t m;
+	int i;
+
+	l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+	for (i = 0; i < Ln_ENTRIES; i++) {
+		KASSERT(l3[i] == 0, ("%s: l3 still mapped: %p %lx", __func__,
+		    &l3[i], l3[i]));
+	}
+
+	m = PHYS_TO_VM_PAGE(l2e & ~ATTR_MASK);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+}
+
+/*
+ * Free the level 2 table referenced by level 1 entry 'l1e', together with
+ * every level 3 table it still references.
+ */
+static void
+vmmpmap_release_l2(pd_entry_t l1e)
+{
+	pt_entry_t *l2;
+	vm_page_t m;
+	int i;
+
+	l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+	for (i = 0; i < Ln_ENTRIES; i++) {
+		if (l2[i] != 0) {
+			vmmpmap_release_l3(l2[i]);
+		}
+	}
+
+	m = PHYS_TO_VM_PAGE(l1e & ~ATTR_MASK);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+}
+
+/*
+ * Free the level 1 table referenced by level 0 entry 'l0e', together with
+ * the level 2 and level 3 tables below it.
+ */
+static void
+vmmpmap_release_l1(pd_entry_t l0e)
+{
+	pt_entry_t *l1;
+	vm_page_t m;
+	int i;
+
+	l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+	for (i = 0; i < Ln_ENTRIES; i++) {
+		if (l1[i] != 0) {
+			vmmpmap_release_l2(l1[i]);
+		}
+	}
+
+	m = PHYS_TO_VM_PAGE(l0e & ~ATTR_MASK);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+}
+
+/*
+ * Tear down the whole EL2 translation table, including the root page, and
+ * destroy the pmap mutex.  All leaf (L3) mappings must have been removed
+ * beforehand — vmmpmap_release_l3() asserts this under INVARIANTS.
+ */
+void
+vmmpmap_fini(void)
+{
+	vm_page_t m;
+	int i;
+
+	/* Remove the remaining entries */
+	for (i = 0; i < L0_ENTRIES; i++) {
+		if (l0[i] != 0) {
+			vmmpmap_release_l1(l0[i]);
+		}
+	}
+
+	m = PHYS_TO_VM_PAGE(l0_paddr);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+
+	mtx_destroy(&vmmpmap_mtx);
+}
+
+/* Physical address of the root table, suitable for loading into TTBR0_EL2. */
+uint64_t
+vmmpmap_to_ttbr0(void)
+{
+
+	return (l0_paddr);
+}
+
+/*
+ * Returns a pointer to the level 1 table for 'va', allocating it if
+ * needed, or NULL if the page allocation failed.
+ */
+static pt_entry_t *
+vmmpmap_l1_table(vm_offset_t va)
+{
+	pt_entry_t new_l0e, l0e, *l1;
+	vm_page_t m;
+	int rv;
+
+	m = NULL;
+again:
+	l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+	if ((l0e & ATTR_DESCR_VALID) == 0) {
+		/* Allocate a page for the level 1 table */
+		if (m == NULL) {
+			m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+			if (m == NULL)
+				return (NULL);
+		}
+
+		new_l0e = VM_PAGE_TO_PHYS(m) | L0_TABLE;
+
+		/*
+		 * The cmpset is performed under the mutex; NOTE(review):
+		 * presumably this serializes against the table walk in
+		 * vmmpmap_remove() — confirm.
+		 */
+		mtx_lock(&vmmpmap_mtx);
+		rv = atomic_cmpset_64(&l0[pmap_l0_index(va)], l0e, new_l0e);
+		mtx_unlock(&vmmpmap_mtx);
+		/* We may have raced another thread, try again */
+		if (rv == 0)
+			goto again;
+
+		/* The cmpset succeeded */
+		l0e = new_l0e;
+	} else if (m != NULL) {
+		/* We allocated a page that wasn't used */
+		vm_page_unwire_noq(m);
+		vm_page_free_zero(m);
+	}
+
+	l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+	return (l1);
+}
+
+/*
+ * Returns a pointer to the level 2 table for 'va', allocating it and any
+ * intermediate tables as needed, or NULL on allocation failure.
+ */
+static pt_entry_t *
+vmmpmap_l2_table(vm_offset_t va)
+{
+	pt_entry_t new_l1e, l1e, *l1, *l2;
+	vm_page_t m;
+	int rv;
+
+	l1 = vmmpmap_l1_table(va);
+	if (l1 == NULL)
+		return (NULL);
+
+	m = NULL;
+again:
+	l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+	if ((l1e & ATTR_DESCR_VALID) == 0) {
+		/* Allocate a page for the level 2 table */
+		if (m == NULL) {
+			m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+			if (m == NULL)
+				return (NULL);
+		}
+
+		new_l1e = VM_PAGE_TO_PHYS(m) | L1_TABLE;
+
+		mtx_lock(&vmmpmap_mtx);
+		rv = atomic_cmpset_64(&l1[pmap_l1_index(va)], l1e, new_l1e);
+		mtx_unlock(&vmmpmap_mtx);
+		/* We may have raced another thread, try again */
+		if (rv == 0)
+			goto again;
+
+		/* The cmpset succeeded */
+		l1e = new_l1e;
+	} else if (m != NULL) {
+		/* We allocated a page that wasn't used */
+		vm_page_unwire_noq(m);
+		vm_page_free_zero(m);
+	}
+
+	l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+	return (l2);
+}
+
+/*
+ * Returns a pointer to the level 3 (leaf) table for 'va', allocating it
+ * and any intermediate tables as needed, or NULL on allocation failure.
+ */
+static pd_entry_t *
+vmmpmap_l3_table(vm_offset_t va)
+{
+	pt_entry_t new_l2e, l2e, *l2, *l3;
+	vm_page_t m;
+	int rv;
+
+	l2 = vmmpmap_l2_table(va);
+	if (l2 == NULL)
+		return (NULL);
+
+	m = NULL;
+again:
+	l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+	if ((l2e & ATTR_DESCR_VALID) == 0) {
+		/* Allocate a page for the level 3 table */
+		if (m == NULL) {
+			m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+			if (m == NULL)
+				return (NULL);
+		}
+
+		new_l2e = VM_PAGE_TO_PHYS(m) | L2_TABLE;
+
+		mtx_lock(&vmmpmap_mtx);
+		rv = atomic_cmpset_64(&l2[pmap_l2_index(va)], l2e, new_l2e);
+		mtx_unlock(&vmmpmap_mtx);
+		/* We may have raced another thread, try again */
+		if (rv == 0)
+			goto again;
+
+		/* The cmpset succeeded */
+		l2e = new_l2e;
+	} else if (m != NULL) {
+		/* We allocated a page that wasn't used */
+		vm_page_unwire_noq(m);
+		vm_page_free_zero(m);
+	}
+
+	l3 = (pt_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+	return (l3);
+}
+
+/*
+ * Creates an EL2 entry in the hyp_pmap. Similar to pmap_kenter.
+ *
+ * Maps [va, va + size) to the physical range starting at 'pa' with
+ * protection 'prot' using 4K (L3) pages.  Returns false if a page table
+ * page could not be allocated.  NOTE(review): on failure, pages mapped
+ * before the failing one are left in place — presumably the caller is
+ * expected to clean up with vmmpmap_remove(); confirm.
+ */
+bool
+vmmpmap_enter(vm_offset_t va, vm_size_t size, vm_paddr_t pa, vm_prot_t prot)
+{
+	pd_entry_t l3e, *l3;
+
+	KASSERT((pa & L3_OFFSET) == 0,
+	   ("%s: Invalid physical address", __func__));
+	KASSERT((va & L3_OFFSET) == 0,
+	   ("%s: Invalid virtual address", __func__));
+	KASSERT((size & PAGE_MASK) == 0,
+	    ("%s: Mapping is not page-sized", __func__));
+
+	l3e = ATTR_DEFAULT | L3_PAGE;
+	/* This bit is res1 at EL2 */
+	l3e |= ATTR_S1_AP(ATTR_S1_AP_USER);
+	/* Only normal memory is used at EL2 */
+	l3e |= ATTR_S1_IDX(VM_MEMATTR_DEFAULT);
+
+	if ((prot & VM_PROT_EXECUTE) == 0) {
+		/* PXN is res0 at EL2. UXN is XN */
+		l3e |= ATTR_S1_UXN;
+	}
+	if ((prot & VM_PROT_WRITE) == 0) {
+		l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+	}
+
+	while (size > 0) {
+		l3 = vmmpmap_l3_table(va);
+		if (l3 == NULL)
+			return (false);
+
+#ifdef INVARIANTS
+		/*
+		 * Ensure no other threads can write to l3 between the KASSERT
+		 * and store.
+		 */
+		mtx_lock(&vmmpmap_mtx);
+#endif
+		KASSERT(atomic_load_64(&l3[pmap_l3_index(va)]) == 0,
+		    ("%s: VA already mapped", __func__));
+
+		atomic_store_64(&l3[pmap_l3_index(va)], l3e | pa);
+#ifdef INVARIANTS
+		mtx_unlock(&vmmpmap_mtx);
+#endif
+
+		size -= PAGE_SIZE;
+		pa += PAGE_SIZE;
+		va += PAGE_SIZE;
+	}
+
+	return (true);
+}
+
+/*
+ * Remove the EL2 mappings for the range [va, va + size).
+ *
+ * If 'invalidate' is true the mappings are first downgraded to read-only,
+ * the data cache is cleaned & invalidated at EL2, and only then are the
+ * entries cleared and the TLB invalidated.  If false, the entries are
+ * cleared directly and the caller is responsible for cache/TLB
+ * maintenance.
+ */
+void
+vmmpmap_remove(vm_offset_t va, vm_size_t size, bool invalidate)
+{
+	pt_entry_t l0e, *l1, l1e, *l2, l2e;
+	pd_entry_t *l3, l3e, **l3_list;
+	vm_offset_t eva, va_next, sva;
+	size_t i;
+
+	KASSERT((va & L3_OFFSET) == 0,
+	   ("%s: Invalid virtual address", __func__));
+	KASSERT((size & PAGE_MASK) == 0,
+	    ("%s: Mapping is not page-sized", __func__));
+
+	if (invalidate) {
+		l3_list = malloc((size / PAGE_SIZE) * sizeof(l3_list[0]),
+		    M_TEMP, M_WAITOK | M_ZERO);
+	}
+
+	sva = va;
+	eva = va + size;
+	mtx_lock(&vmmpmap_mtx);
+	for (i = 0; va < eva; va = va_next) {
+		l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+		if (l0e == 0) {
+			va_next = (va + L0_SIZE) & ~L0_OFFSET;
+			if (va_next < va)
+				va_next = eva;
+			continue;
+		}
+		MPASS((l0e & ATTR_DESCR_MASK) == L0_TABLE);
+
+		l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+		l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+		if (l1e == 0) {
+			va_next = (va + L1_SIZE) & ~L1_OFFSET;
+			if (va_next < va)
+				va_next = eva;
+			continue;
+		}
+		MPASS((l1e & ATTR_DESCR_MASK) == L1_TABLE);
+
+		l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+		l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+		if (l2e == 0) {
+			va_next = (va + L2_SIZE) & ~L2_OFFSET;
+			if (va_next < va)
+				va_next = eva;
+			continue;
+		}
+		MPASS((l2e & ATTR_DESCR_MASK) == L2_TABLE);
+
+		l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+		if (invalidate) {
+			l3e = atomic_load_64(&l3[pmap_l3_index(va)]);
+			MPASS(l3e != 0);
+			/*
+			 * Mark memory as read-only so we can invalidate
+			 * the cache.
+			 */
+			l3e &= ~ATTR_S1_AP_MASK;
+			l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+			atomic_store_64(&l3[pmap_l3_index(va)], l3e);
+
+			/*
+			 * Record the address of this entry, not the start
+			 * of the table, so the correct entry is cleared
+			 * below.
+			 */
+			l3_list[i] = &l3[pmap_l3_index(va)];
+			i++;
+		} else {
+			/*
+			 * The caller is responsible for clearing the cache &
+			 * handling the TLB
+			 */
+			atomic_store_64(&l3[pmap_l3_index(va)], 0);
+		}
+
+		va_next = (va + L3_SIZE) & ~L3_OFFSET;
+		if (va_next < va)
+			va_next = eva;
+	}
+	mtx_unlock(&vmmpmap_mtx);
+
+	if (invalidate) {
+		/* Invalidate the memory from the D-cache */
+		vmm_call_hyp(HYP_DC_CIVAC, sva, size);
+
+		for (i = 0; i < (size / PAGE_SIZE); i++) {
+			/*
+			 * Unused slots are NULL (M_ZERO) when part of the
+			 * range had no mapping; skip them.
+			 */
+			if (l3_list[i] != NULL)
+				atomic_store_64(l3_list[i], 0);
+		}
+
+		vmm_call_hyp(HYP_EL2_TLBI, HYP_EL2_TLBI_VA, sva, size);
+
+		free(l3_list, M_TEMP);
+	}
+}
diff --git a/sys/arm64/vmm/vmm_psci.c b/sys/arm64/vmm/vmm_psci.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_psci.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+
+#include <dev/psci/psci.h>
+
+#include "arm64.h"
+#include "psci.h"
+
+#define PSCI_VERSION_0_2 0x2
+
+/* PSCI_VERSION: report PSCI 0.2 to the guest in x0. */
+static int
+psci_version(struct hypctx *hypctx, bool *retu)
+{
+
+	hypctx->tf.tf_x[0] = PSCI_VERSION_0_2;
+
+	*retu = false;
+	return (0);
+}
+
+/* PSCI SYSTEM_OFF: suspend the VM with a poweroff disposition. */
+static int
+psci_system_off(struct vm *vm)
+{
+	return (vm_suspend(vm, VM_SUSPEND_POWEROFF));
+}
+
+/* PSCI SYSTEM_RESET: suspend the VM with a reset disposition. */
+static int
+psci_system_reset(struct vm *vm)
+{
+	return (vm_suspend(vm, VM_SUSPEND_RESET));
+}
+
+/*
+ * Handle a PSCI/SMCCC call made by the guest via HVC.
+ *
+ * VERSION, SYSTEM_OFF and SYSTEM_RESET are serviced in the kernel; any
+ * other function ID is forwarded to userspace as a VM_EXITCODE_SMCCC exit
+ * with '*retu' set to true.  An HVC with a non-zero immediate (the ESR
+ * ISS field) is rejected as malformed.
+ */
+int
+psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+	struct hyp *hyp;
+	struct hypctx *hypctx;
+	uint64_t func_id;
+	uint32_t esr_el2, esr_iss;
+	int error, i;
+
+	hyp = vm_get_cookie(vm);
+	hypctx = &hyp->ctx[vcpuid];
+
+	esr_el2 = hypctx->tf.tf_esr;
+	esr_iss = esr_el2 & ESR_ELx_ISS_MASK;
+
+	if (esr_iss != 0) {
+		eprintf("Malformed HVC instruction with immediate: 0x%x\n",
+		    esr_iss);
+		error = 1;
+		goto out;
+	}
+
+	/* The SMCCC function ID is passed in x0, arguments in x1 onwards */
+	func_id = hypctx->tf.tf_x[0];
+	switch (func_id) {
+	case PSCI_FNID_VERSION:
+		error = psci_version(hypctx, retu);
+		break;
+	case PSCI_FNID_SYSTEM_OFF:
+		error = psci_system_off(vm);
+		break;
+	case PSCI_FNID_SYSTEM_RESET:
+		error = psci_system_reset(vm);
+		break;
+	default:
+		vme->exitcode = VM_EXITCODE_SMCCC;
+		vme->u.smccc_call.func_id = func_id;
+		for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
+			vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];
+		*retu = true;
+		error = 0;
+		break;
+	}
+
+out:
+	return (error);
+}
diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_reset.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/hypervisor.h>
+
+#include "arm64.h"
+#include "reset.h"
+
+/*
+ * Make the architecturally UNKNOWN value 0. As a bonus, we don't have to
+ * manually set all those RES0 fields.
+ */
+#define ARCH_UNKNOWN 0
+#define set_arch_unknown(reg) (memset(&(reg), ARCH_UNKNOWN, sizeof(reg)))
+
+/*
+ * Reset the guest's EL0/EL1 register state.  Fields that are
+ * architecturally UNKNOWN at reset are set to 0 via set_arch_unknown();
+ * SCTLR_EL1 and PMCR_EL0 are given explicit reset values.
+ */
+void
+reset_vm_el01_regs(void *vcpu)
+{
+	struct hypctx *el2ctx;
+
+	el2ctx = vcpu;
+
+	set_arch_unknown(el2ctx->tf);
+
+	set_arch_unknown(el2ctx->actlr_el1);
+	set_arch_unknown(el2ctx->afsr0_el1);
+	set_arch_unknown(el2ctx->afsr1_el1);
+	set_arch_unknown(el2ctx->amair_el1);
+	set_arch_unknown(el2ctx->contextidr_el1);
+	set_arch_unknown(el2ctx->cpacr_el1);
+	set_arch_unknown(el2ctx->csselr_el1);
+	set_arch_unknown(el2ctx->elr_el1);
+	set_arch_unknown(el2ctx->esr_el1);
+	set_arch_unknown(el2ctx->far_el1);
+	set_arch_unknown(el2ctx->mair_el1);
+	set_arch_unknown(el2ctx->mdccint_el1);
+	set_arch_unknown(el2ctx->mdscr_el1);
+	set_arch_unknown(el2ctx->par_el1);
+
+	/*
+	 * Guest starts with:
+	 * ~SCTLR_M: MMU off
+	 * ~SCTLR_C: data cache off
+	 * SCTLR_CP15BEN: memory barrier instruction enable from EL0; RAO/WI
+	 * ~SCTLR_I: instruction cache off
+	 */
+	el2ctx->sctlr_el1 = SCTLR_RES1;
+	el2ctx->sctlr_el1 &= ~SCTLR_M & ~SCTLR_C & ~SCTLR_I;
+	el2ctx->sctlr_el1 |= SCTLR_CP15BEN;
+
+	set_arch_unknown(el2ctx->sp_el0);
+	set_arch_unknown(el2ctx->tcr_el1);
+	set_arch_unknown(el2ctx->tpidr_el0);
+	set_arch_unknown(el2ctx->tpidr_el1);
+	set_arch_unknown(el2ctx->tpidrro_el0);
+	set_arch_unknown(el2ctx->ttbr0_el1);
+	set_arch_unknown(el2ctx->ttbr1_el1);
+	set_arch_unknown(el2ctx->vbar_el1);
+	set_arch_unknown(el2ctx->spsr_el1);
+
+	set_arch_unknown(el2ctx->dbgbcr_el1);
+	set_arch_unknown(el2ctx->dbgbvr_el1);
+	set_arch_unknown(el2ctx->dbgwcr_el1);
+	set_arch_unknown(el2ctx->dbgwvr_el1);
+
+	/* Keep the host's counter count (PMCR_EL0.N), reset everything else */
+	el2ctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0) & PMCR_N_MASK;
+	/* PMCR_LC is unknown when AArch32 is supported or RES1 otherwise */
+	el2ctx->pmcr_el0 |= PMCR_LC;
+	set_arch_unknown(el2ctx->pmccntr_el0);
+	set_arch_unknown(el2ctx->pmccfiltr_el0);
+	set_arch_unknown(el2ctx->pmcntenset_el0);
+	set_arch_unknown(el2ctx->pmintenset_el1);
+	set_arch_unknown(el2ctx->pmovsset_el0);
+	set_arch_unknown(el2ctx->pmuserenr_el0);
+	memset(el2ctx->pmevcntr_el0, 0, sizeof(el2ctx->pmevcntr_el0));
+	memset(el2ctx->pmevtyper_el0, 0, sizeof(el2ctx->pmevtyper_el0));
+}
+
+/*
+ * Reset the EL2 control registers that define how the guest runs:
+ * trap configuration, virtual MPIDR/MIDR and the initial PSTATE.
+ */
+void
+reset_vm_el2_regs(void *vcpu)
+{
+	struct hypctx *el2ctx;
+	uint64_t cpu_aff;
+
+	el2ctx = vcpu;
+
+	/*
+	 * Set the Hypervisor Configuration Register:
+	 *
+	 * HCR_RW: use AArch64 for EL1
+	 * HCR_TID3: handle ID registers in the vmm to provide a common
+	 * set of features on all vcpus
+	 * HCR_TWI: Trap WFI to the hypervisor
+	 * HCR_BSU_IS: barrier instructions apply to the inner shareable
+	 * domain
+	 * HCR_FB: broadcast maintenance operations
+	 * HCR_AMO: route physical SError interrupts to EL2
+	 * HCR_IMO: route physical IRQ interrupts to EL2
+	 * HCR_FMO: route physical FIQ interrupts to EL2
+	 * HCR_SWIO: turn set/way invalidate into set/way clean and
+	 * invalidate
+	 * HCR_VM: use stage 2 translation
+	 */
+	el2ctx->hcr_el2 = HCR_RW | HCR_TID3 | HCR_TWI | HCR_BSU_IS | HCR_FB |
+	    HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO | HCR_VM;
+
+	/* TODO: Trap all extensions we don't support */
+	el2ctx->mdcr_el2 = 0;
+	/* PMCR_EL0.N is read from MDCR_EL2.HPMN */
+	el2ctx->mdcr_el2 |= (el2ctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT;
+
+	el2ctx->vmpidr_el2 = VMPIDR_EL2_RES1;
+	/* The guest will detect a multi-core, single-threaded CPU */
+	el2ctx->vmpidr_el2 &= ~VMPIDR_EL2_U & ~VMPIDR_EL2_MT;
+	/* Only 24 bits of affinity, for a grand total of 16,777,216 cores. */
+	cpu_aff = el2ctx->vcpu & (CPU_AFF0_MASK | CPU_AFF1_MASK | CPU_AFF2_MASK);
+	el2ctx->vmpidr_el2 |= cpu_aff;
+
+	/* Use the same CPU identification information as the host */
+	el2ctx->vpidr_el2 = CPU_IMPL_TO_MIDR(CPU_IMPL_ARM);
+	el2ctx->vpidr_el2 |= CPU_VAR_TO_MIDR(0);
+	el2ctx->vpidr_el2 |= CPU_ARCH_TO_MIDR(0xf);
+	el2ctx->vpidr_el2 |= CPU_PART_TO_MIDR(CPU_PART_FOUNDATION);
+	el2ctx->vpidr_el2 |= CPU_REV_TO_MIDR(0);
+
+	/*
+	 * Don't trap accesses to CPACR_EL1, trace, SVE, Advanced SIMD
+	 * and floating point functionality to EL2.
+	 */
+	el2ctx->cptr_el2 = CPTR_RES1;
+	/*
+	 * Disable interrupts in the guest. The guest OS will re-enable
+	 * them.
+	 */
+	el2ctx->tf.tf_spsr = PSR_D | PSR_A | PSR_I | PSR_F;
+	/* Use the EL1 stack when taking exceptions to EL1 */
+	el2ctx->tf.tf_spsr |= PSR_M_EL1h;
+}
diff --git a/sys/arm64/vmm/vmm_stat.h b/sys/arm64/vmm/vmm_stat.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+ VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */
+ VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vm *vm, int vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+#define VMM_STAT_INTEL(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL)
+#define VMM_STAT_AMD(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+int vmm_stat_copy(struct vm *vm, int vcpu, int index, int count,
+ int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vm, vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vm, vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VCPU_MIGRATIONS);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_EXTINT);
+VMM_STAT_DECLARE(VMEXIT_HLT);
+VMM_STAT_DECLARE(VMEXIT_CR_ACCESS);
+VMM_STAT_DECLARE(VMEXIT_RDMSR);
+VMM_STAT_DECLARE(VMEXIT_WRMSR);
+VMM_STAT_DECLARE(VMEXIT_MTRAP);
+VMM_STAT_DECLARE(VMEXIT_PAUSE);
+VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_INOUT);
+VMM_STAT_DECLARE(VMEXIT_CPUID);
+VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
+VMM_STAT_DECLARE(VMEXIT_INST_EMUL);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
+VMM_STAT_DECLARE(VMEXIT_USERSPACE);
+VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
+VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+#endif
diff --git a/sys/arm64/vmm/vmm_stat.c b/sys/arm64/vmm/vmm_stat.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (for e.g. VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vm *vm, int vcpu, int index, int count, int *num_stats,
+ uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i, tocopy;
+
+ if (vcpu < 0 || vcpu >= vm_get_maxcpus(vm))
+ return (EINVAL);
+
+ if (index < 0 || count < 0)
+ return (EINVAL);
+
+ if (index > vst_num_elems)
+ return (ENOENT);
+
+ if (index == vst_num_elems) {
+ *num_stats = 0;
+ return (0);
+ }
+
+ tocopy = min(vst_num_elems - index, count);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vm, vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vm, vcpu);
+ memcpy(buf, stats + index, tocopy * sizeof(stats[0]));
+ *num_stats = tocopy;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt");
+VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted");
+VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted");
+VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted");
+VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted");
+VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits");
+VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted");
+VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening");
+VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
+VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
+VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
+VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
+VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
+VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
+VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
+VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -113,6 +113,39 @@
dev/iommu/busdma_iommu.c optional iommu
dev/iommu/iommu_gas.c optional iommu
+arm64/vmm/vmm.c optional vmm
+arm64/vmm/vmm_dev.c optional vmm
+arm64/vmm/vmm_instruction_emul.c optional vmm
+arm64/vmm/vmm_mem.c optional vmm
+arm64/vmm/vmm_stat.c optional vmm
+arm64/vmm/vmm_arm64.c optional vmm
+arm64/vmm/vmm_psci.c optional vmm
+arm64/vmm/vmm_reset.c optional vmm
+arm64/vmm/vmm_call.S optional vmm
+arm64/vmm/vmm_hyp_exception.S optional vmm \
+ compile-with "${NORMAL_C} -fpie" \
+ no-obj
+arm64/vmm/vmm_hyp.c optional vmm \
+ compile-with "${NORMAL_C} -fpie" \
+ no-obj
+vmm_hyp_blob.elf.full optional vmm \
+ dependency "vmm_hyp.o vmm_hyp_exception.o" \
+ compile-with "${CC} -o ${.TARGET} ${.ALLSRC} -fPIE -nostdlib -T ${LDSCRIPT} -Wl,--defsym=text_start='0x0'" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.elf optional vmm \
+ dependency "vmm_hyp_blob.elf.full" \
+ compile-with "${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.bin optional vmm \
+ dependency vmm_hyp_blob.elf \
+ compile-with "${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+arm64/vmm/vmm_hyp_el2.S optional vmm \
+ dependency vmm_hyp_blob.bin
+arm64/vmm/vmm_mmu.c optional vmm
+arm64/vmm/io/vgic_v3.c optional vmm
+arm64/vmm/io/vtimer.c optional vmm
+
crypto/armv8/armv8_crypto.c optional armv8crypto
armv8_crypto_wrap.o optional armv8crypto \
dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \
diff --git a/sys/conf/ldscript.arm64 b/sys/conf/ldscript.arm64
--- a/sys/conf/ldscript.arm64
+++ b/sys/conf/ldscript.arm64
@@ -7,6 +7,7 @@
{
/* Read-only sections, merged into text segment: */
. = text_start; /* This is set using --defsym= on the command line. */
+ .vmm_vectors : { (*.vmm_vectors); }
.text :
{
*(.text)
@@ -17,6 +18,7 @@
} =0x9090
_etext = .;
PROVIDE (etext = .);
+
.fini : { *(.fini) } =0x9090
.rodata : { *(.rodata*) *(.gnu.linkonce.r*) }
.rodata1 : { *(.rodata1) }
diff --git a/sys/conf/options.arm64 b/sys/conf/options.arm64
--- a/sys/conf/options.arm64
+++ b/sys/conf/options.arm64
@@ -18,6 +18,9 @@
# EFI Runtime services support
EFIRT opt_efirt.h
+# Bhyve
+VMM opt_global.h
+
# SoC Support
SOC_ALLWINNER_A64 opt_soc.h
SOC_ALLWINNER_H5 opt_soc.h
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -798,7 +798,9 @@
_sgx_linux= sgx_linux
_smartpqi= smartpqi
_p2sb= p2sb
+.endif
+.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64"
.if ${MK_BHYVE} != "no" || defined(ALL_MODULES)
.if ${KERN_OPTS:MSMP}
_vmm= vmm
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -4,31 +4,68 @@
KMOD= vmm
-SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
-SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
-DPSRCS+= vmx_assym.h svm_assym.h
-DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
CFLAGS+= -DVMM_KEEP_STATS
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io
# generic vmm support
-.PATH: ${SRCTOP}/sys/amd64/vmm
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm
SRCS+= vmm.c \
vmm_dev.c \
- vmm_host.c \
vmm_instruction_emul.c \
+ vmm_mem.c \
+ vmm_stat.c
+
+.if ${MACHINE_CPUARCH} == "aarch64"
+# TODO: Add the new EL2 code
+SRCS+= vmm_arm64.c \
+ vmm_psci.c \
+ vmm_reset.c \
+ vmm_call.S \
+ vmm_mmu.c \
+ vmm_hyp_el2.S
+
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
+SRCS+= vgic_v3.c \
+ vtimer.c
+
+CLEANFILES+= vmm_hyp_exception.o vmm_hyp.o vmm_hyp_blob.elf.full
+CLEANFILES+= vmm_hyp_blob.elf vmm_hyp_blob.bin
+
+CFLAGS.vmm_hyp_exception.S += -fpie
+CFLAGS.vmm_hyp.c += -fpie
+vmm_hyp_exception.o: vmm_hyp_exception.S
+vmm_hyp.o: vmm_hyp.c
+
+vmm_hyp_blob.elf.full: vmm_hyp_exception.o vmm_hyp.o
+ ${CC} -o ${.TARGET} ${.ALLSRC} -fPIE -nostdlib \
+ -T ${SYSDIR}/conf/ldscript.arm64 \
+ -Wl,--defsym=text_start='0x0'
+
+vmm_hyp_blob.elf: vmm_hyp_blob.elf.full
+ ${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_blob.bin: vmm_hyp_blob.elf
+ ${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_el2.o: vmm_hyp_blob.bin
+
+.elif ${MACHINE_CPUARCH} == "amd64"
+DPSRCS+= vmx_assym.h svm_assym.h
+DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+
+SRCS+= vmm_host.c \
vmm_ioport.c \
vmm_lapic.c \
- vmm_mem.c \
- vmm_stat.c \
vmm_util.c \
x86.c
-.PATH: ${SRCTOP}/sys/amd64/vmm/io
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
SRCS+= iommu.c \
ppt.c \
vatpic.c \
@@ -65,10 +102,11 @@
SRCS+= vmm_snapshot.c
.endif
-CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
+CLEANFILES+= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
+.endif
vmx_assym.h: vmx_genassym.o
sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
@@ -84,6 +122,9 @@
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
${.IMPSRC} -o ${.TARGET}
+hyp_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
+
vmx_genassym.o: offset.inc
${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}

File Metadata

Mime Type
text/plain
Expires
Tue, Mar 10, 6:10 PM (16 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29504651
Default Alt Text
D37428.id113252.diff (293 KB)

Event Timeline