Page MenuHomeFreeBSD

D37428.id113252.diff
No OneTemporary

D37428.id113252.diff

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c
--- a/sys/arm64/arm64/genassym.c
+++ b/sys/arm64/arm64/genassym.c
@@ -73,6 +73,7 @@
ASSYM(TF_SIZE, sizeof(struct trapframe));
ASSYM(TF_SP, offsetof(struct trapframe, tf_sp));
+ASSYM(TF_LR, offsetof(struct trapframe, tf_lr));
ASSYM(TF_ELR, offsetof(struct trapframe, tf_elr));
ASSYM(TF_SPSR, offsetof(struct trapframe, tf_spsr));
ASSYM(TF_X, offsetof(struct trapframe, tf_x));
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -104,8 +104,6 @@
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD,
cpu_model, sizeof(cpu_model), "Machine model");
-#define MAX_CACHES 8 /* Maximum number of caches supported
- architecturally. */
/*
* Per-CPU affinity as provided in MPIDR_EL1
* Indexed by CPU number in logical order selected by the system.
@@ -119,32 +117,6 @@
uint64_t __cpu_affinity[MAXCPU];
static u_int cpu_aff_levels;
-struct cpu_desc {
- uint64_t mpidr;
- uint64_t id_aa64afr0;
- uint64_t id_aa64afr1;
- uint64_t id_aa64dfr0;
- uint64_t id_aa64dfr1;
- uint64_t id_aa64isar0;
- uint64_t id_aa64isar1;
- uint64_t id_aa64isar2;
- uint64_t id_aa64mmfr0;
- uint64_t id_aa64mmfr1;
- uint64_t id_aa64mmfr2;
- uint64_t id_aa64pfr0;
- uint64_t id_aa64pfr1;
- uint64_t id_aa64zfr0;
- uint64_t ctr;
-#ifdef COMPAT_FREEBSD32
- uint64_t id_isar5;
- uint64_t mvfr0;
- uint64_t mvfr1;
-#endif
- uint64_t clidr;
- uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */
- bool have_sve;
-};
-
static struct cpu_desc cpu_desc[MAXCPU];
static struct cpu_desc kern_cpu_desc;
static struct cpu_desc user_cpu_desc;
@@ -1824,6 +1796,27 @@
}
}
+void
+update_cpu_desc(struct cpu_desc *desc)
+{
+ struct mrs_field *fields;
+ uint64_t desc_val, kern_val;
+ int i, j;
+
+ for (i = 0; i < nitems(user_regs); i++) {
+ kern_val = CPU_DESC_FIELD(kern_cpu_desc, i);
+ desc_val = CPU_DESC_FIELD(*desc, i);
+
+ fields = user_regs[i].fields;
+ for (j = 0; fields[j].type != 0; j++) {
+ desc_val = update_lower_register(desc_val, kern_val,
+ fields[j].shift, 4, fields[j].sign);
+ }
+
+ CPU_DESC_FIELD(*desc, i) = desc_val;
+ }
+}
+
/* HWCAP */
bool __read_frequently lse_supported = false;
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -503,6 +503,14 @@
#define ID_AA64DFR0_TraceFilt_NONE (UL(0x0) << ID_AA64DFR0_TraceFilt_SHIFT)
#define ID_AA64DFR0_TraceFilt_8_4 (UL(0x1) << ID_AA64DFR0_TraceFilt_SHIFT)
+/* ID_AA64DFR1_EL1 */
+#define	ID_AA64DFR1_EL1			MRS_REG(ID_AA64DFR1_EL1)
+#define ID_AA64DFR1_EL1_op0 3
+#define ID_AA64DFR1_EL1_op1 0
+#define ID_AA64DFR1_EL1_CRn 0
+#define ID_AA64DFR1_EL1_CRm 5
+#define ID_AA64DFR1_EL1_op2 1
+
/* ID_AA64ISAR0_EL1 */
#define ID_AA64ISAR0_EL1 MRS_REG(ID_AA64ISAR0_EL1)
#define ID_AA64ISAR0_EL1_op0 0x3
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -178,6 +178,36 @@
#define CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1 0
#endif
+#define MAX_CACHES 8 /* Maximum number of caches supported
+ architecturally. */
+
+struct cpu_desc {
+ uint64_t mpidr;
+ uint64_t id_aa64afr0;
+ uint64_t id_aa64afr1;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
+ uint64_t id_aa64isar0;
+ uint64_t id_aa64isar1;
+ uint64_t id_aa64isar2;
+ uint64_t id_aa64mmfr0;
+ uint64_t id_aa64mmfr1;
+ uint64_t id_aa64mmfr2;
+ uint64_t id_aa64pfr0;
+ uint64_t id_aa64pfr1;
+ uint64_t id_aa64zfr0;
+ uint64_t ctr;
+#ifdef COMPAT_FREEBSD32
+ uint64_t id_isar5;
+ uint64_t mvfr0;
+ uint64_t mvfr1;
+#endif
+ uint64_t clidr;
+ uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */
+ bool have_sve;
+};
+
+
extern char btext[];
extern char etext[];
@@ -217,6 +247,7 @@
/* Functions to read the sanitised view of the special registers */
void update_special_regs(u_int);
+void update_cpu_desc(struct cpu_desc *desc);
bool extract_user_id_field(u_int, u_int, uint8_t *);
bool get_kernel_reg(u_int, uint64_t *);
diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h
--- a/sys/arm64/include/pcpu.h
+++ b/sys/arm64/include/pcpu.h
@@ -47,6 +47,7 @@
pcpu_ssbd pc_ssbd; \
struct pmap *pc_curpmap; \
struct pmap *pc_curvmpmap; \
+ void *pc_vcpu; \
u_int pc_bcast_tlbi_workaround; \
/* Store as two u_int values to preserve KBI */ \
u_int pc_mpidr_low; /* lower MPIDR 32 bits */ \
diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm.h
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_TRIPLEFAULT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_X0,
+ VM_REG_GUEST_X1,
+ VM_REG_GUEST_X2,
+ VM_REG_GUEST_X3,
+ VM_REG_GUEST_X4,
+ VM_REG_GUEST_X5,
+ VM_REG_GUEST_X6,
+ VM_REG_GUEST_X7,
+ VM_REG_GUEST_X8,
+ VM_REG_GUEST_X9,
+ VM_REG_GUEST_X10,
+ VM_REG_GUEST_X11,
+ VM_REG_GUEST_X12,
+ VM_REG_GUEST_X13,
+ VM_REG_GUEST_X14,
+ VM_REG_GUEST_X15,
+ VM_REG_GUEST_X16,
+ VM_REG_GUEST_X17,
+ VM_REG_GUEST_X18,
+ VM_REG_GUEST_X19,
+ VM_REG_GUEST_X20,
+ VM_REG_GUEST_X21,
+ VM_REG_GUEST_X22,
+ VM_REG_GUEST_X23,
+ VM_REG_GUEST_X24,
+ VM_REG_GUEST_X25,
+ VM_REG_GUEST_X26,
+ VM_REG_GUEST_X27,
+ VM_REG_GUEST_X28,
+ VM_REG_GUEST_X29,
+ VM_REG_GUEST_LR,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_ELR,
+ VM_REG_GUEST_SPSR,
+ VM_REG_ELR_EL2,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_MAX_SUFFIXLEN 15
+
+#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct pmap;
+
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
+typedef int (*vmm_init_func_t)(int ipinum);
+typedef int (*vmm_cleanup_func_t)(void);
+typedef void (*vmm_resume_func_t)(void);
+typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
+typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
+ struct pmap *pmap, struct vm_eventinfo *evinfo);
+typedef void (*vmi_cleanup_func_t)(void *vmi);
+typedef void (*vmi_mmap_set_func_t)(void *arg, vm_offset_t va,
+ vm_offset_t pa, size_t len,
+ vm_prot_t prot);
+typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *arg, vm_offset_t va);
+typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
+ uint64_t *retval);
+typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
+ uint64_t val);
+typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
+typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
+typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
+typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+
+struct vmm_ops {
+ /* Module-wide functions */
+ vmm_init_func_t init;
+ vmm_cleanup_func_t cleanup;
+ vmm_resume_func_t resume;
+ /* VM specific functions */
+ vmi_init_func_t vminit;
+ vmi_run_func_t vmrun;
+ vmi_cleanup_func_t vmcleanup;
+ vmi_get_register_t vmgetreg;
+ vmi_set_register_t vmsetreg;
+ vmi_get_cap_t vmgetcap;
+ vmi_set_cap_t vmsetcap;
+ vmi_vmspace_alloc vmspace_alloc;
+ vmi_vmspace_free vmspace_free;
+};
+
+extern struct vmm_ops vmm_ops_arm;
+
+int vm_create(const char *name, struct vm **retvm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
+int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vmm_map_gpa(struct vm *vm, vm_offset_t va, vm_paddr_t gpa, int pages,
+ vm_page_t *ma);
+void vmm_unmap_gpa(struct vm *vm, vm_offset_t va, size_t pages, vm_page_t *ma);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
+int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+void* vm_get_cookie(struct vm *vm);
+int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
+int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
+int vm_activate_cpu(struct vm *vm, int vcpu);
+int vm_suspend_cpu(struct vm *vm, int vcpu);
+int vm_resume_cpu(struct vm *vm, int vcpu);
+int vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func);
+struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+
+#ifdef _SYS__CPUSET_H_
+/*
+ * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
+ * The rendezvous 'func(arg)' is not allowed to do anything that will
+ * cause the thread to be put to sleep.
+ *
+ * If the rendezvous is being initiated from a vcpu context then the
+ * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
+ *
+ * The caller cannot hold any locks when initiating the rendezvous.
+ *
+ * The implementation of this API may cause vcpus other than those specified
+ * by 'dest' to be stalled. The caller should not rely on any vcpus making
+ * forward progress when the rendezvous is in progress.
+ */
+typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
+void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
+ vm_rendezvous_func_t func, void *arg);
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+#endif /* _SYS__CPUSET_H_ */
+
+static __inline bool
+virt_enabled(void)
+{
+
+ return (has_hyp());
+}
+
+static __inline int
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+ return (*info->sptr);
+}
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
+ bool from_idle);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vm *vm, int vcpu)
+{
+ struct thread *td;
+
+ td = curthread;
+ return (td->td_ast != 0 || td->td_owepreempt != 0);
+}
+#endif
+
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+
+/*
+ * This function is called after a VM-exit that occurred during exception or
+ * interrupt delivery through the IDT. The format of 'intinfo' is described
+ * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
+ *
+ * If a VM-exit handler completes the event delivery successfully then it
+ * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
+ * if the task switch emulation is triggered via a task gate then it should
+ * call this function with 'intinfo=0' to indicate that the external event
+ * is not pending anymore.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
+
+/*
+ * This function is called before every VM-entry to retrieve a pending
+ * event that should be injected into the guest. This function combines
+ * nested events into a double or triple fault.
+ *
+ * Returns 0 if there are no events that need to be injected into the guest
+ * and non-zero otherwise.
+ */
+int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
+
+int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+ uint64_t gpa;
+ size_t len;
+ void *hva;
+ void *cookie;
+};
+
+int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
+
+#endif /* _KERNEL */
+
+#define VM_MAXCPU 4
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+#define VM_GP_M_MASK 0x1f
+#define VM_GP_MMU_ENABLED (1 << 5)
+
+struct vm_guest_paging {
+ uint64_t far;
+ uint64_t ttbr0_el1;
+ uint64_t ttbr1_el1;
+ int flags;
+ int padding;
+};
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_REG_EMUL,
+ VM_EXITCODE_HVC,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_HYP,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SMCCC,
+ VM_EXITCODE_MAX
+};
+
+enum task_switch_reason {
+ TSR_CALL,
+ TSR_IRET,
+ TSR_JMP,
+ TSR_IDT_GATE, /* task gate in IDT */
+};
+
+struct vm_task_switch {
+ uint16_t tsssel; /* new TSS selector */
+ int ext; /* task switch due to external event */
+ uint32_t errcode;
+ int errcode_valid; /* push 'errcode' on the new stack */
+ enum task_switch_reason reason;
+};
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ /*
+ * ARM specific payload.
+ */
+ struct {
+ uint32_t exception_nr;
+ uint32_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } hyp;
+ struct {
+ struct vre vre;
+ } reg_emul;
+ struct {
+ uint64_t gpa;
+ uint64_t esr;
+ } paging;
+ struct {
+ uint64_t gpa;
+ struct vm_guest_paging paging;
+ struct vie vie;
+ } inst_emul;
+
+ /*
+ * A SMCCC call, e.g. starting a core via PSCI.
+ * Further arguments can be read by asking the kernel for
+ * all register values.
+ */
+ struct {
+ uint64_t func_id;
+ uint64_t args[3];
+ } smccc_call;
+
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ } u;
+};
+
+#endif /* _VMM_H_ */
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_dev.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_run {
+ int cpuid;
+ uint64_t pc;
+ struct vm_exit vm_exit;
+
+};
+
+struct vm_exception {
+ int cpuid;
+ int vector;
+ uint32_t error_code;
+ int error_code_valid;
+ int restart_instruction;
+};
+
+struct vm_msi {
+ uint64_t msg;
+ uint64_t addr;
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int index; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+ cpuset_t *cpus;
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_attach_vgic {
+ uint64_t dist_start;
+ size_t dist_size;
+ uint64_t redist_start;
+ size_t redist_size;
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+ IOCNUM_SET_REGISTER_SET = 24,
+ IOCNUM_GET_REGISTER_SET = 25,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+ IOCNUM_RAISE_MSI = 82,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
+
+ /* vm_attach_vgic */
+ IOCNUM_ATTACH_VGIC = 110,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_REGISTER_SET \
+ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define VM_GET_REGISTER_SET \
+ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_RAISE_MSI \
+ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GLA2GPA \
+ _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define VM_ATTACH_VGIC \
+ _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_attach_vgic)
+#endif
diff --git a/sys/arm64/include/vmm_instruction_emul.h b/sys/arm64/include/vmm_instruction_emul.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_instruction_emul.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(void *vm, int cpuid, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(void *vm, int cpuid, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t mrr,
+ mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vie'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/sys/arm64/include/vmm_snapshot.h b/sys/arm64/include/vmm_snapshot.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/arm64.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_ARM64_H_
+#define _VMM_ARM64_H_
+
+#include <machine/reg.h>
+#include <machine/vfp.h>
+#include <machine/hypervisor.h>
+#include <machine/pcpu.h>
+
+#include "mmu.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+struct hypctx {
+ struct trapframe tf;
+
+ /*
+ * EL1 control registers.
+ * Be careful changing the layout of these as we access them from
+ * assembly when switching between the host and guest.
+ */
+ uint64_t elr_el1; /* Exception Link Register */
+ uint64_t sp_el0; /* Stack pointer */
+ uint64_t tpidr_el0; /* EL0 Software ID Register */
+ uint64_t tpidrro_el0; /* Read-only Thread ID Register */
+ uint64_t tpidr_el1; /* EL1 Software ID Register */
+ uint64_t vbar_el1; /* Vector Base Address Register */
+
+ uint64_t actlr_el1; /* Auxiliary Control Register */
+ uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */
+ uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */
+ uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */
+ uint64_t contextidr_el1; /* Current Process Identifier */
+ uint64_t cpacr_el1; /* Architectural Feature Access Control Register */
+ uint64_t csselr_el1; /* Cache Size Selection Register */
+ uint64_t esr_el1; /* Exception Syndrome Register */
+ uint64_t far_el1; /* Fault Address Register */
+ uint64_t mair_el1; /* Memory Attribute Indirection Register */
+ uint64_t mdccint_el1; /* Monitor DCC Interrupt Enable Register */
+ uint64_t mdscr_el1; /* Monitor Debug System Control Register */
+ uint64_t par_el1; /* Physical Address Register */
+ uint64_t sctlr_el1; /* System Control Register */
+ uint64_t tcr_el1; /* Translation Control Register */
+ uint64_t ttbr0_el1; /* Translation Table Base Register 0 */
+ uint64_t ttbr1_el1; /* Translation Table Base Register 1 */
+ uint64_t spsr_el1; /* Saved Program Status Register */
+
+ uint64_t pmcr_el0; /* Performance Monitors Control Register */
+ uint64_t pmccntr_el0;
+ uint64_t pmccfiltr_el0;
+ uint64_t pmcntenset_el0;
+ uint64_t pmintenset_el1;
+ uint64_t pmovsset_el0;
+ uint64_t pmselr_el0;
+ uint64_t pmuserenr_el0;
+ uint64_t pmevcntr_el0[31];
+ uint64_t pmevtyper_el0[31];
+
+ uint64_t dbgbcr_el1[16]; /* Debug Breakpoint Control Registers */
+ uint64_t dbgbvr_el1[16]; /* Debug Breakpoint Value Registers */
+ uint64_t dbgwcr_el1[16]; /* Debug Watchpoint Control Registers */
+ uint64_t dbgwvr_el1[16]; /* Debug Watchpoint Value Registers */
+
+ /* EL2 control registers */
+ uint64_t cptr_el2; /* Architectural Feature Trap Register */
+ uint64_t hcr_el2; /* Hypervisor Configuration Register */
+ uint64_t mdcr_el2; /* Monitor Debug Configuration Register */
+ uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint32_t vcpu;
+ struct hyp *hyp;
+ struct {
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } exit_info;
+
+ struct vtimer_cpu vtimer_cpu;
+ struct vgic_v3_cpu_if vgic_cpu_if;
+ struct vgic_v3_redist vgic_redist;
+#ifdef VFP
+ struct vfpstate vfpstate;
+#endif
+};
+
+struct hyp {
+ struct hypctx ctx[VM_MAXCPU];
+ struct vgic_v3_dist vgic_dist;
+ struct vm *vm;
+ struct vtimer vtimer;
+ uint64_t vmid_generation;
+ uint64_t vttbr_el2;
+ uint64_t el2_addr; /* The address of this in el2 space */
+ bool vgic_attached;
+};
+
+uint64_t vmm_call_hyp(uint64_t, ...);
+void vmm_cleanup(void *hyp_stub_vectors);
+uint64_t vmm_enter_guest(struct hypctx *hypctx);
+uint64_t vmm_read_ich_vtr_el2(void);
+uint64_t vmm_read_cnthctl_el2(void);
+uint64_t vmm_read_tcr_el2(void);
+
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+//#define eprintf(fmt, ...) do {} while(0)
+
+#define VMID_GENERATION_MASK ((1UL<<8) - 1)
+#define build_vttbr(vmid, ptaddr) \
+ ((((vmid) & VMID_GENERATION_MASK) << VTTBR_VMID_SHIFT) | \
+ (uint64_t)(ptaddr))
+
+#define MPIDR_SMP_MASK (0x3 << 30)
+#define MPIDR_AFF1_LEVEL(x) (((x) >> 2) << 8)
+#define MPIDR_AFF0_LEVEL(x) (((x) & 0x3) << 0)
+
+/*
+ * Return true if the exception was caused by a translation fault in the stage 2
+ * translation regime. The DFSC encoding for a translation fault has the format
+ * 0b0001LL, where LL (bits [1:0]) represents the level where the fault occured
+ * (page D7-2280 of the ARMv8 Architecture Manual).
+ */
+#define ISS_DATA_DFSC_TF(esr_iss) \
+ (!((esr_iss) & 0b111000) && ((esr_iss) & 0b000100))
+#define FAR_EL2_PAGE_OFFSET(x) ((x) & PAGE_MASK)
+
+#define DEBUG_ME 0
+
+#define arm64_get_active_vcpu() ((struct hypctx *)PCPU_GET(vcpu))
+
+#endif /* !_VMM_ARM64_H_ */
diff --git a/sys/arm64/vmm/hyp.h b/sys/arm64/vmm/hyp.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/hyp.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_H_
+#define _VMM_HYP_H_
+
+/*
+ * The translation tables for the hypervisor mode will hold mappings for kernel
+ * virtual addresses and an identity mapping (VA == PA) necessary when
+ * enabling/disabling the MMU.
+ *
+ * When in EL2 exception level the translation table base register is TTBR0_EL2
+ * and the virtual addresses generated by the CPU must be at the bottom of the
+ * memory, with the first 16 bits all set to zero:
+ *
+ * 0x0000ffffffffffff End hyp address space
+ * 0x0000000000000000 Start of hyp address space
+ *
+ * To run code in hyp mode we need to convert kernel virtual addresses to
+ * addresses that fit into this address space.
+ *
+ * The kernel virtual address range is:
+ *
+ * 0xffff007fffffffff End of KVA
+ * 0xffff000000000000 Kernel base address & start of KVA
+ *
+ * (see /sys/arm64/include/vmparam.h).
+ *
+ * We could convert the kernel virtual addresses to valid EL2 addresses by
+ * setting the first 16 bits to zero and thus mapping the kernel addresses in
+ * the bottom half of the EL2 address space, but then they might clash with the
+ * identity mapping addresses. Instead we map the kernel addresses in the upper
+ * half of the EL2 address space.
+ *
+ * The hypervisor address space will look like this:
+ *
+ * 0x0000807fffffffff End of KVA mapping
+ * 0x0000800000000000 Start of KVA mapping
+ *
+ * 0x00007fffffffffff End of identity mapping
+ * 0x0000000000000000 Start of identity mapping
+ *
+ * With this scheme we have 47 bits at our disposal for the identity map and
+ * another 47 bits for the kernel virtual addresses. For a maximum physical
+ * memory size of 128TB we are guaranteed to not have any clashes between
+ * addresses.
+ */
+#define HYP_VM_MIN_ADDRESS 0x0000000000000000
+#define HYP_VM_MAX_ADDRESS 0x0001000000000000
+
+/*
+ * When the vmm code is installed the following handles can be used by
+ * the host to call into EL2.
+ */
+#define HYP_CLEANUP 0x00000001
+#define HYP_ENTER_GUEST 0x00000002
+#define HYP_READ_REGISTER 0x00000003
+#define HYP_REG_ICH_VTR 0x1
+#define HYP_REG_CNTHCTL 0x2
+#define HYP_CLEAN_S2_TLBI 0x00000004
+#define HYP_DC_CIVAC 0x00000005
+#define HYP_EL2_TLBI 0x00000006
+#define HYP_EL2_TLBI_ALL 0x1
+#define HYP_EL2_TLBI_VA 0x2
+#define HYP_S2_TLBI_RANGE 0x00000010
+#define HYP_S2_TLBI_ALL 0x00000011
+
+/*
+ * When taking asynchronous exceptions, or interrupts, with the exception of the
+ * SError interrupt, the exception syndrome register is not updated with the
+ * exception code. We need to differentiate between the different exception
+ * types taken to EL2.
+ */
+#define EXCP_TYPE_EL1_SYNC 0
+#define EXCP_TYPE_EL1_IRQ 1
+#define EXCP_TYPE_EL1_FIQ 2
+#define EXCP_TYPE_EL1_ERROR 3
+
+#define EXCP_TYPE_EL2_SYNC 4
+#define EXCP_TYPE_EL2_IRQ 5
+#define EXCP_TYPE_EL2_FIQ 6
+#define EXCP_TYPE_EL2_ERROR 7
+
+#define EXCP_TYPE_MAINT_IRQ 8
+/* Used internally in vmm_hyp.c */
+#define EXCP_TYPE_REENTER 9
+
+#define HYP_GET_VECTOR_TABLE -1
+
+#endif /* !_VMM_HYP_H_ */
diff --git a/sys/arm64/vmm/io/vgic_v3.h b/sys/arm64/vmm/io/vgic_v3.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VGIC_V3_H_
+#define _VMM_VGIC_V3_H_
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm/arm/gic_common.h>
+
+struct hypctx;
+
+int vgic_v3_icc_sgi1r_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vgic_v3_icc_sgi1r_write(void *vm, int vcpuid, uint64_t rval, void *arg);
+
+#define VGIC_SGI_NUM (GIC_LAST_SGI - GIC_FIRST_SGI + 1)
+#define VGIC_PPI_NUM (GIC_LAST_PPI - GIC_FIRST_PPI + 1)
+#define VGIC_SPI_NUM (GIC_LAST_SPI - GIC_FIRST_SPI + 1)
+#define VGIC_PRV_I_NUM (VGIC_SGI_NUM + VGIC_PPI_NUM)
+#define VGIC_SHR_I_NUM (VGIC_SPI_NUM)
+
+#define VGIC_ICH_LR_NUM_MAX 16
+#define VGIC_ICH_APR_NUM_MAX 4
+
+struct vgic_v3_irq {
+ /* List of IRQs that are active or pending */
+ TAILQ_ENTRY(vgic_v3_irq) act_pend_list;
+ struct mtx irq_spinmtx;
+ uint64_t mpidr;
+ int target_vcpu;
+ uint32_t irq;
+ bool active;
+ bool pending;
+ bool enabled;
+ bool level;
+ bool on_aplist;
+ uint8_t priority;
+ uint8_t config;
+#define VGIC_CONFIG_MASK 0x2
+#define VGIC_CONFIG_LEVEL 0x0
+#define VGIC_CONFIG_EDGE 0x2
+};
+
+struct vgic_v3_lpi {
+ struct vgic_v3_irq irq;
+ SLIST_ENTRY(vgic_v3_lpi) next;
+};
+
+struct vgic_mmio_region {
+ vm_offset_t start;
+ vm_offset_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+
+struct vm;
+struct vm_exit;
+struct hyp;
+
+struct vgic_v3_dist {
+ struct mtx dist_mtx;
+
+ uint64_t start;
+ size_t end;
+
+ uint32_t gicd_ctlr; /* Distributor Control Register */
+
+ struct vgic_v3_irq *irqs;
+ SLIST_HEAD(, vgic_v3_lpi) lpis;
+};
+
+#define aff_routing_en(distp) (distp->gicd_ctlr & GICD_CTLR_ARE_NS)
+
+struct vgic_v3_redist {
+ uint64_t start;
+ uint64_t end;
+
+ uint64_t gicr_typer; /* Redistributor Type Register */
+};
+
+struct vgic_v3_irq;
+
+struct vgic_v3_cpu_if {
+ uint32_t ich_eisr_el2; /* End of Interrupt Status Register */
+ uint32_t ich_elrsr_el2; /* Empty List register Status Register (ICH_ELRSR_EL2) */
+ uint32_t ich_hcr_el2; /* Hyp Control Register */
+ uint32_t ich_misr_el2; /* Maintenance Interrupt State Register */
+ uint32_t ich_vmcr_el2; /* Virtual Machine Control Register */
+
+ /*
+ * The List Registers are part of the VM context and are modified on a
+ * world switch. They need to be allocated statically so they are
+ * mapped in the EL2 translation tables when struct hypctx is mapped.
+ */
+ uint64_t ich_lr_el2[VGIC_ICH_LR_NUM_MAX];
+ size_t ich_lr_num;
+
+ /*
+ * We need a mutex for accessing the list registers because they are
+ * modified asynchronously by the virtual timer.
+ *
+ * Note that the mutex *MUST* be a spin mutex because an interrupt can
+ * be injected by a callout callback function, thereby modifying the
+ * list registers from a context where sleeping is forbidden.
+ */
+ struct mtx lr_mtx;
+
+ /* Active Priorities Registers for Group 0 and 1 interrupts */
+ size_t ich_apr_num;
+ uint32_t ich_ap0r_el2[VGIC_ICH_APR_NUM_MAX];
+ uint32_t ich_ap1r_el2[VGIC_ICH_APR_NUM_MAX];
+
+ struct vgic_v3_irq private_irqs[VGIC_PRV_I_NUM];
+ TAILQ_HEAD(, vgic_v3_irq) irq_act_pend;
+ u_int ich_lr_used;
+};
+
+int vgic_v3_attach_to_vm(struct vm *vm, uint64_t dist_start,
+ size_t dist_size, uint64_t redist_start, size_t redist_size);
+void vgic_v3_detach_from_vm(struct vm *vm);
+
+bool vgic_present(void);
+void vgic_v3_init(uint64_t ich_vtr_el2);
+void vgic_v3_vminit(struct hyp *);
+void vgic_v3_cpuinit(struct hypctx *, bool last_vcpu);
+void vgic_v3_cpucleanup(struct hypctx *);
+void vgic_v3_vmcleanup(struct hyp *);
+void vgic_v3_flush_hwstate(void *arg);
+void vgic_v3_sync_hwstate(void *arg);
+
+bool vgic_v3_vcpu_pending_irq(struct hypctx *hypctx);
+int vgic_v3_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid,
+ bool level);
+int vgic_v3_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr);
+
+void vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp);
+int vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled,
+ struct hyp *hyp, int vcpuid);
+
+DECLARE_CLASS(arm_vgic_driver);
+
+#endif /* !_VMM_VGIC_V3_H_ */
diff --git a/sys/arm64/vmm/io/vgic_v3.c b/sys/arm64/vmm/io/vgic_v3.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3.c
@@ -0,0 +1,2033 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/bitstring.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/param.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/intr.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#include <arm64/vmm/hyp.h>
+#include <arm64/vmm/mmu.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vgic_v3_reg.h"
+
+MALLOC_DEFINE(M_VGIC_V3, "ARM VMM VGIC V3", "ARM VMM VGIC V3");
+
+static bool have_vgic = false;
+
+struct vgic_v3_virt_features {
+ uint8_t min_prio;
+ size_t ich_lr_num;
+ size_t ich_apr_num;
+};
+
+/* How many IRQs we support (SGIs + PPIs + SPIs). Not including LPIs */
+#define VGIC_NIRQS 1023
+/* Pretend to be an Arm design */
+#define VGIC_IIDR 0x43b
+
+typedef void (register_read)(struct hyp *, int, u_int, uint64_t *, void *);
+typedef void (register_write)(struct hyp *, int, u_int, u_int, u_int, uint64_t,
+ void *);
+
+#define VGIC_8_BIT (1 << 0)
+/* (1 << 1) is reserved for 16 bit accesses */
+#define VGIC_32_BIT (1 << 2)
+#define VGIC_64_BIT (1 << 3)
+
+struct vgic_register {
+ u_int start; /* Start within a memory region */
+ u_int end;
+ u_int size;
+ u_int flags;
+ register_read *read;
+ register_write *write;
+};
+
+#define VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, readf, \
+ writef) \
+{ \
+ .start = (reg_start), \
+ .end = (reg_end), \
+ .size = (reg_size), \
+ .flags = (reg_flags), \
+ .read = (readf), \
+ .write = (writef), \
+}
+
+#define VGIC_REGISTER_RANGE_RAZ_WI(reg_start, reg_end, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE(reg_start, reg_end, reg_size, reg_flags, \
+ gic_zero_read, gic_ignore_write)
+
+#define VGIC_REGISTER(start_addr, reg_size, reg_flags, readf, writef) \
+ VGIC_REGISTER_RANGE(start_addr, (start_addr) + (reg_size), \
+ reg_size, reg_flags, readf, writef)
+
+#define VGIC_REGISTER_RAZ_WI(start_addr, reg_size, reg_flags) \
+ VGIC_REGISTER_RANGE_RAZ_WI(start_addr, \
+ (start_addr) + (reg_size), reg_size, reg_flags)
+
+static register_read gic_pidr2_read;
+static register_read gic_zero_read;
+static register_write gic_ignore_write;
+
+/* GICD_CTLR */
+static register_read dist_ctlr_read;
+static register_write dist_ctlr_write;
+/* GICD_TYPER */
+static register_read dist_typer_read;
+/* GICD_IIDR */
+static register_read dist_iidr_read;
+/* GICD_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static register_write dist_setclrspi_nsr_write;
+/* GICD_SETSPI_SR - RAZ/WI */
+/* GICD_CLRSPI_SR - RAZ/WI */
+/* GICD_IGROUPR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ISENABLER */
+static register_read dist_isenabler_read;
+static register_write dist_isenabler_write;
+/* GICD_ICENABLER */
+static register_read dist_icenabler_read;
+static register_write dist_icenabler_write;
+/* GICD_ISPENDR */
+static register_read dist_ispendr_read;
+static register_write dist_ispendr_write;
+/* GICD_ICPENDR */
+static register_read dist_icpendr_read;
+static register_write dist_icpendr_write;
+/* GICD_ISACTIVER */
+static register_read dist_isactiver_read;
+static register_write dist_isactiver_write;
+/* GICD_ICACTIVER */
+static register_read dist_icactiver_read;
+static register_write dist_icactiver_write;
+/* GICD_IPRIORITYR */
+static register_read dist_ipriorityr_read;
+static register_write dist_ipriorityr_write;
+/* GICD_ITARGETSR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_ICFGR */
+static register_read dist_icfgr_read;
+static register_write dist_icfgr_write;
+/* GICD_IGRPMODR - RAZ/WI from non-secure mode */
+/* GICD_NSACR - RAZ/WI from non-secure mode */
+/* GICD_SGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_CPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_SPENDSGIR - RAZ/WI as GICD_CTLR.ARE == 1 */
+/* GICD_IROUTER */
+static register_read dist_irouter_read;
+static register_write dist_irouter_write;
+
+static struct vgic_register dist_registers[] = {
+ VGIC_REGISTER(GICD_CTLR, 4, VGIC_32_BIT, dist_ctlr_read,
+ dist_ctlr_write),
+ VGIC_REGISTER(GICD_TYPER, 4, VGIC_32_BIT, dist_typer_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICD_IIDR, 4, VGIC_32_BIT, dist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICD_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_SETSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER(GICD_CLRSPI_NSR, 4, VGIC_32_BIT, gic_zero_read,
+ dist_setclrspi_nsr_write),
+ VGIC_REGISTER_RAZ_WI(GICD_SETSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICD_CLRSPI_SR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IGROUPR(0), GICD_IGROUPR(1024), 4,
+ VGIC_32_BIT),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISENABLER(32), GICD_ISENABLER(1024), 4,
+ VGIC_32_BIT, dist_isenabler_read, dist_isenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICENABLER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICENABLER(32), GICD_ICENABLER(1024), 4,
+ VGIC_32_BIT, dist_icenabler_read, dist_icenabler_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISPENDR(32), GICD_ISPENDR(1024), 4,
+ VGIC_32_BIT, dist_ispendr_read, dist_ispendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICPENDR(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICPENDR(32), GICD_ICPENDR(1024), 4,
+ VGIC_32_BIT, dist_icpendr_read, dist_icpendr_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ISACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ISACTIVER(32), GICD_ISACTIVER(1024), 4,
+ VGIC_32_BIT, dist_isactiver_read, dist_isactiver_write),
+
+ VGIC_REGISTER_RAZ_WI(GICD_ICACTIVER(0), 4, VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICACTIVER(32), GICD_ICACTIVER(1024), 4,
+ VGIC_32_BIT, dist_icactiver_read, dist_icactiver_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_IPRIORITYR(0), GICD_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+ VGIC_REGISTER_RANGE(GICD_IPRIORITYR(32), GICD_IPRIORITYR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_ipriorityr_read,
+ dist_ipriorityr_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ITARGETSR(0), GICD_ITARGETSR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_ICFGR(0), GICD_ICFGR(32), 4,
+ VGIC_32_BIT),
+ VGIC_REGISTER_RANGE(GICD_ICFGR(32), GICD_ICFGR(1024), 4,
+ VGIC_32_BIT, dist_icfgr_read, dist_icfgr_write),
+/*
+ VGIC_REGISTER_RANGE(GICD_IGRPMODR(0), GICD_IGRPMODR(1024), 4,
+ VGIC_32_BIT, dist_igrpmodr_read, dist_igrpmodr_write),
+ VGIC_REGISTER_RANGE(GICD_NSACR(0), GICD_NSACR(1024), 4,
+ VGIC_32_BIT, dist_nsacr_read, dist_nsacr_write),
+*/
+ VGIC_REGISTER_RAZ_WI(GICD_SGIR, 4, VGIC_32_BIT),
+/*
+ VGIC_REGISTER_RANGE(GICD_CPENDSGIR(0), GICD_CPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_cpendsgir_read,
+ dist_cpendsgir_write),
+ VGIC_REGISTER_RANGE(GICD_SPENDSGIR(0), GICD_SPENDSGIR(1024), 4,
+ VGIC_32_BIT | VGIC_8_BIT, dist_spendsgir_read,
+ dist_spendsgir_write),
+*/
+ VGIC_REGISTER_RANGE(GICD_IROUTER(32), GICD_IROUTER(1024), 8,
+ VGIC_64_BIT | VGIC_32_BIT, dist_irouter_read, dist_irouter_write),
+
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4, VGIC_32_BIT),
+};
+
+/* GICR_CTLR - Ignore writes as no bits can be set */
+static register_read redist_ctlr_read;
+/* GICR_IIDR */
+static register_read redist_iidr_read;
+/* GICR_TYPER */
+static register_read redist_typer_read;
+/* GICR_STATUSR - RAZ/WI as we don't report errors (yet) */
+/* GICR_WAKER - RAZ/WI from non-secure mode */
+/* GICR_SETLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_CLRLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_PROPBASER - RAZ/WI as no LPIs are supported */
+/* GICR_PENDBASER - RAZ/WI as no LPIs are supported */
+/* GICR_INVLPIR - RAZ/WI as no LPIs are supported */
+/* GICR_INVALLR - RAZ/WI as no LPIs are supported */
+/* GICR_SYNCR - RAZ/WI as no LPIs are supported */
+
+static struct vgic_register redist_rd_registers[] = {
+ VGIC_REGISTER(GICR_CTLR, 4, VGIC_32_BIT, redist_ctlr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_IIDR, 4, VGIC_32_BIT, redist_iidr_read,
+ gic_ignore_write),
+ VGIC_REGISTER(GICR_TYPER, 8, VGIC_64_BIT | VGIC_32_BIT,
+ redist_typer_read, gic_ignore_write),
+ VGIC_REGISTER_RAZ_WI(GICR_STATUSR, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_WAKER, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SETLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_CLRLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PROPBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_PENDBASER, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVLPIR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_INVALLR, 8, VGIC_64_BIT | VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_SYNCR, 4, VGIC_32_BIT),
+
+ /* These are identical to the dist registers */
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR4, GICD_PIDR2, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICD_PIDR2, 4, VGIC_32_BIT, gic_pidr2_read,
+ gic_ignore_write),
+ VGIC_REGISTER_RANGE_RAZ_WI(GICD_PIDR2 + 4, GICD_SIZE, 4,
+ VGIC_32_BIT),
+};
+
+/* GICR_IGROUPR0 - RAZ/WI from non-secure mode */
+/* GICR_ISENABLER0 */
+static register_read redist_ienabler0_read;
+static register_write redist_isenabler0_write;
+/* GICR_ICENABLER0 */
+static register_write redist_icenabler0_write;
+/* GICR_ISPENDR0 */
+static register_read redist_ipendr0_read;
+static register_write redist_ispendr0_write;
+/* GICR_ICPENDR0 */
+static register_write redist_icpendr0_write;
+/* GICR_ISACTIVER0 */
+static register_read redist_iactiver0_read;
+static register_write redist_isactiver0_write;
+/* GICR_ICACTIVER0 */
+static register_write redist_icactiver0_write;
+/* GICR_IPRIORITYR */
+static register_read redist_ipriorityr_read;
+static register_write redist_ipriorityr_write;
+/* GICR_ICFGR0 - RAZ/WI from non-secure mode */
+/* GICR_ICFGR1 */
+static register_read redist_icfgr1_read;
+static register_write redist_icfgr1_write;
+/* GICR_IGRPMODR0 - RAZ/WI from non-secure mode */
+/* GICR_NSACR - RAZ/WI from non-secure mode */
+
+static struct vgic_register redist_sgi_registers[] = {
+ VGIC_REGISTER_RAZ_WI(GICR_IGROUPR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ISENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_isenabler0_write),
+ VGIC_REGISTER(GICR_ICENABLER0, 4, VGIC_32_BIT, redist_ienabler0_read,
+ redist_icenabler0_write),
+ VGIC_REGISTER(GICR_ISPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_ispendr0_write),
+ VGIC_REGISTER(GICR_ICPENDR0, 4, VGIC_32_BIT, redist_ipendr0_read,
+ redist_icpendr0_write),
+ VGIC_REGISTER(GICR_ISACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_isactiver0_write),
+ VGIC_REGISTER(GICR_ICACTIVER0, 4, VGIC_32_BIT, redist_iactiver0_read,
+ redist_icactiver0_write),
+ VGIC_REGISTER_RANGE(GICR_IPRIORITYR(0), GICR_IPRIORITYR(32), 4,
+ VGIC_32_BIT | VGIC_8_BIT, redist_ipriorityr_read,
+ redist_ipriorityr_write),
+ VGIC_REGISTER_RAZ_WI(GICR_ICFGR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER(GICR_ICFGR1, 4, VGIC_32_BIT, redist_icfgr1_read,
+ redist_icfgr1_write),
+ VGIC_REGISTER_RAZ_WI(GICR_IGRPMODR0, 4, VGIC_32_BIT),
+ VGIC_REGISTER_RAZ_WI(GICR_NSACR, 4, VGIC_32_BIT),
+};
+
+static struct vgic_v3_virt_features virt_features;
+
+static struct vgic_v3_irq *vgic_v3_get_irq(struct hyp *, int, uint32_t);
+static void vgic_v3_release_irq(struct vgic_v3_irq *);
+
+/* TODO: Move to a common file */
+static int
+mpidr_to_vcpu(struct hyp *hyp, uint64_t mpidr)
+{
+ struct vm *vm;
+
+ vm = hyp->vm;
+ for (int i = 0; i < vm_get_maxcpus(vm); i++) {
+ if ((hyp->ctx[i].vmpidr_el2 & GICD_AFF) == mpidr)
+ return (i);
+ }
+ return (-1);
+}
+
+void
+vgic_v3_vminit(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ /*
+ * Configure the Distributor control register. The register resets to an
+ * architecturally UNKNOWN value, so we reset to 0 to disable all
+ * functionality controlled by the register.
+ *
+ * The exception is GICD_CTLR.DS, which is RAO/WI when the Distributor
+ * supports one security state (ARM GIC Architecture Specification for
+ * GICv3 and GICv4, p. 4-464)
+ */
+ dist->gicd_ctlr = 0;
+
+ mtx_init(&dist->dist_mtx, "VGICv3 Distributor lock", NULL, MTX_SPIN);
+}
+
+void
+vgic_v3_cpuinit(struct hypctx *hypctx, bool last_vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ struct vgic_v3_irq *irq;
+ uint64_t aff, vmpidr_el2;
+ int i, irqid;
+
+ vmpidr_el2 = hypctx->vmpidr_el2;
+ KASSERT(vmpidr_el2 != 0,
+ ("Trying to init this CPU's vGIC before the vCPU"));
+ /*
+ * Get affinity for the current CPU. The guest CPU affinity is taken
+ * from VMPIDR_EL2. The Redistributor corresponding to this CPU is
+ * the Redistributor with the same affinity from GICR_TYPER.
+ */
+ aff = (CPU_AFF3(vmpidr_el2) << 24) | (CPU_AFF2(vmpidr_el2) << 16) |
+ (CPU_AFF1(vmpidr_el2) << 8) | CPU_AFF0(vmpidr_el2);
+
+ /* Set up GICR_TYPER. */
+ redist->gicr_typer = aff << GICR_TYPER_AFF_SHIFT;
+	/* Set the vcpu as the processor ID */
+ redist->gicr_typer |= hypctx->vcpu << GICR_TYPER_CPUNUM_SHIFT;
+
+ if (last_vcpu)
+ /* Mark the last Redistributor */
+ redist->gicr_typer |= GICR_TYPER_LAST;
+
+ mtx_init(&cpu_if->lr_mtx, "VGICv3 ICH_LR_EL2 lock", NULL, MTX_SPIN);
+
+ /* Set the SGI and PPI state */
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &cpu_if->private_irqs[irqid];
+
+ mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+ MTX_SPIN);
+ irq->irq = irqid;
+ irq->mpidr = hypctx->vmpidr_el2 & GICD_AFF;
+ irq->target_vcpu = mpidr_to_vcpu(hypctx->hyp, irq->mpidr);
+ if (irqid < VGIC_SGI_NUM) {
+ /* SGIs */
+ irq->enabled = true;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ irq->priority = 0;
+ }
+
+ /*
+ * Configure the Interrupt Controller Hyp Control Register.
+ *
+ * ICH_HCR_EL2_En: enable virtual CPU interface.
+ *
+ * Maintenance interrupts are disabled.
+ */
+ cpu_if->ich_hcr_el2 = ICH_HCR_EL2_En;
+
+ /*
+ * Configure the Interrupt Controller Virtual Machine Control Register.
+ *
+ * ICH_VMCR_EL2_VPMR: lowest priority mask for the VCPU interface
+ * ICH_VMCR_EL2_VBPR1_NO_PREEMPTION: disable interrupt preemption for
+ * Group 1 interrupts
+ * ICH_VMCR_EL2_VBPR0_NO_PREEMPTION: disable interrupt preemption for
+ * Group 0 interrupts
+ * ~ICH_VMCR_EL2_VEOIM: writes to EOI registers perform priority drop
+ * and interrupt deactivation.
+ * ICH_VMCR_EL2_VENG0: virtual Group 0 interrupts enabled.
+ * ICH_VMCR_EL2_VENG1: virtual Group 1 interrupts enabled.
+ */
+ cpu_if->ich_vmcr_el2 = \
+ (virt_features.min_prio << ICH_VMCR_EL2_VPMR_SHIFT) | \
+ ICH_VMCR_EL2_VBPR1_NO_PREEMPTION | ICH_VMCR_EL2_VBPR0_NO_PREEMPTION;
+ cpu_if->ich_vmcr_el2 &= ~ICH_VMCR_EL2_VEOIM;
+ cpu_if->ich_vmcr_el2 |= ICH_VMCR_EL2_VENG0 | ICH_VMCR_EL2_VENG1;
+
+ cpu_if->ich_lr_num = virt_features.ich_lr_num;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ cpu_if->ich_lr_el2[i] = 0UL;
+ cpu_if->ich_lr_used = 0;
+ TAILQ_INIT(&cpu_if->irq_act_pend);
+
+ cpu_if->ich_apr_num = virt_features.ich_apr_num;
+}
+
+void
+vgic_v3_cpucleanup(struct hypctx *hypctx)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ struct vgic_v3_irq *irq;
+ int irqid;
+
+ cpu_if = &hypctx->vgic_cpu_if;
+ for (irqid = 0; irqid < VGIC_PRV_I_NUM; irqid++) {
+ irq = &cpu_if->private_irqs[irqid];
+ mtx_destroy(&irq->irq_spinmtx);
+ }
+
+ mtx_destroy(&cpu_if->lr_mtx);
+}
+
+void
+vgic_v3_vmcleanup(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ mtx_destroy(&dist->dist_mtx);
+}
+
+static bool
+vgic_v3_irq_pending(struct vgic_v3_irq *irq)
+{
+ if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL) {
+ return (irq->pending || irq->level);
+ } else {
+ return (irq->pending);
+ }
+}
+
+static bool
+vgic_v3_queue_irq(struct hyp *hyp, struct vgic_v3_cpu_if *cpu_if,
+ int vcpuid, struct vgic_v3_irq *irq)
+{
+ MPASS(vcpuid >= 0);
+ MPASS(vcpuid < VM_MAXCPU);
+
+ mtx_assert(&cpu_if->lr_mtx, MA_OWNED);
+ mtx_assert(&irq->irq_spinmtx, MA_OWNED);
+
+ /* No need to queue the IRQ */
+ if (!irq->level && !irq->pending)
+ return (false);
+
+ if (!irq->on_aplist) {
+ irq->on_aplist = true;
+ TAILQ_INSERT_TAIL(&cpu_if->irq_act_pend, irq, act_pend_list);
+ }
+ return (true);
+}
+
+static uint64_t
+gic_reg_value_64(uint64_t field, uint64_t val, u_int offset, u_int size)
+{
+ uint32_t mask;
+
+ if (offset != 0 || size != 8) {
+ mask = ((1ul << (size * 8)) - 1) << (offset * 8);
+ /* Shift the new bits to the correct place */
+ val <<= (offset * 8);
+ /* Keep only the interesting bits */
+ val &= mask;
+ /* Add the bits we are keeping from the old value */
+ val |= field & ~mask;
+ }
+
+ return (val);
+}
+
+static void
+gic_pidr2_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = GICR_PIDR2_ARCH_GICv3 << GICR_PIDR2_ARCH_SHIFT;
+}
+
+/* Common read-only/write-ignored helpers */
+static void
+gic_zero_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+ void *arg)
+{
+ *rval = 0;
+}
+
+static void
+gic_ignore_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+ u_int size, uint64_t wval, void *arg)
+{
+ /* Nothing to do */
+}
+
+static uint64_t
+read_enabler(struct hyp *hyp, int vcpuid, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (!irq->enabled)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static void
+write_enabler(struct hyp *hyp, int vcpuid, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_irq *irq;
+ uint32_t irq_base;
+ int i;
+
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ /* Find the interrupt this bit represents */
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ irq->enabled = set;
+ vgic_v3_release_irq(irq);
+ }
+}
+
+static uint64_t
+read_pendr(struct hyp *hyp, int vcpuid, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (vgic_v3_irq_pending(irq))
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+static uint64_t
+write_pendr(struct hyp *hyp, int vcpuid, int n, bool set, uint64_t val)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int target_vcpu, i;
+ bool notify;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ /* We only change interrupts when the appropriate bit is set */
+ if ((val & (1u << i)) == 0)
+ continue;
+
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ notify = false;
+ target_vcpu = irq->target_vcpu;
+ if (target_vcpu < 0)
+ goto next_irq;
+ cpu_if = &hyp->ctx[target_vcpu].vgic_cpu_if;
+
+ if (!set) {
+ /* pending -> not pending */
+ irq->pending = false;
+ } else {
+ irq->pending = true;
+ mtx_lock_spin(&cpu_if->lr_mtx);
+ notify = vgic_v3_queue_irq(hyp, cpu_if, target_vcpu,
+ irq);
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+ }
+next_irq:
+ vgic_v3_release_irq(irq);
+
+ if (notify)
+ vcpu_notify_event(hyp->vm, target_vcpu, false);
+ }
+
+ return (ret);
+}
+
+static uint64_t
+read_activer(struct hyp *hyp, int vcpuid, int n)
+{
+ struct vgic_v3_irq *irq;
+ uint64_t ret;
+ uint32_t irq_base;
+ int i;
+
+ ret = 0;
+ irq_base = n * 32;
+ for (i = 0; i < 32; i++) {
+ irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+ if (irq == NULL)
+ continue;
+
+ if (irq->active)
+ ret |= 1u << i;
+ vgic_v3_release_irq(irq);
+ }
+
+ return (ret);
+}
+
+/*
+ * Update the active state of the 32 interrupts in group n whose bit in
+ * val is set; `set` selects between set-active and clear-active
+ * behaviour. Newly activated interrupts are queued on their target
+ * vcpu, which is then notified after the IRQ lock is dropped.
+ */
+static void
+write_activer(struct hyp *hyp, int vcpuid, u_int n, bool set, uint64_t val)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	uint32_t irq_base;
+	int target_vcpu, i;
+	bool notify;
+
+	irq_base = n * 32;
+	for (i = 0; i < 32; i++) {
+		/* We only change interrupts when the appropriate bit is set */
+		if ((val & (1u << i)) == 0)
+			continue;
+
+		irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + i);
+		if (irq == NULL)
+			continue;
+
+		notify = false;
+		target_vcpu = irq->target_vcpu;
+		/* Skip interrupts not yet routed to a vcpu */
+		if (target_vcpu < 0)
+			goto next_irq;
+		cpu_if = &hyp->ctx[target_vcpu].vgic_cpu_if;
+
+		if (!set) {
+			/* active -> not active */
+			irq->active = false;
+		} else {
+			/* not active -> active */
+			irq->active = true;
+			mtx_lock_spin(&cpu_if->lr_mtx);
+			notify = vgic_v3_queue_irq(hyp, cpu_if, target_vcpu,
+			    irq);
+			mtx_unlock_spin(&cpu_if->lr_mtx);
+		}
+next_irq:
+		vgic_v3_release_irq(irq);
+
+		/* Wake the target vcpu after dropping the IRQ spinlock */
+		if (notify)
+			vcpu_notify_event(hyp->vm, target_vcpu, false);
+	}
+}
+
+/*
+ * Pack the 8-bit priority fields of the four interrupts covered by
+ * GIC[DR]_IPRIORITYRn into a single 32-bit value.
+ */
+static uint64_t
+read_priorityr(struct hyp *hyp, int vcpuid, int n)
+{
+	struct vgic_v3_irq *irq;
+	uint64_t val;
+	uint32_t first_irq;
+	int slot;
+
+	val = 0;
+	first_irq = n * 4;
+	for (slot = 0; slot < 4; slot++) {
+		irq = vgic_v3_get_irq(hyp, vcpuid, first_irq + slot);
+		if (irq == NULL)
+			continue;
+
+		val |= ((uint64_t)irq->priority) << (slot * 8);
+		vgic_v3_release_irq(irq);
+	}
+
+	return (val);
+}
+
+/*
+ * Store one 8-bit priority field per byte of val for `size` consecutive
+ * interrupts starting at irq_base.
+ */
+static void
+write_priorityr(struct hyp *hyp, int vcpuid, u_int irq_base, u_int size,
+    uint64_t val)
+{
+	struct vgic_v3_irq *irq;
+	int slot;
+
+	for (slot = 0; slot < size; slot++) {
+		irq = vgic_v3_get_irq(hyp, vcpuid, irq_base + slot);
+		if (irq == NULL)
+			continue;
+
+		/* Set the priority. We support 32 priority steps (5 bits) */
+		irq->priority = (val >> (slot * 8)) & 0xf8;
+		vgic_v3_release_irq(irq);
+	}
+}
+
+/*
+ * Pack the 2-bit trigger configuration of the 16 interrupts covered by
+ * GIC[DR]_ICFGRn into a single 32-bit value.
+ */
+static uint64_t
+read_config(struct hyp *hyp, int vcpuid, int n)
+{
+	struct vgic_v3_irq *irq;
+	uint64_t val;
+	uint32_t first_irq;
+	int slot;
+
+	val = 0;
+	first_irq = n * 16;
+	for (slot = 0; slot < 16; slot++) {
+		irq = vgic_v3_get_irq(hyp, vcpuid, first_irq + slot);
+		if (irq == NULL)
+			continue;
+
+		val |= ((uint64_t)irq->config) << (slot * 2);
+		vgic_v3_release_irq(irq);
+	}
+
+	return (val);
+}
+
+/*
+ * Update the 2-bit trigger configuration of the 16 interrupts covered
+ * by GIC[DR]_ICFGRn from val. SGIs and PPIs are left untouched.
+ */
+static void
+write_config(struct hyp *hyp, int vcpuid, int n, uint64_t val)
+{
+	struct vgic_v3_irq *irq;
+	uint32_t first_irq;
+	int slot;
+
+	first_irq = n * 16;
+	for (slot = 0; slot < 16; slot++) {
+		/*
+		 * The config can't be changed for SGIs and PPIs. SGIs have
+		 * an edge-triggered behaviour, and the register is
+		 * implementation defined to be read-only for PPIs.
+		 */
+		if (first_irq + slot < VGIC_PRV_I_NUM)
+			continue;
+
+		irq = vgic_v3_get_irq(hyp, vcpuid, first_irq + slot);
+		if (irq == NULL)
+			continue;
+
+		/* Bit 0 is RES0 */
+		irq->config = (val >> (slot * 2)) & VGIC_CONFIG_MASK;
+		vgic_v3_release_irq(irq);
+	}
+}
+
+/*
+ * Return the routing affinity (GICD_IROUTERn) of interrupt n, or 0 if
+ * the INTID is invalid.
+ */
+static uint64_t
+read_route(struct hyp *hyp, int vcpuid, int n)
+{
+	struct vgic_v3_irq *irq;
+	uint64_t aff;
+
+	irq = vgic_v3_get_irq(hyp, vcpuid, n);
+	if (irq == NULL)
+		return (0);
+
+	aff = irq->mpidr;
+	vgic_v3_release_irq(irq);
+
+	return (aff);
+}
+
+/*
+ * Update the routing affinity (GICD_IROUTERn) of interrupt n and
+ * recompute its target vcpu. Supports partial-width accesses via
+ * offset/size.
+ */
+static void
+write_route(struct hyp *hyp, int vcpuid, int n, uint64_t val, u_int offset,
+    u_int size)
+{
+	struct vgic_v3_irq *irq;
+
+	irq = vgic_v3_get_irq(hyp, vcpuid, n);
+	if (irq == NULL)
+		return;
+
+	irq->mpidr = gic_reg_value_64(irq->mpidr, val, offset, size) & GICD_AFF;
+	irq->target_vcpu = mpidr_to_vcpu(hyp, irq->mpidr);
+	/*
+	 * If the interrupt is pending we can either use the old mpidr, or
+	 * the new mpidr. To simplify this code we use the old value so we
+	 * don't need to move the interrupt until the next time it is
+	 * moved to the pending state.
+	 */
+	vgic_v3_release_irq(irq);
+}
+
+/*
+ * Distributor register handlers.
+ */
+/* GICD_CTLR */
+static void
+dist_ctlr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	struct vgic_v3_dist *dist;
+
+	dist = &hyp->vgic_dist;
+	/* gicd_ctlr is shared between vcpus; read it under the dist lock */
+	mtx_lock_spin(&dist->dist_mtx);
+	*rval = dist->gicd_ctlr;
+	mtx_unlock_spin(&dist->dist_mtx);
+
+	/* Writes are never pending */
+	*rval &= ~GICD_CTLR_RWP;
+}
+
+static void
+dist_ctlr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	struct vgic_v3_dist *dist;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	dist = &hyp->vgic_dist;
+
+	/*
+	 * GICv2 backwards compatibility is not implemented so
+	 * ARE_NS is RAO/WI. This means EnableGrp1 is RES0.
+	 *
+	 * EnableGrp1A is supported, and RWP is read-only.
+	 *
+	 * All other bits are RES0 from non-secure mode as we
+	 * implement as if we are in a system with two security
+	 * states.
+	 */
+	wval &= GICD_CTLR_G1A;
+	wval |= GICD_CTLR_ARE_NS;
+	mtx_lock_spin(&dist->dist_mtx);
+	dist->gicd_ctlr = wval;
+	/* TODO: Wake any vcpus that have interrupts pending */
+	mtx_unlock_spin(&dist->dist_mtx);
+}
+
+/* GICD_TYPER */
+static void
+dist_typer_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	uint32_t typer;
+
+	/* Advertise 10 interrupt ID bits (IDbits is encoded as bits - 1) */
+	typer = (10 - 1) << GICD_TYPER_IDBITS_SHIFT;
+	/* Message-based interrupts (GICD_SETSPI_NSR) are supported */
+	typer |= GICD_TYPER_MBIS;
+	/* ITLinesNumber: */
+	typer |= howmany(VGIC_NIRQS + 1, 32) - 1;
+
+	*rval = typer;
+}
+
+/* GICD_IIDR */
+static void
+dist_iidr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = VGIC_IIDR;
+}
+
+/* GICD_SETSPI_NSR & GICD_CLRSPI_NSR */
+static void
+dist_setclrspi_nsr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	uint32_t irqid;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	irqid = wval & GICD_SPI_INTID_MASK;
+	/* SETSPI raises the SPI, CLRSPI lowers it */
+	vgic_v3_inject_irq(hyp, vcpuid, irqid, reg == GICD_SETSPI_NSR);
+}
+
+/* GICD_ISENABLER */
+static void
+dist_isenabler_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ISENABLER(0)) / 4;
+	/* GICD_ISENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_enabler(hyp, vcpuid, n);
+}
+
+static void
+dist_isenabler_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ISENABLER(0)) / 4;
+	/* GICD_ISENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Set-enable: only bits set in wval enable their interrupt */
+	write_enabler(hyp, vcpuid, n, true, wval);
+}
+
+/* GICD_ICENABLER */
+static void
+dist_icenabler_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICENABLER(0)) / 4;
+	/* GICD_ICENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_enabler(hyp, vcpuid, n);
+}
+
+static void
+dist_icenabler_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	/*
+	 * reg is the offset of a GICD_ICENABLERn register, so the index
+	 * must be computed from the ICENABLER base, not the ISENABLER
+	 * base used previously.
+	 */
+	n = (reg - GICD_ICENABLER(0)) / 4;
+	/* GICD_ICENABLER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Clear-enable: only bits set in wval disable their interrupt */
+	write_enabler(hyp, vcpuid, n, false, wval);
+}
+
+/* GICD_ISPENDR */
+static void
+dist_ispendr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ISPENDR(0)) / 4;
+	/* GICD_ISPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_pendr(hyp, vcpuid, n);
+}
+
+static void
+dist_ispendr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ISPENDR(0)) / 4;
+	/* GICD_ISPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Set-pending: only bits set in wval mark their interrupt pending */
+	write_pendr(hyp, vcpuid, n, true, wval);
+}
+
+/* GICD_ICPENDR */
+static void
+dist_icpendr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICPENDR(0)) / 4;
+	/* GICD_ICPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_pendr(hyp, vcpuid, n);
+}
+
+static void
+dist_icpendr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ICPENDR(0)) / 4;
+	/* GICD_ICPENDR0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Clear-pending: only bits set in wval clear their interrupt */
+	write_pendr(hyp, vcpuid, n, false, wval);
+}
+
+/* GICD_ISACTIVER */
+/* Affinity routing is enabled so isactiver0 is RAZ/WI */
+static void
+dist_isactiver_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ISACTIVER(0)) / 4;
+	/* GICD_ISACTIVER0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_activer(hyp, vcpuid, n);
+}
+
+static void
+dist_isactiver_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ISACTIVER(0)) / 4;
+	/* GICD_ISACTIVE0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Set-active: only bits set in wval mark their interrupt active */
+	write_activer(hyp, vcpuid, n, true, wval);
+}
+
+/* GICD_ICACTIVER */
+static void
+dist_icactiver_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICACTIVER(0)) / 4;
+	/* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	*rval = read_activer(hyp, vcpuid, n);
+}
+
+static void
+dist_icactiver_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ICACTIVER(0)) / 4;
+	/* GICD_ICACTIVE0 is RAZ/WI so handled separately */
+	MPASS(n > 0);
+	/* Clear-active: only bits set in wval deactivate their interrupt */
+	write_activer(hyp, vcpuid, n, false, wval);
+}
+
+/* GICD_IPRIORITYR */
+/* Affinity routing is enabled so ipriorityr0-7 is RAZ/WI */
+static void
+dist_ipriorityr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_IPRIORITYR(0)) / 4;
+	/* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+	MPASS(n > 7);
+	*rval = read_priorityr(hyp, vcpuid, n);
+}
+
+static void
+dist_ipriorityr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	u_int irq_base;
+
+	/* Byte-addressable: each byte of the register is one interrupt */
+	irq_base = (reg - GICD_IPRIORITYR(0)) + offset;
+	/* GICD_IPRIORITY0-7 is RAZ/WI so handled separately */
+	MPASS(irq_base > 31);
+	write_priorityr(hyp, vcpuid, irq_base, size, wval);
+}
+
+/* GICD_ICFGR */
+static void
+dist_icfgr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICD_ICFGR(0)) / 4;
+	/* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+	MPASS(n > 1);
+	*rval = read_config(hyp, vcpuid, n);
+}
+
+static void
+dist_icfgr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	n = (reg - GICD_ICFGR(0)) / 4;
+	/* GICD_ICFGR0-1 are RAZ/WI so handled separately */
+	MPASS(n > 1);
+	write_config(hyp, vcpuid, n, wval);
+}
+
+/* GICD_IROUTER */
+static void
+dist_irouter_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	/* 64-bit registers, so stride is 8 bytes */
+	n = (reg - GICD_IROUTER(0)) / 8;
+	/* GICD_IROUTER0-31 don't exist */
+	MPASS(n > 31);
+	*rval = read_route(hyp, vcpuid, n);
+}
+
+static void
+dist_irouter_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	int n;
+
+	n = (reg - GICD_IROUTER(0)) / 8;
+	/* GICD_IROUTER0-31 don't exist */
+	MPASS(n > 31);
+	write_route(hyp, vcpuid, n, wval, offset, size);
+}
+
+/*
+ * Find the entry in reg_list covering `reg` and dispatch to its read
+ * handler, extracting the requested sub-field for accesses narrower
+ * than the register. Returns false if no entry covers the access.
+ */
+static bool
+vgic_register_read(struct hyp *hyp, struct vgic_register *reg_list,
+    u_int reg_list_size, int vcpuid, u_int reg, u_int size,
+    uint64_t *rval, void *arg)
+{
+	u_int idx, offset;
+
+	for (idx = 0; idx < reg_list_size; idx++) {
+		if (reg < reg_list[idx].start ||
+		    reg + size > reg_list[idx].end)
+			continue;
+
+		/* Align the access down to the register's base */
+		offset = reg & (reg_list[idx].size - 1);
+		reg -= offset;
+		if ((reg_list[idx].flags & size) == 0) {
+			panic("TODO: Handle invalid register size: "
+			    "reg %x size %d", reg, size);
+		}
+		reg_list[idx].read(hyp, vcpuid, reg, rval, NULL);
+
+		/* Move the bits into the correct place */
+		*rval >>= (offset * 8);
+		if (size < 8)
+			*rval &= (1ul << (size * 8)) - 1;
+		return (true);
+	}
+	return (false);
+}
+
+/*
+ * Find the entry in reg_list covering `reg` and dispatch to its write
+ * handler with the offset of the access within the register. Returns
+ * false if no entry covers the access.
+ */
+static bool
+vgic_register_write(struct hyp *hyp, struct vgic_register *reg_list,
+    u_int reg_list_size, int vcpuid, u_int reg, u_int size,
+    uint64_t wval, void *arg)
+{
+	u_int idx, offset;
+
+	for (idx = 0; idx < reg_list_size; idx++) {
+		if (reg < reg_list[idx].start ||
+		    reg + size > reg_list[idx].end)
+			continue;
+
+		/* Align the access down to the register's base */
+		offset = reg & (reg_list[idx].size - 1);
+		reg -= offset;
+		if ((reg_list[idx].flags & size) == 0) {
+			panic("TODO: Handle invalid register size: "
+			    "reg %x size %d", reg, size);
+		}
+		reg_list[idx].write(hyp, vcpuid, reg, offset,
+		    size, wval, NULL);
+		return (true);
+	}
+	return (false);
+}
+
+/*
+ * MMIO read handler for the distributor register range. Validates the
+ * faulting address and alignment, then dispatches via dist_registers.
+ * Unknown registers currently read as zero.
+ */
+static int
+dist_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < dist->start || fault_ipa + size > dist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - dist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (vgic_register_read(hyp, dist_registers, nitems(dist_registers),
+	    vcpuid, reg, size, rval, NULL))
+		return (0);
+
+	/* TODO: Check the correct behaviour */
+	printf("%s: %lx\n", __func__, fault_ipa - dist->start);
+	*rval = 0;
+
+	return (0);
+}
+
+/*
+ * MMIO write handler for the distributor register range. Validates the
+ * faulting address and alignment, then dispatches via dist_registers.
+ * Writes to unknown registers panic (development aid).
+ */
+static int
+dist_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < dist->start || fault_ipa + size > dist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - dist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (vgic_register_write(hyp, dist_registers, nitems(dist_registers),
+	    vcpuid, reg, size, wval, NULL))
+		return (0);
+
+	panic("%s: %lx\n", __func__, fault_ipa - dist->start);
+	return (0);
+}
+
+/*
+ * Redistributor register handlers.
+ *
+ * RD_base:
+ */
+/* GICR_CTLR */
+static void
+redist_ctlr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	/* LPIs not supported */
+	*rval = 0;
+}
+
+/* GICR_IIDR */
+static void
+redist_iidr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = VGIC_IIDR;
+}
+
+/* GICR_TYPER */
+static void
+redist_typer_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	struct vgic_v3_redist *redist;
+
+	/* Return the value precomputed for this vcpu's redistributor */
+	redist = &hyp->ctx[vcpuid].vgic_redist;
+	*rval = redist->gicr_typer;
+}
+
+/*
+ * SGI_base:
+ */
+/* GICR_ISENABLER0 */
+static void
+redist_ienabler0_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	/* Group 0 covers the private (SGI/PPI) interrupts */
+	*rval = read_enabler(hyp, vcpuid, 0);
+}
+
+static void
+redist_isenabler0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_enabler(hyp, vcpuid, 0, true, wval);
+}
+
+/* GICR_ICENABLER0 */
+static void
+redist_icenabler0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_enabler(hyp, vcpuid, 0, false, wval);
+}
+
+/* GICR_ISPENDR0 */
+static void
+redist_ipendr0_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = read_pendr(hyp, vcpuid, 0);
+}
+
+static void
+redist_ispendr0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_pendr(hyp, vcpuid, 0, true, wval);
+}
+
+/* GICR_ICPENDR0 */
+static void
+redist_icpendr0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_pendr(hyp, vcpuid, 0, false, wval);
+}
+
+/* GICR_ISACTIVER0 */
+static void
+redist_iactiver0_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = read_activer(hyp, vcpuid, 0);
+}
+
+static void
+redist_isactiver0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	/* NOTE(review): no MPASS on offset/size here, unlike the siblings */
+	write_activer(hyp, vcpuid, 0, true, wval);
+}
+
+/* GICR_ICACTIVER0 */
+static void
+redist_icactiver0_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	write_activer(hyp, vcpuid, 0, false, wval);
+}
+
+/* GICR_IPRIORITYR */
+static void
+redist_ipriorityr_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	int n;
+
+	n = (reg - GICR_IPRIORITYR(0)) / 4;
+	*rval = read_priorityr(hyp, vcpuid, n);
+}
+
+static void
+redist_ipriorityr_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	u_int irq_base;
+
+	/* Byte-addressable: each byte of the register is one interrupt */
+	irq_base = (reg - GICR_IPRIORITYR(0)) + offset;
+	write_priorityr(hyp, vcpuid, irq_base, size, wval);
+}
+
+/* GICR_ICFGR1 */
+static void
+redist_icfgr1_read(struct hyp *hyp, int vcpuid, u_int reg, uint64_t *rval,
+    void *arg)
+{
+	*rval = read_config(hyp, vcpuid, 0);
+}
+
+static void
+redist_icfgr1_write(struct hyp *hyp, int vcpuid, u_int reg, u_int offset,
+    u_int size, uint64_t wval, void *arg)
+{
+	MPASS(offset == 0);
+	MPASS(size == 4);
+	write_config(hyp, vcpuid, 0, wval);
+}
+
+/*
+ * MMIO read handler for this vcpu's redistributor range. Dispatches to
+ * the RD_base or SGI_base register list depending on the offset.
+ * Unknown registers panic (development aid).
+ */
+static int
+redist_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < redist->start || fault_ipa + size > redist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - redist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (reg < GICR_RD_BASE_SIZE) {
+		if (vgic_register_read(hyp, redist_rd_registers,
+		    nitems(redist_rd_registers), vcpuid, reg, size, rval, NULL))
+			return (0);
+	} else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+		if (vgic_register_read(hyp, redist_sgi_registers,
+		    nitems(redist_sgi_registers), vcpuid,
+		    reg - GICR_SGI_BASE, size, rval, NULL))
+			return (0);
+	}
+
+	panic("%s: %lx", __func__, reg);
+}
+
+/*
+ * MMIO write handler for this vcpu's redistributor range. Dispatches to
+ * the RD_base or SGI_base register list depending on the offset.
+ * Unknown registers panic (development aid).
+ */
+static int
+redist_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+    int size, void *arg)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;
+	uint64_t reg;
+
+	/* Check the register is one of ours and is the correct size */
+	if (fault_ipa < redist->start || fault_ipa + size > redist->end) {
+		return (EINVAL);
+	}
+
+	reg = fault_ipa - redist->start;
+	/* Check the register is correctly aligned */
+	if ((reg & (size - 1)) != 0)
+		return (EINVAL);
+
+	if (reg < GICR_RD_BASE_SIZE) {
+		if (vgic_register_write(hyp, redist_rd_registers,
+		    nitems(redist_rd_registers), vcpuid, reg, size, wval, NULL))
+			return (0);
+	} else if (reg < (GICR_SGI_BASE + GICR_SGI_BASE_SIZE)) {
+		if (vgic_register_write(hyp, redist_sgi_registers,
+		    nitems(redist_sgi_registers), vcpuid,
+		    reg - GICR_SGI_BASE, size, wval, NULL))
+			return (0);
+	}
+
+	panic("%s: %lx", __func__, reg);
+}
+
+/*
+ * Trapped read of ICC_SGI1R_EL1. The register is write-only, so a read
+ * should fault; for now it reads as zero.
+ */
+int
+vgic_v3_icc_sgi1r_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+	/*
+	 * TODO: Inject an unknown exception.
+	 */
+	*rval = 0;
+	return (0);
+}
+
+/* vgic_v3_icc_sgi1r_write currently only handles 16 CPUs */
+CTASSERT(VM_MAXCPU <= 16);
+/*
+ * Trapped write of ICC_SGI1R_EL1: inject the requested SGI either into
+ * the vcpus named by the target list or, when IRM is set, into every
+ * active vcpu except the sender.
+ */
+int
+vgic_v3_icc_sgi1r_write(void *vm, int vcpuid, uint64_t rval, void *arg)
+{
+	struct hyp *hyp;
+	cpuset_t active_cpus;
+	uint32_t irqid;
+	int cpus, vcpu;
+
+	hyp = vm_get_cookie(vm);
+	active_cpus = vm_active_cpus(vm);
+	irqid = (rval >> ICC_SGI1R_EL1_SGIID_SHIFT) & ICC_SGI1R_EL1_SGIID_MASK;
+	if ((rval & ICC_SGI1R_EL1_IRM) == 0) {
+		/*
+		 * TODO: Support on more than 16 CPUs. This is the mask for the
+		 * affinity bits. These should be 0.
+		 */
+		if ((rval & 0xff00ff00ff000ul) != 0)
+			return (0);
+		/* TargetList: one bit per vcpu within affinity level 0 */
+		cpus = rval & 0xff;
+		vcpu = 0;
+		while (cpus > 0) {
+			if (CPU_ISSET(vcpu, &active_cpus) && vcpu != vcpuid) {
+				vgic_v3_inject_irq(hyp, vcpu, irqid, true);
+			}
+			vcpu++;
+			cpus >>= 1;
+		}
+	} else {
+		/* Send an IPI to all CPUs other than the current CPU */
+		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+			if (CPU_ISSET(vcpu, &active_cpus) && vcpu != vcpuid) {
+				vgic_v3_inject_irq(hyp, vcpu, irqid, true);
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate and initialise the shared-interrupt (SPI) state for a VM.
+ * The private (SGI/PPI) interrupts live per-vcpu and are not touched
+ * here.
+ */
+static void
+vgic_v3_mmio_init(struct hyp *hyp)
+{
+	struct vgic_v3_dist *dist;
+	struct vgic_v3_irq *irq;
+	int idx;
+
+	/* Allocate memory for the SPIs */
+	dist = &hyp->vgic_dist;
+	dist->irqs = malloc((VGIC_NIRQS - VGIC_PRV_I_NUM) *
+	    sizeof(*dist->irqs), M_VGIC_V3, M_WAITOK | M_ZERO);
+
+	for (idx = 0; idx < VGIC_NIRQS - VGIC_PRV_I_NUM; idx++) {
+		irq = &dist->irqs[idx];
+		mtx_init(&irq->irq_spinmtx, "VGIC IRQ spinlock", NULL,
+		    MTX_SPIN);
+		/* SPI INTIDs start after the private interrupts */
+		irq->irq = idx + VGIC_PRV_I_NUM;
+	}
+}
+
+/*
+ * Tear down the SPI state created by vgic_v3_mmio_init().
+ */
+static void
+vgic_v3_mmio_destroy(struct hyp *hyp)
+{
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	int idx;
+
+	for (idx = 0; idx < VGIC_NIRQS - VGIC_PRV_I_NUM; idx++)
+		mtx_destroy(&dist->irqs[idx].irq_spinmtx);
+
+	free(dist->irqs, M_VGIC_V3);
+}
+
+/*
+ * Attach the virtual GIC to a VM: validate the guest-physical register
+ * windows, record them, register the MMIO fault handlers and allocate
+ * the SPI state. Returns EINVAL on a misaligned or mis-sized window.
+ */
+int
+vgic_v3_attach_to_vm(struct vm *vm, uint64_t dist_start, size_t dist_size,
+    uint64_t redist_start, size_t redist_size)
+{
+	struct hyp *hyp = vm_get_cookie(vm);
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	struct vgic_v3_redist *redist;
+	int i;
+
+	/* The register bases need to be 64k aligned */
+	if (!__is_aligned(dist_start, PAGE_SIZE_64K) ||
+	    !__is_aligned(redist_start, PAGE_SIZE_64K))
+		return (EINVAL);
+
+	/* The dist register space is 1 64k block */
+	if (dist_size != PAGE_SIZE_64K)
+		return (EINVAL);
+
+	/* The redist register space is 2 64k blocks */
+	if (redist_size != PAGE_SIZE_64K * 2)
+		return (EINVAL);
+
+	/* Set the distributor address and size for trapping guest access. */
+	dist->start = dist_start;
+	dist->end = dist_start + dist_size;
+
+	/*
+	 * NOTE(review): every vcpu records the same redistributor window;
+	 * redist_read/redist_write select the per-vcpu state by vcpuid.
+	 */
+	for (i = 0; i < VM_MAXCPU; i++) {
+		redist = &hyp->ctx[i].vgic_redist;
+		/* Set the redistributor address and size. */
+		redist->start = redist_start;
+		redist->end = redist_start + redist_size;
+	}
+
+	vm_register_inst_handler(vm, dist_start, dist_size, dist_read,
+	    dist_write);
+	vm_register_inst_handler(vm, redist_start, redist_size, redist_read,
+	    redist_write);
+
+	vgic_v3_mmio_init(hyp);
+
+	hyp->vgic_attached = true;
+
+	return (0);
+}
+
+/*
+ * Detach the virtual GIC from a VM, releasing the SPI state. Safe to
+ * call when no vgic is attached.
+ */
+void
+vgic_v3_detach_from_vm(struct vm *vm)
+{
+	struct hyp *hyp;
+
+	hyp = vm_get_cookie(vm);
+	if (!hyp->vgic_attached)
+		return;
+
+	hyp->vgic_attached = false;
+	vgic_v3_mmio_destroy(hyp);
+}
+
+/*
+ * Look up the vgic_v3_irq for the given INTID and return it with its
+ * spinlock held; the caller must drop it with vgic_v3_release_irq().
+ * Returns NULL for out-of-range INTIDs and unsupported LPIs.
+ */
+static struct vgic_v3_irq *
+vgic_v3_get_irq(struct hyp *hyp, int vcpuid, uint32_t irqid)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_dist *dist;
+	struct vgic_v3_irq *irq;
+
+	if (irqid < VGIC_PRV_I_NUM) {
+		/* Private (SGI/PPI) interrupts are banked per-vcpu */
+		if (vcpuid < 0 || vcpuid >= nitems(hyp->ctx))
+			return (NULL);
+
+		cpu_if = &hyp->ctx[vcpuid].vgic_cpu_if;
+		irq = &cpu_if->private_irqs[irqid];
+	} else if (irqid <= GIC_LAST_SPI) {
+		dist = &hyp->vgic_dist;
+		/*
+		 * dist->irqs has (VGIC_NIRQS - VGIC_PRV_I_NUM) entries, so
+		 * bound-check the INTID against the total interrupt count
+		 * before rebasing it to an array index. Checking the
+		 * rebased index against VGIC_NIRQS would allow reads past
+		 * the end of the array.
+		 */
+		if (irqid >= VGIC_NIRQS)
+			return (NULL);
+		irq = &dist->irqs[irqid - VGIC_PRV_I_NUM];
+	} else if (irqid < GIC_FIRST_LPI) {
+		/* Special INTIDs (1020-1023) */
+		return (NULL);
+	} else {
+		/* No support for LPIs */
+		return (NULL);
+	}
+
+	mtx_lock_spin(&irq->irq_spinmtx);
+	return (irq);
+}
+
+/* Drop the per-interrupt spinlock taken by vgic_v3_get_irq(). */
+static void
+vgic_v3_release_irq(struct vgic_v3_irq *irq)
+{
+
+	mtx_unlock_spin(&irq->irq_spinmtx);
+}
+
+/*
+ * Return true if this vcpu has any interrupts buffered on its
+ * active/pending list.
+ */
+bool
+vgic_v3_vcpu_pending_irq(struct hypctx *hypctx)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	bool has_work;
+
+	cpu_if = &hypctx->vgic_cpu_if;
+	mtx_lock_spin(&cpu_if->lr_mtx);
+	has_work = !TAILQ_EMPTY(&cpu_if->irq_act_pend);
+	mtx_unlock_spin(&cpu_if->lr_mtx);
+
+	return (has_work);
+}
+
+/*
+ * Decide whether an injection should take effect:
+ * - Level-triggered IRQ: only when the level changes
+ * - Edge-triggered IRQ: only on a high level
+ */
+static bool
+vgic_v3_check_irq(struct vgic_v3_irq *irq, bool level)
+{
+	uint32_t cfg;
+
+	cfg = irq->config & VGIC_CONFIG_MASK;
+	if (cfg == VGIC_CONFIG_LEVEL)
+		return (level != irq->level);
+	if (cfg == VGIC_CONFIG_EDGE)
+		return (level);
+
+	return (false);
+}
+
+/*
+ * Inject an interrupt into the VM. vcpuid may be -1 for SPIs, in which
+ * case the interrupt's routed target vcpu is used. Returns 0 on
+ * success, 1 on a malformed INTID or invalid target.
+ */
+int
+vgic_v3_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	int target_vcpu;
+	bool notify;
+
+	KASSERT(vcpuid == -1 || irqid < VGIC_PRV_I_NUM,
+	    ("%s: SPI/LPI with vcpuid set: irq %u vcpuid %u", __func__, irqid,
+	    vcpuid));
+
+	irq = vgic_v3_get_irq(hyp, vcpuid, irqid);
+	if (irq == NULL) {
+		eprintf("Malformed IRQ %u.\n", irqid);
+		return (1);
+	}
+
+	target_vcpu = irq->target_vcpu;
+	KASSERT(vcpuid == -1 || vcpuid == target_vcpu,
+	    ("%s: Interrupt %u has bad cpu affinity: vcpu %d target vcpu %d",
+	    __func__, irqid, vcpuid, target_vcpu));
+	KASSERT(target_vcpu >= 0 && target_vcpu < VM_MAXCPU,
+	    ("%s: Interrupt %u sent to invalid vcpu %d", __func__, irqid,
+	    target_vcpu));
+
+	/* SPIs are delivered to the vcpu they are routed to */
+	if (vcpuid == -1)
+		vcpuid = target_vcpu;
+	/* TODO: Check from 0 to vm->maxcpus */
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU) {
+		vgic_v3_release_irq(irq);
+		return (1);
+	}
+
+	notify = false;
+	cpu_if = &hyp->ctx[vcpuid].vgic_cpu_if;
+
+	mtx_lock_spin(&cpu_if->lr_mtx);
+
+	/* Only act on a level change (level) or a high level (edge) */
+	if (!vgic_v3_check_irq(irq, level)) {
+		goto out;
+	}
+
+	if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_LEVEL)
+		irq->level = level;
+	else /* VGIC_CONFIG_EDGE */
+		irq->pending = true;
+
+	notify = vgic_v3_queue_irq(hyp, cpu_if, vcpuid, irq);
+
+out:
+	mtx_unlock_spin(&cpu_if->lr_mtx);
+	vgic_v3_release_irq(irq);
+
+	/* Wake the target vcpu after dropping the locks */
+	if (notify)
+		vcpu_notify_event(hyp->vm, vcpuid, false);
+
+	return (0);
+}
+
+/*
+ * Deliver an MSI by emulating a write of msg to GICD_SETSPI_NSR;
+ * any other address within the distributor is rejected with EINVAL.
+ */
+int
+vgic_v3_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+	struct vgic_v3_dist *dist = &hyp->vgic_dist;
+	uint64_t reg;
+
+	/* This is a 4 byte register */
+	if (addr < dist->start || addr + 4 > dist->end) {
+		return (EINVAL);
+	}
+
+	reg = addr - dist->start;
+	if (reg != GICD_SETSPI_NSR)
+		return (EINVAL);
+
+	/* vcpuid -1: route by the SPI's configured affinity */
+	return (vgic_v3_inject_irq(hyp, -1, msg, true));
+}
+
+/*
+ * Called before entering the guest: load as many buffered
+ * active/pending interrupts as possible into the hardware list
+ * registers (ICH_LR<n>_EL2) for this vcpu.
+ */
+void
+vgic_v3_flush_hwstate(void *arg)
+{
+	struct hypctx *hypctx;
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	int i;
+
+	hypctx = arg;
+	cpu_if = &hypctx->vgic_cpu_if;
+
+	/*
+	 * All Distributor writes have been executed at this point, do not
+	 * protect Distributor reads with a mutex.
+	 *
+	 * This is called with all interrupts disabled, so there is no need
+	 * for a List Register spinlock either.
+	 */
+	mtx_lock_spin(&cpu_if->lr_mtx);
+
+	cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+
+	/* Exit early if there are no buffered interrupts */
+	if (TAILQ_EMPTY(&cpu_if->irq_act_pend))
+		goto out;
+
+	KASSERT(cpu_if->ich_lr_used == 0, ("%s: Used LR count not zero %u",
+	    __func__, cpu_if->ich_lr_used));
+
+	i = 0;
+	/* Mark every list register as empty before refilling */
+	cpu_if->ich_elrsr_el2 = (1 << cpu_if->ich_lr_num) - 1;
+	TAILQ_FOREACH(irq, &cpu_if->irq_act_pend, act_pend_list) {
+		/* No free list register, stop searching for IRQs */
+		if (i == cpu_if->ich_lr_num)
+			break;
+
+		/* Disabled IRQs stay buffered but are not presented */
+		if (!irq->enabled)
+			continue;
+
+		cpu_if->ich_lr_el2[i] = ICH_LR_EL2_GROUP1 |
+		    ((uint64_t)irq->priority << ICH_LR_EL2_PRIO_SHIFT) |
+		    irq->irq;
+
+		if (irq->active) {
+			cpu_if->ich_lr_el2[i] |= ICH_LR_EL2_STATE_ACTIVE;
+		}
+
+#ifdef notyet
+		/* TODO: Check why this is needed */
+		if ((irq->config & _MASK) == LEVEL)
+			cpu_if->ich_lr_el2[i] |= ICH_LR_EL2_EOI;
+#endif
+
+		if (!irq->active && vgic_v3_irq_pending(irq)) {
+			cpu_if->ich_lr_el2[i] |= ICH_LR_EL2_STATE_PENDING;
+
+			/*
+			 * This IRQ is now pending on the guest. Allow for
+			 * another edge that could cause the interrupt to
+			 * be raised again.
+			 */
+			if ((irq->config & VGIC_CONFIG_MASK) ==
+			    VGIC_CONFIG_EDGE) {
+				irq->pending = false;
+			}
+		}
+
+		i++;
+	}
+	cpu_if->ich_lr_used = i;
+
+out:
+	mtx_unlock_spin(&cpu_if->lr_mtx);
+}
+
+/*
+ * Called after returning from the guest: read back the hardware list
+ * registers, fold the guest's EOI/activation activity into the software
+ * interrupt state and prune the active/pending list.
+ */
+void
+vgic_v3_sync_hwstate(void *arg)
+{
+	struct hypctx *hypctx;
+	struct vgic_v3_cpu_if *cpu_if;
+	struct vgic_v3_irq *irq;
+	uint64_t lr;
+	int i;
+
+	hypctx = arg;
+	cpu_if = &hypctx->vgic_cpu_if;
+
+	/* Exit early if there are no buffered interrupts */
+	if (cpu_if->ich_lr_used == 0)
+		return;
+
+	/*
+	 * Check on the IRQ state after running the guest. ich_lr_used and
+	 * ich_lr_el2 are only ever used within this thread so is safe to
+	 * access unlocked.
+	 */
+	for (i = 0; i < cpu_if->ich_lr_used; i++) {
+		lr = cpu_if->ich_lr_el2[i];
+		cpu_if->ich_lr_el2[i] = 0;
+
+		irq = vgic_v3_get_irq(hypctx->hyp, hypctx->vcpu,
+		    ICH_LR_EL2_VINTID(lr));
+		if (irq == NULL)
+			continue;
+
+		irq->active = (lr & ICH_LR_EL2_STATE_ACTIVE) != 0;
+
+		if ((irq->config & VGIC_CONFIG_MASK) == VGIC_CONFIG_EDGE) {
+			/*
+			 * If we have an edge triggered IRQ preserve the
+			 * pending bit until the IRQ has been handled.
+			 */
+			if ((lr & ICH_LR_EL2_STATE_PENDING) != 0) {
+				irq->pending = true;
+			}
+		} else {
+			/*
+			 * If we have a level triggered IRQ remove the
+			 * pending bit if the IRQ has been handled.
+			 * The level is separate, so may still be high
+			 * triggering another IRQ.
+			 */
+			if ((lr & ICH_LR_EL2_STATE_PENDING) == 0) {
+				irq->pending = false;
+			}
+		}
+
+		/* Lock to update irq_act_pend */
+		mtx_lock_spin(&cpu_if->lr_mtx);
+		if (irq->active) {
+			/* Ensure the active IRQ is at the head of the list */
+			TAILQ_REMOVE(&cpu_if->irq_act_pend, irq, act_pend_list);
+			TAILQ_INSERT_HEAD(&cpu_if->irq_act_pend, irq,
+			    act_pend_list);
+		} else if (!vgic_v3_irq_pending(irq)) {
+			/* If pending or active remove from the list */
+			TAILQ_REMOVE(&cpu_if->irq_act_pend, irq, act_pend_list);
+			irq->on_aplist = false;
+		}
+		mtx_unlock_spin(&cpu_if->lr_mtx);
+		vgic_v3_release_irq(irq);
+	}
+
+	/* Clear the EOI count; the list registers are now all free */
+	cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_EOICOUNT_MASK;
+	cpu_if->ich_lr_used = 0;
+}
+
+/*
+ * newbus glue: probe succeeds only when the parent GIC has a GICv3+
+ * virtualization extension; attach just records its presence.
+ */
+static int
+vgic_probe(device_t dev)
+{
+	if (!gic_get_vgic(dev))
+		return (EINVAL);
+
+	/* We currently only support the GICv3 */
+	if (gic_get_hw_rev(dev) < 3)
+		return (EINVAL);
+
+	device_set_desc(dev, "Virtual GIC");
+	return (BUS_PROBE_DEFAULT);
+}
+
+static int
+vgic_attach(device_t dev)
+{
+	have_vgic = true;
+	return (0);
+}
+
+static device_method_t vgic_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, vgic_probe),
+	DEVMETHOD(device_attach, vgic_attach),
+
+	/* End */
+	DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(vgic, vgic_driver, vgic_methods, 0);
+
+DRIVER_MODULE(vgic, gic, vgic_driver, 0, 0);
+
+/* Report whether a virtual GIC was found during attach. */
+bool
+vgic_present(void)
+{
+	return (have_vgic);
+}
+
+/*
+ * Record the virtual GIC features advertised by ICH_VTR_EL2: the
+ * minimum (numerically largest) supported virtual priority, the number
+ * of active-priority registers and the number of list registers.
+ */
+void
+vgic_v3_init(uint64_t ich_vtr_el2)
+{
+	uint32_t pribits, prebits;
+
+	MPASS(have_vgic);
+
+	/*
+	 * Each case must break: without the breaks every case fell
+	 * through to the last, leaving min_prio/ich_apr_num at the
+	 * maximum regardless of the hardware's reported width.
+	 */
+	pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2);
+	switch (pribits) {
+	case 5:
+		virt_features.min_prio = 0xf8;
+		break;
+	case 6:
+		virt_features.min_prio = 0xfc;
+		break;
+	case 7:
+		virt_features.min_prio = 0xfe;
+		break;
+	case 8:
+		virt_features.min_prio = 0xff;
+		break;
+	}
+
+	prebits = ICH_VTR_EL2_PREBITS(ich_vtr_el2);
+	switch (prebits) {
+	case 5:
+		virt_features.ich_apr_num = 1;
+		break;
+	case 6:
+		virt_features.ich_apr_num = 2;
+		break;
+	case 7:
+		virt_features.ich_apr_num = 4;
+		break;
+	}
+
+	virt_features.ich_lr_num = ICH_VTR_EL2_LISTREGS(ich_vtr_el2);
+}
diff --git a/sys/arm64/vmm/io/vgic_v3_reg.h b/sys/arm64/vmm/io/vgic_v3_reg.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vgic_v3_reg.h
@@ -0,0 +1,99 @@
+#ifndef _VGIC_V3_REG_H_
+#define _VGIC_V3_REG_H_
+
+/* Interrupt Controller End of Interrupt Status Register */
+#define ICH_EISR_EL2_STATUS_MASK 0xffff
+#define ICH_EISR_EL2_EOI_NOT_HANDLED(lr) ((1 << lr) & ICH_EISR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Empty List Register Status Register */
+#define ICH_ELSR_EL2_STATUS_MASK 0xffff
+#define ICH_ELSR_EL2_LR_EMPTY(x) ((1 << x) & ICH_ELSR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Hyp Control Register */
+#define ICH_HCR_EL2_EOICOUNT_SHIFT 27
+#define ICH_HCR_EL2_EOICOUNT_MASK (0x1f << ICH_HCR_EL2_EOICOUNT_SHIFT)
+#define ICH_HCR_EL2_TDIR (1 << 14) /* Trap non-secure EL1 writes to IC{C, V}_DIR_EL1 */
+#define ICH_HCR_EL2_TSEI (1 << 14) /* Trap System Error Interupts (SEI) to EL2 */
+#define ICH_HCR_EL2_TALL1 (1 << 12) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 1 interrupts */
+#define ICH_HCR_EL2_TALL0 (1 << 11) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 0 interrupts */
+#define ICH_HCR_EL2_TC (1 << 10) /* Trap non-secure EL1 accesses to common IC{C, V}_* registers */
+#define ICH_HCR_EL2_VGRP1DIE (1 << 7) /* VM Group 1 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP1EIE (1 << 6) /* VM Group 1 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0DIE (1 << 5) /* VM Group 0 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0EIE (1 << 4) /* VM Group 0 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_NPIE (1 << 3) /* No Pending Interrupt Enable */
+#define ICH_HCR_EL2_LRENPIE (1 << 2) /* List Register Entry Not Present Interrupt Enable */
+#define ICH_HCR_EL2_UIE (1 << 1) /* Underflow Interrupt Enable */
+#define ICH_HCR_EL2_En (1 << 0) /* Global enable for the virtual CPU interface */
+
+/* Interrupt Controller List Registers */
+#define ICH_LR_EL2_VINTID_MASK 0xffffffff
+#define ICH_LR_EL2_VINTID(x) ((x) & ICH_LR_EL2_VINTID_MASK)
+#define ICH_LR_EL2_PINTID_SHIFT 32
+#define ICH_LR_EL2_PINTID_MASK (0x3fUL << ICH_LR_EL2_PINTID_SHIFT)
+/* Raise a maintanance IRQ when deactivated (only non-HW virqs) */
+#define ICH_LR_EL2_EOI (1UL << 41)
+#define ICH_LR_EL2_PRIO_SHIFT 48
+#define ICH_LR_EL2_PRIO_MASK (0xffUL << ICH_LR_EL2_PRIO_SHIFT)
+#define ICH_LR_EL2_GROUP_SHIFT 60
+#define ICH_LR_EL2_GROUP1 (1UL << ICH_LR_EL2_GROUP_SHIFT)
+#define ICH_LR_EL2_HW (1UL << 61)
+#define ICH_LR_EL2_STATE_SHIFT 62
+#define ICH_LR_EL2_STATE_MASK (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE(x) ((x) & ICH_LR_EL2_STATE_MASK)
+#define ICH_LR_EL2_STATE_INACTIVE (0x0UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING (0x1UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_ACTIVE (0x2UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING_ACTIVE (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+
+/* Interrupt Controller Maintenance Interrupt State Register */
+#define ICH_MISR_EL2_VGRP1D (1 << 7) /* vPE Group 1 Disabled */
+#define ICH_MISR_EL2_VGRP1E (1 << 6) /* vPE Group 1 Enabled */
+#define ICH_MISR_EL2_VGRP0D (1 << 5) /* vPE Group 0 Disabled */
+#define ICH_MISR_EL2_VGRP0E (1 << 4) /* vPE Group 0 Enabled */
+#define ICH_MISR_EL2_NP (1 << 3) /* No Pending */
+#define ICH_MISR_EL2_LRENP (1 << 2) /* List Register Entry Not Present */
+#define ICH_MISR_EL2_U (1 << 1) /* Underflow */
+#define ICH_MISR_EL2_EOI (1 << 0) /* End Of Interrupt */
+
+/* Interrupt Controller Virtual Machine Control Register */
+#define ICH_VMCR_EL2_VPMR_SHIFT 24
+#define ICH_VMCR_EL2_VPMR_MASK (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_LOWEST (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_HIGHEST (0x00 << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_SHIFT 21
+#define ICH_VMCR_EL2_VBPR0_MASK (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_SHIFT 18
+#define ICH_VMCR_EL2_VBPR1_MASK (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VEOIM (1 << 9) /* Virtual EOI mode */
+#define ICH_VMCR_EL2_VCBPR (1 << 4) /* Virtual Common binary Point Register */
+#define ICH_VMCR_EL2_VFIQEN (1 << 3) /* Virtual FIQ enable */
+#define ICH_VMCR_EL2_VACKCTL (1 << 2) /* Virtual AckCtl */
+#define ICH_VMCR_EL2_VENG1 (1 << 1) /* Virtual Group 1 Interrupt Enable */
+#define ICH_VMCR_EL2_VENG0 (1 << 0) /* Virtual Group 0 Interrupt Enable */
+
+/* Interrupt Controller VGIC Type Register */
+#define ICH_VTR_EL2_PRIBITS_SHIFT 29
+#define ICH_VTR_EL2_PRIBITS_MASK (0x7 << ICH_VTR_EL2_PRIBITS_SHIFT)
+#define ICH_VTR_EL2_PRIBITS(x) \
+ ((((x) & ICH_VTR_EL2_PRIBITS_MASK) >> ICH_VTR_EL2_PRIBITS_SHIFT) + 1)
+#define ICH_VTR_EL2_PREBITS_SHIFT 26
+#define ICH_VTR_EL2_PREBITS_MASK (0x7 << ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_PREBITS(x) \
+ (((x) & ICH_VTR_EL2_PREBITS_MASK) >> ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_SEIS (1 << 22) /* System Error Interrupt (SEI) Support */
+#define ICH_VTR_EL2_A3V (1 << 21) /* Affinity 3 Valid */
+#define ICH_VTR_EL2_NV4 (1 << 20) /* Direct injection of virtual interrupts. RES1 for GICv3 */
+#define ICH_VTR_EL2_TDS (1 << 19) /* Implementation supports ICH_HCR_EL2.TDIR */
+#define ICH_VTR_EL2_LISTREGS_MASK 0x1f
+/*
+ * ICH_VTR_EL2.ListRegs holds the number of list registers, minus one. Add one
+ * to get the actual number of list registers.
+ */
+#define ICH_VTR_EL2_LISTREGS(x) (((x) & ICH_VTR_EL2_LISTREGS_MASK) + 1)
+
+#endif /* !_VGIC_V3_REG_H_ */
diff --git a/sys/arm64/vmm/io/vtimer.h b/sys/arm64/vmm/io/vtimer.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VTIMER_H_
+#define _VMM_VTIMER_H_
+
+#define GT_PHYS_NS_IRQ 30
+#define GT_VIRT_IRQ 27
+
+/*
+ * Per-VM timer state: the VM's Counter-timer Hypervisor Control Register
+ * value and its virtual counter offset (CNTVOFF_EL2).
+ */
+struct vtimer
+{
+ uint64_t cnthctl_el2;
+ uint64_t cntvoff_el2;
+};
+
+/* State for one guest timer (physical or virtual) on a vCPU. */
+struct vtimer_timer
+{
+ struct callout callout;
+ struct mtx mtx;
+
+ uint32_t irqid;
+
+ /*
+ * These registers are either emulated for the physical timer, or
+ * the guest has full access to them for the virtual timer.
+ *
+ * CNTx_CTL_EL0: Counter-timer Timer Control Register
+ * CNTx_CVAL_EL0: Counter-timer Timer CompareValue Register
+ */
+ uint64_t cntx_cval_el0; /* CVAL is a 64-bit compare value */
+ uint32_t cntx_ctl_el0;
+};
+
+/* Per-vCPU timer state: the emulated physical and the virtual timer. */
+struct vtimer_cpu
+{
+ struct vtimer_timer phys_timer;
+ struct vtimer_timer virt_timer;
+
+ uint32_t cntkctl_el1;
+};
+
+/* Module / VM / vCPU lifecycle hooks. */
+int vtimer_init(uint64_t cnthctl_el2);
+void vtimer_vminit(struct hyp *);
+void vtimer_cpuinit(struct hypctx *);
+void vtimer_cpucleanup(struct hypctx *);
+void vtimer_vmcleanup(struct hyp *);
+void vtimer_cleanup(void);
+
+/* Emulated CNTP_* register accessors; 'arg' is unused by the handlers. */
+int vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_cnt_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_cnt_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+#endif
diff --git a/sys/arm64/vmm/io/vtimer.c b/sys/arm64/vmm/io/vtimer.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/io/vtimer.c
@@ -0,0 +1,456 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/bus.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/timeet.h>
+#include <sys/timetc.h>
+
+#include <machine/bus.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/armreg.h>
+
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vtimer.h"
+
+#define RES1 0xffffffffffffffffUL
+
+#define timer_enabled(ctl) \
+ (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE))
+
+static uint64_t cnthctl_el2_reg;
+static uint32_t tmr_frq;
+static bool have_vtimer = false;
+
+#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS)
+
+/*
+ * Interrupt filter for the host virtual timer IRQ.  Forwards the timer
+ * interrupt to the active guest vCPU via the vGIC, then disables the
+ * timer so the line is not immediately reasserted on return.
+ */
+static int
+vtimer_virtual_timer_intr(void *arg)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ /*
+ * TODO everything here is very strange. The relationship between the
+ * hardware value and the value in memory is not clear at all.
+ */
+
+ hypctx = arm64_get_active_vcpu();
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+
+ if (!hypctx) {
+ /* vm_destroy() was called. */
+ eprintf("No active vcpu\n");
+ /* NOTE(review): cntv_ctl was already read above; this re-read
+ * looks redundant — confirm before removing. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ goto out;
+ }
+ if (!timer_enabled(cntv_ctl)) {
+ eprintf("Timer not enabled\n");
+ goto out;
+ }
+ if (!timer_condition_met(cntv_ctl)) {
+ eprintf("Timer condition not met\n");
+ goto out;
+ }
+
+ vgic_v3_inject_irq(hypctx->hyp, hypctx->vcpu, GT_VIRT_IRQ, true);
+
+ /* Mirror the disable into the shadow copy of the guest's CTL. */
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 &= ~CNTP_CTL_ENABLE;
+ cntv_ctl = hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0;
+
+out:
+ /*
+ * Disable the timer interrupt. This will prevent the interrupt from
+ * being reasserted as soon as we exit the handler and getting stuck
+ * in an infinite loop.
+ *
+ * This is safe to do because the guest disabled the timer, and then
+ * enables it as part of the interrupt handling routine.
+ */
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+
+ return (FILTER_HANDLED);
+}
+
+/*
+ * One-time module initialisation: remember the host CNTHCTL_EL2 value
+ * (used as the template for each VM in vtimer_vminit()) and the host
+ * timer frequency.  Returns 0.
+ */
+int
+vtimer_init(uint64_t cnthctl_el2)
+{
+ cnthctl_el2_reg = cnthctl_el2;
+ /*
+ * The guest *MUST* use the same timer frequency as the host. The
+ * register CNTFRQ_EL0 is accessible to the guest and a different value
+ * in the guest dts file might have unforeseen consequences.
+ */
+ tmr_frq = READ_SPECIALREG(cntfrq_el0);
+
+ return (0);
+}
+
+/*
+ * Per-VM initialisation: build the VM's CNTHCTL_EL2 value and record the
+ * current physical count as the virtual counter offset, so the guest's
+ * virtual counter (CNTVCT = CNTPCT - CNTVOFF) starts near zero.
+ */
+void
+vtimer_vminit(struct hyp *hyp)
+{
+ /*
+ * Configure the Counter-timer Hypervisor Control Register for the VM.
+ *
+ * ~CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 from EL1
+ * CNTHCTL_EL1PCTEN: don't trap access to CNTPCT_EL0
+ */
+ hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN;
+ hyp->vtimer.cnthctl_el2 |= CNTHCTL_EL1PCTEN;
+
+ hyp->vtimer.cntvoff_el2 = READ_SPECIALREG(cntpct_el0);
+}
+
+/*
+ * Per-vCPU initialisation: set up the emulated physical timer (masked
+ * and disabled) and the virtual timer, each with its callout, mutex
+ * and IRQ number.
+ */
+void
+vtimer_cpuinit(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ /*
+ * Configure physical timer interrupts for the VCPU.
+ *
+ * CNTP_CTL_IMASK: mask interrupts
+ * ~CNTP_CTL_ENABLE: disable the timer
+ */
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = CNTP_CTL_IMASK & ~CNTP_CTL_ENABLE;
+
+ mtx_init(&vtimer_cpu->phys_timer.mtx, "vtimer phys callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->phys_timer.callout,
+ &vtimer_cpu->phys_timer.mtx, 0);
+ vtimer_cpu->phys_timer.irqid = GT_PHYS_NS_IRQ;
+
+ mtx_init(&vtimer_cpu->virt_timer.mtx, "vtimer virt callout mutex", NULL,
+ MTX_DEF);
+ callout_init_mtx(&vtimer_cpu->virt_timer.callout,
+ &vtimer_cpu->virt_timer.mtx, 0);
+ vtimer_cpu->virt_timer.irqid = GT_VIRT_IRQ;
+}
+
+/* Per-vCPU teardown: drain the callouts, then destroy their mutexes. */
+void
+vtimer_cpucleanup(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ callout_drain(&vtimer_cpu->phys_timer.callout);
+ callout_drain(&vtimer_cpu->virt_timer.callout);
+ mtx_destroy(&vtimer_cpu->phys_timer.mtx);
+ mtx_destroy(&vtimer_cpu->virt_timer.mtx);
+}
+
+/* Per-VM teardown: if no vCPU is active any more, stop the hardware
+ * virtual timer so it cannot keep firing. */
+void
+vtimer_vmcleanup(struct hyp *hyp)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ hypctx = arm64_get_active_vcpu();
+ if (!hypctx) {
+ /* The active VM was destroyed, stop the timer. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+ }
+}
+
+/* Module teardown: nothing to undo from vtimer_init(). */
+void
+vtimer_cleanup(void)
+{
+}
+
+/* Callout handler: inject the emulated physical timer IRQ into the
+ * guest when the scheduled deadline expires. */
+static void
+vtimer_inject_irq_callout_func(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_v3_inject_irq(hypctx->hyp, hypctx->vcpu,
+ hypctx->vtimer_cpu.phys_timer.irqid, true);
+}
+
+
+/*
+ * Arm the emulated physical timer: if the compare value is already in
+ * the past, inject the IRQ immediately; otherwise schedule a callout
+ * for the remaining time.
+ *
+ * NOTE(review): the callout was initialised with callout_init_mtx();
+ * verify the associated mutex is held (or intended not to be) around
+ * callout_reset_sbt() here.
+ */
+static void
+vtimer_schedule_irq(struct vtimer_cpu *vtimer_cpu, struct hyp *hyp, int vcpuid)
+{
+ sbintime_t time;
+ struct vtimer_timer *timer;
+ uint64_t cntpct_el0;
+ uint64_t diff;
+
+ timer = &vtimer_cpu->phys_timer;
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ if (timer->cntx_cval_el0 < cntpct_el0) {
+ /* Timer set in the past, trigger interrupt */
+ vgic_v3_inject_irq(hyp, vcpuid, timer->irqid, true);
+ } else {
+ /* Convert remaining ticks to sbintime using the host freq. */
+ diff = timer->cntx_cval_el0 - cntpct_el0;
+ time = diff * SBT_1S / tmr_frq;
+ callout_reset_sbt(&timer->callout, time, 0,
+ vtimer_inject_irq_callout_func, &hyp->ctx[vcpuid], 0);
+ }
+}
+
+/* Cancel a pending emulated physical timer IRQ: stop the callout and
+ * retract the interrupt from the vGIC. */
+static void
+vtimer_remove_irq(struct hypctx *hypctx, int vcpuid)
+{
+ struct vtimer_cpu *vtimer_cpu;
+ struct vtimer_timer *timer;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ timer = &vtimer_cpu->phys_timer;
+
+ callout_drain(&timer->callout);
+ /*
+ * The interrupt needs to be deactivated here regardless of the callout
+ * function having been executed. The timer interrupt can be masked with
+ * the CNTP_CTL_EL0.IMASK bit instead of reading the IAR register.
+ * Masking the interrupt doesn't remove it from the list registers.
+ */
+ vgic_v3_inject_irq(hypctx->hyp, vcpuid, timer->irqid, false);
+}
+
+/*
+ * Timer emulation functions.
+ *
+ * The guest should use the virtual timer, however some software, e.g. u-boot,
+ * used the physical timer. Emulate this in software for the guest to use.
+ *
+ * Adjust for cntvoff_el2 so the physical and virtual timers are at similar
+ * times. This simplifies interrupt handling in the virtual timer as the
+ * adjustment will have already happened.
+ */
+
+/*
+ * Read of the emulated CNTP_CTL_EL0: return the shadow CTL value with
+ * ISTATUS synthesised from comparing CVAL against the adjusted counter.
+ */
+int
+vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ if (vtimer_cpu->phys_timer.cntx_cval_el0 < cntpct_el0)
+ /* Timer condition met */
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 | CNTP_CTL_ISTATUS;
+ else
+ *rval = vtimer_cpu->phys_timer.cntx_ctl_el0 & ~CNTP_CTL_ISTATUS;
+
+ return (0);
+}
+
+/*
+ * Write of the emulated CNTP_CTL_EL0: store the new value and, if the
+ * write transitions the timer from disabled/masked to enabled, schedule
+ * the interrupt.
+ */
+int
+vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t ctl_el0;
+ bool timer_toggled_on;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ timer_toggled_on = false;
+ ctl_el0 = vtimer_cpu->phys_timer.cntx_ctl_el0;
+
+ if (!timer_enabled(ctl_el0) && timer_enabled(wval))
+ timer_toggled_on = true;
+
+ vtimer_cpu->phys_timer.cntx_ctl_el0 = wval;
+
+ if (timer_toggled_on)
+ vtimer_schedule_irq(vtimer_cpu, hyp, vcpuid);
+
+ return (0);
+}
+
+/* Read of the emulated CNTPCT_EL0: host count minus the VM's offset. */
+int
+vtimer_phys_cnt_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+
+ hyp = vm_get_cookie(vm);
+ *rval = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ return (0);
+}
+
+/* Writes to the counter are ignored (the count is read-only). */
+int
+vtimer_phys_cnt_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ return (0);
+}
+
+/* Read of the emulated CNTP_CVAL_EL0: return the shadow compare value. */
+int
+vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ *rval = vtimer_cpu->phys_timer.cntx_cval_el0;
+
+ return (0);
+}
+
+/*
+ * Write of the emulated CNTP_CVAL_EL0: store the new compare value and,
+ * if the timer is enabled, re-arm the interrupt against the new deadline.
+ */
+int
+vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ vtimer_cpu->phys_timer.cntx_cval_el0 = wval;
+
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_remove_irq(hypctx, vcpuid);
+ vtimer_schedule_irq(vtimer_cpu, hyp, vcpuid);
+ }
+
+ return (0);
+}
+
+/*
+ * Read of the emulated CNTP_TVAL_EL0.
+ *
+ * TVAL is the 32-bit difference between the compare value and the
+ * (offset-adjusted) counter.  Keep the counter in 64 bits and truncate
+ * only the final result; the original code truncated the counter itself
+ * to a uint32_t before subtracting.
+ */
+int
+vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ if (!(vtimer_cpu->phys_timer.cntx_ctl_el0 & CNTP_CTL_ENABLE)) {
+ /*
+ * ARMv8 Architecture Manual, p. D7-2702: the result of reading
+ * TVAL when the timer is disabled is UNKNOWN. I have chosen to
+ * return the maximum value possible on 32 bits which means the
+ * timer will fire very far into the future.
+ */
+ *rval = (uint32_t)RES1;
+ } else {
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) -
+ hyp->vtimer.cntvoff_el2;
+ *rval = (uint32_t)(vtimer_cpu->phys_timer.cntx_cval_el0 -
+ cntpct_el0);
+ }
+
+ return (0);
+}
+
+/*
+ * Write of the emulated CNTP_TVAL_EL0: convert the (signed 32-bit)
+ * timer value to an absolute compare value against the adjusted
+ * counter, then re-arm the interrupt if the timer is enabled.
+ */
+int
+vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0) - hyp->vtimer.cntvoff_el2;
+ /* (int32_t) sign-extends TVAL so negative values move CVAL back. */
+ vtimer_cpu->phys_timer.cntx_cval_el0 = (int32_t)wval + cntpct_el0;
+
+ if (timer_enabled(vtimer_cpu->phys_timer.cntx_ctl_el0)) {
+ vtimer_remove_irq(hypctx, vcpuid);
+ vtimer_schedule_irq(vtimer_cpu, hyp, vcpuid);
+ }
+
+ return (0);
+}
+
+/* Driver softc: the virtual timer IRQ resource and its handler cookie. */
+struct vtimer_softc {
+ struct resource *res;
+ void *ihl;
+ int rid;
+};
+
+/* Probe always succeeds; the parent (generic_timer) decides attachment. */
+static int
+vtimer_probe(device_t dev)
+{
+ device_set_desc(dev, "Virtual timer");
+ return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Attach: allocate the virtual timer IRQ and install the interrupt
+ * filter.  The original code ignored the bus_setup_intr() return value,
+ * leaking the IRQ resource and reporting a working vtimer on failure.
+ */
+static int
+vtimer_attach(device_t dev)
+{
+ struct vtimer_softc *sc;
+ int error;
+
+ sc = device_get_softc(dev);
+
+ sc->rid = 0;
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->rid, RF_ACTIVE);
+ if (sc->res == NULL)
+ return (ENXIO);
+
+ error = bus_setup_intr(dev, sc->res, INTR_TYPE_CLK,
+ vtimer_virtual_timer_intr, NULL, NULL, &sc->ihl);
+ if (error != 0) {
+ bus_release_resource(dev, SYS_RES_IRQ, sc->rid, sc->res);
+ sc->res = NULL;
+ return (error);
+ }
+
+ have_vtimer = true;
+ return (0);
+}
+
+static device_method_t vtimer_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, vtimer_probe),
+ DEVMETHOD(device_attach, vtimer_attach),
+
+ /* End */
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_0(vtimer, vtimer_driver, vtimer_methods,
+ sizeof(struct vtimer_softc));
+
+/* Attach as a child of the generic_timer driver. */
+DRIVER_MODULE(vtimer, generic_timer, vtimer_driver, 0, 0);
diff --git a/sys/arm64/vmm/mmu.h b/sys/arm64/vmm/mmu.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/mmu.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MMU_H_
+#define _VMM_MMU_H_
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "hyp.h"
+
+/* Linker-provided bounds of the hypervisor (EL2) code and image. */
+extern char vmm_hyp_code;
+extern char vmm_hyp_code_end;
+
+extern char _vmm_start;
+extern char _vmm_end;
+
+/* Hypervisor (EL2) page-table management. */
+bool vmmpmap_init(void);
+void vmmpmap_fini(void);
+uint64_t vmmpmap_to_ttbr0(void);
+bool vmmpmap_enter(vm_offset_t, vm_size_t, vm_paddr_t, vm_prot_t);
+void vmmpmap_remove(vm_offset_t, vm_size_t, bool);
+
+#endif
diff --git a/sys/arm64/vmm/psci.h b/sys/arm64/vmm/psci.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/psci.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _PSCI_H_
+#define _PSCI_H_
+
+#include "arm64.h"
+
+/* Handle a PSCI call made by the guest on the given vCPU. */
+int psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme,
+ bool *retu);
+
+#endif
diff --git a/sys/arm64/vmm/reset.h b/sys/arm64/vmm/reset.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/reset.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_RESET_H_
+#define _VMM_RESET_H_
+
+/* Reset a vCPU's EL0/EL1 and EL2 register state to architectural defaults. */
+void reset_vm_el01_regs(void *vcpu);
+void reset_vm_el2_regs(void *vcpu);
+
+#endif
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm.c
@@ -0,0 +1,1599 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/cpuset.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vm.h>
+#include <machine/pcb.h>
+#include <machine/param.h>
+#include <machine/smp.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/armreg.h>
+
+#include <dev/pci/pcireg.h>
+
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+#include "vmm_mem.h"
+#include "arm64.h"
+#include "mmu.h"
+#include "psci.h"
+
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define BSP 0 /* the boostrap processor */
+
+/*
+ * Per-virtual-CPU software state.  'mtx' is a spin lock guarding 'state'
+ * and 'hostcpu'; see vcpu_set_state_locked() for the legal transitions.
+ */
+struct vcpu {
+	int		flags;
+	enum vcpu_state	state;		/* protected by 'mtx' */
+	struct mtx	mtx;		/* spin lock */
+	int		hostcpu;	/* host cpuid this vcpu last ran on */
+	int		vcpuid;		/* index into vm->vcpu[] */
+	void		*stats;		/* statistics buffer */
+	struct vm_exit	exitinfo;	/* exit information (see vm_exitinfo()) */
+	uint64_t	nextpc;		/* (x) next instruction to execute */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ bool sysmem;
+ vm_object_t object;
+};
+#define VM_MAX_MEMSEGS 3
+
+struct mem_map {
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
+};
+#define VM_MAX_MEMMAPS 4
+
+struct vmm_mmio_region {
+ uint64_t start;
+ uint64_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+#define VM_MAX_MMIO_REGIONS 4
+
+/*
+ * Initialization:
+ * (o) initialized the first time the VM is created
+ * (i) initialized when VM is created and when it is reinitialized
+ * (x) initialized before use
+ */
+struct vm {
+ void *cookie; /* (i) cpu-specific data */
+ volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
+ int suspend; /* (i) stop VM execution */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+ struct vmspace *vmspace; /* (o) guest's address space */
+ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
+ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
+ /* (o) guest MMIO regions */
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
+};
+
+static bool vmm_initialized = false;
+
+static struct vmm_ops *ops = NULL;
+
+#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0)
+#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
+
+#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
+#define VMRUN(vmi, vcpu, pc, pmap, evinfo) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, pc, pmap, evinfo) : ENXIO)
+#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
+#define VMSPACE_ALLOC(min, max) \
+ (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
+#define VMSPACE_FREE(vmspace) \
+ (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
+#define VMGETREG(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETREG(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
+#define VMGETCAP(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETCAP(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+
+#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
+#define fpu_stop_emulating() clts()
+
+static int vm_handle_wfi(struct vm *vm, int vcpuid,
+ struct vm_exit *vme, bool *retu);
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+/*
+ * Halt the guest if all vcpus are executing a HLT instruction with
+ * interrupts disabled.
+ */
+static int halt_detection_enabled = 1;
+SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
+ &halt_detection_enabled, 0,
+ "Halt VM if all vcpus execute HLT with interrupts disabled");
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+static int trace_guest_exceptions;
+SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
+ &trace_guest_exceptions, 0,
+ "Trap into hypervisor on all guest exceptions and reflect them back");
+
+/*
+ * Synthetic CPU ID-register values advertised to guests.  vmm_init()
+ * passes this descriptor through update_cpu_desc() before use, and the
+ * ID_SPECIAL_REG() entries in vmm_special_regs[] serve these fields to
+ * the guest when it traps on an ID register read.
+ */
+static struct cpu_desc vmm_desc = {
+	.id_aa64afr0 = 0,
+	.id_aa64afr1 = 0,
+	.id_aa64dfr0 =
+	    (0xful << ID_AA64DFR0_CTX_CMPs_SHIFT) |
+	    (0xful << ID_AA64DFR0_WRPs_SHIFT) |
+	    (0xful << ID_AA64DFR0_BRPs_SHIFT) |
+	    ID_AA64DFR0_PMUVer_3 |
+	    ID_AA64DFR0_DebugVer_8,
+	.id_aa64dfr1 = 0,
+	.id_aa64isar0 =
+	    ID_AA64ISAR0_TLB_TLBIOSR |
+	    ID_AA64ISAR0_SHA3_IMPL |
+	    ID_AA64ISAR0_RDM_IMPL |
+	    ID_AA64ISAR0_Atomic_IMPL |
+	    ID_AA64ISAR0_CRC32_BASE |
+	    ID_AA64ISAR0_SHA2_512 |
+	    ID_AA64ISAR0_SHA1_BASE |
+	    ID_AA64ISAR0_AES_PMULL,
+	.id_aa64isar1 = 0,
+	.id_aa64mmfr0 =
+	    ID_AA64MMFR0_TGran4_IMPL |
+	    ID_AA64MMFR0_TGran64_IMPL |
+	    ID_AA64MMFR0_TGran16_IMPL |
+	    ID_AA64MMFR0_ASIDBits_16 |
+	    ID_AA64MMFR0_PARange_4P,
+	.id_aa64mmfr1 =
+	    ID_AA64MMFR1_SpecSEI_IMPL |
+	    ID_AA64MMFR1_PAN_ATS1E1 |
+	    ID_AA64MMFR1_HAFDBS_AF,
+	.id_aa64mmfr2 = 0,
+	.id_aa64pfr0 =
+	    ID_AA64PFR0_GIC_CPUIF_NONE |	/* no GIC system-register CPU interface advertised */
+	    ID_AA64PFR0_AdvSIMD_HP |
+	    ID_AA64PFR0_FP_HP |
+	    ID_AA64PFR0_EL3_64 |
+	    ID_AA64PFR0_EL2_64 |
+	    ID_AA64PFR0_EL1_64 |
+	    ID_AA64PFR0_EL0_64,
+	.id_aa64pfr1 = 0,
+};
+
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
+static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+
+/*
+ * Tear down the per-vCPU state created by vcpu_init().  Nothing to do
+ * yet; the commented-out local has been removed until this gains a body.
+ */
+static void
+vcpu_cleanup(struct vm *vm, int i, bool destroy)
+{
+}
+
+/*
+ * Initialize the software state of a vCPU.  The lock and identifiers are
+ * set up only on first creation ('create' == true); reinitialization of
+ * an existing VM leaves them intact.
+ */
+static void
+vcpu_init(struct vm *vm, uint32_t vcpu_id, bool create)
+{
+	struct vcpu *vcpu;
+
+	vcpu = &vm->vcpu[vcpu_id];
+
+	if (create) {
+		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
+		    "initialized", vcpu_id));
+		vcpu_lock_init(vcpu);
+		vcpu->hostcpu = NOCPU;	/* not running anywhere yet */
+		vcpu->vcpuid = vcpu_id;
+	}
+}
+
+/*
+ * Return a pointer to the vCPU's exit-information structure.  Panics on
+ * an out-of-range cpuid since callers pass only ids of active vcpus.
+ */
+struct vm_exit *
+vm_exitinfo(struct vm *vm, int cpuid)
+{
+	struct vcpu *vcpu;
+
+	if (cpuid < 0 || cpuid >= vm->maxcpus)
+		panic("vm_exitinfo: invalid cpuid %d", cpuid);
+
+	vcpu = &vm->vcpu[cpuid];
+
+	return (&vcpu->exitinfo);
+}
+
+/*
+ * One-time module initialization: select the arm64 backend, sanitize the
+ * synthetic guest ID registers against the host, then initialize the
+ * hypervisor mode via the backend's init hook.
+ */
+static int
+vmm_init(void)
+{
+	ops = &vmm_ops_arm;
+
+	/* Mask vmm_desc's advertised features against host capabilities. */
+	update_cpu_desc(&vmm_desc);
+
+	return (VMM_INIT(0));
+}
+
+/*
+ * Module load/unload event handler for the vmm(4) kernel module.
+ */
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+	int error;
+
+	switch (what) {
+	case MOD_LOAD:
+		vmmdev_init();
+		error = vmm_init();
+		if (error == 0)
+			vmm_initialized = true;
+		break;
+	case MOD_UNLOAD:
+		error = vmmdev_cleanup();
+		if (error == 0 && vmm_initialized) {
+			error = VMM_CLEANUP();
+			/*
+			 * Something bad happened during cleanup - prevent
+			 * new VMs from being created while partially torn
+			 * down (mirrors the amd64 vmm module handler).
+			 */
+			if (error)
+				vmm_initialized = false;
+		}
+		break;
+	default:
+		error = 0;
+		break;
+	}
+	return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+/*
+ * (Re)initialize the runtime state of a VM.  Called with 'create' true
+ * from vm_create() and false from vm_reinit(); backend state, CPU sets
+ * and MMIO handler table are reset either way.
+ */
+static void
+vm_init(struct vm *vm, bool create)
+{
+	int i;
+
+	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
+
+	CPU_ZERO(&vm->active_cpus);
+	CPU_ZERO(&vm->debug_cpus);
+
+	vm->suspend = 0;
+	CPU_ZERO(&vm->suspended_cpus);
+
+	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
+
+	for (i = 0; i < vm->maxcpus; i++)
+		vcpu_init(vm, i, create);
+}
+
+/*
+ * Create a new virtual machine named 'name'.  Allocates the guest
+ * address space and the VM structure; on success *retvm points at the
+ * fully initialized VM.  Returns ENXIO if the module failed to
+ * initialize, EINVAL on a bad name, ENOMEM if no vmspace is available.
+ */
+int
+vm_create(const char *name, struct vm **retvm)
+{
+	struct vm *vm;
+	struct vmspace *vmspace;
+
+	/*
+	 * If vmm.ko could not be successfully initialized then don't attempt
+	 * to create the virtual machine.
+	 */
+	if (!vmm_initialized)
+		return (ENXIO);
+
+	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+		return (EINVAL);
+
+	/* 1ul << 39: 39-bit guest physical address space - TODO confirm
+	 * this matches the stage 2 translation configuration. */
+	vmspace = VMSPACE_ALLOC(0, 1ul << 39);
+	if (vmspace == NULL)
+		return (ENOMEM);
+
+	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+	strcpy(vm->name, name);	/* length checked above */
+	vm->vmspace = vmspace;
+
+	vm->sockets = 1;
+	vm->cores = 1;			/* XXX backwards compatibility */
+	vm->threads = 1;		/* XXX backwards compatibility */
+	vm->maxcpus = VM_MAXCPU;	/* XXX temp to keep code working */
+
+	vm_init(vm, true);
+
+	*retvm = vm;
+	return (0);
+}
+
+/* Report the VM's CPU topology as set by vm_set_topology(). */
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus)
+{
+	*sockets = vm->sockets;
+	*cores = vm->cores;
+	*threads = vm->threads;
+	*maxcpus = vm->maxcpus;
+}
+
+/* Return the maximum number of vCPUs this VM supports. */
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+	return (vm->maxcpus);
+}
+
+/*
+ * Set the VM's CPU topology.  'maxcpus' must currently be 0 (changing
+ * it is unsupported) and sockets*cores*threads may not exceed maxcpus.
+ */
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus)
+{
+	if (maxcpus != 0)
+		return (EINVAL);	/* XXX remove when supported */
+	if ((sockets * cores * threads) > vm->maxcpus)
+		return (EINVAL);
+	/* XXX need to check sockets * cores * threads == vCPU, how? */
+	vm->sockets = sockets;
+	vm->cores = cores;
+	vm->threads = threads;
+	vm->maxcpus = VM_MAXCPU;	/* XXX temp to keep code working */
+	return(0);
+}
+
+/*
+ * Tear down VM state.  With 'destroy' false this is a reset: only
+ * device (non-sysmem) mappings are removed.  With 'destroy' true the
+ * whole guest address space is released via VMSPACE_FREE(), which also
+ * disposes of the remaining mappings.
+ */
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+	struct mem_map *mm;
+	pmap_t pmap;
+	int i;
+
+	if (destroy) {
+		/*
+		 * Make sure no host CPU still caches this VM's pmap as its
+		 * current VM pmap before the vmspace goes away.
+		 */
+		pmap = vmspace_pmap(vm->vmspace);
+		sched_pin();
+		PCPU_SET(curvmpmap, NULL);
+		sched_unpin();
+		CPU_FOREACH(i) {
+			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
+		}
+	}
+
+	vgic_v3_detach_from_vm(vm);
+
+	for (i = 0; i < vm->maxcpus; i++)
+		vcpu_cleanup(vm, i, destroy);
+
+	VMCLEANUP(vm->cookie);
+
+	/*
+	 * System memory is removed from the guest address space only when
+	 * the VM is destroyed. This is because the mapping remains the same
+	 * across VM reset.
+	 *
+	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
+	 * so those mappings are removed on a VM reset.
+	 */
+	if (!destroy) {
+		for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+			mm = &vm->mem_maps[i];
+			/*
+			 * 'destroy' is always false in this branch, so the
+			 * redundant "destroy ||" test was dropped: only
+			 * device mappings are torn down on reset.
+			 */
+			if (!sysmem_mapping(vm, mm))
+				vm_free_memmap(vm, i);
+		}
+	}
+
+	if (destroy) {
+		for (i = 0; i < VM_MAX_MEMSEGS; i++)
+			vm_free_memseg(vm, i);
+
+		/* Frees all remaining guest mappings along with the space. */
+		VMSPACE_FREE(vm->vmspace);
+		vm->vmspace = NULL;
+	}
+}
+
+/* Destroy the VM and free its structure. */
+void
+vm_destroy(struct vm *vm)
+{
+	vm_cleanup(vm, true);
+	free(vm, M_VMM);
+}
+
+/*
+ * Reset the VM in place.  Only allowed when every active vCPU is
+ * suspended; returns EBUSY otherwise.
+ */
+int
+vm_reinit(struct vm *vm)
+{
+	int error;
+
+	/*
+	 * A virtual machine can be reset only if all vcpus are suspended.
+	 */
+	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+		vm_cleanup(vm, false);
+		vm_init(vm, false);
+		error = 0;
+	} else {
+		error = EBUSY;
+	}
+
+	return (error);
+}
+
+/* Return the VM's name as given to vm_create(). */
+const char *
+vm_name(struct vm *vm)
+{
+	return (vm->name);
+}
+
+/*
+ * Map host physical MMIO range 'hpa' into the guest at 'gpa'.
+ * Returns ENOMEM if the mapping object could not be allocated.
+ */
+int
+vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
+{
+	vm_object_t obj;
+
+	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
+		return (ENOMEM);
+	else
+		return (0);
+}
+
+/* Remove an MMIO mapping previously created by vm_map_mmio(). */
+int
+vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+
+	vmm_mmio_free(vm->vmspace, gpa, len);
+	return (0);
+}
+
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
+{
+	struct mem_map *mm;
+	int i;
+
+#ifdef INVARIANTS
+	/* The running-vcpu requirement is what makes mem_maps[] stable. */
+	int hostcpu, state;
+	state = vcpu_get_state(vm, vcpuid, &hostcpu);
+	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+			return (true);		/* 'gpa' is sysmem or devmem */
+	}
+
+#if 0
+	/* PCI passthru is not implemented on arm64 yet. */
+	if (ppt_is_mmio(vm, gpa))
+		return (true);			/* 'gpa' is pci passthru mmio */
+#endif
+
+	return (false);
+}
+
+/*
+ * Allocate the backing VM object for memory segment 'ident'.  Returns
+ * EEXIST if an identical segment already exists, EINVAL on a mismatched
+ * or malformed request, ENOMEM if the object cannot be allocated.
+ */
+int
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+{
+	struct mem_seg *seg;
+	vm_object_t obj;
+
+	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+		return (EINVAL);
+
+	if (len == 0 || (len & PAGE_MASK))
+		return (EINVAL);	/* must be a non-zero page multiple */
+
+	seg = &vm->mem_segs[ident];
+	if (seg->object != NULL) {
+		if (seg->len == len && seg->sysmem == sysmem)
+			return (EEXIST);
+		else
+			return (EINVAL);
+	}
+
+	obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+	if (obj == NULL)
+		return (ENOMEM);
+
+	seg->len = len;
+	seg->object = obj;
+	seg->sysmem = sysmem;
+	return (0);
+}
+
+/* Look up memory segment 'ident'; each out-pointer may be NULL. */
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+    vm_object_t *objptr)
+{
+	struct mem_seg *seg;
+
+	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+		return (EINVAL);
+
+	seg = &vm->mem_segs[ident];
+	if (len)
+		*len = seg->len;
+	if (sysmem)
+		*sysmem = seg->sysmem;
+	if (objptr)
+		*objptr = seg->object;
+	return (0);
+}
+
+/* Release segment 'ident' and drop its object reference, if any. */
+void
+vm_free_memseg(struct vm *vm, int ident)
+{
+	struct mem_seg *seg;
+
+	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+	    ("%s: invalid memseg ident %d", __func__, ident));
+
+	seg = &vm->mem_segs[ident];
+	if (seg->object != NULL) {
+		vm_object_deallocate(seg->object);
+		bzero(seg, sizeof(struct mem_seg));
+	}
+}
+
+/*
+ * Map [first, first+len) of memory segment 'segid' into the guest
+ * address space at 'gpa'.  All boundaries must be page-aligned and the
+ * range must lie inside the segment.  Records the mapping in a free
+ * mem_maps[] slot (ENOSPC if none) and optionally wires the pages.
+ */
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+    size_t len, int prot, int flags)
+{
+	struct mem_seg *seg;
+	struct mem_map *m, *map;
+	vm_ooffset_t last;
+	int i, error;
+
+	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+		return (EINVAL);
+
+	if (flags & ~VM_MEMMAP_F_WIRED)
+		return (EINVAL);
+
+	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+		return (EINVAL);
+
+	seg = &vm->mem_segs[segid];
+	if (seg->object == NULL)
+		return (EINVAL);	/* segment not allocated yet */
+
+	last = first + len;
+	if (first < 0 || first >= last || last > seg->len)
+		return (EINVAL);
+
+	if ((gpa | first | last) & PAGE_MASK)
+		return (EINVAL);	/* everything must be page-aligned */
+
+	/* Find a free mapping slot. */
+	map = NULL;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		m = &vm->mem_maps[i];
+		if (m->len == 0) {
+			map = m;
+			break;
+		}
+	}
+
+	if (map == NULL)
+		return (ENOSPC);
+
+	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
+	if (error != KERN_SUCCESS)
+		return (EFAULT);
+
+	/* The map now holds its own reference on the segment's object. */
+	vm_object_reference(seg->object);
+
+	if (flags & VM_MEMMAP_F_WIRED) {
+		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+		if (error != KERN_SUCCESS) {
+			/* Undo the mapping established above. */
+			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
+			    EFAULT);
+		}
+	}
+
+	map->gpa = gpa;
+	map->len = len;
+	map->segoff = first;
+	map->segid = segid;
+	map->prot = prot;
+	map->flags = flags;
+	return (0);
+}
+
+/*
+ * Iterator over guest memory mappings: find the mapping with the lowest
+ * starting address >= *gpa and return its parameters (each out-pointer
+ * other than 'gpa' may be NULL).  Returns ENOENT when no such mapping.
+ */
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+	struct mem_map *mm, *mmnext;
+	int i;
+
+	mmnext = NULL;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (mm->len == 0 || mm->gpa < *gpa)
+			continue;
+		if (mmnext == NULL || mm->gpa < mmnext->gpa)
+			mmnext = mm;
+	}
+
+	if (mmnext != NULL) {
+		*gpa = mmnext->gpa;
+		if (segid)
+			*segid = mmnext->segid;
+		if (segoff)
+			*segoff = mmnext->segoff;
+		if (len)
+			*len = mmnext->len;
+		if (prot)
+			*prot = mmnext->prot;
+		if (flags)
+			*flags = mmnext->flags;
+		return (0);
+	} else {
+		return (ENOENT);
+	}
+}
+
+/* Remove mapping 'ident' from the guest map and clear its slot. */
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+	struct mem_map *mm;
+	int error __diagused;
+
+	mm = &vm->mem_maps[ident];
+	if (mm->len) {
+		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+		    mm->gpa + mm->len);
+		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+		    __func__, error));
+		bzero(mm, sizeof(struct mem_map));
+	}
+}
+
+/* True if 'mm' is an in-use mapping backed by a system-memory segment. */
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+	if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+		return (true);
+	else
+		return (false);
+}
+
+/* Highest guest physical address covered by any sysmem mapping. */
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
+{
+	struct mem_map *mm;
+	vm_paddr_t maxaddr;
+	int i;
+
+	maxaddr = 0;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (sysmem_mapping(vm, mm)) {
+			if (maxaddr < mm->gpa + mm->len)
+				maxaddr = mm->gpa + mm->len;
+		}
+	}
+	return (maxaddr);
+}
+
+/* Register-emulation read handler: read-as-zero. */
+static int
+vmm_reg_raz(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+	*rval = 0;
+	return (0);
+}
+
+/* Read handler returning the fixed value 'arg' points at. */
+static int
+vmm_reg_read_arg(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+	*rval = *(uint64_t *)arg;
+	return (0);
+}
+
+/* Write handler: writes-ignored. */
+static int
+vmm_reg_wi(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+	return (0);
+}
+
+
+/*
+ * XXX(review): these two includes sit in the middle of the file and
+ * nothing below appears to use them; they likely belong at the top of
+ * the file or should be dropped - confirm before committing.
+ */
+#include <sys/queue.h>
+#include <sys/linker.h>
+
+/*
+ * Table of system registers that trap to the hypervisor and are emulated
+ * in the kernel.  Entries are matched against the ESR ISS of the trapped
+ * MSR/MRS instruction in vm_handle_reg_emul().
+ */
+static struct {
+	uint32_t	esr_iss;	/* encoded op0/op1/CRn/CRm/op2 */
+	uint32_t	esr_mask;	/* which ISS bits must match */
+	reg_read_t	reg_read;
+	reg_write_t	reg_write;
+	void		*arg;		/* passed to the handlers */
+} vmm_special_regs[] = {
+/* Entry with explicit read/write handlers. */
+#define	SPECIAL_REG(_reg, _read, _write)				\
+	{								\
+		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
+		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
+		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
+		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
+		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
+		.esr_mask = ISS_MSR_REG_MASK,				\
+		.reg_read = (_read),					\
+		.reg_write = (_write),					\
+		.arg = NULL,						\
+	}
+/* ID register: reads return the named vmm_desc field, writes ignored. */
+#define	ID_SPECIAL_REG(_reg, _name)					\
+	{								\
+		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) |	\
+		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) |		\
+		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) |		\
+		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) |		\
+		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT),		\
+		.esr_mask = ISS_MSR_REG_MASK,				\
+		.reg_read = vmm_reg_read_arg,				\
+		.reg_write = vmm_reg_wi,				\
+		.arg = &(vmm_desc._name),				\
+	}
+
+	/* ID registers */
+	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
+	ID_SPECIAL_REG(ID_AA64PFR1_EL1, id_aa64pfr1),
+
+	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
+	ID_SPECIAL_REG(ID_AA64DFR1_EL1, id_aa64dfr1),
+
+	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
+	ID_SPECIAL_REG(ID_AA64ISAR1_EL1, id_aa64isar1),
+
+	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
+	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),
+
+	/*
+	 * All other ID registers are read as zero.
+	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
+	 * Only the top bit of CRm is in the mask, so CRm values 0-7 match.
+	 */
+	{
+		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
+		    (0 << ISS_MSR_OP1_SHIFT) |
+		    (0 << ISS_MSR_CRn_SHIFT) |
+		    (0 << ISS_MSR_CRm_SHIFT),
+		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
+		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
+		.reg_read = vmm_reg_raz,
+		.reg_write = vmm_reg_wi,
+		.arg = NULL,
+	},
+
+	/* Counter physical registers */
+	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
+	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
+	    vtimer_phys_cval_write),
+	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
+	    vtimer_phys_tval_write),
+	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
+
+	/* GICv3 registers */
+	SPECIAL_REG(ICC_SGI1R_EL1, vgic_v3_icc_sgi1r_read,
+	    vgic_v3_icc_sgi1r_write),
+#undef SPECIAL_REG
+};
+
+/*
+ * Handle a trapped system-register access by searching vmm_special_regs[]
+ * for a matching entry.  On a match, emulate in the kernel and clear
+ * *retu; otherwise set *retu so userland handles the exit.
+ */
+static int
+vm_handle_reg_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vm_exit *vme;
+	struct vre *vre;
+	int i, rv;
+
+	vme = vm_exitinfo(vm, vcpuid);
+	vre = &vme->u.reg_emul.vre;
+
+	for (i = 0; i < nitems(vmm_special_regs); i++) {
+		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
+		    vmm_special_regs[i].esr_iss) {
+			rv = vmm_emulate_register(vm, vcpuid, vre,
+			    vmm_special_regs[i].reg_read,
+			    vmm_special_regs[i].reg_write,
+			    vmm_special_regs[i].arg);
+			if (rv == 0) {
+				*retu = false;
+			}
+			return (rv);
+		}
+	}
+
+	/* No kernel emulation available; punt to userland. */
+	*retu = true;
+	return (0);
+}
+
+/*
+ * Register in-kernel MMIO emulation handlers for the guest physical
+ * range [start, start + size).  Panics if all VM_MAX_MMIO_REGIONS slots
+ * are taken.
+ */
+void
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
+{
+	int i;
+
+	for (i = 0; i < nitems(vm->mmio_region); i++) {
+		/* A zeroed start and end marks a free slot. */
+		if (vm->mmio_region[i].start == 0 &&
+		    vm->mmio_region[i].end == 0) {
+			vm->mmio_region[i].start = start;
+			vm->mmio_region[i].end = start + size;
+			vm->mmio_region[i].read = mmio_read;
+			vm->mmio_region[i].write = mmio_write;
+			return;
+		}
+	}
+
+	panic("%s: No free MMIO region", __func__);
+}
+
+/*
+ * Remove a handler registered with vm_register_inst_handler().  The
+ * range must match exactly; panics otherwise.
+ */
+void
+vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
+{
+	int i;
+
+	for (i = 0; i < nitems(vm->mmio_region); i++) {
+		if (vm->mmio_region[i].start == start &&
+		    vm->mmio_region[i].end == start + size) {
+			memset(&vm->mmio_region[i], 0,
+			    sizeof(vm->mmio_region[i]));
+			return;
+		}
+	}
+
+	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
+	    start + size);
+}
+
+/*
+ * Handle a data-abort exit by emulating the faulting access against a
+ * registered MMIO region.  Falls through to userland (*retu = true)
+ * when no vgic is attached or no region covers the fault address.
+ */
+static int
+vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vm_exit *vme;
+	struct vie *vie;
+	struct hyp *hyp = vm->cookie;
+	uint64_t fault_ipa;
+	struct vm_guest_paging *paging;
+	struct vmm_mmio_region *vmr;
+	int error, i;
+
+	if (!hyp->vgic_attached)
+		goto out_user;
+
+	vme = vm_exitinfo(vm, vcpuid);
+	vie = &vme->u.inst_emul.vie;
+	paging = &vme->u.inst_emul.paging;
+
+	fault_ipa = vme->u.inst_emul.gpa;
+
+	/* Find the MMIO region containing the faulting IPA, if any. */
+	vmr = NULL;
+	for (i = 0; i < nitems(vm->mmio_region); i++) {
+		if (vm->mmio_region[i].start <= fault_ipa &&
+		    vm->mmio_region[i].end > fault_ipa) {
+			vmr = &vm->mmio_region[i];
+			break;
+		}
+	}
+	if (vmr == NULL)
+		goto out_user;
+
+	error = vmm_emulate_instruction(vm, vcpuid, fault_ipa, vie,
+	    paging, vmr->read, vmr->write, retu);
+	return (error);
+
+out_user:
+	*retu = true;
+	return (0);
+}
+
+/*
+ * Initiate VM suspension with reason 'how'.  Returns EALREADY if a
+ * suspend is already in progress; otherwise kicks every active vCPU so
+ * it notices the pending suspend.
+ */
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+	int i;
+
+	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+		return (EINVAL);
+
+	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+		VM_CTR2(vm, "virtual machine already suspended %d/%d",
+		    vm->suspend, how);
+		return (EALREADY);
+	}
+
+	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+	/*
+	 * Notify all active vcpus that they are now suspended.
+	 */
+	for (i = 0; i < vm->maxcpus; i++) {
+		if (CPU_ISSET(i, &vm->active_cpus))
+			vcpu_notify_event(vm, i, false);
+	}
+
+	return (0);
+}
+
+/* Fill in a VM_EXITCODE_SUSPENDED exit for the vCPU at 'pc'. */
+void
+vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t pc)
+{
+	struct vm_exit *vmexit;
+
+	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+	vmexit = vm_exitinfo(vm, vcpuid);
+	vmexit->pc = pc;
+	vmexit->inst_length = 4;	/* all AArch64 instructions are 4 bytes */
+	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+	vmexit->u.suspended.how = vm->suspend;
+}
+
+/* Mark a vCPU active; EBUSY if it already is. */
+int
+vm_activate_cpu(struct vm *vm, int vcpuid)
+{
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (CPU_ISSET(vcpuid, &vm->active_cpus))
+		return (EBUSY);
+
+	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
+	return (0);
+
+}
+
+/*
+ * Place one vCPU (or all active vCPUs when vcpuid == -1) into the debug
+ * set and notify them so they stop for the debugger.
+ */
+int
+vm_suspend_cpu(struct vm *vm, int vcpuid)
+{
+	int i;
+
+	if (vcpuid < -1 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (vcpuid == -1) {
+		vm->debug_cpus = vm->active_cpus;
+		for (i = 0; i < vm->maxcpus; i++) {
+			if (CPU_ISSET(i, &vm->active_cpus))
+				vcpu_notify_event(vm, i, false);
+		}
+	} else {
+		if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+			return (EINVAL);
+
+		CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
+		vcpu_notify_event(vm, vcpuid, false);
+	}
+	return (0);
+}
+
+/* Remove one vCPU (or all when vcpuid == -1) from the debug set. */
+int
+vm_resume_cpu(struct vm *vm, int vcpuid)
+{
+
+	if (vcpuid < -1 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (vcpuid == -1) {
+		CPU_ZERO(&vm->debug_cpus);
+	} else {
+		if (!CPU_ISSET(vcpuid, &vm->debug_cpus))
+			return (EINVAL);
+
+		CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
+	}
+	return (0);
+}
+
+
+/* Snapshot of the set of active vCPUs. */
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+	return (vm->active_cpus);
+}
+
+/* Snapshot of the set of vCPUs stopped for the debugger. */
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+	return (vm->debug_cpus);
+}
+
+/* Snapshot of the set of suspended vCPUs. */
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+	return (vm->suspended_cpus);
+}
+
+/* Return the vCPU's statistics buffer. */
+void *
+vcpu_stats(struct vm *vm, int vcpuid)
+{
+
+	return (vm->vcpu[vcpuid].stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ * to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+/*
+ * Kick a vCPU so it observes a pending event: IPI it if running on
+ * another host CPU, wake it if sleeping.  Caller holds the vcpu lock.
+ * 'lapic_intr' is an x86 carry-over and must be false on arm64.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
+{
+	int hostcpu;
+
+	KASSERT(lapic_intr == false, ("%s: lapic_intr != false", __func__));
+	hostcpu = vcpu->hostcpu;
+	if (vcpu->state == VCPU_RUNNING) {
+		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+		if (hostcpu != curcpu) {
+#if 0
+			/* x86 vlapic path, not applicable on arm64. */
+			if (lapic_intr) {
+				vlapic_post_intr(vcpu->vlapic, hostcpu,
+				    vmm_ipinum);
+			} else
+#endif
+			{
+				ipi_cpu(hostcpu, vmm_ipinum);
+			}
+		} else {
+			/*
+			 * If the 'vcpu' is running on 'curcpu' then it must
+			 * be sending a notification to itself (e.g. SELF_IPI).
+			 * The pending event will be picked up when the vcpu
+			 * transitions back to guest context.
+			 */
+		}
+	} else {
+		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+		    "with hostcpu %d", vcpu->state, hostcpu));
+		if (vcpu->state == VCPU_SLEEPING)
+			wakeup_one(vcpu);
+	}
+}
+
+/* Locked wrapper around vcpu_notify_event_locked(). */
+void
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+{
+	struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	vcpu_notify_event_locked(vcpu, lapic_intr);
+	vcpu_unlock(vcpu);
+}
+
+/*
+ * Transition the vCPU state machine with the vcpu lock held.  Returns
+ * EBUSY when the transition is not one of the legal ones listed below;
+ * wakes any vcpu_set_state() waiters when the vcpu goes back to IDLE.
+ */
+static int
+vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
+    bool from_idle)
+{
+	struct vcpu *vcpu;
+	int error;
+
+	vcpu = &vm->vcpu[vcpuid];
+	vcpu_assert_locked(vcpu);
+
+	/*
+	 * State transitions from the vmmdev_ioctl() must always begin from
+	 * the VCPU_IDLE state. This guarantees that there is only a single
+	 * ioctl() operating on a vcpu at any point.
+	 */
+	if (from_idle) {
+		while (vcpu->state != VCPU_IDLE) {
+			/* Nudge the current owner and wait for IDLE. */
+			vcpu_notify_event_locked(vcpu, false);
+			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+		}
+	} else {
+		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+		    "vcpu idle state"));
+	}
+
+	if (vcpu->state == VCPU_RUNNING) {
+		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+	} else {
+		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+		    "vcpu that is not running", vcpu->hostcpu));
+	}
+
+	/*
+	 * The following state transitions are allowed:
+	 * IDLE -> FROZEN -> IDLE
+	 * FROZEN -> RUNNING -> FROZEN
+	 * FROZEN -> SLEEPING -> FROZEN
+	 */
+	switch (vcpu->state) {
+	case VCPU_IDLE:
+	case VCPU_RUNNING:
+	case VCPU_SLEEPING:
+		error = (newstate != VCPU_FROZEN);
+		break;
+	case VCPU_FROZEN:
+		error = (newstate == VCPU_FROZEN);
+		break;
+	default:
+		error = 1;
+		break;
+	}
+
+	if (error)
+		return (EBUSY);
+
+	vcpu->state = newstate;
+	if (newstate == VCPU_RUNNING)
+		vcpu->hostcpu = curcpu;
+	else
+		vcpu->hostcpu = NOCPU;
+
+	if (newstate == VCPU_IDLE)
+		wakeup(&vcpu->state);
+
+	return (0);
+}
+
+/* Like vcpu_set_state() but the transition must succeed; panics if not. */
+static void
+vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+{
+	int error;
+
+	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
+		panic("Error %d setting state to %d\n", error, newstate);
+}
+
+/* Locked variant of vcpu_require_state(). */
+static void
+vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
+{
+	int error;
+
+	if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
+		panic("Error %d setting state to %d", error, newstate);
+}
+
+/* Query a per-vCPU capability via the backend. */
+int
+vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
+{
+	if (vcpu < 0 || vcpu >= vm->maxcpus)
+		return (EINVAL);
+
+	if (type < 0 || type >= VM_CAP_MAX)
+		return (EINVAL);
+
+	return (VMGETCAP(vm->cookie, vcpu, type, retval));
+}
+
+/* Set a per-vCPU capability via the backend. */
+int
+vm_set_capability(struct vm *vm, int vcpu, int type, int val)
+{
+	if (vcpu < 0 || vcpu >= vm->maxcpus)
+		return (EINVAL);
+
+	if (type < 0 || type >= VM_CAP_MAX)
+		return (EINVAL);
+
+	return (VMSETCAP(vm->cookie, vcpu, type, val));
+}
+
+/* Public entry for vCPU state transitions; takes the vcpu lock. */
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
+    bool from_idle)
+{
+	int error;
+	struct vcpu *vcpu;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
+	vcpu_unlock(vcpu);
+
+	return (error);
+}
+
+/*
+ * Return the vCPU's current state and, if 'hostcpu' is non-NULL, the
+ * host CPU it is running on (NOCPU when not running).
+ */
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
+{
+	struct vcpu *vcpu;
+	enum vcpu_state state;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
+
+	vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	state = vcpu->state;
+	if (hostcpu != NULL)
+		*hostcpu = vcpu->hostcpu;
+	vcpu_unlock(vcpu);
+
+	return (state);
+}
+
+/*
+ * Wire the guest page containing 'gpa' and return a host virtual
+ * address for it (the range may not cross a page boundary).  On success
+ * *cookie holds the page for a later vm_gpa_release(); on failure the
+ * return value and *cookie are NULL.
+ */
+void *
+vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
+    void **cookie)
+{
+	int i, count, pageoff;
+	struct mem_map *mm;
+	vm_page_t m;
+#ifdef INVARIANTS
+	/*
+	 * All vcpus are frozen by ioctls that modify the memory map
+	 * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
+	 * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
+	 */
+	int state;
+	KASSERT(vcpuid >= -1 && vcpuid < vm->maxcpus, ("%s: invalid vcpuid %d",
+	    __func__, vcpuid));
+	for (i = 0; i < vm->maxcpus; i++) {
+		if (vcpuid != -1 && vcpuid != i)
+			continue;
+		state = vcpu_get_state(vm, i, NULL);
+		KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+		    __func__, state));
+	}
+#endif
+	pageoff = gpa & PAGE_MASK;
+	if (len > PAGE_SIZE - pageoff)
+		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+	count = 0;
+	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+		mm = &vm->mem_maps[i];
+		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+		    gpa < mm->gpa + mm->len) {
+			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+			break;
+		}
+	}
+
+	if (count == 1) {
+		*cookie = m;
+		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+	} else {
+		*cookie = NULL;
+		return (NULL);
+	}
+}
+
+/* Release a page held by vm_gpa_hold(). */
+void
+vm_gpa_release(void *cookie)
+{
+	vm_page_t m = cookie;
+
+	vm_page_unwire(m, PQ_ACTIVE);
+}
+
+int
+vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
+{
+
+ if (vcpu < 0 || vcpu >= vm->maxcpus)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (VMGETREG(vm->cookie, vcpu, reg, retval));
+}
+
+/*
+ * Write a guest register.  A successful write to VM_REG_ELR_EL2 also
+ * updates the vcpu's saved next program counter so the guest resumes
+ * from the new address on the next vm_run().
+ */
+int
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
+{
+	struct vcpu *vcpu;
+	int error;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (reg >= VM_REG_LAST)
+		return (EINVAL);
+	error = VMSETREG(vm->cookie, vcpuid, reg, val);
+	if (error || reg != VM_REG_ELR_EL2)
+		return (error);
+
+	/* Resume the guest at the newly written program counter. */
+	vcpu = &vm->vcpu[vcpuid];
+	vcpu->nextpc = val;
+
+	return (0);
+}
+
+/*
+ * Return the architecture-private cookie for the VM (cast to struct hyp *
+ * by the callers in this file).
+ */
+void *
+vm_get_cookie(struct vm *vm)
+{
+
+	return (vm->cookie);
+}
+
+/*
+ * Attach the virtual GICv3 distributor and redistributor regions to the
+ * VM at the given guest physical addresses.
+ */
+int
+vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+    uint64_t redist_start, size_t redist_size)
+{
+
+	return (vgic_v3_attach_to_vm(vm, dist_start, dist_size, redist_start,
+	    redist_size));
+}
+
+/* Assert (raise) interrupt 'irq' on the VM's virtual GIC. */
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+	struct hyp *hyp;
+
+	hyp = (struct hyp *)vm->cookie;
+	return (vgic_v3_inject_irq(hyp, -1, irq, true));
+}
+
+/* Deassert (lower) interrupt 'irq' on the VM's virtual GIC. */
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+	struct hyp *hyp;
+
+	hyp = (struct hyp *)vm->cookie;
+	return (vgic_v3_inject_irq(hyp, -1, irq, false));
+}
+
+/*
+ * Deliver an MSI write to the VM.  Only addresses that fall inside the
+ * virtual GIC distributor range are accepted; anything else fails with
+ * EINVAL.  'bus', 'slot' and 'func' identify the source device but are
+ * not used here.
+ */
+int
+vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+    int func)
+{
+	struct hyp *hyp = (struct hyp *)vm->cookie;
+	int error;
+
+	if (addr >= hyp->vgic_dist.start && addr < hyp->vgic_dist.end) {
+		error = vgic_v3_inject_msi(hyp, msg, addr);
+		if (error == 0)
+			return (0);
+	}
+
+	/* TODO: Should we raise an SError? */
+	return (EINVAL);
+}
+
+/*
+ * Handle a guest WFI (wait-for-interrupt) exit: sleep the vcpu until a
+ * virtual interrupt becomes pending or the scheduler wants the CPU back.
+ * Always resumes in the kernel (*retu = false).  'vme' is currently
+ * unused.
+ */
+static int
+vm_handle_wfi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+	struct hyp *hyp;
+	struct vcpu *vcpu;
+	struct hypctx *hypctx;
+
+	vcpu = &vm->vcpu[vcpuid];
+	hyp = vm->cookie;
+	hypctx = &hyp->ctx[vcpuid];
+
+	vcpu_lock(vcpu);
+	while (1) {
+		if (vgic_v3_vcpu_pending_irq(hypctx))
+			break;
+
+		if (vcpu_should_yield(vm, vcpuid))
+			break;
+
+		vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
+		/*
+		 * XXX msleep_spin() cannot be interrupted by signals so
+		 * wake up periodically to check pending signals.
+		 */
+		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
+		vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
+	}
+	vcpu_unlock(vcpu);
+
+	*retu = false;
+	return (0);
+}
+
+/*
+ * Handle a stage 2 translation fault by first letting the pmap update
+ * access/dirty state and, failing that, faulting the page in through the
+ * VM map.  Returns 0 on success or EFAULT if the page cannot be mapped.
+ */
+static int
+vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vm_exit *vme;
+	struct vm_map *map;
+	uint64_t addr, esr;
+	pmap_t pmap;
+	int ftype, rv;
+
+	vme = vm_exitinfo(vm, vcpuid);
+	pmap = vmspace_pmap(vm->vmspace);
+	addr = vme->u.paging.gpa;
+	esr = vme->u.paging.esr;
+
+	/* The page exists, but the page table needs to be updated */
+	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
+		return (0);
+
+	switch (ESR_ELx_EXCEPTION(esr)) {
+	case EXCP_INSN_ABORT_L:
+	case EXCP_DATA_ABORT_L:
+		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
+		break;
+	default:
+		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
+	}
+
+	map = &vm->vmspace->vm_map;
+	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
+	if (rv != KERN_SUCCESS)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * Run a vcpu until an exit that must be handled in userland occurs.
+ *
+ * Exits the backend can resolve in the kernel (paging faults, WFI, PSCI
+ * calls, register/instruction emulation) are handled here and the guest
+ * is re-entered; any other exit is copied into 'vmrun->vm_exit' for the
+ * userland monitor.
+ */
+int
+vm_run(struct vm *vm, struct vm_run *vmrun)
+{
+	struct vm_eventinfo evinfo;
+	int error, vcpuid;
+	struct vcpu *vcpu;
+	struct vm_exit *vme;
+	bool retu;
+	pmap_t pmap;
+
+	vcpuid = vmrun->cpuid;
+
+	if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+		return (EINVAL);
+
+	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+		return (EINVAL);
+
+	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
+		return (EINVAL);
+
+	pmap = vmspace_pmap(vm->vmspace);
+	vcpu = &vm->vcpu[vcpuid];
+	evinfo.rptr = NULL;
+	evinfo.sptr = &vm->suspend;
+	evinfo.iptr = NULL;
+restart:
+	/* The vcpu must not migrate between CPUs while in the guest. */
+	critical_enter();
+	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
+	error = VMRUN(vm->cookie, vcpuid, vcpu->nextpc, pmap, &evinfo);
+	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
+	critical_exit();
+
+	vme = vm_exitinfo(vm, vcpuid);
+	if (error == 0) {
+		retu = false;
+		switch (vme->exitcode) {
+		case VM_EXITCODE_INST_EMUL:
+			vcpu->nextpc = vme->pc + vme->inst_length;
+			error = vm_handle_inst_emul(vm, vcpuid, &retu);
+			break;
+
+		case VM_EXITCODE_REG_EMUL:
+			vcpu->nextpc = vme->pc + vme->inst_length;
+			error = vm_handle_reg_emul(vm, vcpuid, &retu);
+			break;
+
+		case VM_EXITCODE_HVC:
+			/*
+			 * The HVC instruction saves the address for the
+			 * next instruction as the return address.
+			 */
+			vcpu->nextpc = vme->pc;
+			/*
+			 * The PSCI call can change the exit information in the
+			 * case of suspend/reset/poweroff/cpu off/cpu on.
+			 */
+			error = psci_handle_call(vm, vcpuid, vme, &retu);
+			break;
+
+		case VM_EXITCODE_WFI:
+			vcpu->nextpc = vme->pc + vme->inst_length;
+			error = vm_handle_wfi(vm, vcpuid, vme, &retu);
+			break;
+
+		case VM_EXITCODE_PAGING:
+			vcpu->nextpc = vme->pc;
+			error = vm_handle_paging(vm, vcpuid, &retu);
+			break;
+
+		default:
+			/* Handle in userland */
+			vcpu->nextpc = vme->pc;
+			retu = true;
+			break;
+		}
+	}
+
+	if (error == 0 && retu == false)
+		goto restart;
+
+	/* Copy the exit information */
+	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
+
+	return (error);
+}
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -0,0 +1,1076 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/vmem.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/hypervisor.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+#include "hyp.h"
+#include "reset.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define HANDLED 1
+#define UNHANDLED 0
+
+#define UNUSED 0
+
+/* Number of bits in an EL2 virtual address */
+#define EL2_VIRT_BITS 48
+CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);
+
+/* TODO: Move the host hypctx off the stack */
+#define VMM_STACK_PAGES 4
+#define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE)
+
+static int vmm_pmap_levels, vmm_virt_bits;
+
+/* Register values passed to arm_setup_vectors to set in the hypervisor */
+struct vmm_init_regs {
+ uint64_t tcr_el2;
+ uint64_t vtcr_el2;
+};
+
+MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");
+
+extern char hyp_init_vectors[];
+extern char hyp_vectors[];
+extern char hyp_stub_vectors[];
+
+static vm_paddr_t hyp_code_base;
+static size_t hyp_code_len;
+
+static char *stack[MAXCPU];
+static vm_offset_t stack_hyp_va[MAXCPU];
+
+static vmem_t *el2_mem_alloc;
+
+static void arm_setup_vectors(void *arg);
+static void vmm_pmap_clean_stage2_tlbi(void);
+static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool);
+static void vmm_pmap_invalidate_all(uint64_t);
+
+/* Record the vcpu currently active on this physical CPU (NULL for none). */
+static inline void
+arm64_set_active_vcpu(struct hypctx *hypctx)
+{
+
+	PCPU_SET(vcpu, hypctx);
+}
+
+/*
+ * Per-CPU EL2 bring-up, run from smp_rendezvous() on every CPU.
+ *
+ * Installs the initialization vectors, then makes the special hypervisor
+ * call that enables the EL2 MMU with the register values in 'arg' (a
+ * struct vmm_init_regs).
+ */
+static void
+arm_setup_vectors(void *arg)
+{
+	struct vmm_init_regs *el2_regs;
+	char *stack_top;
+	uint32_t sctlr_el2;
+	register_t daif;
+
+	el2_regs = arg;
+	arm64_set_active_vcpu(NULL);
+
+	daif = intr_disable();
+
+	/*
+	 * Install the temporary vectors which will be responsible for
+	 * initializing the VMM when we next trap into EL2.
+	 *
+	 * x0: the exception vector table responsible for hypervisor
+	 * initialization on the next call.
+	 */
+	vmm_call_hyp(vtophys(&vmm_hyp_code));
+
+	/* Top of the per-CPU hypervisor stack mapped by arm_init() */
+	stack_top = (char *)stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
+
+	/*
+	 * Configure the system control register for EL2:
+	 *
+	 * SCTLR_EL2_M: MMU on
+	 * SCTLR_EL2_C: Data cacheability not affected
+	 * SCTLR_EL2_I: Instruction cacheability not affected
+	 * SCTLR_EL2_A: Instruction alignment check
+	 * SCTLR_EL2_SA: Stack pointer alignment check
+	 * SCTLR_EL2_WXN: Treat writable memory as execute never
+	 * ~SCTLR_EL2_EE: Data accesses are little-endian
+	 */
+	sctlr_el2 = SCTLR_EL2_RES1;
+	sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
+	sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
+	sctlr_el2 |= SCTLR_EL2_WXN;
+	sctlr_el2 &= ~SCTLR_EL2_EE;
+
+	/* Special call to initialize EL2 */
+	vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
+	    sctlr_el2, el2_regs->vtcr_el2);
+
+	intr_restore(daif);
+}
+
+/*
+ * Per-CPU EL2 teardown, run from smp_rendezvous().  Disables the EL2 MMU
+ * and reinstalls the stub vectors.
+ */
+static void
+arm_teardown_vectors(void *arg)
+{
+	register_t daif;
+
+	/*
+	 * vmm_cleanup() will disable the MMU. For the next few instructions,
+	 * before the hardware disables the MMU, one of the following is
+	 * possible:
+	 *
+	 * a. The instruction addresses are fetched with the MMU disabled,
+	 * and they must represent the actual physical addresses. This will work
+	 * because we call the vmm_cleanup() function by its physical address.
+	 *
+	 * b. The instruction addresses are fetched using the old translation
+	 * tables. This will work because we have an identity mapping in place
+	 * in the translation tables and vmm_cleanup() is called by its physical
+	 * address.
+	 */
+	daif = intr_disable();
+	/* TODO: Invalidate the cache */
+	vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
+	intr_restore(daif);
+
+	arm64_set_active_vcpu(NULL);
+}
+
+/*
+ * Translate a stage 2 page table level count into the VTCR_EL2.SL0
+ * (starting level) field value for the kernel's page size.  Panics on an
+ * unsupported level count.
+ */
+static uint64_t
+vmm_vtcr_el2_sl(u_int levels)
+{
+#if PAGE_SIZE == PAGE_SIZE_4K
+	if (levels == 2)
+		return (VTCR_EL2_SL0_4K_LVL2);
+	if (levels == 3)
+		return (VTCR_EL2_SL0_4K_LVL1);
+	if (levels == 4)
+		return (VTCR_EL2_SL0_4K_LVL0);
+	panic("%s: Invalid number of page table levels %u", __func__,
+	    levels);
+#elif PAGE_SIZE == PAGE_SIZE_16K
+	if (levels == 2)
+		return (VTCR_EL2_SL0_16K_LVL2);
+	if (levels == 3)
+		return (VTCR_EL2_SL0_16K_LVL1);
+	if (levels == 4)
+		return (VTCR_EL2_SL0_16K_LVL0);
+	panic("%s: Invalid number of page table levels %u", __func__,
+	    levels);
+#else
+#error Unsupported page size
+#endif
+}
+
+/*
+ * Global one-time initialization of the arm64 VMM backend.
+ *
+ * Verifies hardware support, initializes the EL2 MMU, maps the
+ * hypervisor code and a per-CPU stack into EL2, installs the vectors on
+ * every CPU and configures the EL2 translation control registers.
+ * Returns 0 on success or an errno value on failure.
+ */
+static int
+arm_init(int ipinum)
+{
+	struct vmm_init_regs el2_regs;
+	vm_offset_t next_hyp_va;
+	vm_paddr_t vmm_base;
+	uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
+	uint64_t ich_vtr_el2;
+	uint64_t cnthctl_el2;
+	register_t daif;
+	int cpu, i;
+	bool rv __diagused;
+
+	if (!virt_enabled()) {
+		printf("arm_init: Processor doesn't have support for virtualization.\n");
+		return (ENXIO);
+	}
+
+	if (!vgic_present()) {
+		printf("arm_init: No GICv3 found\n");
+		return (ENODEV);
+	}
+
+	if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
+		printf("arm_init: Unable to read ID_AA64MMFR0_EL1\n");
+		return (ENXIO);
+	}
+	pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
+	/*
+	 * Use 3 levels to give us up to 39 bits with 4k pages, or
+	 * 47 bits with 16k pages.
+	 */
+	/* TODO: Check the number of levels for 64k pages */
+	vmm_pmap_levels = 3;
+	switch (pa_range_field) {
+	case ID_AA64MMFR0_PARange_4G:
+		printf("arm_init: Not enough physical address bits\n");
+		return (ENXIO);
+	case ID_AA64MMFR0_PARange_64G:
+		vmm_virt_bits = 36;
+#if PAGE_SIZE == PAGE_SIZE_16K
+		/* TODO: Test */
+		vmm_pmap_levels = 2;
+#endif
+		break;
+	default:
+		vmm_virt_bits = 39;
+		break;
+	}
+	pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;
+
+	/* Initialise the EL2 MMU */
+	if (!vmmpmap_init()) {
+		printf("arm_init: Failed to init the EL2 MMU\n");
+		return (ENOMEM);
+	}
+
+	/* Set up the stage 2 pmap callbacks */
+	MPASS(pmap_clean_stage2_tlbi == NULL);
+	pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi;
+	pmap_stage2_invalidate_range = vmm_pmap_invalidate_range;
+	pmap_stage2_invalidate_all = vmm_pmap_invalidate_all;
+
+	/* Create the vmem allocator */
+	el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+
+	/* Create the mappings for the hypervisor translation table. */
+	hyp_code_len = roundup2(&vmm_hyp_code_end - &vmm_hyp_code, PAGE_SIZE);
+
+	/* We need a physical identity mapping for when we activate the MMU */
+	hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
+	rv = vmmpmap_enter(vmm_base, hyp_code_len, vtophys(&vmm_hyp_code),
+	    VM_PROT_READ | VM_PROT_EXECUTE);
+	MPASS(rv);
+
+	next_hyp_va = roundup2(vtophys(&vmm_hyp_code) + hyp_code_len, L2_SIZE);
+
+	/* Create a per-CPU hypervisor stack */
+	CPU_FOREACH(cpu) {
+		stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
+		stack_hyp_va[cpu] = next_hyp_va;
+
+		for (i = 0; i < VMM_STACK_PAGES; i++) {
+			rv = vmmpmap_enter(stack_hyp_va[cpu] + (i * PAGE_SIZE),
+			    PAGE_SIZE, vtophys(stack[cpu] + (i * PAGE_SIZE)),
+			    VM_PROT_READ | VM_PROT_WRITE);
+			MPASS(rv);
+		}
+		next_hyp_va += L2_SIZE;
+	}
+
+	el2_regs.tcr_el2 = TCR_EL2_RES1;
+	el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
+	    TCR_EL2_PS_52BITS);
+	el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
+	el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
+#if PAGE_SIZE == PAGE_SIZE_4K
+	el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+	el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+	el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
+#endif
+
+	/*
+	 * Configure the Stage 2 translation control register:
+	 *
+	 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
+	 * normal memory
+	 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
+	 * normal memory
+	 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
+	 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
+	 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
+	 * shareable
+	 */
+	el2_regs.vtcr_el2 = VTCR_EL2_RES1;
+	el2_regs.vtcr_el2 |=
+	    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
+	el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
+	el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
+	el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
+#if PAGE_SIZE == PAGE_SIZE_4K
+	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
+#elif PAGE_SIZE == PAGE_SIZE_16K
+	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
+#else
+#error Unsupported page size
+#endif
+#ifdef SMP
+	el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
+#endif
+
+	smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);
+
+	/* Add memory to the vmem allocator (checking there is space) */
+	if (vmm_base > L2_SIZE) {
+		/*
+		 * Ensure there is an L2 block before the vmm code to check
+		 * for buffer overflows on earlier data. Include the PAGE_SIZE
+		 * of the minimum we can allocate.
+		 */
+		vmm_base -= L2_SIZE + PAGE_SIZE;
+		vmm_base = rounddown2(vmm_base, L2_SIZE);
+
+		/*
+		 * Check there is memory before the vmm code to add.
+		 *
+		 * Reserve the L2 block at address 0 so NULL dereference will
+		 * raise an exception
+		 */
+		if (vmm_base > L2_SIZE)
+			/*
+			 * NOTE(review): the span [L2_SIZE, next_hyp_va)
+			 * appears to overlap the hyp code/stack mappings
+			 * created above; verify the intended size isn't
+			 * vmm_base - L2_SIZE.
+			 */
+			vmem_add(el2_mem_alloc, L2_SIZE, next_hyp_va - L2_SIZE,
+			    M_WAITOK);
+	}
+
+	/*
+	 * Add the memory after the stacks. There is most of an L2 block
+	 * between the last stack and the first allocation so this should
+	 * be safe without adding more padding.
+	 */
+	if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
+		vmem_add(el2_mem_alloc, next_hyp_va,
+		    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
+
+	/* Read EL2-only registers needed by the GIC and timer code */
+	daif = intr_disable();
+	ich_vtr_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_ICH_VTR);
+	cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL);
+	intr_restore(daif);
+
+	vgic_v3_init(ich_vtr_el2);
+	vtimer_init(cnthctl_el2);
+
+	return (0);
+}
+
+/*
+ * Undo arm_init(): tear down the EL2 vectors on every CPU, unhook the
+ * stage 2 pmap callbacks and release the hypervisor stacks and EL2 MMU
+ * state.
+ */
+static int
+arm_cleanup(void)
+{
+	int cpu;
+
+	smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
+
+#ifdef INVARIANTS
+	/* Only needed so vmmpmap_fini() sees no leftover mappings */
+	CPU_FOREACH(cpu) {
+		vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE,
+		    false);
+	}
+
+	vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+#endif
+
+	vtimer_cleanup();
+
+	vmmpmap_fini();
+	for (cpu = 0; cpu < nitems(stack); cpu++)
+		free(stack[cpu], M_HYP);
+
+	pmap_clean_stage2_tlbi = NULL;
+
+	return (0);
+}
+
+/*
+ * Create the architecture-private state (struct hyp) for a new VM,
+ * reset every vcpu, initialize the virtual timer and GIC, and map the
+ * structure into the EL2 address space.
+ */
+static void *
+arm_vminit(struct vm *vm, pmap_t pmap)
+{
+	struct hyp *hyp;
+	struct hypctx *hypctx;
+	vmem_addr_t vm_addr;
+	vm_size_t size;
+	bool last_vcpu, rv __diagused;
+	int err __diagused, i, maxcpus;
+
+	/* Ensure this is the only data on the page */
+	size = roundup2(sizeof(struct hyp), PAGE_SIZE);
+	hyp = malloc(size, M_HYP, M_WAITOK | M_ZERO);
+	MPASS(((vm_offset_t)hyp & PAGE_MASK) == 0);
+
+	hyp->vm = vm;
+	hyp->vgic_attached = false;
+
+	maxcpus = vm_get_maxcpus(vm);
+	for (i = 0; i < maxcpus; i++) {
+		hypctx = &hyp->ctx[i];
+		hypctx->vcpu = i;
+		hypctx->hyp = hyp;
+
+		reset_vm_el01_regs(hypctx);
+		reset_vm_el2_regs(hypctx);
+	}
+
+	vtimer_vminit(hyp);
+	vgic_v3_vminit(hyp);
+	/*
+	 * NOTE(review): this loop uses VM_MAXCPU while the reset loop above
+	 * uses vm_get_maxcpus(); confirm the two bounds are meant to differ.
+	 */
+	for (i = 0; i < VM_MAXCPU; i++) {
+		hypctx = &hyp->ctx[i];
+		vtimer_cpuinit(hypctx);
+		last_vcpu = (i == VM_MAXCPU - 1);
+		vgic_v3_cpuinit(hypctx, last_vcpu);
+	}
+
+	/* XXX: Can this fail? */
+	err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK,
+	    &vm_addr);
+	MPASS(err == 0);
+	MPASS((vm_addr & PAGE_MASK) == 0);
+	hyp->el2_addr = vm_addr;
+
+	rv = vmmpmap_enter(hyp->el2_addr, size, vtophys(hyp),
+	    VM_PROT_READ | VM_PROT_WRITE);
+	MPASS(rv);
+
+	return (hyp);
+}
+
+/* pmap init callback: configure the pmap for stage 2 translation. */
+static int
+arm_vmm_pinit(pmap_t pmap)
+{
+
+	pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
+	return (1);
+}
+
+/* Allocate a guest vmspace whose pmap is initialized for stage 2. */
+static struct vmspace *
+arm_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+	return (vmspace_alloc(min, max, arm_vmm_pinit));
+}
+
+/* Release a guest vmspace, removing its pages first. */
+static void
+arm_vmspace_free(struct vmspace *vmspace)
+{
+
+	pmap_remove_pages(vmspace_pmap(vmspace));
+	vmspace_free(vmspace);
+}
+
+/* pmap callback: invalidate all stage 2 TLB entries via EL2. */
+static void
+vmm_pmap_clean_stage2_tlbi(void)
+{
+	vmm_call_hyp(HYP_CLEAN_S2_TLBI);
+}
+
+/*
+ * pmap callback: invalidate stage 2 TLB entries for [sva, eva) in the
+ * address space identified by 'vttbr'.  'final_only' restricts the
+ * invalidation to last-level entries.
+ */
+static void
+vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva,
+    bool final_only)
+{
+	MPASS(eva > sva);
+	vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only);
+}
+
+/* pmap callback: invalidate every stage 2 TLB entry for 'vttbr'. */
+static void
+vmm_pmap_invalidate_all(uint64_t vttbr)
+{
+	vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr);
+}
+
+/*
+ * Map a register number decoded from an exception syndrome to the
+ * corresponding vm_reg_name: 0-29 are X0-X29, then LR, SP, ELR, SPSR and
+ * ELR_EL2.  Returns VM_REG_LAST for anything out of range.  'mode' is
+ * currently unused.
+ */
+static enum vm_reg_name
+get_vm_reg_name(uint32_t reg_nr, uint32_t mode __attribute__((unused)))
+{
+	static const enum vm_reg_name reg_names[] = {
+		VM_REG_GUEST_X0,  VM_REG_GUEST_X1,  VM_REG_GUEST_X2,
+		VM_REG_GUEST_X3,  VM_REG_GUEST_X4,  VM_REG_GUEST_X5,
+		VM_REG_GUEST_X6,  VM_REG_GUEST_X7,  VM_REG_GUEST_X8,
+		VM_REG_GUEST_X9,  VM_REG_GUEST_X10, VM_REG_GUEST_X11,
+		VM_REG_GUEST_X12, VM_REG_GUEST_X13, VM_REG_GUEST_X14,
+		VM_REG_GUEST_X15, VM_REG_GUEST_X16, VM_REG_GUEST_X17,
+		VM_REG_GUEST_X18, VM_REG_GUEST_X19, VM_REG_GUEST_X20,
+		VM_REG_GUEST_X21, VM_REG_GUEST_X22, VM_REG_GUEST_X23,
+		VM_REG_GUEST_X24, VM_REG_GUEST_X25, VM_REG_GUEST_X26,
+		VM_REG_GUEST_X27, VM_REG_GUEST_X28, VM_REG_GUEST_X29,
+		VM_REG_GUEST_LR,  VM_REG_GUEST_SP,  VM_REG_GUEST_ELR,
+		VM_REG_GUEST_SPSR, VM_REG_ELR_EL2,
+	};
+
+	if (reg_nr < nitems(reg_names))
+		return (reg_names[reg_nr]);
+
+	return (VM_REG_LAST);
+}
+
+/* Print the EL2 exit registers from a vm_exit for diagnostics. */
+static inline void
+arm64_print_hyp_regs(struct vm_exit *vme)
+{
+	printf("esr_el2:   0x%08x\n", vme->u.hyp.esr_el2);
+	printf("far_el2:   0x%016lx\n", vme->u.hyp.far_el2);
+	printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
+}
+
+/*
+ * Fill in the instruction-emulation exit information (faulting GPA,
+ * access size/direction/register and guest paging state) from the data
+ * abort syndrome in 'esr_iss' and the saved EL2 exit registers.
+ */
+static void
+arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
+    struct vm_exit *vme_ret)
+{
+	struct vm_guest_paging *paging;
+	struct vie *vie;
+	uint32_t esr_sas, reg_num;
+	uint64_t page_off;
+
+	/*
+	 * Get the page address from HPFAR_EL2.
+	 */
+	vme_ret->u.inst_emul.gpa =
+	    HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+	/* Bits [11:0] are the same as bits [11:0] from the virtual address. */
+	page_off = FAR_EL2_PAGE_OFFSET(hypctx->exit_info.far_el2);
+	vme_ret->u.inst_emul.gpa += page_off;
+
+	esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
+	reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
+
+	vie = &vme_ret->u.inst_emul.vie;
+	/* SAS encodes the access size as a power of two (1/2/4/8 bytes) */
+	vie->access_size = 1 << esr_sas;
+	vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
+	vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
+	vie->reg = get_vm_reg_name(reg_num, UNUSED);
+
+	paging = &vme_ret->u.inst_emul.paging;
+	paging->far = hypctx->exit_info.far_el2;
+	paging->ttbr0_el1 = hypctx->ttbr0_el1;
+	paging->ttbr1_el1 = hypctx->ttbr1_el1;
+	paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
+	if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
+		paging->flags |= VM_GP_MMU_ENABLED;
+}
+
+/*
+ * Fill in the register-emulation exit information (syndrome, direction
+ * and target register) from a trapped MSR/MRS instruction.
+ */
+static void
+arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+	uint32_t reg_num;
+	struct vre *vre;
+
+	/* u.hyp member will be replaced by u.reg_emul */
+	vre = &vme_ret->u.reg_emul.vre;
+
+	vre->inst_syndrome = esr_iss;
+	/* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
+	vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
+	reg_num = ISS_MSR_Rt(esr_iss);
+	vre->reg = get_vm_reg_name(reg_num, UNUSED);
+}
+
+/*
+ * Decode a synchronous exception taken from the guest at EL1 and set the
+ * matching exit code in 'vme_ret'.  Always returns UNHANDLED: nothing is
+ * emulated here, the exit code tells the caller how to proceed.
+ */
+static int
+handle_el1_sync_excp(struct hyp *hyp, int vcpu, struct vm_exit *vme_ret,
+    pmap_t pmap)
+{
+	struct hypctx *hypctx;
+	uint64_t gpa;
+	uint32_t esr_ec, esr_iss;
+
+	hypctx = &hyp->ctx[vcpu];
+	esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
+	esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;
+
+	switch(esr_ec) {
+	case EXCP_UNKNOWN:
+		eprintf("Unknown exception from guest\n");
+		arm64_print_hyp_regs(vme_ret);
+		vme_ret->exitcode = VM_EXITCODE_HYP;
+		break;
+	case EXCP_TRAP_WFI_WFE:
+		if ((hypctx->tf.tf_esr & 0x3) == 0) /* WFI */
+			vme_ret->exitcode = VM_EXITCODE_WFI;
+		else
+			vme_ret->exitcode = VM_EXITCODE_HYP;
+		break;
+	case EXCP_HVC:
+		vme_ret->exitcode = VM_EXITCODE_HVC;
+		break;
+	case EXCP_MSR:
+		arm64_gen_reg_emul_data(esr_iss, vme_ret);
+		vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
+		break;
+
+	case EXCP_INSN_ABORT_L:
+	case EXCP_DATA_ABORT_L:
+		switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+		case ISS_DATA_DFSC_TF_L0:
+		case ISS_DATA_DFSC_TF_L1:
+		case ISS_DATA_DFSC_TF_L2:
+		case ISS_DATA_DFSC_TF_L3:
+		case ISS_DATA_DFSC_AFF_L1:
+		case ISS_DATA_DFSC_AFF_L2:
+		case ISS_DATA_DFSC_AFF_L3:
+		case ISS_DATA_DFSC_PF_L1:
+		case ISS_DATA_DFSC_PF_L2:
+		case ISS_DATA_DFSC_PF_L3:
+			/* 'hypctx' already points at this vcpu's context. */
+			gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
+			if (vm_mem_allocated(hyp->vm, vcpu, gpa)) {
+				vme_ret->exitcode = VM_EXITCODE_PAGING;
+				vme_ret->inst_length = 0;
+				vme_ret->u.paging.esr = hypctx->tf.tf_esr;
+				vme_ret->u.paging.gpa = gpa;
+			} else if (esr_ec == EXCP_DATA_ABORT_L) {
+				arm64_gen_inst_emul_data(hypctx, esr_iss,
+				    vme_ret);
+				vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
+			} else {
+				eprintf(
+				    "Unsupported instruction fault from guest\n");
+				arm64_print_hyp_regs(vme_ret);
+				vme_ret->exitcode = VM_EXITCODE_HYP;
+			}
+			break;
+		default:
+			eprintf(
+			    "Unsupported data/instruction fault from guest\n");
+			arm64_print_hyp_regs(vme_ret);
+			vme_ret->exitcode = VM_EXITCODE_HYP;
+			break;
+		}
+
+		break;
+
+	default:
+		eprintf("Unsupported synchronous exception from guest: 0x%x\n",
+		    esr_ec);
+		arm64_print_hyp_regs(vme_ret);
+		vme_ret->exitcode = VM_EXITCODE_HYP;
+		break;
+	}
+
+	/* We don't do any instruction emulation here */
+	return (UNHANDLED);
+}
+
+/*
+ * Dispatch on the EL2 world-switch exception type.  Returns HANDLED when
+ * the exit was resolved in the kernel, otherwise UNHANDLED with 'vme'
+ * describing the exit for emulation or userland.
+ */
+static int
+arm64_handle_world_switch(struct hyp *hyp, int vcpu, int excp_type,
+    struct vm_exit *vme, pmap_t pmap)
+{
+	int handled;
+
+	switch (excp_type) {
+	case EXCP_TYPE_EL1_SYNC:
+		/* The exit code will be set by handle_el1_sync_excp(). */
+		handled = handle_el1_sync_excp(hyp, vcpu, vme, pmap);
+		break;
+
+	case EXCP_TYPE_EL1_IRQ:
+	case EXCP_TYPE_EL1_FIQ:
+		/* The host kernel will handle IRQs and FIQs. */
+		vme->exitcode = VM_EXITCODE_BOGUS;
+		handled = UNHANDLED;
+		break;
+
+	case EXCP_TYPE_EL1_ERROR:
+	case EXCP_TYPE_EL2_SYNC:
+	case EXCP_TYPE_EL2_IRQ:
+	case EXCP_TYPE_EL2_FIQ:
+	case EXCP_TYPE_EL2_ERROR:
+		/*
+		 * Print the numeric value; __STRING(excp_type) would only
+		 * ever print the literal string "excp_type".
+		 */
+		eprintf("Unhandled exception type: %d\n", excp_type);
+		vme->exitcode = VM_EXITCODE_BOGUS;
+		handled = UNHANDLED;
+		break;
+
+	default:
+		eprintf("Unknown exception type: %d\n", excp_type);
+		vme->exitcode = VM_EXITCODE_BOGUS;
+		handled = UNHANDLED;
+		break;
+	}
+
+	return (handled);
+}
+
+/*
+ * Backend VMRUN entry point: repeatedly enter the guest vcpu at EL2 and
+ * handle world-switch exits, returning when an exit must be serviced
+ * outside this loop (by vm_run() or userland).
+ */
+static int
+arm_vmrun(void *arg, int vcpu, register_t pc, pmap_t pmap,
+    struct vm_eventinfo *evinfo)
+{
+	uint64_t excp_type;
+	int handled;
+	register_t daif;
+	struct hyp *hyp;
+	struct hypctx *hypctx;
+	struct vm *vm;
+	struct vm_exit *vme;
+
+	hyp = (struct hyp *)arg;
+	vm = hyp->vm;
+	vme = vm_exitinfo(vm, vcpu);
+
+	hypctx = &hyp->ctx[vcpu];
+	hypctx->tf.tf_elr = (uint64_t)pc;
+
+	for (;;) {
+		/* Interrupts stay disabled across the world switch */
+		daif = intr_disable();
+
+		/* Check if the vcpu is suspended */
+		if (vcpu_suspended(evinfo)) {
+			intr_restore(daif);
+			vm_exit_suspended(vm, vcpu, pc);
+			break;
+		}
+
+		/* Activate the stage2 pmap so the vmid is valid */
+		pmap_activate_vm(pmap);
+		hyp->vttbr_el2 = pmap_to_ttbr0(pmap);
+
+		/*
+		 * TODO: What happens if a timer interrupt is asserted exactly
+		 * here, but for the previous VM?
+		 */
+		arm64_set_active_vcpu(hypctx);
+		vgic_v3_flush_hwstate(hypctx);
+
+		/* Call into EL2 to switch to the guest */
+		excp_type = vmm_call_hyp(HYP_ENTER_GUEST,
+		    hyp->el2_addr, vcpu);
+
+		vgic_v3_sync_hwstate(hypctx);
+
+		/*
+		 * Deactivate the stage2 pmap. vmm_pmap_clean_stage2_tlbi
+		 * depends on this meaning we activate the VM before entering
+		 * the vm again
+		 */
+		PCPU_SET(curvmpmap, NULL);
+		intr_restore(daif);
+
+		/* Maintenance interrupts are fully handled; re-enter */
+		if (excp_type == EXCP_TYPE_MAINT_IRQ)
+			continue;
+
+		vme->pc = hypctx->tf.tf_elr;
+		vme->inst_length = INSN_SIZE;
+		vme->u.hyp.exception_nr = excp_type;
+		vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
+		vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
+		vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
+
+		handled = arm64_handle_world_switch(hyp, vcpu, excp_type, vme,
+		    pmap);
+		if (handled == UNHANDLED)
+			/* Exit loop to emulate instruction. */
+			break;
+		else
+			/* Resume guest execution from the next instruction. */
+			hypctx->tf.tf_elr += vme->inst_length;
+	}
+
+	return (0);
+}
+
+/*
+ * Per-CPU VM cleanup (smp_rendezvous callback): clear the active-vcpu
+ * pointer if it refers to a vcpu of the VM being destroyed.
+ */
+static void
+arm_pcpu_vmcleanup(void *arg)
+{
+	struct hyp *hyp;
+	int i, maxcpus;
+
+	hyp = arg;
+	maxcpus = vm_get_maxcpus(hyp->vm);
+	for (i = 0; i < maxcpus; i++) {
+		if (arm64_get_active_vcpu() == &hyp->ctx[i]) {
+			arm64_set_active_vcpu(NULL);
+			break;
+		}
+	}
+}
+
+/*
+ * Destroy the architecture-private state created by arm_vminit():
+ * tear down the per-vcpu timer/GIC state, clear stale per-CPU vcpu
+ * pointers, unmap the struct hyp from EL2 and free it.
+ */
+static void
+arm_vmcleanup(void *arg)
+{
+	struct hyp *hyp = arg;
+	struct hypctx *hypctx;
+	int i;
+
+	/* NOTE(review): loop bound is VM_MAXCPU, matching arm_vminit() */
+	for (i = 0; i < VM_MAXCPU; i++) {
+		hypctx = &hyp->ctx[i];
+		vtimer_cpucleanup(hypctx);
+		vgic_v3_cpucleanup(hypctx);
+	}
+
+	vtimer_vmcleanup(hyp);
+	vgic_v3_vmcleanup(hyp);
+
+	smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
+
+	/* Unmap the VM hyp struct from the hyp mode translation table */
+	vmmpmap_remove(hyp->el2_addr, roundup2(sizeof(*hyp), PAGE_SIZE),
+	    true);
+
+	free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer into the vcpu context for the given register, or NULL
+ * if the register is unknown.  Registers have different sizes and the
+ * caller must cast the pointer appropriately (see VM_REG_GUEST_SPSR
+ * handling in arm_getreg()/arm_setreg()).
+ */
+static void *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+	switch (reg) {
+	case VM_REG_GUEST_X0:
+		return (&hypctx->tf.tf_x[0]);
+	case VM_REG_GUEST_X1:
+		return (&hypctx->tf.tf_x[1]);
+	case VM_REG_GUEST_X2:
+		return (&hypctx->tf.tf_x[2]);
+	case VM_REG_GUEST_X3:
+		return (&hypctx->tf.tf_x[3]);
+	case VM_REG_GUEST_X4:
+		return (&hypctx->tf.tf_x[4]);
+	case VM_REG_GUEST_X5:
+		return (&hypctx->tf.tf_x[5]);
+	case VM_REG_GUEST_X6:
+		return (&hypctx->tf.tf_x[6]);
+	case VM_REG_GUEST_X7:
+		return (&hypctx->tf.tf_x[7]);
+	case VM_REG_GUEST_X8:
+		return (&hypctx->tf.tf_x[8]);
+	case VM_REG_GUEST_X9:
+		return (&hypctx->tf.tf_x[9]);
+	case VM_REG_GUEST_X10:
+		return (&hypctx->tf.tf_x[10]);
+	case VM_REG_GUEST_X11:
+		return (&hypctx->tf.tf_x[11]);
+	case VM_REG_GUEST_X12:
+		return (&hypctx->tf.tf_x[12]);
+	case VM_REG_GUEST_X13:
+		return (&hypctx->tf.tf_x[13]);
+	case VM_REG_GUEST_X14:
+		return (&hypctx->tf.tf_x[14]);
+	case VM_REG_GUEST_X15:
+		return (&hypctx->tf.tf_x[15]);
+	case VM_REG_GUEST_X16:
+		return (&hypctx->tf.tf_x[16]);
+	case VM_REG_GUEST_X17:
+		return (&hypctx->tf.tf_x[17]);
+	case VM_REG_GUEST_X18:
+		return (&hypctx->tf.tf_x[18]);
+	case VM_REG_GUEST_X19:
+		return (&hypctx->tf.tf_x[19]);
+	case VM_REG_GUEST_X20:
+		return (&hypctx->tf.tf_x[20]);
+	case VM_REG_GUEST_X21:
+		return (&hypctx->tf.tf_x[21]);
+	case VM_REG_GUEST_X22:
+		return (&hypctx->tf.tf_x[22]);
+	case VM_REG_GUEST_X23:
+		return (&hypctx->tf.tf_x[23]);
+	case VM_REG_GUEST_X24:
+		return (&hypctx->tf.tf_x[24]);
+	case VM_REG_GUEST_X25:
+		return (&hypctx->tf.tf_x[25]);
+	case VM_REG_GUEST_X26:
+		return (&hypctx->tf.tf_x[26]);
+	case VM_REG_GUEST_X27:
+		return (&hypctx->tf.tf_x[27]);
+	case VM_REG_GUEST_X28:
+		return (&hypctx->tf.tf_x[28]);
+	case VM_REG_GUEST_X29:
+		return (&hypctx->tf.tf_x[29]);
+	case VM_REG_GUEST_LR:
+		return (&hypctx->tf.tf_lr);
+	case VM_REG_GUEST_SP:
+		return (&hypctx->tf.tf_sp);
+	case VM_REG_GUEST_ELR: /* This is bogus */
+		return (&hypctx->tf.tf_elr);
+	case VM_REG_GUEST_SPSR: /* This is bogus */
+		return (&hypctx->tf.tf_spsr);
+	case VM_REG_ELR_EL2:
+		return (&hypctx->tf.tf_elr);
+	default:
+		break;
+	}
+	return (NULL);
+}
+
+/*
+ * Backend register read.  SPSR is stored as 32 bits and is widened here;
+ * every other register is read as a full 64-bit value.
+ */
+static int
+arm_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
+{
+	void *regp;
+	int running, hostcpu;
+	struct hyp *hyp = arg;
+
+	running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu)
+		panic("arm_getreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+	if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+		if (reg == VM_REG_GUEST_SPSR)
+			*retval = *(uint32_t *)regp;
+		else
+			*retval = *(uint64_t *)regp;
+		return (0);
+	} else {
+		return (EINVAL);
+	}
+}
+
+/*
+ * Backend register write.  SPSR is stored as 32 bits and is truncated
+ * here; every other register is written as a full 64-bit value.
+ */
+static int
+arm_setreg(void *arg, int vcpu, int reg, uint64_t val)
+{
+	void *regp;
+	struct hyp *hyp = arg;
+	int running, hostcpu;
+
+	running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu)
+		/* Name the real function, matching arm_getreg()'s panic. */
+		panic("arm_setreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+	if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+		if (reg == VM_REG_GUEST_SPSR)
+			*(uint32_t *)regp = (uint32_t)val;
+		else
+			*(uint64_t *)regp = val;
+		return (0);
+	} else {
+		return (EINVAL);
+	}
+}
+
+/*
+ * Read a virtual machine capability.  Only VM_CAP_UNRESTRICTED_GUEST is
+ * supported (always enabled); everything else returns ENOENT.
+ */
+static int
+arm_getcap(void *arg, int vcpu, int type, int *retval)
+{
+
+	if (type == VM_CAP_UNRESTRICTED_GUEST) {
+		*retval = 1;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/* No capabilities are settable on arm64. */
+static int
+arm_setcap(void *arg, int vcpu, int type, int val)
+{
+
+	return (ENOENT);
+}
+
+/* Nothing to do when the host resumes; required by struct vmm_ops. */
+static void
+arm_restore(void)
+{
+
+}
+
+/* arm64 backend operations consumed by the machine-independent VMM code */
+struct vmm_ops vmm_ops_arm = {
+	.init		= arm_init,
+	.cleanup	= arm_cleanup,
+	.resume		= arm_restore,
+	.vminit		= arm_vminit,
+	.vmrun		= arm_vmrun,
+	.vmcleanup	= arm_vmcleanup,
+	.vmgetreg	= arm_getreg,
+	.vmsetreg	= arm_setreg,
+	.vmgetcap	= arm_getcap,
+	.vmsetcap	= arm_setcap,
+	.vmspace_alloc	= arm_vmspace_alloc,
+	.vmspace_free	= arm_vmspace_free,
+};
diff --git a/sys/arm64/vmm/vmm_call.S b/sys/arm64/vmm/vmm_call.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_call.S
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+
+	.text
+
+/*
+ * Trap into the EL2 hypervisor with a hypervisor call; arguments and the
+ * return value are carried in the general purpose registers per the
+ * standard procedure call convention.
+ */
+ENTRY(vmm_call_hyp)
+	hvc #0
+	ret
+END(vmm_call_hyp)
diff --git a/sys/arm64/vmm/vmm_dev.c b/sys/arm64/vmm/vmm_dev.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_dev.c
@@ -0,0 +1,970 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmm_stat.h"
+
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static unsigned pr_allow_flag;
+static struct mtx vmmdev_mtx;
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static int vmm_priv_check(struct ucred *ucred);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+/*
+ * Deny access from within a jail unless the prison carries the
+ * "allow.vmm" permission bit.
+ */
+static int
+vmm_priv_check(struct ucred *ucred)
+{
+
+	if (jailed(ucred) &&
+	    (ucred->cr_prison->pr_allow & pr_allow_flag) == 0)
+		return (EPERM);
+
+	return (0);
+}
+
+/*
+ * Freeze a single vcpu (VCPU_FROZEN) so that its state can be examined
+ * or modified.  Returns EINVAL for an out-of-range vcpu id, otherwise
+ * the vcpu_set_state() result.
+ */
+static int
+vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
+{
+	int error;
+
+	if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm))
+		return (EINVAL);
+
+	error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+	return (error);
+}
+
+/*
+ * Return a vcpu previously frozen by vcpu_lock_one() to the idle state.
+ * Panics if the vcpu is not actually frozen.
+ */
+static void
+vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
+{
+	enum vcpu_state state;
+
+	state = vcpu_get_state(sc->vm, vcpu, NULL);
+	if (state != VCPU_FROZEN) {
+		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
+		    vcpu, state);
+	}
+
+	vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+}
+
+/*
+ * Freeze every vcpu of the VM.  On failure, the vcpus that were already
+ * frozen are unlocked again before the error is returned.
+ */
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+	int error, vcpu;
+	uint16_t maxcpus;
+
+	/*
+	 * Initialize 'error'; it would otherwise be read uninitialized
+	 * below if the loop never executed.
+	 */
+	error = 0;
+	maxcpus = vm_get_maxcpus(sc->vm);
+	for (vcpu = 0; vcpu < maxcpus; vcpu++) {
+		error = vcpu_lock_one(sc, vcpu);
+		if (error)
+			break;
+	}
+
+	if (error) {
+		/* Roll back the vcpus locked so far. */
+		while (--vcpu >= 0)
+			vcpu_unlock_one(sc, vcpu);
+	}
+
+	return (error);
+}
+
+/* Unfreeze every vcpu frozen by vcpu_lock_all(). */
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+	int vcpu;
+	uint16_t maxcpus;
+
+	maxcpus = vm_get_maxcpus(sc->vm);
+	for (vcpu = 0; vcpu < maxcpus; vcpu++)
+		vcpu_unlock_one(sc, vcpu);
+}
+
+/*
+ * Find the softc for the VM with the given name, or NULL if none
+ * exists.  The caller is expected to hold vmmdev_mtx.
+ */
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+	struct vmmdev_softc *sc;
+
+#ifdef notyet	/* XXX kernel is not compiled with invariants */
+	mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+	SLIST_FOREACH(sc, &head, link) {
+		if (strcmp(name, vm_name(sc->vm)) == 0)
+			break;
+	}
+
+	return (sc);
+}
+
+/* Map a cdev back to its softc; si_drv1 is set when the cdev is created. */
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+	return (cdev->si_drv1);
+}
+
+/*
+ * read(2)/write(2) handler for the VM cdev.  uio_offset is interpreted
+ * as a guest physical address; data is copied page-at-a-time between
+ * the caller's buffer and guest memory.
+ */
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+	int error, off, c, prot;
+	vm_paddr_t gpa, maxaddr;
+	void *hpa, *cookie;
+	struct vmmdev_softc *sc;
+	uint16_t lastcpu;
+
+	error = vmm_priv_check(curthread->td_ucred);
+	if (error)
+		return (error);
+
+	sc = vmmdev_lookup2(cdev);
+	if (sc == NULL)
+		return (ENXIO);
+
+	/*
+	 * Get a read lock on the guest memory map by freezing any vcpu.
+	 */
+	lastcpu = vm_get_maxcpus(sc->vm) - 1;
+	error = vcpu_lock_one(sc, lastcpu);
+	if (error)
+		return (error);
+
+	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
+	maxaddr = vmm_sysmem_maxaddr(sc->vm);
+	while (uio->uio_resid > 0 && error == 0) {
+		gpa = uio->uio_offset;
+		off = gpa & PAGE_MASK;
+		/* Never cross a page boundary in a single copy. */
+		c = min(uio->uio_resid, PAGE_SIZE - off);
+
+		/*
+		 * The VM has a hole in its physical memory map. If we want to
+		 * use 'dd' to inspect memory beyond the hole we need to
+		 * provide bogus data for memory that lies in the hole.
+		 *
+		 * Since this device does not support lseek(2), dd(1) will
+		 * read(2) blocks of data to simulate the lseek(2).
+		 */
+		hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c,
+		    prot, &cookie);
+		if (hpa == NULL) {
+			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
+				error = uiomove(__DECONST(void *, zero_region),
+				    c, uio);
+			else
+				error = EFAULT;
+		} else {
+			error = uiomove(hpa, c, uio);
+			vm_gpa_release(cookie);
+		}
+	}
+	vcpu_unlock_one(sc, lastcpu);
+	return (error);
+}
+
+/*
+ * Fill in the length and, for a device memory segment, the cdev name of
+ * the memory segment identified by mseg->segid.  System memory segments
+ * get an empty name.
+ */
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+	struct devmem_softc *dsc;
+	int error;
+	bool sysmem;
+
+	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+	if (error || mseg->len == 0)
+		return (error);
+
+	if (!sysmem) {
+		/* Device memory: look up the cdev created for the segment. */
+		SLIST_FOREACH(dsc, &sc->devmem, link) {
+			if (dsc->segid == mseg->segid)
+				break;
+		}
+		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+		    __func__, mseg->segid));
+		error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
+		    NULL);
+	} else {
+		bzero(mseg->name, sizeof(mseg->name));
+	}
+
+	return (error);
+}
+
+/*
+ * Allocate a memory segment for the VM.  A named segment is device
+ * memory backed by a /dev/vmm.io cdev; an unnamed segment is system
+ * memory.
+ */
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+	char *name;
+	int error;
+	bool sysmem;
+
+	error = 0;
+	name = NULL;
+	sysmem = true;
+
+	/*
+	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
+	 * be stripped off when devfs processes the full string.
+	 */
+	if (VM_MEMSEG_NAME(mseg)) {
+		sysmem = false;
+		name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
+		error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
+		if (error)
+			goto done;
+	}
+
+	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+	if (error)
+		goto done;
+
+	if (VM_MEMSEG_NAME(mseg)) {
+		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+		if (error)
+			vm_free_memseg(sc->vm, mseg->segid);
+		else
+			name = NULL;	/* freed when 'cdev' is destroyed */
+	}
+done:
+	free(name, M_VMMDEV);
+	return (error);
+}
+
+/*
+ * Read 'count' guest registers, stopping at the first failure and
+ * returning its error.
+ */
+static int
+vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
+    uint64_t *regval)
+{
+	unsigned int idx;
+	int error;
+
+	error = 0;
+	for (idx = 0; idx < count && error == 0; idx++)
+		error = vm_get_register(vm, vcpu, regnum[idx], &regval[idx]);
+	return (error);
+}
+
+/*
+ * Write 'count' guest registers, stopping at the first failure and
+ * returning its error.
+ */
+static int
+vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
+    uint64_t *regval)
+{
+	unsigned int idx;
+	int error;
+
+	error = 0;
+	for (idx = 0; idx < count && error == 0; idx++)
+		error = vm_set_register(vm, vcpu, regnum[idx], regval[idx]);
+	return (error);
+}
+
+/*
+ * Main VM control ioctl handler.  The first switch freezes the vcpu(s)
+ * an ioctl operates on; the second switch dispatches the ioctl itself.
+ * Frozen vcpus are unlocked again before returning.
+ */
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+    struct thread *td)
+{
+	int error, vcpu, state_changed, size;
+	cpuset_t *cpuset;
+	struct vmmdev_softc *sc;
+	struct vm_register *vmreg;
+	struct vm_register_set *vmregset;
+	struct vm_run *vmrun;
+	struct vm_activate_cpu *vac;
+	struct vm_attach_vgic *vav;
+	struct vm_cpuset *vm_cpuset;
+	struct vm_irq *vi;
+	struct vm_capability *vmcap;
+	struct vm_stats *vmstats;
+	struct vm_stat_desc *statdesc;
+	struct vm_suspend *vmsuspend;
+	struct vm_memmap *mm;
+	struct vm_msi *vmsi;
+	struct vm_cpu_topology *topology;
+	uint64_t *regvals;
+	int *regnums;
+
+	error = vmm_priv_check(curthread->td_ucred);
+	if (error)
+		return (error);
+
+	sc = vmmdev_lookup2(cdev);
+	if (sc == NULL)
+		return (ENXIO);
+
+	error = 0;
+	vcpu = -1;
+	state_changed = 0;
+
+	/*
+	 * Some VMM ioctls can operate only on vcpus that are not running.
+	 */
+	switch (cmd) {
+	case VM_RUN:
+	case VM_GET_REGISTER:
+	case VM_SET_REGISTER:
+	case VM_GET_REGISTER_SET:
+	case VM_SET_REGISTER_SET:
+	case VM_GET_CAPABILITY:
+	case VM_SET_CAPABILITY:
+	case VM_ACTIVATE_CPU:
+		/*
+		 * XXX fragile, handle with care
+		 * Assumes that the first field of the ioctl data is the vcpu.
+		 */
+		vcpu = *(int *)data;
+		error = vcpu_lock_one(sc, vcpu);
+		if (error)
+			goto done;
+		state_changed = 1;
+		break;
+
+	case VM_ALLOC_MEMSEG:
+	case VM_MMAP_MEMSEG:
+	case VM_REINIT:
+	case VM_ATTACH_VGIC:
+		/*
+		 * ioctls that operate on the entire virtual machine must
+		 * prevent all vcpus from running.
+		 */
+		error = vcpu_lock_all(sc);
+		if (error)
+			goto done;
+		state_changed = 2;
+		break;
+	case VM_GET_MEMSEG:
+	case VM_MMAP_GETNEXT:
+		/*
+		 * Lock a vcpu to make sure that the memory map cannot be
+		 * modified while it is being inspected.
+		 */
+		vcpu = vm_get_maxcpus(sc->vm) - 1;
+		error = vcpu_lock_one(sc, vcpu);
+		if (error)
+			goto done;
+		state_changed = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (cmd) {
+	case VM_RUN:
+		vmrun = (struct vm_run *)data;
+		error = vm_run(sc->vm, vmrun);
+		break;
+	case VM_SUSPEND:
+		vmsuspend = (struct vm_suspend *)data;
+		error = vm_suspend(sc->vm, vmsuspend->how);
+		break;
+	case VM_REINIT:
+		error = vm_reinit(sc->vm);
+		break;
+	case VM_STAT_DESC: {
+		statdesc = (struct vm_stat_desc *)data;
+		error = vmm_stat_desc_copy(statdesc->index,
+		    statdesc->desc, sizeof(statdesc->desc));
+		break;
+	}
+	case VM_STATS: {
+		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
+		vmstats = (struct vm_stats *)data;
+		getmicrotime(&vmstats->tv);
+		error = vmm_stat_copy(sc->vm, vmstats->cpuid, vmstats->index,
+		    nitems(vmstats->statbuf),
+		    &vmstats->num_entries, vmstats->statbuf);
+		break;
+	}
+	case VM_MMAP_GETNEXT:
+		mm = (struct vm_memmap *)data;
+		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
+		break;
+	case VM_MMAP_MEMSEG:
+		mm = (struct vm_memmap *)data;
+		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+		    mm->len, mm->prot, mm->flags);
+		break;
+	case VM_ALLOC_MEMSEG:
+		error = alloc_memseg(sc, (struct vm_memseg *)data);
+		break;
+	case VM_GET_MEMSEG:
+		error = get_memseg(sc, (struct vm_memseg *)data);
+		break;
+	case VM_GET_REGISTER:
+		vmreg = (struct vm_register *)data;
+		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+		    &vmreg->regval);
+		break;
+	case VM_SET_REGISTER:
+		vmreg = (struct vm_register *)data;
+		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+		    vmreg->regval);
+		break;
+	case VM_GET_REGISTER_SET:
+		vmregset = (struct vm_register_set *)data;
+		if (vmregset->count > VM_REG_LAST) {
+			error = EINVAL;
+			break;
+		}
+		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+		    vmregset->count);
+		if (error == 0)
+			error = vm_get_register_set(sc->vm, vmregset->cpuid,
+			    vmregset->count, regnums, regvals);
+		if (error == 0)
+			error = copyout(regvals, vmregset->regvals,
+			    sizeof(regvals[0]) * vmregset->count);
+		free(regvals, M_VMMDEV);
+		free(regnums, M_VMMDEV);
+		break;
+	case VM_SET_REGISTER_SET:
+		vmregset = (struct vm_register_set *)data;
+		if (vmregset->count > VM_REG_LAST) {
+			error = EINVAL;
+			break;
+		}
+		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+		    M_WAITOK);
+		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+		    vmregset->count);
+		if (error == 0)
+			error = copyin(vmregset->regvals, regvals,
+			    sizeof(regvals[0]) * vmregset->count);
+		if (error == 0)
+			error = vm_set_register_set(sc->vm, vmregset->cpuid,
+			    vmregset->count, regnums, regvals);
+		free(regvals, M_VMMDEV);
+		free(regnums, M_VMMDEV);
+		break;
+	case VM_GET_CAPABILITY:
+		vmcap = (struct vm_capability *)data;
+		error = vm_get_capability(sc->vm, vmcap->cpuid,
+		    vmcap->captype,
+		    &vmcap->capval);
+		break;
+	case VM_SET_CAPABILITY:
+		vmcap = (struct vm_capability *)data;
+		error = vm_set_capability(sc->vm, vmcap->cpuid,
+		    vmcap->captype,
+		    vmcap->capval);
+		break;
+	case VM_ACTIVATE_CPU:
+		vac = (struct vm_activate_cpu *)data;
+		error = vm_activate_cpu(sc->vm, vac->vcpuid);
+		break;
+	case VM_GET_CPUS:
+		error = 0;
+		vm_cpuset = (struct vm_cpuset *)data;
+		size = vm_cpuset->cpusetsize;
+		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
+			error = ERANGE;
+			break;
+		}
+		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+		if (vm_cpuset->which == VM_ACTIVE_CPUS)
+			*cpuset = vm_active_cpus(sc->vm);
+		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
+			*cpuset = vm_suspended_cpus(sc->vm);
+		else if (vm_cpuset->which == VM_DEBUG_CPUS)
+			*cpuset = vm_debug_cpus(sc->vm);
+		else
+			error = EINVAL;
+		if (error == 0)
+			error = copyout(cpuset, vm_cpuset->cpus, size);
+		free(cpuset, M_TEMP);
+		break;
+	case VM_SUSPEND_CPU:
+		vac = (struct vm_activate_cpu *)data;
+		error = vm_suspend_cpu(sc->vm, vac->vcpuid);
+		break;
+	case VM_RESUME_CPU:
+		vac = (struct vm_activate_cpu *)data;
+		error = vm_resume_cpu(sc->vm, vac->vcpuid);
+		break;
+	/*
+	 * The IRQ ioctls are dispatched here, not in the locking switch
+	 * above, so that the second switch's default case cannot clobber
+	 * their result with ENOTTY.
+	 */
+	case VM_ASSERT_IRQ:
+		vi = (struct vm_irq *)data;
+		error = vm_assert_irq(sc->vm, vi->irq);
+		break;
+	case VM_DEASSERT_IRQ:
+		vi = (struct vm_irq *)data;
+		error = vm_deassert_irq(sc->vm, vi->irq);
+		break;
+	case VM_ATTACH_VGIC:
+		vav = (struct vm_attach_vgic *)data;
+		error = vm_attach_vgic(sc->vm, vav->dist_start, vav->dist_size,
+		    vav->redist_start, vav->redist_size);
+		break;
+	case VM_RAISE_MSI:
+		vmsi = (struct vm_msi *)data;
+		error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus,
+		    vmsi->slot, vmsi->func);
+		break;
+	case VM_SET_TOPOLOGY:
+		topology = (struct vm_cpu_topology *)data;
+		error = vm_set_topology(sc->vm, topology->sockets,
+		    topology->cores, topology->threads, topology->maxcpus);
+		break;
+	case VM_GET_TOPOLOGY:
+		topology = (struct vm_cpu_topology *)data;
+		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
+		    &topology->threads, &topology->maxcpus);
+		error = 0;
+		break;
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+	if (state_changed == 1)
+		vcpu_unlock_one(sc, vcpu);
+	else if (state_changed == 2)
+		vcpu_unlock_all(sc);
+
+done:
+	/*
+	 * Make sure that no handler returns a kernel-internal
+	 * error value to userspace.
+	 */
+	KASSERT(error == ERESTART || error >= 0,
+	    ("vmmdev_ioctl: invalid error return %d", error));
+	return (error);
+}
+
+/*
+ * mmap(2) handler for the VM cdev.  Translates a range of guest
+ * physical addresses to the backing VM object of the system memory
+ * segment that contains it.  Device memory segments must be mapped via
+ * their own cdev instead.
+ */
+static int
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+    struct vm_object **objp, int nprot)
+{
+	struct vmmdev_softc *sc;
+	vm_paddr_t gpa;
+	size_t len;
+	vm_ooffset_t segoff, first, last;
+	int error, found, segid;
+	uint16_t lastcpu;
+	bool sysmem;
+
+	error = vmm_priv_check(curthread->td_ucred);
+	if (error)
+		return (error);
+
+	first = *offset;
+	last = first + mapsize;
+	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+		return (EINVAL);
+
+	sc = vmmdev_lookup2(cdev);
+	if (sc == NULL) {
+		/* virtual machine is in the process of being created */
+		return (EINVAL);
+	}
+
+	/*
+	 * Get a read lock on the guest memory map by freezing any vcpu.
+	 */
+	lastcpu = vm_get_maxcpus(sc->vm) - 1;
+	error = vcpu_lock_one(sc, lastcpu);
+	if (error)
+		return (error);
+
+	/* Walk the memory map looking for a segment covering the range. */
+	gpa = 0;
+	found = 0;
+	while (!found) {
+		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+		    NULL, NULL);
+		if (error)
+			break;
+
+		if (first >= gpa && last <= gpa + len)
+			found = 1;
+		else
+			gpa += len;
+	}
+
+	if (found) {
+		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+		KASSERT(error == 0 && *objp != NULL,
+		    ("%s: invalid memory segment %d", __func__, segid));
+		if (sysmem) {
+			vm_object_reference(*objp);
+			*offset = segoff + (first - gpa);
+		} else {
+			error = EINVAL;
+		}
+	}
+	vcpu_unlock_one(sc, lastcpu);
+	return (error);
+}
+
+/*
+ * Final teardown of a VM softc: free any devmem bookkeeping, destroy
+ * the cdev and the vm instance, unlink from the global list and free
+ * the softc.  Runs from destroy_dev_sched_cb() or on a failed create.
+ */
+static void
+vmmdev_destroy(void *arg)
+{
+	struct vmmdev_softc *sc = arg;
+	struct devmem_softc *dsc;
+	int error __diagused;
+
+	error = vcpu_lock_all(sc);
+	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+
+	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+		/* The devmem cdevs were destroyed before this callback ran. */
+		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+		SLIST_REMOVE_HEAD(&sc->devmem, link);
+		free(dsc->name, M_VMMDEV);
+		free(dsc, M_VMMDEV);
+	}
+
+	if (sc->cdev != NULL)
+		destroy_dev(sc->cdev);
+
+	if (sc->vm != NULL)
+		vm_destroy(sc->vm);
+
+	if ((sc->flags & VSC_LINKED) != 0) {
+		mtx_lock(&vmmdev_mtx);
+		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+		mtx_unlock(&vmmdev_mtx);
+	}
+
+	free(sc, M_VMMDEV);
+}
+
+/*
+ * hw.vmm.destroy sysctl handler: schedules the named VM and all its
+ * devmem cdevs for asynchronous destruction.
+ */
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+	struct devmem_softc *dsc;
+	struct vmmdev_softc *sc;
+	struct cdev *cdev;
+	char *buf;
+	int error, buflen;
+
+	error = vmm_priv_check(req->td->td_ucred);
+	if (error)
+		return (error);
+
+	buflen = VM_MAX_NAMELEN + 1;
+	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+	strlcpy(buf, "beavis", buflen);
+	error = sysctl_handle_string(oidp, buf, buflen, req);
+	if (error != 0 || req->newptr == NULL)
+		goto out;
+
+	mtx_lock(&vmmdev_mtx);
+	sc = vmmdev_lookup(buf);
+	if (sc == NULL || sc->cdev == NULL) {
+		mtx_unlock(&vmmdev_mtx);
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
+	 * goes down to 0 so we should not do it again in the callback.
+	 *
+	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
+	 * is scheduled for destruction.
+	 */
+	cdev = sc->cdev;
+	sc->cdev = NULL;
+	mtx_unlock(&vmmdev_mtx);
+
+	/*
+	 * Schedule all cdevs to be destroyed:
+	 *
+	 * - any new operations on the 'cdev' will return an error (ENXIO).
+	 *
+	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
+	 *   be destroyed and the callback will be invoked in a taskqueue
+	 *   context.
+	 *
+	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
+	 */
+	SLIST_FOREACH(dsc, &sc->devmem, link) {
+		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
+	}
+	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
+	error = 0;
+
+out:
+	free(buf, M_VMMDEV);
+	return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
+    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_vmm_destroy, "A",
+    NULL);
+
+/* Character device switch for the per-VM /dev/vmm/<name> node. */
+static struct cdevsw vmmdevsw = {
+	.d_name		= "vmmdev",
+	.d_version	= D_VERSION,
+	.d_ioctl	= vmmdev_ioctl,
+	.d_mmap_single	= vmmdev_mmap_single,
+	.d_read		= vmmdev_rw,
+	.d_write	= vmmdev_rw,
+};
+
+/*
+ * hw.vmm.create sysctl handler: creates a new VM with the given name
+ * and its /dev/vmm/<name> cdev.  Fails with EEXIST if a VM with that
+ * name already exists.
+ */
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+	struct vm *vm;
+	struct cdev *cdev;
+	struct vmmdev_softc *sc, *sc2;
+	char *buf;
+	int error, buflen;
+
+	error = vmm_priv_check(req->td->td_ucred);
+	if (error)
+		return (error);
+
+	buflen = VM_MAX_NAMELEN + 1;
+	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+	strlcpy(buf, "beavis", buflen);
+	error = sysctl_handle_string(oidp, buf, buflen, req);
+	if (error != 0 || req->newptr == NULL)
+		goto out;
+
+	mtx_lock(&vmmdev_mtx);
+	sc = vmmdev_lookup(buf);
+	mtx_unlock(&vmmdev_mtx);
+	if (sc != NULL) {
+		error = EEXIST;
+		goto out;
+	}
+
+	/* vm_create() may sleep, so it runs with the mutex dropped. */
+	error = vm_create(buf, &vm);
+	if (error != 0)
+		goto out;
+
+	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+	sc->vm = vm;
+	SLIST_INIT(&sc->devmem);
+
+	/*
+	 * Lookup the name again just in case somebody sneaked in when we
+	 * dropped the lock.
+	 */
+	mtx_lock(&vmmdev_mtx);
+	sc2 = vmmdev_lookup(buf);
+	if (sc2 == NULL) {
+		SLIST_INSERT_HEAD(&head, sc, link);
+		sc->flags |= VSC_LINKED;
+	}
+	mtx_unlock(&vmmdev_mtx);
+
+	if (sc2 != NULL) {
+		vmmdev_destroy(sc);
+		error = EEXIST;
+		goto out;
+	}
+
+	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
+	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+	if (error != 0) {
+		vmmdev_destroy(sc);
+		goto out;
+	}
+
+	mtx_lock(&vmmdev_mtx);
+	sc->cdev = cdev;
+	sc->cdev->si_drv1 = sc;
+	mtx_unlock(&vmmdev_mtx);
+
+out:
+	free(buf, M_VMMDEV);
+	return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
+    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_vmm_create, "A",
+    NULL);
+
+/* Module load-time initialization of the vmm device interface. */
+void
+vmmdev_init(void)
+{
+	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
+	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
+	    "Allow use of vmm in a jail.");
+}
+
+/*
+ * Module unload check: succeeds only when no VM devices remain,
+ * otherwise fails with EBUSY.
+ */
+int
+vmmdev_cleanup(void)
+{
+
+	return (SLIST_EMPTY(&head) ? 0 : EBUSY);
+}
+
+/*
+ * mmap(2) handler for a devmem cdev: hands back the VM object of the
+ * device memory segment, provided the requested range lies within it.
+ */
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+    struct vm_object **objp, int nprot)
+{
+	struct devmem_softc *dsc;
+	vm_ooffset_t first, last;
+	size_t seglen;
+	int error;
+	uint16_t lastcpu;
+	bool sysmem;
+
+	dsc = cdev->si_drv1;
+	if (dsc == NULL) {
+		/* 'cdev' has been created but is not ready for use */
+		return (ENXIO);
+	}
+
+	first = *offset;
+	last = *offset + len;
+	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+		return (EINVAL);
+
+	/* Freeze a vcpu to get a read lock on the guest memory map. */
+	lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1;
+	error = vcpu_lock_one(dsc->sc, lastcpu);
+	if (error)
+		return (error);
+
+	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+	KASSERT(error == 0 && !sysmem && *objp != NULL,
+	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+	vcpu_unlock_one(dsc->sc, lastcpu);
+
+	if (seglen >= last) {
+		vm_object_reference(*objp);
+		return (0);
+	} else {
+		return (EINVAL);
+	}
+}
+
+/* Switch table for devmem cdevs; only mmap(2) is supported. */
+static struct cdevsw devmemsw = {
+	.d_name		= "devmem",
+	.d_version	= D_VERSION,
+	.d_mmap_single	= devmem_mmap_single,
+};
+
+/*
+ * Create the /dev/vmm.io/<vmname>.<devname> cdev backing a device
+ * memory segment.  Takes ownership of 'devname' on success (it is
+ * freed when the VM softc is destroyed).
+ */
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+	struct devmem_softc *dsc;
+	struct vmmdev_softc *sc;
+	struct cdev *cdev;
+	int error;
+
+	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+	if (error)
+		return (error);
+
+	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+	mtx_lock(&vmmdev_mtx);
+	sc = vmmdev_lookup(vmname);
+	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+	if (sc->cdev == NULL) {
+		/* virtual machine is being created or destroyed */
+		mtx_unlock(&vmmdev_mtx);
+		free(dsc, M_VMMDEV);
+		/* Pass NULL (not 0) for the pointer callback arguments. */
+		destroy_dev_sched_cb(cdev, NULL, NULL);
+		return (ENODEV);
+	}
+
+	dsc->segid = segid;
+	dsc->name = devname;
+	dsc->cdev = cdev;
+	dsc->sc = sc;
+	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+	mtx_unlock(&vmmdev_mtx);
+
+	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
+	cdev->si_drv1 = dsc;
+	return (0);
+}
+
+/*
+ * Callback run when a devmem cdev has been destroyed.  Only clears the
+ * pointers; the devmem_softc itself is freed later by vmmdev_destroy().
+ */
+static void
+devmem_destroy(void *arg)
+{
+	struct devmem_softc *dsc = arg;
+
+	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+	dsc->cdev = NULL;
+	dsc->sc = NULL;
+}
diff --git a/sys/arm64/vmm/vmm_hyp.c b/sys/arm64/vmm/vmm_hyp.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp.c
@@ -0,0 +1,822 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+
+#include <machine/armreg.h>
+
+#include "arm64.h"
+#include "hyp.h"
+
+struct hypctx;
+
+uint64_t vmm_hyp_enter(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
+ uint64_t, uint64_t, uint64_t);
+uint64_t vmm_enter_guest(struct hypctx *);
+
+/* TODO: Make this common between this & vfp.h */
+static void
+vfp_store(struct vfpstate *state)
+{
+ __uint128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ __asm __volatile(
+ "mrs %0, fpcr \n"
+ "mrs %1, fpsr \n"
+ "stp q0, q1, [%2, #16 * 0]\n"
+ "stp q2, q3, [%2, #16 * 2]\n"
+ "stp q4, q5, [%2, #16 * 4]\n"
+ "stp q6, q7, [%2, #16 * 6]\n"
+ "stp q8, q9, [%2, #16 * 8]\n"
+ "stp q10, q11, [%2, #16 * 10]\n"
+ "stp q12, q13, [%2, #16 * 12]\n"
+ "stp q14, q15, [%2, #16 * 14]\n"
+ "stp q16, q17, [%2, #16 * 16]\n"
+ "stp q18, q19, [%2, #16 * 18]\n"
+ "stp q20, q21, [%2, #16 * 20]\n"
+ "stp q22, q23, [%2, #16 * 22]\n"
+ "stp q24, q25, [%2, #16 * 24]\n"
+ "stp q26, q27, [%2, #16 * 26]\n"
+ "stp q28, q29, [%2, #16 * 28]\n"
+ "stp q30, q31, [%2, #16 * 30]\n"
+ : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));
+
+ state->vfp_fpcr = fpcr;
+ state->vfp_fpsr = fpsr;
+}
+
+static void
+vfp_restore(struct vfpstate *state)
+{
+ __uint128_t *vfp_state;
+ uint64_t fpcr, fpsr;
+
+ vfp_state = state->vfp_regs;
+ fpcr = state->vfp_fpcr;
+ fpsr = state->vfp_fpsr;
+
+ __asm __volatile(
+ "ldp q0, q1, [%2, #16 * 0]\n"
+ "ldp q2, q3, [%2, #16 * 2]\n"
+ "ldp q4, q5, [%2, #16 * 4]\n"
+ "ldp q6, q7, [%2, #16 * 6]\n"
+ "ldp q8, q9, [%2, #16 * 8]\n"
+ "ldp q10, q11, [%2, #16 * 10]\n"
+ "ldp q12, q13, [%2, #16 * 12]\n"
+ "ldp q14, q15, [%2, #16 * 14]\n"
+ "ldp q16, q17, [%2, #16 * 16]\n"
+ "ldp q18, q19, [%2, #16 * 18]\n"
+ "ldp q20, q21, [%2, #16 * 20]\n"
+ "ldp q22, q23, [%2, #16 * 22]\n"
+ "ldp q24, q25, [%2, #16 * 24]\n"
+ "ldp q26, q27, [%2, #16 * 26]\n"
+ "ldp q28, q29, [%2, #16 * 28]\n"
+ "ldp q30, q31, [%2, #16 * 30]\n"
+ "msr fpcr, %0 \n"
+ "msr fpsr, %1 \n"
+ : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
+}
+
+static void
+vmm_hyp_reg_store(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+ /* Store the guest VFP registers */
+ if (guest) {
+ vfp_store(&hypctx->vfpstate);
+
+ /* Store the timer registers */
+ hypctx->vtimer_cpu.cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0 =
+ READ_SPECIALREG(cntv_cval_el0);
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0 =
+ READ_SPECIALREG(cntv_ctl_el0);
+
+ /* Store the GICv3 registers */
+ hypctx->vgic_cpu_if.ich_eisr_el2 =
+ READ_SPECIALREG(ich_eisr_el2);
+ hypctx->vgic_cpu_if.ich_elrsr_el2 =
+ READ_SPECIALREG(ich_elrsr_el2);
+ hypctx->vgic_cpu_if.ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
+ hypctx->vgic_cpu_if.ich_misr_el2 =
+ READ_SPECIALREG(ich_misr_el2);
+ hypctx->vgic_cpu_if.ich_vmcr_el2 =
+ READ_SPECIALREG(ich_vmcr_el2);
+ switch(hypctx->vgic_cpu_if.ich_lr_num - 1) {
+#define STORE_LR(x) \
+ case x: \
+ hypctx->vgic_cpu_if.ich_lr_el2[x] = \
+ READ_SPECIALREG(ich_lr ## x ##_el2)
+ STORE_LR(15);
+ STORE_LR(14);
+ STORE_LR(13);
+ STORE_LR(12);
+ STORE_LR(11);
+ STORE_LR(10);
+ STORE_LR(9);
+ STORE_LR(8);
+ STORE_LR(7);
+ STORE_LR(6);
+ STORE_LR(5);
+ STORE_LR(4);
+ STORE_LR(3);
+ STORE_LR(2);
+ STORE_LR(1);
+ default:
+ STORE_LR(0);
+#undef STORE_LR
+ }
+
+ switch(hypctx->vgic_cpu_if.ich_apr_num - 1) {
+#define STORE_APR(x) \
+ case x: \
+ hypctx->vgic_cpu_if.ich_ap0r_el2[x] = \
+ READ_SPECIALREG(ich_ap0r ## x ##_el2); \
+ hypctx->vgic_cpu_if.ich_ap1r_el2[x] = \
+ READ_SPECIALREG(ich_ap1r ## x ##_el2)
+ STORE_APR(3);
+ STORE_APR(2);
+ STORE_APR(1);
+ default:
+ STORE_APR(0);
+#undef STORE_APR
+ }
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch(ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_BRP(x) \
+ case x: \
+ hypctx->dbgbcr_el1[x] = \
+ READ_SPECIALREG(dbgbcr ## x ## _el1); \
+ hypctx->dbgbvr_el1[x] = \
+ READ_SPECIALREG(dbgbvr ## x ## _el1)
+ STORE_DBG_BRP(15);
+ STORE_DBG_BRP(14);
+ STORE_DBG_BRP(13);
+ STORE_DBG_BRP(12);
+ STORE_DBG_BRP(11);
+ STORE_DBG_BRP(10);
+ STORE_DBG_BRP(9);
+ STORE_DBG_BRP(8);
+ STORE_DBG_BRP(7);
+ STORE_DBG_BRP(6);
+ STORE_DBG_BRP(5);
+ STORE_DBG_BRP(4);
+ STORE_DBG_BRP(3);
+ STORE_DBG_BRP(2);
+ STORE_DBG_BRP(1);
+ default:
+ STORE_DBG_BRP(0);
+#undef STORE_DBG_BRP
+ }
+
+ switch(ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define STORE_DBG_WRP(x) \
+ case x: \
+ hypctx->dbgwcr_el1[x] = \
+ READ_SPECIALREG(dbgwcr ## x ## _el1); \
+ hypctx->dbgwvr_el1[x] = \
+ READ_SPECIALREG(dbgwvr ## x ## _el1)
+ STORE_DBG_WRP(15);
+ STORE_DBG_WRP(14);
+ STORE_DBG_WRP(13);
+ STORE_DBG_WRP(12);
+ STORE_DBG_WRP(11);
+ STORE_DBG_WRP(10);
+ STORE_DBG_WRP(9);
+ STORE_DBG_WRP(8);
+ STORE_DBG_WRP(7);
+ STORE_DBG_WRP(6);
+ STORE_DBG_WRP(5);
+ STORE_DBG_WRP(4);
+ STORE_DBG_WRP(3);
+ STORE_DBG_WRP(2);
+ STORE_DBG_WRP(1);
+ default:
+ STORE_DBG_WRP(0);
+#undef STORE_DBG_WRP
+ }
+
+ /* Store the PMU registers */
+ hypctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0);
+ hypctx->pmccntr_el0 = READ_SPECIALREG(pmccntr_el0);
+ hypctx->pmccfiltr_el0 = READ_SPECIALREG(pmccfiltr_el0);
+ hypctx->pmcntenset_el0 = READ_SPECIALREG(pmcntenset_el0);
+ hypctx->pmintenset_el1 = READ_SPECIALREG(pmintenset_el1);
+ hypctx->pmovsset_el0 = READ_SPECIALREG(pmovsset_el0);
+ hypctx->pmuserenr_el0 = READ_SPECIALREG(pmuserenr_el0);
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define STORE_PMU(x) \
+ case (x + 1): \
+ hypctx->pmevcntr_el0[x] = \
+ READ_SPECIALREG(pmevcntr ## x ## _el0); \
+ hypctx->pmevtyper_el0[x] = \
+ READ_SPECIALREG(pmevtyper ## x ## _el0)
+ STORE_PMU(30);
+ STORE_PMU(29);
+ STORE_PMU(28);
+ STORE_PMU(27);
+ STORE_PMU(26);
+ STORE_PMU(25);
+ STORE_PMU(24);
+ STORE_PMU(23);
+ STORE_PMU(22);
+ STORE_PMU(21);
+ STORE_PMU(20);
+ STORE_PMU(19);
+ STORE_PMU(18);
+ STORE_PMU(17);
+ STORE_PMU(16);
+ STORE_PMU(15);
+ STORE_PMU(14);
+ STORE_PMU(13);
+ STORE_PMU(12);
+ STORE_PMU(11);
+ STORE_PMU(10);
+ STORE_PMU(9);
+ STORE_PMU(8);
+ STORE_PMU(7);
+ STORE_PMU(6);
+ STORE_PMU(5);
+ STORE_PMU(4);
+ STORE_PMU(3);
+ STORE_PMU(2);
+ STORE_PMU(1);
+ STORE_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef STORE_PMU
+ }
+
+	/* Store the special registers to the trapframe */
+ hypctx->tf.tf_sp = READ_SPECIALREG(sp_el1);
+ hypctx->tf.tf_elr = READ_SPECIALREG(elr_el2);
+ hypctx->tf.tf_spsr = READ_SPECIALREG(spsr_el2);
+ if (guest) {
+ hypctx->tf.tf_esr = READ_SPECIALREG(esr_el2);
+ }
+
+ /* Store the guest special registers */
+ hypctx->elr_el1 = READ_SPECIALREG(elr_el1);
+ hypctx->sp_el0 = READ_SPECIALREG(sp_el0);
+ hypctx->tpidr_el0 = READ_SPECIALREG(tpidr_el0);
+ hypctx->tpidrro_el0 = READ_SPECIALREG(tpidrro_el0);
+ hypctx->tpidr_el1 = READ_SPECIALREG(tpidr_el1);
+ hypctx->vbar_el1 = READ_SPECIALREG(vbar_el1);
+
+ hypctx->actlr_el1 = READ_SPECIALREG(actlr_el1);
+ hypctx->afsr0_el1 = READ_SPECIALREG(afsr0_el1);
+ hypctx->afsr1_el1 = READ_SPECIALREG(afsr1_el1);
+ hypctx->amair_el1 = READ_SPECIALREG(amair_el1);
+ hypctx->contextidr_el1 = READ_SPECIALREG(contextidr_el1);
+ hypctx->cpacr_el1 = READ_SPECIALREG(cpacr_el1);
+ hypctx->csselr_el1 = READ_SPECIALREG(csselr_el1);
+ hypctx->esr_el1 = READ_SPECIALREG(esr_el1);
+ hypctx->far_el1 = READ_SPECIALREG(far_el1);
+ hypctx->mair_el1 = READ_SPECIALREG(mair_el1);
+ hypctx->mdccint_el1 = READ_SPECIALREG(mdccint_el1);
+ hypctx->mdscr_el1 = READ_SPECIALREG(mdscr_el1);
+ hypctx->par_el1 = READ_SPECIALREG(par_el1);
+ hypctx->sctlr_el1 = READ_SPECIALREG(sctlr_el1);
+ hypctx->spsr_el1 = READ_SPECIALREG(spsr_el1);
+ hypctx->tcr_el1 = READ_SPECIALREG(tcr_el1);
+ hypctx->ttbr0_el1 = READ_SPECIALREG(ttbr0_el1);
+ hypctx->ttbr1_el1 = READ_SPECIALREG(ttbr1_el1);
+
+ hypctx->cptr_el2 = READ_SPECIALREG(cptr_el2);
+ hypctx->hcr_el2 = READ_SPECIALREG(hcr_el2);
+ hypctx->vpidr_el2 = READ_SPECIALREG(vpidr_el2);
+ hypctx->vmpidr_el2 = READ_SPECIALREG(vmpidr_el2);
+}
+
+static void
+vmm_hyp_reg_restore(struct hypctx *hypctx, struct hyp *hyp, bool guest)
+{
+ uint64_t dfr0;
+
+ /* Restore the special registers */
+ WRITE_SPECIALREG(elr_el1, hypctx->elr_el1);
+ WRITE_SPECIALREG(sp_el0, hypctx->sp_el0);
+ WRITE_SPECIALREG(tpidr_el0, hypctx->tpidr_el0);
+ WRITE_SPECIALREG(tpidrro_el0, hypctx->tpidrro_el0);
+ WRITE_SPECIALREG(tpidr_el1, hypctx->tpidr_el1);
+ WRITE_SPECIALREG(vbar_el1, hypctx->vbar_el1);
+
+ WRITE_SPECIALREG(actlr_el1, hypctx->actlr_el1);
+ WRITE_SPECIALREG(afsr0_el1, hypctx->afsr0_el1);
+ WRITE_SPECIALREG(afsr1_el1, hypctx->afsr1_el1);
+ WRITE_SPECIALREG(amair_el1, hypctx->amair_el1);
+ WRITE_SPECIALREG(contextidr_el1, hypctx->contextidr_el1);
+ WRITE_SPECIALREG(cpacr_el1, hypctx->cpacr_el1);
+ WRITE_SPECIALREG(csselr_el1, hypctx->csselr_el1);
+ WRITE_SPECIALREG(esr_el1, hypctx->esr_el1);
+ WRITE_SPECIALREG(far_el1, hypctx->far_el1);
+ WRITE_SPECIALREG(mdccint_el1, hypctx->mdccint_el1);
+ WRITE_SPECIALREG(mdscr_el1, hypctx->mdscr_el1);
+ WRITE_SPECIALREG(mair_el1, hypctx->mair_el1);
+ WRITE_SPECIALREG(par_el1, hypctx->par_el1);
+ WRITE_SPECIALREG(sctlr_el1, hypctx->sctlr_el1);
+ WRITE_SPECIALREG(tcr_el1, hypctx->tcr_el1);
+ WRITE_SPECIALREG(ttbr0_el1, hypctx->ttbr0_el1);
+ WRITE_SPECIALREG(ttbr1_el1, hypctx->ttbr1_el1);
+ WRITE_SPECIALREG(spsr_el1, hypctx->spsr_el1);
+
+ WRITE_SPECIALREG(cptr_el2, hypctx->cptr_el2);
+ WRITE_SPECIALREG(hcr_el2, hypctx->hcr_el2);
+ WRITE_SPECIALREG(vpidr_el2, hypctx->vpidr_el2);
+ WRITE_SPECIALREG(vmpidr_el2, hypctx->vmpidr_el2);
+
+ /* Load the special regs from the trapframe */
+ WRITE_SPECIALREG(sp_el1, hypctx->tf.tf_sp);
+ WRITE_SPECIALREG(elr_el2, hypctx->tf.tf_elr);
+ WRITE_SPECIALREG(spsr_el2, hypctx->tf.tf_spsr);
+
+ /* Restore the PMU registers */
+ WRITE_SPECIALREG(pmcr_el0, hypctx->pmcr_el0);
+ WRITE_SPECIALREG(pmccntr_el0, hypctx->pmccntr_el0);
+ WRITE_SPECIALREG(pmccfiltr_el0, hypctx->pmccfiltr_el0);
+ /* Clear all events/interrupts then enable them */
+ WRITE_SPECIALREG(pmcntenclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmcntenset_el0, hypctx->pmcntenset_el0);
+ WRITE_SPECIALREG(pmintenclr_el1, 0xfffffffful);
+ WRITE_SPECIALREG(pmintenset_el1, hypctx->pmintenset_el1);
+ WRITE_SPECIALREG(pmovsclr_el0, 0xfffffffful);
+ WRITE_SPECIALREG(pmovsset_el0, hypctx->pmovsset_el0);
+
+ switch ((hypctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT) {
+#define LOAD_PMU(x) \
+ case (x + 1): \
+ WRITE_SPECIALREG(pmevcntr ## x ## _el0, \
+ hypctx->pmevcntr_el0[x]); \
+ WRITE_SPECIALREG(pmevtyper ## x ## _el0, \
+ hypctx->pmevtyper_el0[x])
+ LOAD_PMU(30);
+ LOAD_PMU(29);
+ LOAD_PMU(28);
+ LOAD_PMU(27);
+ LOAD_PMU(26);
+ LOAD_PMU(25);
+ LOAD_PMU(24);
+ LOAD_PMU(23);
+ LOAD_PMU(22);
+ LOAD_PMU(21);
+ LOAD_PMU(20);
+ LOAD_PMU(19);
+ LOAD_PMU(18);
+ LOAD_PMU(17);
+ LOAD_PMU(16);
+ LOAD_PMU(15);
+ LOAD_PMU(14);
+ LOAD_PMU(13);
+ LOAD_PMU(12);
+ LOAD_PMU(11);
+ LOAD_PMU(10);
+ LOAD_PMU(9);
+ LOAD_PMU(8);
+ LOAD_PMU(7);
+ LOAD_PMU(6);
+ LOAD_PMU(5);
+ LOAD_PMU(4);
+ LOAD_PMU(3);
+ LOAD_PMU(2);
+ LOAD_PMU(1);
+ LOAD_PMU(0);
+ default: /* N == 0 when only PMCCNTR_EL0 is available */
+ break;
+#undef LOAD_PMU
+ }
+
+ dfr0 = READ_SPECIALREG(id_aa64dfr0_el1);
+ switch(ID_AA64DFR0_BRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_BRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgbcr ## x ## _el1, \
+ hypctx->dbgbcr_el1[x]); \
+ WRITE_SPECIALREG(dbgbvr ## x ## _el1, \
+ hypctx->dbgbvr_el1[x])
+ LOAD_DBG_BRP(15);
+ LOAD_DBG_BRP(14);
+ LOAD_DBG_BRP(13);
+ LOAD_DBG_BRP(12);
+ LOAD_DBG_BRP(11);
+ LOAD_DBG_BRP(10);
+ LOAD_DBG_BRP(9);
+ LOAD_DBG_BRP(8);
+ LOAD_DBG_BRP(7);
+ LOAD_DBG_BRP(6);
+ LOAD_DBG_BRP(5);
+ LOAD_DBG_BRP(4);
+ LOAD_DBG_BRP(3);
+ LOAD_DBG_BRP(2);
+ LOAD_DBG_BRP(1);
+ default:
+ LOAD_DBG_BRP(0);
+#undef LOAD_DBG_BRP
+ }
+
+ switch(ID_AA64DFR0_WRPs_VAL(dfr0) - 1) {
+#define LOAD_DBG_WRP(x) \
+ case x: \
+ WRITE_SPECIALREG(dbgwcr ## x ## _el1, \
+ hypctx->dbgwcr_el1[x]); \
+ WRITE_SPECIALREG(dbgwvr ## x ## _el1, \
+ hypctx->dbgwvr_el1[x])
+ LOAD_DBG_WRP(15);
+ LOAD_DBG_WRP(14);
+ LOAD_DBG_WRP(13);
+ LOAD_DBG_WRP(12);
+ LOAD_DBG_WRP(11);
+ LOAD_DBG_WRP(10);
+ LOAD_DBG_WRP(9);
+ LOAD_DBG_WRP(8);
+ LOAD_DBG_WRP(7);
+ LOAD_DBG_WRP(6);
+ LOAD_DBG_WRP(5);
+ LOAD_DBG_WRP(4);
+ LOAD_DBG_WRP(3);
+ LOAD_DBG_WRP(2);
+ LOAD_DBG_WRP(1);
+ default:
+ LOAD_DBG_WRP(0);
+#undef LOAD_DBG_WRP
+ }
+
+ if (guest) {
+ /* Load the timer registers */
+ WRITE_SPECIALREG(cntkctl_el1, hypctx->vtimer_cpu.cntkctl_el1);
+ WRITE_SPECIALREG(cntv_cval_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_cval_el0);
+ WRITE_SPECIALREG(cntv_ctl_el0,
+ hypctx->vtimer_cpu.virt_timer.cntx_ctl_el0);
+ WRITE_SPECIALREG(cnthctl_el2, hyp->vtimer.cnthctl_el2);
+ WRITE_SPECIALREG(cntvoff_el2, hyp->vtimer.cntvoff_el2);
+
+ /* Load the GICv3 registers */
+ WRITE_SPECIALREG(ich_hcr_el2, hypctx->vgic_cpu_if.ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2,
+ hypctx->vgic_cpu_if.ich_vmcr_el2);
+ switch(hypctx->vgic_cpu_if.ich_lr_num - 1) {
+#define LOAD_LR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_lr ## x ##_el2, \
+ hypctx->vgic_cpu_if.ich_lr_el2[x])
+ LOAD_LR(15);
+ LOAD_LR(14);
+ LOAD_LR(13);
+ LOAD_LR(12);
+ LOAD_LR(11);
+ LOAD_LR(10);
+ LOAD_LR(9);
+ LOAD_LR(8);
+ LOAD_LR(7);
+ LOAD_LR(6);
+ LOAD_LR(5);
+ LOAD_LR(4);
+ LOAD_LR(3);
+ LOAD_LR(2);
+ LOAD_LR(1);
+ default:
+ LOAD_LR(0);
+#undef LOAD_LR
+ }
+
+ switch(hypctx->vgic_cpu_if.ich_apr_num - 1) {
+#define LOAD_APR(x) \
+ case x: \
+ WRITE_SPECIALREG(ich_ap0r ## x ##_el2, \
+ hypctx->vgic_cpu_if.ich_ap0r_el2[x]); \
+ WRITE_SPECIALREG(ich_ap1r ## x ##_el2, \
+ hypctx->vgic_cpu_if.ich_ap1r_el2[x])
+ LOAD_APR(3);
+ LOAD_APR(2);
+ LOAD_APR(1);
+ default:
+ LOAD_APR(0);
+#undef LOAD_APR
+ }
+
+ /* Load the guest VFP registers */
+ vfp_restore(&hypctx->vfpstate);
+ }
+}
+
+static uint64_t
+vmm_hyp_call_guest(struct hyp *hyp, int vcpu)
+{
+ struct hypctx host_hypctx;
+ struct hypctx *hypctx;
+ uint64_t cntvoff_el2;
+ uint64_t ich_hcr_el2, ich_vmcr_el2, cnthctl_el2, cntkctl_el1;
+ uint64_t ret;
+ uint64_t s1e1r, hpfar_el2;
+ bool hpfar_valid;
+
+ vmm_hyp_reg_store(&host_hypctx, NULL, false);
+
+ /* TODO: Check cpuid is valid */
+ hypctx = &hyp->ctx[vcpu];
+
+ /* Save the host special registers */
+ cnthctl_el2 = READ_SPECIALREG(cnthctl_el2);
+ cntkctl_el1 = READ_SPECIALREG(cntkctl_el1);
+ cntvoff_el2 = READ_SPECIALREG(cntvoff_el2);
+
+ ich_hcr_el2 = READ_SPECIALREG(ich_hcr_el2);
+ ich_vmcr_el2 = READ_SPECIALREG(ich_vmcr_el2);
+
+ vmm_hyp_reg_restore(hypctx, hyp, true);
+
+ /* Load the common hypervisor registers */
+ WRITE_SPECIALREG(vttbr_el2, hyp->vttbr_el2);
+
+ host_hypctx.mdcr_el2 = READ_SPECIALREG(mdcr_el2);
+ WRITE_SPECIALREG(mdcr_el2, hypctx->mdcr_el2);
+
+ /* Call into the guest */
+ ret = vmm_enter_guest(hypctx);
+
+ WRITE_SPECIALREG(mdcr_el2, host_hypctx.mdcr_el2);
+ isb();
+
+ /* Store the exit info */
+ hypctx->exit_info.far_el2 = READ_SPECIALREG(far_el2);
+ hpfar_valid = true;
+ if (ret == EXCP_TYPE_EL1_SYNC) {
+ switch(ESR_ELx_EXCEPTION(hypctx->tf.tf_esr)) {
+ case EXCP_INSN_ABORT_L:
+ case EXCP_DATA_ABORT_L:
+ /*
+ * The hpfar_el2 register is valid for:
+			 * - Translation and Access faults.
+			 * - Translation, Access, and permission faults on
+ * the translation table walk on the stage 1 tables.
+ * - A stage 2 Address size fault.
+ *
+ * As we only need it in the first 2 cases we can just
+ * exclude it on permission faults that are not from
+ * the stage 1 table walk.
+ *
+ * TODO: Add a case for Arm erratum 834220.
+ */
+ if ((hypctx->tf.tf_esr & ISS_DATA_S1PTW) != 0)
+ break;
+ switch(hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ hpfar_valid = false;
+ break;
+ }
+ break;
+ }
+ }
+ if (hpfar_valid) {
+ hypctx->exit_info.hpfar_el2 = READ_SPECIALREG(hpfar_el2);
+ } else {
+ /*
+ * TODO: There is a risk the at instruction could cause an
+ * exception here. We should handle it & return a failure.
+ */
+ s1e1r =
+ arm64_address_translate_s1e1r(hypctx->exit_info.far_el2);
+ if (PAR_SUCCESS(s1e1r)) {
+ hpfar_el2 = (s1e1r & PAR_PA_MASK) >> PAR_PA_SHIFT;
+ hpfar_el2 <<= HPFAR_EL2_FIPA_SHIFT;
+ hypctx->exit_info.hpfar_el2 = hpfar_el2;
+ } else {
+ ret = EXCP_TYPE_REENTER;
+ }
+ }
+
+ vmm_hyp_reg_store(hypctx, hyp, true);
+
+ vmm_hyp_reg_restore(&host_hypctx, NULL, false);
+
+ /* Restore the host special registers */
+ WRITE_SPECIALREG(ich_hcr_el2, ich_hcr_el2);
+ WRITE_SPECIALREG(ich_vmcr_el2, ich_vmcr_el2);
+
+ WRITE_SPECIALREG(cnthctl_el2, cnthctl_el2);
+ WRITE_SPECIALREG(cntkctl_el1, cntkctl_el1);
+ WRITE_SPECIALREG(cntvoff_el2, cntvoff_el2);
+
+ return (ret);
+}
+
+static uint64_t
+vmm_hyp_read_reg(uint64_t reg)
+{
+ switch(reg) {
+ case HYP_REG_ICH_VTR:
+ return (READ_SPECIALREG(ich_vtr_el2));
+ case HYP_REG_CNTHCTL:
+ return (READ_SPECIALREG(cnthctl_el2));
+ }
+
+ return (0);
+}
+
+static bool
+vmm_is_vpipt_cache(void)
+{
+	/* TODO: Implement. Assume a non-VPIPT icache until then. */
+	return (false);
+}
+
+static int
+vmm_clean_s2_tlbi(void)
+{
+ dsb(ishst);
+ __asm __volatile("tlbi alle1is");
+
+ /*
+ * If we have a VPIPT icache it will use the VMID to tag cachelines.
+ * As we are changing the allocated VMIDs we need to invalidate the
+ * icache lines containing all old values.
+ */
+ if (vmm_is_vpipt_cache())
+ __asm __volatile("ic ialluis");
+ dsb(ish);
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_range(uint64_t vttbr, vm_offset_t sva, vm_size_t eva,
+ bool final_only)
+{
+ uint64_t end, r, start;
+ uint64_t host_vttbr;
+
+#define TLBI_VA_SHIFT 12
+#define TLBI_VA_MASK ((1ul << 44) - 1)
+#define TLBI_VA(addr) (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK)
+#define TLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ /*
+ * The CPU can cache the stage 1 + 2 combination so we need to ensure
+ * the stage 2 is invalidated first, then when this has completed we
+ * invalidate the stage 1 TLB. As we don't know which stage 1 virtual
+ * addresses point at the stage 2 IPA we need to invalidate the entire
+ * stage 1 TLB.
+ */
+
+ start = TLBI_VA(sva);
+ end = TLBI_VA(eva);
+ for (r = start; r < end; r += TLBI_VA_L3_INCR) {
+ /* Invalidate the stage 2 TLB entry */
+ if (final_only)
+ __asm __volatile("tlbi ipas2le1is, %0" : : "r"(r));
+ else
+ __asm __volatile("tlbi ipas2e1is, %0" : : "r"(r));
+ }
+ /* Ensure the entry has been invalidated */
+ dsb(ish);
+ /* Invalidate the stage 1 TLB. */
+ __asm __volatile("tlbi vmalle1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vm_s2_tlbi_all(uint64_t vttbr)
+{
+ uint64_t host_vttbr;
+
+ /* Switch to the guest vttbr */
+ /* TODO: Handle Cortex-A57/A72 erratum 131936 */
+ host_vttbr = READ_SPECIALREG(vttbr_el2);
+ WRITE_SPECIALREG(vttbr_el2, vttbr);
+ isb();
+
+ __asm __volatile("tlbi vmalls12e1is");
+ dsb(ish);
+ isb();
+
+	/* Switch back to the host vttbr */
+ WRITE_SPECIALREG(vttbr_el2, host_vttbr);
+ isb();
+
+ return (0);
+}
+
+static int
+vmm_dc_civac(uint64_t start, uint64_t len)
+{
+ size_t line_size, end;
+ uint64_t ctr;
+
+ ctr = READ_SPECIALREG(ctr_el0);
+ line_size = sizeof(int) << CTR_DLINE_SIZE(ctr);
+ end = start + len;
+ dsb(ishst);
+ /* Clean and Invalidate the D-cache */
+ for (; start < end; start += line_size)
+ __asm __volatile("dc civac, %0" :: "r" (start) : "memory");
+ dsb(ish);
+ return (0);
+}
+
+static int
+vmm_el2_tlbi(uint64_t type, uint64_t start, uint64_t len)
+{
+	uint64_t end, r;
+
+	dsb(ishst);
+	switch (type) {
+	default:
+	case HYP_EL2_TLBI_ALL:
+		__asm __volatile("tlbi alle2" ::: "memory");
+		break;
+	case HYP_EL2_TLBI_VA:
+		/* Convert the VA range to 4KiB page numbers */
+		end = (start + len) >> 12;
+		start >>= 12;
+		while (start < end) {
+			/* TODO: Use new macros when merged past them */
+			r = start & 0xffffffffffful;
+			__asm __volatile("tlbi vae2is, %0" :: "r"(r));
+			/* 'start' is in page units, so advance one page */
+			start++;
+		}
+		break;
+	}
+	dsb(ish);
+
+	return (0);
+}
+
+uint64_t
+vmm_hyp_enter(uint64_t handle, uint64_t x1, uint64_t x2, uint64_t x3,
+ uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
+{
+ uint64_t ret;
+
+ switch (handle) {
+ case HYP_ENTER_GUEST:
+ do {
+ ret = vmm_hyp_call_guest((struct hyp *)x1, x2);
+ } while (ret == EXCP_TYPE_REENTER);
+ return (ret);
+ case HYP_READ_REGISTER:
+ return (vmm_hyp_read_reg(x1));
+ case HYP_CLEAN_S2_TLBI:
+ return (vmm_clean_s2_tlbi());
+ case HYP_DC_CIVAC:
+ return (vmm_dc_civac(x1, x2));
+ case HYP_EL2_TLBI:
+ return (vmm_el2_tlbi(x1, x2, x3));
+ case HYP_S2_TLBI_RANGE:
+ return (vm_s2_tlbi_range(x1, x2, x3, x4));
+ case HYP_S2_TLBI_ALL:
+ return (vm_s2_tlbi_all(x1));
+ case HYP_CLEANUP: /* Handled in vmm_hyp_exception.S */
+ default:
+ break;
+ }
+
+ return (0);
+}
diff --git a/sys/arm64/vmm/vmm_hyp_el2.S b/sys/arm64/vmm/vmm_hyp_el2.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_el2.S
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This work was supported by Innovate UK project 105694, "Digital Security
+ * by Design (DSbD) Technology Platform Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/param.h>
+
+ .rodata
+ .align PAGE_SHIFT
+ .globl vmm_hyp_code
+vmm_hyp_code:
+ .incbin "vmm_hyp_blob.bin"
+ .globl vmm_hyp_code_end
+vmm_hyp_code_end:
diff --git a/sys/arm64/vmm/vmm_hyp_exception.S b/sys/arm64/vmm/vmm_hyp_exception.S
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_hyp_exception.S
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ * Copyright (c) 2021 Andrew Turner
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <machine/asm.h>
+#include <machine/hypervisor.h>
+
+#include "assym.inc"
+#include "hyp.h"
+
+.macro save_host_registers
+ /* TODO: Only store callee saved registers */
+ sub sp, sp, #(32 * 8)
+ str x30, [sp, #(30 * 8)]
+ stp x28, x29, [sp, #(28 * 8)]
+ stp x26, x27, [sp, #(26 * 8)]
+ stp x24, x25, [sp, #(24 * 8)]
+ stp x22, x23, [sp, #(22 * 8)]
+ stp x20, x21, [sp, #(20 * 8)]
+ stp x18, x19, [sp, #(18 * 8)]
+ stp x16, x17, [sp, #(16 * 8)]
+ stp x14, x15, [sp, #(14 * 8)]
+ stp x12, x13, [sp, #(12 * 8)]
+ stp x10, x11, [sp, #(10 * 8)]
+ stp x8, x9, [sp, #(8 * 8)]
+ stp x6, x7, [sp, #(6 * 8)]
+ stp x4, x5, [sp, #(4 * 8)]
+ stp x2, x3, [sp, #(2 * 8)]
+ stp x0, x1, [sp, #(0 * 8)]
+.endm
+
+.macro restore_host_registers
+ /* TODO: Only restore callee saved registers */
+ ldp x0, x1, [sp, #(0 * 8)]
+ ldp x2, x3, [sp, #(2 * 8)]
+ ldp x4, x5, [sp, #(4 * 8)]
+ ldp x6, x7, [sp, #(6 * 8)]
+ ldp x8, x9, [sp, #(8 * 8)]
+ ldp x10, x11, [sp, #(10 * 8)]
+ ldp x12, x13, [sp, #(12 * 8)]
+ ldp x14, x15, [sp, #(14 * 8)]
+ ldp x16, x17, [sp, #(16 * 8)]
+ ldp x18, x19, [sp, #(18 * 8)]
+ ldp x20, x21, [sp, #(20 * 8)]
+ ldp x22, x23, [sp, #(22 * 8)]
+ ldp x24, x25, [sp, #(24 * 8)]
+ ldp x26, x27, [sp, #(26 * 8)]
+ ldp x28, x29, [sp, #(28 * 8)]
+ ldr x30, [sp, #(30 * 8)]
+ add sp, sp, #(32 * 8)
+.endm
+
+.macro save_guest_registers
+ /* Back up x0 so we can use it as a temporary register */
+ stp x0, x1, [sp, #-(2 * 8)]!
+
+ /* Restore the hypctx pointer */
+ mrs x0, tpidr_el2
+
+ stp x2, x3, [x0, #(TF_X + 2 * 8)]
+ stp x4, x5, [x0, #(TF_X + 4 * 8)]
+ stp x6, x7, [x0, #(TF_X + 6 * 8)]
+ stp x8, x9, [x0, #(TF_X + 8 * 8)]
+ stp x10, x11, [x0, #(TF_X + 10 * 8)]
+ stp x12, x13, [x0, #(TF_X + 12 * 8)]
+ stp x14, x15, [x0, #(TF_X + 14 * 8)]
+ stp x16, x17, [x0, #(TF_X + 16 * 8)]
+ stp x18, x19, [x0, #(TF_X + 18 * 8)]
+ stp x20, x21, [x0, #(TF_X + 20 * 8)]
+ stp x22, x23, [x0, #(TF_X + 22 * 8)]
+ stp x24, x25, [x0, #(TF_X + 24 * 8)]
+ stp x26, x27, [x0, #(TF_X + 26 * 8)]
+ stp x28, x29, [x0, #(TF_X + 28 * 8)]
+
+ str lr, [x0, #(TF_LR)]
+
+ /* Restore the saved x0 & x1 and save them */
+ ldp x2, x3, [sp], #(2 * 8)
+ stp x2, x3, [x0, #(TF_X + 0 * 8)]
+.endm
+
+.macro restore_guest_registers
+ /*
+ * Copy the guest x0 and x1 to the stack so we can restore them
+ * after loading the other registers.
+ */
+ ldp x2, x3, [x0, #(TF_X + 0 * 8)]
+ stp x2, x3, [sp, #-(2 * 8)]!
+
+ ldr lr, [x0, #(TF_LR)]
+
+ ldp x28, x29, [x0, #(TF_X + 28 * 8)]
+ ldp x26, x27, [x0, #(TF_X + 26 * 8)]
+ ldp x24, x25, [x0, #(TF_X + 24 * 8)]
+ ldp x22, x23, [x0, #(TF_X + 22 * 8)]
+ ldp x20, x21, [x0, #(TF_X + 20 * 8)]
+ ldp x18, x19, [x0, #(TF_X + 18 * 8)]
+ ldp x16, x17, [x0, #(TF_X + 16 * 8)]
+ ldp x14, x15, [x0, #(TF_X + 14 * 8)]
+ ldp x12, x13, [x0, #(TF_X + 12 * 8)]
+ ldp x10, x11, [x0, #(TF_X + 10 * 8)]
+ ldp x8, x9, [x0, #(TF_X + 8 * 8)]
+ ldp x6, x7, [x0, #(TF_X + 6 * 8)]
+ ldp x4, x5, [x0, #(TF_X + 4 * 8)]
+ ldp x2, x3, [x0, #(TF_X + 2 * 8)]
+
+ ldp x0, x1, [sp], #(2 * 8)
+.endm
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .section ".vmm_vectors","ax"
+ .align 11
+hyp_init_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* Error EL2h */
+
+ vector hyp_init /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+ .text
+ .align 11
+hyp_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vector el2_el2h_sync /* Synchronous EL2h */
+ vector el2_el2h_irq /* IRQ EL2h */
+ vector el2_el2h_fiq /* FIQ EL2h */
+ vector el2_el2h_error /* Error EL2h */
+
+ vector el2_el1_sync64 /* Synchronous 64-bit EL1 */
+ vector el2_el1_irq64 /* IRQ 64-bit EL1 */
+ vector el2_el1_fiq64 /* FIQ 64-bit EL1 */
+ vector el2_el1_error64 /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+/*
+ * Initialize the hypervisor mode with a new exception vector table, translation
+ * table and stack.
+ *
+ * Expecting:
+ * x0 - translation tables physical address
+ * x1 - stack top virtual address
+ * x2 - TCR_EL2 value
+ * x3 - SCTLR_EL2 value
+ * x4 - VTCR_EL2 value
+ */
+LENTRY(handle_hyp_init)
+ /* Install the new exception vectors */
+ adrp x6, hyp_vectors
+ add x6, x6, :lo12:hyp_vectors
+ msr vbar_el2, x6
+ /* Set the stack top address */
+ mov sp, x1
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+ /* Load the base address for the translation tables */
+ msr ttbr0_el2, x0
+ /* Invalidate the TLB */
+ tlbi alle2
+ /* Use the same memory attributes as EL1 */
+ mrs x9, mair_el1
+ msr mair_el2, x9
+ /* Configure address translation */
+ msr tcr_el2, x2
+ isb
+ /* Set the system control register for EL2 */
+ msr sctlr_el2, x3
+ /* Set the Stage 2 translation control register */
+ msr vtcr_el2, x4
+ /* Return success */
+ mov x0, #0
+ /* MMU is up and running */
+ ERET
+LEND(handle_hyp_init)
+
+.macro do_world_switch_to_host
+ save_guest_registers
+ restore_host_registers
+
+ /* Restore host VTTBR */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+.endm
+
+
+.macro handle_el2_excp type
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Test if the exception happened when the host was running */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* We got the exception while the guest was running */
+ ldr x9, [sp], #16
+ do_world_switch_to_host
+ mov x0, \type
+ ret
+
+1:
+ /* We got the exception while the host was running */
+ ldr x9, [sp], #16
+ mov x0, \type
+ eret
+.endm
+
+
+LENTRY(handle_el2_el2h_sync)
+ handle_el2_excp #EXCP_TYPE_EL2_SYNC
+LEND(handle_el2_el2h_sync)
+
+LENTRY(handle_el2_el2h_irq)
+ handle_el2_excp #EXCP_TYPE_EL2_IRQ
+LEND(handle_el2_el2h_irq)
+
+LENTRY(handle_el2_el2h_fiq)
+ handle_el2_excp #EXCP_TYPE_EL2_FIQ
+LEND(handle_el2_el2h_fiq)
+
+LENTRY(handle_el2_el2h_error)
+ handle_el2_excp #EXCP_TYPE_EL2_ERROR
+LEND(handle_el2_el2h_error)
+
+
+LENTRY(handle_el2_el1_sync64)
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Check for host hypervisor call */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ ldr x9, [sp], #16 /* Restore the temp register */
+ bne 1f
+
+ /*
+ * Called from the host
+ */
+
+ /* Check if this is a cleanup call and handle in a controlled state */
+ cmp x0, #(HYP_CLEANUP)
+ b.eq vmm_cleanup
+
+ str lr, [sp, #-16]!
+ bl vmm_hyp_enter
+ ldr lr, [sp], #16
+ ERET
+
+1: /* Guest exception taken to EL2 */
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_SYNC
+ ret
+LEND(handle_el2_el1_sync64)
+
+/*
+ * We only trap IRQ, FIQ and SError exceptions when a guest is running. Do a
+ * world switch to host to handle these exceptions.
+ */
+
+LENTRY(handle_el2_el1_irq64)
+	do_world_switch_to_host
+	str	x9, [sp, #-16]!
+	/* A non-zero maintenance interrupt status means a GIC maint IRQ */
+	mrs	x9, ich_misr_el2
+	cmp	x9, xzr
+	beq	1f
+	mov	x0, #EXCP_TYPE_MAINT_IRQ
+	b	2f
+1:
+	mov	x0, #EXCP_TYPE_EL1_IRQ
+2:
+	ldr	x9, [sp], #16
+	ret
+LEND(handle_el2_el1_irq64)
+
+LENTRY(handle_el2_el1_fiq64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_FIQ
+ ret
+LEND(handle_el2_el1_fiq64)
+
+LENTRY(handle_el2_el1_error64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_ERROR
+ ret
+LEND(handle_el2_el1_error64)
+
+
+/*
+ * Usage:
+ * uint64_t vmm_enter_guest(struct hypctx *hypctx)
+ *
+ * Expecting:
+ * x0 - hypctx address
+ */
+ENTRY(vmm_enter_guest)
+ /* Save hypctx address */
+ msr tpidr_el2, x0
+
+ save_host_registers
+ restore_guest_registers
+
+ /* Enter guest */
+ eret
+END(vmm_enter_guest)
+
+/*
+ * Usage:
+ * void vmm_cleanup(uint64_t handle, void *hyp_stub_vectors)
+ *
+ * Expecting:
+ * x1 - physical address of hyp_stub_vectors
+ */
+LENTRY(vmm_cleanup)
+ /* Restore the stub vectors */
+ msr vbar_el2, x1
+
+ /* Disable the MMU */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, #SCTLR_EL2_M
+ msr sctlr_el2, x2
+ isb
+
+ ERET
+LEND(vmm_cleanup)
diff --git a/sys/arm64/vmm/vmm_instruction_emul.c b/sys/arm64/vmm/vmm_instruction_emul.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
+/*
+ * Emulate a decoded guest load or store ('vie') that faulted on guest
+ * physical address 'gpa'.
+ *
+ * A read fetches the value from the device via 'memread' and stores it in
+ * the decoded target register; a write fetches the source register and
+ * passes it to 'memwrite'.  Returns 0 on success, otherwise the first
+ * error from a callback or register accessor.
+ *
+ * NOTE(review): 'paging' is accepted for interface compatibility but is
+ * unused in this implementation.
+ */
+int
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t memread,
+    mem_region_write_t memwrite, void *memarg)
+{
+	uint64_t val;
+	int error;
+
+	if (vie->dir == VM_DIR_READ) {
+		error = memread(vm, vcpuid, gpa, &val, vie->access_size, memarg);
+		if (error)
+			goto out;
+		error = vm_set_register(vm, vcpuid, vie->reg, val);
+	} else {
+		error = vm_get_register(vm, vcpuid, vie->reg, &val);
+		if (error)
+			goto out;
+		error = memwrite(vm, vcpuid, gpa, val, vie->access_size, memarg);
+	}
+
+out:
+	return (error);
+}
+
+/*
+ * Emulate a trapped guest system-register access described by 'vre'.
+ *
+ * A read obtains the value from 'regread' and stores it in the guest's
+ * target general-purpose register; a write fetches that register and
+ * hands the value to 'regwrite'.  Returns 0 on success, otherwise the
+ * first error from a callback or register accessor.
+ */
+int
+vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+    reg_write_t regwrite, void *regarg)
+{
+	uint64_t val;
+	int error;
+
+	if (vre->dir == VM_DIR_READ) {
+		error = regread(vm, vcpuid, &val, regarg);
+		if (error)
+			goto out;
+		error = vm_set_register(vm, vcpuid, vre->reg, val);
+	} else {
+		error = vm_get_register(vm, vcpuid, vre->reg, &val);
+		if (error)
+			goto out;
+		error = regwrite(vm, vcpuid, val, regarg);
+	}
+
+out:
+	return (error);
+}
diff --git a/sys/arm64/vmm/vmm_ktr.h b/sys/arm64/vmm/vmm_ktr.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_ktr.h
@@ -0,0 +1,71 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_KTR_H_
+#define _VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+/* Allow the KTR trace class to be overridden at build time */
+#ifndef KTR_VMM
+#define KTR_VMM KTR_GEN
+#endif
+
+/*
+ * Tracing helpers: VCPU_CTRn prefixes the event with "vm name[vcpuid]"
+ * and VM_CTRn with "vm name".  Each wrapper therefore uses a CTR macro
+ * with extra argument slots for the vm_name()/vcpuid values.
+ */
+#define VCPU_CTR0(vm, vcpuid, format) \
+CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid))
+
+#define VCPU_CTR1(vm, vcpuid, format, p1) \
+CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1))
+
+#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \
+CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2))
+
+#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \
+CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
+
+#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), \
+	(p1), (p2), (p3), (p4))
+
+#define VM_CTR0(vm, format) \
+CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
+
+#define VM_CTR1(vm, format, p1) \
+CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1))
+
+#define VM_CTR2(vm, format, p1, p2) \
+CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2))
+
+#define VM_CTR3(vm, format, p1, p2, p3) \
+CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3))
+
+#define VM_CTR4(vm, format, p1, p2, p3, p4) \
+CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4))
+#endif /* !_VMM_KTR_H_ */
diff --git a/sys/arm64/vmm/vmm_mem.h b/sys/arm64/vmm/vmm_mem.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mem.h
@@ -0,0 +1,43 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_MEM_H_
+#define _VMM_MEM_H_
+
+struct vmspace;
+struct vm_object;
+
+int vmm_mem_init(void);
+struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
+ vm_paddr_t hpa);
+void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);
+vm_paddr_t vmm_mem_maxaddr(void);
+
+#endif
diff --git a/sys/arm64/vmm/vmm_mem.c b/sys/arm64/vmm/vmm_mem.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mem.c
@@ -0,0 +1,124 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/sglist.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#include <machine/md_var.h>
+
+#include "vmm_mem.h"
+
+/* No arm64-specific VM memory initialization is currently required. */
+int
+vmm_mem_init(void)
+{
+
+	return (0);
+}
+
+/*
+ * Create a VM object backed by the host physical range [hpa, hpa + len)
+ * and map it into the guest physical address space at 'gpa'.  Returns the
+ * object on success, or NULL if the pager allocation or the map insertion
+ * failed.
+ */
+vm_object_t
+vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
+    vm_paddr_t hpa)
+{
+	int error;
+	vm_object_t obj;
+	struct sglist *sg;
+
+	sg = sglist_alloc(1, M_WAITOK);
+	error = sglist_append_phys(sg, hpa, len);
+	KASSERT(error == 0, ("error %d appending physaddr to sglist", error));
+
+	obj = vm_pager_allocate(OBJT_SG, sg, len, VM_PROT_RW, 0, NULL);
+	if (obj != NULL) {
+		/*
+		 * Force the pages provided by this object to be mapped
+		 * uncacheable: they alias a device's MMIO space.
+		 * (The previous comment here referred to VT-x, EPT and
+		 * MTRRs; those are x86 concepts that do not apply to
+		 * this arm64 port.)
+		 */
+		VM_OBJECT_WLOCK(obj);
+		error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE);
+		VM_OBJECT_WUNLOCK(obj);
+		if (error != KERN_SUCCESS) {
+			panic("vmm_mmio_alloc: vm_object_set_memattr error %d",
+			    error);
+		}
+		error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
+		    VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
+		if (error != KERN_SUCCESS) {
+			vm_object_deallocate(obj);
+			obj = NULL;
+		}
+	}
+
+	/*
+	 * Drop the reference on the sglist.
+	 *
+	 * If the scatter/gather object was successfully allocated then it
+	 * has incremented the reference count on the sglist. Dropping the
+	 * initial reference count ensures that the sglist will be freed
+	 * when the object is deallocated.
+	 *
+	 * If the object could not be allocated then we end up freeing the
+	 * sglist.
+	 */
+	sglist_free(sg);
+
+	return (obj);
+}
+
+/* Remove a guest-physical MMIO mapping created by vmm_mmio_alloc(). */
+void
+vmm_mmio_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
+{
+
+	vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
+}
+
+/* Highest host physical address, as a byte address derived from Maxmem. */
+vm_paddr_t
+vmm_mem_maxaddr(void)
+{
+
+	return (ptoa(Maxmem));
+}
diff --git a/sys/arm64/vmm/vmm_mmu.c b/sys/arm64/vmm/vmm_mmu.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_mmu.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+#include <vm/vm_phys.h>
+
+#include <machine/atomic.h>
+#include <machine/machdep.h>
+#include <machine/vm.h>
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+
+#include "mmu.h"
+#include "arm64.h"
+
+MALLOC_DECLARE(M_HYP);
+
+static struct mtx vmmpmap_mtx;
+static pt_entry_t *l0;
+static vm_paddr_t l0_paddr;
+
+/*
+ * Allocate the root (level 0) page of the EL2 translation table and
+ * initialize the mutex that protects updates to the table hierarchy.
+ * Returns false if the root page could not be allocated.
+ */
+bool
+vmmpmap_init(void)
+{
+	vm_page_t m;
+
+	m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+	if (m == NULL)
+		return (false);
+
+	l0_paddr = VM_PAGE_TO_PHYS(m);
+	/* Cast matches l0's declared type (was a stray pd_entry_t * cast) */
+	l0 = (pt_entry_t *)PHYS_TO_DMAP(l0_paddr);
+	memset(l0, 0, PAGE_SIZE);
+
+	mtx_init(&vmmpmap_mtx, "vmm pmap", NULL, MTX_DEF);
+
+	return (true);
+}
+
+/*
+ * Free the level 3 table referenced by level 2 entry 'l2e'.  All leaf
+ * entries must already have been cleared (asserted under INVARIANTS).
+ */
+static void
+vmmpmap_release_l3(pd_entry_t l2e)
+{
+	pt_entry_t *l3 __diagused;
+	vm_page_t m;
+	int i;
+
+	l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+	for (i = 0; i < Ln_ENTRIES; i++) {
+		KASSERT(l3[i] == 0, ("%s: l3 still mapped: %p %lx", __func__,
+		    &l3[i], l3[i]));
+	}
+
+	m = PHYS_TO_VM_PAGE(l2e & ~ATTR_MASK);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+}
+
+/*
+ * Free the level 2 table referenced by level 1 entry 'l1e', together with
+ * every level 3 table it still references.
+ */
+static void
+vmmpmap_release_l2(pd_entry_t l1e)
+{
+	pt_entry_t *l2;
+	vm_page_t m;
+	int i;
+
+	l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+	for (i = 0; i < Ln_ENTRIES; i++) {
+		if (l2[i] != 0) {
+			vmmpmap_release_l3(l2[i]);
+		}
+	}
+
+	m = PHYS_TO_VM_PAGE(l1e & ~ATTR_MASK);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+}
+
+/*
+ * Free the level 1 table referenced by level 0 entry 'l0e', together with
+ * the level 2 and level 3 tables below it.
+ */
+static void
+vmmpmap_release_l1(pd_entry_t l0e)
+{
+	pt_entry_t *l1;
+	vm_page_t m;
+	int i;
+
+	l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+	for (i = 0; i < Ln_ENTRIES; i++) {
+		if (l1[i] != 0) {
+			vmmpmap_release_l2(l1[i]);
+		}
+	}
+
+	m = PHYS_TO_VM_PAGE(l0e & ~ATTR_MASK);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+}
+
+/*
+ * Tear down the whole EL2 translation table, including the root page, and
+ * destroy the pmap mutex.  All leaf (L3) mappings must have been removed
+ * beforehand — vmmpmap_release_l3() asserts this under INVARIANTS.
+ */
+void
+vmmpmap_fini(void)
+{
+	vm_page_t m;
+	int i;
+
+	/* Remove the remaining entries */
+	for (i = 0; i < L0_ENTRIES; i++) {
+		if (l0[i] != 0) {
+			vmmpmap_release_l1(l0[i]);
+		}
+	}
+
+	m = PHYS_TO_VM_PAGE(l0_paddr);
+	vm_page_unwire_noq(m);
+	vm_page_free(m);
+
+	mtx_destroy(&vmmpmap_mtx);
+}
+
+/* Physical address of the root table, suitable for loading into TTBR0_EL2. */
+uint64_t
+vmmpmap_to_ttbr0(void)
+{
+
+	return (l0_paddr);
+}
+
+/*
+ * Returns a pointer to the level 1 table for 'va', allocating it if
+ * needed, or NULL if the page allocation failed.
+ */
+static pt_entry_t *
+vmmpmap_l1_table(vm_offset_t va)
+{
+	pt_entry_t new_l0e, l0e, *l1;
+	vm_page_t m;
+	int rv;
+
+	m = NULL;
+again:
+	l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+	if ((l0e & ATTR_DESCR_VALID) == 0) {
+		/* Allocate a page for the level 1 table */
+		if (m == NULL) {
+			m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+			if (m == NULL)
+				return (NULL);
+		}
+
+		new_l0e = VM_PAGE_TO_PHYS(m) | L0_TABLE;
+
+		/*
+		 * The cmpset is performed under the mutex; NOTE(review):
+		 * presumably this serializes against the table walk in
+		 * vmmpmap_remove() — confirm.
+		 */
+		mtx_lock(&vmmpmap_mtx);
+		rv = atomic_cmpset_64(&l0[pmap_l0_index(va)], l0e, new_l0e);
+		mtx_unlock(&vmmpmap_mtx);
+		/* We may have raced another thread, try again */
+		if (rv == 0)
+			goto again;
+
+		/* The cmpset succeeded */
+		l0e = new_l0e;
+	} else if (m != NULL) {
+		/* We allocated a page that wasn't used */
+		vm_page_unwire_noq(m);
+		vm_page_free_zero(m);
+	}
+
+	l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+	return (l1);
+}
+
+/*
+ * Returns a pointer to the level 2 table for 'va', allocating it and any
+ * intermediate tables as needed, or NULL on allocation failure.
+ */
+static pt_entry_t *
+vmmpmap_l2_table(vm_offset_t va)
+{
+	pt_entry_t new_l1e, l1e, *l1, *l2;
+	vm_page_t m;
+	int rv;
+
+	l1 = vmmpmap_l1_table(va);
+	if (l1 == NULL)
+		return (NULL);
+
+	m = NULL;
+again:
+	l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+	if ((l1e & ATTR_DESCR_VALID) == 0) {
+		/* Allocate a page for the level 2 table */
+		if (m == NULL) {
+			m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+			if (m == NULL)
+				return (NULL);
+		}
+
+		new_l1e = VM_PAGE_TO_PHYS(m) | L1_TABLE;
+
+		mtx_lock(&vmmpmap_mtx);
+		rv = atomic_cmpset_64(&l1[pmap_l1_index(va)], l1e, new_l1e);
+		mtx_unlock(&vmmpmap_mtx);
+		/* We may have raced another thread, try again */
+		if (rv == 0)
+			goto again;
+
+		/* The cmpset succeeded */
+		l1e = new_l1e;
+	} else if (m != NULL) {
+		/* We allocated a page that wasn't used */
+		vm_page_unwire_noq(m);
+		vm_page_free_zero(m);
+	}
+
+	l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+	return (l2);
+}
+
+/*
+ * Returns a pointer to the level 3 (leaf) table for 'va', allocating it
+ * and any intermediate tables as needed, or NULL on allocation failure.
+ */
+static pd_entry_t *
+vmmpmap_l3_table(vm_offset_t va)
+{
+	pt_entry_t new_l2e, l2e, *l2, *l3;
+	vm_page_t m;
+	int rv;
+
+	l2 = vmmpmap_l2_table(va);
+	if (l2 == NULL)
+		return (NULL);
+
+	m = NULL;
+again:
+	l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+	if ((l2e & ATTR_DESCR_VALID) == 0) {
+		/* Allocate a page for the level 3 table */
+		if (m == NULL) {
+			m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+			if (m == NULL)
+				return (NULL);
+		}
+
+		new_l2e = VM_PAGE_TO_PHYS(m) | L2_TABLE;
+
+		mtx_lock(&vmmpmap_mtx);
+		rv = atomic_cmpset_64(&l2[pmap_l2_index(va)], l2e, new_l2e);
+		mtx_unlock(&vmmpmap_mtx);
+		/* We may have raced another thread, try again */
+		if (rv == 0)
+			goto again;
+
+		/* The cmpset succeeded */
+		l2e = new_l2e;
+	} else if (m != NULL) {
+		/* We allocated a page that wasn't used */
+		vm_page_unwire_noq(m);
+		vm_page_free_zero(m);
+	}
+
+	l3 = (pt_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+	return (l3);
+}
+
+/*
+ * Creates an EL2 entry in the hyp_pmap. Similar to pmap_kenter.
+ *
+ * Maps [va, va + size) to the physical range starting at 'pa' with
+ * protection 'prot' using 4K (L3) pages.  Returns false if a page table
+ * page could not be allocated.  NOTE(review): on failure, pages mapped
+ * before the failing one are left in place — presumably the caller is
+ * expected to clean up with vmmpmap_remove(); confirm.
+ */
+bool
+vmmpmap_enter(vm_offset_t va, vm_size_t size, vm_paddr_t pa, vm_prot_t prot)
+{
+	pd_entry_t l3e, *l3;
+
+	KASSERT((pa & L3_OFFSET) == 0,
+	   ("%s: Invalid physical address", __func__));
+	KASSERT((va & L3_OFFSET) == 0,
+	   ("%s: Invalid virtual address", __func__));
+	KASSERT((size & PAGE_MASK) == 0,
+	    ("%s: Mapping is not page-sized", __func__));
+
+	l3e = ATTR_DEFAULT | L3_PAGE;
+	/* This bit is res1 at EL2 */
+	l3e |= ATTR_S1_AP(ATTR_S1_AP_USER);
+	/* Only normal memory is used at EL2 */
+	l3e |= ATTR_S1_IDX(VM_MEMATTR_DEFAULT);
+
+	if ((prot & VM_PROT_EXECUTE) == 0) {
+		/* PXN is res0 at EL2. UXN is XN */
+		l3e |= ATTR_S1_UXN;
+	}
+	if ((prot & VM_PROT_WRITE) == 0) {
+		l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+	}
+
+	while (size > 0) {
+		l3 = vmmpmap_l3_table(va);
+		if (l3 == NULL)
+			return (false);
+
+#ifdef INVARIANTS
+		/*
+		 * Ensure no other threads can write to l3 between the KASSERT
+		 * and store.
+		 */
+		mtx_lock(&vmmpmap_mtx);
+#endif
+		KASSERT(atomic_load_64(&l3[pmap_l3_index(va)]) == 0,
+		    ("%s: VA already mapped", __func__));
+
+		atomic_store_64(&l3[pmap_l3_index(va)], l3e | pa);
+#ifdef INVARIANTS
+		mtx_unlock(&vmmpmap_mtx);
+#endif
+
+		size -= PAGE_SIZE;
+		pa += PAGE_SIZE;
+		va += PAGE_SIZE;
+	}
+
+	return (true);
+}
+
+/*
+ * Remove the EL2 mappings for the range [va, va + size).
+ *
+ * If 'invalidate' is true the mappings are first downgraded to read-only,
+ * the data cache is cleaned & invalidated at EL2, and only then are the
+ * entries cleared and the TLB invalidated.  If false, the entries are
+ * cleared directly and the caller is responsible for cache/TLB
+ * maintenance.
+ */
+void
+vmmpmap_remove(vm_offset_t va, vm_size_t size, bool invalidate)
+{
+	pt_entry_t l0e, *l1, l1e, *l2, l2e;
+	pd_entry_t *l3, l3e, **l3_list;
+	vm_offset_t eva, va_next, sva;
+	size_t i;
+
+	KASSERT((va & L3_OFFSET) == 0,
+	   ("%s: Invalid virtual address", __func__));
+	KASSERT((size & PAGE_MASK) == 0,
+	    ("%s: Mapping is not page-sized", __func__));
+
+	if (invalidate) {
+		l3_list = malloc((size / PAGE_SIZE) * sizeof(l3_list[0]),
+		    M_TEMP, M_WAITOK | M_ZERO);
+	}
+
+	sva = va;
+	eva = va + size;
+	mtx_lock(&vmmpmap_mtx);
+	for (i = 0; va < eva; va = va_next) {
+		l0e = atomic_load_64(&l0[pmap_l0_index(va)]);
+		if (l0e == 0) {
+			va_next = (va + L0_SIZE) & ~L0_OFFSET;
+			if (va_next < va)
+				va_next = eva;
+			continue;
+		}
+		MPASS((l0e & ATTR_DESCR_MASK) == L0_TABLE);
+
+		l1 = (pd_entry_t *)PHYS_TO_DMAP(l0e & ~ATTR_MASK);
+		l1e = atomic_load_64(&l1[pmap_l1_index(va)]);
+		if (l1e == 0) {
+			va_next = (va + L1_SIZE) & ~L1_OFFSET;
+			if (va_next < va)
+				va_next = eva;
+			continue;
+		}
+		MPASS((l1e & ATTR_DESCR_MASK) == L1_TABLE);
+
+		l2 = (pd_entry_t *)PHYS_TO_DMAP(l1e & ~ATTR_MASK);
+		l2e = atomic_load_64(&l2[pmap_l2_index(va)]);
+		if (l2e == 0) {
+			va_next = (va + L2_SIZE) & ~L2_OFFSET;
+			if (va_next < va)
+				va_next = eva;
+			continue;
+		}
+		MPASS((l2e & ATTR_DESCR_MASK) == L2_TABLE);
+
+		l3 = (pd_entry_t *)PHYS_TO_DMAP(l2e & ~ATTR_MASK);
+		if (invalidate) {
+			l3e = atomic_load_64(&l3[pmap_l3_index(va)]);
+			MPASS(l3e != 0);
+			/*
+			 * Mark memory as read-only so we can invalidate
+			 * the cache.
+			 */
+			l3e &= ~ATTR_S1_AP_MASK;
+			l3e |= ATTR_S1_AP(ATTR_S1_AP_RO);
+			atomic_store_64(&l3[pmap_l3_index(va)], l3e);
+
+			/*
+			 * Record the address of this entry, not the start
+			 * of the table, so the correct entry is cleared
+			 * below.
+			 */
+			l3_list[i] = &l3[pmap_l3_index(va)];
+			i++;
+		} else {
+			/*
+			 * The caller is responsible for clearing the cache &
+			 * handling the TLB
+			 */
+			atomic_store_64(&l3[pmap_l3_index(va)], 0);
+		}
+
+		va_next = (va + L3_SIZE) & ~L3_OFFSET;
+		if (va_next < va)
+			va_next = eva;
+	}
+	mtx_unlock(&vmmpmap_mtx);
+
+	if (invalidate) {
+		/* Invalidate the memory from the D-cache */
+		vmm_call_hyp(HYP_DC_CIVAC, sva, size);
+
+		for (i = 0; i < (size / PAGE_SIZE); i++) {
+			/*
+			 * Unused slots are NULL (M_ZERO) when part of the
+			 * range had no mapping; skip them.
+			 */
+			if (l3_list[i] != NULL)
+				atomic_store_64(l3_list[i], 0);
+		}
+
+		vmm_call_hyp(HYP_EL2_TLBI, HYP_EL2_TLBI_VA, sva, size);
+
+		free(l3_list, M_TEMP);
+	}
+}
diff --git a/sys/arm64/vmm/vmm_psci.c b/sys/arm64/vmm/vmm_psci.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_psci.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+
+#include <dev/psci/psci.h>
+
+#include "arm64.h"
+#include "psci.h"
+
+#define PSCI_VERSION_0_2 0x2
+
+/* PSCI_VERSION: report PSCI 0.2 to the guest in x0. */
+static int
+psci_version(struct hypctx *hypctx, bool *retu)
+{
+
+	hypctx->tf.tf_x[0] = PSCI_VERSION_0_2;
+
+	*retu = false;
+	return (0);
+}
+
+/* PSCI SYSTEM_OFF: suspend the VM with a poweroff disposition. */
+static int
+psci_system_off(struct vm *vm)
+{
+	return (vm_suspend(vm, VM_SUSPEND_POWEROFF));
+}
+
+/* PSCI SYSTEM_RESET: suspend the VM with a reset disposition. */
+static int
+psci_system_reset(struct vm *vm)
+{
+	return (vm_suspend(vm, VM_SUSPEND_RESET));
+}
+
+/*
+ * Handle a PSCI/SMCCC call made by the guest via HVC.
+ *
+ * VERSION, SYSTEM_OFF and SYSTEM_RESET are serviced in the kernel; any
+ * other function ID is forwarded to userspace as a VM_EXITCODE_SMCCC exit
+ * with '*retu' set to true.  An HVC with a non-zero immediate (the ESR
+ * ISS field) is rejected as malformed.
+ */
+int
+psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+	struct hyp *hyp;
+	struct hypctx *hypctx;
+	uint64_t func_id;
+	uint32_t esr_el2, esr_iss;
+	int error, i;
+
+	hyp = vm_get_cookie(vm);
+	hypctx = &hyp->ctx[vcpuid];
+
+	esr_el2 = hypctx->tf.tf_esr;
+	esr_iss = esr_el2 & ESR_ELx_ISS_MASK;
+
+	if (esr_iss != 0) {
+		eprintf("Malformed HVC instruction with immediate: 0x%x\n",
+		    esr_iss);
+		error = 1;
+		goto out;
+	}
+
+	/* The SMCCC function ID is passed in x0, arguments in x1 onwards */
+	func_id = hypctx->tf.tf_x[0];
+	switch (func_id) {
+	case PSCI_FNID_VERSION:
+		error = psci_version(hypctx, retu);
+		break;
+	case PSCI_FNID_SYSTEM_OFF:
+		error = psci_system_off(vm);
+		break;
+	case PSCI_FNID_SYSTEM_RESET:
+		error = psci_system_reset(vm);
+		break;
+	default:
+		vme->exitcode = VM_EXITCODE_SMCCC;
+		vme->u.smccc_call.func_id = func_id;
+		for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
+			vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];
+		*retu = true;
+		error = 0;
+		break;
+	}
+
+out:
+	return (error);
+}
diff --git a/sys/arm64/vmm/vmm_reset.c b/sys/arm64/vmm/vmm_reset.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_reset.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/hypervisor.h>
+
+#include "arm64.h"
+#include "reset.h"
+
+/*
+ * Make the architecturally UNKNOWN value 0. As a bonus, we don't have to
+ * manually set all those RES0 fields.
+ */
+#define ARCH_UNKNOWN 0
+#define set_arch_unknown(reg) (memset(&(reg), ARCH_UNKNOWN, sizeof(reg)))
+
+/*
+ * Reset the guest's EL0/EL1 register state.  Fields that are
+ * architecturally UNKNOWN at reset are set to 0 via set_arch_unknown();
+ * SCTLR_EL1 and PMCR_EL0 are given explicit reset values.
+ */
+void
+reset_vm_el01_regs(void *vcpu)
+{
+	struct hypctx *el2ctx;
+
+	el2ctx = vcpu;
+
+	set_arch_unknown(el2ctx->tf);
+
+	set_arch_unknown(el2ctx->actlr_el1);
+	set_arch_unknown(el2ctx->afsr0_el1);
+	set_arch_unknown(el2ctx->afsr1_el1);
+	set_arch_unknown(el2ctx->amair_el1);
+	set_arch_unknown(el2ctx->contextidr_el1);
+	set_arch_unknown(el2ctx->cpacr_el1);
+	set_arch_unknown(el2ctx->csselr_el1);
+	set_arch_unknown(el2ctx->elr_el1);
+	set_arch_unknown(el2ctx->esr_el1);
+	set_arch_unknown(el2ctx->far_el1);
+	set_arch_unknown(el2ctx->mair_el1);
+	set_arch_unknown(el2ctx->mdccint_el1);
+	set_arch_unknown(el2ctx->mdscr_el1);
+	set_arch_unknown(el2ctx->par_el1);
+
+	/*
+	 * Guest starts with:
+	 * ~SCTLR_M: MMU off
+	 * ~SCTLR_C: data cache off
+	 * SCTLR_CP15BEN: memory barrier instruction enable from EL0; RAO/WI
+	 * ~SCTLR_I: instruction cache off
+	 */
+	el2ctx->sctlr_el1 = SCTLR_RES1;
+	el2ctx->sctlr_el1 &= ~SCTLR_M & ~SCTLR_C & ~SCTLR_I;
+	el2ctx->sctlr_el1 |= SCTLR_CP15BEN;
+
+	set_arch_unknown(el2ctx->sp_el0);
+	set_arch_unknown(el2ctx->tcr_el1);
+	set_arch_unknown(el2ctx->tpidr_el0);
+	set_arch_unknown(el2ctx->tpidr_el1);
+	set_arch_unknown(el2ctx->tpidrro_el0);
+	set_arch_unknown(el2ctx->ttbr0_el1);
+	set_arch_unknown(el2ctx->ttbr1_el1);
+	set_arch_unknown(el2ctx->vbar_el1);
+	set_arch_unknown(el2ctx->spsr_el1);
+
+	set_arch_unknown(el2ctx->dbgbcr_el1);
+	set_arch_unknown(el2ctx->dbgbvr_el1);
+	set_arch_unknown(el2ctx->dbgwcr_el1);
+	set_arch_unknown(el2ctx->dbgwvr_el1);
+
+	/* Keep the host's counter count (PMCR_EL0.N), reset everything else */
+	el2ctx->pmcr_el0 = READ_SPECIALREG(pmcr_el0) & PMCR_N_MASK;
+	/* PMCR_LC is unknown when AArch32 is supported or RES1 otherwise */
+	el2ctx->pmcr_el0 |= PMCR_LC;
+	set_arch_unknown(el2ctx->pmccntr_el0);
+	set_arch_unknown(el2ctx->pmccfiltr_el0);
+	set_arch_unknown(el2ctx->pmcntenset_el0);
+	set_arch_unknown(el2ctx->pmintenset_el1);
+	set_arch_unknown(el2ctx->pmovsset_el0);
+	set_arch_unknown(el2ctx->pmuserenr_el0);
+	memset(el2ctx->pmevcntr_el0, 0, sizeof(el2ctx->pmevcntr_el0));
+	memset(el2ctx->pmevtyper_el0, 0, sizeof(el2ctx->pmevtyper_el0));
+}
+
+/*
+ * Reset the EL2 control registers that define how the guest runs:
+ * trap configuration, virtual MPIDR/MIDR and the initial PSTATE.
+ */
+void
+reset_vm_el2_regs(void *vcpu)
+{
+	struct hypctx *el2ctx;
+	uint64_t cpu_aff;
+
+	el2ctx = vcpu;
+
+	/*
+	 * Set the Hypervisor Configuration Register:
+	 *
+	 * HCR_RW: use AArch64 for EL1
+	 * HCR_TID3: handle ID registers in the vmm to provide a common
+	 * set of features on all vcpus
+	 * HCR_TWI: Trap WFI to the hypervisor
+	 * HCR_BSU_IS: barrier instructions apply to the inner shareable
+	 * domain
+	 * HCR_FB: broadcast maintenance operations
+	 * HCR_AMO: route physical SError interrupts to EL2
+	 * HCR_IMO: route physical IRQ interrupts to EL2
+	 * HCR_FMO: route physical FIQ interrupts to EL2
+	 * HCR_SWIO: turn set/way invalidate into set/way clean and
+	 * invalidate
+	 * HCR_VM: use stage 2 translation
+	 */
+	el2ctx->hcr_el2 = HCR_RW | HCR_TID3 | HCR_TWI | HCR_BSU_IS | HCR_FB |
+	    HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO | HCR_VM;
+
+	/* TODO: Trap all extensions we don't support */
+	el2ctx->mdcr_el2 = 0;
+	/* PMCR_EL0.N is read from MDCR_EL2.HPMN */
+	el2ctx->mdcr_el2 |= (el2ctx->pmcr_el0 & PMCR_N_MASK) >> PMCR_N_SHIFT;
+
+	el2ctx->vmpidr_el2 = VMPIDR_EL2_RES1;
+	/* The guest will detect a multi-core, single-threaded CPU */
+	el2ctx->vmpidr_el2 &= ~VMPIDR_EL2_U & ~VMPIDR_EL2_MT;
+	/* Only 24 bits of affinity, for a grand total of 16,777,216 cores. */
+	cpu_aff = el2ctx->vcpu & (CPU_AFF0_MASK | CPU_AFF1_MASK | CPU_AFF2_MASK);
+	el2ctx->vmpidr_el2 |= cpu_aff;
+
+	/* Use the same CPU identification information as the host */
+	el2ctx->vpidr_el2 = CPU_IMPL_TO_MIDR(CPU_IMPL_ARM);
+	el2ctx->vpidr_el2 |= CPU_VAR_TO_MIDR(0);
+	el2ctx->vpidr_el2 |= CPU_ARCH_TO_MIDR(0xf);
+	el2ctx->vpidr_el2 |= CPU_PART_TO_MIDR(CPU_PART_FOUNDATION);
+	el2ctx->vpidr_el2 |= CPU_REV_TO_MIDR(0);
+
+	/*
+	 * Don't trap accesses to CPACR_EL1, trace, SVE, Advanced SIMD
+	 * and floating point functionality to EL2.
+	 */
+	el2ctx->cptr_el2 = CPTR_RES1;
+	/*
+	 * Disable interrupts in the guest. The guest OS will re-enable
+	 * them.
+	 */
+	el2ctx->tf.tf_spsr = PSR_D | PSR_A | PSR_I | PSR_F;
+	/* Use the EL1 stack when taking exceptions to EL1 */
+	el2ctx->tf.tf_spsr |= PSR_M_EL1h;
+}
diff --git a/sys/arm64/vmm/vmm_stat.h b/sys/arm64/vmm/vmm_stat.h
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+ VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */
+ VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vm *vm, int vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+#define VMM_STAT_INTEL(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL)
+#define VMM_STAT_AMD(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+int vmm_stat_copy(struct vm *vm, int vcpu, int index, int count,
+ int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vm, vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vm, vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VCPU_MIGRATIONS);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_EXTINT);
+VMM_STAT_DECLARE(VMEXIT_HLT);
+VMM_STAT_DECLARE(VMEXIT_CR_ACCESS);
+VMM_STAT_DECLARE(VMEXIT_RDMSR);
+VMM_STAT_DECLARE(VMEXIT_WRMSR);
+VMM_STAT_DECLARE(VMEXIT_MTRAP);
+VMM_STAT_DECLARE(VMEXIT_PAUSE);
+VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_INOUT);
+VMM_STAT_DECLARE(VMEXIT_CPUID);
+VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
+VMM_STAT_DECLARE(VMEXIT_INST_EMUL);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
+VMM_STAT_DECLARE(VMEXIT_USERSPACE);
+VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
+VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+#endif
diff --git a/sys/arm64/vmm/vmm_stat.c b/sys/arm64/vmm/vmm_stat.c
new file mode 100644
--- /dev/null
+++ b/sys/arm64/vmm/vmm_stat.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (for e.g. VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vm *vm, int vcpu, int index, int count, int *num_stats,
+ uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i, tocopy;
+
+ if (vcpu < 0 || vcpu >= vm_get_maxcpus(vm))
+ return (EINVAL);
+
+ if (index < 0 || count < 0)
+ return (EINVAL);
+
+ if (index > vst_num_elems)
+ return (ENOENT);
+
+ if (index == vst_num_elems) {
+ *num_stats = 0;
+ return (0);
+ }
+
+ tocopy = min(vst_num_elems - index, count);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vm, vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vm, vcpu);
+ memcpy(buf, stats + index, tocopy * sizeof(stats[0]));
+ *num_stats = tocopy;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt");
+VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted");
+VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted");
+VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted");
+VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted");
+VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits");
+VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted");
+VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening");
+VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
+VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
+VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
+VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
+VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
+VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
+VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
+VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -113,6 +113,39 @@
dev/iommu/busdma_iommu.c optional iommu
dev/iommu/iommu_gas.c optional iommu
+arm64/vmm/vmm.c optional vmm
+arm64/vmm/vmm_dev.c optional vmm
+arm64/vmm/vmm_instruction_emul.c optional vmm
+arm64/vmm/vmm_mem.c optional vmm
+arm64/vmm/vmm_stat.c optional vmm
+arm64/vmm/vmm_arm64.c optional vmm
+arm64/vmm/vmm_psci.c optional vmm
+arm64/vmm/vmm_reset.c optional vmm
+arm64/vmm/vmm_call.S optional vmm
+arm64/vmm/vmm_hyp_exception.S optional vmm \
+ compile-with "${NORMAL_C} -fpie" \
+ no-obj
+arm64/vmm/vmm_hyp.c optional vmm \
+ compile-with "${NORMAL_C} -fpie" \
+ no-obj
+vmm_hyp_blob.elf.full optional vmm \
+ dependency "vmm_hyp.o vmm_hyp_exception.o" \
+ compile-with "${CC} -o ${.TARGET} ${.ALLSRC} -fPIE -nostdlib -T ${LDSCRIPT} -Wl,--defsym=text_start='0x0'" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.elf optional vmm \
+ dependency "vmm_hyp_blob.elf.full" \
+ compile-with "${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+vmm_hyp_blob.bin optional vmm \
+ dependency vmm_hyp_blob.elf \
+ compile-with "${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}" \
+ no-obj no-implicit-rule
+arm64/vmm/vmm_hyp_el2.S optional vmm \
+ dependency vmm_hyp_blob.bin
+arm64/vmm/vmm_mmu.c optional vmm
+arm64/vmm/io/vgic_v3.c optional vmm
+arm64/vmm/io/vtimer.c optional vmm
+
crypto/armv8/armv8_crypto.c optional armv8crypto
armv8_crypto_wrap.o optional armv8crypto \
dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \
diff --git a/sys/conf/ldscript.arm64 b/sys/conf/ldscript.arm64
--- a/sys/conf/ldscript.arm64
+++ b/sys/conf/ldscript.arm64
@@ -7,6 +7,7 @@
{
/* Read-only sections, merged into text segment: */
. = text_start; /* This is set using --defsym= on the command line. */
+ .vmm_vectors : { (*.vmm_vectors); }
.text :
{
*(.text)
@@ -17,6 +18,7 @@
} =0x9090
_etext = .;
PROVIDE (etext = .);
+
.fini : { *(.fini) } =0x9090
.rodata : { *(.rodata*) *(.gnu.linkonce.r*) }
.rodata1 : { *(.rodata1) }
diff --git a/sys/conf/options.arm64 b/sys/conf/options.arm64
--- a/sys/conf/options.arm64
+++ b/sys/conf/options.arm64
@@ -18,6 +18,9 @@
# EFI Runtime services support
EFIRT opt_efirt.h
+# Bhyve
+VMM opt_global.h
+
# SoC Support
SOC_ALLWINNER_A64 opt_soc.h
SOC_ALLWINNER_H5 opt_soc.h
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -798,7 +798,9 @@
_sgx_linux= sgx_linux
_smartpqi= smartpqi
_p2sb= p2sb
+.endif
+.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64"
.if ${MK_BHYVE} != "no" || defined(ALL_MODULES)
.if ${KERN_OPTS:MSMP}
_vmm= vmm
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -4,31 +4,68 @@
KMOD= vmm
-SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
-SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
-DPSRCS+= vmx_assym.h svm_assym.h
-DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
CFLAGS+= -DVMM_KEEP_STATS
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io
# generic vmm support
-.PATH: ${SRCTOP}/sys/amd64/vmm
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm
SRCS+= vmm.c \
vmm_dev.c \
- vmm_host.c \
vmm_instruction_emul.c \
+ vmm_mem.c \
+ vmm_stat.c
+
+.if ${MACHINE_CPUARCH} == "aarch64"
+# TODO: Add the new EL2 code
+SRCS+= vmm_arm64.c \
+ vmm_psci.c \
+ vmm_reset.c \
+ vmm_call.S \
+ vmm_mmu.c \
+ vmm_hyp_el2.S
+
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
+SRCS+= vgic_v3.c \
+ vtimer.c
+
+CLEANFILES+= vmm_hyp_exception.o vmm_hyp.o vmm_hyp_blob.elf.full
+CLEANFILES+= vmm_hyp_blob.elf vmm_hyp_blob.bin
+
+CFLAGS.vmm_hyp_exception.S += -fpie
+CFLAGS.vmm_hyp.c += -fpie
+vmm_hyp_exception.o: vmm_hyp_exception.S
+vmm_hyp.o: vmm_hyp.c
+
+vmm_hyp_blob.elf.full: vmm_hyp_exception.o vmm_hyp.o
+ ${CC} -o ${.TARGET} ${.ALLSRC} -fPIE -nostdlib \
+ -T ${SYSDIR}/conf/ldscript.arm64 \
+ -Wl,--defsym=text_start='0x0'
+
+vmm_hyp_blob.elf: vmm_hyp_blob.elf.full
+ ${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_blob.bin: vmm_hyp_blob.elf
+ ${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET}
+
+vmm_hyp_el2.o: vmm_hyp_blob.bin
+
+.elif ${MACHINE_CPUARCH} == "amd64"
+DPSRCS+= vmx_assym.h svm_assym.h
+DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+
+SRCS+= vmm_host.c \
vmm_ioport.c \
vmm_lapic.c \
- vmm_mem.c \
- vmm_stat.c \
vmm_util.c \
x86.c
-.PATH: ${SRCTOP}/sys/amd64/vmm/io
+.PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io
SRCS+= iommu.c \
ppt.c \
vatpic.c \
@@ -65,10 +102,11 @@
SRCS+= vmm_snapshot.c
.endif
-CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
+CLEANFILES+= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
+.endif
vmx_assym.h: vmx_genassym.o
sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
@@ -84,6 +122,9 @@
${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
${.IMPSRC} -o ${.TARGET}
+hyp_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
+
vmx_genassym.o: offset.inc
${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}

File Metadata

Mime Type
text/plain
Expires
Tue, Mar 10, 6:10 PM (16 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29504651
Default Alt Text
D37428.id113252.diff (293 KB)

Event Timeline