D45553.id140216.diff
Index: sys/conf/files.riscv
===================================================================
--- sys/conf/files.riscv
+++ sys/conf/files.riscv
@@ -43,6 +43,7 @@
riscv/riscv/elf_machdep.c standard
riscv/riscv/exception.S standard
riscv/riscv/exec_machdep.c standard
+riscv/riscv/fpe.c optional vmm
riscv/riscv/gdb_machdep.c optional gdb
riscv/riscv/intc.c standard
riscv/riscv/identcpu.c standard
@@ -71,6 +72,14 @@
riscv/riscv/uio_machdep.c standard
riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack
riscv/riscv/vm_machdep.c standard
+riscv/vmm/vmm.c optional vmm
+riscv/vmm/vmm_aplic.c optional vmm
+riscv/vmm/vmm_dev.c optional vmm
+riscv/vmm/vmm_instruction_emul.c optional vmm
+riscv/vmm/vmm_riscv.c optional vmm
+riscv/vmm/vmm_sbi.c optional vmm
+riscv/vmm/vmm_stat.c optional vmm
+riscv/vmm/vmm_switch.S optional vmm
# Zstd
contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C}
Index: sys/conf/kern.mk
===================================================================
--- sys/conf/kern.mk
+++ sys/conf/kern.mk
@@ -160,7 +160,7 @@
# code model as "medium" and "medany" respectively.
#
.if ${MACHINE_CPUARCH} == "riscv"
-CFLAGS+= -march=rv64imafdc
+CFLAGS+= -march=rv64imafdch
CFLAGS+= -mabi=lp64
CFLAGS.clang+= -mcmodel=medium
CFLAGS.gcc+= -mcmodel=medany
Index: sys/riscv/include/cpu.h
===================================================================
--- sys/riscv/include/cpu.h
+++ sys/riscv/include/cpu.h
@@ -47,8 +47,6 @@
#define cpu_spinwait() /* nothing */
#define cpu_lock_delay() DELAY(1)
-#ifdef _KERNEL
-
/*
* Core manufacturer IDs, as reported by the mvendorid CSR.
*/
@@ -89,6 +87,8 @@
#define MMU_SV48 0x2 /* 4-level paging */
#define MMU_SV57 0x4 /* 5-level paging */
+#ifdef _KERNEL
+
extern char btext[];
extern char etext[];
Index: sys/riscv/include/elf.h
===================================================================
--- sys/riscv/include/elf.h
+++ sys/riscv/include/elf.h
@@ -80,6 +80,7 @@
#define HWCAP_ISA_F HWCAP_ISA_BIT('f')
#define HWCAP_ISA_D HWCAP_ISA_BIT('d')
#define HWCAP_ISA_C HWCAP_ISA_BIT('c')
+#define HWCAP_ISA_H HWCAP_ISA_BIT('h')
#define HWCAP_ISA_G \
(HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D)
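The HWCAP_ISA_H bit defined above is the same flag userland can read back from the ELF auxiliary vector. A minimal probe might look like the sketch below; it assumes FreeBSD's elf_aux_info(3) interface and headers installed from a tree that includes this change, and it is illustrative only, not part of the review.

/*
 * Hypothetical userland check for the hypervisor ('h') ISA extension
 * advertised through AT_HWCAP.
 */
#include <sys/auxv.h>

#include <machine/elf.h>

#include <stdio.h>

int
main(void)
{
	unsigned long hwcap;

	if (elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)) != 0) {
		fprintf(stderr, "AT_HWCAP not available\n");
		return (1);
	}
	printf("hypervisor extension %s\n",
	    (hwcap & HWCAP_ISA_H) != 0 ? "present" : "absent");
	return (0);
}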
Index: sys/riscv/include/md_var.h
===================================================================
--- sys/riscv/include/md_var.h
+++ sys/riscv/include/md_var.h
@@ -42,6 +42,7 @@
extern u_int mmu_caps;
/* Supervisor-mode extension support */
+extern bool has_hyp;
extern bool has_sstc;
extern bool has_sscofpmf;
Index: sys/riscv/include/riscvreg.h
===================================================================
--- sys/riscv/include/riscvreg.h
+++ sys/riscv/include/riscvreg.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2015-2017 Ruslan Bukin <br@bsdpad.com>
+ * Copyright (c) 2015-2024 Ruslan Bukin <br@bsdpad.com>
* All rights reserved.
*
* Portions of this software were developed by SRI International and the
@@ -47,9 +47,15 @@
#define SCAUSE_STORE_ACCESS_FAULT 7
#define SCAUSE_ECALL_USER 8
#define SCAUSE_ECALL_SUPERVISOR 9
+#define SCAUSE_VIRTUAL_SUPERVISOR_ECALL 10
+#define SCAUSE_MACHINE_ECALL 11
#define SCAUSE_INST_PAGE_FAULT 12
#define SCAUSE_LOAD_PAGE_FAULT 13
#define SCAUSE_STORE_PAGE_FAULT 15
+#define SCAUSE_FETCH_GUEST_PAGE_FAULT 20
+#define SCAUSE_LOAD_GUEST_PAGE_FAULT 21
+#define SCAUSE_VIRTUAL_INSTRUCTION 22
+#define SCAUSE_STORE_GUEST_PAGE_FAULT 23
#define SSTATUS_UIE (1 << 0)
#define SSTATUS_SIE (1 << 1)
@@ -116,6 +122,17 @@
#define MSTATUS_PRV_H 2 /* hypervisor */
#define MSTATUS_PRV_M 3 /* machine */
+#define HSTATUS_VSBE (1 << 5)
+#define HSTATUS_GVA (1 << 6)
+#define HSTATUS_SPV (1 << 7)
+#define HSTATUS_SPVP (1 << 8)
+#define HSTATUS_HU (1 << 9)
+#define HSTATUS_VGEIN_S 12
+#define HSTATUS_VGEIN_M (0xf << HSTATUS_VGEIN_S)
+#define HSTATUS_VTVM (1 << 20)
+#define HSTATUS_VTW (1 << 21)
+#define HSTATUS_VTSR (1 << 22)
+
#define MIE_USIE (1 << 0)
#define MIE_SSIE (1 << 1)
#define MIE_HSIE (1 << 2)
@@ -143,10 +160,35 @@
#define MIP_SEIP (1 << 9)
+#define HVIP_VSSIP (1 << 2)
+#define HVIP_VSTIP (1 << 6)
+#define HVIP_VSEIP (1 << 10)
+
+#define HIE_VSSIE (1 << 2)
+#define HIE_VSTIE (1 << 6)
+#define HIE_VSEIE (1 << 10)
+#define HIE_SGEIE (1 << 12)
+
/* Note: sip register has no SIP_STIP bit in Spike simulator */
#define SIP_SSIP (1 << 1)
#define SIP_STIP (1 << 5)
+#define HENVCFG_STCE (1UL << 63)
+#define HENVCFG_PBMTE (1UL << 62)
+#define HENVCFG_ADUE (1UL << 61)
+#define HENVCFG_CDE (1UL << 60)
+#define HENVCFG_PMM_S (1UL << 31)
+#define HENVCFG_PMM_M (0x3 << HENVCFG_PMM_S)
+#define HENVCFG_CBZE (1UL << 7)
+#define HENVCFG_CBCFE (1UL << 6)
+#define HENVCFG_CBIE_S (1UL << 4)
+#define HENVCFG_CBIE_M (0x3 << HENVCFG_CBIE_S)
+#define HENVCFG_FIOM (1UL << 0)
+
+#define HCOUNTEREN_CY (1UL << 0) /* Cycle */
+#define HCOUNTEREN_TM (1UL << 1) /* Time */
+#define HCOUNTEREN_IR (1UL << 2) /* Instret */
+
#define SATP_PPN_S 0
#define SATP_PPN_M (0xfffffffffffUL << SATP_PPN_S)
#define SATP_ASID_S 44
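The new HSTATUS/HENVCFG/HCOUNTEREN definitions follow the existing riscvreg.h convention: an _S suffix is a field shift and an _M suffix is the pre-shifted mask. A small kernel-side sketch of how such a pair is typically used (csr_read() is the existing macro from this header; the helper name is made up):

#include <machine/riscvreg.h>

/*
 * Hypothetical helper: extract the currently selected guest external
 * interrupt file (VGEIN) from the hstatus CSR.
 */
static inline uint64_t
hstatus_get_vgein(void)
{

	return ((csr_read(hstatus) & HSTATUS_VGEIN_M) >> HSTATUS_VGEIN_S);
}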
Index: sys/riscv/include/vmm.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+struct vcpu;
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_ZERO = 0,
+ VM_REG_GUEST_RA,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_GP,
+ VM_REG_GUEST_TP,
+ VM_REG_GUEST_T0,
+ VM_REG_GUEST_T1,
+ VM_REG_GUEST_T2,
+ VM_REG_GUEST_S0,
+ VM_REG_GUEST_S1,
+ VM_REG_GUEST_A0,
+ VM_REG_GUEST_A1,
+ VM_REG_GUEST_A2,
+ VM_REG_GUEST_A3,
+ VM_REG_GUEST_A4,
+ VM_REG_GUEST_A5,
+ VM_REG_GUEST_A6,
+ VM_REG_GUEST_A7,
+ VM_REG_GUEST_S2,
+ VM_REG_GUEST_S3,
+ VM_REG_GUEST_S4,
+ VM_REG_GUEST_S5,
+ VM_REG_GUEST_S6,
+ VM_REG_GUEST_S7,
+ VM_REG_GUEST_S8,
+ VM_REG_GUEST_S9,
+ VM_REG_GUEST_S10,
+ VM_REG_GUEST_S11,
+ VM_REG_GUEST_T3,
+ VM_REG_GUEST_T4,
+ VM_REG_GUEST_T5,
+ VM_REG_GUEST_T6,
+ VM_REG_GUEST_SEPC,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_MAX_SUFFIXLEN 15
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct vm_guest_paging;
+struct vm_aplic_descr;
+struct pmap;
+
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
+int vm_create(const char *name, struct vm **retvm);
+struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
+void vm_slock_vcpus(struct vm *vm);
+void vm_unlock_vcpus(struct vm *vm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+void vm_slock_memsegs(struct vm *vm);
+void vm_xlock_memsegs(struct vm *vm);
+void vm_unlock_memsegs(struct vm *vm);
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
+
+int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
+int vm_run(struct vcpu *vcpu);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+void* vm_get_cookie(struct vm *vm);
+int vcpu_vcpuid(struct vcpu *vcpu);
+void *vcpu_get_cookie(struct vcpu *vcpu);
+struct vm *vcpu_vm(struct vcpu *vcpu);
+struct vcpu *vm_vcpu(struct vm *vm, int cpu);
+int vm_get_capability(struct vcpu *vcpu, int type, int *val);
+int vm_set_capability(struct vcpu *vcpu, int type, int val);
+int vm_activate_cpu(struct vcpu *vcpu);
+int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_inject_exception(struct vcpu *vcpu, uint64_t scause);
+int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func);
+struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
+void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
+
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+
+static __inline int
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+ return (*info->sptr);
+}
+
+int vcpu_debugged(struct vcpu *vcpu);
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
+enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vcpu *vcpu)
+{
+ struct thread *td;
+
+ td = curthread;
+ return (td->td_ast != 0 || td->td_owepreempt != 0);
+}
+#endif
+
+void *vcpu_stats(struct vcpu *vcpu);
+void vcpu_notify_event(struct vcpu *vcpu);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+#endif /* _KERNEL */
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+#define VM_GP_M_MASK 0x1f
+#define VM_GP_MMU_ENABLED (1 << 5)
+
+struct vm_guest_paging {
+ int flags;
+ int padding;
+};
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_ECALL,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_DEBUG,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_MAX
+};
+
+struct vm_exit {
+ uint64_t scause;
+ uint64_t sepc;
+ uint64_t stval;
+ uint64_t htval;
+ uint64_t htinst;
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ struct {
+ uint64_t gpa;
+ } paging;
+
+ struct {
+ uint64_t gpa;
+ struct vm_guest_paging paging;
+ struct vie vie;
+ } inst_emul;
+
+ struct {
+ uint64_t args[8];
+ } ecall;
+
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ } u;
+};
+
+#endif /* _VMM_H_ */
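The vm_exit layout and exit codes above are what drive the split between in-kernel handling and bouncing out to userland. A rough sketch of how a caller of vm_run() might dispatch on the exit information follows; it uses only interfaces declared in this header and is illustrative, not the actual handler from this series.

#include <machine/vmm.h>

static int
handle_one_exit(struct vcpu *vcpu)
{
	struct vm_exit *vme;
	int error;

	error = vm_run(vcpu);
	if (error != 0)
		return (error);

	vme = vm_exitinfo(vcpu);
	switch (vme->exitcode) {
	case VM_EXITCODE_ECALL:
		/* SBI call; arguments are in vme->u.ecall.args[]. */
		break;
	case VM_EXITCODE_PAGING:
		/* Guest page fault on vme->u.paging.gpa. */
		break;
	case VM_EXITCODE_INST_EMUL:
		/* MMIO access described by vme->u.inst_emul.vie. */
		break;
	case VM_EXITCODE_SUSPENDED:
	case VM_EXITCODE_DEBUG:
	case VM_EXITCODE_WFI:
	default:
		break;
	}
	return (0);
}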
Index: sys/riscv/include/vmm_dev.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm_dev.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+
+struct vm_munmap {
+ vm_paddr_t gpa;
+ size_t len;
+};
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_run {
+ int cpuid;
+ cpuset_t *cpuset; /* CPU set storage */
+ size_t cpusetsize;
+ struct vm_exit *vm_exit;
+};
+
+struct vm_exception {
+ int cpuid;
+ uint64_t scause;
+};
+
+struct vm_msi {
+ uint64_t msg;
+ uint64_t addr;
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int index; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ struct vm_guest_paging paging;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+ cpuset_t *cpus;
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_aplic_descr {
+ uint64_t mem_start;
+ uint64_t mem_size;
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA_NOFAULT = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
+ IOCNUM_MUNMAP_MEMSEG = 18,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+ IOCNUM_SET_REGISTER_SET = 24,
+ IOCNUM_GET_REGISTER_SET = 25,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+ IOCNUM_RAISE_MSI = 82,
+ IOCNUM_INJECT_EXCEPTION = 83,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
+
+ /* vm_attach_aplic */
+ IOCNUM_ATTACH_APLIC = 110,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define VM_MUNMAP_MEMSEG \
+ _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_REGISTER_SET \
+ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define VM_GET_REGISTER_SET \
+ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_RAISE_MSI \
+ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi)
+#define VM_INJECT_EXCEPTION \
+ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GLA2GPA_NOFAULT \
+ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define VM_ATTACH_APLIC \
+ _IOW('v', IOCNUM_ATTACH_APLIC, struct vm_aplic_descr)
+#endif
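These ioctls mirror the amd64/arm64 vmm_dev interface, and in practice userland reaches them through libvmmapi rather than directly. Purely to illustrate the plumbing, a hypothetical direct caller might look like this (the VM name "demo" is made up and must already have been created, e.g. via libvmmapi's vm_create()):

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/ioctl.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
	struct vm_cpu_topology topo;
	int fd;

	fd = open("/dev/vmm/demo", O_RDWR);
	if (fd < 0)
		err(1, "open /dev/vmm/demo");

	if (ioctl(fd, VM_GET_TOPOLOGY, &topo) != 0)
		err(1, "VM_GET_TOPOLOGY");

	printf("%u socket(s), %u core(s)/socket, %u thread(s)/core, "
	    "maxcpus %u\n", topo.sockets, topo.cores, topo.threads,
	    topo.maxcpus);
	return (0);
}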
Index: sys/riscv/include/vmm_instruction_emul.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm_instruction_emul.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t mrr,
+ mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vie'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+ reg_read_t reg_read, reg_write_t reg_write, void *arg);
+void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask);
+
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
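The callback typedefs above are how in-kernel device models claim guest-physical MMIO ranges; the APLIC emulation in this series registers itself the same way. A self-contained sketch of the pattern, with the device, base address and behaviour invented for illustration:

#include <sys/param.h>

#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

static int
demo_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size,
    void *arg)
{

	*rval = 0;	/* All registers of this fake device read as zero. */
	return (0);
}

static int
demo_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size,
    void *arg)
{

	return (0);	/* Writes are accepted and ignored. */
}

static void
demo_attach(struct vm *vm)
{

	/* Claim a one-page window at a made-up guest physical address. */
	vm_register_inst_handler(vm, 0x10001000, PAGE_SIZE,
	    demo_mmio_read, demo_mmio_write);
}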
Index: sys/riscv/include/vmm_snapshot.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
Index: sys/riscv/riscv/genassym.c
===================================================================
--- sys/riscv/riscv/genassym.c
+++ sys/riscv/riscv/genassym.c
@@ -55,6 +55,8 @@
#include <machine/machdep.h>
#include <machine/vmparam.h>
+#include <riscv/vmm/riscv.h>
+
ASSYM(KERNBASE, KERNBASE);
ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
@@ -98,6 +100,32 @@
ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause));
ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus));
+ASSYM(HYP_H_RA, offsetof(struct hypctx, host_regs.hyp_ra));
+ASSYM(HYP_H_SP, offsetof(struct hypctx, host_regs.hyp_sp));
+ASSYM(HYP_H_GP, offsetof(struct hypctx, host_regs.hyp_gp));
+ASSYM(HYP_H_TP, offsetof(struct hypctx, host_regs.hyp_tp));
+ASSYM(HYP_H_T, offsetof(struct hypctx, host_regs.hyp_t));
+ASSYM(HYP_H_S, offsetof(struct hypctx, host_regs.hyp_s));
+ASSYM(HYP_H_A, offsetof(struct hypctx, host_regs.hyp_a));
+ASSYM(HYP_H_SEPC, offsetof(struct hypctx, host_regs.hyp_sepc));
+ASSYM(HYP_H_SSTATUS, offsetof(struct hypctx, host_regs.hyp_sstatus));
+ASSYM(HYP_H_HSTATUS, offsetof(struct hypctx, host_regs.hyp_hstatus));
+ASSYM(HYP_H_SSCRATCH, offsetof(struct hypctx, host_sscratch));
+ASSYM(HYP_H_STVEC, offsetof(struct hypctx, host_stvec));
+ASSYM(HYP_H_SCOUNTEREN, offsetof(struct hypctx, host_scounteren));
+
+ASSYM(HYP_G_RA, offsetof(struct hypctx, guest_regs.hyp_ra));
+ASSYM(HYP_G_SP, offsetof(struct hypctx, guest_regs.hyp_sp));
+ASSYM(HYP_G_GP, offsetof(struct hypctx, guest_regs.hyp_gp));
+ASSYM(HYP_G_TP, offsetof(struct hypctx, guest_regs.hyp_tp));
+ASSYM(HYP_G_T, offsetof(struct hypctx, guest_regs.hyp_t));
+ASSYM(HYP_G_S, offsetof(struct hypctx, guest_regs.hyp_s));
+ASSYM(HYP_G_A, offsetof(struct hypctx, guest_regs.hyp_a));
+ASSYM(HYP_G_SEPC, offsetof(struct hypctx, guest_regs.hyp_sepc));
+ASSYM(HYP_G_SSTATUS, offsetof(struct hypctx, guest_regs.hyp_sstatus));
+ASSYM(HYP_G_HSTATUS, offsetof(struct hypctx, guest_regs.hyp_hstatus));
+ASSYM(HYP_G_SCOUNTEREN, offsetof(struct hypctx, guest_scounteren));
+
ASSYM(RISCV_BOOTPARAMS_SIZE, sizeof(struct riscv_bootparams));
ASSYM(RISCV_BOOTPARAMS_KERN_PHYS, offsetof(struct riscv_bootparams, kern_phys));
ASSYM(RISCV_BOOTPARAMS_KERN_STACK, offsetof(struct riscv_bootparams,
Index: sys/riscv/riscv/identcpu.c
===================================================================
--- sys/riscv/riscv/identcpu.c
+++ sys/riscv/riscv/identcpu.c
@@ -72,6 +72,7 @@
u_int mmu_caps;
/* Supervisor-mode extension support. */
+bool has_hyp;
bool __read_frequently has_sstc;
bool __read_frequently has_sscofpmf;
@@ -247,6 +248,7 @@
case 'c':
case 'd':
case 'f':
+ case 'h':
case 'i':
case 'm':
desc->isa_extensions |= HWCAP_ISA_BIT(isa[i]);
@@ -412,6 +414,7 @@
UPDATE_CAP(mmu_caps, desc->mmu_caps);
/* Supervisor-mode extension support. */
+ UPDATE_CAP(has_hyp, (desc->isa_extensions & HWCAP_ISA_H) != 0);
UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0);
UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0);
@@ -511,6 +514,7 @@
"\03Compressed"
"\04Double"
"\06Float"
+ "\10Hypervisor"
"\15Mult/Div");
}
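The "\10Hypervisor" entry feeds the kernel's "%b" bit-decoding format, which numbers bits from 1 in octal, so 'h' (the 8th letter, HWCAP bit 7) becomes "\10". A throwaway userland check of that arithmetic, for illustration only:

#include <stdio.h>

/* Same per-letter layout as HWCAP_ISA_BIT() in <machine/elf.h>. */
#define	ISA_BIT(c)	(1 << ((c) - 'a'))

int
main(void)
{

	/* Prints: h: value 0x80, "%b" position \10 */
	printf("h: value 0x%x, \"%%b\" position \\%o\n",
	    ISA_BIT('h'), 'h' - 'a' + 1);
	return (0);
}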
Index: sys/riscv/vmm/riscv.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/riscv.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_RISCV_H_
+#define _VMM_RISCV_H_
+
+#include <machine/reg.h>
+#include <machine/pcpu.h>
+#include <machine/vmm.h>
+
+struct hypregs {
+ uint64_t hyp_ra;
+ uint64_t hyp_sp;
+ uint64_t hyp_gp;
+ uint64_t hyp_tp;
+ uint64_t hyp_t[7];
+ uint64_t hyp_s[12];
+ uint64_t hyp_a[8];
+ uint64_t hyp_sepc;
+ uint64_t hyp_sstatus;
+ uint64_t hyp_hstatus;
+};
+
+struct hypcsr {
+ uint64_t hvip;
+ uint64_t vsstatus;
+ uint64_t vsie;
+ uint64_t vstvec;
+ uint64_t vsscratch;
+ uint64_t vsepc;
+ uint64_t vscause;
+ uint64_t vstval;
+ uint64_t vsatp;
+ uint64_t scounteren;
+ uint64_t senvcfg;
+};
+
+struct hypctx {
+ struct hypregs host_regs;
+ struct hypregs guest_regs;
+ struct hypcsr guest_csrs;
+ uint64_t host_sscratch;
+ uint64_t host_stvec;
+ uint64_t host_scounteren;
+ uint64_t guest_scounteren;
+ struct hyp *hyp;
+ struct vcpu *vcpu;
+ bool has_exception;
+};
+
+struct hyp {
+ struct vm *vm;
+ uint64_t vmid_generation;
+ bool aplic_attached;
+ struct aplic *aplic;
+ struct hypctx *ctx[];
+};
+
+#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \
+ ret_type vmmops_##opname args;
+
+DEFINE_VMMOPS_IFUNC(int, modinit, (void))
+DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
+DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
+DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault))
+DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
+ struct vm_eventinfo *info))
+DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
+DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+ int vcpu_id))
+DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui))
+DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause))
+DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
+DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val))
+DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval))
+DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val))
+DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
+ vm_offset_t max))
+DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
+
+#define dprintf(fmt, ...)
+
+struct hypctx *riscv_get_active_vcpu(void);
+void vmm_switch(struct hypctx *);
+int vmm_sbi_ecall(struct vcpu *, bool *);
+
+#endif /* !_VMM_RISCV_H_ */
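Despite the IFUNC-style name carried over from the other ports, in this header DEFINE_VMMOPS_IFUNC only emits an ordinary prototype for the vmmops_* backend entry points that vmm.c calls and vmm_riscv.c implements. One expansion, written out for clarity:

/*
 * DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
 * expands to exactly this declaration.
 */
int vmmops_getreg(void *vcpui, int num, uint64_t *retval);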
Index: sys/riscv/vmm/vmm.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm.c
@@ -0,0 +1,1541 @@
+/*-
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/riscvreg.h>
+#include <machine/cpu.h>
+#include <machine/fpe.h>
+#include <machine/machdep.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/vm.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <dev/pci/pcireg.h>
+
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+#include "riscv.h"
+
+#include "vmm_aplic.h"
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ int vcpuid;
+ void *stats;
+ struct vm_exit exitinfo;
+ uint64_t nextpc; /* (x) next instruction to execute */
+ struct vm *vm; /* (o) */
+ void *cookie; /* (i) cpu-specific data */
+ struct fpreg *guestfpu; /* (a,i) guest fpu state */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ bool sysmem;
+ vm_object_t object;
+};
+#define VM_MAX_MEMSEGS 3
+
+struct mem_map {
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
+};
+#define VM_MAX_MEMMAPS 4
+
+struct vmm_mmio_region {
+ uint64_t start;
+ uint64_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+#define VM_MAX_MMIO_REGIONS 4
+
+/*
+ * Initialization:
+ * (o) initialized the first time the VM is created
+ * (i) initialized when VM is created and when it is reinitialized
+ * (x) initialized before use
+ */
+struct vm {
+ void *cookie; /* (i) cpu-specific data */
+ volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/
+ int suspend; /* (i) stop VM execution */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+ struct vmspace *vmspace; /* (o) guest's address space */
+ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ struct vcpu **vcpu; /* (i) guest vcpus */
+ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
+ /* (o) guest MMIO regions */
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
+ struct sx mem_segs_lock; /* (o) */
+ struct sx vcpus_init_lock; /* (o) */
+};
+
+static bool vmm_initialized = false;
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &vm_maxcpu, 0, "Maximum number of vCPUs");
+
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
+static void vcpu_notify_event_locked(struct vcpu *vcpu);
+
+/*
+ * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
+ * is a safe value for now.
+ */
+#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
+
+static void
+vcpu_cleanup(struct vcpu *vcpu, bool destroy)
+{
+ vmmops_vcpu_cleanup(vcpu->cookie);
+ vcpu->cookie = NULL;
+ if (destroy) {
+ vmm_stat_free(vcpu->stats);
+ fpu_save_area_free(vcpu->guestfpu);
+ vcpu_lock_destroy(vcpu);
+ }
+}
+
+static struct vcpu *
+vcpu_alloc(struct vm *vm, int vcpu_id)
+{
+ struct vcpu *vcpu;
+
+ KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
+ ("vcpu_alloc: invalid vcpu %d", vcpu_id));
+
+ vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
+ vcpu_lock_init(vcpu);
+ vcpu->state = VCPU_IDLE;
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ vcpu->vm = vm;
+ vcpu->guestfpu = fpu_save_area_alloc();
+ vcpu->stats = vmm_stat_alloc();
+ return (vcpu);
+}
+
+static void
+vcpu_init(struct vcpu *vcpu)
+{
+ vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
+ MPASS(vcpu->cookie != NULL);
+ fpu_save_area_reset(vcpu->guestfpu);
+ vmm_stat_init(vcpu->stats);
+}
+
+struct vm_exit *
+vm_exitinfo(struct vcpu *vcpu)
+{
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+
+ vm_maxcpu = mp_ncpus;
+
+ TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+
+ if (vm_maxcpu > VM_MAXCPU) {
+ printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+ vm_maxcpu = VM_MAXCPU;
+ }
+
+ if (vm_maxcpu == 0)
+ vm_maxcpu = 1;
+
+ return (vmmops_modinit());
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ /* TODO: check if has_hyp here? */
+ vmmdev_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = true;
+ break;
+ case MOD_UNLOAD:
+ /* TODO: check if has_hyp here? */
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = vmmops_modcleanup();
+ if (error)
+ vmm_initialized = false;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+static void
+vm_init(struct vm *vm, bool create)
+{
+ int i;
+
+ vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+ MPASS(vm->cookie != NULL);
+
+ CPU_ZERO(&vm->active_cpus);
+ CPU_ZERO(&vm->debug_cpus);
+
+ vm->suspend = 0;
+ CPU_ZERO(&vm->suspended_cpus);
+
+ memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
+
+ if (!create) {
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_init(vm->vcpu[i]);
+ }
+ }
+}
+
+struct vcpu *
+vm_alloc_vcpu(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
+ return (NULL);
+
+ /* Some interrupt controllers may have a CPU limit */
+ if (vcpuid >= aplic_max_cpu_count(vm->cookie))
+ return (NULL);
+
+ vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
+ if (__predict_true(vcpu != NULL))
+ return (vcpu);
+
+ sx_xlock(&vm->vcpus_init_lock);
+ vcpu = vm->vcpu[vcpuid];
+ if (vcpu == NULL/* && !vm->dying*/) {
+ vcpu = vcpu_alloc(vm, vcpuid);
+ vcpu_init(vcpu);
+
+ /*
+ * Ensure vCPU is fully created before updating pointer
+ * to permit unlocked reads above.
+ */
+ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
+ (uintptr_t)vcpu);
+ }
+ sx_xunlock(&vm->vcpus_init_lock);
+ return (vcpu);
+}
+
+void
+vm_slock_vcpus(struct vm *vm)
+{
+ sx_slock(&vm->vcpus_init_lock);
+}
+
+void
+vm_unlock_vcpus(struct vm *vm)
+{
+ sx_unlock(&vm->vcpus_init_lock);
+}
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ struct vm *vm;
+ struct vmspace *vmspace;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
+ vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
+ if (vmspace == NULL)
+ return (ENOMEM);
+
+ vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->vmspace = vmspace;
+ sx_init(&vm->mem_segs_lock, "vm mem_segs");
+ sx_init(&vm->vcpus_init_lock, "vm vcpus");
+
+ vm->sockets = 1;
+ vm->cores = 1; /* XXX backwards compatibility */
+ vm->threads = 1; /* XXX backwards compatibility */
+ vm->maxcpus = vm_maxcpu;
+
+ vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
+ M_WAITOK | M_ZERO);
+
+ vm_init(vm, true);
+
+ *retvm = vm;
+ return (0);
+}
+
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus)
+{
+ *sockets = vm->sockets;
+ *cores = vm->cores;
+ *threads = vm->threads;
+ *maxcpus = vm->maxcpus;
+}
+
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus)
+{
+ /* Ignore maxcpus. */
+ if ((sockets * cores * threads) > vm->maxcpus)
+ return (EINVAL);
+ vm->sockets = sockets;
+ vm->cores = cores;
+ vm->threads = threads;
+ return(0);
+}
+
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+ struct mem_map *mm;
+ int i;
+
+ aplic_detach_from_vm(vm->cookie);
+
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_cleanup(vm->vcpu[i], destroy);
+ }
+
+ vmmops_cleanup(vm->cookie);
+
+ /*
+ * System memory is removed from the guest address space only when
+ * the VM is destroyed. This is because the mapping remains the same
+ * across VM reset.
+ *
+ * Device memory can be relocated by the guest (e.g. using PCI BARs)
+ * so those mappings are removed on a VM reset.
+ */
+ if (!destroy) {
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (destroy || !sysmem_mapping(vm, mm))
+ vm_free_memmap(vm, i);
+ }
+ }
+
+ if (destroy) {
+ for (i = 0; i < VM_MAX_MEMSEGS; i++)
+ vm_free_memseg(vm, i);
+
+ vmmops_vmspace_free(vm->vmspace);
+ vm->vmspace = NULL;
+
+ for (i = 0; i < vm->maxcpus; i++)
+ free(vm->vcpu[i], M_VMM);
+ free(vm->vcpu, M_VMM);
+ sx_destroy(&vm->vcpus_init_lock);
+ sx_destroy(&vm->mem_segs_lock);
+ }
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+
+ vm_cleanup(vm, true);
+
+ free(vm, M_VMM);
+}
+
+int
+vm_reinit(struct vm *vm)
+{
+ int error;
+
+ /*
+ * A virtual machine can be reset only if all vcpus are suspended.
+ */
+ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
+ error = 0;
+ } else {
+ error = EBUSY;
+ }
+
+ return (error);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+void
+vm_slock_memsegs(struct vm *vm)
+{
+ sx_slock(&vm->mem_segs_lock);
+}
+
+void
+vm_xlock_memsegs(struct vm *vm)
+{
+ sx_xlock(&vm->mem_segs_lock);
+}
+
+void
+vm_unlock_memsegs(struct vm *vm)
+{
+ sx_unlock(&vm->mem_segs_lock);
+}
+
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
+{
+ struct vm *vm = vcpu->vm;
+ struct mem_map *mm;
+ int i;
+
+#ifdef INVARIANTS
+ int hostcpu, state;
+ state = vcpu_get_state(vcpu, &hostcpu);
+ KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+ ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+ return (true); /* 'gpa' is sysmem or devmem */
+ }
+
+ return (false);
+}
+
+int
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+{
+ struct mem_seg *seg;
+ vm_object_t obj;
+
+ sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ if (len == 0 || (len & PAGE_MASK))
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ if (seg->len == len && seg->sysmem == sysmem)
+ return (EEXIST);
+ else
+ return (EINVAL);
+ }
+
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ if (obj == NULL)
+ return (ENOMEM);
+
+ seg->len = len;
+ seg->object = obj;
+ seg->sysmem = sysmem;
+ return (0);
+}
+
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ vm_object_t *objptr)
+{
+ struct mem_seg *seg;
+
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (len)
+ *len = seg->len;
+ if (sysmem)
+ *sysmem = seg->sysmem;
+ if (objptr)
+ *objptr = seg->object;
+ return (0);
+}
+
+void
+vm_free_memseg(struct vm *vm, int ident)
+{
+ struct mem_seg *seg;
+
+ KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+ ("%s: invalid memseg ident %d", __func__, ident));
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ vm_object_deallocate(seg->object);
+ bzero(seg, sizeof(struct mem_seg));
+ }
+}
+
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+ size_t len, int prot, int flags)
+{
+ struct mem_seg *seg;
+ struct mem_map *m, *map;
+ vm_ooffset_t last;
+ int i, error;
+
+ dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len);
+
+ if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+ return (EINVAL);
+
+ if (flags & ~VM_MEMMAP_F_WIRED)
+ return (EINVAL);
+
+ if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[segid];
+ if (seg->object == NULL)
+ return (EINVAL);
+
+ last = first + len;
+ if (first < 0 || first >= last || last > seg->len)
+ return (EINVAL);
+
+ if ((gpa | first | last) & PAGE_MASK)
+ return (EINVAL);
+
+ map = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->len == 0) {
+ map = m;
+ break;
+ }
+ }
+
+ if (map == NULL)
+ return (ENOSPC);
+
+ error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+ len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ if (error != KERN_SUCCESS)
+ return (EFAULT);
+
+ vm_object_reference(seg->object);
+
+ if (flags & VM_MEMMAP_F_WIRED) {
+ error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ if (error != KERN_SUCCESS) {
+ vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+ return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
+ EFAULT);
+ }
+ }
+
+ map->gpa = gpa;
+ map->len = len;
+ map->segoff = first;
+ map->segid = segid;
+ map->prot = prot;
+ map->flags = flags;
+ return (0);
+}
+
+int
+vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ struct mem_map *m;
+ int i;
+
+ dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len);
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->gpa == gpa && m->len == len) {
+ vm_free_memmap(vm, i);
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+ struct mem_map *mm, *mmnext;
+ int i;
+
+ mmnext = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len == 0 || mm->gpa < *gpa)
+ continue;
+ if (mmnext == NULL || mm->gpa < mmnext->gpa)
+ mmnext = mm;
+ }
+
+ if (mmnext != NULL) {
+ *gpa = mmnext->gpa;
+ if (segid)
+ *segid = mmnext->segid;
+ if (segoff)
+ *segoff = mmnext->segoff;
+ if (len)
+ *len = mmnext->len;
+ if (prot)
+ *prot = mmnext->prot;
+ if (flags)
+ *flags = mmnext->flags;
+ return (0);
+ } else {
+ return (ENOENT);
+ }
+}
+
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+ struct mem_map *mm;
+ int error __diagused;
+
+ mm = &vm->mem_maps[ident];
+ if (mm->len) {
+ error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+ mm->gpa + mm->len);
+ KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+ __func__, error));
+ bzero(mm, sizeof(struct mem_map));
+ }
+}
+
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+ if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+ return (true);
+ else
+ return (false);
+}
+
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
+{
+ struct mem_map *mm;
+ vm_paddr_t maxaddr;
+ int i;
+
+ maxaddr = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm)) {
+ if (maxaddr < mm->gpa + mm->len)
+ maxaddr = mm->gpa + mm->len;
+ }
+ }
+ return (maxaddr);
+}
+
+int
+vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
+{
+
+ vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
+ return (0);
+}
+
+void
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == 0 &&
+ vm->mmio_region[i].end == 0) {
+ vm->mmio_region[i].start = start;
+ vm->mmio_region[i].end = start + size;
+ vm->mmio_region[i].read = mmio_read;
+ vm->mmio_region[i].write = mmio_write;
+ return;
+ }
+ }
+
+ panic("%s: No free MMIO region", __func__);
+}
+
+void
+vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == start &&
+ vm->mmio_region[i].end == start + size) {
+ memset(&vm->mmio_region[i], 0,
+ sizeof(vm->mmio_region[i]));
+ return;
+ }
+ }
+
+ panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
+ start + size);
+}
+
+static int
+vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vie *vie;
+ struct hyp *hyp;
+ uint64_t fault_ipa;
+ struct vm_guest_paging *paging;
+ struct vmm_mmio_region *vmr;
+ int error, i;
+
+ vm = vcpu->vm;
+ hyp = vm->cookie;
+ if (!hyp->aplic_attached)
+ goto out_user;
+
+ vme = &vcpu->exitinfo;
+ vie = &vme->u.inst_emul.vie;
+ paging = &vme->u.inst_emul.paging;
+
+ fault_ipa = vme->u.inst_emul.gpa;
+
+ vmr = NULL;
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start <= fault_ipa &&
+ vm->mmio_region[i].end > fault_ipa) {
+ vmr = &vm->mmio_region[i];
+ break;
+ }
+ }
+ if (vmr == NULL)
+ goto out_user;
+
+ error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
+ vmr->read, vmr->write, retu);
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+ int i;
+
+ if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+ return (EINVAL);
+
+ if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+ VM_CTR2(vm, "virtual machine already suspended %d/%d",
+ vm->suspend, how);
+ return (EALREADY);
+ }
+
+ VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+ /*
+ * Notify all active vcpus that they are now suspended.
+ */
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+
+ return (0);
+}
+
+void
+vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_exit *vmexit;
+
+ KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+ ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+ vmexit->u.suspended.how = vm->suspend;
+}
+
+void
+vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm_exit *vmexit;
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_DEBUG;
+}
+
+int
+vm_activate_cpu(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+
+ if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EBUSY);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
+ return (0);
+
+}
+
+int
+vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+ if (vcpu == NULL) {
+ vm->debug_cpus = vm->active_cpus;
+ for (int i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ vcpu_notify_event(vcpu);
+ }
+ return (0);
+}
+
+int
+vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+
+ if (vcpu == NULL) {
+ CPU_ZERO(&vm->debug_cpus);
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
+ return (EINVAL);
+
+ CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ }
+ return (0);
+}
+
+int
+vcpu_debugged(struct vcpu *vcpu)
+{
+
+ return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+ return (vm->debug_cpus);
+}
+
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+ return (vm->suspended_cpus);
+}
+
+
+void *
+vcpu_stats(struct vcpu *vcpu)
+{
+
+ return (vcpu->stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ * to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu)
+{
+ int hostcpu;
+
+ hostcpu = vcpu->hostcpu;
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+ if (hostcpu != curcpu) {
+ ipi_cpu(hostcpu, vmm_ipinum);
+ } else {
+ /*
+ * If the 'vcpu' is running on 'curcpu' then it must
+ * be sending a notification to itself (e.g. SELF_IPI).
+ * The pending event will be picked up when the vcpu
+ * transitions back to guest context.
+ */
+ }
+ } else {
+ KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+ "with hostcpu %d", vcpu->state, hostcpu));
+ if (vcpu->state == VCPU_SLEEPING)
+ wakeup_one(vcpu);
+ }
+}
+
+void
+vcpu_notify_event(struct vcpu *vcpu)
+{
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu);
+ vcpu_unlock(vcpu);
+}
+
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* Flush host state to the pcb. */
+ fpe_state_save(curthread);
+
+	/* Ensure the FPU state will be re-loaded when exiting the guest. */
+ PCPU_SET(fpcurthread, NULL);
+
+ /* restore guest FPU state */
+ fpe_enable();
+ fpe_restore(vcpu->guestfpu);
+
+ /*
+	 * The FPU is now "dirty" with the guest's state, so disable it in
+	 * order to trap any access to the FPU by the host.
+ */
+ fpe_disable();
+}
+
+static void
+save_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* Save guest FPE state. */
+ fpe_enable();
+ fpe_store(vcpu->guestfpu);
+ fpe_disable();
+
+ KASSERT(PCPU_GET(fpcurthread) == NULL,
+ ("%s: fpcurthread set with guest registers", __func__));
+}
+
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+
+ vcpu_assert_locked(vcpu);
+
+ /*
+ * State transitions from the vmmdev_ioctl() must always begin from
+ * the VCPU_IDLE state. This guarantees that there is only a single
+ * ioctl() operating on a vcpu at any point.
+ */
+ if (from_idle) {
+ while (vcpu->state != VCPU_IDLE) {
+ vcpu_notify_event_locked(vcpu);
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat",
+ hz / 1000);
+ }
+ } else {
+ KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+ "vcpu idle state"));
+ }
+
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+ "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+ } else {
+ KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+ "vcpu that is not running", vcpu->hostcpu));
+ }
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> FROZEN -> IDLE
+ * FROZEN -> RUNNING -> FROZEN
+ * FROZEN -> SLEEPING -> FROZEN
+ */
+ switch (vcpu->state) {
+ case VCPU_IDLE:
+ case VCPU_RUNNING:
+ case VCPU_SLEEPING:
+ error = (newstate != VCPU_FROZEN);
+ break;
+ case VCPU_FROZEN:
+ error = (newstate == VCPU_FROZEN);
+ break;
+ default:
+ error = 1;
+ break;
+ }
+
+ if (error)
+ return (EBUSY);
+
+ vcpu->state = newstate;
+ if (newstate == VCPU_RUNNING)
+ vcpu->hostcpu = curcpu;
+ else
+ vcpu->hostcpu = NOCPU;
+
+ if (newstate == VCPU_IDLE)
+ wakeup(&vcpu->state);
+
+ return (0);
+}
+
+static void
+vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d\n", error, newstate);
+}
+
+static void
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d", error, newstate);
+}
+
+int
+vm_get_capability(struct vcpu *vcpu, int type, int *retval)
+{
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_getcap(vcpu->cookie, type, retval));
+}
+
+int
+vm_set_capability(struct vcpu *vcpu, int type, int val)
+{
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_setcap(vcpu->cookie, type, val));
+}
+
+struct vm *
+vcpu_vm(struct vcpu *vcpu)
+{
+
+ return (vcpu->vm);
+}
+
+int
+vcpu_vcpuid(struct vcpu *vcpu)
+{
+
+ return (vcpu->vcpuid);
+}
+
+void *
+vcpu_get_cookie(struct vcpu *vcpu)
+{
+
+ return (vcpu->cookie);
+}
+
+struct vcpu *
+vm_vcpu(struct vm *vm, int vcpuid)
+{
+
+ return (vm->vcpu[vcpuid]);
+}
+
+int
+vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
+{
+ int error;
+
+ vcpu_lock(vcpu);
+ error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
+{
+ enum vcpu_state state;
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ if (hostcpu != NULL)
+ *hostcpu = vcpu->hostcpu;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+static void *
+_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ int i, count, pageoff;
+ struct mem_map *mm;
+ vm_page_t m;
+
+ pageoff = gpa & PAGE_MASK;
+ if (len > PAGE_SIZE - pageoff)
+ panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+ count = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+ gpa < mm->gpa + mm->len) {
+ count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+ trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ break;
+ }
+ }
+
+ if (count == 1) {
+ *cookie = m;
+ return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+ } else {
+ *cookie = NULL;
+ return (NULL);
+ }
+}
+
+void *
+vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+#ifdef INVARIANTS
+ /*
+ * The current vcpu should be frozen to ensure 'vm_memmap[]'
+ * stability.
+ */
+ int state = vcpu_get_state(vcpu, NULL);
+ KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+ __func__, state));
+#endif
+ return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
+}
+
+void *
+vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+ return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
+}
+
+void
+vm_gpa_release(void *cookie)
+{
+ vm_page_t m = cookie;
+
+ vm_page_unwire(m, PQ_ACTIVE);
+}
+
+int
+vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
+{
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (vmmops_getreg(vcpu->cookie, reg, retval));
+}
+
+int
+vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
+{
+ int error;
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+ error = vmmops_setreg(vcpu->cookie, reg, val);
+ if (error || reg != VM_REG_GUEST_SEPC)
+ return (error);
+
+ vcpu->nextpc = val;
+
+ return (0);
+}
+
+void *
+vm_get_cookie(struct vm *vm)
+{
+
+ return (vm->cookie);
+}
+
+int
+vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
+{
+
+ return (vmmops_exception(vcpu->cookie, scause));
+}
+
+int
+vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
+{
+
+ return (aplic_attach_to_vm(vm->cookie, descr));
+}
+
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+
+ return (aplic_inject_irq(vm->cookie, -1, irq, true));
+}
+
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+
+ return (aplic_inject_irq(vm->cookie, -1, irq, false));
+}
+
+int
+vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func)
+{
+
+ return (aplic_inject_msi(vm->cookie, msg, addr));
+}
+
+static int
+vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+
+ vcpu_lock(vcpu);
+
+ while (1) {
+ if (aplic_check_pending(vcpu->cookie))
+ break;
+
+ if (vcpu_should_yield(vcpu))
+ break;
+
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ /*
+ * XXX msleep_spin() cannot be interrupted by signals so
+ * wake up periodically to check pending signals.
+ */
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000);
+ vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ }
+ vcpu_unlock(vcpu);
+
+ *retu = false;
+
+ return (0);
+}
+
+static int
+vm_handle_paging(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vm_map *map;
+ uint64_t addr;
+ pmap_t pmap;
+ int ftype, rv;
+
+ vm = vcpu->vm;
+ vme = &vcpu->exitinfo;
+
+ pmap = vmspace_pmap(vm->vmspace);
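+
+	/*
+	 * Per the RISC-V hypervisor extension, htval holds the faulting
+	 * guest physical address shifted right by two bits; recover the
+	 * page-aligned GPA from it below.
+	 */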
+ addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);
+
+ dprintf("%s: %lx\n", __func__, addr);
+
+ switch (vme->scause) {
+ case SCAUSE_STORE_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_WRITE;
+ break;
+ case SCAUSE_FETCH_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_EXECUTE;
+ break;
+ case SCAUSE_LOAD_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_READ;
+ break;
+ default:
+ panic("unknown page trap: %lu", vme->scause);
+ }
+
+ /* The page exists, but the page table needs to be updated. */
+ if (pmap_fault(pmap, addr, ftype) != KERN_SUCCESS) {
+ //printf("%s: pmap_fault failed\n", __func__);
+ return (0);
+ }
+
+ map = &vm->vmspace->vm_map;
+ rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
+ if (rv != KERN_SUCCESS) {
+ printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
+ __func__, addr, ftype, rv);
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+int
+vm_run(struct vcpu *vcpu)
+{
+ struct vm_eventinfo evinfo;
+ struct vm_exit *vme;
+ struct vm *vm;
+ struct hypctx *hypctx;
+ pmap_t pmap;
+ int error;
+ int vcpuid;
+ int i;
+ bool retu;
+
+ vm = vcpu->vm;
+
+ dprintf("%s\n", __func__);
+
+ vcpuid = vcpu->vcpuid;
+
+ if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
+ return (EINVAL);
+
+ pmap = vmspace_pmap(vm->vmspace);
+ vme = &vcpu->exitinfo;
+ evinfo.rptr = NULL;
+ evinfo.sptr = &vm->suspend;
+ evinfo.iptr = NULL;
+restart:
+ critical_enter();
+
+ restore_guest_fpustate(vcpu);
+
+ vcpu_require_state(vcpu, VCPU_RUNNING);
+ error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
+ vcpu_require_state(vcpu, VCPU_FROZEN);
+
+ save_guest_fpustate(vcpu);
+
+ critical_exit();
+
+ if (error == 0) {
+ retu = false;
+ switch (vme->exitcode) {
+ case VM_EXITCODE_INST_EMUL:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_inst_emul(vcpu, &retu);
+ break;
+ case VM_EXITCODE_WFI:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_wfi(vcpu, vme, &retu);
+ break;
+ case VM_EXITCODE_ECALL:
+ /* Handle in userland. */
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vmm_sbi_ecall(vcpu, &retu);
+ if (retu == true) {
+ hypctx = vcpu_get_cookie(vcpu);
+ for (i = 0; i < nitems(vme->u.ecall.args); i++)
+ vme->u.ecall.args[i] =
+ hypctx->guest_regs.hyp_a[i];
+ }
+ break;
+ case VM_EXITCODE_PAGING:
+ vcpu->nextpc = vme->pc;
+ error = vm_handle_paging(vcpu, &retu);
+ break;
+ default:
+ /* Handle in userland. */
+ vcpu->nextpc = vme->pc;
+ retu = true;
+ break;
+ }
+ }
+
+ if (error == 0 && retu == false)
+ goto restart;
+
+ return (error);
+}
Index: sys/riscv/vmm/vmm_aplic.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_aplic.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_APLIC_H_
+#define _VMM_APLIC_H_
+
+struct hyp;
+struct hypctx;
+struct vm_aplic_descr;
+
+int aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr);
+void aplic_detach_from_vm(struct hyp *hyp);
+int aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level);
+int aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr);
+void aplic_vminit(struct hyp *hyp);
+void aplic_vmcleanup(struct hyp *hyp);
+int aplic_check_pending(struct hypctx *hypctx);
+
+void aplic_cpuinit(struct hypctx *hypctx);
+void aplic_cpucleanup(struct hypctx *hypctx);
+void aplic_flush_hwstate(struct hypctx *hypctx);
+void aplic_sync_hwstate(struct hypctx *hypctx);
+int aplic_max_cpu_count(struct hyp *hyp);
+
+#endif /* !_VMM_APLIC_H_ */
Index: sys/riscv/vmm/vmm_aplic.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_aplic.c
@@ -0,0 +1,461 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+
+#include <riscv/vmm/riscv.h>
+#include <riscv/vmm/vmm_aplic.h>
+
+#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_dev.h>
+
+MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC");
+
+#define APLIC_DOMAINCFG 0x0000
+#define DOMAINCFG_IE (1 << 8) /* Interrupt Enable. */
+#define DOMAINCFG_DM (1 << 2) /* Direct Mode. */
+#define DOMAINCFG_BE (1 << 0) /* Big-Endian. */
+#define APLIC_SOURCECFG(x) (0x0004 + ((x) - 1) * 4)
+#define SOURCECFG_D (1 << 10) /* D - Delegate. */
+/* If D == 0. */
+#define SOURCECFG_SM_S (0)
+#define SOURCECFG_SM_M (0x7 << SOURCECFG_SM_S)
+#define SOURCECFG_SM_INACTIVE (0) /* Not delegated. */
+#define SOURCECFG_SM_DETACHED (1)
+#define SOURCECFG_SM_RESERVED (2)
+#define SOURCECFG_SM_RESERVED1 (3)
+#define SOURCECFG_SM_EDGE1 (4) /* Rising edge. */
+#define SOURCECFG_SM_EDGE0 (5) /* Falling edge. */
+#define SOURCECFG_SM_LEVEL1 (6) /* High. */
+#define SOURCECFG_SM_LEVEL0 (7) /* Low. */
+/* If D == 1. */
+#define SOURCECFG_CHILD_INDEX_S (0)
+#define SOURCECFG_CHILD_INDEX_M (0x3ff << SOURCECFG_CHILD_INDEX_S)
+#define APLIC_SETIPNUM 0x1cdc
+#define APLIC_CLRIPNUM 0x1ddc
+#define APLIC_SETIENUM 0x1edc
+#define APLIC_CLRIENUM 0x1fdc
+#define APLIC_GENMSI 0x3000
+#define APLIC_TARGET(x) (0x3004 + ((x) - 1) * 4)
+#define APLIC_IDC(x) (0x4000 + (x) * 32)
+#define IDC_IDELIVERY(x) (APLIC_IDC(x) + 0x0)
+#define IDC_IFORCE(x) (APLIC_IDC(x) + 0x4)
+#define IDC_ITHRESHOLD(x) (APLIC_IDC(x) + 0x8)
+#define IDC_TOPI(x) (APLIC_IDC(x) + 0x18)
+#define IDC_CLAIMI(x) (APLIC_IDC(x) + 0x1C)
+#define CLAIMI_IRQ_S (16)
+#define CLAIMI_IRQ_M (0x3ff << CLAIMI_IRQ_S)
+#define CLAIMI_PRIO_S (0)
+#define CLAIMI_PRIO_M (0xff << CLAIMI_PRIO_S)
+
+struct aplic_irq {
+ uint32_t sourcecfg;
+ uint32_t state;
+#define APLIC_IRQ_STATE_PENDING (1 << 0)
+#define APLIC_IRQ_STATE_ENABLED (1 << 1)
+ uint32_t target;
+};
+
+struct aplic {
+ uint32_t mem_start;
+ uint32_t mem_end;
+ struct mtx mtx;
+ struct aplic_irq *irqs;
+ int nirqs;
+ uint32_t domaincfg;
+};
+
+static int
+aplic_handle_sourcecfg(struct aplic *aplic, int i, bool write, uint64_t *val)
+{
+ struct aplic_irq *irq;
+
+ irq = &aplic->irqs[i];
+ if (write)
+ irq->sourcecfg = *val;
+ else
+ *val = irq->sourcecfg;
+
+ return (0);
+}
+
+static int
+aplic_set_enabled(struct aplic *aplic, bool write, uint64_t *val, bool enabled)
+{
+ struct aplic_irq *irq;
+ int i;
+
+ if (!write) {
+ *val = 0;
+ return (0);
+ }
+
+ i = *val;
+ if (i <= 0 || i > aplic->nirqs)
+ return (-1);
+
+ irq = &aplic->irqs[i];
+
+ if (enabled)
+ irq->state |= APLIC_IRQ_STATE_ENABLED;
+ else
+ irq->state &= ~APLIC_IRQ_STATE_ENABLED;
+
+ return (0);
+}
+
+static int
+aplic_handle_target(struct aplic *aplic, int i, bool write, uint64_t *val)
+{
+
+ printf("%s: i %d\n", __func__, i);
+
+ return (0);
+}
+
+static int
+aplic_handle_idc_claimi(struct aplic *aplic, int cpu, bool write, uint64_t *val)
+{
+ struct aplic_irq *irq;
+ int i;
+
+ /* Writes to claimi are ignored. */
+ if (write)
+ return (-1);
+
+ for (i = 0; i < aplic->nirqs; i++) {
+ irq = &aplic->irqs[i];
+ if (irq->state & APLIC_IRQ_STATE_PENDING) {
+ *val = (i << CLAIMI_IRQ_S) | (0 << CLAIMI_PRIO_S);
+ irq->state &= ~APLIC_IRQ_STATE_PENDING;
+ return (0);
+ }
+ }
+
+ panic("claimi without pending");
+
+ return (0);
+}
+
+static int
+aplic_handle_idc(struct aplic *aplic, int cpu, int reg, bool write,
+ uint64_t *val)
+{
+ int error;
+
+ switch (reg + APLIC_IDC(0)) {
+ case IDC_IDELIVERY(0):
+ case IDC_IFORCE(0):
+ case IDC_ITHRESHOLD(0):
+ case IDC_TOPI(0):
+ error = 0;
+ break;
+ case IDC_CLAIMI(0):
+ error = aplic_handle_idc_claimi(aplic, cpu, write, val);
+ break;
+ default:
+ panic("unknown reg");
+ }
+
+ return (error);
+}
+
+static int
+aplic_mmio_access(struct aplic *aplic, uint64_t reg, bool write, uint64_t *val)
+{
+ int error;
+ int cpu;
+ int r;
+ int i;
+
+ if ((reg >= APLIC_SOURCECFG(1)) &&
+ (reg <= APLIC_SOURCECFG(aplic->nirqs))) {
+ i = ((reg - APLIC_SOURCECFG(1)) >> 2) + 1;
+ error = aplic_handle_sourcecfg(aplic, i, write, val);
+ return (error);
+ }
+
+ if ((reg >= APLIC_TARGET(1)) && (reg <= APLIC_TARGET(aplic->nirqs))) {
+ i = (reg - APLIC_TARGET(1)) >> 2;
+ error = aplic_handle_target(aplic, i, write, val);
+ return (error);
+ }
+
+ if ((reg >= APLIC_IDC(0)) && (reg < APLIC_IDC(mp_ncpus))) {
+ cpu = (reg - APLIC_IDC(0)) >> 5;
+ r = (reg - APLIC_IDC(0)) % 32;
+ error = aplic_handle_idc(aplic, cpu, r, write, val);
+ return (error);
+ }
+
+ switch (reg) {
+ case APLIC_DOMAINCFG:
+ aplic->domaincfg = *val & DOMAINCFG_IE;
+ break;
+ case APLIC_SETIENUM:
+ aplic_set_enabled(aplic, write, val, true);
+ break;
+ case APLIC_CLRIENUM:
+ aplic_set_enabled(aplic, write, val, false);
+ break;
+ default:
+ panic("unknown reg %lx", reg);
+ break;
+	}
+
+ return (0);
+}
+
+static int
+mem_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval, int size,
+ void *arg)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ struct aplic *aplic;
+ uint64_t reg;
+ uint64_t val;
+ int error;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+
+ dprintf("%s: fault_ipa %lx size %d\n", __func__, fault_ipa, size);
+
+ if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+ return (EINVAL);
+
+ reg = fault_ipa - aplic->mem_start;
+
+ error = aplic_mmio_access(aplic, reg, false, &val);
+ if (error == 0)
+ *rval = val;
+
+ return (error);
+}
+
+static int
+mem_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval, int size,
+ void *arg)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ struct aplic *aplic;
+ uint64_t reg;
+ uint64_t val;
+ int error;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+
+ dprintf("%s: fault_ipa %lx wval %lx size %d\n", __func__, fault_ipa,
+ wval, size);
+
+ if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+ return (EINVAL);
+
+ reg = fault_ipa - aplic->mem_start;
+
+ val = wval;
+
+ error = aplic_mmio_access(aplic, reg, true, &val);
+
+ return (error);
+}
+
+void
+aplic_vminit(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ hyp->aplic = malloc(sizeof(*hyp->aplic), M_APLIC,
+ M_WAITOK | M_ZERO);
+ aplic = hyp->aplic;
+
+ mtx_init(&aplic->mtx, "APLIC lock", NULL, MTX_SPIN);
+}
+
+void
+aplic_vmcleanup(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+
+ mtx_destroy(&aplic->mtx);
+
+ free(hyp->aplic, M_APLIC);
+}
+
+int
+aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr)
+{
+ struct aplic *aplic;
+ struct vm *vm;
+
+ vm = hyp->vm;
+
+ printf("%s\n", __func__);
+
+ vm_register_inst_handler(vm, descr->mem_start, descr->mem_size,
+ mem_read, mem_write);
+
+ aplic = hyp->aplic;
+ aplic->nirqs = 63;
+ aplic->mem_start = descr->mem_start;
+ aplic->mem_end = descr->mem_start + descr->mem_size;
+ aplic->irqs = malloc(sizeof(struct aplic_irq) * aplic->nirqs, M_APLIC,
+ M_WAITOK | M_ZERO);
+
+ hyp->aplic_attached = true;
+
+ return (0);
+}
+
+void
+aplic_detach_from_vm(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+
+ printf("%s\n", __func__);
+
+ if (hyp->aplic_attached) {
+ hyp->aplic_attached = false;
+
+ free(aplic->irqs, M_APLIC);
+ }
+}
+
+int
+aplic_check_pending(struct hypctx *hypctx)
+{
+ struct aplic_irq *irq;
+ struct aplic *aplic;
+ struct hyp *hyp;
+ int i;
+
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+ if ((aplic->domaincfg & DOMAINCFG_IE) == 0)
+ return (0);
+
+ for (i = 0; i < aplic->nirqs; i++) {
+ irq = &aplic->irqs[i];
+ if (irq->state & APLIC_IRQ_STATE_PENDING)
+ return (1);
+ }
+
+ return (0);
+}
+
+int
+aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+ struct aplic_irq *irq;
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+ if ((aplic->domaincfg & DOMAINCFG_IE) == 0)
+ return (0);
+
+ irq = &aplic->irqs[irqid];
+ if (irq->sourcecfg & SOURCECFG_D)
+ return (0);
+
+ switch (irq->sourcecfg & SOURCECFG_SM_M) {
+ case SOURCECFG_SM_EDGE1:
+ if (level)
+ irq->state |= APLIC_IRQ_STATE_PENDING;
+ else
+ irq->state &= ~APLIC_IRQ_STATE_PENDING;
+ break;
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+int
+aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+
+ /* TODO. */
+
+ return (ENXIO);
+}
+
+void
+aplic_cpuinit(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_cpucleanup(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_flush_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_sync_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+int
+aplic_max_cpu_count(struct hyp *hyp)
+{
+ int16_t max_count;
+
+ max_count = vm_get_maxcpus(hyp->vm);
+
+ return (max_count);
+}
Index: sys/riscv/vmm/vmm_dev.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_dev.c
@@ -0,0 +1,1052 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "riscv.h"
+#include "vmm_stat.h"
+#include "vmm_aplic.h"
+
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ struct ucred *ucred;
+ SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static unsigned pr_allow_flag;
+static struct mtx vmmdev_mtx;
+MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static int vmm_priv_check(struct ucred *ucred);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+static int
+vmm_priv_check(struct ucred *ucred)
+{
+
+ if (jailed(ucred) &&
+ !(ucred->cr_prison->pr_allow & pr_allow_flag))
+ return (EPERM);
+
+ return (0);
+}
+
+static int
+vcpu_lock_one(struct vcpu *vcpu)
+{
+ int error;
+
+ error = vcpu_set_state(vcpu, VCPU_FROZEN, true);
+ return (error);
+}
+
+static void
+vcpu_unlock_one(struct vcpu *vcpu)
+{
+ enum vcpu_state state;
+
+ state = vcpu_get_state(vcpu, NULL);
+ if (state != VCPU_FROZEN) {
+ panic("vcpu %s(%d) has invalid state %d",
+ vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
+ }
+
+ vcpu_set_state(vcpu, VCPU_IDLE, false);
+}
+
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ int error;
+ uint16_t i, j, maxcpus;
+
+ error = 0;
+ vm_slock_vcpus(sc->vm);
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ for (j = 0; j < i; j++) {
+ vcpu = vm_vcpu(sc->vm, j);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+ }
+
+ return (error);
+}
+
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ uint16_t i, maxcpus;
+
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+ struct vmmdev_softc *sc;
+
+#ifdef notyet /* XXX kernel is not compiled with invariants */
+ mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+ SLIST_FOREACH(sc, &head, link) {
+ if (strcmp(name, vm_name(sc->vm)) == 0)
+ break;
+ }
+
+ if (sc == NULL)
+ return (NULL);
+
+ if (cr_cansee(curthread->td_ucred, sc->ucred))
+ return (NULL);
+
+ return (sc);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+ return (cdev->si_drv1);
+}
+
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+ int error, off, c, prot;
+ vm_paddr_t gpa, maxaddr;
+ void *hpa, *cookie;
+ struct vmmdev_softc *sc;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
+ maxaddr = vmm_sysmem_maxaddr(sc->vm);
+ while (uio->uio_resid > 0 && error == 0) {
+ gpa = uio->uio_offset;
+ off = gpa & PAGE_MASK;
+ c = min(uio->uio_resid, PAGE_SIZE - off);
+
+ /*
+ * The VM has a hole in its physical memory map. If we want to
+ * use 'dd' to inspect memory beyond the hole we need to
+ * provide bogus data for memory that lies in the hole.
+ *
+ * Since this device does not support lseek(2), dd(1) will
+ * read(2) blocks of data to simulate the lseek(2).
+ */
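+		/*
+		 * Illustrative sketch (the VM name is made up): guest memory
+		 * of a VM called "testvm" could be inspected from the host
+		 * with something like
+		 *   dd if=/dev/vmm/testvm bs=4k skip=<gpa in pages> count=1 | hexdump
+		 * which reaches this loop one page at a time.
+		 */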
+ hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
+ if (hpa == NULL) {
+ if (uio->uio_rw == UIO_READ && gpa < maxaddr)
+ error = uiomove(__DECONST(void *, zero_region),
+ c, uio);
+ else
+ error = EFAULT;
+ } else {
+ error = uiomove(hpa, c, uio);
+ vm_gpa_release(cookie);
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ struct devmem_softc *dsc;
+ int error;
+ bool sysmem;
+
+ error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+ if (error || mseg->len == 0)
+ return (error);
+
+ if (!sysmem) {
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ if (dsc->segid == mseg->segid)
+ break;
+ }
+ KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+ __func__, mseg->segid));
+ error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
+ NULL);
+ } else {
+ bzero(mseg->name, sizeof(mseg->name));
+ }
+
+ return (error);
+}
+
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ char *name;
+ int error;
+ bool sysmem;
+
+ error = 0;
+ name = NULL;
+ sysmem = true;
+
+ /*
+ * The allocation is lengthened by 1 to hold a terminating NUL. It'll
+	 * be stripped off when devfs processes the full string.
+ */
+ if (VM_MEMSEG_NAME(mseg)) {
+ sysmem = false;
+ name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
+ error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
+ if (error)
+ goto done;
+ }
+
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ if (error)
+ goto done;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+ if (error)
+ vm_free_memseg(sc->vm, mseg->segid);
+ else
+ name = NULL; /* freed when 'cdev' is destroyed */
+ }
+done:
+ free(name, M_VMMDEV);
+ return (error);
+}
+
+static int
+vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+		error = vm_get_register(vcpu, regnum[i], &regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+ error = vm_set_register(vcpu, regnum[i], regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error, vcpuid, size;
+ cpuset_t *cpuset;
+ struct vmmdev_softc *sc;
+ struct vcpu *vcpu;
+ struct vm_register *vmreg;
+ struct vm_register_set *vmregset;
+ struct vm_run *vmrun;
+ struct vm_aplic_descr *aplic;
+ struct vm_cpuset *vm_cpuset;
+ struct vm_irq *vi;
+ struct vm_capability *vmcap;
+ struct vm_stats *vmstats;
+ struct vm_stat_desc *statdesc;
+ struct vm_suspend *vmsuspend;
+ struct vm_exception *vmexc;
+ struct vm_gla2gpa *gg;
+ struct vm_memmap *mm;
+ struct vm_munmap *mu;
+ struct vm_msi *vmsi;
+ struct vm_cpu_topology *topology;
+ uint64_t *regvals;
+ int *regnums;
+ enum { NONE, SINGLE, ALL } vcpus_locked;
+ bool memsegs_locked;
+
+ dprintf("%s: cmd %ld\n", __func__, cmd);
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ error = 0;
+ vcpuid = -1;
+ vcpu = NULL;
+ vcpus_locked = NONE;
+ memsegs_locked = false;
+
+ /*
+ * Some VMM ioctls can operate only on vcpus that are not running.
+ */
+ switch (cmd) {
+ case VM_RUN:
+ case VM_GET_REGISTER:
+ case VM_SET_REGISTER:
+ case VM_GET_REGISTER_SET:
+ case VM_SET_REGISTER_SET:
+ case VM_INJECT_EXCEPTION:
+ case VM_GET_CAPABILITY:
+ case VM_SET_CAPABILITY:
+ case VM_GLA2GPA_NOFAULT:
+ case VM_ACTIVATE_CPU:
+ /*
+ * ioctls that can operate only on vcpus that are not running.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ goto done;
+ vcpus_locked = SINGLE;
+ break;
+
+ case VM_ALLOC_MEMSEG:
+ case VM_MMAP_MEMSEG:
+ case VM_MUNMAP_MEMSEG:
+ case VM_REINIT:
+ case VM_ATTACH_APLIC:
+ /*
+ * ioctls that modify the memory map must lock memory
+ * segments exclusively.
+ */
+ vm_xlock_memsegs(sc->vm);
+ memsegs_locked = true;
+
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = vcpu_lock_all(sc);
+ if (error)
+ goto done;
+ vcpus_locked = ALL;
+ break;
+ case VM_GET_MEMSEG:
+ case VM_MMAP_GETNEXT:
+ /*
+ * Lock the memory map while it is being inspected.
+ */
+ vm_slock_memsegs(sc->vm);
+ memsegs_locked = true;
+ break;
+
+ case VM_STATS:
+ /*
+ * These do not need the vCPU locked but do operate on
+ * a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_SUSPEND_CPU:
+ case VM_RESUME_CPU:
+ /*
+ * These can either operate on all CPUs via a vcpuid of
+ * -1 or on a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ if (vcpuid == -1)
+ break;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_ASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_assert_irq(sc->vm, vi->irq);
+ break;
+ case VM_DEASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_deassert_irq(sc->vm, vi->irq);
+ break;
+ default:
+ break;
+ }
+
+ switch (cmd) {
+ case VM_RUN: {
+ struct vm_exit *vme;
+
+ vmrun = (struct vm_run *)data;
+ vme = vm_exitinfo(vcpu);
+
+ error = vm_run(vcpu);
+ if (error != 0)
+ break;
+
+ error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
+ if (error != 0)
+ break;
+ break;
+ }
+ case VM_SUSPEND:
+ vmsuspend = (struct vm_suspend *)data;
+ error = vm_suspend(sc->vm, vmsuspend->how);
+ break;
+ case VM_REINIT:
+ error = vm_reinit(sc->vm);
+ break;
+ case VM_STAT_DESC: {
+ statdesc = (struct vm_stat_desc *)data;
+ error = vmm_stat_desc_copy(statdesc->index,
+ statdesc->desc, sizeof(statdesc->desc));
+ break;
+ }
+ case VM_STATS: {
+ CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
+ vmstats = (struct vm_stats *)data;
+ getmicrotime(&vmstats->tv);
+ error = vmm_stat_copy(vcpu, vmstats->index,
+ nitems(vmstats->statbuf),
+ &vmstats->num_entries, vmstats->statbuf);
+ break;
+ }
+ case VM_MMAP_GETNEXT:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+ &mm->segoff, &mm->len, &mm->prot, &mm->flags);
+ break;
+ case VM_MMAP_MEMSEG:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+ mm->len, mm->prot, mm->flags);
+ break;
+ case VM_MUNMAP_MEMSEG:
+ mu = (struct vm_munmap *)data;
+ error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
+ break;
+ case VM_ALLOC_MEMSEG:
+ error = alloc_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_MEMSEG:
+ error = get_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
+ break;
+ case VM_SET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
+ break;
+ case VM_GET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = vm_get_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ if (error == 0)
+ error = copyout(regvals, vmregset->regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_SET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = copyin(vmregset->regvals, regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ if (error == 0)
+ error = vm_set_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_GET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_get_capability(vcpu,
+ vmcap->captype,
+ &vmcap->capval);
+ break;
+ case VM_SET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_set_capability(vcpu,
+ vmcap->captype,
+ vmcap->capval);
+ break;
+ case VM_INJECT_EXCEPTION:
+ vmexc = (struct vm_exception *)data;
+ error = vm_inject_exception(vcpu, vmexc->scause);
+ break;
+ case VM_GLA2GPA_NOFAULT:
+ gg = (struct vm_gla2gpa *)data;
+ error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
+ gg->prot, &gg->gpa, &gg->fault);
+ KASSERT(error == 0 || error == EFAULT,
+ ("%s: vm_gla2gpa unknown error %d", __func__, error));
+ break;
+ case VM_ACTIVATE_CPU:
+ error = vm_activate_cpu(vcpu);
+ break;
+ case VM_GET_CPUS:
+ error = 0;
+ vm_cpuset = (struct vm_cpuset *)data;
+ size = vm_cpuset->cpusetsize;
+ if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+ if (vm_cpuset->which == VM_ACTIVE_CPUS)
+ *cpuset = vm_active_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
+ *cpuset = vm_suspended_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_DEBUG_CPUS)
+ *cpuset = vm_debug_cpus(sc->vm);
+ else
+ error = EINVAL;
+ if (error == 0)
+ error = copyout(cpuset, vm_cpuset->cpus, size);
+ free(cpuset, M_TEMP);
+ break;
+ case VM_SUSPEND_CPU:
+ error = vm_suspend_cpu(sc->vm, vcpu);
+ break;
+ case VM_RESUME_CPU:
+ error = vm_resume_cpu(sc->vm, vcpu);
+ break;
+ case VM_ATTACH_APLIC:
+ aplic = (struct vm_aplic_descr *)data;
+ error = vm_attach_aplic(sc->vm, aplic);
+ break;
+ case VM_RAISE_MSI:
+ vmsi = (struct vm_msi *)data;
+ error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus,
+ vmsi->slot, vmsi->func);
+ break;
+ case VM_SET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ error = vm_set_topology(sc->vm, topology->sockets,
+ topology->cores, topology->threads, topology->maxcpus);
+ break;
+ case VM_GET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
+ &topology->threads, &topology->maxcpus);
+ error = 0;
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+done:
+ if (vcpus_locked == SINGLE)
+ vcpu_unlock_one(vcpu);
+ else if (vcpus_locked == ALL)
+ vcpu_unlock_all(sc);
+ if (memsegs_locked)
+ vm_unlock_memsegs(sc->vm);
+
+ /*
+ * Make sure that no handler returns a kernel-internal
+ * error value to userspace.
+ */
+ KASSERT(error == ERESTART || error >= 0,
+ ("vmmdev_ioctl: invalid error return %d", error));
+ return (error);
+}
+
+static int
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+ struct vm_object **objp, int nprot)
+{
+ struct vmmdev_softc *sc;
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff, first, last;
+ int error, found, segid;
+ bool sysmem;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ first = *offset;
+ last = first + mapsize;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL) {
+ /* virtual machine is in the process of being created */
+ return (EINVAL);
+ }
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ gpa = 0;
+ found = 0;
+ while (!found) {
+ error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+ NULL, NULL);
+ if (error)
+ break;
+
+ if (first >= gpa && last <= gpa + len)
+ found = 1;
+ else
+ gpa += len;
+ }
+
+ if (found) {
+ error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+ KASSERT(error == 0 && *objp != NULL,
+ ("%s: invalid memory segment %d", __func__, segid));
+ if (sysmem) {
+ vm_object_reference(*objp);
+ *offset = segoff + (first - gpa);
+ } else {
+ error = EINVAL;
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static void
+vmmdev_destroy(void *arg)
+{
+ struct vmmdev_softc *sc = arg;
+ struct devmem_softc *dsc;
+ int error __diagused;
+
+ error = vcpu_lock_all(sc);
+ KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+ vm_unlock_vcpus(sc->vm);
+
+ while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+ KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+ SLIST_REMOVE_HEAD(&sc->devmem, link);
+ free(dsc->name, M_VMMDEV);
+ free(dsc, M_VMMDEV);
+ }
+
+ if (sc->cdev != NULL)
+ destroy_dev(sc->cdev);
+
+ if (sc->vm != NULL)
+ vm_destroy(sc->vm);
+
+ if (sc->ucred != NULL)
+ crfree(sc->ucred);
+
+ if ((sc->flags & VSC_LINKED) != 0) {
+ mtx_lock(&vmmdev_mtx);
+ SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+ mtx_unlock(&vmmdev_mtx);
+ }
+
+ free(sc, M_VMMDEV);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ if (sc == NULL || sc->cdev == NULL) {
+ mtx_unlock(&vmmdev_mtx);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Setting 'sc->cdev' to NULL is used to indicate that the VM
+ * is scheduled for destruction.
+ */
+ cdev = sc->cdev;
+ sc->cdev = NULL;
+ mtx_unlock(&vmmdev_mtx);
+
+ /*
+ * Destroy all cdevs:
+ *
+ * - any new operations on the 'cdev' will return an error (ENXIO).
+ *
+ * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
+ */
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+ destroy_dev(dsc->cdev);
+ devmem_destroy(dsc);
+ }
+ destroy_dev(cdev);
+ vmmdev_destroy(sc);
+ error = 0;
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW |
+ CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_destroy, "A", NULL);
+
+static struct cdevsw vmmdevsw = {
+ .d_name = "vmmdev",
+ .d_version = D_VERSION,
+ .d_ioctl = vmmdev_ioctl,
+ .d_mmap_single = vmmdev_mmap_single,
+ .d_read = vmmdev_rw,
+ .d_write = vmmdev_rw,
+};
+
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+ struct vm *vm;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc, *sc2;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ mtx_unlock(&vmmdev_mtx);
+ if (sc != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ error = vm_create(buf, &vm);
+ if (error != 0)
+ goto out;
+
+ sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+ sc->ucred = crhold(curthread->td_ucred);
+ sc->vm = vm;
+ SLIST_INIT(&sc->devmem);
+
+ /*
+	 * Look up the name again just in case somebody sneaked in when we
+ * dropped the lock.
+ */
+ mtx_lock(&vmmdev_mtx);
+ sc2 = vmmdev_lookup(buf);
+ if (sc2 == NULL) {
+ SLIST_INSERT_HEAD(&head, sc, link);
+ sc->flags |= VSC_LINKED;
+ }
+ mtx_unlock(&vmmdev_mtx);
+
+ if (sc2 != NULL) {
+ vmmdev_destroy(sc);
+ error = EEXIST;
+ goto out;
+ }
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
+ UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ if (error != 0) {
+ vmmdev_destroy(sc);
+ goto out;
+ }
+
+ mtx_lock(&vmmdev_mtx);
+ sc->cdev = cdev;
+ sc->cdev->si_drv1 = sc;
+ mtx_unlock(&vmmdev_mtx);
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW |
+ CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_create, "A", NULL);
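+
+/*
+ * Usage sketch (illustrative VM name): instances are created and destroyed
+ * from userspace through the hw.vmm sysctls defined above, e.g.
+ *   sysctl hw.vmm.create=testvm     (a /dev/vmm/testvm cdev appears)
+ *   sysctl hw.vmm.destroy=testvm
+ */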
+
+void
+vmmdev_init(void)
+{
+ pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
+ "Allow use of vmm in a jail.");
+}
+
+int
+vmmdev_cleanup(void)
+{
+ int error;
+
+ if (SLIST_EMPTY(&head))
+ error = 0;
+ else
+ error = EBUSY;
+
+ return (error);
+}
+
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+ struct vm_object **objp, int nprot)
+{
+ struct devmem_softc *dsc;
+ vm_ooffset_t first, last;
+ size_t seglen;
+ int error;
+ bool sysmem;
+
+ dprintf("%s: offset %lx len %lx\n", __func__, *offset, len);
+
+ dsc = cdev->si_drv1;
+ if (dsc == NULL) {
+ /* 'cdev' has been created but is not ready for use */
+ return (ENXIO);
+ }
+
+ first = *offset;
+ last = *offset + len;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ vm_slock_memsegs(dsc->sc->vm);
+
+ error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+ KASSERT(error == 0 && !sysmem && *objp != NULL,
+ ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+ if (seglen >= last)
+ vm_object_reference(*objp);
+ else
+		error = EINVAL;
+ vm_unlock_memsegs(dsc->sc->vm);
+ return (error);
+}
+
+static struct cdevsw devmemsw = {
+ .d_name = "devmem",
+ .d_version = D_VERSION,
+ .d_mmap_single = devmem_mmap_single,
+};
+
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ int error;
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+ if (error)
+ return (error);
+
+ dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(vmname);
+ KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+ if (sc->cdev == NULL) {
+ /* virtual machine is being created or destroyed */
+ mtx_unlock(&vmmdev_mtx);
+ free(dsc, M_VMMDEV);
+ destroy_dev_sched_cb(cdev, NULL, 0);
+ return (ENODEV);
+ }
+
+ dsc->segid = segid;
+ dsc->name = devname;
+ dsc->cdev = cdev;
+ dsc->sc = sc;
+ SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+ mtx_unlock(&vmmdev_mtx);
+
+ /* The 'cdev' is ready for use after 'si_drv1' is initialized */
+ cdev->si_drv1 = dsc;
+ return (0);
+}
+
+static void
+devmem_destroy(void *arg)
+{
+ struct devmem_softc *dsc = arg;
+
+ KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+ dsc->cdev = NULL;
+ dsc->sc = NULL;
+}
Index: sys/riscv/vmm/vmm_instruction_emul.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_instruction_emul.c
@@ -0,0 +1,107 @@
+/*-
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
+int
+vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging __unused, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->dir == VM_DIR_READ) {
+ error = memread(vcpu, gpa, &val, vie->access_size, memarg);
+ if (error)
+ goto out;
+ if ((vie->sign_extend == 0) && (vie->access_size < 8))
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = vm_set_register(vcpu, vie->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vie->reg, &val);
+ if (error)
+ goto out;
+ /* Mask any unneeded bits from the register */
+ if (vie->access_size < 8)
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = memwrite(vcpu, gpa, val, vie->access_size, memarg);
+ }
+
+out:
+ return (error);
+}
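+
+/*
+ * Note on the call flow (as wired up elsewhere in this patch): memread and
+ * memwrite are the per-device handlers registered with
+ * vm_register_inst_handler(), e.g. the APLIC's mem_read()/mem_write(), so a
+ * trapped guest load from an emulated register becomes memread() followed by
+ * vm_set_register() on the destination register described by 'vie'.
+ */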
+
+int
+vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vre->dir == VM_DIR_READ) {
+ error = regread(vcpu, &val, regarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vcpu, vre->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vre->reg, &val);
+ if (error)
+ goto out;
+ error = regwrite(vcpu, val, regarg);
+ }
+
+out:
+ return (error);
+}
Index: sys/riscv/vmm/vmm_ktr.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_ktr.h
@@ -0,0 +1,69 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_KTR_H_
+#define _VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+#ifndef KTR_VMM
+#define KTR_VMM KTR_GEN
+#endif
+
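+/*
+ * Illustrative expansion: a call such as
+ *   VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+ * becomes CTR2(KTR_VMM, "vm %s: virtual machine successfully suspended %d",
+ * vm_name(vm), how), so every record in the KTR buffer is tagged with the
+ * VM name (and, for the VCPU_* variants, the vcpu id).
+ */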
+#define VCPU_CTR0(vm, vcpuid, format) \
+CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid))
+
+#define VCPU_CTR1(vm, vcpuid, format, p1) \
+CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1))
+
+#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \
+CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2))
+
+#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \
+CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
+
+#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), \
+ (p1), (p2), (p3), (p4))
+
+#define VM_CTR0(vm, format) \
+CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
+
+#define VM_CTR1(vm, format, p1) \
+CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1))
+
+#define VM_CTR2(vm, format, p1, p2) \
+CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2))
+
+#define VM_CTR3(vm, format, p1, p2, p3) \
+CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3))
+
+#define VM_CTR4(vm, format, p1, p2, p3, p4) \
+CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4))
+#endif
Index: sys/riscv/vmm/vmm_riscv.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_riscv.c
@@ -0,0 +1,797 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/vmem.h>
+#include <sys/bus.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/md_var.h>
+#include <machine/riscvreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/pmap.h>
+#include <machine/intr.h>
+#include <machine/encoding.h>
+#include <machine/db_machdep.h>
+
+#include "riscv.h"
+#include "vmm_aplic.h"
+#include "vmm_stat.h"
+
+MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
+
+DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
+
+static int
+m_op(uint32_t insn, int match, int mask)
+{
+
+ if (((insn ^ match) & mask) == 0)
+ return (1);
+
+ return (0);
+}
+
+static inline void
+riscv_set_active_vcpu(struct hypctx *hypctx)
+{
+
+ DPCPU_SET(vcpu, hypctx);
+}
+
+struct hypctx *
+riscv_get_active_vcpu(void)
+{
+
+ return (DPCPU_GET(vcpu));
+}
+
+int
+vmmops_modinit(void)
+{
+
+ if (!has_hyp) {
+ printf("vmm: riscv hart doesn't support H-extension.\n");
+ return (ENXIO);
+ }
+
+ if (!has_sstc) {
+ printf("vmm: riscv hart doesn't support SSTC extension.\n");
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+int
+vmmops_modcleanup(void)
+{
+
+ return (0);
+}
+
+void *
+vmmops_init(struct vm *vm, pmap_t pmap)
+{
+ struct hyp *hyp;
+ vm_size_t size;
+
+ size = round_page(sizeof(struct hyp) +
+ sizeof(struct hypctx *) * vm_get_maxcpus(vm));
+ hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ hyp->vm = vm;
+ hyp->aplic_attached = false;
+
+ aplic_vminit(hyp);
+
+ return (hyp);
+}
+
+static void
+vmmops_delegate(void)
+{
+ uint64_t hedeleg;
+ uint64_t hideleg;
+
+ hedeleg = (1UL << SCAUSE_INST_MISALIGNED);
+ hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
+ hedeleg |= (1UL << SCAUSE_BREAKPOINT);
+ hedeleg |= (1UL << SCAUSE_ECALL_USER);
+ hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
+ hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
+ hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
+ csr_write(hedeleg, hedeleg);
+
+ hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR);
+ hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
+ hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
+ csr_write(hideleg, hideleg);
+}
+
+static void
+vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
+{
+ struct hypcsr *csrs;
+
+ csrs = &hypctx->guest_csrs;
+
+ csr_write(vsstatus, csrs->vsstatus);
+ csr_write(vsie, csrs->vsie);
+ csr_write(vstvec, csrs->vstvec);
+ csr_write(vsscratch, csrs->vsscratch);
+ csr_write(vsepc, csrs->vsepc);
+ csr_write(vscause, csrs->vscause);
+ csr_write(vstval, csrs->vstval);
+ csr_write(hvip, csrs->hvip);
+ csr_write(vsatp, csrs->vsatp);
+}
+
+static void
+vmmops_vcpu_save_csrs(struct hypctx *hypctx)
+{
+ struct hypcsr *csrs;
+
+ csrs = &hypctx->guest_csrs;
+
+ csrs->vsstatus = csr_read(vsstatus);
+ csrs->vsie = csr_read(vsie);
+ csrs->vstvec = csr_read(vstvec);
+ csrs->vsscratch = csr_read(vsscratch);
+ csrs->vsepc = csr_read(vsepc);
+ csrs->vscause = csr_read(vscause);
+ csrs->vstval = csr_read(vstval);
+ csrs->hvip = csr_read(hvip);
+ csrs->vsatp = csr_read(vsatp);
+}
+
+void *
+vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ vm_size_t size;
+
+ hyp = vmi;
+
+ dprintf("%s: hyp %p\n", __func__, hyp);
+
+ size = round_page(sizeof(struct hypctx));
+ hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+
+ KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
+ ("%s: Invalid vcpuid %d", __func__, vcpuid));
+ hyp->ctx[vcpuid] = hypctx;
+
+ hypctx->hyp = hyp;
+ hypctx->vcpu = vcpu1;
+
+ /*
+ * TODO: set initial state for CSRs if needed.
+ */
+ vmmops_vcpu_restore_csrs(hypctx);
+
+ aplic_cpuinit(hypctx);
+
+ vmmops_delegate();
+
+ csr_write(henvcfg, HENVCFG_STCE);
+ csr_write(hie, HIE_VSEIE | HIE_SGEIE);
+
+ /*
+ * TODO: should we trap rdcycle / rdtime ?
+ */
+ csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);
+ hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
+ hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
+ hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
+ hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
+
+ return (hypctx);
+}
+
+static int
+riscv_vmm_pinit(pmap_t pmap)
+{
+
+ dprintf("%s: pmap %p\n", __func__, pmap);
+
+ pmap_pinit_stage(pmap, PM_STAGE2);
+
+ return (1);
+}
+
+struct vmspace *
+vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+
+ return (vmspace_alloc(min, max, riscv_vmm_pinit));
+}
+
+void
+vmmops_vmspace_free(struct vmspace *vmspace)
+{
+
+ pmap_remove_pages(vmspace_pmap(vmspace));
+ vmspace_free(vmspace);
+}
+
+static void
+riscv_unpriv_read(struct hypctx *hypctx, uint64_t guest_addr, uint64_t *data)
+{
+ uint64_t old_hstatus;
+ uint64_t val;
+ uint64_t tmp;
+
+ old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
+
+ /*
+ * TODO: handle exceptions during unprivilege read.
+ */
+
+ __asm __volatile(".option push\n"
+ ".option norvc\n"
+ "hlvx.hu %[val], (%[addr])\n"
+ ".option pop\n"
+ : [val] "=&r" (val), [addr] "+&r" (guest_addr)
+ :: "memory");
+
+ if ((val & 0x3) == 0x3) {
+ guest_addr += 2;
+ __asm __volatile(".option push\n"
+ ".option norvc\n"
+ "hlvx.hu %[tmp], (%[addr])\n"
+ ".option pop\n"
+ : [tmp] "=&r" (tmp), [addr] "+&r" (guest_addr)
+ :: "memory");
+ val |= (tmp << 16);
+ }
+
+ csr_write(hstatus, old_hstatus);
+
+ *data = val;
+}
+
+static void
+riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret)
+{
+ uint64_t guest_addr;
+ struct vie *vie;
+ uint64_t insn;
+ int reg_num;
+ int rs2, rd;
+
+ vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
+ (vme_ret->stval & 0x3);
+
+ guest_addr = vme_ret->sepc;
+
+ vie = &vme_ret->u.inst_emul.vie;
+	vie->dir = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
+	    VM_DIR_WRITE : VM_DIR_READ;
+ vie->sign_extend = 1;
+
+ riscv_unpriv_read(hypctx, guest_addr, &insn);
+
+ if ((insn & 0x3) == 0x3) {
+ rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
+ rd = (insn & RD_MASK) >> RD_SHIFT;
+
+ if (vie->dir == VM_DIR_WRITE) {
+ if (m_op(insn, MATCH_SB, MASK_SB))
+ vie->access_size = 1;
+ else if (m_op(insn, MATCH_SH, MASK_SH))
+ vie->access_size = 2;
+ else if (m_op(insn, MATCH_SW, MASK_SW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_SD, MASK_SD))
+ vie->access_size = 8;
+ else
+ panic("unknown store instr at %lx", guest_addr);
+ reg_num = rs2;
+ } else {
+ if (m_op(insn, MATCH_LB, MASK_LB))
+ vie->access_size = 1;
+ else if (m_op(insn, MATCH_LH, MASK_LH))
+ vie->access_size = 2;
+ else if (m_op(insn, MATCH_LW, MASK_LW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_LD, MASK_LD))
+ vie->access_size = 8;
+ else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
+ vie->access_size = 1;
+ vie->sign_extend = 0;
+ } else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
+ vie->access_size = 2;
+ vie->sign_extend = 0;
+ } else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
+ vie->access_size = 4;
+ vie->sign_extend = 0;
+ } else
+ panic("unknown load instr at %lx", guest_addr);
+ reg_num = rd;
+ }
+ vme_ret->inst_length = 4;
+ } else {
+ rs2 = (insn >> 7) & 0x7;
+ rs2 += 0x8;
+ rd = (insn >> 2) & 0x7;
+ rd += 0x8;
+
+ if (vie->dir == VM_DIR_WRITE) {
+ if (m_op(insn, MATCH_C_SW, MASK_C_SW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
+ vie->access_size = 8;
+ else
+ panic("unknown store instr at %lx", guest_addr);
+ } else {
+ if (m_op(insn, MATCH_C_LW, MASK_C_LW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
+ vie->access_size = 8;
+ else
+ panic("unknown load instr at %lx", guest_addr);
+ }
+ reg_num = rd;
+ vme_ret->inst_length = 2;
+ }
+
+ dprintf("guest_addr %lx insn %lx, reg %d\n", guest_addr, insn, reg_num);
+
+ vie->reg = reg_num;
+}
+
+static bool
+riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
+ pmap_t pmap)
+{
+ uint64_t insn;
+ uint64_t gpa;
+ bool handled;
+
+ handled = false;
+
+ if (vme->scause & SCAUSE_INTR) {
+ /*
+ * Host interrupt? Leave critical section to handle.
+ */
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ vme->inst_length = 0;
+ return (handled);
+ }
+
+ switch (vme->scause) {
+ case SCAUSE_FETCH_GUEST_PAGE_FAULT:
+ case SCAUSE_LOAD_GUEST_PAGE_FAULT:
+ case SCAUSE_STORE_GUEST_PAGE_FAULT:
+ gpa = (vme->htval << 2) | (vme->stval & 0x3);
+ if (vm_mem_allocated(hypctx->vcpu, gpa)) {
+ vme->exitcode = VM_EXITCODE_PAGING;
+ vme->inst_length = 0;
+ vme->u.paging.gpa = gpa;
+ } else {
+ riscv_gen_inst_emul_data(hypctx, vme);
+ vme->exitcode = VM_EXITCODE_INST_EMUL;
+ }
+ break;
+ case SCAUSE_ILLEGAL_INSTRUCTION:
+ /*
+ * TODO: handle illegal instruction properly.
+ */
+ panic("%s: Illegal instr at %lx stval 0x%lx htval 0x%lx\n",
+ __func__, vme->sepc, vme->stval, vme->htval);
+ case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
+ vme->exitcode = VM_EXITCODE_ECALL;
+ handled = false;
+ break;
+ case SCAUSE_VIRTUAL_INSTRUCTION:
+ insn = vme->stval;
+ if (m_op(insn, MATCH_WFI, MASK_WFI))
+ vme->exitcode = VM_EXITCODE_WFI;
+ else
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = false;
+ break;
+ default:
+ printf("unknown scause %lx\n", vme->scause);
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = false;
+ break;
+ }
+
+ return (handled);
+}
+
+int
+vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
+ int prot, uint64_t *gpa, int *is_fault)
+{
+
+ /* Implement me. */
+
+ return (0);
+}
+
+static void
+riscv_sync_interrupts(struct hypctx *hypctx)
+{
+ int pending;
+
+ pending = aplic_check_pending(hypctx);
+
+ if (pending)
+ hypctx->guest_csrs.hvip |= HVIP_VSEIP;
+ else
+ hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;
+
+ csr_write(hvip, hypctx->guest_csrs.hvip);
+}
+
+int
+vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
+{
+ struct hypctx *hypctx;
+ struct vm_exit *vme;
+ struct vcpu *vcpu;
+ register_t val;
+ int handled;
+
+ hypctx = (struct hypctx *)vcpui;
+ vcpu = hypctx->vcpu;
+ vme = vm_exitinfo(vcpu);
+
+ hypctx->guest_regs.hyp_sepc = (uint64_t)pc;
+
+ if (hypctx->guest_regs.hyp_sstatus & SSTATUS_SPP)
+ hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
+ else
+		hypctx->guest_regs.hyp_hstatus &= ~HSTATUS_SPVP;
+
+ hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPV | HSTATUS_VTW;
+
+ csr_write(hgatp, pmap->pm_satp);
+
+ vmmops_vcpu_restore_csrs(hypctx);
+
+ for (;;) {
+ dprintf("%s: pc %lx\n", __func__, pc);
+
+ if (hypctx->has_exception) {
+ hypctx->has_exception = false;
+ /*
+ * TODO: implement exception injection.
+ */
+ }
+
+ val = intr_disable();
+
+ /* Check if the vcpu is suspended */
+ if (vcpu_suspended(evinfo)) {
+ intr_restore(val);
+ vm_exit_suspended(vcpu, pc);
+ break;
+ }
+
+ if (vcpu_debugged(vcpu)) {
+ intr_restore(val);
+ vm_exit_debug(vcpu, pc);
+ break;
+ }
+
+ /*
+ * TODO: What happens if a timer interrupt is asserted exactly
+ * here, but for the previous VM?
+ */
+ riscv_set_active_vcpu(hypctx);
+ aplic_flush_hwstate(hypctx);
+
+ riscv_sync_interrupts(hypctx);
+
+ dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
+ __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
+ hypctx->guest_regs.hyp_hstatus);
+
+ vmm_switch(hypctx);
+
+ dprintf("%s: Leaving guest VM\n", __func__);
+
+ aplic_sync_hwstate(hypctx);
+
+ /*
+ * TODO: deactivate stage 2 pmap here if needed.
+ */
+
+ vme->scause = csr_read(scause);
+ vme->sepc = csr_read(sepc);
+ vme->stval = csr_read(stval);
+ vme->htval = csr_read(htval);
+ vme->htinst = csr_read(htinst);
+
+ intr_restore(val);
+
+ vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
+ vme->pc = hypctx->guest_regs.hyp_sepc;
+ vme->inst_length = INSN_SIZE;
+
+ handled = riscv_handle_world_switch(hypctx, vme, pmap);
+		if (handled == false) {
+			/* Exit loop to emulate instruction. */
+			break;
+		} else {
+			/* Resume guest execution from the next instruction. */
+			hypctx->guest_regs.hyp_sepc += vme->inst_length;
+		}
+ }
+
+ vmmops_vcpu_save_csrs(hypctx);
+
+ return (0);
+}
+
+static void
+riscv_pcpu_vmcleanup(void *arg)
+{
+ struct hyp *hyp;
+ int i, maxcpus;
+
+ hyp = arg;
+ maxcpus = vm_get_maxcpus(hyp->vm);
+ for (i = 0; i < maxcpus; i++) {
+ if (riscv_get_active_vcpu() == hyp->ctx[i]) {
+ riscv_set_active_vcpu(NULL);
+ break;
+ }
+ }
+}
+
+void
+vmmops_vcpu_cleanup(void *vcpui)
+{
+ struct hypctx *hypctx;
+
+ hypctx = vcpui;
+
+ dprintf("%s\n", __func__);
+
+ aplic_cpucleanup(hypctx);
+
+ free(hypctx, M_HYP);
+}
+
+void
+vmmops_cleanup(void *vmi)
+{
+ struct hyp *hyp;
+
+ hyp = vmi;
+
+ dprintf("%s\n", __func__);
+
+ aplic_vmcleanup(hyp);
+
+ smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);
+
+ free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer to the guest register's storage in 'hypctx'.  Registers
+ * have different sizes and an explicit cast must be made to ensure proper
+ * conversion.
+ */
+static uint64_t *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+
+ switch (reg) {
+ case VM_REG_GUEST_RA:
+ return (&hypctx->guest_regs.hyp_ra);
+ case VM_REG_GUEST_SP:
+ return (&hypctx->guest_regs.hyp_sp);
+ case VM_REG_GUEST_GP:
+ return (&hypctx->guest_regs.hyp_gp);
+ case VM_REG_GUEST_TP:
+ return (&hypctx->guest_regs.hyp_tp);
+ case VM_REG_GUEST_T0:
+ return (&hypctx->guest_regs.hyp_t[0]);
+ case VM_REG_GUEST_T1:
+ return (&hypctx->guest_regs.hyp_t[1]);
+ case VM_REG_GUEST_T2:
+ return (&hypctx->guest_regs.hyp_t[2]);
+ case VM_REG_GUEST_S0:
+ return (&hypctx->guest_regs.hyp_s[0]);
+ case VM_REG_GUEST_S1:
+ return (&hypctx->guest_regs.hyp_s[1]);
+ case VM_REG_GUEST_A0:
+ return (&hypctx->guest_regs.hyp_a[0]);
+ case VM_REG_GUEST_A1:
+ return (&hypctx->guest_regs.hyp_a[1]);
+ case VM_REG_GUEST_A2:
+ return (&hypctx->guest_regs.hyp_a[2]);
+ case VM_REG_GUEST_A3:
+ return (&hypctx->guest_regs.hyp_a[3]);
+ case VM_REG_GUEST_A4:
+ return (&hypctx->guest_regs.hyp_a[4]);
+ case VM_REG_GUEST_A5:
+ return (&hypctx->guest_regs.hyp_a[5]);
+ case VM_REG_GUEST_A6:
+ return (&hypctx->guest_regs.hyp_a[6]);
+ case VM_REG_GUEST_A7:
+ return (&hypctx->guest_regs.hyp_a[7]);
+ case VM_REG_GUEST_S2:
+ return (&hypctx->guest_regs.hyp_s[2]);
+ case VM_REG_GUEST_S3:
+ return (&hypctx->guest_regs.hyp_s[3]);
+ case VM_REG_GUEST_S4:
+ return (&hypctx->guest_regs.hyp_s[4]);
+ case VM_REG_GUEST_S5:
+ return (&hypctx->guest_regs.hyp_s[5]);
+ case VM_REG_GUEST_S6:
+ return (&hypctx->guest_regs.hyp_s[6]);
+ case VM_REG_GUEST_S7:
+ return (&hypctx->guest_regs.hyp_s[7]);
+ case VM_REG_GUEST_S8:
+ return (&hypctx->guest_regs.hyp_s[8]);
+ case VM_REG_GUEST_S9:
+ return (&hypctx->guest_regs.hyp_s[9]);
+ case VM_REG_GUEST_S10:
+ return (&hypctx->guest_regs.hyp_s[10]);
+ case VM_REG_GUEST_S11:
+ return (&hypctx->guest_regs.hyp_s[11]);
+ case VM_REG_GUEST_T3:
+ return (&hypctx->guest_regs.hyp_t[3]);
+ case VM_REG_GUEST_T4:
+ return (&hypctx->guest_regs.hyp_t[4]);
+ case VM_REG_GUEST_T5:
+ return (&hypctx->guest_regs.hyp_t[5]);
+ case VM_REG_GUEST_T6:
+ return (&hypctx->guest_regs.hyp_t[6]);
+ case VM_REG_GUEST_SEPC:
+ return (&hypctx->guest_regs.hyp_sepc);
+ default:
+ break;
+ }
+
+ return (NULL);
+}
+
+int
+vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
+{
+ uint64_t *regp;
+ int running, hostcpu;
+ struct hypctx *hypctx;
+
+ hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *retval = *regp;
+
+ return (0);
+}
+
+int
+vmmops_setreg(void *vcpui, int reg, uint64_t val)
+{
+ uint64_t *regp;
+ struct hypctx *hypctx;
+ int running, hostcpu;
+
+ hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *regp = val;
+
+ return (0);
+}
+
+int
+vmmops_exception(void *vcpui, uint64_t scause)
+{
+ struct hypctx *hypctx = vcpui;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ /* TODO: set registers. */
+
+ hypctx->has_exception = true;
+
+ return (0);
+}
+
+int
+vmmops_getcap(void *vcpui, int num, int *retval)
+{
+ int ret;
+
+ ret = ENOENT;
+
+ switch (num) {
+ case VM_CAP_UNRESTRICTED_GUEST:
+ *retval = 1;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return (ret);
+}
+
+int
+vmmops_setcap(void *vcpui, int num, int val)
+{
+
+ return (ENOENT);
+}
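
Two small computations above carry most of the fault-handling logic: riscv_gen_inst_emul_data() and riscv_handle_world_switch() rebuild the faulting guest-physical address as (htval << 2) | (stval & 3), since htval reports bits [XLEN-1:2] of the guest physical address, and the low two bits of the fetched opcode decide whether the trapping instruction is a 4-byte standard encoding or a 2-byte compressed one. A self-contained sketch of both computations (the values are illustrative, not taken from the diff):

#include <stdint.h>
#include <stdio.h>

/* Guest-physical address of a guest-page fault, rebuilt from htval/stval. */
static uint64_t
fault_gpa(uint64_t htval, uint64_t stval)
{

	return ((htval << 2) | (stval & 0x3));
}

/*
 * Encodings whose two low bits are 0b11 are 32-bit instructions; with the C
 * extension everything else is a 16-bit compressed instruction.
 */
static int
insn_length(uint64_t insn)
{

	return ((insn & 0x3) == 0x3 ? 4 : 2);
}

int
main(void)
{
	uint64_t htval = 0x20001234;	/* example CSR values only */
	uint64_t stval = 0x2;

	printf("gpa %#lx\n", fault_gpa(htval, stval));	/* 0x800048d2 */
	printf("len %d\n", insn_length(0x00053503));	/* ld a0,0(a0): 4 */
	printf("len %d\n", insn_length(0x6108));	/* c.ld a0,0(a0): 2 */
	return (0);
}
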
Index: sys/riscv/vmm/vmm_sbi.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_sbi.c
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/md_var.h>
+#include <machine/sbi.h>
+
+#include "riscv.h"
+
+/*
+ * SBI is fully handled in userspace.
+ *
+ * TODO: We may need to handle the SBI IPI extension here in kernel.
+ * The same for the SBI TIME extension in case of no SSTC support in HW.
+ */
+
+int
+vmm_sbi_ecall(struct vcpu *vcpu, bool *retu)
+{
+ int sbi_extension_id __unused;
+ struct hypctx *hypctx;
+
+ hypctx = riscv_get_active_vcpu();
+ sbi_extension_id = hypctx->guest_regs.hyp_a[7];
+
+ dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__,
+ hypctx->guest_regs.hyp_a[0],
+ hypctx->guest_regs.hyp_a[1],
+ hypctx->guest_regs.hyp_a[2],
+ hypctx->guest_regs.hyp_a[3],
+ hypctx->guest_regs.hyp_a[4],
+ hypctx->guest_regs.hyp_a[5],
+ hypctx->guest_regs.hyp_a[6],
+ hypctx->guest_regs.hyp_a[7]);
+
+ switch (sbi_extension_id) {
+ case SBI_EXT_ID_TIME:
+ break;
+ default:
+ break;
+ }
+
+ *retu = true;
+
+ return (0);
+}
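
vmm_sbi_ecall() forwards SBI calls to userspace after noting the extension id. Under the SBI calling convention the guest passes the extension ID in a7, the function ID in a6 and arguments in a0..a5, and expects an error/value pair back in a0/a1, which is why the handler only needs to look at guest_regs.hyp_a[]. A small sketch of that convention (the struct, names and values here are illustrative, not the diff's types):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the guest's a0..a7 register file. */
struct sbi_regs {
	uint64_t a[8];
};

#define SBI_EXT_ID_TIME	0x54494D45	/* "TIME" */

int
main(void)
{
	/*
	 * sbi_set_timer(deadline): extension "TIME" in a7, function 0 in a6,
	 * the deadline in a0; the guest expects error/value back in a0/a1.
	 */
	struct sbi_regs regs = {
		.a = { [0] = 0x12345678, [6] = 0, [7] = SBI_EXT_ID_TIME }
	};

	printf("SBI ext %#lx func %#lx arg0 %#lx\n",
	    regs.a[7], regs.a[6], regs.a[0]);
	return (0);
}
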
Index: sys/riscv/vmm/vmm_stat.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_stat.h
@@ -0,0 +1,144 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vcpu *vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+int vmm_stat_copy(struct vcpu *vcpu, int index, int count,
+ int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_WFI);
+VMM_STAT_DECLARE(VMEXIT_WFE);
+VMM_STAT_DECLARE(VMEXIT_HVC);
+VMM_STAT_DECLARE(VMEXIT_MSR);
+VMM_STAT_DECLARE(VMEXIT_DATA_ABORT);
+VMM_STAT_DECLARE(VMEXIT_INSN_ABORT);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED_SYNC);
+VMM_STAT_DECLARE(VMEXIT_IRQ);
+VMM_STAT_DECLARE(VMEXIT_FIQ);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED);
+#endif
Index: sys/riscv/vmm/vmm_stat.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_stat.c
@@ -0,0 +1,162 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (e.g., VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vcpu *vcpu, int index, int count, int *num_stats,
+ uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i, tocopy;
+
+ if (index < 0 || count < 0)
+ return (EINVAL);
+
+ if (index > vst_num_elems)
+ return (ENOENT);
+
+ if (index == vst_num_elems) {
+ *num_stats = 0;
+ return (0);
+ }
+
+ tocopy = min(vst_num_elems - index, count);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vcpu);
+ memcpy(buf, stats + index, tocopy * sizeof(stats[0]));
+ *num_stats = tocopy;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
+VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
+VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
+VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
+VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
+VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
+VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
+VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
+VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
+VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
+VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
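
The comment at the top of vmm_stat.c distinguishes stat types from addressable elements because an array stat occupies several consecutive uint64_t slots in the per-vcpu buffer, and vmm_stat_desc_copy() synthesizes "name[i]" strings for those slots. A userspace sketch of the same index assignment and name formatting (simplified, with a hypothetical array stat; not the kernel interfaces themselves):

#include <stdio.h>

struct stat_type {
	int index;		/* first slot in the per-vcpu buffer */
	int nelems;		/* 1 for a scalar, >1 for an array stat */
	const char *desc;
};

int
main(void)
{
	struct stat_type types[] = {
		{ -1, 1, "total number of vm exits" },
		{ -1, 4, "exits per cause" },	/* hypothetical array stat */
	};
	char name[64];
	int i, j, nelems;

	/* The index assignment vmm_stat_register() performs at SYSINIT time. */
	nelems = 0;
	for (i = 0; i < 2; i++) {
		types[i].index = nelems;
		nelems += types[i].nelems;
	}

	/* vmm_stat_desc_copy() appends "[n]" for elements of array stats. */
	for (i = 0; i < 2; i++) {
		for (j = 0; j < types[i].nelems; j++) {
			if (types[i].nelems > 1)
				snprintf(name, sizeof(name), "%s[%d]",
				    types[i].desc, j);
			else
				snprintf(name, sizeof(name), "%s",
				    types[i].desc);
			printf("slot %d: %s\n", types[i].index + j, name);
		}
	}
	return (0);
}
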
Index: sys/riscv/vmm/vmm_switch.S
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_switch.S
@@ -0,0 +1,202 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+#include "assym.inc"
+
+ .text
+
+/*
+ * a0 == hypctx *
+ */
+ENTRY(vmm_switch)
+ sd ra, (HYP_H_RA)(a0)
+ sd sp, (HYP_H_SP)(a0)
+ sd tp, (HYP_H_TP)(a0)
+ sd gp, (HYP_H_GP)(a0)
+ sd s0, (HYP_H_S + 0 * 8)(a0)
+ sd s1, (HYP_H_S + 1 * 8)(a0)
+ sd s2, (HYP_H_S + 2 * 8)(a0)
+ sd s3, (HYP_H_S + 3 * 8)(a0)
+ sd s4, (HYP_H_S + 4 * 8)(a0)
+ sd s5, (HYP_H_S + 5 * 8)(a0)
+ sd s6, (HYP_H_S + 6 * 8)(a0)
+ sd s7, (HYP_H_S + 7 * 8)(a0)
+ sd s8, (HYP_H_S + 8 * 8)(a0)
+ sd s9, (HYP_H_S + 9 * 8)(a0)
+ sd s10, (HYP_H_S + 10 * 8)(a0)
+ sd s11, (HYP_H_S + 11 * 8)(a0)
+
+ sd a1, (HYP_H_A + 1 * 8)(a0)
+ sd a2, (HYP_H_A + 2 * 8)(a0)
+ sd a3, (HYP_H_A + 3 * 8)(a0)
+ sd a4, (HYP_H_A + 4 * 8)(a0)
+ sd a5, (HYP_H_A + 5 * 8)(a0)
+ sd a6, (HYP_H_A + 6 * 8)(a0)
+ sd a7, (HYP_H_A + 7 * 8)(a0)
+
+ ld t0, (HYP_G_SSTATUS)(a0)
+ ld t1, (HYP_G_HSTATUS)(a0)
+ ld t2, (HYP_G_SCOUNTEREN)(a0)
+ la t4, .Lswitch_return
+ ld t5, (HYP_G_SEPC)(a0)
+
+ csrrw t0, sstatus, t0
+ csrrw t1, hstatus, t1
+ csrrw t2, scounteren, t2
+ csrrw t3, sscratch, a0
+ csrrw t4, stvec, t4
+ csrw sepc, t5
+
+ sd t0, (HYP_H_SSTATUS)(a0)
+ sd t1, (HYP_H_HSTATUS)(a0)
+ sd t2, (HYP_H_SCOUNTEREN)(a0)
+ sd t3, (HYP_H_SSCRATCH)(a0)
+ sd t4, (HYP_H_STVEC)(a0)
+
+ ld ra, (HYP_G_RA)(a0)
+ ld sp, (HYP_G_SP)(a0)
+ ld gp, (HYP_G_GP)(a0)
+ ld tp, (HYP_G_TP)(a0)
+ ld t0, (HYP_G_T + 0 * 8)(a0)
+ ld t1, (HYP_G_T + 1 * 8)(a0)
+ ld t2, (HYP_G_T + 2 * 8)(a0)
+ ld t3, (HYP_G_T + 3 * 8)(a0)
+ ld t4, (HYP_G_T + 4 * 8)(a0)
+ ld t5, (HYP_G_T + 5 * 8)(a0)
+ ld t6, (HYP_G_T + 6 * 8)(a0)
+ ld s0, (HYP_G_S + 0 * 8)(a0)
+ ld s1, (HYP_G_S + 1 * 8)(a0)
+ ld s2, (HYP_G_S + 2 * 8)(a0)
+ ld s3, (HYP_G_S + 3 * 8)(a0)
+ ld s4, (HYP_G_S + 4 * 8)(a0)
+ ld s5, (HYP_G_S + 5 * 8)(a0)
+ ld s6, (HYP_G_S + 6 * 8)(a0)
+ ld s7, (HYP_G_S + 7 * 8)(a0)
+ ld s8, (HYP_G_S + 8 * 8)(a0)
+ ld s9, (HYP_G_S + 9 * 8)(a0)
+ ld s10, (HYP_G_S + 10 * 8)(a0)
+ ld s11, (HYP_G_S + 11 * 8)(a0)
+ /* skip a0 for now. */
+ ld a1, (HYP_G_A + 1 * 8)(a0)
+ ld a2, (HYP_G_A + 2 * 8)(a0)
+ ld a3, (HYP_G_A + 3 * 8)(a0)
+ ld a4, (HYP_G_A + 4 * 8)(a0)
+ ld a5, (HYP_G_A + 5 * 8)(a0)
+ ld a6, (HYP_G_A + 6 * 8)(a0)
+ ld a7, (HYP_G_A + 7 * 8)(a0)
+ /* now load a0. */
+ ld a0, (HYP_G_A + 0 * 8)(a0)
+
+ sret
+
+ .align 2
+.Lswitch_return:
+
+ csrrw a0, sscratch, a0
+ sd ra, (HYP_G_RA)(a0)
+ sd sp, (HYP_G_SP)(a0)
+ sd gp, (HYP_G_GP)(a0)
+ sd tp, (HYP_G_TP)(a0)
+ sd t0, (HYP_G_T + 0 * 8)(a0)
+ sd t1, (HYP_G_T + 1 * 8)(a0)
+ sd t2, (HYP_G_T + 2 * 8)(a0)
+ sd t3, (HYP_G_T + 3 * 8)(a0)
+ sd t4, (HYP_G_T + 4 * 8)(a0)
+ sd t5, (HYP_G_T + 5 * 8)(a0)
+ sd t6, (HYP_G_T + 6 * 8)(a0)
+ sd s0, (HYP_G_S + 0 * 8)(a0)
+ sd s1, (HYP_G_S + 1 * 8)(a0)
+ sd s2, (HYP_G_S + 2 * 8)(a0)
+ sd s3, (HYP_G_S + 3 * 8)(a0)
+ sd s4, (HYP_G_S + 4 * 8)(a0)
+ sd s5, (HYP_G_S + 5 * 8)(a0)
+ sd s6, (HYP_G_S + 6 * 8)(a0)
+ sd s7, (HYP_G_S + 7 * 8)(a0)
+ sd s8, (HYP_G_S + 8 * 8)(a0)
+ sd s9, (HYP_G_S + 9 * 8)(a0)
+ sd s10, (HYP_G_S + 10 * 8)(a0)
+ sd s11, (HYP_G_S + 11 * 8)(a0)
+ /* skip a0 */
+ sd a1, (HYP_G_A + 1 * 8)(a0)
+ sd a2, (HYP_G_A + 2 * 8)(a0)
+ sd a3, (HYP_G_A + 3 * 8)(a0)
+ sd a4, (HYP_G_A + 4 * 8)(a0)
+ sd a5, (HYP_G_A + 5 * 8)(a0)
+ sd a6, (HYP_G_A + 6 * 8)(a0)
+ sd a7, (HYP_G_A + 7 * 8)(a0)
+
+ ld t1, (HYP_H_STVEC)(a0)
+ ld t2, (HYP_H_SSCRATCH)(a0)
+ ld t3, (HYP_H_SCOUNTEREN)(a0)
+ ld t4, (HYP_H_HSTATUS)(a0)
+ ld t5, (HYP_H_SSTATUS)(a0)
+
+ csrr t0, sepc
+ csrw stvec, t1
+ csrrw t2, sscratch, t2
+ csrrw t3, scounteren, t3
+ csrrw t4, hstatus, t4
+ csrrw t5, sstatus, t5
+
+ sd t0, (HYP_G_SEPC)(a0)
+ sd t2, (HYP_G_A + 0 * 8)(a0)
+ sd t3, (HYP_G_SCOUNTEREN)(a0)
+ sd t4, (HYP_G_HSTATUS)(a0)
+ sd t5, (HYP_G_SSTATUS)(a0)
+
+ ld ra, (HYP_H_RA)(a0)
+ ld sp, (HYP_H_SP)(a0)
+ ld tp, (HYP_H_TP)(a0)
+ ld gp, (HYP_H_GP)(a0)
+ ld s0, (HYP_H_S + 0 * 8)(a0)
+ ld s1, (HYP_H_S + 1 * 8)(a0)
+ ld s2, (HYP_H_S + 2 * 8)(a0)
+ ld s3, (HYP_H_S + 3 * 8)(a0)
+ ld s4, (HYP_H_S + 4 * 8)(a0)
+ ld s5, (HYP_H_S + 5 * 8)(a0)
+ ld s6, (HYP_H_S + 6 * 8)(a0)
+ ld s7, (HYP_H_S + 7 * 8)(a0)
+ ld s8, (HYP_H_S + 8 * 8)(a0)
+ ld s9, (HYP_H_S + 9 * 8)(a0)
+ ld s10, (HYP_H_S + 10 * 8)(a0)
+ ld s11, (HYP_H_S + 11 * 8)(a0)
+
+ ld a1, (HYP_H_A + 1 * 8)(a0)
+ ld a2, (HYP_H_A + 2 * 8)(a0)
+ ld a3, (HYP_H_A + 3 * 8)(a0)
+ ld a4, (HYP_H_A + 4 * 8)(a0)
+ ld a5, (HYP_H_A + 5 * 8)(a0)
+ ld a6, (HYP_H_A + 6 * 8)(a0)
+ ld a7, (HYP_H_A + 7 * 8)(a0)
+
+ ret
+
+END(vmm_switch)
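
vmm_switch.S addresses the host and guest save areas through HYP_H_* / HYP_G_* constants taken from assym.inc, which FreeBSD generates from the C structure layouts so the assembly offsets cannot drift from struct hypctx. A hedged sketch of the underlying offsetof idea, using a stand-in structure (the field and macro names here are illustrative, not the ones the diff defines):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for a context save area that assembly has to index by offset. */
struct hypctx_sketch {
	uint64_t host_ra;
	uint64_t host_sp;
	uint64_t guest_gprs[31];
};

int
main(void)
{

	/*
	 * The kernel build emits lines of this form into assym.inc from the
	 * real struct hypctx, and vmm_switch.S then uses them as literal
	 * displacements, e.g. "sd ra, (HYP_H_RA)(a0)".
	 */
	printf("#define\tHYP_H_RA\t%zu\n",
	    offsetof(struct hypctx_sketch, host_ra));
	printf("#define\tHYP_H_SP\t%zu\n",
	    offsetof(struct hypctx_sketch, host_sp));
	printf("#define\tHYP_G_GPRS\t%zu\n",
	    offsetof(struct hypctx_sketch, guest_gprs));
	return (0);
}
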