D45553.id140216.diff
Index: sys/conf/files.riscv
===================================================================
--- sys/conf/files.riscv
+++ sys/conf/files.riscv
@@ -43,6 +43,7 @@
riscv/riscv/elf_machdep.c standard
riscv/riscv/exception.S standard
riscv/riscv/exec_machdep.c standard
+riscv/riscv/fpe.c optional vmm
riscv/riscv/gdb_machdep.c optional gdb
riscv/riscv/intc.c standard
riscv/riscv/identcpu.c standard
@@ -71,6 +72,14 @@
riscv/riscv/uio_machdep.c standard
riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack
riscv/riscv/vm_machdep.c standard
+riscv/vmm/vmm.c optional vmm
+riscv/vmm/vmm_aplic.c optional vmm
+riscv/vmm/vmm_dev.c optional vmm
+riscv/vmm/vmm_instruction_emul.c optional vmm
+riscv/vmm/vmm_riscv.c optional vmm
+riscv/vmm/vmm_sbi.c optional vmm
+riscv/vmm/vmm_stat.c optional vmm
+riscv/vmm/vmm_switch.S optional vmm
# Zstd
contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C}
Index: sys/conf/kern.mk
===================================================================
--- sys/conf/kern.mk
+++ sys/conf/kern.mk
@@ -160,7 +160,7 @@
# code model as "medium" and "medany" respectively.
#
.if ${MACHINE_CPUARCH} == "riscv"
-CFLAGS+= -march=rv64imafdc
+CFLAGS+= -march=rv64imafdch
CFLAGS+= -mabi=lp64
CFLAGS.clang+= -mcmodel=medium
CFLAGS.gcc+= -mcmodel=medany
Index: sys/riscv/include/cpu.h
===================================================================
--- sys/riscv/include/cpu.h
+++ sys/riscv/include/cpu.h
@@ -47,8 +47,6 @@
#define cpu_spinwait() /* nothing */
#define cpu_lock_delay() DELAY(1)
-#ifdef _KERNEL
-
/*
* Core manufacturer IDs, as reported by the mvendorid CSR.
*/
@@ -89,6 +87,8 @@
#define MMU_SV48 0x2 /* 4-level paging */
#define MMU_SV57 0x4 /* 5-level paging */
+#ifdef _KERNEL
+
extern char btext[];
extern char etext[];
Index: sys/riscv/include/elf.h
===================================================================
--- sys/riscv/include/elf.h
+++ sys/riscv/include/elf.h
@@ -80,6 +80,7 @@
#define HWCAP_ISA_F HWCAP_ISA_BIT('f')
#define HWCAP_ISA_D HWCAP_ISA_BIT('d')
#define HWCAP_ISA_C HWCAP_ISA_BIT('c')
+#define HWCAP_ISA_H HWCAP_ISA_BIT('h')
#define HWCAP_ISA_G \
(HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D)
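The HWCAP_ISA_H bit defined above is the same flag userland can read back from the ELF auxiliary vector. A minimal probe might look like the sketch below; it assumes FreeBSD's elf_aux_info(3) interface and headers installed from a tree that includes this change, and it is illustrative only, not part of the review.

/*
 * Hypothetical userland check for the hypervisor ('h') ISA extension
 * advertised through AT_HWCAP.
 */
#include <sys/auxv.h>

#include <machine/elf.h>

#include <stdio.h>

int
main(void)
{
	unsigned long hwcap;

	if (elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)) != 0) {
		fprintf(stderr, "AT_HWCAP not available\n");
		return (1);
	}
	printf("hypervisor extension %s\n",
	    (hwcap & HWCAP_ISA_H) != 0 ? "present" : "absent");
	return (0);
}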
Index: sys/riscv/include/md_var.h
===================================================================
--- sys/riscv/include/md_var.h
+++ sys/riscv/include/md_var.h
@@ -42,6 +42,7 @@
extern u_int mmu_caps;
/* Supervisor-mode extension support */
+extern bool has_hyp;
extern bool has_sstc;
extern bool has_sscofpmf;
Index: sys/riscv/include/riscvreg.h
===================================================================
--- sys/riscv/include/riscvreg.h
+++ sys/riscv/include/riscvreg.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2015-2017 Ruslan Bukin <br@bsdpad.com>
+ * Copyright (c) 2015-2024 Ruslan Bukin <br@bsdpad.com>
* All rights reserved.
*
* Portions of this software were developed by SRI International and the
@@ -47,9 +47,15 @@
#define SCAUSE_STORE_ACCESS_FAULT 7
#define SCAUSE_ECALL_USER 8
#define SCAUSE_ECALL_SUPERVISOR 9
+#define SCAUSE_VIRTUAL_SUPERVISOR_ECALL 10
+#define SCAUSE_MACHINE_ECALL 11
#define SCAUSE_INST_PAGE_FAULT 12
#define SCAUSE_LOAD_PAGE_FAULT 13
#define SCAUSE_STORE_PAGE_FAULT 15
+#define SCAUSE_FETCH_GUEST_PAGE_FAULT 20
+#define SCAUSE_LOAD_GUEST_PAGE_FAULT 21
+#define SCAUSE_VIRTUAL_INSTRUCTION 22
+#define SCAUSE_STORE_GUEST_PAGE_FAULT 23
#define SSTATUS_UIE (1 << 0)
#define SSTATUS_SIE (1 << 1)
@@ -116,6 +122,17 @@
#define MSTATUS_PRV_H 2 /* hypervisor */
#define MSTATUS_PRV_M 3 /* machine */
+#define HSTATUS_VSBE (1 << 5)
+#define HSTATUS_GVA (1 << 6)
+#define HSTATUS_SPV (1 << 7)
+#define HSTATUS_SPVP (1 << 8)
+#define HSTATUS_HU (1 << 9)
+#define HSTATUS_VGEIN_S 12
+#define HSTATUS_VGEIN_M (0xf << HSTATUS_VGEIN_S)
+#define HSTATUS_VTVM (1 << 20)
+#define HSTATUS_VTW (1 << 21)
+#define HSTATUS_VTSR (1 << 22)
+
#define MIE_USIE (1 << 0)
#define MIE_SSIE (1 << 1)
#define MIE_HSIE (1 << 2)
@@ -143,10 +160,35 @@
#define MIP_SEIP (1 << 9)
+#define HVIP_VSSIP (1 << 2)
+#define HVIP_VSTIP (1 << 6)
+#define HVIP_VSEIP (1 << 10)
+
+#define HIE_VSSIE (1 << 2)
+#define HIE_VSTIE (1 << 6)
+#define HIE_VSEIE (1 << 10)
+#define HIE_SGEIE (1 << 12)
+
/* Note: sip register has no SIP_STIP bit in Spike simulator */
#define SIP_SSIP (1 << 1)
#define SIP_STIP (1 << 5)
+#define HENVCFG_STCE (1UL << 63)
+#define HENVCFG_PBMTE (1UL << 62)
+#define HENVCFG_ADUE (1UL << 61)
+#define HENVCFG_CDE (1UL << 60)
+#define HENVCFG_PMM_S (1UL << 31)
+#define HENVCFG_PMM_M (0x3 << HENVCFG_PMM_S)
+#define HENVCFG_CBZE (1UL << 7)
+#define HENVCFG_CBCFE (1UL << 6)
+#define HENVCFG_CBIE_S (1UL << 4)
+#define HENVCFG_CBIE_M (0x3 << HENVCFG_CBIE_S)
+#define HENVCFG_FIOM (1UL << 0)
+
+#define HCOUNTEREN_CY (1UL << 0) /* Cycle */
+#define HCOUNTEREN_TM (1UL << 1) /* Time */
+#define HCOUNTEREN_IR (1UL << 2) /* Instret */
+
#define SATP_PPN_S 0
#define SATP_PPN_M (0xfffffffffffUL << SATP_PPN_S)
#define SATP_ASID_S 44
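The new HSTATUS/HENVCFG/HCOUNTEREN definitions follow the existing riscvreg.h convention: an _S suffix is a field shift and an _M suffix is the pre-shifted mask. A small kernel-side sketch of how such a pair is typically used (csr_read() is the existing macro from this header; the helper name is made up):

#include <machine/riscvreg.h>

/*
 * Hypothetical helper: extract the currently selected guest external
 * interrupt file (VGEIN) from the hstatus CSR.
 */
static inline uint64_t
hstatus_get_vgein(void)
{

	return ((csr_read(hstatus) & HSTATUS_VGEIN_M) >> HSTATUS_VGEIN_S);
}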
Index: sys/riscv/include/vmm.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+struct vcpu;
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_ZERO = 0,
+ VM_REG_GUEST_RA,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_GP,
+ VM_REG_GUEST_TP,
+ VM_REG_GUEST_T0,
+ VM_REG_GUEST_T1,
+ VM_REG_GUEST_T2,
+ VM_REG_GUEST_S0,
+ VM_REG_GUEST_S1,
+ VM_REG_GUEST_A0,
+ VM_REG_GUEST_A1,
+ VM_REG_GUEST_A2,
+ VM_REG_GUEST_A3,
+ VM_REG_GUEST_A4,
+ VM_REG_GUEST_A5,
+ VM_REG_GUEST_A6,
+ VM_REG_GUEST_A7,
+ VM_REG_GUEST_S2,
+ VM_REG_GUEST_S3,
+ VM_REG_GUEST_S4,
+ VM_REG_GUEST_S5,
+ VM_REG_GUEST_S6,
+ VM_REG_GUEST_S7,
+ VM_REG_GUEST_S8,
+ VM_REG_GUEST_S9,
+ VM_REG_GUEST_S10,
+ VM_REG_GUEST_S11,
+ VM_REG_GUEST_T3,
+ VM_REG_GUEST_T4,
+ VM_REG_GUEST_T5,
+ VM_REG_GUEST_T6,
+ VM_REG_GUEST_SEPC,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_MAX_SUFFIXLEN 15
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct vm_guest_paging;
+struct vm_aplic_descr;
+struct pmap;
+
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
+int vm_create(const char *name, struct vm **retvm);
+struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
+void vm_slock_vcpus(struct vm *vm);
+void vm_unlock_vcpus(struct vm *vm);
+void vm_destroy(struct vm *vm);
+int vm_reinit(struct vm *vm);
+const char *vm_name(struct vm *vm);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+void vm_slock_memsegs(struct vm *vm);
+void vm_xlock_memsegs(struct vm *vm);
+void vm_unlock_memsegs(struct vm *vm);
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
+void vm_gpa_release(void *cookie);
+bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
+
+int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
+
+uint16_t vm_get_maxcpus(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
+int vm_run(struct vcpu *vcpu);
+int vm_suspend(struct vm *vm, enum vm_suspend_how how);
+void* vm_get_cookie(struct vm *vm);
+int vcpu_vcpuid(struct vcpu *vcpu);
+void *vcpu_get_cookie(struct vcpu *vcpu);
+struct vm *vcpu_vm(struct vcpu *vcpu);
+struct vcpu *vm_vcpu(struct vm *vm, int cpu);
+int vm_get_capability(struct vcpu *vcpu, int type, int *val);
+int vm_set_capability(struct vcpu *vcpu, int type, int val);
+int vm_activate_cpu(struct vcpu *vcpu);
+int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
+int vm_inject_exception(struct vcpu *vcpu, uint64_t scause);
+int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func);
+struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
+void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
+void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
+
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+
+static __inline int
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
+{
+
+ return (*info->sptr);
+}
+
+int vcpu_debugged(struct vcpu *vcpu);
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
+enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vcpu *vcpu)
+{
+ struct thread *td;
+
+ td = curthread;
+ return (td->td_ast != 0 || td->td_owepreempt != 0);
+}
+#endif
+
+void *vcpu_stats(struct vcpu *vcpu);
+void vcpu_notify_event(struct vcpu *vcpu);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+#endif /* _KERNEL */
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+#define VM_GP_M_MASK 0x1f
+#define VM_GP_MMU_ENABLED (1 << 5)
+
+struct vm_guest_paging {
+ int flags;
+ int padding;
+};
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_ECALL,
+ VM_EXITCODE_PAGING,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_DEBUG,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_MAX
+};
+
+struct vm_exit {
+ uint64_t scause;
+ uint64_t sepc;
+ uint64_t stval;
+ uint64_t htval;
+ uint64_t htinst;
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ struct {
+ uint64_t gpa;
+ } paging;
+
+ struct {
+ uint64_t gpa;
+ struct vm_guest_paging paging;
+ struct vie vie;
+ } inst_emul;
+
+ struct {
+ uint64_t args[8];
+ } ecall;
+
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ } u;
+};
+
+#endif /* _VMM_H_ */
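The vm_exit layout and exit codes above are what drive the split between in-kernel handling and bouncing out to userland. A rough sketch of how a caller of vm_run() might dispatch on the exit information follows; it uses only interfaces declared in this header and is illustrative, not the actual handler from this series.

#include <machine/vmm.h>

static int
handle_one_exit(struct vcpu *vcpu)
{
	struct vm_exit *vme;
	int error;

	error = vm_run(vcpu);
	if (error != 0)
		return (error);

	vme = vm_exitinfo(vcpu);
	switch (vme->exitcode) {
	case VM_EXITCODE_ECALL:
		/* SBI call; arguments are in vme->u.ecall.args[]. */
		break;
	case VM_EXITCODE_PAGING:
		/* Guest page fault on vme->u.paging.gpa. */
		break;
	case VM_EXITCODE_INST_EMUL:
		/* MMIO access described by vme->u.inst_emul.vie. */
		break;
	case VM_EXITCODE_SUSPENDED:
	case VM_EXITCODE_DEBUG:
	case VM_EXITCODE_WFI:
	default:
		break;
	}
	return (0);
}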
Index: sys/riscv/include/vmm_dev.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm_dev.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+
+struct vm_munmap {
+ vm_paddr_t gpa;
+ size_t len;
+};
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_register_set {
+ int cpuid;
+ unsigned int count;
+ const int *regnums; /* enum vm_reg_name */
+ uint64_t *regvals;
+};
+
+struct vm_run {
+ int cpuid;
+ cpuset_t *cpuset; /* CPU set storage */
+ size_t cpusetsize;
+ struct vm_exit *vm_exit;
+};
+
+struct vm_exception {
+ int cpuid;
+ uint64_t scause;
+};
+
+struct vm_msi {
+ uint64_t msg;
+ uint64_t addr;
+ int bus;
+ int slot;
+ int func;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int index; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ struct vm_guest_paging paging;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_cpuset {
+ int which;
+ int cpusetsize;
+ cpuset_t *cpus;
+};
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
+
+struct vm_aplic_descr {
+ uint64_t mem_start;
+ uint64_t mem_size;
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+struct vm_cpu_topology {
+ uint16_t sockets;
+ uint16_t cores;
+ uint16_t threads;
+ uint16_t maxcpus;
+};
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA_NOFAULT = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
+ IOCNUM_MUNMAP_MEMSEG = 18,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+ IOCNUM_SET_REGISTER_SET = 24,
+ IOCNUM_GET_REGISTER_SET = 25,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+ IOCNUM_RAISE_MSI = 82,
+ IOCNUM_INJECT_EXCEPTION = 83,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
+
+ /* vm_attach_aplic */
+ IOCNUM_ATTACH_APLIC = 110,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
+#define VM_MUNMAP_MEMSEG \
+ _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_REGISTER_SET \
+ _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
+#define VM_GET_REGISTER_SET \
+ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_RAISE_MSI \
+ _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi)
+#define VM_INJECT_EXCEPTION \
+ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GLA2GPA_NOFAULT \
+ _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
+#define VM_ATTACH_APLIC \
+ _IOW('v', IOCNUM_ATTACH_APLIC, struct vm_aplic_descr)
+#endif
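These ioctls mirror the amd64/arm64 vmm_dev interface, and in practice userland reaches them through libvmmapi rather than directly. Purely to illustrate the plumbing, a hypothetical direct caller might look like this (the VM name "demo" is made up and must already have been created, e.g. via libvmmapi's vm_create()):

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/ioctl.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
	struct vm_cpu_topology topo;
	int fd;

	fd = open("/dev/vmm/demo", O_RDWR);
	if (fd < 0)
		err(1, "open /dev/vmm/demo");

	if (ioctl(fd, VM_GET_TOPOLOGY, &topo) != 0)
		err(1, "VM_GET_TOPOLOGY");

	printf("%u socket(s), %u core(s)/socket, %u thread(s)/core, "
	    "maxcpus %u\n", topo.sockets, topo.cores, topo.threads,
	    topo.maxcpus);
	return (0);
}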
Index: sys/riscv/include/vmm_instruction_emul.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm_instruction_emul.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t mrr,
+ mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vie'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+ reg_read_t reg_read, reg_write_t reg_write, void *arg);
+void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask);
+
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
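The callback typedefs above are how in-kernel device models claim guest-physical MMIO ranges; the APLIC emulation in this series registers itself the same way. A self-contained sketch of the pattern, with the device, base address and behaviour invented for illustration:

#include <sys/param.h>

#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

static int
demo_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval, int size,
    void *arg)
{

	*rval = 0;	/* All registers of this fake device read as zero. */
	return (0);
}

static int
demo_mmio_write(struct vcpu *vcpu, uint64_t gpa, uint64_t wval, int size,
    void *arg)
{

	return (0);	/* Writes are accepted and ignored. */
}

static void
demo_attach(struct vm *vm)
{

	/* Claim a one-page window at a made-up guest physical address. */
	vm_register_inst_handler(vm, 0x10001000, PAGE_SIZE,
	    demo_mmio_read, demo_mmio_write);
}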
Index: sys/riscv/include/vmm_snapshot.h
===================================================================
--- /dev/null
+++ sys/riscv/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
Index: sys/riscv/riscv/genassym.c
===================================================================
--- sys/riscv/riscv/genassym.c
+++ sys/riscv/riscv/genassym.c
@@ -55,6 +55,8 @@
#include <machine/machdep.h>
#include <machine/vmparam.h>
+#include <riscv/vmm/riscv.h>
+
ASSYM(KERNBASE, KERNBASE);
ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
@@ -98,6 +100,32 @@
ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause));
ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus));
+ASSYM(HYP_H_RA, offsetof(struct hypctx, host_regs.hyp_ra));
+ASSYM(HYP_H_SP, offsetof(struct hypctx, host_regs.hyp_sp));
+ASSYM(HYP_H_GP, offsetof(struct hypctx, host_regs.hyp_gp));
+ASSYM(HYP_H_TP, offsetof(struct hypctx, host_regs.hyp_tp));
+ASSYM(HYP_H_T, offsetof(struct hypctx, host_regs.hyp_t));
+ASSYM(HYP_H_S, offsetof(struct hypctx, host_regs.hyp_s));
+ASSYM(HYP_H_A, offsetof(struct hypctx, host_regs.hyp_a));
+ASSYM(HYP_H_SEPC, offsetof(struct hypctx, host_regs.hyp_sepc));
+ASSYM(HYP_H_SSTATUS, offsetof(struct hypctx, host_regs.hyp_sstatus));
+ASSYM(HYP_H_HSTATUS, offsetof(struct hypctx, host_regs.hyp_hstatus));
+ASSYM(HYP_H_SSCRATCH, offsetof(struct hypctx, host_sscratch));
+ASSYM(HYP_H_STVEC, offsetof(struct hypctx, host_stvec));
+ASSYM(HYP_H_SCOUNTEREN, offsetof(struct hypctx, host_scounteren));
+
+ASSYM(HYP_G_RA, offsetof(struct hypctx, guest_regs.hyp_ra));
+ASSYM(HYP_G_SP, offsetof(struct hypctx, guest_regs.hyp_sp));
+ASSYM(HYP_G_GP, offsetof(struct hypctx, guest_regs.hyp_gp));
+ASSYM(HYP_G_TP, offsetof(struct hypctx, guest_regs.hyp_tp));
+ASSYM(HYP_G_T, offsetof(struct hypctx, guest_regs.hyp_t));
+ASSYM(HYP_G_S, offsetof(struct hypctx, guest_regs.hyp_s));
+ASSYM(HYP_G_A, offsetof(struct hypctx, guest_regs.hyp_a));
+ASSYM(HYP_G_SEPC, offsetof(struct hypctx, guest_regs.hyp_sepc));
+ASSYM(HYP_G_SSTATUS, offsetof(struct hypctx, guest_regs.hyp_sstatus));
+ASSYM(HYP_G_HSTATUS, offsetof(struct hypctx, guest_regs.hyp_hstatus));
+ASSYM(HYP_G_SCOUNTEREN, offsetof(struct hypctx, guest_scounteren));
+
ASSYM(RISCV_BOOTPARAMS_SIZE, sizeof(struct riscv_bootparams));
ASSYM(RISCV_BOOTPARAMS_KERN_PHYS, offsetof(struct riscv_bootparams, kern_phys));
ASSYM(RISCV_BOOTPARAMS_KERN_STACK, offsetof(struct riscv_bootparams,
Index: sys/riscv/riscv/identcpu.c
===================================================================
--- sys/riscv/riscv/identcpu.c
+++ sys/riscv/riscv/identcpu.c
@@ -72,6 +72,7 @@
u_int mmu_caps;
/* Supervisor-mode extension support. */
+bool has_hyp;
bool __read_frequently has_sstc;
bool __read_frequently has_sscofpmf;
@@ -247,6 +248,7 @@
case 'c':
case 'd':
case 'f':
+ case 'h':
case 'i':
case 'm':
desc->isa_extensions |= HWCAP_ISA_BIT(isa[i]);
@@ -412,6 +414,7 @@
UPDATE_CAP(mmu_caps, desc->mmu_caps);
/* Supervisor-mode extension support. */
+ UPDATE_CAP(has_hyp, (desc->isa_extensions & HWCAP_ISA_H) != 0);
UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0);
UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0);
@@ -511,6 +514,7 @@
"\03Compressed"
"\04Double"
"\06Float"
+ "\10Hypervisor"
"\15Mult/Div");
}
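The "\10Hypervisor" entry feeds the kernel's "%b" bit-decoding format, which numbers bits from 1 in octal, so 'h' (the 8th letter, HWCAP bit 7) becomes "\10". A throwaway userland check of that arithmetic, for illustration only:

#include <stdio.h>

/* Same per-letter layout as HWCAP_ISA_BIT() in <machine/elf.h>. */
#define	ISA_BIT(c)	(1 << ((c) - 'a'))

int
main(void)
{

	/* Prints: h: value 0x80, "%b" position \10 */
	printf("h: value 0x%x, \"%%b\" position \\%o\n",
	    ISA_BIT('h'), 'h' - 'a' + 1);
	return (0);
}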
Index: sys/riscv/vmm/riscv.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/riscv.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_RISCV_H_
+#define _VMM_RISCV_H_
+
+#include <machine/reg.h>
+#include <machine/pcpu.h>
+#include <machine/vmm.h>
+
+struct hypregs {
+ uint64_t hyp_ra;
+ uint64_t hyp_sp;
+ uint64_t hyp_gp;
+ uint64_t hyp_tp;
+ uint64_t hyp_t[7];
+ uint64_t hyp_s[12];
+ uint64_t hyp_a[8];
+ uint64_t hyp_sepc;
+ uint64_t hyp_sstatus;
+ uint64_t hyp_hstatus;
+};
+
+struct hypcsr {
+ uint64_t hvip;
+ uint64_t vsstatus;
+ uint64_t vsie;
+ uint64_t vstvec;
+ uint64_t vsscratch;
+ uint64_t vsepc;
+ uint64_t vscause;
+ uint64_t vstval;
+ uint64_t vsatp;
+ uint64_t scounteren;
+ uint64_t senvcfg;
+};
+
+struct hypctx {
+ struct hypregs host_regs;
+ struct hypregs guest_regs;
+ struct hypcsr guest_csrs;
+ uint64_t host_sscratch;
+ uint64_t host_stvec;
+ uint64_t host_scounteren;
+ uint64_t guest_scounteren;
+ struct hyp *hyp;
+ struct vcpu *vcpu;
+ bool has_exception;
+};
+
+struct hyp {
+ struct vm *vm;
+ uint64_t vmid_generation;
+ bool aplic_attached;
+ struct aplic *aplic;
+ struct hypctx *ctx[];
+};
+
+#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \
+ ret_type vmmops_##opname args;
+
+DEFINE_VMMOPS_IFUNC(int, modinit, (void))
+DEFINE_VMMOPS_IFUNC(int, modcleanup, (void))
+DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap))
+DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault))
+DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap,
+ struct vm_eventinfo *info))
+DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi))
+DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+ int vcpu_id))
+DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui))
+DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause))
+DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
+DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val))
+DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval))
+DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val))
+DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min,
+ vm_offset_t max))
+DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace))
+
+#define dprintf(fmt, ...)
+
+struct hypctx *riscv_get_active_vcpu(void);
+void vmm_switch(struct hypctx *);
+int vmm_sbi_ecall(struct vcpu *, bool *);
+
+#endif /* !_VMM_RISCV_H_ */
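Despite the IFUNC-style name carried over from the other ports, in this header DEFINE_VMMOPS_IFUNC only emits an ordinary prototype for the vmmops_* backend entry points that vmm.c calls and vmm_riscv.c implements. One expansion, written out for clarity:

/*
 * DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval))
 * expands to exactly this declaration.
 */
int vmmops_getreg(void *vcpui, int num, uint64_t *retval);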
Index: sys/riscv/vmm/vmm.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm.c
@@ -0,0 +1,1541 @@
+/*-
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/riscvreg.h>
+#include <machine/cpu.h>
+#include <machine/fpe.h>
+#include <machine/machdep.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+#include <machine/vm.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <dev/pci/pcireg.h>
+
+#include "vmm_ktr.h"
+#include "vmm_stat.h"
+#include "riscv.h"
+
+#include "vmm_aplic.h"
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ int vcpuid;
+ void *stats;
+ struct vm_exit exitinfo;
+ uint64_t nextpc; /* (x) next instruction to execute */
+ struct vm *vm; /* (o) */
+ void *cookie; /* (i) cpu-specific data */
+ struct fpreg *guestfpu; /* (a,i) guest fpu state */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ bool sysmem;
+ vm_object_t object;
+};
+#define VM_MAX_MEMSEGS 3
+
+struct mem_map {
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
+};
+#define VM_MAX_MEMMAPS 4
+
+struct vmm_mmio_region {
+ uint64_t start;
+ uint64_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+#define VM_MAX_MMIO_REGIONS 4
+
+/*
+ * Initialization:
+ * (o) initialized the first time the VM is created
+ * (i) initialized when VM is created and when it is reinitialized
+ * (x) initialized before use
+ */
+struct vm {
+ void *cookie; /* (i) cpu-specific data */
+ volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/
+ int suspend; /* (i) stop VM execution */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
+ struct vmspace *vmspace; /* (o) guest's address space */
+ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
+ struct vcpu **vcpu; /* (i) guest vcpus */
+ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
+ /* (o) guest MMIO regions */
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
+ struct sx mem_segs_lock; /* (o) */
+ struct sx vcpus_init_lock; /* (o) */
+};
+
+static bool vmm_initialized = false;
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+u_int vm_maxcpu;
+SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &vm_maxcpu, 0, "Maximum number of vCPUs");
+
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
+static void vcpu_notify_event_locked(struct vcpu *vcpu);
+
+/*
+ * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
+ * is a safe value for now.
+ */
+#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)
+
+static void
+vcpu_cleanup(struct vcpu *vcpu, bool destroy)
+{
+ vmmops_vcpu_cleanup(vcpu->cookie);
+ vcpu->cookie = NULL;
+ if (destroy) {
+ vmm_stat_free(vcpu->stats);
+ fpu_save_area_free(vcpu->guestfpu);
+ vcpu_lock_destroy(vcpu);
+ }
+}
+
+static struct vcpu *
+vcpu_alloc(struct vm *vm, int vcpu_id)
+{
+ struct vcpu *vcpu;
+
+ KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
+ ("vcpu_alloc: invalid vcpu %d", vcpu_id));
+
+ vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
+ vcpu_lock_init(vcpu);
+ vcpu->state = VCPU_IDLE;
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ vcpu->vm = vm;
+ vcpu->guestfpu = fpu_save_area_alloc();
+ vcpu->stats = vmm_stat_alloc();
+ return (vcpu);
+}
+
+static void
+vcpu_init(struct vcpu *vcpu)
+{
+ vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
+ MPASS(vcpu->cookie != NULL);
+ fpu_save_area_reset(vcpu->guestfpu);
+ vmm_stat_init(vcpu->stats);
+}
+
+struct vm_exit *
+vm_exitinfo(struct vcpu *vcpu)
+{
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+
+ vm_maxcpu = mp_ncpus;
+
+ TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
+
+ if (vm_maxcpu > VM_MAXCPU) {
+ printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
+ vm_maxcpu = VM_MAXCPU;
+ }
+
+ if (vm_maxcpu == 0)
+ vm_maxcpu = 1;
+
+ return (vmmops_modinit());
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ /* TODO: check if has_hyp here? */
+ vmmdev_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = true;
+ break;
+ case MOD_UNLOAD:
+ /* TODO: check if has_hyp here? */
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = vmmops_modcleanup();
+ if (error)
+ vmm_initialized = false;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+static void
+vm_init(struct vm *vm, bool create)
+{
+ int i;
+
+ vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+ MPASS(vm->cookie != NULL);
+
+ CPU_ZERO(&vm->active_cpus);
+ CPU_ZERO(&vm->debug_cpus);
+
+ vm->suspend = 0;
+ CPU_ZERO(&vm->suspended_cpus);
+
+ memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
+
+ if (!create) {
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_init(vm->vcpu[i]);
+ }
+ }
+}
+
+struct vcpu *
+vm_alloc_vcpu(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
+ return (NULL);
+
+ /* Some interrupt controllers may have a CPU limit */
+ if (vcpuid >= aplic_max_cpu_count(vm->cookie))
+ return (NULL);
+
+ vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
+ if (__predict_true(vcpu != NULL))
+ return (vcpu);
+
+ sx_xlock(&vm->vcpus_init_lock);
+ vcpu = vm->vcpu[vcpuid];
+ if (vcpu == NULL/* && !vm->dying*/) {
+ vcpu = vcpu_alloc(vm, vcpuid);
+ vcpu_init(vcpu);
+
+ /*
+ * Ensure vCPU is fully created before updating pointer
+ * to permit unlocked reads above.
+ */
+ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
+ (uintptr_t)vcpu);
+ }
+ sx_xunlock(&vm->vcpus_init_lock);
+ return (vcpu);
+}
+
+void
+vm_slock_vcpus(struct vm *vm)
+{
+ sx_slock(&vm->vcpus_init_lock);
+}
+
+void
+vm_unlock_vcpus(struct vm *vm)
+{
+ sx_unlock(&vm->vcpus_init_lock);
+}
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ struct vm *vm;
+ struct vmspace *vmspace;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
+ vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
+ if (vmspace == NULL)
+ return (ENOMEM);
+
+ vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->vmspace = vmspace;
+ sx_init(&vm->mem_segs_lock, "vm mem_segs");
+ sx_init(&vm->vcpus_init_lock, "vm vcpus");
+
+ vm->sockets = 1;
+ vm->cores = 1; /* XXX backwards compatibility */
+ vm->threads = 1; /* XXX backwards compatibility */
+ vm->maxcpus = vm_maxcpu;
+
+ vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
+ M_WAITOK | M_ZERO);
+
+ vm_init(vm, true);
+
+ *retvm = vm;
+ return (0);
+}
+
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus)
+{
+ *sockets = vm->sockets;
+ *cores = vm->cores;
+ *threads = vm->threads;
+ *maxcpus = vm->maxcpus;
+}
+
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus)
+{
+ /* Ignore maxcpus. */
+ if ((sockets * cores * threads) > vm->maxcpus)
+ return (EINVAL);
+ vm->sockets = sockets;
+ vm->cores = cores;
+ vm->threads = threads;
+ return(0);
+}
+
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+ struct mem_map *mm;
+ int i;
+
+ aplic_detach_from_vm(vm->cookie);
+
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (vm->vcpu[i] != NULL)
+ vcpu_cleanup(vm->vcpu[i], destroy);
+ }
+
+ vmmops_cleanup(vm->cookie);
+
+ /*
+ * System memory is removed from the guest address space only when
+ * the VM is destroyed. This is because the mapping remains the same
+ * across VM reset.
+ *
+ * Device memory can be relocated by the guest (e.g. using PCI BARs)
+ * so those mappings are removed on a VM reset.
+ */
+ if (!destroy) {
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (destroy || !sysmem_mapping(vm, mm))
+ vm_free_memmap(vm, i);
+ }
+ }
+
+ if (destroy) {
+ for (i = 0; i < VM_MAX_MEMSEGS; i++)
+ vm_free_memseg(vm, i);
+
+ vmmops_vmspace_free(vm->vmspace);
+ vm->vmspace = NULL;
+
+ for (i = 0; i < vm->maxcpus; i++)
+ free(vm->vcpu[i], M_VMM);
+ free(vm->vcpu, M_VMM);
+ sx_destroy(&vm->vcpus_init_lock);
+ sx_destroy(&vm->mem_segs_lock);
+ }
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+
+ vm_cleanup(vm, true);
+
+ free(vm, M_VMM);
+}
+
+int
+vm_reinit(struct vm *vm)
+{
+ int error;
+
+ /*
+ * A virtual machine can be reset only if all vcpus are suspended.
+ */
+ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
+ vm_cleanup(vm, false);
+ vm_init(vm, false);
+ error = 0;
+ } else {
+ error = EBUSY;
+ }
+
+ return (error);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+void
+vm_slock_memsegs(struct vm *vm)
+{
+ sx_slock(&vm->mem_segs_lock);
+}
+
+void
+vm_xlock_memsegs(struct vm *vm)
+{
+ sx_xlock(&vm->mem_segs_lock);
+}
+
+void
+vm_unlock_memsegs(struct vm *vm)
+{
+ sx_unlock(&vm->mem_segs_lock);
+}
+
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
+{
+ struct vm *vm = vcpu->vm;
+ struct mem_map *mm;
+ int i;
+
+#ifdef INVARIANTS
+ int hostcpu, state;
+ state = vcpu_get_state(vcpu, &hostcpu);
+ KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+ ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+ return (true); /* 'gpa' is sysmem or devmem */
+ }
+
+ return (false);
+}
+
+int
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+{
+ struct mem_seg *seg;
+ vm_object_t obj;
+
+ sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ if (len == 0 || (len & PAGE_MASK))
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ if (seg->len == len && seg->sysmem == sysmem)
+ return (EEXIST);
+ else
+ return (EINVAL);
+ }
+
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ if (obj == NULL)
+ return (ENOMEM);
+
+ seg->len = len;
+ seg->object = obj;
+ seg->sysmem = sysmem;
+ return (0);
+}
+
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ vm_object_t *objptr)
+{
+ struct mem_seg *seg;
+
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[ident];
+ if (len)
+ *len = seg->len;
+ if (sysmem)
+ *sysmem = seg->sysmem;
+ if (objptr)
+ *objptr = seg->object;
+ return (0);
+}
+
+void
+vm_free_memseg(struct vm *vm, int ident)
+{
+ struct mem_seg *seg;
+
+ KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+ ("%s: invalid memseg ident %d", __func__, ident));
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ vm_object_deallocate(seg->object);
+ bzero(seg, sizeof(struct mem_seg));
+ }
+}
+
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+ size_t len, int prot, int flags)
+{
+ struct mem_seg *seg;
+ struct mem_map *m, *map;
+ vm_ooffset_t last;
+ int i, error;
+
+ dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len);
+
+ if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+ return (EINVAL);
+
+ if (flags & ~VM_MEMMAP_F_WIRED)
+ return (EINVAL);
+
+ if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+
+ seg = &vm->mem_segs[segid];
+ if (seg->object == NULL)
+ return (EINVAL);
+
+ last = first + len;
+ if (first < 0 || first >= last || last > seg->len)
+ return (EINVAL);
+
+ if ((gpa | first | last) & PAGE_MASK)
+ return (EINVAL);
+
+ map = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->len == 0) {
+ map = m;
+ break;
+ }
+ }
+
+ if (map == NULL)
+ return (ENOSPC);
+
+ error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+ len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ if (error != KERN_SUCCESS)
+ return (EFAULT);
+
+ vm_object_reference(seg->object);
+
+ if (flags & VM_MEMMAP_F_WIRED) {
+ error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ if (error != KERN_SUCCESS) {
+ vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+ return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
+ EFAULT);
+ }
+ }
+
+ map->gpa = gpa;
+ map->len = len;
+ map->segoff = first;
+ map->segid = segid;
+ map->prot = prot;
+ map->flags = flags;
+ return (0);
+}
+
+int
+vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
+{
+ struct mem_map *m;
+ int i;
+
+ dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len);
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->gpa == gpa && m->len == len) {
+ vm_free_memmap(vm, i);
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+ struct mem_map *mm, *mmnext;
+ int i;
+
+ mmnext = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len == 0 || mm->gpa < *gpa)
+ continue;
+ if (mmnext == NULL || mm->gpa < mmnext->gpa)
+ mmnext = mm;
+ }
+
+ if (mmnext != NULL) {
+ *gpa = mmnext->gpa;
+ if (segid)
+ *segid = mmnext->segid;
+ if (segoff)
+ *segoff = mmnext->segoff;
+ if (len)
+ *len = mmnext->len;
+ if (prot)
+ *prot = mmnext->prot;
+ if (flags)
+ *flags = mmnext->flags;
+ return (0);
+ } else {
+ return (ENOENT);
+ }
+}
+
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+ struct mem_map *mm;
+ int error __diagused;
+
+ mm = &vm->mem_maps[ident];
+ if (mm->len) {
+ error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+ mm->gpa + mm->len);
+ KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+ __func__, error));
+ bzero(mm, sizeof(struct mem_map));
+ }
+}
+
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+ if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+ return (true);
+ else
+ return (false);
+}
+
+vm_paddr_t
+vmm_sysmem_maxaddr(struct vm *vm)
+{
+ struct mem_map *mm;
+ vm_paddr_t maxaddr;
+ int i;
+
+ maxaddr = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm)) {
+ if (maxaddr < mm->gpa + mm->len)
+ maxaddr = mm->gpa + mm->len;
+ }
+ }
+ return (maxaddr);
+}
+
+int
+vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
+ uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
+{
+
+ vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
+ return (0);
+}
+
+void
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+ mem_region_read_t mmio_read, mem_region_write_t mmio_write)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == 0 &&
+ vm->mmio_region[i].end == 0) {
+ vm->mmio_region[i].start = start;
+ vm->mmio_region[i].end = start + size;
+ vm->mmio_region[i].read = mmio_read;
+ vm->mmio_region[i].write = mmio_write;
+ return;
+ }
+ }
+
+ panic("%s: No free MMIO region", __func__);
+}
+
+void
+vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
+{
+ int i;
+
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start == start &&
+ vm->mmio_region[i].end == start + size) {
+ memset(&vm->mmio_region[i], 0,
+ sizeof(vm->mmio_region[i]));
+ return;
+ }
+ }
+
+ panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
+ start + size);
+}
+
+static int
+vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vie *vie;
+ struct hyp *hyp;
+ uint64_t fault_ipa;
+ struct vm_guest_paging *paging;
+ struct vmm_mmio_region *vmr;
+ int error, i;
+
+ vm = vcpu->vm;
+ hyp = vm->cookie;
+ if (!hyp->aplic_attached)
+ goto out_user;
+
+ vme = &vcpu->exitinfo;
+ vie = &vme->u.inst_emul.vie;
+ paging = &vme->u.inst_emul.paging;
+
+ fault_ipa = vme->u.inst_emul.gpa;
+
+ vmr = NULL;
+ for (i = 0; i < nitems(vm->mmio_region); i++) {
+ if (vm->mmio_region[i].start <= fault_ipa &&
+ vm->mmio_region[i].end > fault_ipa) {
+ vmr = &vm->mmio_region[i];
+ break;
+ }
+ }
+ if (vmr == NULL)
+ goto out_user;
+
+ error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
+ vmr->read, vmr->write, retu);
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+int
+vm_suspend(struct vm *vm, enum vm_suspend_how how)
+{
+ int i;
+
+ if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
+ return (EINVAL);
+
+ if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
+ VM_CTR2(vm, "virtual machine already suspended %d/%d",
+ vm->suspend, how);
+ return (EALREADY);
+ }
+
+ VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+
+ /*
+ * Notify all active vcpus that they are now suspended.
+ */
+ for (i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+
+ return (0);
+}
+
+void
+vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm *vm = vcpu->vm;
+ struct vm_exit *vmexit;
+
+ KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+ ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+ vmexit->u.suspended.how = vm->suspend;
+}
+
+void
+vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
+{
+ struct vm_exit *vmexit;
+
+ vmexit = vm_exitinfo(vcpu);
+ vmexit->pc = pc;
+ vmexit->inst_length = 4;
+ vmexit->exitcode = VM_EXITCODE_DEBUG;
+}
+
+int
+vm_activate_cpu(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+
+ if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EBUSY);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
+ return (0);
+
+}
+
+int
+vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+ if (vcpu == NULL) {
+ vm->debug_cpus = vm->active_cpus;
+ for (int i = 0; i < vm->maxcpus; i++) {
+ if (CPU_ISSET(i, &vm->active_cpus))
+ vcpu_notify_event(vm_vcpu(vm, i));
+ }
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ vcpu_notify_event(vcpu);
+ }
+ return (0);
+}
+
+int
+vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+
+ if (vcpu == NULL) {
+ CPU_ZERO(&vm->debug_cpus);
+ } else {
+ if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
+ return (EINVAL);
+
+ CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+ }
+ return (0);
+}
+
+int
+vcpu_debugged(struct vcpu *vcpu)
+{
+
+ return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+ return (vm->debug_cpus);
+}
+
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+ return (vm->suspended_cpus);
+}
+
+
+void *
+vcpu_stats(struct vcpu *vcpu)
+{
+
+ return (vcpu->stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be directed
+ * to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu)
+{
+ int hostcpu;
+
+ hostcpu = vcpu->hostcpu;
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+ if (hostcpu != curcpu) {
+ ipi_cpu(hostcpu, vmm_ipinum);
+ } else {
+ /*
+ * If the 'vcpu' is running on 'curcpu' then it must
+ * be sending a notification to itself (e.g. SELF_IPI).
+ * The pending event will be picked up when the vcpu
+ * transitions back to guest context.
+ */
+ }
+ } else {
+ KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+ "with hostcpu %d", vcpu->state, hostcpu));
+ if (vcpu->state == VCPU_SLEEPING)
+ wakeup_one(vcpu);
+ }
+}
+
+void
+vcpu_notify_event(struct vcpu *vcpu)
+{
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu);
+ vcpu_unlock(vcpu);
+}
+
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* Flush host state to the pcb. */
+ fpe_state_save(curthread);
+
+	/* Ensure the FPU state will be re-loaded when exiting the guest. */
+ PCPU_SET(fpcurthread, NULL);
+
+ /* restore guest FPU state */
+ fpe_enable();
+ fpe_restore(vcpu->guestfpu);
+
+ /*
+	 * The FPU is now "dirty" with the guest's state, so disable it in
+	 * order to trap any access to the FPU by the host.
+ */
+ fpe_disable();
+}
+
+static void
+save_guest_fpustate(struct vcpu *vcpu)
+{
+
+ /* Save guest FPE state. */
+ fpe_enable();
+ fpe_store(vcpu->guestfpu);
+ fpe_disable();
+
+ KASSERT(PCPU_GET(fpcurthread) == NULL,
+ ("%s: fpcurthread set with guest registers", __func__));
+}
+
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+
+ vcpu_assert_locked(vcpu);
+
+ /*
+ * State transitions from the vmmdev_ioctl() must always begin from
+ * the VCPU_IDLE state. This guarantees that there is only a single
+ * ioctl() operating on a vcpu at any point.
+ */
+ if (from_idle) {
+ while (vcpu->state != VCPU_IDLE) {
+ vcpu_notify_event_locked(vcpu);
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat",
+ hz / 1000);
+ }
+ } else {
+ KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+ "vcpu idle state"));
+ }
+
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+ "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+ } else {
+ KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+ "vcpu that is not running", vcpu->hostcpu));
+ }
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> FROZEN -> IDLE
+ * FROZEN -> RUNNING -> FROZEN
+ * FROZEN -> SLEEPING -> FROZEN
+ */
+ switch (vcpu->state) {
+ case VCPU_IDLE:
+ case VCPU_RUNNING:
+ case VCPU_SLEEPING:
+ error = (newstate != VCPU_FROZEN);
+ break;
+ case VCPU_FROZEN:
+ error = (newstate == VCPU_FROZEN);
+ break;
+ default:
+ error = 1;
+ break;
+ }
+
+ if (error)
+ return (EBUSY);
+
+ vcpu->state = newstate;
+ if (newstate == VCPU_RUNNING)
+ vcpu->hostcpu = curcpu;
+ else
+ vcpu->hostcpu = NOCPU;
+
+ if (newstate == VCPU_IDLE)
+ wakeup(&vcpu->state);
+
+ return (0);
+}
+
+static void
+vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d\n", error, newstate);
+}
+
+static void
+vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+{
+ int error;
+
+ if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+ panic("Error %d setting state to %d", error, newstate);
+}
+
+int
+vm_get_capability(struct vcpu *vcpu, int type, int *retval)
+{
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_getcap(vcpu->cookie, type, retval));
+}
+
+int
+vm_set_capability(struct vcpu *vcpu, int type, int val)
+{
+
+ if (type < 0 || type >= VM_CAP_MAX)
+ return (EINVAL);
+
+ return (vmmops_setcap(vcpu->cookie, type, val));
+}
+
+struct vm *
+vcpu_vm(struct vcpu *vcpu)
+{
+
+ return (vcpu->vm);
+}
+
+int
+vcpu_vcpuid(struct vcpu *vcpu)
+{
+
+ return (vcpu->vcpuid);
+}
+
+void *
+vcpu_get_cookie(struct vcpu *vcpu)
+{
+
+ return (vcpu->cookie);
+}
+
+struct vcpu *
+vm_vcpu(struct vm *vm, int vcpuid)
+{
+
+ return (vm->vcpu[vcpuid]);
+}
+
+int
+vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
+{
+ int error;
+
+ vcpu_lock(vcpu);
+ error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
+{
+ enum vcpu_state state;
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ if (hostcpu != NULL)
+ *hostcpu = vcpu->hostcpu;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+static void *
+_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ int i, count, pageoff;
+ struct mem_map *mm;
+ vm_page_t m;
+
+ pageoff = gpa & PAGE_MASK;
+ if (len > PAGE_SIZE - pageoff)
+ panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
+
+ count = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+ gpa < mm->gpa + mm->len) {
+ count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+ trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ break;
+ }
+ }
+
+ if (count == 1) {
+ *cookie = m;
+ return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
+ } else {
+ *cookie = NULL;
+ return (NULL);
+ }
+}
+
+void *
+vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+#ifdef INVARIANTS
+ /*
+ * The current vcpu should be frozen to ensure 'vm_memmap[]'
+ * stability.
+ */
+ int state = vcpu_get_state(vcpu, NULL);
+ KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+ __func__, state));
+#endif
+ return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
+}
+
+void *
+vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+ void **cookie)
+{
+ sx_assert(&vm->mem_segs_lock, SX_LOCKED);
+ return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
+}
+
+void
+vm_gpa_release(void *cookie)
+{
+ vm_page_t m = cookie;
+
+ vm_page_unwire(m, PQ_ACTIVE);
+}
+
+int
+vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
+{
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (vmmops_getreg(vcpu->cookie, reg, retval));
+}
+
+int
+vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
+{
+ int error;
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+ error = vmmops_setreg(vcpu->cookie, reg, val);
+ if (error || reg != VM_REG_GUEST_SEPC)
+ return (error);
+
+ vcpu->nextpc = val;
+
+ return (0);
+}
+
+void *
+vm_get_cookie(struct vm *vm)
+{
+
+ return (vm->cookie);
+}
+
+int
+vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
+{
+
+ return (vmmops_exception(vcpu->cookie, scause));
+}
+
+int
+vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
+{
+
+ return (aplic_attach_to_vm(vm->cookie, descr));
+}
+
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+
+ return (aplic_inject_irq(vm->cookie, -1, irq, true));
+}
+
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+
+ return (aplic_inject_irq(vm->cookie, -1, irq, false));
+}
+
+int
+vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
+ int func)
+{
+
+ return (aplic_inject_msi(vm->cookie, msg, addr));
+}
+
+static int
+vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
+{
+
+ vcpu_lock(vcpu);
+
+ while (1) {
+ if (aplic_check_pending(vcpu->cookie))
+ break;
+
+ if (vcpu_should_yield(vcpu))
+ break;
+
+ vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ /*
+ * XXX msleep_spin() cannot be interrupted by signals so
+ * wake up periodically to check pending signals.
+ */
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000);
+ vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ }
+ vcpu_unlock(vcpu);
+
+ *retu = false;
+
+ return (0);
+}
+
+static int
+vm_handle_paging(struct vcpu *vcpu, bool *retu)
+{
+ struct vm *vm;
+ struct vm_exit *vme;
+ struct vm_map *map;
+ uint64_t addr;
+ pmap_t pmap;
+ int ftype, rv;
+
+ vm = vcpu->vm;
+ vme = &vcpu->exitinfo;
+
+ pmap = vmspace_pmap(vm->vmspace);
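+
+	/*
+	 * Per the RISC-V hypervisor extension, htval holds the faulting
+	 * guest physical address shifted right by two bits; recover the
+	 * page-aligned GPA from it below.
+	 */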
+ addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);
+
+ dprintf("%s: %lx\n", __func__, addr);
+
+ switch (vme->scause) {
+ case SCAUSE_STORE_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_WRITE;
+ break;
+ case SCAUSE_FETCH_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_EXECUTE;
+ break;
+ case SCAUSE_LOAD_GUEST_PAGE_FAULT:
+ ftype = VM_PROT_READ;
+ break;
+ default:
+ panic("unknown page trap: %lu", vme->scause);
+ }
+
+ /* The page exists, but the page table needs to be updated. */
+ if (pmap_fault(pmap, addr, ftype) != KERN_SUCCESS) {
+ //printf("%s: pmap_fault failed\n", __func__);
+ return (0);
+ }
+
+ map = &vm->vmspace->vm_map;
+ rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
+ if (rv != KERN_SUCCESS) {
+ printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
+ __func__, addr, ftype, rv);
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+int
+vm_run(struct vcpu *vcpu)
+{
+ struct vm_eventinfo evinfo;
+ struct vm_exit *vme;
+ struct vm *vm;
+ struct hypctx *hypctx;
+ pmap_t pmap;
+ int error;
+ int vcpuid;
+ int i;
+ bool retu;
+
+ vm = vcpu->vm;
+
+ dprintf("%s\n", __func__);
+
+ vcpuid = vcpu->vcpuid;
+
+ if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
+ return (EINVAL);
+
+ pmap = vmspace_pmap(vm->vmspace);
+ vme = &vcpu->exitinfo;
+ evinfo.rptr = NULL;
+ evinfo.sptr = &vm->suspend;
+ evinfo.iptr = NULL;
+restart:
+ critical_enter();
+
+ restore_guest_fpustate(vcpu);
+
+ vcpu_require_state(vcpu, VCPU_RUNNING);
+ error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
+ vcpu_require_state(vcpu, VCPU_FROZEN);
+
+ save_guest_fpustate(vcpu);
+
+ critical_exit();
+
+ if (error == 0) {
+ retu = false;
+ switch (vme->exitcode) {
+ case VM_EXITCODE_INST_EMUL:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_inst_emul(vcpu, &retu);
+ break;
+ case VM_EXITCODE_WFI:
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vm_handle_wfi(vcpu, vme, &retu);
+ break;
+ case VM_EXITCODE_ECALL:
+ /* Handle in userland. */
+ vcpu->nextpc = vme->pc + vme->inst_length;
+ error = vmm_sbi_ecall(vcpu, &retu);
+ if (retu == true) {
+ hypctx = vcpu_get_cookie(vcpu);
+ for (i = 0; i < nitems(vme->u.ecall.args); i++)
+ vme->u.ecall.args[i] =
+ hypctx->guest_regs.hyp_a[i];
+ }
+ break;
+ case VM_EXITCODE_PAGING:
+ vcpu->nextpc = vme->pc;
+ error = vm_handle_paging(vcpu, &retu);
+ break;
+ default:
+ /* Handle in userland. */
+ vcpu->nextpc = vme->pc;
+ retu = true;
+ break;
+ }
+ }
+
+ if (error == 0 && retu == false)
+ goto restart;
+
+ return (error);
+}
Index: sys/riscv/vmm/vmm_aplic.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_aplic.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_APLIC_H_
+#define _VMM_APLIC_H_
+
+struct hyp;
+struct hypctx;
+struct vm_aplic_descr;
+
+int aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr);
+void aplic_detach_from_vm(struct hyp *hyp);
+int aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level);
+int aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr);
+void aplic_vminit(struct hyp *hyp);
+void aplic_vmcleanup(struct hyp *hyp);
+int aplic_check_pending(struct hypctx *hypctx);
+
+void aplic_cpuinit(struct hypctx *hypctx);
+void aplic_cpucleanup(struct hypctx *hypctx);
+void aplic_flush_hwstate(struct hypctx *hypctx);
+void aplic_sync_hwstate(struct hypctx *hypctx);
+int aplic_max_cpu_count(struct hyp *hyp);
+
+#endif /* !_VMM_APLIC_H_ */
Index: sys/riscv/vmm/vmm_aplic.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_aplic.c
@@ -0,0 +1,461 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+
+#include <riscv/vmm/riscv.h>
+#include <riscv/vmm/vmm_aplic.h>
+
+#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_dev.h>
+
+MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC");
+
+#define APLIC_DOMAINCFG 0x0000
+#define DOMAINCFG_IE (1 << 8) /* Interrupt Enable. */
+#define DOMAINCFG_DM (1 << 2) /* Direct Mode. */
+#define DOMAINCFG_BE (1 << 0) /* Big-Endian. */
+#define APLIC_SOURCECFG(x) (0x0004 + ((x) - 1) * 4)
+#define SOURCECFG_D (1 << 10) /* D - Delegate. */
+/* If D == 0. */
+#define SOURCECFG_SM_S (0)
+#define SOURCECFG_SM_M (0x7 << SOURCECFG_SM_S)
+#define SOURCECFG_SM_INACTIVE (0) /* Not delegated. */
+#define SOURCECFG_SM_DETACHED (1)
+#define SOURCECFG_SM_RESERVED (2)
+#define SOURCECFG_SM_RESERVED1 (3)
+#define SOURCECFG_SM_EDGE1 (4) /* Rising edge. */
+#define SOURCECFG_SM_EDGE0 (5) /* Falling edge. */
+#define SOURCECFG_SM_LEVEL1 (6) /* High. */
+#define SOURCECFG_SM_LEVEL0 (7) /* Low. */
+/* If D == 1. */
+#define SOURCECFG_CHILD_INDEX_S (0)
+#define SOURCECFG_CHILD_INDEX_M (0x3ff << SOURCECFG_CHILD_INDEX_S)
+#define APLIC_SETIPNUM 0x1cdc
+#define APLIC_CLRIPNUM 0x1ddc
+#define APLIC_SETIENUM 0x1edc
+#define APLIC_CLRIENUM 0x1fdc
+#define APLIC_GENMSI 0x3000
+#define APLIC_TARGET(x) (0x3004 + ((x) - 1) * 4)
+#define APLIC_IDC(x) (0x4000 + (x) * 32)
+#define IDC_IDELIVERY(x) (APLIC_IDC(x) + 0x0)
+#define IDC_IFORCE(x) (APLIC_IDC(x) + 0x4)
+#define IDC_ITHRESHOLD(x) (APLIC_IDC(x) + 0x8)
+#define IDC_TOPI(x) (APLIC_IDC(x) + 0x18)
+#define IDC_CLAIMI(x) (APLIC_IDC(x) + 0x1C)
+#define CLAIMI_IRQ_S (16)
+#define CLAIMI_IRQ_M (0x3ff << CLAIMI_IRQ_S)
+#define CLAIMI_PRIO_S (0)
+#define CLAIMI_PRIO_M (0xff << CLAIMI_PRIO_S)
+
+struct aplic_irq {
+ uint32_t sourcecfg;
+ uint32_t state;
+#define APLIC_IRQ_STATE_PENDING (1 << 0)
+#define APLIC_IRQ_STATE_ENABLED (1 << 1)
+ uint32_t target;
+};
+
+struct aplic {
+ uint32_t mem_start;
+ uint32_t mem_end;
+ struct mtx mtx;
+ struct aplic_irq *irqs;
+ int nirqs;
+ uint32_t domaincfg;
+};
+
+static int
+aplic_handle_sourcecfg(struct aplic *aplic, int i, bool write, uint64_t *val)
+{
+ struct aplic_irq *irq;
+
+ irq = &aplic->irqs[i];
+ if (write)
+ irq->sourcecfg = *val;
+ else
+ *val = irq->sourcecfg;
+
+ return (0);
+}
+
+static int
+aplic_set_enabled(struct aplic *aplic, bool write, uint64_t *val, bool enabled)
+{
+ struct aplic_irq *irq;
+ int i;
+
+ if (!write) {
+ *val = 0;
+ return (0);
+ }
+
+ i = *val;
+ if (i <= 0 || i > aplic->nirqs)
+ return (-1);
+
+ irq = &aplic->irqs[i];
+
+ if (enabled)
+ irq->state |= APLIC_IRQ_STATE_ENABLED;
+ else
+ irq->state &= ~APLIC_IRQ_STATE_ENABLED;
+
+ return (0);
+}
+
+static int
+aplic_handle_target(struct aplic *aplic, int i, bool write, uint64_t *val)
+{
+
+ printf("%s: i %d\n", __func__, i);
+
+ return (0);
+}
+
+static int
+aplic_handle_idc_claimi(struct aplic *aplic, int cpu, bool write, uint64_t *val)
+{
+ struct aplic_irq *irq;
+ int i;
+
+ /* Writes to claimi are ignored. */
+ if (write)
+ return (-1);
+
+ for (i = 0; i < aplic->nirqs; i++) {
+ irq = &aplic->irqs[i];
+ if (irq->state & APLIC_IRQ_STATE_PENDING) {
+ *val = (i << CLAIMI_IRQ_S) | (0 << CLAIMI_PRIO_S);
+ irq->state &= ~APLIC_IRQ_STATE_PENDING;
+ return (0);
+ }
+ }
+
+ panic("claimi without pending");
+
+ return (0);
+}
+
+static int
+aplic_handle_idc(struct aplic *aplic, int cpu, int reg, bool write,
+ uint64_t *val)
+{
+ int error;
+
+ switch (reg + APLIC_IDC(0)) {
+ case IDC_IDELIVERY(0):
+ case IDC_IFORCE(0):
+ case IDC_ITHRESHOLD(0):
+ case IDC_TOPI(0):
+ error = 0;
+ break;
+ case IDC_CLAIMI(0):
+ error = aplic_handle_idc_claimi(aplic, cpu, write, val);
+ break;
+ default:
+ panic("unknown reg");
+ }
+
+ return (error);
+}
+
+static int
+aplic_mmio_access(struct aplic *aplic, uint64_t reg, bool write, uint64_t *val)
+{
+ int error;
+ int cpu;
+ int r;
+ int i;
+
+ if ((reg >= APLIC_SOURCECFG(1)) &&
+ (reg <= APLIC_SOURCECFG(aplic->nirqs))) {
+ i = ((reg - APLIC_SOURCECFG(1)) >> 2) + 1;
+ error = aplic_handle_sourcecfg(aplic, i, write, val);
+ return (error);
+ }
+
+ if ((reg >= APLIC_TARGET(1)) && (reg <= APLIC_TARGET(aplic->nirqs))) {
+ i = (reg - APLIC_TARGET(1)) >> 2;
+ error = aplic_handle_target(aplic, i, write, val);
+ return (error);
+ }
+
+ if ((reg >= APLIC_IDC(0)) && (reg < APLIC_IDC(mp_ncpus))) {
+ cpu = (reg - APLIC_IDC(0)) >> 5;
+ r = (reg - APLIC_IDC(0)) % 32;
+ error = aplic_handle_idc(aplic, cpu, r, write, val);
+ return (error);
+ }
+
+ switch (reg) {
+ case APLIC_DOMAINCFG:
+ aplic->domaincfg = *val & DOMAINCFG_IE;
+ break;
+ case APLIC_SETIENUM:
+ aplic_set_enabled(aplic, write, val, true);
+ break;
+ case APLIC_CLRIENUM:
+ aplic_set_enabled(aplic, write, val, false);
+ break;
+ default:
+ panic("unknown reg %lx", reg);
+ break;
+	}
+
+ return (0);
+}
+
+static int
+mem_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval, int size,
+ void *arg)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ struct aplic *aplic;
+ uint64_t reg;
+ uint64_t val;
+ int error;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+
+ dprintf("%s: fault_ipa %lx size %d\n", __func__, fault_ipa, size);
+
+ if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+ return (EINVAL);
+
+ reg = fault_ipa - aplic->mem_start;
+
+ error = aplic_mmio_access(aplic, reg, false, &val);
+ if (error == 0)
+ *rval = val;
+
+ return (error);
+}
+
+static int
+mem_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval, int size,
+ void *arg)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ struct aplic *aplic;
+ uint64_t reg;
+ uint64_t val;
+ int error;
+
+ hypctx = vcpu_get_cookie(vcpu);
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+
+ dprintf("%s: fault_ipa %lx wval %lx size %d\n", __func__, fault_ipa,
+ wval, size);
+
+ if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+ return (EINVAL);
+
+ reg = fault_ipa - aplic->mem_start;
+
+ val = wval;
+
+ error = aplic_mmio_access(aplic, reg, true, &val);
+
+ return (error);
+}
+
+void
+aplic_vminit(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ hyp->aplic = malloc(sizeof(*hyp->aplic), M_APLIC,
+ M_WAITOK | M_ZERO);
+ aplic = hyp->aplic;
+
+ mtx_init(&aplic->mtx, "APLIC lock", NULL, MTX_SPIN);
+}
+
+void
+aplic_vmcleanup(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+
+ mtx_destroy(&aplic->mtx);
+
+ free(hyp->aplic, M_APLIC);
+}
+
+int
+aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr)
+{
+ struct aplic *aplic;
+ struct vm *vm;
+
+ vm = hyp->vm;
+
+ printf("%s\n", __func__);
+
+ vm_register_inst_handler(vm, descr->mem_start, descr->mem_size,
+ mem_read, mem_write);
+
+ aplic = hyp->aplic;
+ aplic->nirqs = 63;
+ aplic->mem_start = descr->mem_start;
+ aplic->mem_end = descr->mem_start + descr->mem_size;
+ aplic->irqs = malloc(sizeof(struct aplic_irq) * aplic->nirqs, M_APLIC,
+ M_WAITOK | M_ZERO);
+
+ hyp->aplic_attached = true;
+
+ return (0);
+}
+
+void
+aplic_detach_from_vm(struct hyp *hyp)
+{
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+
+ printf("%s\n", __func__);
+
+ if (hyp->aplic_attached) {
+ hyp->aplic_attached = false;
+
+ free(aplic->irqs, M_APLIC);
+ }
+}
+
+int
+aplic_check_pending(struct hypctx *hypctx)
+{
+ struct aplic_irq *irq;
+ struct aplic *aplic;
+ struct hyp *hyp;
+ int i;
+
+ hyp = hypctx->hyp;
+ aplic = hyp->aplic;
+ if ((aplic->domaincfg & DOMAINCFG_IE) == 0)
+ return (0);
+
+ for (i = 0; i < aplic->nirqs; i++) {
+ irq = &aplic->irqs[i];
+ if (irq->state & APLIC_IRQ_STATE_PENDING)
+ return (1);
+ }
+
+ return (0);
+}
+
+int
+aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+ struct aplic_irq *irq;
+ struct aplic *aplic;
+
+ aplic = hyp->aplic;
+ if ((aplic->domaincfg & DOMAINCFG_IE) == 0)
+ return (0);
+
+ irq = &aplic->irqs[irqid];
+ if (irq->sourcecfg & SOURCECFG_D)
+ return (0);
+
+ switch (irq->sourcecfg & SOURCECFG_SM_M) {
+ case SOURCECFG_SM_EDGE1:
+ if (level)
+ irq->state |= APLIC_IRQ_STATE_PENDING;
+ else
+ irq->state &= ~APLIC_IRQ_STATE_PENDING;
+ break;
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+int
+aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+
+ /* TODO. */
+
+ return (ENXIO);
+}
+
+void
+aplic_cpuinit(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_cpucleanup(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_flush_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_sync_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+int
+aplic_max_cpu_count(struct hyp *hyp)
+{
+ int16_t max_count;
+
+ max_count = vm_get_maxcpus(hyp->vm);
+
+ return (max_count);
+}
Index: sys/riscv/vmm/vmm_dev.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_dev.c
@@ -0,0 +1,1052 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "riscv.h"
+#include "vmm_stat.h"
+#include "vmm_aplic.h"
+
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ struct ucred *ucred;
+ SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static unsigned pr_allow_flag;
+static struct mtx vmmdev_mtx;
+MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static int vmm_priv_check(struct ucred *ucred);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+static int
+vmm_priv_check(struct ucred *ucred)
+{
+
+ if (jailed(ucred) &&
+ !(ucred->cr_prison->pr_allow & pr_allow_flag))
+ return (EPERM);
+
+ return (0);
+}
+
+static int
+vcpu_lock_one(struct vcpu *vcpu)
+{
+ int error;
+
+ error = vcpu_set_state(vcpu, VCPU_FROZEN, true);
+ return (error);
+}
+
+static void
+vcpu_unlock_one(struct vcpu *vcpu)
+{
+ enum vcpu_state state;
+
+ state = vcpu_get_state(vcpu, NULL);
+ if (state != VCPU_FROZEN) {
+ panic("vcpu %s(%d) has invalid state %d",
+ vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
+ }
+
+ vcpu_set_state(vcpu, VCPU_IDLE, false);
+}
+
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ int error;
+ uint16_t i, j, maxcpus;
+
+ error = 0;
+ vm_slock_vcpus(sc->vm);
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ for (j = 0; j < i; j++) {
+ vcpu = vm_vcpu(sc->vm, j);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+ }
+
+ return (error);
+}
+
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+ struct vcpu *vcpu;
+ uint16_t i, maxcpus;
+
+ maxcpus = vm_get_maxcpus(sc->vm);
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(sc->vm, i);
+ if (vcpu == NULL)
+ continue;
+ vcpu_unlock_one(vcpu);
+ }
+ vm_unlock_vcpus(sc->vm);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+ struct vmmdev_softc *sc;
+
+#ifdef notyet /* XXX kernel is not compiled with invariants */
+ mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+ SLIST_FOREACH(sc, &head, link) {
+ if (strcmp(name, vm_name(sc->vm)) == 0)
+ break;
+ }
+
+ if (sc == NULL)
+ return (NULL);
+
+ if (cr_cansee(curthread->td_ucred, sc->ucred))
+ return (NULL);
+
+ return (sc);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+ return (cdev->si_drv1);
+}
+
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+ int error, off, c, prot;
+ vm_paddr_t gpa, maxaddr;
+ void *hpa, *cookie;
+ struct vmmdev_softc *sc;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
+ maxaddr = vmm_sysmem_maxaddr(sc->vm);
+ while (uio->uio_resid > 0 && error == 0) {
+ gpa = uio->uio_offset;
+ off = gpa & PAGE_MASK;
+ c = min(uio->uio_resid, PAGE_SIZE - off);
+
+ /*
+ * The VM has a hole in its physical memory map. If we want to
+ * use 'dd' to inspect memory beyond the hole we need to
+ * provide bogus data for memory that lies in the hole.
+ *
+ * Since this device does not support lseek(2), dd(1) will
+ * read(2) blocks of data to simulate the lseek(2).
+ */
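+		/*
+		 * Illustrative sketch (the VM name is made up): guest memory
+		 * of a VM called "testvm" could be inspected from the host
+		 * with something like
+		 *   dd if=/dev/vmm/testvm bs=4k skip=<gpa in pages> count=1 | hexdump
+		 * which reaches this loop one page at a time.
+		 */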
+ hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
+ if (hpa == NULL) {
+ if (uio->uio_rw == UIO_READ && gpa < maxaddr)
+ error = uiomove(__DECONST(void *, zero_region),
+ c, uio);
+ else
+ error = EFAULT;
+ } else {
+ error = uiomove(hpa, c, uio);
+ vm_gpa_release(cookie);
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ struct devmem_softc *dsc;
+ int error;
+ bool sysmem;
+
+ error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+ if (error || mseg->len == 0)
+ return (error);
+
+ if (!sysmem) {
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ if (dsc->segid == mseg->segid)
+ break;
+ }
+ KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+ __func__, mseg->segid));
+ error = copystr(dsc->name, mseg->name, sizeof(mseg->name),
+ NULL);
+ } else {
+ bzero(mseg->name, sizeof(mseg->name));
+ }
+
+ return (error);
+}
+
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ char *name;
+ int error;
+ bool sysmem;
+
+ error = 0;
+ name = NULL;
+ sysmem = true;
+
+ /*
+ * The allocation is lengthened by 1 to hold a terminating NUL. It'll
+	 * be stripped off when devfs processes the full string.
+ */
+ if (VM_MEMSEG_NAME(mseg)) {
+ sysmem = false;
+ name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK);
+ error = copystr(mseg->name, name, sizeof(mseg->name), NULL);
+ if (error)
+ goto done;
+ }
+
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ if (error)
+ goto done;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+ if (error)
+ vm_free_memseg(sc->vm, mseg->segid);
+ else
+ name = NULL; /* freed when 'cdev' is destroyed */
+ }
+done:
+ free(name, M_VMMDEV);
+ return (error);
+}
+
+static int
+vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+		error = vm_get_register(vcpu, regnum[i], &regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
+ uint64_t *regval)
+{
+ int error, i;
+
+ error = 0;
+ for (i = 0; i < count; i++) {
+ error = vm_set_register(vcpu, regnum[i], regval[i]);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error, vcpuid, size;
+ cpuset_t *cpuset;
+ struct vmmdev_softc *sc;
+ struct vcpu *vcpu;
+ struct vm_register *vmreg;
+ struct vm_register_set *vmregset;
+ struct vm_run *vmrun;
+ struct vm_aplic_descr *aplic;
+ struct vm_cpuset *vm_cpuset;
+ struct vm_irq *vi;
+ struct vm_capability *vmcap;
+ struct vm_stats *vmstats;
+ struct vm_stat_desc *statdesc;
+ struct vm_suspend *vmsuspend;
+ struct vm_exception *vmexc;
+ struct vm_gla2gpa *gg;
+ struct vm_memmap *mm;
+ struct vm_munmap *mu;
+ struct vm_msi *vmsi;
+ struct vm_cpu_topology *topology;
+ uint64_t *regvals;
+ int *regnums;
+ enum { NONE, SINGLE, ALL } vcpus_locked;
+ bool memsegs_locked;
+
+ dprintf("%s: cmd %ld\n", __func__, cmd);
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ error = 0;
+ vcpuid = -1;
+ vcpu = NULL;
+ vcpus_locked = NONE;
+ memsegs_locked = false;
+
+ /*
+ * Some VMM ioctls can operate only on vcpus that are not running.
+ */
+ switch (cmd) {
+ case VM_RUN:
+ case VM_GET_REGISTER:
+ case VM_SET_REGISTER:
+ case VM_GET_REGISTER_SET:
+ case VM_SET_REGISTER_SET:
+ case VM_INJECT_EXCEPTION:
+ case VM_GET_CAPABILITY:
+ case VM_SET_CAPABILITY:
+ case VM_GLA2GPA_NOFAULT:
+ case VM_ACTIVATE_CPU:
+ /*
+ * ioctls that can operate only on vcpus that are not running.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ error = vcpu_lock_one(vcpu);
+ if (error)
+ goto done;
+ vcpus_locked = SINGLE;
+ break;
+
+ case VM_ALLOC_MEMSEG:
+ case VM_MMAP_MEMSEG:
+ case VM_MUNMAP_MEMSEG:
+ case VM_REINIT:
+ case VM_ATTACH_APLIC:
+ /*
+ * ioctls that modify the memory map must lock memory
+ * segments exclusively.
+ */
+ vm_xlock_memsegs(sc->vm);
+ memsegs_locked = true;
+
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = vcpu_lock_all(sc);
+ if (error)
+ goto done;
+ vcpus_locked = ALL;
+ break;
+ case VM_GET_MEMSEG:
+ case VM_MMAP_GETNEXT:
+ /*
+ * Lock the memory map while it is being inspected.
+ */
+ vm_slock_memsegs(sc->vm);
+ memsegs_locked = true;
+ break;
+
+ case VM_STATS:
+ /*
+ * These do not need the vCPU locked but do operate on
+ * a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_SUSPEND_CPU:
+ case VM_RESUME_CPU:
+ /*
+ * These can either operate on all CPUs via a vcpuid of
+ * -1 or on a specific vCPU.
+ */
+ vcpuid = *(int *)data;
+ if (vcpuid == -1)
+ break;
+ vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
+ if (vcpu == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+ break;
+
+ case VM_ASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_assert_irq(sc->vm, vi->irq);
+ break;
+ case VM_DEASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_deassert_irq(sc->vm, vi->irq);
+ break;
+ default:
+ break;
+ }
+
+ switch (cmd) {
+ case VM_RUN: {
+ struct vm_exit *vme;
+
+ vmrun = (struct vm_run *)data;
+ vme = vm_exitinfo(vcpu);
+
+ error = vm_run(vcpu);
+ if (error != 0)
+ break;
+
+ error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
+ if (error != 0)
+ break;
+ break;
+ }
+ case VM_SUSPEND:
+ vmsuspend = (struct vm_suspend *)data;
+ error = vm_suspend(sc->vm, vmsuspend->how);
+ break;
+ case VM_REINIT:
+ error = vm_reinit(sc->vm);
+ break;
+ case VM_STAT_DESC: {
+ statdesc = (struct vm_stat_desc *)data;
+ error = vmm_stat_desc_copy(statdesc->index,
+ statdesc->desc, sizeof(statdesc->desc));
+ break;
+ }
+ case VM_STATS: {
+ CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
+ vmstats = (struct vm_stats *)data;
+ getmicrotime(&vmstats->tv);
+ error = vmm_stat_copy(vcpu, vmstats->index,
+ nitems(vmstats->statbuf),
+ &vmstats->num_entries, vmstats->statbuf);
+ break;
+ }
+ case VM_MMAP_GETNEXT:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+ &mm->segoff, &mm->len, &mm->prot, &mm->flags);
+ break;
+ case VM_MMAP_MEMSEG:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+ mm->len, mm->prot, mm->flags);
+ break;
+ case VM_MUNMAP_MEMSEG:
+ mu = (struct vm_munmap *)data;
+ error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
+ break;
+ case VM_ALLOC_MEMSEG:
+ error = alloc_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_MEMSEG:
+ error = get_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
+ break;
+ case VM_SET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
+ break;
+ case VM_GET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = vm_get_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ if (error == 0)
+ error = copyout(regvals, vmregset->regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_SET_REGISTER_SET:
+ vmregset = (struct vm_register_set *)data;
+ if (vmregset->count > VM_REG_LAST) {
+ error = EINVAL;
+ break;
+ }
+ regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
+ M_WAITOK);
+ error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
+ vmregset->count);
+ if (error == 0)
+ error = copyin(vmregset->regvals, regvals,
+ sizeof(regvals[0]) * vmregset->count);
+ if (error == 0)
+ error = vm_set_register_set(vcpu, vmregset->count,
+ regnums, regvals);
+ free(regvals, M_VMMDEV);
+ free(regnums, M_VMMDEV);
+ break;
+ case VM_GET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_get_capability(vcpu,
+ vmcap->captype,
+ &vmcap->capval);
+ break;
+ case VM_SET_CAPABILITY:
+ vmcap = (struct vm_capability *)data;
+ error = vm_set_capability(vcpu,
+ vmcap->captype,
+ vmcap->capval);
+ break;
+ case VM_INJECT_EXCEPTION:
+ vmexc = (struct vm_exception *)data;
+ error = vm_inject_exception(vcpu, vmexc->scause);
+ break;
+ case VM_GLA2GPA_NOFAULT:
+ gg = (struct vm_gla2gpa *)data;
+ error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
+ gg->prot, &gg->gpa, &gg->fault);
+ KASSERT(error == 0 || error == EFAULT,
+ ("%s: vm_gla2gpa unknown error %d", __func__, error));
+ break;
+ case VM_ACTIVATE_CPU:
+ error = vm_activate_cpu(vcpu);
+ break;
+ case VM_GET_CPUS:
+ error = 0;
+ vm_cpuset = (struct vm_cpuset *)data;
+ size = vm_cpuset->cpusetsize;
+ if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+ if (vm_cpuset->which == VM_ACTIVE_CPUS)
+ *cpuset = vm_active_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
+ *cpuset = vm_suspended_cpus(sc->vm);
+ else if (vm_cpuset->which == VM_DEBUG_CPUS)
+ *cpuset = vm_debug_cpus(sc->vm);
+ else
+ error = EINVAL;
+ if (error == 0)
+ error = copyout(cpuset, vm_cpuset->cpus, size);
+ free(cpuset, M_TEMP);
+ break;
+ case VM_SUSPEND_CPU:
+ error = vm_suspend_cpu(sc->vm, vcpu);
+ break;
+ case VM_RESUME_CPU:
+ error = vm_resume_cpu(sc->vm, vcpu);
+ break;
+ case VM_ATTACH_APLIC:
+ aplic = (struct vm_aplic_descr *)data;
+ error = vm_attach_aplic(sc->vm, aplic);
+ break;
+ case VM_RAISE_MSI:
+ vmsi = (struct vm_msi *)data;
+ error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus,
+ vmsi->slot, vmsi->func);
+ break;
+ case VM_SET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ error = vm_set_topology(sc->vm, topology->sockets,
+ topology->cores, topology->threads, topology->maxcpus);
+ break;
+ case VM_GET_TOPOLOGY:
+ topology = (struct vm_cpu_topology *)data;
+ vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
+ &topology->threads, &topology->maxcpus);
+ error = 0;
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+done:
+ if (vcpus_locked == SINGLE)
+ vcpu_unlock_one(vcpu);
+ else if (vcpus_locked == ALL)
+ vcpu_unlock_all(sc);
+ if (memsegs_locked)
+ vm_unlock_memsegs(sc->vm);
+
+ /*
+ * Make sure that no handler returns a kernel-internal
+ * error value to userspace.
+ */
+ KASSERT(error == ERESTART || error >= 0,
+ ("vmmdev_ioctl: invalid error return %d", error));
+ return (error);
+}
+
+static int
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+ struct vm_object **objp, int nprot)
+{
+ struct vmmdev_softc *sc;
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff, first, last;
+ int error, found, segid;
+ bool sysmem;
+
+ error = vmm_priv_check(curthread->td_ucred);
+ if (error)
+ return (error);
+
+ first = *offset;
+ last = first + mapsize;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL) {
+ /* virtual machine is in the process of being created */
+ return (EINVAL);
+ }
+
+ /*
+ * Get a read lock on the guest memory map.
+ */
+ vm_slock_memsegs(sc->vm);
+
+ gpa = 0;
+ found = 0;
+ while (!found) {
+ error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+ NULL, NULL);
+ if (error)
+ break;
+
+ if (first >= gpa && last <= gpa + len)
+ found = 1;
+ else
+ gpa += len;
+ }
+
+ if (found) {
+ error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+ KASSERT(error == 0 && *objp != NULL,
+ ("%s: invalid memory segment %d", __func__, segid));
+ if (sysmem) {
+ vm_object_reference(*objp);
+ *offset = segoff + (first - gpa);
+ } else {
+ error = EINVAL;
+ }
+ }
+ vm_unlock_memsegs(sc->vm);
+ return (error);
+}
+
+static void
+vmmdev_destroy(void *arg)
+{
+ struct vmmdev_softc *sc = arg;
+ struct devmem_softc *dsc;
+ int error __diagused;
+
+ error = vcpu_lock_all(sc);
+ KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+ vm_unlock_vcpus(sc->vm);
+
+ while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+ KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+ SLIST_REMOVE_HEAD(&sc->devmem, link);
+ free(dsc->name, M_VMMDEV);
+ free(dsc, M_VMMDEV);
+ }
+
+ if (sc->cdev != NULL)
+ destroy_dev(sc->cdev);
+
+ if (sc->vm != NULL)
+ vm_destroy(sc->vm);
+
+ if (sc->ucred != NULL)
+ crfree(sc->ucred);
+
+ if ((sc->flags & VSC_LINKED) != 0) {
+ mtx_lock(&vmmdev_mtx);
+ SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+ mtx_unlock(&vmmdev_mtx);
+ }
+
+ free(sc, M_VMMDEV);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ if (sc == NULL || sc->cdev == NULL) {
+ mtx_unlock(&vmmdev_mtx);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Setting 'sc->cdev' to NULL is used to indicate that the VM
+ * is scheduled for destruction.
+ */
+ cdev = sc->cdev;
+ sc->cdev = NULL;
+ mtx_unlock(&vmmdev_mtx);
+
+ /*
+ * Destroy all cdevs:
+ *
+ * - any new operations on the 'cdev' will return an error (ENXIO).
+ *
+ * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
+ */
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+ destroy_dev(dsc->cdev);
+ devmem_destroy(dsc);
+ }
+ destroy_dev(cdev);
+ vmmdev_destroy(sc);
+ error = 0;
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW |
+ CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_destroy, "A", NULL);
+
+static struct cdevsw vmmdevsw = {
+ .d_name = "vmmdev",
+ .d_version = D_VERSION,
+ .d_ioctl = vmmdev_ioctl,
+ .d_mmap_single = vmmdev_mmap_single,
+ .d_read = vmmdev_rw,
+ .d_write = vmmdev_rw,
+};
+
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+ struct vm *vm;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc, *sc2;
+ char *buf;
+ int error, buflen;
+
+ error = vmm_priv_check(req->td->td_ucred);
+ if (error)
+ return (error);
+
+ buflen = VM_MAX_NAMELEN + 1;
+ buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
+ strlcpy(buf, "beavis", buflen);
+ error = sysctl_handle_string(oidp, buf, buflen, req);
+ if (error != 0 || req->newptr == NULL)
+ goto out;
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ mtx_unlock(&vmmdev_mtx);
+ if (sc != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ error = vm_create(buf, &vm);
+ if (error != 0)
+ goto out;
+
+ sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+ sc->ucred = crhold(curthread->td_ucred);
+ sc->vm = vm;
+ SLIST_INIT(&sc->devmem);
+
+ /*
+	 * Look up the name again just in case somebody sneaked in when we
+ * dropped the lock.
+ */
+ mtx_lock(&vmmdev_mtx);
+ sc2 = vmmdev_lookup(buf);
+ if (sc2 == NULL) {
+ SLIST_INSERT_HEAD(&head, sc, link);
+ sc->flags |= VSC_LINKED;
+ }
+ mtx_unlock(&vmmdev_mtx);
+
+ if (sc2 != NULL) {
+ vmmdev_destroy(sc);
+ error = EEXIST;
+ goto out;
+ }
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
+ UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ if (error != 0) {
+ vmmdev_destroy(sc);
+ goto out;
+ }
+
+ mtx_lock(&vmmdev_mtx);
+ sc->cdev = cdev;
+ sc->cdev->si_drv1 = sc;
+ mtx_unlock(&vmmdev_mtx);
+
+out:
+ free(buf, M_VMMDEV);
+ return (error);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW |
+ CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_create, "A", NULL);
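+
+/*
+ * Usage sketch (illustrative VM name): instances are created and destroyed
+ * from userspace through the hw.vmm sysctls defined above, e.g.
+ *   sysctl hw.vmm.create=testvm     (a /dev/vmm/testvm cdev appears)
+ *   sysctl hw.vmm.destroy=testvm
+ */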
+
+void
+vmmdev_init(void)
+{
+ pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
+ "Allow use of vmm in a jail.");
+}
+
+int
+vmmdev_cleanup(void)
+{
+ int error;
+
+ if (SLIST_EMPTY(&head))
+ error = 0;
+ else
+ error = EBUSY;
+
+ return (error);
+}
+
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+ struct vm_object **objp, int nprot)
+{
+ struct devmem_softc *dsc;
+ vm_ooffset_t first, last;
+ size_t seglen;
+ int error;
+ bool sysmem;
+
+ dprintf("%s: offset %lx len %lx\n", __func__, *offset, len);
+
+ dsc = cdev->si_drv1;
+ if (dsc == NULL) {
+ /* 'cdev' has been created but is not ready for use */
+ return (ENXIO);
+ }
+
+ first = *offset;
+ last = *offset + len;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ vm_slock_memsegs(dsc->sc->vm);
+
+ error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+ KASSERT(error == 0 && !sysmem && *objp != NULL,
+ ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+ if (seglen >= last)
+ vm_object_reference(*objp);
+ else
+		error = EINVAL;
+ vm_unlock_memsegs(dsc->sc->vm);
+ return (error);
+}
+
+static struct cdevsw devmemsw = {
+ .d_name = "devmem",
+ .d_version = D_VERSION,
+ .d_mmap_single = devmem_mmap_single,
+};
+
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ int error;
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+ if (error)
+ return (error);
+
+ dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(vmname);
+ KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+ if (sc->cdev == NULL) {
+ /* virtual machine is being created or destroyed */
+ mtx_unlock(&vmmdev_mtx);
+ free(dsc, M_VMMDEV);
+ destroy_dev_sched_cb(cdev, NULL, 0);
+ return (ENODEV);
+ }
+
+ dsc->segid = segid;
+ dsc->name = devname;
+ dsc->cdev = cdev;
+ dsc->sc = sc;
+ SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+ mtx_unlock(&vmmdev_mtx);
+
+ /* The 'cdev' is ready for use after 'si_drv1' is initialized */
+ cdev->si_drv1 = dsc;
+ return (0);
+}
+
+static void
+devmem_destroy(void *arg)
+{
+ struct devmem_softc *dsc = arg;
+
+ KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+ dsc->cdev = NULL;
+ dsc->sc = NULL;
+}
Index: sys/riscv/vmm/vmm_instruction_emul.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_instruction_emul.c
@@ -0,0 +1,107 @@
+/*-
+ * Copyright (c) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
+int
+vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging __unused, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->dir == VM_DIR_READ) {
+ error = memread(vcpu, gpa, &val, vie->access_size, memarg);
+ if (error)
+ goto out;
+ if ((vie->sign_extend == 0) && (vie->access_size < 8))
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = vm_set_register(vcpu, vie->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vie->reg, &val);
+ if (error)
+ goto out;
+ /* Mask any unneeded bits from the register */
+ if (vie->access_size < 8)
+ val &= (1ul << (vie->access_size * 8)) - 1;
+ error = memwrite(vcpu, gpa, val, vie->access_size, memarg);
+ }
+
+out:
+ return (error);
+}
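+
+/*
+ * Note on the call flow (as wired up elsewhere in this patch): memread and
+ * memwrite are the per-device handlers registered with
+ * vm_register_inst_handler(), e.g. the APLIC's mem_read()/mem_write(), so a
+ * trapped guest load from an emulated register becomes memread() followed by
+ * vm_set_register() on the destination register described by 'vie'.
+ */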
+
+int
+vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vre->dir == VM_DIR_READ) {
+ error = regread(vcpu, &val, regarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vcpu, vre->reg, val);
+ } else {
+ error = vm_get_register(vcpu, vre->reg, &val);
+ if (error)
+ goto out;
+ error = regwrite(vcpu, val, regarg);
+ }
+
+out:
+ return (error);
+}
Index: sys/riscv/vmm/vmm_ktr.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_ktr.h
@@ -0,0 +1,69 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_KTR_H_
+#define _VMM_KTR_H_
+
+#include <sys/ktr.h>
+#include <sys/pcpu.h>
+
+#ifndef KTR_VMM
+#define KTR_VMM KTR_GEN
+#endif
+
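+/*
+ * Illustrative expansion: a call such as
+ *   VM_CTR1(vm, "virtual machine successfully suspended %d", how);
+ * becomes CTR2(KTR_VMM, "vm %s: virtual machine successfully suspended %d",
+ * vm_name(vm), how), so every record in the KTR buffer is tagged with the
+ * VM name (and, for the VCPU_* variants, the vcpu id).
+ */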
+#define VCPU_CTR0(vm, vcpuid, format) \
+CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid))
+
+#define VCPU_CTR1(vm, vcpuid, format, p1) \
+CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1))
+
+#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \
+CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2))
+
+#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \
+CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3))
+
+#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \
+CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), \
+ (p1), (p2), (p3), (p4))
+
+#define VM_CTR0(vm, format) \
+CTR1(KTR_VMM, "vm %s: " format, vm_name((vm)))
+
+#define VM_CTR1(vm, format, p1) \
+CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1))
+
+#define VM_CTR2(vm, format, p1, p2) \
+CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2))
+
+#define VM_CTR3(vm, format, p1, p2, p3) \
+CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3))
+
+#define VM_CTR4(vm, format, p1, p2, p3, p4) \
+CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4))
+#endif
Index: sys/riscv/vmm/vmm_riscv.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_riscv.c
@@ -0,0 +1,797 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/vmem.h>
+#include <sys/bus.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/md_var.h>
+#include <machine/riscvreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/pmap.h>
+#include <machine/intr.h>
+#include <machine/encoding.h>
+#include <machine/db_machdep.h>
+
+#include "riscv.h"
+#include "vmm_aplic.h"
+#include "vmm_stat.h"
+
+MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
+
+DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
+
+static int
+m_op(uint32_t insn, int match, int mask)
+{
+
+ if (((insn ^ match) & mask) == 0)
+ return (1);
+
+ return (0);
+}
+
+static inline void
+riscv_set_active_vcpu(struct hypctx *hypctx)
+{
+
+ DPCPU_SET(vcpu, hypctx);
+}
+
+struct hypctx *
+riscv_get_active_vcpu(void)
+{
+
+ return (DPCPU_GET(vcpu));
+}
+
+int
+vmmops_modinit(void)
+{
+
+ if (!has_hyp) {
+ printf("vmm: riscv hart doesn't support H-extension.\n");
+ return (ENXIO);
+ }
+
+ if (!has_sstc) {
+ printf("vmm: riscv hart doesn't support SSTC extension.\n");
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+int
+vmmops_modcleanup(void)
+{
+
+ return (0);
+}
+
+void *
+vmmops_init(struct vm *vm, pmap_t pmap)
+{
+ struct hyp *hyp;
+ vm_size_t size;
+
+ size = round_page(sizeof(struct hyp) +
+ sizeof(struct hypctx *) * vm_get_maxcpus(vm));
+ hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ hyp->vm = vm;
+ hyp->aplic_attached = false;
+
+ aplic_vminit(hyp);
+
+ return (hyp);
+}
+
+static void
+vmmops_delegate(void)
+{
+ uint64_t hedeleg;
+ uint64_t hideleg;
+
+ hedeleg = (1UL << SCAUSE_INST_MISALIGNED);
+ hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
+ hedeleg |= (1UL << SCAUSE_BREAKPOINT);
+ hedeleg |= (1UL << SCAUSE_ECALL_USER);
+ hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
+ hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
+ hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
+ csr_write(hedeleg, hedeleg);
+
+ hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR);
+ hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
+ hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
+ csr_write(hideleg, hideleg);
+}
+
+static void
+vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
+{
+ struct hypcsr *csrs;
+
+ csrs = &hypctx->guest_csrs;
+
+ csr_write(vsstatus, csrs->vsstatus);
+ csr_write(vsie, csrs->vsie);
+ csr_write(vstvec, csrs->vstvec);
+ csr_write(vsscratch, csrs->vsscratch);
+ csr_write(vsepc, csrs->vsepc);
+ csr_write(vscause, csrs->vscause);
+ csr_write(vstval, csrs->vstval);
+ csr_write(hvip, csrs->hvip);
+ csr_write(vsatp, csrs->vsatp);
+}
+
+static void
+vmmops_vcpu_save_csrs(struct hypctx *hypctx)
+{
+ struct hypcsr *csrs;
+
+ csrs = &hypctx->guest_csrs;
+
+ csrs->vsstatus = csr_read(vsstatus);
+ csrs->vsie = csr_read(vsie);
+ csrs->vstvec = csr_read(vstvec);
+ csrs->vsscratch = csr_read(vsscratch);
+ csrs->vsepc = csr_read(vsepc);
+ csrs->vscause = csr_read(vscause);
+ csrs->vstval = csr_read(vstval);
+ csrs->hvip = csr_read(hvip);
+ csrs->vsatp = csr_read(vsatp);
+}
+
+void *
+vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
+{
+ struct hypctx *hypctx;
+ struct hyp *hyp;
+ vm_size_t size;
+
+ hyp = vmi;
+
+ dprintf("%s: hyp %p\n", __func__, hyp);
+
+ size = round_page(sizeof(struct hypctx));
+ hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+
+ KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
+ ("%s: Invalid vcpuid %d", __func__, vcpuid));
+ hyp->ctx[vcpuid] = hypctx;
+
+ hypctx->hyp = hyp;
+ hypctx->vcpu = vcpu1;
+
+ /*
+ * TODO: set initial state for CSRs if needed.
+ */
+ vmmops_vcpu_restore_csrs(hypctx);
+
+ aplic_cpuinit(hypctx);
+
+ vmmops_delegate();
+
+ csr_write(henvcfg, HENVCFG_STCE);
+ csr_write(hie, HIE_VSEIE | HIE_SGEIE);
+
+ /*
+ * TODO: should we trap rdcycle / rdtime ?
+ */
+ csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);
+ hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
+ hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
+ hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
+ hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
+
+ return (hypctx);
+}
+
+static int
+riscv_vmm_pinit(pmap_t pmap)
+{
+
+ dprintf("%s: pmap %p\n", __func__, pmap);
+
+ pmap_pinit_stage(pmap, PM_STAGE2);
+
+ return (1);
+}
+
+struct vmspace *
+vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+
+ return (vmspace_alloc(min, max, riscv_vmm_pinit));
+}
+
+void
+vmmops_vmspace_free(struct vmspace *vmspace)
+{
+
+ pmap_remove_pages(vmspace_pmap(vmspace));
+ vmspace_free(vmspace);
+}
+
+static void
+riscv_unpriv_read(struct hypctx *hypctx, uint64_t guest_addr, uint64_t *data)
+{
+ uint64_t old_hstatus;
+ uint64_t val;
+ uint64_t tmp;
+
+ old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
+
+ /*
+ * TODO: handle exceptions during unprivilege read.
+ */
+
+ __asm __volatile(".option push\n"
+ ".option norvc\n"
+ "hlvx.hu %[val], (%[addr])\n"
+ ".option pop\n"
+ : [val] "=&r" (val), [addr] "+&r" (guest_addr)
+ :: "memory");
+
+ if ((val & 0x3) == 0x3) {
+ guest_addr += 2;
+ __asm __volatile(".option push\n"
+ ".option norvc\n"
+ "hlvx.hu %[tmp], (%[addr])\n"
+ ".option pop\n"
+ : [tmp] "=&r" (tmp), [addr] "+&r" (guest_addr)
+ :: "memory");
+ val |= (tmp << 16);
+ }
+
+ csr_write(hstatus, old_hstatus);
+
+ *data = val;
+}
+
+static void
+riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret)
+{
+ uint64_t guest_addr;
+ struct vie *vie;
+ uint64_t insn;
+ int reg_num;
+ int rs2, rd;
+
+ vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
+ (vme_ret->stval & 0x3);
+
+ guest_addr = vme_ret->sepc;
+
+ vie = &vme_ret->u.inst_emul.vie;
+	vie->dir = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
+	    VM_DIR_WRITE : VM_DIR_READ;
+ vie->sign_extend = 1;
+
+ riscv_unpriv_read(hypctx, guest_addr, &insn);
+
+ if ((insn & 0x3) == 0x3) {
+ rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
+ rd = (insn & RD_MASK) >> RD_SHIFT;
+
+ if (vie->dir == VM_DIR_WRITE) {
+ if (m_op(insn, MATCH_SB, MASK_SB))
+ vie->access_size = 1;
+ else if (m_op(insn, MATCH_SH, MASK_SH))
+ vie->access_size = 2;
+ else if (m_op(insn, MATCH_SW, MASK_SW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_SD, MASK_SD))
+ vie->access_size = 8;
+ else
+ panic("unknown store instr at %lx", guest_addr);
+ reg_num = rs2;
+ } else {
+ if (m_op(insn, MATCH_LB, MASK_LB))
+ vie->access_size = 1;
+ else if (m_op(insn, MATCH_LH, MASK_LH))
+ vie->access_size = 2;
+ else if (m_op(insn, MATCH_LW, MASK_LW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_LD, MASK_LD))
+ vie->access_size = 8;
+ else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
+ vie->access_size = 1;
+ vie->sign_extend = 0;
+ } else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
+ vie->access_size = 2;
+ vie->sign_extend = 0;
+ } else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
+ vie->access_size = 4;
+ vie->sign_extend = 0;
+ } else
+ panic("unknown load instr at %lx", guest_addr);
+ reg_num = rd;
+ }
+ vme_ret->inst_length = 4;
+ } else {
+ rs2 = (insn >> 7) & 0x7;
+ rs2 += 0x8;
+ rd = (insn >> 2) & 0x7;
+ rd += 0x8;
+
+ if (vie->dir == VM_DIR_WRITE) {
+ if (m_op(insn, MATCH_C_SW, MASK_C_SW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
+ vie->access_size = 8;
+ else
+ panic("unknown store instr at %lx", guest_addr);
+ } else {
+ if (m_op(insn, MATCH_C_LW, MASK_C_LW))
+ vie->access_size = 4;
+ else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
+ vie->access_size = 8;
+ else
+ panic("unknown load instr at %lx", guest_addr);
+ }
+ reg_num = rd;
+ vme_ret->inst_length = 2;
+ }
+
+ dprintf("guest_addr %lx insn %lx, reg %d\n", guest_addr, insn, reg_num);
+
+ vie->reg = reg_num;
+}
+
+static bool
+riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
+ pmap_t pmap)
+{
+ uint64_t insn;
+ uint64_t gpa;
+ bool handled;
+
+ handled = false;
+
+ if (vme->scause & SCAUSE_INTR) {
+ /*
+ * Host interrupt? Leave critical section to handle.
+ */
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ vme->inst_length = 0;
+ return (handled);
+ }
+
+ switch (vme->scause) {
+ case SCAUSE_FETCH_GUEST_PAGE_FAULT:
+ case SCAUSE_LOAD_GUEST_PAGE_FAULT:
+ case SCAUSE_STORE_GUEST_PAGE_FAULT:
+ gpa = (vme->htval << 2) | (vme->stval & 0x3);
+ if (vm_mem_allocated(hypctx->vcpu, gpa)) {
+ vme->exitcode = VM_EXITCODE_PAGING;
+ vme->inst_length = 0;
+ vme->u.paging.gpa = gpa;
+ } else {
+ riscv_gen_inst_emul_data(hypctx, vme);
+ vme->exitcode = VM_EXITCODE_INST_EMUL;
+ }
+ break;
+ case SCAUSE_ILLEGAL_INSTRUCTION:
+ /*
+ * TODO: handle illegal instruction properly.
+ */
+ panic("%s: Illegal instr at %lx stval 0x%lx htval 0x%lx\n",
+ __func__, vme->sepc, vme->stval, vme->htval);
+ case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
+ vme->exitcode = VM_EXITCODE_ECALL;
+ handled = false;
+ break;
+ case SCAUSE_VIRTUAL_INSTRUCTION:
+ insn = vme->stval;
+ if (m_op(insn, MATCH_WFI, MASK_WFI))
+ vme->exitcode = VM_EXITCODE_WFI;
+ else
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = false;
+ break;
+ default:
+ printf("unknown scause %lx\n", vme->scause);
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = false;
+ break;
+ }
+
+ return (handled);
+}
+
+int
+vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
+ int prot, uint64_t *gpa, int *is_fault)
+{
+
+ /* Implement me. */
+
+ return (0);
+}
+
+static void
+riscv_sync_interrupts(struct hypctx *hypctx)
+{
+ int pending;
+
+ pending = aplic_check_pending(hypctx);
+
+ if (pending)
+ hypctx->guest_csrs.hvip |= HVIP_VSEIP;
+ else
+ hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;
+
+ csr_write(hvip, hypctx->guest_csrs.hvip);
+}
+
+int
+vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
+{
+ struct hypctx *hypctx;
+ struct vm_exit *vme;
+ struct vcpu *vcpu;
+ register_t val;
+ int handled;
+
+ hypctx = (struct hypctx *)vcpui;
+ vcpu = hypctx->vcpu;
+ vme = vm_exitinfo(vcpu);
+
+ hypctx->guest_regs.hyp_sepc = (uint64_t)pc;
+
+ if (hypctx->guest_regs.hyp_sstatus & SSTATUS_SPP)
+ hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
+ else
+		hypctx->guest_regs.hyp_hstatus &= ~HSTATUS_SPVP;
+
+ hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPV | HSTATUS_VTW;
+
+ csr_write(hgatp, pmap->pm_satp);
+
+ vmmops_vcpu_restore_csrs(hypctx);
+
+ for (;;) {
+ dprintf("%s: pc %lx\n", __func__, pc);
+
+ if (hypctx->has_exception) {
+ hypctx->has_exception = false;
+ /*
+ * TODO: implement exception injection.
+ */
+ }
+
+ val = intr_disable();
+
+ /* Check if the vcpu is suspended */
+ if (vcpu_suspended(evinfo)) {
+ intr_restore(val);
+ vm_exit_suspended(vcpu, pc);
+ break;
+ }
+
+ if (vcpu_debugged(vcpu)) {
+ intr_restore(val);
+ vm_exit_debug(vcpu, pc);
+ break;
+ }
+
+ /*
+ * TODO: What happens if a timer interrupt is asserted exactly
+ * here, but for the previous VM?
+ */
+ riscv_set_active_vcpu(hypctx);
+ aplic_flush_hwstate(hypctx);
+
+ riscv_sync_interrupts(hypctx);
+
+ dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
+ __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
+ hypctx->guest_regs.hyp_hstatus);
+
+ vmm_switch(hypctx);
+
+ dprintf("%s: Leaving guest VM\n", __func__);
+
+ aplic_sync_hwstate(hypctx);
+
+ /*
+ * TODO: deactivate stage 2 pmap here if needed.
+ */
+
+ vme->scause = csr_read(scause);
+ vme->sepc = csr_read(sepc);
+ vme->stval = csr_read(stval);
+ vme->htval = csr_read(htval);
+ vme->htinst = csr_read(htinst);
+
+ intr_restore(val);
+
+ vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
+ vme->pc = hypctx->guest_regs.hyp_sepc;
+ vme->inst_length = INSN_SIZE;
+
+ handled = riscv_handle_world_switch(hypctx, vme, pmap);
+		if (handled == false) {
+			/* Exit loop to emulate instruction. */
+			break;
+		} else {
+			/* Resume guest execution from the next instruction. */
+			hypctx->guest_regs.hyp_sepc += vme->inst_length;
+		}
+ }
+
+ vmmops_vcpu_save_csrs(hypctx);
+
+ return (0);
+}
+
+static void
+riscv_pcpu_vmcleanup(void *arg)
+{
+ struct hyp *hyp;
+ int i, maxcpus;
+
+ hyp = arg;
+ maxcpus = vm_get_maxcpus(hyp->vm);
+ for (i = 0; i < maxcpus; i++) {
+ if (riscv_get_active_vcpu() == hyp->ctx[i]) {
+ riscv_set_active_vcpu(NULL);
+ break;
+ }
+ }
+}
+
+void
+vmmops_vcpu_cleanup(void *vcpui)
+{
+ struct hypctx *hypctx;
+
+ hypctx = vcpui;
+
+ dprintf("%s\n", __func__);
+
+ aplic_cpucleanup(hypctx);
+
+ free(hypctx, M_HYP);
+}
+
+void
+vmmops_cleanup(void *vmi)
+{
+ struct hyp *hyp;
+
+ hyp = vmi;
+
+ dprintf("%s\n", __func__);
+
+ aplic_vmcleanup(hyp);
+
+ smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);
+
+ free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer to the guest register's storage in 'hypctx'.  Registers
+ * have different sizes and an explicit cast must be made to ensure proper
+ * conversion.
+ */
+static uint64_t *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+
+ switch (reg) {
+ case VM_REG_GUEST_RA:
+ return (&hypctx->guest_regs.hyp_ra);
+ case VM_REG_GUEST_SP:
+ return (&hypctx->guest_regs.hyp_sp);
+ case VM_REG_GUEST_GP:
+ return (&hypctx->guest_regs.hyp_gp);
+ case VM_REG_GUEST_TP:
+ return (&hypctx->guest_regs.hyp_tp);
+ case VM_REG_GUEST_T0:
+ return (&hypctx->guest_regs.hyp_t[0]);
+ case VM_REG_GUEST_T1:
+ return (&hypctx->guest_regs.hyp_t[1]);
+ case VM_REG_GUEST_T2:
+ return (&hypctx->guest_regs.hyp_t[2]);
+ case VM_REG_GUEST_S0:
+ return (&hypctx->guest_regs.hyp_s[0]);
+ case VM_REG_GUEST_S1:
+ return (&hypctx->guest_regs.hyp_s[1]);
+ case VM_REG_GUEST_A0:
+ return (&hypctx->guest_regs.hyp_a[0]);
+ case VM_REG_GUEST_A1:
+ return (&hypctx->guest_regs.hyp_a[1]);
+ case VM_REG_GUEST_A2:
+ return (&hypctx->guest_regs.hyp_a[2]);
+ case VM_REG_GUEST_A3:
+ return (&hypctx->guest_regs.hyp_a[3]);
+ case VM_REG_GUEST_A4:
+ return (&hypctx->guest_regs.hyp_a[4]);
+ case VM_REG_GUEST_A5:
+ return (&hypctx->guest_regs.hyp_a[5]);
+ case VM_REG_GUEST_A6:
+ return (&hypctx->guest_regs.hyp_a[6]);
+ case VM_REG_GUEST_A7:
+ return (&hypctx->guest_regs.hyp_a[7]);
+ case VM_REG_GUEST_S2:
+ return (&hypctx->guest_regs.hyp_s[2]);
+ case VM_REG_GUEST_S3:
+ return (&hypctx->guest_regs.hyp_s[3]);
+ case VM_REG_GUEST_S4:
+ return (&hypctx->guest_regs.hyp_s[4]);
+ case VM_REG_GUEST_S5:
+ return (&hypctx->guest_regs.hyp_s[5]);
+ case VM_REG_GUEST_S6:
+ return (&hypctx->guest_regs.hyp_s[6]);
+ case VM_REG_GUEST_S7:
+ return (&hypctx->guest_regs.hyp_s[7]);
+ case VM_REG_GUEST_S8:
+ return (&hypctx->guest_regs.hyp_s[8]);
+ case VM_REG_GUEST_S9:
+ return (&hypctx->guest_regs.hyp_s[9]);
+ case VM_REG_GUEST_S10:
+ return (&hypctx->guest_regs.hyp_s[10]);
+ case VM_REG_GUEST_S11:
+ return (&hypctx->guest_regs.hyp_s[11]);
+ case VM_REG_GUEST_T3:
+ return (&hypctx->guest_regs.hyp_t[3]);
+ case VM_REG_GUEST_T4:
+ return (&hypctx->guest_regs.hyp_t[4]);
+ case VM_REG_GUEST_T5:
+ return (&hypctx->guest_regs.hyp_t[5]);
+ case VM_REG_GUEST_T6:
+ return (&hypctx->guest_regs.hyp_t[6]);
+ case VM_REG_GUEST_SEPC:
+ return (&hypctx->guest_regs.hyp_sepc);
+ default:
+ break;
+ }
+
+ return (NULL);
+}
+
+int
+vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
+{
+ uint64_t *regp;
+ int running, hostcpu;
+ struct hypctx *hypctx;
+
+ hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *retval = *regp;
+
+ return (0);
+}
+
+int
+vmmops_setreg(void *vcpui, int reg, uint64_t val)
+{
+ uint64_t *regp;
+ struct hypctx *hypctx;
+ int running, hostcpu;
+
+ hypctx = vcpui;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ regp = hypctx_regptr(hypctx, reg);
+ if (regp == NULL)
+ return (EINVAL);
+
+ *regp = val;
+
+ return (0);
+}
+
+int
+vmmops_exception(void *vcpui, uint64_t scause)
+{
+ struct hypctx *hypctx = vcpui;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hypctx->vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
+ vcpu_vcpuid(hypctx->vcpu));
+
+ /* TODO: set registers. */
+
+ hypctx->has_exception = true;
+
+ return (0);
+}
+
+int
+vmmops_getcap(void *vcpui, int num, int *retval)
+{
+ int ret;
+
+ ret = ENOENT;
+
+ switch (num) {
+ case VM_CAP_UNRESTRICTED_GUEST:
+ *retval = 1;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return (ret);
+}
+
+int
+vmmops_setcap(void *vcpui, int num, int val)
+{
+
+ return (ENOENT);
+}
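
Two small computations above carry most of the fault-handling logic: riscv_gen_inst_emul_data() and riscv_handle_world_switch() rebuild the faulting guest-physical address as (htval << 2) | (stval & 3), since htval reports bits [XLEN-1:2] of the guest physical address, and the low two bits of the fetched opcode decide whether the trapping instruction is a 4-byte standard encoding or a 2-byte compressed one. A self-contained sketch of both computations (the values are illustrative, not taken from the diff):

#include <stdint.h>
#include <stdio.h>

/* Guest-physical address of a guest-page fault, rebuilt from htval/stval. */
static uint64_t
fault_gpa(uint64_t htval, uint64_t stval)
{

	return ((htval << 2) | (stval & 0x3));
}

/*
 * Encodings whose two low bits are 0b11 are 32-bit instructions; with the C
 * extension everything else is a 16-bit compressed instruction.
 */
static int
insn_length(uint64_t insn)
{

	return ((insn & 0x3) == 0x3 ? 4 : 2);
}

int
main(void)
{
	uint64_t htval = 0x20001234;	/* example CSR values only */
	uint64_t stval = 0x2;

	printf("gpa %#lx\n", fault_gpa(htval, stval));	/* 0x800048d2 */
	printf("len %d\n", insn_length(0x00053503));	/* ld a0,0(a0): 4 */
	printf("len %d\n", insn_length(0x6108));	/* c.ld a0,0(a0): 2 */
	return (0);
}
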
Index: sys/riscv/vmm/vmm_sbi.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_sbi.c
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
+#include <machine/machdep.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/md_var.h>
+#include <machine/sbi.h>
+
+#include "riscv.h"
+
+/*
+ * SBI is fully handled in userspace.
+ *
+ * TODO: We may need to handle the SBI IPI extension here in kernel.
+ * The same for the SBI TIME extension in case of no SSTC support in HW.
+ */
+
+int
+vmm_sbi_ecall(struct vcpu *vcpu, bool *retu)
+{
+ int sbi_extension_id __unused;
+ struct hypctx *hypctx;
+
+ hypctx = riscv_get_active_vcpu();
+ sbi_extension_id = hypctx->guest_regs.hyp_a[7];
+
+ dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__,
+ hypctx->guest_regs.hyp_a[0],
+ hypctx->guest_regs.hyp_a[1],
+ hypctx->guest_regs.hyp_a[2],
+ hypctx->guest_regs.hyp_a[3],
+ hypctx->guest_regs.hyp_a[4],
+ hypctx->guest_regs.hyp_a[5],
+ hypctx->guest_regs.hyp_a[6],
+ hypctx->guest_regs.hyp_a[7]);
+
+ switch (sbi_extension_id) {
+ case SBI_EXT_ID_TIME:
+ break;
+ default:
+ break;
+ }
+
+ *retu = true;
+
+ return (0);
+}
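
vmm_sbi_ecall() forwards SBI calls to userspace after noting the extension id. Under the SBI calling convention the guest passes the extension ID in a7, the function ID in a6 and arguments in a0..a5, and expects an error/value pair back in a0/a1, which is why the handler only needs to look at guest_regs.hyp_a[]. A small sketch of that convention (the struct, names and values here are illustrative, not the diff's types):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the guest's a0..a7 register file. */
struct sbi_regs {
	uint64_t a[8];
};

#define SBI_EXT_ID_TIME	0x54494D45	/* "TIME" */

int
main(void)
{
	/*
	 * sbi_set_timer(deadline): extension "TIME" in a7, function 0 in a6,
	 * the deadline in a0; the guest expects error/value back in a0/a1.
	 */
	struct sbi_regs regs = {
		.a = { [0] = 0x12345678, [6] = 0, [7] = SBI_EXT_ID_TIME }
	};

	printf("SBI ext %#lx func %#lx arg0 %#lx\n",
	    regs.a[7], regs.a[6], regs.a[0]);
	return (0);
}
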
Index: sys/riscv/vmm/vmm_stat.h
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_stat.h
@@ -0,0 +1,144 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vcpu *vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+int vmm_stat_copy(struct vcpu *vcpu, int index, int count,
+ int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx,
+ uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_WFI);
+VMM_STAT_DECLARE(VMEXIT_WFE);
+VMM_STAT_DECLARE(VMEXIT_HVC);
+VMM_STAT_DECLARE(VMEXIT_MSR);
+VMM_STAT_DECLARE(VMEXIT_DATA_ABORT);
+VMM_STAT_DECLARE(VMEXIT_INSN_ABORT);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED_SYNC);
+VMM_STAT_DECLARE(VMEXIT_IRQ);
+VMM_STAT_DECLARE(VMEXIT_FIQ);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED);
+#endif
Index: sys/riscv/vmm/vmm_stat.c
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_stat.c
@@ -0,0 +1,162 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/machdep.h>
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (e.g., VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vcpu *vcpu, int index, int count, int *num_stats,
+ uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i, tocopy;
+
+ if (index < 0 || count < 0)
+ return (EINVAL);
+
+ if (index > vst_num_elems)
+ return (ENOENT);
+
+ if (index == vst_num_elems) {
+ *num_stats = 0;
+ return (0);
+ }
+
+ tocopy = min(vst_num_elems - index, count);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vcpu);
+ memcpy(buf, stats + index, tocopy * sizeof(stats[0]));
+ *num_stats = tocopy;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
+VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
+VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
+VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
+VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
+VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
+VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
+VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
+VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
+VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
+VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
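
The comment at the top of vmm_stat.c distinguishes stat types from addressable elements because an array stat occupies several consecutive uint64_t slots in the per-vcpu buffer, and vmm_stat_desc_copy() synthesizes "name[i]" strings for those slots. A userspace sketch of the same index assignment and name formatting (simplified, with a hypothetical array stat; not the kernel interfaces themselves):

#include <stdio.h>

struct stat_type {
	int index;		/* first slot in the per-vcpu buffer */
	int nelems;		/* 1 for a scalar, >1 for an array stat */
	const char *desc;
};

int
main(void)
{
	struct stat_type types[] = {
		{ -1, 1, "total number of vm exits" },
		{ -1, 4, "exits per cause" },	/* hypothetical array stat */
	};
	char name[64];
	int i, j, nelems;

	/* The index assignment vmm_stat_register() performs at SYSINIT time. */
	nelems = 0;
	for (i = 0; i < 2; i++) {
		types[i].index = nelems;
		nelems += types[i].nelems;
	}

	/* vmm_stat_desc_copy() appends "[n]" for elements of array stats. */
	for (i = 0; i < 2; i++) {
		for (j = 0; j < types[i].nelems; j++) {
			if (types[i].nelems > 1)
				snprintf(name, sizeof(name), "%s[%d]",
				    types[i].desc, j);
			else
				snprintf(name, sizeof(name), "%s",
				    types[i].desc);
			printf("slot %d: %s\n", types[i].index + j, name);
		}
	}
	return (0);
}
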
Index: sys/riscv/vmm/vmm_switch.S
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_switch.S
@@ -0,0 +1,202 @@
+/*-
+ * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+#include "assym.inc"
+
+ .text
+
+/*
+ * a0 == hypctx *
+ */
+ENTRY(vmm_switch)
+ sd ra, (HYP_H_RA)(a0)
+ sd sp, (HYP_H_SP)(a0)
+ sd tp, (HYP_H_TP)(a0)
+ sd gp, (HYP_H_GP)(a0)
+ sd s0, (HYP_H_S + 0 * 8)(a0)
+ sd s1, (HYP_H_S + 1 * 8)(a0)
+ sd s2, (HYP_H_S + 2 * 8)(a0)
+ sd s3, (HYP_H_S + 3 * 8)(a0)
+ sd s4, (HYP_H_S + 4 * 8)(a0)
+ sd s5, (HYP_H_S + 5 * 8)(a0)
+ sd s6, (HYP_H_S + 6 * 8)(a0)
+ sd s7, (HYP_H_S + 7 * 8)(a0)
+ sd s8, (HYP_H_S + 8 * 8)(a0)
+ sd s9, (HYP_H_S + 9 * 8)(a0)
+ sd s10, (HYP_H_S + 10 * 8)(a0)
+ sd s11, (HYP_H_S + 11 * 8)(a0)
+
+ sd a1, (HYP_H_A + 1 * 8)(a0)
+ sd a2, (HYP_H_A + 2 * 8)(a0)
+ sd a3, (HYP_H_A + 3 * 8)(a0)
+ sd a4, (HYP_H_A + 4 * 8)(a0)
+ sd a5, (HYP_H_A + 5 * 8)(a0)
+ sd a6, (HYP_H_A + 6 * 8)(a0)
+ sd a7, (HYP_H_A + 7 * 8)(a0)
+
+ ld t0, (HYP_G_SSTATUS)(a0)
+ ld t1, (HYP_G_HSTATUS)(a0)
+ ld t2, (HYP_G_SCOUNTEREN)(a0)
+ la t4, .Lswitch_return
+ ld t5, (HYP_G_SEPC)(a0)
+
+ csrrw t0, sstatus, t0
+ csrrw t1, hstatus, t1
+ csrrw t2, scounteren, t2
+ csrrw t3, sscratch, a0
+ csrrw t4, stvec, t4
+ csrw sepc, t5
+
+ sd t0, (HYP_H_SSTATUS)(a0)
+ sd t1, (HYP_H_HSTATUS)(a0)
+ sd t2, (HYP_H_SCOUNTEREN)(a0)
+ sd t3, (HYP_H_SSCRATCH)(a0)
+ sd t4, (HYP_H_STVEC)(a0)
+
+ ld ra, (HYP_G_RA)(a0)
+ ld sp, (HYP_G_SP)(a0)
+ ld gp, (HYP_G_GP)(a0)
+ ld tp, (HYP_G_TP)(a0)
+ ld t0, (HYP_G_T + 0 * 8)(a0)
+ ld t1, (HYP_G_T + 1 * 8)(a0)
+ ld t2, (HYP_G_T + 2 * 8)(a0)
+ ld t3, (HYP_G_T + 3 * 8)(a0)
+ ld t4, (HYP_G_T + 4 * 8)(a0)
+ ld t5, (HYP_G_T + 5 * 8)(a0)
+ ld t6, (HYP_G_T + 6 * 8)(a0)
+ ld s0, (HYP_G_S + 0 * 8)(a0)
+ ld s1, (HYP_G_S + 1 * 8)(a0)
+ ld s2, (HYP_G_S + 2 * 8)(a0)
+ ld s3, (HYP_G_S + 3 * 8)(a0)
+ ld s4, (HYP_G_S + 4 * 8)(a0)
+ ld s5, (HYP_G_S + 5 * 8)(a0)
+ ld s6, (HYP_G_S + 6 * 8)(a0)
+ ld s7, (HYP_G_S + 7 * 8)(a0)
+ ld s8, (HYP_G_S + 8 * 8)(a0)
+ ld s9, (HYP_G_S + 9 * 8)(a0)
+ ld s10, (HYP_G_S + 10 * 8)(a0)
+ ld s11, (HYP_G_S + 11 * 8)(a0)
+ /* skip a0 for now. */
+ ld a1, (HYP_G_A + 1 * 8)(a0)
+ ld a2, (HYP_G_A + 2 * 8)(a0)
+ ld a3, (HYP_G_A + 3 * 8)(a0)
+ ld a4, (HYP_G_A + 4 * 8)(a0)
+ ld a5, (HYP_G_A + 5 * 8)(a0)
+ ld a6, (HYP_G_A + 6 * 8)(a0)
+ ld a7, (HYP_G_A + 7 * 8)(a0)
+ /* now load a0. */
+ ld a0, (HYP_G_A + 0 * 8)(a0)
+
+ sret
+
+ .align 2
+.Lswitch_return:
+
+ csrrw a0, sscratch, a0
+ sd ra, (HYP_G_RA)(a0)
+ sd sp, (HYP_G_SP)(a0)
+ sd gp, (HYP_G_GP)(a0)
+ sd tp, (HYP_G_TP)(a0)
+ sd t0, (HYP_G_T + 0 * 8)(a0)
+ sd t1, (HYP_G_T + 1 * 8)(a0)
+ sd t2, (HYP_G_T + 2 * 8)(a0)
+ sd t3, (HYP_G_T + 3 * 8)(a0)
+ sd t4, (HYP_G_T + 4 * 8)(a0)
+ sd t5, (HYP_G_T + 5 * 8)(a0)
+ sd t6, (HYP_G_T + 6 * 8)(a0)
+ sd s0, (HYP_G_S + 0 * 8)(a0)
+ sd s1, (HYP_G_S + 1 * 8)(a0)
+ sd s2, (HYP_G_S + 2 * 8)(a0)
+ sd s3, (HYP_G_S + 3 * 8)(a0)
+ sd s4, (HYP_G_S + 4 * 8)(a0)
+ sd s5, (HYP_G_S + 5 * 8)(a0)
+ sd s6, (HYP_G_S + 6 * 8)(a0)
+ sd s7, (HYP_G_S + 7 * 8)(a0)
+ sd s8, (HYP_G_S + 8 * 8)(a0)
+ sd s9, (HYP_G_S + 9 * 8)(a0)
+ sd s10, (HYP_G_S + 10 * 8)(a0)
+ sd s11, (HYP_G_S + 11 * 8)(a0)
+ /* skip a0 */
+ sd a1, (HYP_G_A + 1 * 8)(a0)
+ sd a2, (HYP_G_A + 2 * 8)(a0)
+ sd a3, (HYP_G_A + 3 * 8)(a0)
+ sd a4, (HYP_G_A + 4 * 8)(a0)
+ sd a5, (HYP_G_A + 5 * 8)(a0)
+ sd a6, (HYP_G_A + 6 * 8)(a0)
+ sd a7, (HYP_G_A + 7 * 8)(a0)
+
+ ld t1, (HYP_H_STVEC)(a0)
+ ld t2, (HYP_H_SSCRATCH)(a0)
+ ld t3, (HYP_H_SCOUNTEREN)(a0)
+ ld t4, (HYP_H_HSTATUS)(a0)
+ ld t5, (HYP_H_SSTATUS)(a0)
+
+ csrr t0, sepc
+ csrw stvec, t1
+ csrrw t2, sscratch, t2
+ csrrw t3, scounteren, t3
+ csrrw t4, hstatus, t4
+ csrrw t5, sstatus, t5
+
+ sd t0, (HYP_G_SEPC)(a0)
+ sd t2, (HYP_G_A + 0 * 8)(a0)
+ sd t3, (HYP_G_SCOUNTEREN)(a0)
+ sd t4, (HYP_G_HSTATUS)(a0)
+ sd t5, (HYP_G_SSTATUS)(a0)
+
+ ld ra, (HYP_H_RA)(a0)
+ ld sp, (HYP_H_SP)(a0)
+ ld tp, (HYP_H_TP)(a0)
+ ld gp, (HYP_H_GP)(a0)
+ ld s0, (HYP_H_S + 0 * 8)(a0)
+ ld s1, (HYP_H_S + 1 * 8)(a0)
+ ld s2, (HYP_H_S + 2 * 8)(a0)
+ ld s3, (HYP_H_S + 3 * 8)(a0)
+ ld s4, (HYP_H_S + 4 * 8)(a0)
+ ld s5, (HYP_H_S + 5 * 8)(a0)
+ ld s6, (HYP_H_S + 6 * 8)(a0)
+ ld s7, (HYP_H_S + 7 * 8)(a0)
+ ld s8, (HYP_H_S + 8 * 8)(a0)
+ ld s9, (HYP_H_S + 9 * 8)(a0)
+ ld s10, (HYP_H_S + 10 * 8)(a0)
+ ld s11, (HYP_H_S + 11 * 8)(a0)
+
+ ld a1, (HYP_H_A + 1 * 8)(a0)
+ ld a2, (HYP_H_A + 2 * 8)(a0)
+ ld a3, (HYP_H_A + 3 * 8)(a0)
+ ld a4, (HYP_H_A + 4 * 8)(a0)
+ ld a5, (HYP_H_A + 5 * 8)(a0)
+ ld a6, (HYP_H_A + 6 * 8)(a0)
+ ld a7, (HYP_H_A + 7 * 8)(a0)
+
+ ret
+
+END(vmm_switch)
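
vmm_switch.S addresses the host and guest save areas through HYP_H_* / HYP_G_* constants taken from assym.inc, which FreeBSD generates from the C structure layouts so the assembly offsets cannot drift from struct hypctx. A hedged sketch of the underlying offsetof idea, using a stand-in structure (the field and macro names here are illustrative, not the ones the diff defines):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for a context save area that assembly has to index by offset. */
struct hypctx_sketch {
	uint64_t host_ra;
	uint64_t host_sp;
	uint64_t guest_gprs[31];
};

int
main(void)
{

	/*
	 * The kernel build emits lines of this form into assym.inc from the
	 * real struct hypctx, and vmm_switch.S then uses them as literal
	 * displacements, e.g. "sd ra, (HYP_H_RA)(a0)".
	 */
	printf("#define\tHYP_H_RA\t%zu\n",
	    offsetof(struct hypctx_sketch, host_ra));
	printf("#define\tHYP_H_SP\t%zu\n",
	    offsetof(struct hypctx_sketch, host_sp));
	printf("#define\tHYP_G_GPRS\t%zu\n",
	    offsetof(struct hypctx_sketch, guest_gprs));
	return (0);
}
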