Index: sys/conf/files.riscv =================================================================== --- sys/conf/files.riscv +++ sys/conf/files.riscv @@ -43,6 +43,7 @@ riscv/riscv/elf_machdep.c standard riscv/riscv/exception.S standard riscv/riscv/exec_machdep.c standard +riscv/riscv/fpe.c optional vmm riscv/riscv/gdb_machdep.c optional gdb riscv/riscv/intc.c standard riscv/riscv/identcpu.c standard @@ -71,6 +72,14 @@ riscv/riscv/uio_machdep.c standard riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack riscv/riscv/vm_machdep.c standard +riscv/vmm/vmm.c optional vmm +riscv/vmm/vmm_aplic.c optional vmm +riscv/vmm/vmm_dev.c optional vmm +riscv/vmm/vmm_instruction_emul.c optional vmm +riscv/vmm/vmm_riscv.c optional vmm +riscv/vmm/vmm_sbi.c optional vmm +riscv/vmm/vmm_stat.c optional vmm +riscv/vmm/vmm_switch.S optional vmm # Zstd contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C} Index: sys/conf/kern.mk =================================================================== --- sys/conf/kern.mk +++ sys/conf/kern.mk @@ -160,7 +160,7 @@ # code model as "medium" and "medany" respectively. # .if ${MACHINE_CPUARCH} == "riscv" -CFLAGS+= -march=rv64imafdc +CFLAGS+= -march=rv64imafdch CFLAGS+= -mabi=lp64 CFLAGS.clang+= -mcmodel=medium CFLAGS.gcc+= -mcmodel=medany Index: sys/riscv/include/cpu.h =================================================================== --- sys/riscv/include/cpu.h +++ sys/riscv/include/cpu.h @@ -47,8 +47,6 @@ #define cpu_spinwait() /* nothing */ #define cpu_lock_delay() DELAY(1) -#ifdef _KERNEL - /* * Core manufacturer IDs, as reported by the mvendorid CSR. */ @@ -89,6 +87,8 @@ #define MMU_SV48 0x2 /* 4-level paging */ #define MMU_SV57 0x4 /* 5-level paging */ +#ifdef _KERNEL + extern char btext[]; extern char etext[]; Index: sys/riscv/include/elf.h =================================================================== --- sys/riscv/include/elf.h +++ sys/riscv/include/elf.h @@ -80,6 +80,7 @@ #define HWCAP_ISA_F HWCAP_ISA_BIT('f') #define HWCAP_ISA_D HWCAP_ISA_BIT('d') #define HWCAP_ISA_C HWCAP_ISA_BIT('c') +#define HWCAP_ISA_H HWCAP_ISA_BIT('h') #define HWCAP_ISA_G \ (HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D) Index: sys/riscv/include/md_var.h =================================================================== --- sys/riscv/include/md_var.h +++ sys/riscv/include/md_var.h @@ -42,6 +42,7 @@ extern u_int mmu_caps; /* Supervisor-mode extension support */ +extern bool has_hyp; extern bool has_sstc; extern bool has_sscofpmf; Index: sys/riscv/include/riscvreg.h =================================================================== --- sys/riscv/include/riscvreg.h +++ sys/riscv/include/riscvreg.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015-2017 Ruslan Bukin + * Copyright (c) 2015-2024 Ruslan Bukin * All rights reserved. 
* * Portions of this software were developed by SRI International and the @@ -47,9 +47,15 @@ #define SCAUSE_STORE_ACCESS_FAULT 7 #define SCAUSE_ECALL_USER 8 #define SCAUSE_ECALL_SUPERVISOR 9 +#define SCAUSE_VIRTUAL_SUPERVISOR_ECALL 10 +#define SCAUSE_MACHINE_ECALL 11 #define SCAUSE_INST_PAGE_FAULT 12 #define SCAUSE_LOAD_PAGE_FAULT 13 #define SCAUSE_STORE_PAGE_FAULT 15 +#define SCAUSE_FETCH_GUEST_PAGE_FAULT 20 +#define SCAUSE_LOAD_GUEST_PAGE_FAULT 21 +#define SCAUSE_VIRTUAL_INSTRUCTION 22 +#define SCAUSE_STORE_GUEST_PAGE_FAULT 23 #define SSTATUS_UIE (1 << 0) #define SSTATUS_SIE (1 << 1) @@ -116,6 +122,17 @@ #define MSTATUS_PRV_H 2 /* hypervisor */ #define MSTATUS_PRV_M 3 /* machine */ +#define HSTATUS_VSBE (1 << 5) +#define HSTATUS_GVA (1 << 6) +#define HSTATUS_SPV (1 << 7) +#define HSTATUS_SPVP (1 << 8) +#define HSTATUS_HU (1 << 9) +#define HSTATUS_VGEIN_S 12 +#define HSTATUS_VGEIN_M (0xf << HSTATUS_VGEIN_S) +#define HSTATUS_VTVM (1 << 20) +#define HSTATUS_VTW (1 << 21) +#define HSTATUS_VTSR (1 << 22) + #define MIE_USIE (1 << 0) #define MIE_SSIE (1 << 1) #define MIE_HSIE (1 << 2) @@ -143,10 +160,35 @@ #define MIP_SEIP (1 << 9) +#define HVIP_VSSIP (1 << 2) +#define HVIP_VSTIP (1 << 6) +#define HVIP_VSEIP (1 << 10) + +#define HIE_VSSIE (1 << 2) +#define HIE_VSTIE (1 << 6) +#define HIE_VSEIE (1 << 10) +#define HIE_SGEIE (1 << 12) + /* Note: sip register has no SIP_STIP bit in Spike simulator */ #define SIP_SSIP (1 << 1) #define SIP_STIP (1 << 5) +#define HENVCFG_STCE (1UL << 63) +#define HENVCFG_PBMTE (1UL << 62) +#define HENVCFG_ADUE (1UL << 61) +#define HENVCFG_CDE (1UL << 60) +#define HENVCFG_PMM_S 32 +#define HENVCFG_PMM_M (0x3UL << HENVCFG_PMM_S) +#define HENVCFG_CBZE (1UL << 7) +#define HENVCFG_CBCFE (1UL << 6) +#define HENVCFG_CBIE_S 4 +#define HENVCFG_CBIE_M (0x3UL << HENVCFG_CBIE_S) +#define HENVCFG_FIOM (1UL << 0) + +#define HCOUNTEREN_CY (1UL << 0) /* Cycle */ +#define HCOUNTEREN_TM (1UL << 1) /* Time */ +#define HCOUNTEREN_IR (1UL << 2) /* Instret */ + #define SATP_PPN_S 0 #define SATP_PPN_M (0xfffffffffffUL << SATP_PPN_S) #define SATP_ASID_S 44 Index: sys/riscv/include/vmm.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm.h @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_H_ +#define _VMM_H_ + +#include +#include +#include +#include + +#include "pte.h" +#include "pmap.h" + +struct vcpu; + +enum vm_suspend_how { + VM_SUSPEND_NONE, + VM_SUSPEND_RESET, + VM_SUSPEND_POWEROFF, + VM_SUSPEND_HALT, + VM_SUSPEND_LAST +}; + +/* + * Identifiers for architecturally defined registers. + */ +enum vm_reg_name { + VM_REG_GUEST_ZERO = 0, + VM_REG_GUEST_RA, + VM_REG_GUEST_SP, + VM_REG_GUEST_GP, + VM_REG_GUEST_TP, + VM_REG_GUEST_T0, + VM_REG_GUEST_T1, + VM_REG_GUEST_T2, + VM_REG_GUEST_S0, + VM_REG_GUEST_S1, + VM_REG_GUEST_A0, + VM_REG_GUEST_A1, + VM_REG_GUEST_A2, + VM_REG_GUEST_A3, + VM_REG_GUEST_A4, + VM_REG_GUEST_A5, + VM_REG_GUEST_A6, + VM_REG_GUEST_A7, + VM_REG_GUEST_S2, + VM_REG_GUEST_S3, + VM_REG_GUEST_S4, + VM_REG_GUEST_S5, + VM_REG_GUEST_S6, + VM_REG_GUEST_S7, + VM_REG_GUEST_S8, + VM_REG_GUEST_S9, + VM_REG_GUEST_S10, + VM_REG_GUEST_S11, + VM_REG_GUEST_T3, + VM_REG_GUEST_T4, + VM_REG_GUEST_T5, + VM_REG_GUEST_T6, + VM_REG_GUEST_SEPC, + VM_REG_LAST +}; + +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) +#define VM_INTINFO_DEL_ERRCODE 0x800 +#define VM_INTINFO_RSVD 0x7ffff000 +#define VM_INTINFO_VALID 0x80000000 +#define VM_INTINFO_TYPE 0x700 +#define VM_INTINFO_HWINTR (0 << 8) +#define VM_INTINFO_NMI (2 << 8) +#define VM_INTINFO_HWEXCEPTION (3 << 8) +#define VM_INTINFO_SWINTR (4 << 8) + +#define VM_MAX_SUFFIXLEN 15 + +#ifdef _KERNEL + +#define VM_MAX_NAMELEN 32 + +struct vm; +struct vm_exception; +struct vm_exit; +struct vm_run; +struct vm_object; +struct vm_guest_paging; +struct vm_aplic_descr; +struct pmap; + +struct vm_eventinfo { + void *rptr; /* rendezvous cookie */ + int *sptr; /* suspend cookie */ + int *iptr; /* reqidle cookie */ +}; + +int vm_create(const char *name, struct vm **retvm); +struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); +void vm_slock_vcpus(struct vm *vm); +void vm_unlock_vcpus(struct vm *vm); +void vm_destroy(struct vm *vm); +int vm_reinit(struct vm *vm); +const char *vm_name(struct vm *vm); + +/* + * APIs that modify the guest memory map require all vcpus to be frozen. + */ +void vm_slock_memsegs(struct vm *vm); +void vm_xlock_memsegs(struct vm *vm); +void vm_unlock_memsegs(struct vm *vm); +int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, + size_t len, int prot, int flags); +int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len); +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); +void vm_free_memseg(struct vm *vm, int ident); + +/* + * APIs that inspect the guest memory map require only a *single* vcpu to + * be frozen. This acts like a read lock on the guest memory map since any + * modification requires *all* vcpus to be frozen. 
+ */ +int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); +int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + struct vm_object **objptr); +vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm); +void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, + int prot, void **cookie); +void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, + int prot, void **cookie); +void vm_gpa_release(void *cookie); +bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa); + +int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault); + +uint16_t vm_get_maxcpus(struct vm *vm); +void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); +int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); +int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval); +int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val); +int vm_run(struct vcpu *vcpu); +int vm_suspend(struct vm *vm, enum vm_suspend_how how); +void* vm_get_cookie(struct vm *vm); +int vcpu_vcpuid(struct vcpu *vcpu); +void *vcpu_get_cookie(struct vcpu *vcpu); +struct vm *vcpu_vm(struct vcpu *vcpu); +struct vcpu *vm_vcpu(struct vm *vm, int cpu); +int vm_get_capability(struct vcpu *vcpu, int type, int *val); +int vm_set_capability(struct vcpu *vcpu, int type, int val); +int vm_activate_cpu(struct vcpu *vcpu); +int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_inject_exception(struct vcpu *vcpu, uint64_t scause); +int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr); +int vm_assert_irq(struct vm *vm, uint32_t irq); +int vm_deassert_irq(struct vm *vm, uint32_t irq); +int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, + int func); +struct vm_exit *vm_exitinfo(struct vcpu *vcpu); +void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc); +void vm_exit_debug(struct vcpu *vcpu, uint64_t pc); +void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc); +void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc); + +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_debug_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); + +static __inline int +vcpu_rendezvous_pending(struct vm_eventinfo *info) +{ + + return (*((uintptr_t *)(info->rptr)) != 0); +} + +static __inline int +vcpu_suspended(struct vm_eventinfo *info) +{ + + return (*info->sptr); +} + +int vcpu_debugged(struct vcpu *vcpu); + +enum vcpu_state { + VCPU_IDLE, + VCPU_FROZEN, + VCPU_RUNNING, + VCPU_SLEEPING, +}; + +int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); +enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); + +static int __inline +vcpu_is_running(struct vcpu *vcpu, int *hostcpu) +{ + return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING); +} + +#ifdef _SYS_PROC_H_ +static int __inline +vcpu_should_yield(struct vcpu *vcpu) +{ + struct thread *td; + + td = curthread; + return (td->td_ast != 0 || td->td_owepreempt != 0); +} +#endif + +void *vcpu_stats(struct vcpu *vcpu); +void vcpu_notify_event(struct vcpu *vcpu); + +enum vm_reg_name vm_segment_name(int seg_encoding); + +#endif /* _KERNEL */ + +#define VM_DIR_READ 0 +#define VM_DIR_WRITE 1 + +#define VM_GP_M_MASK 0x1f +#define VM_GP_MMU_ENABLED (1 << 5) + +struct vm_guest_paging { + int flags; + int padding; 
+}; + +struct vie { + uint8_t access_size:4, sign_extend:1, dir:1, unused:2; + enum vm_reg_name reg; +}; + +struct vre { + uint32_t inst_syndrome; + uint8_t dir:1, unused:7; + enum vm_reg_name reg; +}; + +/* + * Identifiers for optional vmm capabilities + */ +enum vm_cap_type { + VM_CAP_HALT_EXIT, + VM_CAP_MTRAP_EXIT, + VM_CAP_PAUSE_EXIT, + VM_CAP_UNRESTRICTED_GUEST, + VM_CAP_MAX +}; + +enum vm_exitcode { + VM_EXITCODE_BOGUS, + VM_EXITCODE_ECALL, + VM_EXITCODE_PAGING, + VM_EXITCODE_SUSPENDED, + VM_EXITCODE_DEBUG, + VM_EXITCODE_INST_EMUL, + VM_EXITCODE_WFI, + VM_EXITCODE_MAX +}; + +struct vm_exit { + uint64_t scause; + uint64_t sepc; + uint64_t stval; + uint64_t htval; + uint64_t htinst; + enum vm_exitcode exitcode; + int inst_length; + uint64_t pc; + union { + struct { + uint64_t gpa; + } paging; + + struct { + uint64_t gpa; + struct vm_guest_paging paging; + struct vie vie; + } inst_emul; + + struct { + uint64_t args[8]; + } ecall; + + struct { + enum vm_suspend_how how; + } suspended; + } u; +}; + +#endif /* _VMM_H_ */ Index: sys/riscv/include/vmm_dev.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm_dev.h @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_DEV_H_ +#define _VMM_DEV_H_ + +#ifdef _KERNEL +void vmmdev_init(void); +int vmmdev_cleanup(void); +#endif + +struct vm_memmap { + vm_paddr_t gpa; + int segid; /* memory segment */ + vm_ooffset_t segoff; /* offset into memory segment */ + size_t len; /* mmap length */ + int prot; /* RWX */ + int flags; +}; +#define VM_MEMMAP_F_WIRED 0x01 + +struct vm_munmap { + vm_paddr_t gpa; + size_t len; +}; + +#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? 
(m)->name : NULL) +struct vm_memseg { + int segid; + size_t len; + char name[VM_MAX_SUFFIXLEN + 1]; +}; + +struct vm_register { + int cpuid; + int regnum; /* enum vm_reg_name */ + uint64_t regval; +}; + +struct vm_register_set { + int cpuid; + unsigned int count; + const int *regnums; /* enum vm_reg_name */ + uint64_t *regvals; +}; + +struct vm_run { + int cpuid; + cpuset_t *cpuset; /* CPU set storage */ + size_t cpusetsize; + struct vm_exit *vm_exit; +}; + +struct vm_exception { + int cpuid; + uint64_t scause; +}; + +struct vm_msi { + uint64_t msg; + uint64_t addr; + int bus; + int slot; + int func; +}; + +struct vm_capability { + int cpuid; + enum vm_cap_type captype; + int capval; + int allcpus; +}; + +#define MAX_VM_STATS 64 +struct vm_stats { + int cpuid; /* in */ + int index; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; +struct vm_stat_desc { + int index; /* in */ + char desc[128]; /* out */ +}; + +struct vm_suspend { + enum vm_suspend_how how; +}; + +struct vm_gla2gpa { + int vcpuid; /* inputs */ + int prot; /* PROT_READ or PROT_WRITE */ + uint64_t gla; + struct vm_guest_paging paging; + int fault; /* outputs */ + uint64_t gpa; +}; + +struct vm_activate_cpu { + int vcpuid; +}; + +struct vm_cpuset { + int which; + int cpusetsize; + cpuset_t *cpus; +}; +#define VM_ACTIVE_CPUS 0 +#define VM_SUSPENDED_CPUS 1 +#define VM_DEBUG_CPUS 2 + +struct vm_aplic_descr { + uint64_t mem_start; + uint64_t mem_size; +}; + +struct vm_irq { + uint32_t irq; +}; + +struct vm_cpu_topology { + uint16_t sockets; + uint16_t cores; + uint16_t threads; + uint16_t maxcpus; +}; + +enum { + /* general routines */ + IOCNUM_ABIVERS = 0, + IOCNUM_RUN = 1, + IOCNUM_SET_CAPABILITY = 2, + IOCNUM_GET_CAPABILITY = 3, + IOCNUM_SUSPEND = 4, + IOCNUM_REINIT = 5, + + /* memory apis */ + IOCNUM_GET_GPA_PMAP = 12, + IOCNUM_GLA2GPA_NOFAULT = 13, + IOCNUM_ALLOC_MEMSEG = 14, + IOCNUM_GET_MEMSEG = 15, + IOCNUM_MMAP_MEMSEG = 16, + IOCNUM_MMAP_GETNEXT = 17, + IOCNUM_MUNMAP_MEMSEG = 18, + + /* register/state accessors */ + IOCNUM_SET_REGISTER = 20, + IOCNUM_GET_REGISTER = 21, + IOCNUM_SET_REGISTER_SET = 24, + IOCNUM_GET_REGISTER_SET = 25, + + /* statistics */ + IOCNUM_VM_STATS = 50, + IOCNUM_VM_STAT_DESC = 51, + + /* CPU Topology */ + IOCNUM_SET_TOPOLOGY = 63, + IOCNUM_GET_TOPOLOGY = 64, + + /* interrupt injection */ + IOCNUM_ASSERT_IRQ = 80, + IOCNUM_DEASSERT_IRQ = 81, + IOCNUM_RAISE_MSI = 82, + IOCNUM_INJECT_EXCEPTION = 83, + + /* vm_cpuset */ + IOCNUM_ACTIVATE_CPU = 90, + IOCNUM_GET_CPUSET = 91, + IOCNUM_SUSPEND_CPU = 92, + IOCNUM_RESUME_CPU = 93, + + /* vm_attach_aplic */ + IOCNUM_ATTACH_APLIC = 110, +}; + +#define VM_RUN \ + _IOWR('v', IOCNUM_RUN, struct vm_run) +#define VM_SUSPEND \ + _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) +#define VM_REINIT \ + _IO('v', IOCNUM_REINIT) +#define VM_ALLOC_MEMSEG \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) +#define VM_GET_MEMSEG \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) +#define VM_MMAP_MEMSEG \ + _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) +#define VM_MMAP_GETNEXT \ + _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) +#define VM_MUNMAP_MEMSEG \ + _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap) +#define VM_SET_REGISTER \ + _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) +#define VM_GET_REGISTER \ + _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) +#define VM_SET_REGISTER_SET \ + _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) +#define VM_GET_REGISTER_SET \ + _IOWR('v', IOCNUM_GET_REGISTER_SET, struct 
vm_register_set) +#define VM_SET_CAPABILITY \ + _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) +#define VM_GET_CAPABILITY \ + _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) +#define VM_STATS \ + _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) +#define VM_STAT_DESC \ + _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) +#define VM_ASSERT_IRQ \ + _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq) +#define VM_DEASSERT_IRQ \ + _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq) +#define VM_RAISE_MSI \ + _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi) +#define VM_INJECT_EXCEPTION \ + _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) +#define VM_SET_TOPOLOGY \ + _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GET_TOPOLOGY \ + _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GLA2GPA_NOFAULT \ + _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa) +#define VM_ACTIVATE_CPU \ + _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) +#define VM_GET_CPUS \ + _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_SUSPEND_CPU \ + _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) +#define VM_RESUME_CPU \ + _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) +#define VM_ATTACH_APLIC \ + _IOW('v', IOCNUM_ATTACH_APLIC, struct vm_aplic_descr) +#endif Index: sys/riscv/include/vmm_instruction_emul.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm_instruction_emul.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +/* + * Callback functions to read and write memory regions. + */ +typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t *rval, int rsize, void *arg); +typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t wval, int wsize, void *arg); + +/* + * Callback functions to read and write registers. + */ +typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg); +typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg); + +/* + * Emulate the decoded 'vie' instruction when it contains a memory operation. 
+ * + * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region + * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * + */ +int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t mrr, + mem_region_write_t mrw, void *mrarg); + +/* + * Emulate the decoded 'vre' instruction when it contains a register access. + * + * The callbacks 'regread' and 'regwrite' emulate reads and writes to the + * register from 'vie'. 'regarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * + */ +int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread, + reg_write_t regwrite, void *regarg); + +#ifdef _KERNEL +void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, + reg_read_t reg_read, reg_write_t reg_write, void *arg); +void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask); + +void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, + mem_region_read_t mmio_read, mem_region_write_t mmio_write); +void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size); +#endif + +#endif /* _VMM_INSTRUCTION_EMUL_H_ */ Index: sys/riscv/include/vmm_snapshot.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm_snapshot.h @@ -0,0 +1 @@ +/* $FreeBSD$ */ Index: sys/riscv/riscv/genassym.c =================================================================== --- sys/riscv/riscv/genassym.c +++ sys/riscv/riscv/genassym.c @@ -55,6 +55,8 @@ #include #include +#include + ASSYM(KERNBASE, KERNBASE); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); @@ -98,6 +100,32 @@ ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause)); ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus)); +ASSYM(HYP_H_RA, offsetof(struct hypctx, host_regs.hyp_ra)); +ASSYM(HYP_H_SP, offsetof(struct hypctx, host_regs.hyp_sp)); +ASSYM(HYP_H_GP, offsetof(struct hypctx, host_regs.hyp_gp)); +ASSYM(HYP_H_TP, offsetof(struct hypctx, host_regs.hyp_tp)); +ASSYM(HYP_H_T, offsetof(struct hypctx, host_regs.hyp_t)); +ASSYM(HYP_H_S, offsetof(struct hypctx, host_regs.hyp_s)); +ASSYM(HYP_H_A, offsetof(struct hypctx, host_regs.hyp_a)); +ASSYM(HYP_H_SEPC, offsetof(struct hypctx, host_regs.hyp_sepc)); +ASSYM(HYP_H_SSTATUS, offsetof(struct hypctx, host_regs.hyp_sstatus)); +ASSYM(HYP_H_HSTATUS, offsetof(struct hypctx, host_regs.hyp_hstatus)); +ASSYM(HYP_H_SSCRATCH, offsetof(struct hypctx, host_sscratch)); +ASSYM(HYP_H_STVEC, offsetof(struct hypctx, host_stvec)); +ASSYM(HYP_H_SCOUNTEREN, offsetof(struct hypctx, host_scounteren)); + +ASSYM(HYP_G_RA, offsetof(struct hypctx, guest_regs.hyp_ra)); +ASSYM(HYP_G_SP, offsetof(struct hypctx, guest_regs.hyp_sp)); +ASSYM(HYP_G_GP, offsetof(struct hypctx, guest_regs.hyp_gp)); +ASSYM(HYP_G_TP, offsetof(struct hypctx, guest_regs.hyp_tp)); +ASSYM(HYP_G_T, offsetof(struct hypctx, guest_regs.hyp_t)); +ASSYM(HYP_G_S, offsetof(struct hypctx, guest_regs.hyp_s)); +ASSYM(HYP_G_A, offsetof(struct hypctx, guest_regs.hyp_a)); +ASSYM(HYP_G_SEPC, offsetof(struct hypctx, guest_regs.hyp_sepc)); +ASSYM(HYP_G_SSTATUS, offsetof(struct hypctx, 
guest_regs.hyp_sstatus)); +ASSYM(HYP_G_HSTATUS, offsetof(struct hypctx, guest_regs.hyp_hstatus)); +ASSYM(HYP_G_SCOUNTEREN, offsetof(struct hypctx, guest_scounteren)); + ASSYM(RISCV_BOOTPARAMS_SIZE, sizeof(struct riscv_bootparams)); ASSYM(RISCV_BOOTPARAMS_KERN_PHYS, offsetof(struct riscv_bootparams, kern_phys)); ASSYM(RISCV_BOOTPARAMS_KERN_STACK, offsetof(struct riscv_bootparams, Index: sys/riscv/riscv/identcpu.c =================================================================== --- sys/riscv/riscv/identcpu.c +++ sys/riscv/riscv/identcpu.c @@ -72,6 +72,7 @@ u_int mmu_caps; /* Supervisor-mode extension support. */ +bool has_hyp; bool __read_frequently has_sstc; bool __read_frequently has_sscofpmf; @@ -247,6 +248,7 @@ case 'c': case 'd': case 'f': + case 'h': case 'i': case 'm': desc->isa_extensions |= HWCAP_ISA_BIT(isa[i]); @@ -412,6 +414,7 @@ UPDATE_CAP(mmu_caps, desc->mmu_caps); /* Supervisor-mode extension support. */ + UPDATE_CAP(has_hyp, (desc->isa_extensions & HWCAP_ISA_H) != 0); UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0); UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0); @@ -511,6 +514,7 @@ "\03Compressed" "\04Double" "\06Float" + "\10Hypervisor" "\15Mult/Div"); } Index: sys/riscv/vmm/riscv.h =================================================================== --- /dev/null +++ sys/riscv/vmm/riscv.h @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_RISCV_H_ +#define _VMM_RISCV_H_ + +#include +#include +#include + +struct hypregs { + uint64_t hyp_ra; + uint64_t hyp_sp; + uint64_t hyp_gp; + uint64_t hyp_tp; + uint64_t hyp_t[7]; + uint64_t hyp_s[12]; + uint64_t hyp_a[8]; + uint64_t hyp_sepc; + uint64_t hyp_sstatus; + uint64_t hyp_hstatus; +}; + +struct hypcsr { + uint64_t hvip; + uint64_t vsstatus; + uint64_t vsie; + uint64_t vstvec; + uint64_t vsscratch; + uint64_t vsepc; + uint64_t vscause; + uint64_t vstval; + uint64_t vsatp; + uint64_t scounteren; + uint64_t senvcfg; +}; + +struct hypctx { + struct hypregs host_regs; + struct hypregs guest_regs; + struct hypcsr guest_csrs; + uint64_t host_sscratch; + uint64_t host_stvec; + uint64_t host_scounteren; + uint64_t guest_scounteren; + struct hyp *hyp; + struct vcpu *vcpu; + bool has_exception; +}; + +struct hyp { + struct vm *vm; + uint64_t vmid_generation; + bool aplic_attached; + struct aplic *aplic; + struct hypctx *ctx[]; +}; + +#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ + ret_type vmmops_##opname args; + +DEFINE_VMMOPS_IFUNC(int, modinit, (void)) +DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) +DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) +DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) +DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, + struct vm_eventinfo *info)) +DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) +DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)) +DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) +DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) +DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) +DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) +DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) +DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) +DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, + vm_offset_t max)) +DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) + +#define dprintf(fmt, ...) + +struct hypctx *riscv_get_active_vcpu(void); +void vmm_switch(struct hypctx *); +int vmm_sbi_ecall(struct vcpu *, bool *); + +#endif /* !_VMM_RISCV_H_ */ Index: sys/riscv/vmm/vmm.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm.c @@ -0,0 +1,1541 @@ +/*- + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "vmm_ktr.h" +#include "vmm_stat.h" +#include "riscv.h" + +#include "vmm_aplic.h" + +struct vcpu { + int flags; + enum vcpu_state state; + struct mtx mtx; + int hostcpu; /* host cpuid this vcpu last ran on */ + int vcpuid; + void *stats; + struct vm_exit exitinfo; + uint64_t nextpc; /* (x) next instruction to execute */ + struct vm *vm; /* (o) */ + void *cookie; /* (i) cpu-specific data */ + struct fpreg *guestfpu; /* (a,i) guest fpu state */ +}; + +#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) +#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) +#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) +#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) +#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) +#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) + +struct mem_seg { + uint64_t gpa; + size_t len; + bool wired; + bool sysmem; + vm_object_t object; +}; +#define VM_MAX_MEMSEGS 3 + +struct mem_map { + vm_paddr_t gpa; + size_t len; + vm_ooffset_t segoff; + int segid; + int prot; + int flags; +}; +#define VM_MAX_MEMMAPS 4 + +struct vmm_mmio_region { + uint64_t start; + uint64_t end; + mem_region_read_t read; + mem_region_write_t write; +}; +#define VM_MAX_MMIO_REGIONS 4 + +/* + * Initialization: + * (o) initialized the first time the VM is created + * (i) initialized when VM is created and when it is reinitialized + * (x) initialized before use + */ +struct vm { + void *cookie; /* (i) cpu-specific data */ + volatile cpuset_t active_cpus; /* (i) active vcpus */ + volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/ + int suspend; /* (i) stop VM execution */ + volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ + volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ + struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ + struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ + struct vmspace *vmspace; /* (o) guest's address space */ + char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + struct vcpu **vcpu; /* (i) guest vcpus */ + struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; + /* (o) guest MMIO regions */ + /* The following describe the vm cpu topology */ + uint16_t sockets; /* (o) num of sockets */ + uint16_t cores; /* (o) num of cores/socket */ + uint16_t threads; /* (o) num of threads/core */ + uint16_t maxcpus; /* (o) max pluggable 
cpus */ + struct sx mem_segs_lock; /* (o) */ + struct sx vcpus_init_lock; /* (o) */ +}; + +static bool vmm_initialized = false; + +static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); + +/* statistics */ +static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); + +SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); + +static int vmm_ipinum; +SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, + "IPI vector used for vcpu notifications"); + +u_int vm_maxcpu; +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &vm_maxcpu, 0, "Maximum number of vCPUs"); + +static void vm_free_memmap(struct vm *vm, int ident); +static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); +static void vcpu_notify_event_locked(struct vcpu *vcpu); + +/* + * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this + * is a safe value for now. + */ +#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) + +static void +vcpu_cleanup(struct vcpu *vcpu, bool destroy) +{ + vmmops_vcpu_cleanup(vcpu->cookie); + vcpu->cookie = NULL; + if (destroy) { + vmm_stat_free(vcpu->stats); + fpu_save_area_free(vcpu->guestfpu); + vcpu_lock_destroy(vcpu); + } +} + +static struct vcpu * +vcpu_alloc(struct vm *vm, int vcpu_id) +{ + struct vcpu *vcpu; + + KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, + ("vcpu_alloc: invalid vcpu %d", vcpu_id)); + + vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); + vcpu_lock_init(vcpu); + vcpu->state = VCPU_IDLE; + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; + vcpu->vm = vm; + vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->stats = vmm_stat_alloc(); + return (vcpu); +} + +static void +vcpu_init(struct vcpu *vcpu) +{ + vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); + MPASS(vcpu->cookie != NULL); + fpu_save_area_reset(vcpu->guestfpu); + vmm_stat_init(vcpu->stats); +} + +struct vm_exit * +vm_exitinfo(struct vcpu *vcpu) +{ + return (&vcpu->exitinfo); +} + +static int +vmm_init(void) +{ + + vm_maxcpu = mp_ncpus; + + TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); + + if (vm_maxcpu > VM_MAXCPU) { + printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); + vm_maxcpu = VM_MAXCPU; + } + + if (vm_maxcpu == 0) + vm_maxcpu = 1; + + return (vmmops_modinit()); +} + +static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + /* TODO: check if has_hyp here? */ + vmmdev_init(); + error = vmm_init(); + if (error == 0) + vmm_initialized = true; + break; + case MOD_UNLOAD: + /* TODO: check if has_hyp here? */ + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = vmmops_modcleanup(); + if (error) + vmm_initialized = false; + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - HYP initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). 
+ */ +DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +static void +vm_init(struct vm *vm, bool create) +{ + int i; + + vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + MPASS(vm->cookie != NULL); + + CPU_ZERO(&vm->active_cpus); + CPU_ZERO(&vm->debug_cpus); + + vm->suspend = 0; + CPU_ZERO(&vm->suspended_cpus); + + memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); + + if (!create) { + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_init(vm->vcpu[i]); + } + } +} + +struct vcpu * +vm_alloc_vcpu(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) + return (NULL); + + /* Some interrupt controllers may have a CPU limit */ + if (vcpuid >= aplic_max_cpu_count(vm->cookie)) + return (NULL); + + vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); + if (__predict_true(vcpu != NULL)) + return (vcpu); + + sx_xlock(&vm->vcpus_init_lock); + vcpu = vm->vcpu[vcpuid]; + if (vcpu == NULL/* && !vm->dying*/) { + vcpu = vcpu_alloc(vm, vcpuid); + vcpu_init(vcpu); + + /* + * Ensure vCPU is fully created before updating pointer + * to permit unlocked reads above. + */ + atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], + (uintptr_t)vcpu); + } + sx_xunlock(&vm->vcpus_init_lock); + return (vcpu); +} + +void +vm_slock_vcpus(struct vm *vm) +{ + sx_slock(&vm->vcpus_init_lock); +} + +void +vm_unlock_vcpus(struct vm *vm) +{ + sx_unlock(&vm->vcpus_init_lock); +} + +int +vm_create(const char *name, struct vm **retvm) +{ + struct vm *vm; + struct vmspace *vmspace; + + /* + * If vmm.ko could not be successfully initialized then don't attempt + * to create the virtual machine. + */ + if (!vmm_initialized) + return (ENXIO); + + if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) + return (EINVAL); + + vmspace = vmmops_vmspace_alloc(0, 1ul << 39); + if (vmspace == NULL) + return (ENOMEM); + + vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + strcpy(vm->name, name); + vm->vmspace = vmspace; + sx_init(&vm->mem_segs_lock, "vm mem_segs"); + sx_init(&vm->vcpus_init_lock, "vm vcpus"); + + vm->sockets = 1; + vm->cores = 1; /* XXX backwards compatibility */ + vm->threads = 1; /* XXX backwards compatibility */ + vm->maxcpus = vm_maxcpu; + + vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, + M_WAITOK | M_ZERO); + + vm_init(vm, true); + + *retvm = vm; + return (0); +} + +void +vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus) +{ + *sockets = vm->sockets; + *cores = vm->cores; + *threads = vm->threads; + *maxcpus = vm->maxcpus; +} + +uint16_t +vm_get_maxcpus(struct vm *vm) +{ + return (vm->maxcpus); +} + +int +vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus) +{ + /* Ignore maxcpus. */ + if ((sockets * cores * threads) > vm->maxcpus) + return (EINVAL); + vm->sockets = sockets; + vm->cores = cores; + vm->threads = threads; + return(0); +} + +static void +vm_cleanup(struct vm *vm, bool destroy) +{ + struct mem_map *mm; + int i; + + aplic_detach_from_vm(vm->cookie); + + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_cleanup(vm->vcpu[i], destroy); + } + + vmmops_cleanup(vm->cookie); + + /* + * System memory is removed from the guest address space only when + * the VM is destroyed. This is because the mapping remains the same + * across VM reset. + * + * Device memory can be relocated by the guest (e.g. using PCI BARs) + * so those mappings are removed on a VM reset. 
+ */ + if (!destroy) { + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (destroy || !sysmem_mapping(vm, mm)) + vm_free_memmap(vm, i); + } + } + + if (destroy) { + for (i = 0; i < VM_MAX_MEMSEGS; i++) + vm_free_memseg(vm, i); + + vmmops_vmspace_free(vm->vmspace); + vm->vmspace = NULL; + + for (i = 0; i < vm->maxcpus; i++) + free(vm->vcpu[i], M_VMM); + free(vm->vcpu, M_VMM); + sx_destroy(&vm->vcpus_init_lock); + sx_destroy(&vm->mem_segs_lock); + } +} + +void +vm_destroy(struct vm *vm) +{ + + vm_cleanup(vm, true); + + free(vm, M_VMM); +} + +int +vm_reinit(struct vm *vm) +{ + int error; + + /* + * A virtual machine can be reset only if all vcpus are suspended. + */ + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { + vm_cleanup(vm, false); + vm_init(vm, false); + error = 0; + } else { + error = EBUSY; + } + + return (error); +} + +const char * +vm_name(struct vm *vm) +{ + return (vm->name); +} + +void +vm_slock_memsegs(struct vm *vm) +{ + sx_slock(&vm->mem_segs_lock); +} + +void +vm_xlock_memsegs(struct vm *vm) +{ + sx_xlock(&vm->mem_segs_lock); +} + +void +vm_unlock_memsegs(struct vm *vm) +{ + sx_unlock(&vm->mem_segs_lock); +} + +/* + * Return 'true' if 'gpa' is allocated in the guest address space. + * + * This function is called in the context of a running vcpu which acts as + * an implicit lock on 'vm->mem_maps[]'. + */ +bool +vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) +{ + struct vm *vm = vcpu->vm; + struct mem_map *mm; + int i; + +#ifdef INVARIANTS + int hostcpu, state; + state = vcpu_get_state(vcpu, &hostcpu); + KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, + ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); +#endif + + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) + return (true); /* 'gpa' is sysmem or devmem */ + } + + return (false); +} + +int +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) +{ + struct mem_seg *seg; + vm_object_t obj; + + sx_assert(&vm->mem_segs_lock, SX_XLOCKED); + + if (ident < 0 || ident >= VM_MAX_MEMSEGS) + return (EINVAL); + + if (len == 0 || (len & PAGE_MASK)) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + if (seg->len == len && seg->sysmem == sysmem) + return (EEXIST); + else + return (EINVAL); + } + + obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); + if (obj == NULL) + return (ENOMEM); + + seg->len = len; + seg->object = obj; + seg->sysmem = sysmem; + return (0); +} + +int +vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + vm_object_t *objptr) +{ + struct mem_seg *seg; + + sx_assert(&vm->mem_segs_lock, SX_LOCKED); + + if (ident < 0 || ident >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (len) + *len = seg->len; + if (sysmem) + *sysmem = seg->sysmem; + if (objptr) + *objptr = seg->object; + return (0); +} + +void +vm_free_memseg(struct vm *vm, int ident) +{ + struct mem_seg *seg; + + KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, + ("%s: invalid memseg ident %d", __func__, ident)); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + vm_object_deallocate(seg->object); + bzero(seg, sizeof(struct mem_seg)); + } +} + +int +vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, + size_t len, int prot, int flags) +{ + struct mem_seg *seg; + struct mem_map *m, *map; + vm_ooffset_t last; + int i, error; + + dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len); + + if 
(prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) + return (EINVAL); + + if (flags & ~VM_MEMMAP_F_WIRED) + return (EINVAL); + + if (segid < 0 || segid >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[segid]; + if (seg->object == NULL) + return (EINVAL); + + last = first + len; + if (first < 0 || first >= last || last > seg->len) + return (EINVAL); + + if ((gpa | first | last) & PAGE_MASK) + return (EINVAL); + + map = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + m = &vm->mem_maps[i]; + if (m->len == 0) { + map = m; + break; + } + } + + if (map == NULL) + return (ENOSPC); + + error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, + len, 0, VMFS_NO_SPACE, prot, prot, 0); + if (error != KERN_SUCCESS) + return (EFAULT); + + vm_object_reference(seg->object); + + if (flags & VM_MEMMAP_F_WIRED) { + error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, + VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); + if (error != KERN_SUCCESS) { + vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); + return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : + EFAULT); + } + } + + map->gpa = gpa; + map->len = len; + map->segoff = first; + map->segid = segid; + map->prot = prot; + map->flags = flags; + return (0); +} + +int +vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) +{ + struct mem_map *m; + int i; + + dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len); + + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + m = &vm->mem_maps[i]; + if (m->gpa == gpa && m->len == len) { + vm_free_memmap(vm, i); + return (0); + } + } + + return (EINVAL); +} + +int +vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) +{ + struct mem_map *mm, *mmnext; + int i; + + mmnext = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len == 0 || mm->gpa < *gpa) + continue; + if (mmnext == NULL || mm->gpa < mmnext->gpa) + mmnext = mm; + } + + if (mmnext != NULL) { + *gpa = mmnext->gpa; + if (segid) + *segid = mmnext->segid; + if (segoff) + *segoff = mmnext->segoff; + if (len) + *len = mmnext->len; + if (prot) + *prot = mmnext->prot; + if (flags) + *flags = mmnext->flags; + return (0); + } else { + return (ENOENT); + } +} + +static void +vm_free_memmap(struct vm *vm, int ident) +{ + struct mem_map *mm; + int error __diagused; + + mm = &vm->mem_maps[ident]; + if (mm->len) { + error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, + mm->gpa + mm->len); + KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", + __func__, error)); + bzero(mm, sizeof(struct mem_map)); + } +} + +static __inline bool +sysmem_mapping(struct vm *vm, struct mem_map *mm) +{ + + if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) + return (true); + else + return (false); +} + +vm_paddr_t +vmm_sysmem_maxaddr(struct vm *vm) +{ + struct mem_map *mm; + vm_paddr_t maxaddr; + int i; + + maxaddr = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm)) { + if (maxaddr < mm->gpa + mm->len) + maxaddr = mm->gpa + mm->len; + } + } + return (maxaddr); +} + +int +vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault) +{ + + vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); + return (0); +} + +void +vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, + mem_region_read_t mmio_read, mem_region_write_t mmio_write) +{ + int i; + + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start == 0 && + 
vm->mmio_region[i].end == 0) { + vm->mmio_region[i].start = start; + vm->mmio_region[i].end = start + size; + vm->mmio_region[i].read = mmio_read; + vm->mmio_region[i].write = mmio_write; + return; + } + } + + panic("%s: No free MMIO region", __func__); +} + +void +vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) +{ + int i; + + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start == start && + vm->mmio_region[i].end == start + size) { + memset(&vm->mmio_region[i], 0, + sizeof(vm->mmio_region[i])); + return; + } + } + + panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, + start + size); +} + +static int +vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm; + struct vm_exit *vme; + struct vie *vie; + struct hyp *hyp; + uint64_t fault_ipa; + struct vm_guest_paging *paging; + struct vmm_mmio_region *vmr; + int error, i; + + vm = vcpu->vm; + hyp = vm->cookie; + if (!hyp->aplic_attached) + goto out_user; + + vme = &vcpu->exitinfo; + vie = &vme->u.inst_emul.vie; + paging = &vme->u.inst_emul.paging; + + fault_ipa = vme->u.inst_emul.gpa; + + vmr = NULL; + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start <= fault_ipa && + vm->mmio_region[i].end > fault_ipa) { + vmr = &vm->mmio_region[i]; + break; + } + } + if (vmr == NULL) + goto out_user; + + error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, + vmr->read, vmr->write, retu); + return (error); + +out_user: + *retu = true; + return (0); +} + +int +vm_suspend(struct vm *vm, enum vm_suspend_how how) +{ + int i; + + if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) + return (EINVAL); + + if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { + VM_CTR2(vm, "virtual machine already suspended %d/%d", + vm->suspend, how); + return (EALREADY); + } + + VM_CTR1(vm, "virtual machine successfully suspended %d", how); + + /* + * Notify all active vcpus that they are now suspended. 
+ */ + for (i = 0; i < vm->maxcpus; i++) { + if (CPU_ISSET(i, &vm->active_cpus)) + vcpu_notify_event(vm_vcpu(vm, i)); + } + + return (0); +} + +void +vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) +{ + struct vm *vm = vcpu->vm; + struct vm_exit *vmexit; + + KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, + ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); + + vmexit = vm_exitinfo(vcpu); + vmexit->pc = pc; + vmexit->inst_length = 4; + vmexit->exitcode = VM_EXITCODE_SUSPENDED; + vmexit->u.suspended.how = vm->suspend; +} + +void +vm_exit_debug(struct vcpu *vcpu, uint64_t pc) +{ + struct vm_exit *vmexit; + + vmexit = vm_exitinfo(vcpu); + vmexit->pc = pc; + vmexit->inst_length = 4; + vmexit->exitcode = VM_EXITCODE_DEBUG; +} + +int +vm_activate_cpu(struct vcpu *vcpu) +{ + struct vm *vm = vcpu->vm; + + if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) + return (EBUSY); + + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); + return (0); + +} + +int +vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) +{ + if (vcpu == NULL) { + vm->debug_cpus = vm->active_cpus; + for (int i = 0; i < vm->maxcpus; i++) { + if (CPU_ISSET(i, &vm->active_cpus)) + vcpu_notify_event(vm_vcpu(vm, i)); + } + } else { + if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) + return (EINVAL); + + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); + vcpu_notify_event(vcpu); + } + return (0); +} + +int +vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) +{ + + if (vcpu == NULL) { + CPU_ZERO(&vm->debug_cpus); + } else { + if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) + return (EINVAL); + + CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); + } + return (0); +} + +int +vcpu_debugged(struct vcpu *vcpu) +{ + + return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); +} + +cpuset_t +vm_active_cpus(struct vm *vm) +{ + + return (vm->active_cpus); +} + +cpuset_t +vm_debug_cpus(struct vm *vm) +{ + + return (vm->debug_cpus); +} + +cpuset_t +vm_suspended_cpus(struct vm *vm) +{ + + return (vm->suspended_cpus); +} + + +void * +vcpu_stats(struct vcpu *vcpu) +{ + + return (vcpu->stats); +} + +/* + * This function is called to ensure that a vcpu "sees" a pending event + * as soon as possible: + * - If the vcpu thread is sleeping then it is woken up. + * - If the vcpu is running on a different host_cpu then an IPI will be directed + * to the host_cpu to cause the vcpu to trap into the hypervisor. + */ +static void +vcpu_notify_event_locked(struct vcpu *vcpu) +{ + int hostcpu; + + hostcpu = vcpu->hostcpu; + if (vcpu->state == VCPU_RUNNING) { + KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); + if (hostcpu != curcpu) { + ipi_cpu(hostcpu, vmm_ipinum); + } else { + /* + * If the 'vcpu' is running on 'curcpu' then it must + * be sending a notification to itself (e.g. SELF_IPI). + * The pending event will be picked up when the vcpu + * transitions back to guest context. + */ + } + } else { + KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " + "with hostcpu %d", vcpu->state, hostcpu)); + if (vcpu->state == VCPU_SLEEPING) + wakeup_one(vcpu); + } +} + +void +vcpu_notify_event(struct vcpu *vcpu) +{ + vcpu_lock(vcpu); + vcpu_notify_event_locked(vcpu); + vcpu_unlock(vcpu); +} + +static void +restore_guest_fpustate(struct vcpu *vcpu) +{ + + /* Flush host state to the pcb. */ + fpe_state_save(curthread); + + /* Ensure the VFP state will be re-loaded when exiting the guest. 
*/ + PCPU_SET(fpcurthread, NULL); + + /* restore guest FPU state */ + fpe_enable(); + fpe_restore(vcpu->guestfpu); + + /* + * The FPU is now "dirty" with the guest's state so turn on emulation + * to trap any access to the FPU by the host. + */ + fpe_disable(); +} + +static void +save_guest_fpustate(struct vcpu *vcpu) +{ + + /* Save guest FPE state. */ + fpe_enable(); + fpe_store(vcpu->guestfpu); + fpe_disable(); + + KASSERT(PCPU_GET(fpcurthread) == NULL, + ("%s: fpcurthread set with guest registers", __func__)); +} + +static int +vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, + bool from_idle) +{ + int error; + + vcpu_assert_locked(vcpu); + + /* + * State transitions from the vmmdev_ioctl() must always begin from + * the VCPU_IDLE state. This guarantees that there is only a single + * ioctl() operating on a vcpu at any point. + */ + if (from_idle) { + while (vcpu->state != VCPU_IDLE) { + vcpu_notify_event_locked(vcpu); + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", + hz / 1000); + } + } else { + KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " + "vcpu idle state")); + } + + if (vcpu->state == VCPU_RUNNING) { + KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " + "mismatch for running vcpu", curcpu, vcpu->hostcpu)); + } else { + KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " + "vcpu that is not running", vcpu->hostcpu)); + } + + /* + * The following state transitions are allowed: + * IDLE -> FROZEN -> IDLE + * FROZEN -> RUNNING -> FROZEN + * FROZEN -> SLEEPING -> FROZEN + */ + switch (vcpu->state) { + case VCPU_IDLE: + case VCPU_RUNNING: + case VCPU_SLEEPING: + error = (newstate != VCPU_FROZEN); + break; + case VCPU_FROZEN: + error = (newstate == VCPU_FROZEN); + break; + default: + error = 1; + break; + } + + if (error) + return (EBUSY); + + vcpu->state = newstate; + if (newstate == VCPU_RUNNING) + vcpu->hostcpu = curcpu; + else + vcpu->hostcpu = NOCPU; + + if (newstate == VCPU_IDLE) + wakeup(&vcpu->state); + + return (0); +} + +static void +vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) +{ + int error; + + if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) + panic("Error %d setting state to %d\n", error, newstate); +} + +static void +vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) +{ + int error; + + if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) + panic("Error %d setting state to %d", error, newstate); +} + +int +vm_get_capability(struct vcpu *vcpu, int type, int *retval) +{ + + if (type < 0 || type >= VM_CAP_MAX) + return (EINVAL); + + return (vmmops_getcap(vcpu->cookie, type, retval)); +} + +int +vm_set_capability(struct vcpu *vcpu, int type, int val) +{ + + if (type < 0 || type >= VM_CAP_MAX) + return (EINVAL); + + return (vmmops_setcap(vcpu->cookie, type, val)); +} + +struct vm * +vcpu_vm(struct vcpu *vcpu) +{ + + return (vcpu->vm); +} + +int +vcpu_vcpuid(struct vcpu *vcpu) +{ + + return (vcpu->vcpuid); +} + +void * +vcpu_get_cookie(struct vcpu *vcpu) +{ + + return (vcpu->cookie); +} + +struct vcpu * +vm_vcpu(struct vm *vm, int vcpuid) +{ + + return (vm->vcpu[vcpuid]); +} + +int +vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) +{ + int error; + + vcpu_lock(vcpu); + error = vcpu_set_state_locked(vcpu, newstate, from_idle); + vcpu_unlock(vcpu); + + return (error); +} + +enum vcpu_state +vcpu_get_state(struct vcpu *vcpu, int *hostcpu) +{ + enum vcpu_state state; + + vcpu_lock(vcpu); + state = vcpu->state; + if (hostcpu != NULL) + 
*hostcpu = vcpu->hostcpu; + vcpu_unlock(vcpu); + + return (state); +} + +static void * +_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ + int i, count, pageoff; + struct mem_map *mm; + vm_page_t m; + + pageoff = gpa & PAGE_MASK; + if (len > PAGE_SIZE - pageoff) + panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); + + count = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && + gpa < mm->gpa + mm->len) { + count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, + trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); + break; + } + } + + if (count == 1) { + *cookie = m; + return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); + } else { + *cookie = NULL; + return (NULL); + } +} + +void * +vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ +#ifdef INVARIANTS + /* + * The current vcpu should be frozen to ensure 'vm_memmap[]' + * stability. + */ + int state = vcpu_get_state(vcpu, NULL); + KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", + __func__, state)); +#endif + return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); +} + +void * +vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ + sx_assert(&vm->mem_segs_lock, SX_LOCKED); + return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); +} + +void +vm_gpa_release(void *cookie) +{ + vm_page_t m = cookie; + + vm_page_unwire(m, PQ_ACTIVE); +} + +int +vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) +{ + + if (reg >= VM_REG_LAST) + return (EINVAL); + + return (vmmops_getreg(vcpu->cookie, reg, retval)); +} + +int +vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) +{ + int error; + + if (reg >= VM_REG_LAST) + return (EINVAL); + error = vmmops_setreg(vcpu->cookie, reg, val); + if (error || reg != VM_REG_GUEST_SEPC) + return (error); + + vcpu->nextpc = val; + + return (0); +} + +void * +vm_get_cookie(struct vm *vm) +{ + + return (vm->cookie); +} + +int +vm_inject_exception(struct vcpu *vcpu, uint64_t scause) +{ + + return (vmmops_exception(vcpu->cookie, scause)); +} + +int +vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr) +{ + + return (aplic_attach_to_vm(vm->cookie, descr)); +} + +int +vm_assert_irq(struct vm *vm, uint32_t irq) +{ + + return (aplic_inject_irq(vm->cookie, -1, irq, true)); +} + +int +vm_deassert_irq(struct vm *vm, uint32_t irq) +{ + + return (aplic_inject_irq(vm->cookie, -1, irq, false)); +} + +int +vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, + int func) +{ + + return (aplic_inject_msi(vm->cookie, msg, addr)); +} + +static int +vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) +{ + + vcpu_lock(vcpu); + + while (1) { + if (aplic_check_pending(vcpu->cookie)) + break; + + if (vcpu_should_yield(vcpu)) + break; + + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + /* + * XXX msleep_spin() cannot be interrupted by signals so + * wake up periodically to check pending signals. 
+ */ + msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000); + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + } + vcpu_unlock(vcpu); + + *retu = false; + + return (0); +} + +static int +vm_handle_paging(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm; + struct vm_exit *vme; + struct vm_map *map; + uint64_t addr; + pmap_t pmap; + int ftype, rv; + + vm = vcpu->vm; + vme = &vcpu->exitinfo; + + pmap = vmspace_pmap(vm->vmspace); + addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); + + dprintf("%s: %lx\n", __func__, addr); + + switch (vme->scause) { + case SCAUSE_STORE_GUEST_PAGE_FAULT: + ftype = VM_PROT_WRITE; + break; + case SCAUSE_FETCH_GUEST_PAGE_FAULT: + ftype = VM_PROT_EXECUTE; + break; + case SCAUSE_LOAD_GUEST_PAGE_FAULT: + ftype = VM_PROT_READ; + break; + default: + panic("unknown page trap: %lu", vme->scause); + } + + /* The page exists, but the page table needs to be updated. */ + if (pmap_fault(pmap, addr, ftype) != KERN_SUCCESS) { + //printf("%s: pmap_fault failed\n", __func__); + return (0); + } + + map = &vm->vmspace->vm_map; + rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); + if (rv != KERN_SUCCESS) { + printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", + __func__, addr, ftype, rv); + return (EFAULT); + } + + return (0); +} + +int +vm_run(struct vcpu *vcpu) +{ + struct vm_eventinfo evinfo; + struct vm_exit *vme; + struct vm *vm; + struct hypctx *hypctx; + pmap_t pmap; + int error; + int vcpuid; + int i; + bool retu; + + vm = vcpu->vm; + + dprintf("%s\n", __func__); + + vcpuid = vcpu->vcpuid; + + if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EINVAL); + + if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) + return (EINVAL); + + pmap = vmspace_pmap(vm->vmspace); + vme = &vcpu->exitinfo; + evinfo.rptr = NULL; + evinfo.sptr = &vm->suspend; + evinfo.iptr = NULL; +restart: + critical_enter(); + + restore_guest_fpustate(vcpu); + + vcpu_require_state(vcpu, VCPU_RUNNING); + error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); + vcpu_require_state(vcpu, VCPU_FROZEN); + + save_guest_fpustate(vcpu); + + critical_exit(); + + if (error == 0) { + retu = false; + switch (vme->exitcode) { + case VM_EXITCODE_INST_EMUL: + vcpu->nextpc = vme->pc + vme->inst_length; + error = vm_handle_inst_emul(vcpu, &retu); + break; + case VM_EXITCODE_WFI: + vcpu->nextpc = vme->pc + vme->inst_length; + error = vm_handle_wfi(vcpu, vme, &retu); + break; + case VM_EXITCODE_ECALL: + /* Handle in userland. */ + vcpu->nextpc = vme->pc + vme->inst_length; + error = vmm_sbi_ecall(vcpu, &retu); + if (retu == true) { + hypctx = vcpu_get_cookie(vcpu); + for (i = 0; i < nitems(vme->u.ecall.args); i++) + vme->u.ecall.args[i] = + hypctx->guest_regs.hyp_a[i]; + } + break; + case VM_EXITCODE_PAGING: + vcpu->nextpc = vme->pc; + error = vm_handle_paging(vcpu, &retu); + break; + default: + /* Handle in userland. */ + vcpu->nextpc = vme->pc; + retu = true; + break; + } + } + + if (error == 0 && retu == false) + goto restart; + + return (error); +} Index: sys/riscv/vmm/vmm_aplic.h =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_aplic.h @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_APLIC_H_ +#define _VMM_APLIC_H_ + +struct hyp; +struct hypctx; +struct vm_aplic_descr; + +int aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr); +void aplic_detach_from_vm(struct hyp *hyp); +int aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level); +int aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr); +void aplic_vminit(struct hyp *hyp); +void aplic_vmcleanup(struct hyp *hyp); +int aplic_check_pending(struct hypctx *hypctx); + +void aplic_cpuinit(struct hypctx *hypctx); +void aplic_cpucleanup(struct hypctx *hypctx); +void aplic_flush_hwstate(struct hypctx *hypctx); +void aplic_sync_hwstate(struct hypctx *hypctx); +int aplic_max_cpu_count(struct hyp *hyp); + +#endif /* !_VMM_APLIC_H_ */ Index: sys/riscv/vmm/vmm_aplic.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_aplic.c @@ -0,0 +1,461 @@ +/*- + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC"); + +#define APLIC_DOMAINCFG 0x0000 +#define DOMAINCFG_IE (1 << 8) /* Interrupt Enable. */ +#define DOMAINCFG_DM (1 << 2) /* Direct Mode. */ +#define DOMAINCFG_BE (1 << 0) /* Big-Endian. */ +#define APLIC_SOURCECFG(x) (0x0004 + ((x) - 1) * 4) +#define SOURCECFG_D (1 << 10) /* D - Delegate. */ +/* If D == 0. */ +#define SOURCECFG_SM_S (0) +#define SOURCECFG_SM_M (0x7 << SOURCECFG_SM_S) +#define SOURCECFG_SM_INACTIVE (0) /* Not delegated. */ +#define SOURCECFG_SM_DETACHED (1) +#define SOURCECFG_SM_RESERVED (2) +#define SOURCECFG_SM_RESERVED1 (3) +#define SOURCECFG_SM_EDGE1 (4) /* Rising edge. */ +#define SOURCECFG_SM_EDGE0 (5) /* Falling edge. */ +#define SOURCECFG_SM_LEVEL1 (6) /* High. */ +#define SOURCECFG_SM_LEVEL0 (7) /* Low. */ +/* If D == 1. */ +#define SOURCECFG_CHILD_INDEX_S (0) +#define SOURCECFG_CHILD_INDEX_M (0x3ff << SOURCECFG_CHILD_INDEX_S) +#define APLIC_SETIPNUM 0x1cdc +#define APLIC_CLRIPNUM 0x1ddc +#define APLIC_SETIENUM 0x1edc +#define APLIC_CLRIENUM 0x1fdc +#define APLIC_GENMSI 0x3000 +#define APLIC_TARGET(x) (0x3004 + ((x) - 1) * 4) +#define APLIC_IDC(x) (0x4000 + (x) * 32) +#define IDC_IDELIVERY(x) (APLIC_IDC(x) + 0x0) +#define IDC_IFORCE(x) (APLIC_IDC(x) + 0x4) +#define IDC_ITHRESHOLD(x) (APLIC_IDC(x) + 0x8) +#define IDC_TOPI(x) (APLIC_IDC(x) + 0x18) +#define IDC_CLAIMI(x) (APLIC_IDC(x) + 0x1C) +#define CLAIMI_IRQ_S (16) +#define CLAIMI_IRQ_M (0x3ff << CLAIMI_IRQ_S) +#define CLAIMI_PRIO_S (0) +#define CLAIMI_PRIO_M (0xff << CLAIMI_PRIO_S) + +struct aplic_irq { + uint32_t sourcecfg; + uint32_t state; +#define APLIC_IRQ_STATE_PENDING (1 << 0) +#define APLIC_IRQ_STATE_ENABLED (1 << 1) + uint32_t target; +}; + +struct aplic { + uint32_t mem_start; + uint32_t mem_end; + struct mtx mtx; + struct aplic_irq *irqs; + int nirqs; + uint32_t domaincfg; +}; + +static int +aplic_handle_sourcecfg(struct aplic *aplic, int i, bool write, uint64_t *val) +{ + struct aplic_irq *irq; + + irq = &aplic->irqs[i]; + if (write) + irq->sourcecfg = *val; + else + *val = irq->sourcecfg; + + return (0); +} + +static int +aplic_set_enabled(struct aplic *aplic, bool write, uint64_t *val, bool enabled) +{ + struct aplic_irq *irq; + int i; + + if (!write) { + *val = 0; + return (0); + } + + i = *val; + if (i <= 0 || i > aplic->nirqs) + return (-1); + + irq = &aplic->irqs[i]; + + if (enabled) + irq->state |= APLIC_IRQ_STATE_ENABLED; + else + irq->state &= ~APLIC_IRQ_STATE_ENABLED; + + return (0); +} + +static int +aplic_handle_target(struct aplic *aplic, int i, bool write, uint64_t *val) +{ + + printf("%s: i %d\n", __func__, i); + + return (0); +} + +static int +aplic_handle_idc_claimi(struct aplic *aplic, int cpu, bool write, uint64_t *val) +{ + struct aplic_irq *irq; + int i; + + /* Writes to claimi are ignored. 
*/ + if (write) + return (-1); + + for (i = 0; i < aplic->nirqs; i++) { + irq = &aplic->irqs[i]; + if (irq->state & APLIC_IRQ_STATE_PENDING) { + *val = (i << CLAIMI_IRQ_S) | (0 << CLAIMI_PRIO_S); + irq->state &= ~APLIC_IRQ_STATE_PENDING; + return (0); + } + } + + panic("claimi without pending"); + + return (0); +} + +static int +aplic_handle_idc(struct aplic *aplic, int cpu, int reg, bool write, + uint64_t *val) +{ + int error; + + switch (reg + APLIC_IDC(0)) { + case IDC_IDELIVERY(0): + case IDC_IFORCE(0): + case IDC_ITHRESHOLD(0): + case IDC_TOPI(0): + error = 0; + break; + case IDC_CLAIMI(0): + error = aplic_handle_idc_claimi(aplic, cpu, write, val); + break; + default: + panic("unknown reg"); + } + + return (error); +} + +static int +aplic_mmio_access(struct aplic *aplic, uint64_t reg, bool write, uint64_t *val) +{ + int error; + int cpu; + int r; + int i; + + if ((reg >= APLIC_SOURCECFG(1)) && + (reg <= APLIC_SOURCECFG(aplic->nirqs))) { + i = ((reg - APLIC_SOURCECFG(1)) >> 2) + 1; + error = aplic_handle_sourcecfg(aplic, i, write, val); + return (error); + } + + if ((reg >= APLIC_TARGET(1)) && (reg <= APLIC_TARGET(aplic->nirqs))) { + i = (reg - APLIC_TARGET(1)) >> 2; + error = aplic_handle_target(aplic, i, write, val); + return (error); + } + + if ((reg >= APLIC_IDC(0)) && (reg < APLIC_IDC(mp_ncpus))) { + cpu = (reg - APLIC_IDC(0)) >> 5; + r = (reg - APLIC_IDC(0)) % 32; + error = aplic_handle_idc(aplic, cpu, r, write, val); + return (error); + } + + switch (reg) { + case APLIC_DOMAINCFG: + aplic->domaincfg = *val & DOMAINCFG_IE; + break; + case APLIC_SETIENUM: + aplic_set_enabled(aplic, write, val, true); + break; + case APLIC_CLRIENUM: + aplic_set_enabled(aplic, write, val, false); + break; + default: + panic("unknown reg %lx", reg); + break; + }; + + return (0); +} + +static int +mem_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval, int size, + void *arg) +{ + struct hypctx *hypctx; + struct hyp *hyp; + struct aplic *aplic; + uint64_t reg; + uint64_t val; + int error; + + hypctx = vcpu_get_cookie(vcpu); + hyp = hypctx->hyp; + aplic = hyp->aplic; + + dprintf("%s: fault_ipa %lx size %d\n", __func__, fault_ipa, size); + + if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end) + return (EINVAL); + + reg = fault_ipa - aplic->mem_start; + + error = aplic_mmio_access(aplic, reg, false, &val); + if (error == 0) + *rval = val; + + return (error); +} + +static int +mem_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval, int size, + void *arg) +{ + struct hypctx *hypctx; + struct hyp *hyp; + struct aplic *aplic; + uint64_t reg; + uint64_t val; + int error; + + hypctx = vcpu_get_cookie(vcpu); + hyp = hypctx->hyp; + aplic = hyp->aplic; + + dprintf("%s: fault_ipa %lx wval %lx size %d\n", __func__, fault_ipa, + wval, size); + + if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end) + return (EINVAL); + + reg = fault_ipa - aplic->mem_start; + + val = wval; + + error = aplic_mmio_access(aplic, reg, true, &val); + + return (error); +} + +void +aplic_vminit(struct hyp *hyp) +{ + struct aplic *aplic; + + hyp->aplic = malloc(sizeof(*hyp->aplic), M_APLIC, + M_WAITOK | M_ZERO); + aplic = hyp->aplic; + + mtx_init(&aplic->mtx, "APLIC lock", NULL, MTX_SPIN); +} + +void +aplic_vmcleanup(struct hyp *hyp) +{ + struct aplic *aplic; + + aplic = hyp->aplic; + + mtx_destroy(&aplic->mtx); + + free(hyp->aplic, M_APLIC); +} + +int +aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr) +{ + struct aplic *aplic; + struct vm *vm; + + vm = hyp->vm; + + 
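The vm_register_inst_handler() call that follows is what routes guest MMIO faults inside the APLIC window to the mem_read()/mem_write() callbacks above. For orientation, a hypothetical userspace monitor would describe the window and attach it once before running any vcpu; only the mem_start and mem_size members are taken from this file, while the base address, size, the vmfd descriptor and the error handling are illustrative assumptions:

	struct vm_aplic_descr descr = {
		.mem_start = 0x0c000000,	/* example guest-physical base */
		.mem_size = 0x4000,		/* example window size */
	};

	/* vmfd: descriptor obtained by opening /dev/vmm/<vmname> */
	if (ioctl(vmfd, VM_ATTACH_APLIC, &descr) == -1)
		err(1, "VM_ATTACH_APLIC");

Guest loads and stores that fault in [mem_start, mem_start + mem_size) are then decoded and dispatched through the handler registered below.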
printf("%s\n", __func__); + + vm_register_inst_handler(vm, descr->mem_start, descr->mem_size, + mem_read, mem_write); + + aplic = hyp->aplic; + aplic->nirqs = 63; + aplic->mem_start = descr->mem_start; + aplic->mem_end = descr->mem_start + descr->mem_size; + aplic->irqs = malloc(sizeof(struct aplic_irq) * aplic->nirqs, M_APLIC, + M_WAITOK | M_ZERO); + + hyp->aplic_attached = true; + + return (0); +} + +void +aplic_detach_from_vm(struct hyp *hyp) +{ + struct aplic *aplic; + + aplic = hyp->aplic; + + printf("%s\n", __func__); + + if (hyp->aplic_attached) { + hyp->aplic_attached = false; + + free(aplic->irqs, M_APLIC); + } +} + +int +aplic_check_pending(struct hypctx *hypctx) +{ + struct aplic_irq *irq; + struct aplic *aplic; + struct hyp *hyp; + int i; + + hyp = hypctx->hyp; + aplic = hyp->aplic; + if ((aplic->domaincfg & DOMAINCFG_IE) == 0) + return (0); + + for (i = 0; i < aplic->nirqs; i++) { + irq = &aplic->irqs[i]; + if (irq->state & APLIC_IRQ_STATE_PENDING) + return (1); + } + + return (0); +} + +int +aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level) +{ + struct aplic_irq *irq; + struct aplic *aplic; + + aplic = hyp->aplic; + if ((aplic->domaincfg & DOMAINCFG_IE) == 0) + return (0); + + irq = &aplic->irqs[irqid]; + if (irq->sourcecfg & SOURCECFG_D) + return (0); + + switch (irq->sourcecfg & SOURCECFG_SM_M) { + case SOURCECFG_SM_EDGE1: + if (level) + irq->state |= APLIC_IRQ_STATE_PENDING; + else + irq->state &= ~APLIC_IRQ_STATE_PENDING; + break; + default: + break; + } + + return (0); +} + +int +aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr) +{ + + /* TODO. */ + + return (ENXIO); +} + +void +aplic_cpuinit(struct hypctx *hypctx) +{ + +} + +void +aplic_cpucleanup(struct hypctx *hypctx) +{ + +} + +void +aplic_flush_hwstate(struct hypctx *hypctx) +{ + +} + +void +aplic_sync_hwstate(struct hypctx *hypctx) +{ + +} + +int +aplic_max_cpu_count(struct hyp *hyp) +{ + int16_t max_count; + + max_count = vm_get_maxcpus(hyp->vm); + + return (max_count); +} Index: sys/riscv/vmm/vmm_dev.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_dev.c @@ -0,0 +1,1052 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * Copyright (c) 2015 Mihai Carabas + * All rights reserved. + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "riscv.h" +#include "vmm_stat.h" +#include "vmm_aplic.h" + +struct devmem_softc { + int segid; + char *name; + struct cdev *cdev; + struct vmmdev_softc *sc; + SLIST_ENTRY(devmem_softc) link; +}; + +struct vmmdev_softc { + struct vm *vm; /* vm instance cookie */ + struct cdev *cdev; + struct ucred *ucred; + SLIST_ENTRY(vmmdev_softc) link; + SLIST_HEAD(, devmem_softc) devmem; + int flags; +}; +#define VSC_LINKED 0x01 + +static SLIST_HEAD(, vmmdev_softc) head; + +static unsigned pr_allow_flag; +static struct mtx vmmdev_mtx; +MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF); + +static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); + +SYSCTL_DECL(_hw_vmm); + +static int vmm_priv_check(struct ucred *ucred); +static int devmem_create_cdev(const char *vmname, int id, char *devmem); +static void devmem_destroy(void *arg); + +static int +vmm_priv_check(struct ucred *ucred) +{ + + if (jailed(ucred) && + !(ucred->cr_prison->pr_allow & pr_allow_flag)) + return (EPERM); + + return (0); +} + +static int +vcpu_lock_one(struct vcpu *vcpu) +{ + int error; + + error = vcpu_set_state(vcpu, VCPU_FROZEN, true); + return (error); +} + +static void +vcpu_unlock_one(struct vcpu *vcpu) +{ + enum vcpu_state state; + + state = vcpu_get_state(vcpu, NULL); + if (state != VCPU_FROZEN) { + panic("vcpu %s(%d) has invalid state %d", + vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state); + } + + vcpu_set_state(vcpu, VCPU_IDLE, false); +} + +static int +vcpu_lock_all(struct vmmdev_softc *sc) +{ + struct vcpu *vcpu; + int error; + uint16_t i, j, maxcpus; + + error = 0; + vm_slock_vcpus(sc->vm); + maxcpus = vm_get_maxcpus(sc->vm); + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + error = vcpu_lock_one(vcpu); + if (error) + break; + } + + if (error) { + for (j = 0; j < i; j++) { + vcpu = vm_vcpu(sc->vm, j); + if (vcpu == NULL) + continue; + vcpu_unlock_one(vcpu); + } + vm_unlock_vcpus(sc->vm); + } + + return (error); +} + +static void +vcpu_unlock_all(struct vmmdev_softc *sc) +{ + struct vcpu *vcpu; + uint16_t i, maxcpus; + + maxcpus = vm_get_maxcpus(sc->vm); + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(sc->vm, i); + if (vcpu == NULL) + continue; + vcpu_unlock_one(vcpu); + } + vm_unlock_vcpus(sc->vm); +} + +static struct vmmdev_softc * +vmmdev_lookup(const char *name) +{ + struct vmmdev_softc *sc; + +#ifdef notyet /* XXX kernel is not compiled with invariants */ + mtx_assert(&vmmdev_mtx, MA_OWNED); +#endif + + SLIST_FOREACH(sc, &head, link) { + if (strcmp(name, vm_name(sc->vm)) == 0) + break; + } + + if (sc == NULL) + return (NULL); + + if (cr_cansee(curthread->td_ucred, sc->ucred)) + return (NULL); + + return (sc); +} + +static struct vmmdev_softc * +vmmdev_lookup2(struct cdev *cdev) +{ + + return 
(cdev->si_drv1); +} + +static int +vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) +{ + int error, off, c, prot; + vm_paddr_t gpa, maxaddr; + void *hpa, *cookie; + struct vmmdev_softc *sc; + + error = vmm_priv_check(curthread->td_ucred); + if (error) + return (error); + + sc = vmmdev_lookup2(cdev); + if (sc == NULL) + return (ENXIO); + + /* + * Get a read lock on the guest memory map. + */ + vm_slock_memsegs(sc->vm); + + prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); + maxaddr = vmm_sysmem_maxaddr(sc->vm); + while (uio->uio_resid > 0 && error == 0) { + gpa = uio->uio_offset; + off = gpa & PAGE_MASK; + c = min(uio->uio_resid, PAGE_SIZE - off); + + /* + * The VM has a hole in its physical memory map. If we want to + * use 'dd' to inspect memory beyond the hole we need to + * provide bogus data for memory that lies in the hole. + * + * Since this device does not support lseek(2), dd(1) will + * read(2) blocks of data to simulate the lseek(2). + */ + hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); + if (hpa == NULL) { + if (uio->uio_rw == UIO_READ && gpa < maxaddr) + error = uiomove(__DECONST(void *, zero_region), + c, uio); + else + error = EFAULT; + } else { + error = uiomove(hpa, c, uio); + vm_gpa_release(cookie); + } + } + vm_unlock_memsegs(sc->vm); + return (error); +} + +static int +get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) +{ + struct devmem_softc *dsc; + int error; + bool sysmem; + + error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); + if (error || mseg->len == 0) + return (error); + + if (!sysmem) { + SLIST_FOREACH(dsc, &sc->devmem, link) { + if (dsc->segid == mseg->segid) + break; + } + KASSERT(dsc != NULL, ("%s: devmem segment %d not found", + __func__, mseg->segid)); + error = copystr(dsc->name, mseg->name, sizeof(mseg->name), + NULL); + } else { + bzero(mseg->name, sizeof(mseg->name)); + } + + return (error); +} + +static int +alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) +{ + char *name; + int error; + bool sysmem; + + error = 0; + name = NULL; + sysmem = true; + + /* + * The allocation is lengthened by 1 to hold a terminating NUL. It'll + * be stripped off when devfs processes the full string. 
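As a hedged sketch of how a named (device-memory) segment is expected to reach alloc_memseg() from userspace; the segment id, size and name are invented for the example, and vmfd stands for the VM's control descriptor:

	struct vm_memseg memseg = {
		.segid = 1,		/* example segment id */
		.len = 0x20000,		/* example size */
	};

	snprintf(memseg.name, sizeof(memseg.name), "bootrom");
	if (ioctl(vmfd, VM_ALLOC_MEMSEG, &memseg) == -1)
		err(1, "VM_ALLOC_MEMSEG");

Because the name is non-empty the segment is not treated as system memory, and devmem_create_cdev() later exposes it as /dev/vmm.io/<vmname>.bootrom.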
+ */ + if (VM_MEMSEG_NAME(mseg)) { + sysmem = false; + name = malloc(sizeof(mseg->name), M_VMMDEV, M_WAITOK); + error = copystr(mseg->name, name, sizeof(mseg->name), NULL); + if (error) + goto done; + } + + error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); + if (error) + goto done; + + if (VM_MEMSEG_NAME(mseg)) { + error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); + if (error) + vm_free_memseg(sc->vm, mseg->segid); + else + name = NULL; /* freed when 'cdev' is destroyed */ + } +done: + free(name, M_VMMDEV); + return (error); +} + +static int +vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, + uint64_t *regval) +{ + int error, i; + + error = 0; + for (i = 0; i < count; i++) { + error = vm_get_register(vcpu, regnum[i], &regval[i]); + if (error) + break; + } + return (error); +} + +static int +vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, + uint64_t *regval) +{ + int error, i; + + error = 0; + for (i = 0; i < count; i++) { + error = vm_set_register(vcpu, regnum[i], regval[i]); + if (error) + break; + } + return (error); +} + +static int +vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, + struct thread *td) +{ + int error, vcpuid, size; + cpuset_t *cpuset; + struct vmmdev_softc *sc; + struct vcpu *vcpu; + struct vm_register *vmreg; + struct vm_register_set *vmregset; + struct vm_run *vmrun; + struct vm_aplic_descr *aplic; + struct vm_cpuset *vm_cpuset; + struct vm_irq *vi; + struct vm_capability *vmcap; + struct vm_stats *vmstats; + struct vm_stat_desc *statdesc; + struct vm_suspend *vmsuspend; + struct vm_exception *vmexc; + struct vm_gla2gpa *gg; + struct vm_memmap *mm; + struct vm_munmap *mu; + struct vm_msi *vmsi; + struct vm_cpu_topology *topology; + uint64_t *regvals; + int *regnums; + enum { NONE, SINGLE, ALL } vcpus_locked; + bool memsegs_locked; + + dprintf("%s: cmd %ld\n", __func__, cmd); + + error = vmm_priv_check(curthread->td_ucred); + if (error) + return (error); + + sc = vmmdev_lookup2(cdev); + if (sc == NULL) + return (ENXIO); + + error = 0; + vcpuid = -1; + vcpu = NULL; + vcpus_locked = NONE; + memsegs_locked = false; + + /* + * Some VMM ioctls can operate only on vcpus that are not running. + */ + switch (cmd) { + case VM_RUN: + case VM_GET_REGISTER: + case VM_SET_REGISTER: + case VM_GET_REGISTER_SET: + case VM_SET_REGISTER_SET: + case VM_INJECT_EXCEPTION: + case VM_GET_CAPABILITY: + case VM_SET_CAPABILITY: + case VM_GLA2GPA_NOFAULT: + case VM_ACTIVATE_CPU: + /* + * ioctls that can operate only on vcpus that are not running. + */ + vcpuid = *(int *)data; + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); + if (vcpu == NULL) { + error = EINVAL; + goto done; + } + error = vcpu_lock_one(vcpu); + if (error) + goto done; + vcpus_locked = SINGLE; + break; + + case VM_ALLOC_MEMSEG: + case VM_MMAP_MEMSEG: + case VM_MUNMAP_MEMSEG: + case VM_REINIT: + case VM_ATTACH_APLIC: + /* + * ioctls that modify the memory map must lock memory + * segments exclusively. + */ + vm_xlock_memsegs(sc->vm); + memsegs_locked = true; + + /* + * ioctls that operate on the entire virtual machine must + * prevent all vcpus from running. + */ + error = vcpu_lock_all(sc); + if (error) + goto done; + vcpus_locked = ALL; + break; + case VM_GET_MEMSEG: + case VM_MMAP_GETNEXT: + /* + * Lock the memory map while it is being inspected. + */ + vm_slock_memsegs(sc->vm); + memsegs_locked = true; + break; + + case VM_STATS: + /* + * These do not need the vCPU locked but do operate on + * a specific vCPU. 
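For the single-vcpu commands sorted above, the target vcpu id is read from the leading int of the ioctl argument (the *(int *)data dereference). A rough userspace sketch for VM_GET_REGISTER; regnum and regval are the member names used in this file, while vmfd and the zeroed leading member standing for vcpu 0 are assumptions of the example:

	struct vm_register vmreg;

	memset(&vmreg, 0, sizeof(vmreg));	/* leading vcpu-id member left 0: vcpu 0 */
	vmreg.regnum = VM_REG_GUEST_SEPC;
	if (ioctl(vmfd, VM_GET_REGISTER, &vmreg) == 0)
		printf("sepc 0x%lx\n", vmreg.regval);

The ioctl path freezes that vcpu via vcpu_lock_one() for the duration of the call and returns it to VCPU_IDLE on the way out.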
+ */ + vcpuid = *(int *)data; + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); + if (vcpu == NULL) { + error = EINVAL; + goto done; + } + break; + + case VM_SUSPEND_CPU: + case VM_RESUME_CPU: + /* + * These can either operate on all CPUs via a vcpuid of + * -1 or on a specific vCPU. + */ + vcpuid = *(int *)data; + if (vcpuid == -1) + break; + vcpu = vm_alloc_vcpu(sc->vm, vcpuid); + if (vcpu == NULL) { + error = EINVAL; + goto done; + } + break; + + case VM_ASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_assert_irq(sc->vm, vi->irq); + break; + case VM_DEASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_deassert_irq(sc->vm, vi->irq); + break; + default: + break; + } + + switch (cmd) { + case VM_RUN: { + struct vm_exit *vme; + + vmrun = (struct vm_run *)data; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error != 0) + break; + + error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); + if (error != 0) + break; + break; + } + case VM_SUSPEND: + vmsuspend = (struct vm_suspend *)data; + error = vm_suspend(sc->vm, vmsuspend->how); + break; + case VM_REINIT: + error = vm_reinit(sc->vm); + break; + case VM_STAT_DESC: { + statdesc = (struct vm_stat_desc *)data; + error = vmm_stat_desc_copy(statdesc->index, + statdesc->desc, sizeof(statdesc->desc)); + break; + } + case VM_STATS: { + CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); + vmstats = (struct vm_stats *)data; + getmicrotime(&vmstats->tv); + error = vmm_stat_copy(vcpu, vmstats->index, + nitems(vmstats->statbuf), + &vmstats->num_entries, vmstats->statbuf); + break; + } + case VM_MMAP_GETNEXT: + mm = (struct vm_memmap *)data; + error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, + &mm->segoff, &mm->len, &mm->prot, &mm->flags); + break; + case VM_MMAP_MEMSEG: + mm = (struct vm_memmap *)data; + error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, + mm->len, mm->prot, mm->flags); + break; + case VM_MUNMAP_MEMSEG: + mu = (struct vm_munmap *)data; + error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); + break; + case VM_ALLOC_MEMSEG: + error = alloc_memseg(sc, (struct vm_memseg *)data); + break; + case VM_GET_MEMSEG: + error = get_memseg(sc, (struct vm_memseg *)data); + break; + case VM_GET_REGISTER: + vmreg = (struct vm_register *)data; + error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); + break; + case VM_SET_REGISTER: + vmreg = (struct vm_register *)data; + error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); + break; + case VM_GET_REGISTER_SET: + vmregset = (struct vm_register_set *)data; + if (vmregset->count > VM_REG_LAST) { + error = EINVAL; + break; + } + regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, + M_WAITOK); + regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, + M_WAITOK); + error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * + vmregset->count); + if (error == 0) + error = vm_get_register_set(vcpu, vmregset->count, + regnums, regvals); + if (error == 0) + error = copyout(regvals, vmregset->regvals, + sizeof(regvals[0]) * vmregset->count); + free(regvals, M_VMMDEV); + free(regnums, M_VMMDEV); + break; + case VM_SET_REGISTER_SET: + vmregset = (struct vm_register_set *)data; + if (vmregset->count > VM_REG_LAST) { + error = EINVAL; + break; + } + regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, + M_WAITOK); + regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, + M_WAITOK); + error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * + vmregset->count); + if (error == 0) + error = copyin(vmregset->regvals, regvals, + 
sizeof(regvals[0]) * vmregset->count); + if (error == 0) + error = vm_set_register_set(vcpu, vmregset->count, + regnums, regvals); + free(regvals, M_VMMDEV); + free(regnums, M_VMMDEV); + break; + case VM_GET_CAPABILITY: + vmcap = (struct vm_capability *)data; + error = vm_get_capability(vcpu, + vmcap->captype, + &vmcap->capval); + break; + case VM_SET_CAPABILITY: + vmcap = (struct vm_capability *)data; + error = vm_set_capability(vcpu, + vmcap->captype, + vmcap->capval); + break; + case VM_INJECT_EXCEPTION: + vmexc = (struct vm_exception *)data; + error = vm_inject_exception(vcpu, vmexc->scause); + break; + case VM_GLA2GPA_NOFAULT: + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + case VM_ACTIVATE_CPU: + error = vm_activate_cpu(vcpu); + break; + case VM_GET_CPUS: + error = 0; + vm_cpuset = (struct vm_cpuset *)data; + size = vm_cpuset->cpusetsize; + if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { + error = ERANGE; + break; + } + cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); + if (vm_cpuset->which == VM_ACTIVE_CPUS) + *cpuset = vm_active_cpus(sc->vm); + else if (vm_cpuset->which == VM_SUSPENDED_CPUS) + *cpuset = vm_suspended_cpus(sc->vm); + else if (vm_cpuset->which == VM_DEBUG_CPUS) + *cpuset = vm_debug_cpus(sc->vm); + else + error = EINVAL; + if (error == 0) + error = copyout(cpuset, vm_cpuset->cpus, size); + free(cpuset, M_TEMP); + break; + case VM_SUSPEND_CPU: + error = vm_suspend_cpu(sc->vm, vcpu); + break; + case VM_RESUME_CPU: + error = vm_resume_cpu(sc->vm, vcpu); + break; + case VM_ATTACH_APLIC: + aplic = (struct vm_aplic_descr *)data; + error = vm_attach_aplic(sc->vm, aplic); + break; + case VM_RAISE_MSI: + vmsi = (struct vm_msi *)data; + error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus, + vmsi->slot, vmsi->func); + break; + case VM_SET_TOPOLOGY: + topology = (struct vm_cpu_topology *)data; + error = vm_set_topology(sc->vm, topology->sockets, + topology->cores, topology->threads, topology->maxcpus); + break; + case VM_GET_TOPOLOGY: + topology = (struct vm_cpu_topology *)data; + vm_get_topology(sc->vm, &topology->sockets, &topology->cores, + &topology->threads, &topology->maxcpus); + error = 0; + break; + default: + error = ENOTTY; + break; + } + +done: + if (vcpus_locked == SINGLE) + vcpu_unlock_one(vcpu); + else if (vcpus_locked == ALL) + vcpu_unlock_all(sc); + if (memsegs_locked) + vm_unlock_memsegs(sc->vm); + + /* + * Make sure that no handler returns a kernel-internal + * error value to userspace. + */ + KASSERT(error == ERESTART || error >= 0, + ("vmmdev_ioctl: invalid error return %d", error)); + return (error); +} + +static int +vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, + struct vm_object **objp, int nprot) +{ + struct vmmdev_softc *sc; + vm_paddr_t gpa; + size_t len; + vm_ooffset_t segoff, first, last; + int error, found, segid; + bool sysmem; + + error = vmm_priv_check(curthread->td_ucred); + if (error) + return (error); + + first = *offset; + last = first + mapsize; + if ((nprot & PROT_EXEC) || first < 0 || first >= last) + return (EINVAL); + + sc = vmmdev_lookup2(cdev); + if (sc == NULL) { + /* virtual machine is in the process of being created */ + return (EINVAL); + } + + /* + * Get a read lock on the guest memory map. 
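vmmdev_mmap_single() below backs mmap() on the VM control device: the file offset is interpreted as a guest-physical address and resolved to the memory segment mapped there. A hedged sketch of the userspace sequence that makes such an offset valid; the segment id, size and protection values are illustrative and error checking is omitted:

	struct vm_memseg memseg = { .segid = 0, .len = 128 * 1024 * 1024 };
	struct vm_memmap memmap = {
		.gpa = 0,
		.segid = 0,
		.segoff = 0,
		.len = 128 * 1024 * 1024,
		.prot = PROT_READ | PROT_WRITE,
	};
	void *guest_ram;

	ioctl(vmfd, VM_ALLOC_MEMSEG, &memseg);	/* unnamed, so system memory */
	ioctl(vmfd, VM_MMAP_MEMSEG, &memmap);	/* place it at guest-physical 0 */
	guest_ram = mmap(NULL, memmap.len, PROT_READ | PROT_WRITE, MAP_SHARED,
	    vmfd, 0 /* offset == gpa */);

The loop further down walks the memory map with vm_mmap_getnext() until it finds the segment covering the requested range.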
+ */ + vm_slock_memsegs(sc->vm); + + gpa = 0; + found = 0; + while (!found) { + error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, + NULL, NULL); + if (error) + break; + + if (first >= gpa && last <= gpa + len) + found = 1; + else + gpa += len; + } + + if (found) { + error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); + KASSERT(error == 0 && *objp != NULL, + ("%s: invalid memory segment %d", __func__, segid)); + if (sysmem) { + vm_object_reference(*objp); + *offset = segoff + (first - gpa); + } else { + error = EINVAL; + } + } + vm_unlock_memsegs(sc->vm); + return (error); +} + +static void +vmmdev_destroy(void *arg) +{ + struct vmmdev_softc *sc = arg; + struct devmem_softc *dsc; + int error __diagused; + + error = vcpu_lock_all(sc); + KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); + vm_unlock_vcpus(sc->vm); + + while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { + KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); + SLIST_REMOVE_HEAD(&sc->devmem, link); + free(dsc->name, M_VMMDEV); + free(dsc, M_VMMDEV); + } + + if (sc->cdev != NULL) + destroy_dev(sc->cdev); + + if (sc->vm != NULL) + vm_destroy(sc->vm); + + if (sc->ucred != NULL) + crfree(sc->ucred); + + if ((sc->flags & VSC_LINKED) != 0) { + mtx_lock(&vmmdev_mtx); + SLIST_REMOVE(&head, sc, vmmdev_softc, link); + mtx_unlock(&vmmdev_mtx); + } + + free(sc, M_VMMDEV); +} + +static int +sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) +{ + struct devmem_softc *dsc; + struct vmmdev_softc *sc; + struct cdev *cdev; + char *buf; + int error, buflen; + + error = vmm_priv_check(req->td->td_ucred); + if (error) + return (error); + + buflen = VM_MAX_NAMELEN + 1; + buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); + strlcpy(buf, "beavis", buflen); + error = sysctl_handle_string(oidp, buf, buflen, req); + if (error != 0 || req->newptr == NULL) + goto out; + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(buf); + if (sc == NULL || sc->cdev == NULL) { + mtx_unlock(&vmmdev_mtx); + error = EINVAL; + goto out; + } + + /* + * Setting 'sc->cdev' to NULL is used to indicate that the VM + * is scheduled for destruction. + */ + cdev = sc->cdev; + sc->cdev = NULL; + mtx_unlock(&vmmdev_mtx); + + /* + * Destroy all cdevs: + * + * - any new operations on the 'cdev' will return an error (ENXIO). 
+ * + * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' + */ + SLIST_FOREACH(dsc, &sc->devmem, link) { + KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); + destroy_dev(dsc->cdev); + devmem_destroy(dsc); + } + destroy_dev(cdev); + vmmdev_destroy(sc); + error = 0; + +out: + free(buf, M_VMMDEV); + return (error); +} +SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW | + CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_destroy, "A", NULL); + +static struct cdevsw vmmdevsw = { + .d_name = "vmmdev", + .d_version = D_VERSION, + .d_ioctl = vmmdev_ioctl, + .d_mmap_single = vmmdev_mmap_single, + .d_read = vmmdev_rw, + .d_write = vmmdev_rw, +}; + +static int +sysctl_vmm_create(SYSCTL_HANDLER_ARGS) +{ + struct vm *vm; + struct cdev *cdev; + struct vmmdev_softc *sc, *sc2; + char *buf; + int error, buflen; + + error = vmm_priv_check(req->td->td_ucred); + if (error) + return (error); + + buflen = VM_MAX_NAMELEN + 1; + buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); + strlcpy(buf, "beavis", buflen); + error = sysctl_handle_string(oidp, buf, buflen, req); + if (error != 0 || req->newptr == NULL) + goto out; + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(buf); + mtx_unlock(&vmmdev_mtx); + if (sc != NULL) { + error = EEXIST; + goto out; + } + + error = vm_create(buf, &vm); + if (error != 0) + goto out; + + sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); + sc->ucred = crhold(curthread->td_ucred); + sc->vm = vm; + SLIST_INIT(&sc->devmem); + + /* + * Lookup the name again just in case somebody sneaked in when we + * dropped the lock. + */ + mtx_lock(&vmmdev_mtx); + sc2 = vmmdev_lookup(buf); + if (sc2 == NULL) { + SLIST_INSERT_HEAD(&head, sc, link); + sc->flags |= VSC_LINKED; + } + mtx_unlock(&vmmdev_mtx); + + if (sc2 != NULL) { + vmmdev_destroy(sc); + error = EEXIST; + goto out; + } + + error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred, + UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); + if (error != 0) { + vmmdev_destroy(sc); + goto out; + } + + mtx_lock(&vmmdev_mtx); + sc->cdev = cdev; + sc->cdev->si_drv1 = sc; + mtx_unlock(&vmmdev_mtx); + +out: + free(buf, M_VMMDEV); + return (error); +} +SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW | + CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_create, "A", NULL); + +void +vmmdev_init(void) +{ + pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, + "Allow use of vmm in a jail."); +} + +int +vmmdev_cleanup(void) +{ + int error; + + if (SLIST_EMPTY(&head)) + error = 0; + else + error = EBUSY; + + return (error); +} + +static int +devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, + struct vm_object **objp, int nprot) +{ + struct devmem_softc *dsc; + vm_ooffset_t first, last; + size_t seglen; + int error; + bool sysmem; + + dprintf("%s: offset %lx len %lx\n", __func__, *offset, len); + + dsc = cdev->si_drv1; + if (dsc == NULL) { + /* 'cdev' has been created but is not ready for use */ + return (ENXIO); + } + + first = *offset; + last = *offset + len; + if ((nprot & PROT_EXEC) || first < 0 || first >= last) + return (EINVAL); + + vm_slock_memsegs(dsc->sc->vm); + + error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); + KASSERT(error == 0 && !sysmem && *objp != NULL, + ("%s: invalid devmem segment %d", __func__, dsc->segid)); + + if (seglen >= last) + vm_object_reference(*objp); + else + error = 0; + vm_unlock_memsegs(dsc->sc->vm); + return (error); +} + +static struct cdevsw devmemsw = { + .d_name = "devmem", + 
.d_version = D_VERSION, + .d_mmap_single = devmem_mmap_single, +}; + +static int +devmem_create_cdev(const char *vmname, int segid, char *devname) +{ + struct devmem_softc *dsc; + struct vmmdev_softc *sc; + struct cdev *cdev; + int error; + + error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, + UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); + if (error) + return (error); + + dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(vmname); + KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); + if (sc->cdev == NULL) { + /* virtual machine is being created or destroyed */ + mtx_unlock(&vmmdev_mtx); + free(dsc, M_VMMDEV); + destroy_dev_sched_cb(cdev, NULL, 0); + return (ENODEV); + } + + dsc->segid = segid; + dsc->name = devname; + dsc->cdev = cdev; + dsc->sc = sc; + SLIST_INSERT_HEAD(&sc->devmem, dsc, link); + mtx_unlock(&vmmdev_mtx); + + /* The 'cdev' is ready for use after 'si_drv1' is initialized */ + cdev->si_drv1 = dsc; + return (0); +} + +static void +devmem_destroy(void *arg) +{ + struct devmem_softc *dsc = arg; + + KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); + dsc->cdev = NULL; + dsc->sc = NULL; +} Index: sys/riscv/vmm/vmm_instruction_emul.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_instruction_emul.c @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifdef _KERNEL +#include +#include +#include +#include + +#include + +#include +#include +#else +#include +#include +#include + +#include + +#include +#include +#include +#include +#endif + +#include + +int +vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging __unused, mem_region_read_t memread, + mem_region_write_t memwrite, void *memarg) +{ + uint64_t val; + int error; + + if (vie->dir == VM_DIR_READ) { + error = memread(vcpu, gpa, &val, vie->access_size, memarg); + if (error) + goto out; + if ((vie->sign_extend == 0) && (vie->access_size < 8)) + val &= (1ul << (vie->access_size * 8)) - 1; + error = vm_set_register(vcpu, vie->reg, val); + } else { + error = vm_get_register(vcpu, vie->reg, &val); + if (error) + goto out; + /* Mask any unneeded bits from the register */ + if (vie->access_size < 8) + val &= (1ul << (vie->access_size * 8)) - 1; + error = memwrite(vcpu, gpa, val, vie->access_size, memarg); + } + +out: + return (error); +} + +int +vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread, + reg_write_t regwrite, void *regarg) +{ + uint64_t val; + int error; + + if (vre->dir == VM_DIR_READ) { + error = regread(vcpu, &val, regarg); + if (error) + goto out; + error = vm_set_register(vcpu, vre->reg, val); + } else { + error = vm_get_register(vcpu, vre->reg, &val); + if (error) + goto out; + error = regwrite(vcpu, val, regarg); + } + +out: + return (error); +} Index: sys/riscv/vmm/vmm_ktr.h =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_ktr.h @@ -0,0 +1,69 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_KTR_H_ +#define _VMM_KTR_H_ + +#include +#include + +#ifndef KTR_VMM +#define KTR_VMM KTR_GEN +#endif + +#define VCPU_CTR0(vm, vcpuid, format) \ +CTR2(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid)) + +#define VCPU_CTR1(vm, vcpuid, format, p1) \ +CTR3(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1)) + +#define VCPU_CTR2(vm, vcpuid, format, p1, p2) \ +CTR4(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2)) + +#define VCPU_CTR3(vm, vcpuid, format, p1, p2, p3) \ +CTR5(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), (p1), (p2), (p3)) + +#define VCPU_CTR4(vm, vcpuid, format, p1, p2, p3, p4) \ +CTR6(KTR_VMM, "vm %s[%d]: " format, vm_name((vm)), (vcpuid), \ + (p1), (p2), (p3), (p4)) + +#define VM_CTR0(vm, format) \ +CTR1(KTR_VMM, "vm %s: " format, vm_name((vm))) + +#define VM_CTR1(vm, format, p1) \ +CTR2(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1)) + +#define VM_CTR2(vm, format, p1, p2) \ +CTR3(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2)) + +#define VM_CTR3(vm, format, p1, p2, p3) \ +CTR4(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3)) + +#define VM_CTR4(vm, format, p1, p2, p3, p4) \ +CTR5(KTR_VMM, "vm %s: " format, vm_name((vm)), (p1), (p2), (p3), (p4)) +#endif Index: sys/riscv/vmm/vmm_riscv.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_riscv.c @@ -0,0 +1,797 @@ +/*- + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "riscv.h" +#include "vmm_aplic.h" +#include "vmm_stat.h" + +MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP"); + +DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); + +static int +m_op(uint32_t insn, int match, int mask) +{ + + if (((insn ^ match) & mask) == 0) + return (1); + + return (0); +} + +static inline void +riscv_set_active_vcpu(struct hypctx *hypctx) +{ + + DPCPU_SET(vcpu, hypctx); +} + +struct hypctx * +riscv_get_active_vcpu(void) +{ + + return (DPCPU_GET(vcpu)); +} + +int +vmmops_modinit(void) +{ + + if (!has_hyp) { + printf("vmm: riscv hart doesn't support H-extension.\n"); + return (ENXIO); + } + + if (!has_sstc) { + printf("vmm: riscv hart doesn't support SSTC extension.\n"); + return (ENXIO); + } + + return (0); +} + +int +vmmops_modcleanup(void) +{ + + return (0); +} + +void * +vmmops_init(struct vm *vm, pmap_t pmap) +{ + struct hyp *hyp; + vm_size_t size; + + size = round_page(sizeof(struct hyp) + + sizeof(struct hypctx *) * vm_get_maxcpus(vm)); + hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + hyp->vm = vm; + hyp->aplic_attached = false; + + aplic_vminit(hyp); + + return (hyp); +} + +static void +vmmops_delegate(void) +{ + uint64_t hedeleg; + uint64_t hideleg; + + hedeleg = (1UL << SCAUSE_INST_MISALIGNED); + hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION); + hedeleg |= (1UL << SCAUSE_BREAKPOINT); + hedeleg |= (1UL << SCAUSE_ECALL_USER); + hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT); + hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT); + hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT); + csr_write(hedeleg, hedeleg); + + hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR); + hideleg |= (1UL << IRQ_TIMER_HYPERVISOR); + hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR); + csr_write(hideleg, hideleg); +} + +static void +vmmops_vcpu_restore_csrs(struct hypctx *hypctx) +{ + struct hypcsr *csrs; + + csrs = &hypctx->guest_csrs; + + csr_write(vsstatus, csrs->vsstatus); + csr_write(vsie, csrs->vsie); + csr_write(vstvec, csrs->vstvec); + csr_write(vsscratch, csrs->vsscratch); + csr_write(vsepc, csrs->vsepc); + csr_write(vscause, csrs->vscause); + csr_write(vstval, csrs->vstval); + csr_write(hvip, csrs->hvip); + csr_write(vsatp, csrs->vsatp); +} + +static void +vmmops_vcpu_save_csrs(struct hypctx *hypctx) +{ + struct hypcsr *csrs; + + csrs = &hypctx->guest_csrs; + + csrs->vsstatus = csr_read(vsstatus); + csrs->vsie = csr_read(vsie); + csrs->vstvec = csr_read(vstvec); + csrs->vsscratch = csr_read(vsscratch); + csrs->vsepc = csr_read(vsepc); + csrs->vscause = csr_read(vscause); + csrs->vstval = csr_read(vstval); + csrs->hvip = csr_read(hvip); + csrs->vsatp = csr_read(vsatp); +} + +void * +vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) +{ + struct hypctx *hypctx; + struct hyp *hyp; + vm_size_t size; + + hyp = vmi; + + dprintf("%s: hyp %p\n", __func__, hyp); + + size = round_page(sizeof(struct hypctx)); + hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + + KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), + ("%s: Invalid vcpuid %d", __func__, vcpuid)); + hyp->ctx[vcpuid] = hypctx; + + hypctx->hyp = hyp; + hypctx->vcpu = vcpu1; + + /* + * TODO: set initial state for CSRs if needed. 
+ */ + vmmops_vcpu_restore_csrs(hypctx); + + aplic_cpuinit(hypctx); + + vmmops_delegate(); + + csr_write(henvcfg, HENVCFG_STCE); + csr_write(hie, HIE_VSEIE | HIE_SGEIE); + + /* + * TODO: should we trap rdcycle / rdtime ? + */ + csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM); + hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM; + hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE; + hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL; + hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW; + + return (hypctx); +} + +static int +riscv_vmm_pinit(pmap_t pmap) +{ + + dprintf("%s: pmap %p\n", __func__, pmap); + + pmap_pinit_stage(pmap, PM_STAGE2); + + return (1); +} + +struct vmspace * +vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) +{ + + return (vmspace_alloc(min, max, riscv_vmm_pinit)); +} + +void +vmmops_vmspace_free(struct vmspace *vmspace) +{ + + pmap_remove_pages(vmspace_pmap(vmspace)); + vmspace_free(vmspace); +} + +static void +riscv_unpriv_read(struct hypctx *hypctx, uint64_t guest_addr, uint64_t *data) +{ + uint64_t old_hstatus; + uint64_t val; + uint64_t tmp; + + old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus); + + /* + * TODO: handle exceptions during unprivilege read. + */ + + __asm __volatile(".option push\n" + ".option norvc\n" + "hlvx.hu %[val], (%[addr])\n" + ".option pop\n" + : [val] "=&r" (val), [addr] "+&r" (guest_addr) + :: "memory"); + + if ((val & 0x3) == 0x3) { + guest_addr += 2; + __asm __volatile(".option push\n" + ".option norvc\n" + "hlvx.hu %[tmp], (%[addr])\n" + ".option pop\n" + : [tmp] "=&r" (tmp), [addr] "+&r" (guest_addr) + :: "memory"); + val |= (tmp << 16); + } + + csr_write(hstatus, old_hstatus); + + *data = val; +} + +static void +riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret) +{ + uint64_t guest_addr; + struct vie *vie; + uint64_t insn; + int reg_num; + int rs2, rd; + + vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) | + (vme_ret->stval & 0x3); + + guest_addr = vme_ret->sepc; + + vie = &vme_ret->u.inst_emul.vie; + vie->dir = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ? 
\ + VM_DIR_WRITE : VM_DIR_READ; + vie->sign_extend = 1; + + riscv_unpriv_read(hypctx, guest_addr, &insn); + + if ((insn & 0x3) == 0x3) { + rs2 = (insn & RS2_MASK) >> RS2_SHIFT; + rd = (insn & RD_MASK) >> RD_SHIFT; + + if (vie->dir == VM_DIR_WRITE) { + if (m_op(insn, MATCH_SB, MASK_SB)) + vie->access_size = 1; + else if (m_op(insn, MATCH_SH, MASK_SH)) + vie->access_size = 2; + else if (m_op(insn, MATCH_SW, MASK_SW)) + vie->access_size = 4; + else if (m_op(insn, MATCH_SD, MASK_SD)) + vie->access_size = 8; + else + panic("unknown store instr at %lx", guest_addr); + reg_num = rs2; + } else { + if (m_op(insn, MATCH_LB, MASK_LB)) + vie->access_size = 1; + else if (m_op(insn, MATCH_LH, MASK_LH)) + vie->access_size = 2; + else if (m_op(insn, MATCH_LW, MASK_LW)) + vie->access_size = 4; + else if (m_op(insn, MATCH_LD, MASK_LD)) + vie->access_size = 8; + else if (m_op(insn, MATCH_LBU, MASK_LBU)) { + vie->access_size = 1; + vie->sign_extend = 0; + } else if (m_op(insn, MATCH_LHU, MASK_LHU)) { + vie->access_size = 2; + vie->sign_extend = 0; + } else if (m_op(insn, MATCH_LWU, MASK_LWU)) { + vie->access_size = 4; + vie->sign_extend = 0; + } else + panic("unknown load instr at %lx", guest_addr); + reg_num = rd; + } + vme_ret->inst_length = 4; + } else { + rs2 = (insn >> 7) & 0x7; + rs2 += 0x8; + rd = (insn >> 2) & 0x7; + rd += 0x8; + + if (vie->dir == VM_DIR_WRITE) { + if (m_op(insn, MATCH_C_SW, MASK_C_SW)) + vie->access_size = 4; + else if (m_op(insn, MATCH_C_SD, MASK_C_SD)) + vie->access_size = 8; + else + panic("unknown store instr at %lx", guest_addr); + } else { + if (m_op(insn, MATCH_C_LW, MASK_C_LW)) + vie->access_size = 4; + else if (m_op(insn, MATCH_C_LD, MASK_C_LD)) + vie->access_size = 8; + else + panic("unknown load instr at %lx", guest_addr); + } + reg_num = rd; + vme_ret->inst_length = 2; + } + + dprintf("guest_addr %lx insn %lx, reg %d\n", guest_addr, insn, reg_num); + + vie->reg = reg_num; +} + +static bool +riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme, + pmap_t pmap) +{ + uint64_t insn; + uint64_t gpa; + bool handled; + + handled = false; + + if (vme->scause & SCAUSE_INTR) { + /* + * Host interrupt? Leave critical section to handle. + */ + vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + vme->inst_length = 0; + return (handled); + } + + switch (vme->scause) { + case SCAUSE_FETCH_GUEST_PAGE_FAULT: + case SCAUSE_LOAD_GUEST_PAGE_FAULT: + case SCAUSE_STORE_GUEST_PAGE_FAULT: + gpa = (vme->htval << 2) | (vme->stval & 0x3); + if (vm_mem_allocated(hypctx->vcpu, gpa)) { + vme->exitcode = VM_EXITCODE_PAGING; + vme->inst_length = 0; + vme->u.paging.gpa = gpa; + } else { + riscv_gen_inst_emul_data(hypctx, vme); + vme->exitcode = VM_EXITCODE_INST_EMUL; + } + break; + case SCAUSE_ILLEGAL_INSTRUCTION: + /* + * TODO: handle illegal instruction properly. 
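+		 * An illegal-instruction trap taken from the guest would
+		 * normally be re-injected (or the instruction emulated);
+		 * until that exists, panic so the condition is not silently
+		 * ignored.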
+ */ + panic("%s: Illegal instr at %lx stval 0x%lx htval 0x%lx\n", + __func__, vme->sepc, vme->stval, vme->htval); + case SCAUSE_VIRTUAL_SUPERVISOR_ECALL: + vme->exitcode = VM_EXITCODE_ECALL; + handled = false; + break; + case SCAUSE_VIRTUAL_INSTRUCTION: + insn = vme->stval; + if (m_op(insn, MATCH_WFI, MASK_WFI)) + vme->exitcode = VM_EXITCODE_WFI; + else + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + default: + printf("unknown scause %lx\n", vme->scause); + vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + } + + return (handled); +} + +int +vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, + int prot, uint64_t *gpa, int *is_fault) +{ + + /* Implement me. */ + + return (0); +} + +static void +riscv_sync_interrupts(struct hypctx *hypctx) +{ + int pending; + + pending = aplic_check_pending(hypctx); + + if (pending) + hypctx->guest_csrs.hvip |= HVIP_VSEIP; + else + hypctx->guest_csrs.hvip &= ~HVIP_VSEIP; + + csr_write(hvip, hypctx->guest_csrs.hvip); +} + +int +vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) +{ + struct hypctx *hypctx; + struct vm_exit *vme; + struct vcpu *vcpu; + register_t val; + int handled; + + hypctx = (struct hypctx *)vcpui; + vcpu = hypctx->vcpu; + vme = vm_exitinfo(vcpu); + + hypctx->guest_regs.hyp_sepc = (uint64_t)pc; + + if (hypctx->guest_regs.hyp_sstatus & SSTATUS_SPP) + hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP; + else + hypctx->guest_regs.hyp_hstatus &= HSTATUS_SPVP; + + hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPV | HSTATUS_VTW; + + csr_write(hgatp, pmap->pm_satp); + + vmmops_vcpu_restore_csrs(hypctx); + + for (;;) { + dprintf("%s: pc %lx\n", __func__, pc); + + if (hypctx->has_exception) { + hypctx->has_exception = false; + /* + * TODO: implement exception injection. + */ + } + + val = intr_disable(); + + /* Check if the vcpu is suspended */ + if (vcpu_suspended(evinfo)) { + intr_restore(val); + vm_exit_suspended(vcpu, pc); + break; + } + + if (vcpu_debugged(vcpu)) { + intr_restore(val); + vm_exit_debug(vcpu, pc); + break; + } + + /* + * TODO: What happens if a timer interrupt is asserted exactly + * here, but for the previous VM? + */ + riscv_set_active_vcpu(hypctx); + aplic_flush_hwstate(hypctx); + + riscv_sync_interrupts(hypctx); + + dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n", + __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus, + hypctx->guest_regs.hyp_hstatus); + + vmm_switch(hypctx); + + dprintf("%s: Leaving guest VM\n", __func__); + + aplic_sync_hwstate(hypctx); + + /* + * TODO: deactivate stage 2 pmap here if needed. + */ + + vme->scause = csr_read(scause); + vme->sepc = csr_read(sepc); + vme->stval = csr_read(stval); + vme->htval = csr_read(htval); + vme->htinst = csr_read(htinst); + + intr_restore(val); + + vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); + vme->pc = hypctx->guest_regs.hyp_sepc; + vme->inst_length = INSN_SIZE; + + handled = riscv_handle_world_switch(hypctx, vme, pmap); + if (handled == false) + /* Exit loop to emulate instruction. */ + break; + else { + /* Resume guest execution from the next instruction. 
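+			 * inst_length is INSN_SIZE by default, 2 or 4 for an
+			 * emulated compressed/uncompressed access, and 0 when
+			 * the same pc must be retried.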
*/ + hypctx->guest_regs.hyp_sepc += vme->inst_length; + } + } + + vmmops_vcpu_save_csrs(hypctx); + + return (0); +} + +static void +riscv_pcpu_vmcleanup(void *arg) +{ + struct hyp *hyp; + int i, maxcpus; + + hyp = arg; + maxcpus = vm_get_maxcpus(hyp->vm); + for (i = 0; i < maxcpus; i++) { + if (riscv_get_active_vcpu() == hyp->ctx[i]) { + riscv_set_active_vcpu(NULL); + break; + } + } +} + +void +vmmops_vcpu_cleanup(void *vcpui) +{ + struct hypctx *hypctx; + + hypctx = vcpui; + + dprintf("%s\n", __func__); + + aplic_cpucleanup(hypctx); + + free(hypctx, M_HYP); +} + +void +vmmops_cleanup(void *vmi) +{ + struct hyp *hyp; + + hyp = vmi; + + dprintf("%s\n", __func__); + + aplic_vmcleanup(hyp); + + smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp); + + free(hyp, M_HYP); +} + +/* + * Return register value. Registers have different sizes and an explicit cast + * must be made to ensure proper conversion. + */ +static uint64_t * +hypctx_regptr(struct hypctx *hypctx, int reg) +{ + + switch (reg) { + case VM_REG_GUEST_RA: + return (&hypctx->guest_regs.hyp_ra); + case VM_REG_GUEST_SP: + return (&hypctx->guest_regs.hyp_sp); + case VM_REG_GUEST_GP: + return (&hypctx->guest_regs.hyp_gp); + case VM_REG_GUEST_TP: + return (&hypctx->guest_regs.hyp_tp); + case VM_REG_GUEST_T0: + return (&hypctx->guest_regs.hyp_t[0]); + case VM_REG_GUEST_T1: + return (&hypctx->guest_regs.hyp_t[1]); + case VM_REG_GUEST_T2: + return (&hypctx->guest_regs.hyp_t[2]); + case VM_REG_GUEST_S0: + return (&hypctx->guest_regs.hyp_s[0]); + case VM_REG_GUEST_S1: + return (&hypctx->guest_regs.hyp_s[1]); + case VM_REG_GUEST_A0: + return (&hypctx->guest_regs.hyp_a[0]); + case VM_REG_GUEST_A1: + return (&hypctx->guest_regs.hyp_a[1]); + case VM_REG_GUEST_A2: + return (&hypctx->guest_regs.hyp_a[2]); + case VM_REG_GUEST_A3: + return (&hypctx->guest_regs.hyp_a[3]); + case VM_REG_GUEST_A4: + return (&hypctx->guest_regs.hyp_a[4]); + case VM_REG_GUEST_A5: + return (&hypctx->guest_regs.hyp_a[5]); + case VM_REG_GUEST_A6: + return (&hypctx->guest_regs.hyp_a[6]); + case VM_REG_GUEST_A7: + return (&hypctx->guest_regs.hyp_a[7]); + case VM_REG_GUEST_S2: + return (&hypctx->guest_regs.hyp_s[2]); + case VM_REG_GUEST_S3: + return (&hypctx->guest_regs.hyp_s[3]); + case VM_REG_GUEST_S4: + return (&hypctx->guest_regs.hyp_s[4]); + case VM_REG_GUEST_S5: + return (&hypctx->guest_regs.hyp_s[5]); + case VM_REG_GUEST_S6: + return (&hypctx->guest_regs.hyp_s[6]); + case VM_REG_GUEST_S7: + return (&hypctx->guest_regs.hyp_s[7]); + case VM_REG_GUEST_S8: + return (&hypctx->guest_regs.hyp_s[8]); + case VM_REG_GUEST_S9: + return (&hypctx->guest_regs.hyp_s[9]); + case VM_REG_GUEST_S10: + return (&hypctx->guest_regs.hyp_s[10]); + case VM_REG_GUEST_S11: + return (&hypctx->guest_regs.hyp_s[11]); + case VM_REG_GUEST_T3: + return (&hypctx->guest_regs.hyp_t[3]); + case VM_REG_GUEST_T4: + return (&hypctx->guest_regs.hyp_t[4]); + case VM_REG_GUEST_T5: + return (&hypctx->guest_regs.hyp_t[5]); + case VM_REG_GUEST_T6: + return (&hypctx->guest_regs.hyp_t[6]); + case VM_REG_GUEST_SEPC: + return (&hypctx->guest_regs.hyp_sepc); + default: + break; + } + + return (NULL); +} + +int +vmmops_getreg(void *vcpui, int reg, uint64_t *retval) +{ + uint64_t *regp; + int running, hostcpu; + struct hypctx *hypctx; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + regp = hypctx_regptr(hypctx, reg); + if (regp == NULL) + return (EINVAL); + + 
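+	/* The value is taken from the software-saved guest register file. */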
*retval = *regp; + + return (0); +} + +int +vmmops_setreg(void *vcpui, int reg, uint64_t val) +{ + uint64_t *regp; + struct hypctx *hypctx; + int running, hostcpu; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + regp = hypctx_regptr(hypctx, reg); + if (regp == NULL) + return (EINVAL); + + *regp = val; + + return (0); +} + +int +vmmops_exception(void *vcpui, uint64_t scause) +{ + struct hypctx *hypctx = vcpui; + int running, hostcpu; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + /* TODO: set registers. */ + + hypctx->has_exception = true; + + return (0); +} + +int +vmmops_getcap(void *vcpui, int num, int *retval) +{ + int ret; + + ret = ENOENT; + + switch (num) { + case VM_CAP_UNRESTRICTED_GUEST: + *retval = 1; + ret = 0; + break; + default: + break; + } + + return (ret); +} + +int +vmmops_setcap(void *vcpui, int num, int val) +{ + + return (ENOENT); +} Index: sys/riscv/vmm/vmm_sbi.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_sbi.c @@ -0,0 +1,96 @@ +/*- + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "riscv.h" + +/* + * SBI is fully handled in userspace. + * + * TODO: We may need to handle the SBI IPI extension here in kernel. + * The same for the SBI TIME extension in case of no SSTC support in HW. 
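+ * Keeping those fast paths in the kernel would avoid a round trip to
+ * userspace for every such ecall; for now every SBI call simply exits
+ * to userspace with *retu set to true.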
+ */ + +int +vmm_sbi_ecall(struct vcpu *vcpu, bool *retu) +{ + int sbi_extension_id __unused; + struct hypctx *hypctx; + + hypctx = riscv_get_active_vcpu(); + sbi_extension_id = hypctx->guest_regs.hyp_a[7]; + + dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__, + hypctx->guest_regs.hyp_a[0], + hypctx->guest_regs.hyp_a[1], + hypctx->guest_regs.hyp_a[2], + hypctx->guest_regs.hyp_a[3], + hypctx->guest_regs.hyp_a[4], + hypctx->guest_regs.hyp_a[5], + hypctx->guest_regs.hyp_a[6], + hypctx->guest_regs.hyp_a[7]); + + switch (sbi_extension_id) { + case SBI_EXT_ID_TIME: + break; + default: + break; + } + + *retu = true; + + return (0); +} Index: sys/riscv/vmm/vmm_stat.h =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_stat.h @@ -0,0 +1,144 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_STAT_H_ +#define _VMM_STAT_H_ + +struct vm; + +#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */ + +enum vmm_stat_scope { + VMM_STAT_SCOPE_ANY, +}; + +struct vmm_stat_type; +typedef void (*vmm_stat_func_t)(struct vcpu *vcpu, + struct vmm_stat_type *stat); + +struct vmm_stat_type { + int index; /* position in the stats buffer */ + int nelems; /* standalone or array */ + const char *desc; /* description of statistic */ + vmm_stat_func_t func; + enum vmm_stat_scope scope; +}; + +void vmm_stat_register(void *arg); + +#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \ + struct vmm_stat_type type[1] = { \ + { -1, nelems, desc, func, scope } \ + }; \ + SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type) + +#define VMM_STAT_DEFINE(type, nelems, desc, scope) \ + VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope) + +#define VMM_STAT_DECLARE(type) \ + extern struct vmm_stat_type type[1] + +#define VMM_STAT(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY) + +#define VMM_STAT_FUNC(type, desc, func) \ + VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY) + +#define VMM_STAT_ARRAY(type, nelems, desc) \ + VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY) + +void *vmm_stat_alloc(void); +void vmm_stat_init(void *vp); +void vmm_stat_free(void *vp); + +int vmm_stat_copy(struct vcpu *vcpu, int index, int count, + int *num_stats, uint64_t *buf); +int vmm_stat_desc_copy(int index, char *buf, int buflen); + +static void __inline +vmm_stat_array_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx, + uint64_t x) +{ +#ifdef VMM_KEEP_STATS + uint64_t *stats; + + stats = vcpu_stats(vcpu); + + if (vst->index >= 0 && statidx < vst->nelems) + stats[vst->index + statidx] += x; +#endif +} + +static void __inline +vmm_stat_array_set(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx, + uint64_t val) +{ +#ifdef VMM_KEEP_STATS + uint64_t *stats; + + stats = vcpu_stats(vcpu); + + if (vst->index >= 0 && statidx < vst->nelems) + stats[vst->index + statidx] = val; +#endif +} + +static void __inline +vmm_stat_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t x) +{ + +#ifdef VMM_KEEP_STATS + vmm_stat_array_incr(vcpu, vst, 0, x); +#endif +} + +static void __inline +vmm_stat_set(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t val) +{ + +#ifdef VMM_KEEP_STATS + vmm_stat_array_set(vcpu, vst, 0, val); +#endif +} + +VMM_STAT_DECLARE(VMEXIT_COUNT); +VMM_STAT_DECLARE(VMEXIT_UNKNOWN); +VMM_STAT_DECLARE(VMEXIT_WFI); +VMM_STAT_DECLARE(VMEXIT_WFE); +VMM_STAT_DECLARE(VMEXIT_HVC); +VMM_STAT_DECLARE(VMEXIT_MSR); +VMM_STAT_DECLARE(VMEXIT_DATA_ABORT); +VMM_STAT_DECLARE(VMEXIT_INSN_ABORT); +VMM_STAT_DECLARE(VMEXIT_UNHANDLED_SYNC); +VMM_STAT_DECLARE(VMEXIT_IRQ); +VMM_STAT_DECLARE(VMEXIT_FIQ); +VMM_STAT_DECLARE(VMEXIT_UNHANDLED); +#endif Index: sys/riscv/vmm/vmm_stat.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_stat.c @@ -0,0 +1,162 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include "vmm_stat.h" + +/* + * 'vst_num_elems' is the total number of addressable statistic elements + * 'vst_num_types' is the number of unique statistic types + * + * It is always true that 'vst_num_elems' is greater than or equal to + * 'vst_num_types'. This is because a stat type may represent more than + * one element (for e.g. VMM_STAT_ARRAY). + */ +static int vst_num_elems, vst_num_types; +static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS]; + +static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat"); + +#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t)) + +void +vmm_stat_register(void *arg) +{ + struct vmm_stat_type *vst = arg; + + /* We require all stats to identify themselves with a description */ + if (vst->desc == NULL) + return; + + if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) { + printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc); + return; + } + + vst->index = vst_num_elems; + vst_num_elems += vst->nelems; + + vsttab[vst_num_types++] = vst; +} + +int +vmm_stat_copy(struct vcpu *vcpu, int index, int count, int *num_stats, + uint64_t *buf) +{ + struct vmm_stat_type *vst; + uint64_t *stats; + int i, tocopy; + + if (index < 0 || count < 0) + return (EINVAL); + + if (index > vst_num_elems) + return (ENOENT); + + if (index == vst_num_elems) { + *num_stats = 0; + return (0); + } + + tocopy = min(vst_num_elems - index, count); + + /* Let stats functions update their counters */ + for (i = 0; i < vst_num_types; i++) { + vst = vsttab[i]; + if (vst->func != NULL) + (*vst->func)(vcpu, vst); + } + + /* Copy over the stats */ + stats = vcpu_stats(vcpu); + memcpy(buf, stats + index, tocopy * sizeof(stats[0])); + *num_stats = tocopy; + return (0); +} + +void * +vmm_stat_alloc(void) +{ + + return (malloc(vst_size, M_VMM_STAT, M_WAITOK)); +} + +void +vmm_stat_init(void *vp) +{ + + bzero(vp, vst_size); +} + +void +vmm_stat_free(void *vp) +{ + free(vp, M_VMM_STAT); +} + +int +vmm_stat_desc_copy(int index, char *buf, int bufsize) +{ + int i; + struct vmm_stat_type *vst; + + for (i = 0; i < vst_num_types; i++) { + vst = vsttab[i]; + if (index >= vst->index && index < vst->index + vst->nelems) { + if (vst->nelems > 1) { + snprintf(buf, bufsize, "%s[%d]", + vst->desc, index - vst->index); + } else { + strlcpy(buf, vst->desc, bufsize); + } + return (0); /* found it */ + } + } + + return (EINVAL); +} + +/* global statistics */ +VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); +VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the 
unknown exception"); +VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted"); +VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted"); +VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted"); +VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted"); +VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort"); +VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort"); +VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception"); +VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); +VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt"); +VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); Index: sys/riscv/vmm/vmm_switch.S =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_switch.S @@ -0,0 +1,202 @@ +/*- + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include "assym.inc" + + .text + +/* + * a0 == hypctx * + */ +ENTRY(vmm_switch) + sd ra, (HYP_H_RA)(a0) + sd sp, (HYP_H_SP)(a0) + sd tp, (HYP_H_TP)(a0) + sd gp, (HYP_H_GP)(a0) + sd s0, (HYP_H_S + 0 * 8)(a0) + sd s1, (HYP_H_S + 1 * 8)(a0) + sd s2, (HYP_H_S + 2 * 8)(a0) + sd s3, (HYP_H_S + 3 * 8)(a0) + sd s4, (HYP_H_S + 4 * 8)(a0) + sd s5, (HYP_H_S + 5 * 8)(a0) + sd s6, (HYP_H_S + 6 * 8)(a0) + sd s7, (HYP_H_S + 7 * 8)(a0) + sd s8, (HYP_H_S + 8 * 8)(a0) + sd s9, (HYP_H_S + 9 * 8)(a0) + sd s10, (HYP_H_S + 10 * 8)(a0) + sd s11, (HYP_H_S + 11 * 8)(a0) + + sd a1, (HYP_H_A + 1 * 8)(a0) + sd a2, (HYP_H_A + 2 * 8)(a0) + sd a3, (HYP_H_A + 3 * 8)(a0) + sd a4, (HYP_H_A + 4 * 8)(a0) + sd a5, (HYP_H_A + 5 * 8)(a0) + sd a6, (HYP_H_A + 6 * 8)(a0) + sd a7, (HYP_H_A + 7 * 8)(a0) + + ld t0, (HYP_G_SSTATUS)(a0) + ld t1, (HYP_G_HSTATUS)(a0) + ld t2, (HYP_G_SCOUNTEREN)(a0) + la t4, .Lswitch_return + ld t5, (HYP_G_SEPC)(a0) + + csrrw t0, sstatus, t0 + csrrw t1, hstatus, t1 + csrrw t2, scounteren, t2 + csrrw t3, sscratch, a0 + csrrw t4, stvec, t4 + csrw sepc, t5 + + sd t0, (HYP_H_SSTATUS)(a0) + sd t1, (HYP_H_HSTATUS)(a0) + sd t2, (HYP_H_SCOUNTEREN)(a0) + sd t3, (HYP_H_SSCRATCH)(a0) + sd t4, (HYP_H_STVEC)(a0) + + ld ra, (HYP_G_RA)(a0) + ld sp, (HYP_G_SP)(a0) + ld gp, (HYP_G_GP)(a0) + ld tp, (HYP_G_TP)(a0) + ld t0, (HYP_G_T + 0 * 8)(a0) + ld t1, (HYP_G_T + 1 * 8)(a0) + ld t2, (HYP_G_T + 2 * 8)(a0) + ld t3, (HYP_G_T + 3 * 8)(a0) + ld t4, (HYP_G_T + 4 * 8)(a0) + ld t5, (HYP_G_T + 5 * 8)(a0) + ld t6, (HYP_G_T + 6 * 8)(a0) + ld s0, (HYP_G_S + 0 * 8)(a0) + ld s1, (HYP_G_S + 1 * 8)(a0) + ld s2, (HYP_G_S + 2 * 8)(a0) + ld s3, (HYP_G_S + 3 * 8)(a0) + ld s4, (HYP_G_S + 4 * 8)(a0) + ld s5, (HYP_G_S + 5 * 8)(a0) + ld s6, (HYP_G_S + 6 * 8)(a0) + ld s7, (HYP_G_S + 7 * 8)(a0) + ld s8, (HYP_G_S + 8 * 8)(a0) + ld s9, (HYP_G_S + 9 * 8)(a0) + ld s10, (HYP_G_S + 10 * 8)(a0) + ld s11, (HYP_G_S + 11 * 8)(a0) + /* skip a0 for now. */ + ld a1, (HYP_G_A + 1 * 8)(a0) + ld a2, (HYP_G_A + 2 * 8)(a0) + ld a3, (HYP_G_A + 3 * 8)(a0) + ld a4, (HYP_G_A + 4 * 8)(a0) + ld a5, (HYP_G_A + 5 * 8)(a0) + ld a6, (HYP_G_A + 6 * 8)(a0) + ld a7, (HYP_G_A + 7 * 8)(a0) + /* now load a0. 
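+	 * a0 still holds the hypctx pointer, which is why it is restored
+	 * last, after every other guest register has been loaded from it.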
*/ + ld a0, (HYP_G_A + 0 * 8)(a0) + + sret + + .align 2 +.Lswitch_return: + + csrrw a0, sscratch, a0 + sd ra, (HYP_G_RA)(a0) + sd sp, (HYP_G_SP)(a0) + sd gp, (HYP_G_GP)(a0) + sd tp, (HYP_G_TP)(a0) + sd t0, (HYP_G_T + 0 * 8)(a0) + sd t1, (HYP_G_T + 1 * 8)(a0) + sd t2, (HYP_G_T + 2 * 8)(a0) + sd t3, (HYP_G_T + 3 * 8)(a0) + sd t4, (HYP_G_T + 4 * 8)(a0) + sd t5, (HYP_G_T + 5 * 8)(a0) + sd t6, (HYP_G_T + 6 * 8)(a0) + sd s0, (HYP_G_S + 0 * 8)(a0) + sd s1, (HYP_G_S + 1 * 8)(a0) + sd s2, (HYP_G_S + 2 * 8)(a0) + sd s3, (HYP_G_S + 3 * 8)(a0) + sd s4, (HYP_G_S + 4 * 8)(a0) + sd s5, (HYP_G_S + 5 * 8)(a0) + sd s6, (HYP_G_S + 6 * 8)(a0) + sd s7, (HYP_G_S + 7 * 8)(a0) + sd s8, (HYP_G_S + 8 * 8)(a0) + sd s9, (HYP_G_S + 9 * 8)(a0) + sd s10, (HYP_G_S + 10 * 8)(a0) + sd s11, (HYP_G_S + 11 * 8)(a0) + /* skip a0 */ + sd a1, (HYP_G_A + 1 * 8)(a0) + sd a2, (HYP_G_A + 2 * 8)(a0) + sd a3, (HYP_G_A + 3 * 8)(a0) + sd a4, (HYP_G_A + 4 * 8)(a0) + sd a5, (HYP_G_A + 5 * 8)(a0) + sd a6, (HYP_G_A + 6 * 8)(a0) + sd a7, (HYP_G_A + 7 * 8)(a0) + + ld t1, (HYP_H_STVEC)(a0) + ld t2, (HYP_H_SSCRATCH)(a0) + ld t3, (HYP_H_SCOUNTEREN)(a0) + ld t4, (HYP_H_HSTATUS)(a0) + ld t5, (HYP_H_SSTATUS)(a0) + + csrr t0, sepc + csrw stvec, t1 + csrrw t2, sscratch, t2 + csrrw t3, scounteren, t3 + csrrw t4, hstatus, t4 + csrrw t5, sstatus, t5 + + sd t0, (HYP_G_SEPC)(a0) + sd t2, (HYP_G_A + 0 * 8)(a0) + sd t3, (HYP_G_SCOUNTEREN)(a0) + sd t4, (HYP_G_HSTATUS)(a0) + sd t5, (HYP_G_SSTATUS)(a0) + + ld ra, (HYP_H_RA)(a0) + ld sp, (HYP_H_SP)(a0) + ld tp, (HYP_H_TP)(a0) + ld gp, (HYP_H_GP)(a0) + ld s0, (HYP_H_S + 0 * 8)(a0) + ld s1, (HYP_H_S + 1 * 8)(a0) + ld s2, (HYP_H_S + 2 * 8)(a0) + ld s3, (HYP_H_S + 3 * 8)(a0) + ld s4, (HYP_H_S + 4 * 8)(a0) + ld s5, (HYP_H_S + 5 * 8)(a0) + ld s6, (HYP_H_S + 6 * 8)(a0) + ld s7, (HYP_H_S + 7 * 8)(a0) + ld s8, (HYP_H_S + 8 * 8)(a0) + ld s9, (HYP_H_S + 9 * 8)(a0) + ld s10, (HYP_H_S + 10 * 8)(a0) + ld s11, (HYP_H_S + 11 * 8)(a0) + + ld a1, (HYP_H_A + 1 * 8)(a0) + ld a2, (HYP_H_A + 2 * 8)(a0) + ld a3, (HYP_H_A + 3 * 8)(a0) + ld a4, (HYP_H_A + 4 * 8)(a0) + ld a5, (HYP_H_A + 5 * 8)(a0) + ld a6, (HYP_H_A + 6 * 8)(a0) + ld a7, (HYP_H_A + 7 * 8)(a0) + + ret + +END(vmm_switch)