Index: sys/amd64/vmm/vmm.c =================================================================== --- sys/amd64/vmm/vmm.c +++ sys/amd64/vmm/vmm.c @@ -559,7 +559,8 @@ if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); - vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); + vcpu = (struct vcpu *) + atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) return (vcpu); Index: sys/arm64/vmm/vmm.c =================================================================== --- sys/arm64/vmm/vmm.c +++ sys/arm64/vmm/vmm.c @@ -443,7 +443,8 @@ if (vcpuid >= vgic_max_cpu_count(vm->cookie)) return (NULL); - vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]); + vcpu = (struct vcpu *) + atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) return (vcpu); Index: sys/conf/files.riscv =================================================================== --- sys/conf/files.riscv +++ sys/conf/files.riscv @@ -12,6 +12,8 @@ dev/pci/pci_host_generic_fdt.c optional pci fdt dev/uart/uart_cpu_fdt.c optional uart fdt dev/uart/uart_dev_lowrisc.c optional uart_lowrisc +dev/vmm/vmm_dev.c optional vmm +dev/vmm/vmm_stat.c optional vmm dev/xilinx/axi_quad_spi.c optional xilinx_spi dev/xilinx/axidma.c optional axidma xdma dev/xilinx/if_xae.c optional xae @@ -44,6 +46,7 @@ riscv/riscv/elf_machdep.c standard riscv/riscv/exception.S standard riscv/riscv/exec_machdep.c standard +riscv/riscv/fpe.c optional vmm riscv/riscv/gdb_machdep.c optional gdb riscv/riscv/intc.c standard riscv/riscv/identcpu.c standard @@ -72,6 +75,13 @@ riscv/riscv/uio_machdep.c standard riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack riscv/riscv/vm_machdep.c standard +riscv/vmm/vmm.c optional vmm +riscv/vmm/vmm_aplic.c optional vmm +riscv/vmm/vmm_dev_machdep.c optional vmm +riscv/vmm/vmm_instruction_emul.c optional vmm +riscv/vmm/vmm_riscv.c optional vmm +riscv/vmm/vmm_sbi.c optional vmm +riscv/vmm/vmm_switch.S optional vmm # Zstd contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C} Index: sys/conf/kern.mk =================================================================== --- sys/conf/kern.mk +++ sys/conf/kern.mk @@ -163,7 +163,7 @@ # code model as "medium" and "medany" respectively. # .if ${MACHINE_CPUARCH} == "riscv" -CFLAGS+= -march=rv64imafdc +CFLAGS+= -march=rv64imafdch CFLAGS+= -mabi=lp64 CFLAGS.clang+= -mcmodel=medium CFLAGS.gcc+= -mcmodel=medany Index: sys/riscv/include/cpu.h =================================================================== --- sys/riscv/include/cpu.h +++ sys/riscv/include/cpu.h @@ -47,8 +47,6 @@ #define cpu_spinwait() /* nothing */ #define cpu_lock_delay() DELAY(1) -#ifdef _KERNEL - /* * Core manufacturer IDs, as reported by the mvendorid CSR. 
*/ @@ -89,6 +87,8 @@ #define MMU_SV48 0x2 /* 4-level paging */ #define MMU_SV57 0x4 /* 5-level paging */ +#ifdef _KERNEL + extern char btext[]; extern char etext[]; Index: sys/riscv/include/elf.h =================================================================== --- sys/riscv/include/elf.h +++ sys/riscv/include/elf.h @@ -80,6 +80,7 @@ #define HWCAP_ISA_F HWCAP_ISA_BIT('f') #define HWCAP_ISA_D HWCAP_ISA_BIT('d') #define HWCAP_ISA_C HWCAP_ISA_BIT('c') +#define HWCAP_ISA_H HWCAP_ISA_BIT('h') #define HWCAP_ISA_G \ (HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D) #define HWCAP_ISA_B HWCAP_ISA_BIT('b') Index: sys/riscv/include/md_var.h =================================================================== --- sys/riscv/include/md_var.h +++ sys/riscv/include/md_var.h @@ -42,6 +42,7 @@ extern u_int mmu_caps; /* Supervisor-mode extension support */ +extern bool has_hyp; extern bool has_sstc; extern bool has_sscofpmf; extern bool has_svpbmt; Index: sys/riscv/include/riscvreg.h =================================================================== --- sys/riscv/include/riscvreg.h +++ sys/riscv/include/riscvreg.h @@ -47,9 +47,15 @@ #define SCAUSE_STORE_ACCESS_FAULT 7 #define SCAUSE_ECALL_USER 8 #define SCAUSE_ECALL_SUPERVISOR 9 +#define SCAUSE_VIRTUAL_SUPERVISOR_ECALL 10 +#define SCAUSE_MACHINE_ECALL 11 #define SCAUSE_INST_PAGE_FAULT 12 #define SCAUSE_LOAD_PAGE_FAULT 13 #define SCAUSE_STORE_PAGE_FAULT 15 +#define SCAUSE_FETCH_GUEST_PAGE_FAULT 20 +#define SCAUSE_LOAD_GUEST_PAGE_FAULT 21 +#define SCAUSE_VIRTUAL_INSTRUCTION 22 +#define SCAUSE_STORE_GUEST_PAGE_FAULT 23 #define SSTATUS_UIE (1 << 0) #define SSTATUS_SIE (1 << 1) @@ -116,6 +122,17 @@ #define MSTATUS_PRV_H 2 /* hypervisor */ #define MSTATUS_PRV_M 3 /* machine */ +#define HSTATUS_VSBE (1 << 5) +#define HSTATUS_GVA (1 << 6) +#define HSTATUS_SPV (1 << 7) +#define HSTATUS_SPVP (1 << 8) +#define HSTATUS_HU (1 << 9) +#define HSTATUS_VGEIN_S 12 +#define HSTATUS_VGEIN_M (0xf << HSTATUS_VGEIN_S) +#define HSTATUS_VTVM (1 << 20) +#define HSTATUS_VTW (1 << 21) +#define HSTATUS_VTSR (1 << 22) + #define MIE_USIE (1 << 0) #define MIE_SSIE (1 << 1) #define MIE_HSIE (1 << 2) @@ -143,10 +160,35 @@ #define MIP_SEIP (1 << 9) +#define HVIP_VSSIP (1 << 2) +#define HVIP_VSTIP (1 << 6) +#define HVIP_VSEIP (1 << 10) + +#define HIE_VSSIE (1 << 2) +#define HIE_VSTIE (1 << 6) +#define HIE_VSEIE (1 << 10) +#define HIE_SGEIE (1 << 12) + /* Note: sip register has no SIP_STIP bit in Spike simulator */ #define SIP_SSIP (1 << 1) #define SIP_STIP (1 << 5) +#define HENVCFG_STCE (1UL << 63) +#define HENVCFG_PBMTE (1UL << 62) +#define HENVCFG_ADUE (1UL << 61) +#define HENVCFG_CDE (1UL << 60) +#define HENVCFG_PMM_S (1UL << 31) +#define HENVCFG_PMM_M (0x3 << HENVCFG_PMM_S) +#define HENVCFG_CBZE (1UL << 7) +#define HENVCFG_CBCFE (1UL << 6) +#define HENVCFG_CBIE_S (1UL << 4) +#define HENVCFG_CBIE_M (0x3 << HENVCFG_CBIE_S) +#define HENVCFG_FIOM (1UL << 0) + +#define HCOUNTEREN_CY (1UL << 0) /* Cycle */ +#define HCOUNTEREN_TM (1UL << 1) /* Time */ +#define HCOUNTEREN_IR (1UL << 2) /* Instret */ + #define SATP_PPN_S 0 #define SATP_PPN_M (0xfffffffffffUL << SATP_PPN_S) #define SATP_ASID_S 44 Index: sys/riscv/include/vmm.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm.h @@ -0,0 +1,328 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of 
Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_H_ +#define _VMM_H_ + +#include +#include +#include +#include + +#include "pte.h" +#include "pmap.h" + +struct vcpu; + +enum vm_suspend_how { + VM_SUSPEND_NONE, + VM_SUSPEND_RESET, + VM_SUSPEND_POWEROFF, + VM_SUSPEND_HALT, + VM_SUSPEND_LAST +}; + +/* + * Identifiers for architecturally defined registers. + */ +enum vm_reg_name { + VM_REG_GUEST_ZERO = 0, + VM_REG_GUEST_RA, + VM_REG_GUEST_SP, + VM_REG_GUEST_GP, + VM_REG_GUEST_TP, + VM_REG_GUEST_T0, + VM_REG_GUEST_T1, + VM_REG_GUEST_T2, + VM_REG_GUEST_S0, + VM_REG_GUEST_S1, + VM_REG_GUEST_A0, + VM_REG_GUEST_A1, + VM_REG_GUEST_A2, + VM_REG_GUEST_A3, + VM_REG_GUEST_A4, + VM_REG_GUEST_A5, + VM_REG_GUEST_A6, + VM_REG_GUEST_A7, + VM_REG_GUEST_S2, + VM_REG_GUEST_S3, + VM_REG_GUEST_S4, + VM_REG_GUEST_S5, + VM_REG_GUEST_S6, + VM_REG_GUEST_S7, + VM_REG_GUEST_S8, + VM_REG_GUEST_S9, + VM_REG_GUEST_S10, + VM_REG_GUEST_S11, + VM_REG_GUEST_T3, + VM_REG_GUEST_T4, + VM_REG_GUEST_T5, + VM_REG_GUEST_T6, + VM_REG_GUEST_SEPC, + VM_REG_LAST +}; + +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) +#define VM_INTINFO_DEL_ERRCODE 0x800 +#define VM_INTINFO_RSVD 0x7ffff000 +#define VM_INTINFO_VALID 0x80000000 +#define VM_INTINFO_TYPE 0x700 +#define VM_INTINFO_HWINTR (0 << 8) +#define VM_INTINFO_NMI (2 << 8) +#define VM_INTINFO_HWEXCEPTION (3 << 8) +#define VM_INTINFO_SWINTR (4 << 8) + +#define VM_MAX_SUFFIXLEN 15 + +#ifdef _KERNEL + +#define VM_MAX_NAMELEN 32 + +struct vm; +struct vm_exception; +struct vm_exit; +struct vm_run; +struct vm_object; +struct vm_guest_paging; +struct vm_aplic_descr; +struct pmap; + +struct vm_eventinfo { + void *rptr; /* rendezvous cookie */ + int *sptr; /* suspend cookie */ + int *iptr; /* reqidle cookie */ +}; + +int vm_create(const char *name, struct vm **retvm); +struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); +void vm_disable_vcpu_creation(struct vm *vm); +void vm_slock_vcpus(struct vm *vm); +void vm_unlock_vcpus(struct vm *vm); +void vm_destroy(struct vm *vm); +int vm_reinit(struct vm *vm); +const char *vm_name(struct vm *vm); + +/* + * APIs that modify the guest memory map require all vcpus to be 
frozen. + */ +void vm_slock_memsegs(struct vm *vm); +void vm_xlock_memsegs(struct vm *vm); +void vm_unlock_memsegs(struct vm *vm); +int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, + size_t len, int prot, int flags); +int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len); +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); +void vm_free_memseg(struct vm *vm, int ident); + +/* + * APIs that inspect the guest memory map require only a *single* vcpu to + * be frozen. This acts like a read lock on the guest memory map since any + * modification requires *all* vcpus to be frozen. + */ +int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); +int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + struct vm_object **objptr); +vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm); +void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, + int prot, void **cookie); +void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, + int prot, void **cookie); +void vm_gpa_release(void *cookie); +bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa); + +int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault); + +uint16_t vm_get_maxcpus(struct vm *vm); +void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); +int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); +int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval); +int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val); +int vm_run(struct vcpu *vcpu); +int vm_suspend(struct vm *vm, enum vm_suspend_how how); +void* vm_get_cookie(struct vm *vm); +int vcpu_vcpuid(struct vcpu *vcpu); +void *vcpu_get_cookie(struct vcpu *vcpu); +struct vm *vcpu_vm(struct vcpu *vcpu); +struct vcpu *vm_vcpu(struct vm *vm, int cpu); +int vm_get_capability(struct vcpu *vcpu, int type, int *val); +int vm_set_capability(struct vcpu *vcpu, int type, int val); +int vm_activate_cpu(struct vcpu *vcpu); +int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_inject_exception(struct vcpu *vcpu, uint64_t scause); +int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr); +int vm_assert_irq(struct vm *vm, uint32_t irq); +int vm_deassert_irq(struct vm *vm, uint32_t irq); +int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, + int func); +struct vm_exit *vm_exitinfo(struct vcpu *vcpu); +void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc); +void vm_exit_debug(struct vcpu *vcpu, uint64_t pc); +void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc); +void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc); + +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_debug_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); + +static __inline int +vcpu_rendezvous_pending(struct vm_eventinfo *info) +{ + + return (*((uintptr_t *)(info->rptr)) != 0); +} + +static __inline int +vcpu_suspended(struct vm_eventinfo *info) +{ + + return (*info->sptr); +} + +int vcpu_debugged(struct vcpu *vcpu); + +enum vcpu_state { + VCPU_IDLE, + VCPU_FROZEN, + VCPU_RUNNING, + VCPU_SLEEPING, +}; + +int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); +enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); + +static int 
__inline +vcpu_is_running(struct vcpu *vcpu, int *hostcpu) +{ + return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING); +} + +#ifdef _SYS_PROC_H_ +static int __inline +vcpu_should_yield(struct vcpu *vcpu) +{ + struct thread *td; + + td = curthread; + return (td->td_ast != 0 || td->td_owepreempt != 0); +} +#endif + +void *vcpu_stats(struct vcpu *vcpu); +void vcpu_notify_event(struct vcpu *vcpu); + +enum vm_reg_name vm_segment_name(int seg_encoding); + +#endif /* _KERNEL */ + +#define VM_DIR_READ 0 +#define VM_DIR_WRITE 1 + +#define VM_GP_M_MASK 0x1f +#define VM_GP_MMU_ENABLED (1 << 5) + +struct vm_guest_paging { + int flags; + int padding; +}; + +struct vie { + uint8_t access_size:4, sign_extend:1, dir:1, unused:2; + enum vm_reg_name reg; +}; + +struct vre { + uint32_t inst_syndrome; + uint8_t dir:1, unused:7; + enum vm_reg_name reg; +}; + +/* + * Identifiers for optional vmm capabilities + */ +enum vm_cap_type { + VM_CAP_UNRESTRICTED_GUEST, + VM_CAP_MAX +}; + +enum vm_exitcode { + VM_EXITCODE_BOGUS, + VM_EXITCODE_ECALL, + VM_EXITCODE_HYP, + VM_EXITCODE_PAGING, + VM_EXITCODE_SUSPENDED, + VM_EXITCODE_DEBUG, + VM_EXITCODE_INST_EMUL, + VM_EXITCODE_WFI, + VM_EXITCODE_MAX +}; + +struct vm_exit { + uint64_t scause; + uint64_t sepc; + uint64_t stval; + uint64_t htval; + uint64_t htinst; + enum vm_exitcode exitcode; + int inst_length; + uint64_t pc; + union { + struct { + uint64_t gpa; + } paging; + + struct { + uint64_t gpa; + struct vm_guest_paging paging; + struct vie vie; + } inst_emul; + + struct { + uint64_t args[8]; + } ecall; + + struct { + enum vm_suspend_how how; + } suspended; + + struct { + uint64_t scause; + } hyp; + } u; +}; + +#endif /* _VMM_H_ */ Index: sys/riscv/include/vmm_dev.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm_dev.h @@ -0,0 +1,258 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_DEV_H_ +#define _VMM_DEV_H_ + +struct vm_memmap { + vm_paddr_t gpa; + int segid; /* memory segment */ + vm_ooffset_t segoff; /* offset into memory segment */ + size_t len; /* mmap length */ + int prot; /* RWX */ + int flags; +}; +#define VM_MEMMAP_F_WIRED 0x01 + +struct vm_munmap { + vm_paddr_t gpa; + size_t len; +}; + +#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL) +struct vm_memseg { + int segid; + size_t len; + char name[VM_MAX_SUFFIXLEN + 1]; +}; + +struct vm_register { + int cpuid; + int regnum; /* enum vm_reg_name */ + uint64_t regval; +}; + +struct vm_register_set { + int cpuid; + unsigned int count; + const int *regnums; /* enum vm_reg_name */ + uint64_t *regvals; +}; + +struct vm_run { + int cpuid; + cpuset_t *cpuset; /* CPU set storage */ + size_t cpusetsize; + struct vm_exit *vm_exit; +}; + +struct vm_exception { + int cpuid; + uint64_t scause; +}; + +struct vm_msi { + uint64_t msg; + uint64_t addr; + int bus; + int slot; + int func; +}; + +struct vm_capability { + int cpuid; + enum vm_cap_type captype; + int capval; + int allcpus; +}; + +#define MAX_VM_STATS 64 +struct vm_stats { + int cpuid; /* in */ + int index; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; +struct vm_stat_desc { + int index; /* in */ + char desc[128]; /* out */ +}; + +struct vm_suspend { + enum vm_suspend_how how; +}; + +struct vm_gla2gpa { + int vcpuid; /* inputs */ + int prot; /* PROT_READ or PROT_WRITE */ + uint64_t gla; + struct vm_guest_paging paging; + int fault; /* outputs */ + uint64_t gpa; +}; + +struct vm_activate_cpu { + int vcpuid; +}; + +struct vm_cpuset { + int which; + int cpusetsize; + cpuset_t *cpus; +}; +#define VM_ACTIVE_CPUS 0 +#define VM_SUSPENDED_CPUS 1 +#define VM_DEBUG_CPUS 2 + +struct vm_aplic_descr { + uint64_t mem_start; + uint64_t mem_size; +}; + +struct vm_irq { + uint32_t irq; +}; + +struct vm_cpu_topology { + uint16_t sockets; + uint16_t cores; + uint16_t threads; + uint16_t maxcpus; +}; + +enum { + /* general routines */ + IOCNUM_ABIVERS = 0, + IOCNUM_RUN = 1, + IOCNUM_SET_CAPABILITY = 2, + IOCNUM_GET_CAPABILITY = 3, + IOCNUM_SUSPEND = 4, + IOCNUM_REINIT = 5, + + /* memory apis */ + IOCNUM_GET_GPA_PMAP = 12, + IOCNUM_GLA2GPA_NOFAULT = 13, + IOCNUM_ALLOC_MEMSEG = 14, + IOCNUM_GET_MEMSEG = 15, + IOCNUM_MMAP_MEMSEG = 16, + IOCNUM_MMAP_GETNEXT = 17, + IOCNUM_MUNMAP_MEMSEG = 18, + + /* register/state accessors */ + IOCNUM_SET_REGISTER = 20, + IOCNUM_GET_REGISTER = 21, + IOCNUM_SET_REGISTER_SET = 24, + IOCNUM_GET_REGISTER_SET = 25, + + /* statistics */ + IOCNUM_VM_STATS = 50, + IOCNUM_VM_STAT_DESC = 51, + + /* CPU Topology */ + IOCNUM_SET_TOPOLOGY = 63, + IOCNUM_GET_TOPOLOGY = 64, + + /* interrupt injection */ + IOCNUM_ASSERT_IRQ = 80, + IOCNUM_DEASSERT_IRQ = 81, + IOCNUM_RAISE_MSI = 82, + IOCNUM_INJECT_EXCEPTION = 83, + + /* vm_cpuset */ + IOCNUM_ACTIVATE_CPU = 90, + IOCNUM_GET_CPUSET = 91, + IOCNUM_SUSPEND_CPU = 92, + IOCNUM_RESUME_CPU = 93, + + /* vm_attach_aplic */ + IOCNUM_ATTACH_APLIC = 110, +}; + +#define VM_RUN \ + _IOWR('v', IOCNUM_RUN, struct vm_run) +#define VM_SUSPEND \ + _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) +#define VM_REINIT \ + _IO('v', IOCNUM_REINIT) +#define VM_ALLOC_MEMSEG \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) +#define VM_GET_MEMSEG \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) +#define VM_MMAP_MEMSEG \ + _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) +#define VM_MMAP_GETNEXT \ + _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) +#define 
VM_MUNMAP_MEMSEG \ + _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap) +#define VM_SET_REGISTER \ + _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) +#define VM_GET_REGISTER \ + _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) +#define VM_SET_REGISTER_SET \ + _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) +#define VM_GET_REGISTER_SET \ + _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set) +#define VM_SET_CAPABILITY \ + _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) +#define VM_GET_CAPABILITY \ + _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) +#define VM_STATS \ + _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) +#define VM_STAT_DESC \ + _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) +#define VM_ASSERT_IRQ \ + _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq) +#define VM_DEASSERT_IRQ \ + _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq) +#define VM_RAISE_MSI \ + _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi) +#define VM_INJECT_EXCEPTION \ + _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) +#define VM_SET_TOPOLOGY \ + _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GET_TOPOLOGY \ + _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GLA2GPA_NOFAULT \ + _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa) +#define VM_ACTIVATE_CPU \ + _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) +#define VM_GET_CPUS \ + _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_SUSPEND_CPU \ + _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) +#define VM_RESUME_CPU \ + _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) +#define VM_ATTACH_APLIC \ + _IOW('v', IOCNUM_ATTACH_APLIC, struct vm_aplic_descr) +#endif Index: sys/riscv/include/vmm_instruction_emul.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm_instruction_emul.h @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +/* + * Callback functions to read and write memory regions. 
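+ *
+ * Illustration only (a hypothetical handler, not added by this patch): a read
+ * callback matching mem_region_read_t below stores the 'rsize'-byte value
+ * read at 'gpa' through 'rval' and returns 0 on success (or an errno):
+ *
+ *	static int
+ *	my_mmio_read(struct vcpu *vcpu, uint64_t gpa, uint64_t *rval,
+ *	    int rsize, void *arg)
+ *	{
+ *		*rval = 0;
+ *		return (0);
+ *	}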
+ */ +typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t *rval, int rsize, void *arg); +typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t wval, int wsize, void *arg); + +/* + * Callback functions to read and write registers. + */ +typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg); +typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg); + +/* + * Emulate the decoded 'vie' instruction when it contains a memory operation. + * + * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region + * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * + */ +int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t mrr, + mem_region_write_t mrw, void *mrarg); + +/* + * Emulate the decoded 'vre' instruction when it contains a register access. + * + * The callbacks 'regread' and 'regwrite' emulate reads and writes to the + * register from 'vie'. 'regarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * + */ +int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread, + reg_write_t regwrite, void *regarg); + +#ifdef _KERNEL +void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask, + reg_read_t reg_read, reg_write_t reg_write, void *arg); +void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask); + +void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, + mem_region_read_t mmio_read, mem_region_write_t mmio_write); +void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size); +#endif + +#endif /* _VMM_INSTRUCTION_EMUL_H_ */ Index: sys/riscv/include/vmm_snapshot.h =================================================================== --- /dev/null +++ sys/riscv/include/vmm_snapshot.h @@ -0,0 +1 @@ +/* $FreeBSD$ */ Index: sys/riscv/riscv/genassym.c =================================================================== --- sys/riscv/riscv/genassym.c +++ sys/riscv/riscv/genassym.c @@ -55,6 +55,8 @@ #include #include +#include + ASSYM(KERNBASE, KERNBASE); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); @@ -98,6 +100,38 @@ ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause)); ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus)); +ASSYM(HYP_H_RA, offsetof(struct hypctx, host_regs.hyp_ra)); +ASSYM(HYP_H_SP, offsetof(struct hypctx, host_regs.hyp_sp)); +ASSYM(HYP_H_GP, offsetof(struct hypctx, host_regs.hyp_gp)); +ASSYM(HYP_H_TP, offsetof(struct hypctx, host_regs.hyp_tp)); +ASSYM(HYP_H_T, offsetof(struct hypctx, host_regs.hyp_t)); +ASSYM(HYP_H_S, offsetof(struct hypctx, host_regs.hyp_s)); +ASSYM(HYP_H_A, offsetof(struct hypctx, host_regs.hyp_a)); +ASSYM(HYP_H_SEPC, offsetof(struct hypctx, host_regs.hyp_sepc)); +ASSYM(HYP_H_SSTATUS, offsetof(struct hypctx, host_regs.hyp_sstatus)); +ASSYM(HYP_H_HSTATUS, offsetof(struct hypctx, host_regs.hyp_hstatus)); +ASSYM(HYP_H_SSCRATCH, offsetof(struct hypctx, host_sscratch)); +ASSYM(HYP_H_STVEC, offsetof(struct hypctx, host_stvec)); +ASSYM(HYP_H_SCOUNTEREN, offsetof(struct hypctx, host_scounteren)); + +ASSYM(HYP_G_RA, 
offsetof(struct hypctx, guest_regs.hyp_ra)); +ASSYM(HYP_G_SP, offsetof(struct hypctx, guest_regs.hyp_sp)); +ASSYM(HYP_G_GP, offsetof(struct hypctx, guest_regs.hyp_gp)); +ASSYM(HYP_G_TP, offsetof(struct hypctx, guest_regs.hyp_tp)); +ASSYM(HYP_G_T, offsetof(struct hypctx, guest_regs.hyp_t)); +ASSYM(HYP_G_S, offsetof(struct hypctx, guest_regs.hyp_s)); +ASSYM(HYP_G_A, offsetof(struct hypctx, guest_regs.hyp_a)); +ASSYM(HYP_G_SEPC, offsetof(struct hypctx, guest_regs.hyp_sepc)); +ASSYM(HYP_G_SSTATUS, offsetof(struct hypctx, guest_regs.hyp_sstatus)); +ASSYM(HYP_G_HSTATUS, offsetof(struct hypctx, guest_regs.hyp_hstatus)); +ASSYM(HYP_G_SCOUNTEREN, offsetof(struct hypctx, guest_scounteren)); + +ASSYM(HYP_TRAP_SEPC, offsetof(struct hyptrap, sepc)); +ASSYM(HYP_TRAP_SCAUSE, offsetof(struct hyptrap, scause)); +ASSYM(HYP_TRAP_STVAL, offsetof(struct hyptrap, stval)); +ASSYM(HYP_TRAP_HTVAL, offsetof(struct hyptrap, htval)); +ASSYM(HYP_TRAP_HTINST, offsetof(struct hyptrap, htinst)); + ASSYM(RISCV_BOOTPARAMS_SIZE, sizeof(struct riscv_bootparams)); ASSYM(RISCV_BOOTPARAMS_KERN_PHYS, offsetof(struct riscv_bootparams, kern_phys)); ASSYM(RISCV_BOOTPARAMS_KERN_STACK, offsetof(struct riscv_bootparams, Index: sys/riscv/riscv/identcpu.c =================================================================== --- sys/riscv/riscv/identcpu.c +++ sys/riscv/riscv/identcpu.c @@ -72,6 +72,7 @@ u_int mmu_caps; /* Supervisor-mode extension support. */ +bool has_hyp; bool __read_frequently has_sstc; bool __read_frequently has_sscofpmf; bool has_svpbmt; @@ -249,6 +250,7 @@ case 'c': case 'd': case 'f': + case 'h': case 'i': case 'm': desc->isa_extensions |= HWCAP_ISA_BIT(isa[i]); @@ -414,6 +416,7 @@ UPDATE_CAP(mmu_caps, desc->mmu_caps); /* Supervisor-mode extension support. */ + UPDATE_CAP(has_hyp, (desc->isa_extensions & HWCAP_ISA_H) != 0); UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0); UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0); UPDATE_CAP(has_svpbmt, (desc->smode_extensions & SV_SVPBMT) != 0); @@ -514,6 +517,7 @@ "\03Compressed" "\04Double" "\06Float" + "\10Hypervisor" "\15Mult/Div"); } Index: sys/riscv/vmm/riscv.h =================================================================== --- /dev/null +++ sys/riscv/vmm/riscv.h @@ -0,0 +1,132 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_RISCV_H_ +#define _VMM_RISCV_H_ + +#include +#include +#include + +struct hypregs { + uint64_t hyp_ra; + uint64_t hyp_sp; + uint64_t hyp_gp; + uint64_t hyp_tp; + uint64_t hyp_t[7]; + uint64_t hyp_s[12]; + uint64_t hyp_a[8]; + uint64_t hyp_sepc; + uint64_t hyp_sstatus; + uint64_t hyp_hstatus; +}; + +struct hypcsr { + uint64_t hvip; + uint64_t vsstatus; + uint64_t vsie; + uint64_t vstvec; + uint64_t vsscratch; + uint64_t vsepc; + uint64_t vscause; + uint64_t vstval; + uint64_t vsatp; + uint64_t scounteren; + uint64_t senvcfg; +}; + +struct hypctx { + struct hypregs host_regs; + struct hypregs guest_regs; + struct hypcsr guest_csrs; + uint64_t host_sscratch; + uint64_t host_stvec; + uint64_t host_scounteren; + uint64_t guest_scounteren; + struct hyp *hyp; + struct vcpu *vcpu; + bool has_exception; + int cpu_id; + int ipi_pending; +}; + +struct hyp { + struct vm *vm; + uint64_t vmid_generation; + bool aplic_attached; + struct aplic *aplic; + struct hypctx *ctx[]; +}; + +struct hyptrap { + uint64_t sepc; + uint64_t scause; + uint64_t stval; + uint64_t htval; + uint64_t htinst; +}; + +#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ + ret_type vmmops_##opname args; + +DEFINE_VMMOPS_IFUNC(int, modinit, (void)) +DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) +DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) +DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) +DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, + struct vm_eventinfo *info)) +DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) +DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)) +DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) +DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) +DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) +DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) +DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) +DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) +DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, + vm_offset_t max)) +DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) + +#define dprintf(fmt, ...) 
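+/*
+ * DEFINE_VMMOPS_IFUNC above only emits prototypes; for example the getreg
+ * entry expands to
+ *
+ *	int vmmops_getreg(void *vcpui, int num, uint64_t *retval);
+ *
+ * with the definitions expected to live in the machine-dependent backend
+ * (riscv/vmm/vmm_riscv.c in this series).
+ */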
+ +struct hypctx *riscv_get_active_vcpu(void); +void vmm_switch(struct hypctx *); +void vmm_unpriv_trap(struct hyptrap *, uint64_t tmp); +int vmm_sbi_ecall(struct vcpu *, bool *); + +void riscv_send_ipi(struct hypctx *hypctx, int hart_id); +int riscv_check_ipi(struct hypctx *hypctx, bool clear); + +#endif /* !_VMM_RISCV_H_ */ Index: sys/riscv/vmm/vmm.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm.c @@ -0,0 +1,1606 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "vmm_stat.h" +#include "riscv.h" + +#include "vmm_aplic.h" + +struct vcpu { + int flags; + enum vcpu_state state; + struct mtx mtx; + int hostcpu; /* host cpuid this vcpu last ran on */ + int vcpuid; + void *stats; + struct vm_exit exitinfo; + uint64_t nextpc; /* (x) next instruction to execute */ + struct vm *vm; /* (o) */ + void *cookie; /* (i) cpu-specific data */ + struct fpreg *guestfpu; /* (a,i) guest fpu state */ +}; + +#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) +#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) +#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) +#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) +#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) +#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) + +struct mem_seg { + uint64_t gpa; + size_t len; + bool wired; + bool sysmem; + vm_object_t object; +}; +#define VM_MAX_MEMSEGS 3 + +struct mem_map { + vm_paddr_t gpa; + size_t len; + vm_ooffset_t segoff; + int segid; + int prot; + int flags; +}; +#define VM_MAX_MEMMAPS 4 + +struct vmm_mmio_region { + uint64_t start; + uint64_t end; + mem_region_read_t read; + mem_region_write_t write; +}; +#define VM_MAX_MMIO_REGIONS 4 + +/* + * Initialization: + * (o) initialized the first time the VM is created + * (i) initialized when VM is created and when it is reinitialized + * (x) initialized before use + */ +struct vm { + void *cookie; /* (i) cpu-specific data */ + volatile cpuset_t active_cpus; /* (i) active vcpus */ + volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/ + int suspend; /* (i) stop VM execution */ + bool dying; /* (o) is dying */ + volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ + volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ + struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ + struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ + struct vmspace *vmspace; /* (o) guest's address space */ + char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + struct vcpu **vcpu; /* (i) guest vcpus */ + struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; + /* (o) guest MMIO regions */ + /* The following describe the vm cpu topology */ + uint16_t sockets; /* (o) num of sockets */ + uint16_t cores; /* (o) num of cores/socket */ + uint16_t threads; /* (o) num of threads/core */ + uint16_t maxcpus; /* (o) max pluggable cpus */ + struct sx mem_segs_lock; /* (o) */ + struct sx vcpus_init_lock; /* (o) */ +}; + +static bool vmm_initialized = false; + +static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); + +/* statistics */ +static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); + +SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); + +static int vmm_ipinum; +SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, + "IPI vector used for vcpu notifications"); + +u_int vm_maxcpu; +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &vm_maxcpu, 0, "Maximum number of vCPUs"); + +static void vm_free_memmap(struct vm *vm, int ident); +static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); +static void vcpu_notify_event_locked(struct 
vcpu *vcpu); + +/* + * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this + * is a safe value for now. + */ +#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) + +static void +vcpu_cleanup(struct vcpu *vcpu, bool destroy) +{ + vmmops_vcpu_cleanup(vcpu->cookie); + vcpu->cookie = NULL; + if (destroy) { + vmm_stat_free(vcpu->stats); + fpu_save_area_free(vcpu->guestfpu); + vcpu_lock_destroy(vcpu); + } +} + +static struct vcpu * +vcpu_alloc(struct vm *vm, int vcpu_id) +{ + struct vcpu *vcpu; + + KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, + ("vcpu_alloc: invalid vcpu %d", vcpu_id)); + + vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); + vcpu_lock_init(vcpu); + vcpu->state = VCPU_IDLE; + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; + vcpu->vm = vm; + vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->stats = vmm_stat_alloc(); + return (vcpu); +} + +static void +vcpu_init(struct vcpu *vcpu) +{ + vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); + MPASS(vcpu->cookie != NULL); + fpu_save_area_reset(vcpu->guestfpu); + vmm_stat_init(vcpu->stats); +} + +struct vm_exit * +vm_exitinfo(struct vcpu *vcpu) +{ + return (&vcpu->exitinfo); +} + +static int +vmm_init(void) +{ + + vm_maxcpu = mp_ncpus; + + TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); + + if (vm_maxcpu > VM_MAXCPU) { + printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); + vm_maxcpu = VM_MAXCPU; + } + + if (vm_maxcpu == 0) + vm_maxcpu = 1; + + return (vmmops_modinit()); +} + +static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + /* TODO: check if has_hyp here? */ + vmmdev_init(); + error = vmm_init(); + if (error == 0) + vmm_initialized = true; + break; + case MOD_UNLOAD: + /* TODO: check if has_hyp here? */ + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = vmmops_modcleanup(); + if (error) + vmm_initialized = false; + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - HYP initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). 
+ */ +DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +static void +vm_init(struct vm *vm, bool create) +{ + int i; + + vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + MPASS(vm->cookie != NULL); + + CPU_ZERO(&vm->active_cpus); + CPU_ZERO(&vm->debug_cpus); + + vm->suspend = 0; + CPU_ZERO(&vm->suspended_cpus); + + memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); + + if (!create) { + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_init(vm->vcpu[i]); + } + } +} + +void +vm_disable_vcpu_creation(struct vm *vm) +{ + sx_xlock(&vm->vcpus_init_lock); + vm->dying = true; + sx_xunlock(&vm->vcpus_init_lock); +} + +struct vcpu * +vm_alloc_vcpu(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) + return (NULL); + + /* Some interrupt controllers may have a CPU limit */ + if (vcpuid >= aplic_max_cpu_count(vm->cookie)) + return (NULL); + + vcpu = (struct vcpu *) + atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); + if (__predict_true(vcpu != NULL)) + return (vcpu); + + sx_xlock(&vm->vcpus_init_lock); + vcpu = vm->vcpu[vcpuid]; + if (vcpu == NULL && !vm->dying) { + vcpu = vcpu_alloc(vm, vcpuid); + vcpu_init(vcpu); + + /* + * Ensure vCPU is fully created before updating pointer + * to permit unlocked reads above. + */ + atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], + (uintptr_t)vcpu); + } + sx_xunlock(&vm->vcpus_init_lock); + return (vcpu); +} + +void +vm_slock_vcpus(struct vm *vm) +{ + sx_slock(&vm->vcpus_init_lock); +} + +void +vm_unlock_vcpus(struct vm *vm) +{ + sx_unlock(&vm->vcpus_init_lock); +} + +int +vm_create(const char *name, struct vm **retvm) +{ + struct vm *vm; + struct vmspace *vmspace; + + /* + * If vmm.ko could not be successfully initialized then don't attempt + * to create the virtual machine. + */ + if (!vmm_initialized) + return (ENXIO); + + if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) + return (EINVAL); + + vmspace = vmmops_vmspace_alloc(0, 1ul << 39); + if (vmspace == NULL) + return (ENOMEM); + + vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + strcpy(vm->name, name); + vm->vmspace = vmspace; + sx_init(&vm->mem_segs_lock, "vm mem_segs"); + sx_init(&vm->vcpus_init_lock, "vm vcpus"); + + vm->sockets = 1; + vm->cores = 1; /* XXX backwards compatibility */ + vm->threads = 1; /* XXX backwards compatibility */ + vm->maxcpus = vm_maxcpu; + + vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, + M_WAITOK | M_ZERO); + + vm_init(vm, true); + + *retvm = vm; + return (0); +} + +void +vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus) +{ + *sockets = vm->sockets; + *cores = vm->cores; + *threads = vm->threads; + *maxcpus = vm->maxcpus; +} + +uint16_t +vm_get_maxcpus(struct vm *vm) +{ + return (vm->maxcpus); +} + +int +vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus) +{ + /* Ignore maxcpus. */ + if ((sockets * cores * threads) > vm->maxcpus) + return (EINVAL); + vm->sockets = sockets; + vm->cores = cores; + vm->threads = threads; + return(0); +} + +static void +vm_cleanup(struct vm *vm, bool destroy) +{ + struct mem_map *mm; + int i; + + aplic_detach_from_vm(vm->cookie); + + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_cleanup(vm->vcpu[i], destroy); + } + + vmmops_cleanup(vm->cookie); + + /* + * System memory is removed from the guest address space only when + * the VM is destroyed. 
This is because the mapping remains the same + * across VM reset. + * + * Device memory can be relocated by the guest (e.g. using PCI BARs) + * so those mappings are removed on a VM reset. + */ + if (!destroy) { + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (destroy || !sysmem_mapping(vm, mm)) + vm_free_memmap(vm, i); + } + } + + if (destroy) { + for (i = 0; i < VM_MAX_MEMSEGS; i++) + vm_free_memseg(vm, i); + + vmmops_vmspace_free(vm->vmspace); + vm->vmspace = NULL; + + for (i = 0; i < vm->maxcpus; i++) + free(vm->vcpu[i], M_VMM); + free(vm->vcpu, M_VMM); + sx_destroy(&vm->vcpus_init_lock); + sx_destroy(&vm->mem_segs_lock); + } +} + +void +vm_destroy(struct vm *vm) +{ + + vm_cleanup(vm, true); + + free(vm, M_VMM); +} + +int +vm_reinit(struct vm *vm) +{ + int error; + + /* + * A virtual machine can be reset only if all vcpus are suspended. + */ + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { + vm_cleanup(vm, false); + vm_init(vm, false); + error = 0; + } else { + error = EBUSY; + } + + return (error); +} + +const char * +vm_name(struct vm *vm) +{ + return (vm->name); +} + +void +vm_slock_memsegs(struct vm *vm) +{ + sx_slock(&vm->mem_segs_lock); +} + +void +vm_xlock_memsegs(struct vm *vm) +{ + sx_xlock(&vm->mem_segs_lock); +} + +void +vm_unlock_memsegs(struct vm *vm) +{ + sx_unlock(&vm->mem_segs_lock); +} + +/* + * Return 'true' if 'gpa' is allocated in the guest address space. + * + * This function is called in the context of a running vcpu which acts as + * an implicit lock on 'vm->mem_maps[]'. + */ +bool +vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) +{ + struct vm *vm = vcpu->vm; + struct mem_map *mm; + int i; + +#ifdef INVARIANTS + int hostcpu, state; + state = vcpu_get_state(vcpu, &hostcpu); + KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, + ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); +#endif + + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) + return (true); /* 'gpa' is sysmem or devmem */ + } + + return (false); +} + +int +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) +{ + struct mem_seg *seg; + vm_object_t obj; + + sx_assert(&vm->mem_segs_lock, SX_XLOCKED); + + if (ident < 0 || ident >= VM_MAX_MEMSEGS) + return (EINVAL); + + if (len == 0 || (len & PAGE_MASK)) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + if (seg->len == len && seg->sysmem == sysmem) + return (EEXIST); + else + return (EINVAL); + } + + obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); + if (obj == NULL) + return (ENOMEM); + + seg->len = len; + seg->object = obj; + seg->sysmem = sysmem; + return (0); +} + +int +vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + vm_object_t *objptr) +{ + struct mem_seg *seg; + + sx_assert(&vm->mem_segs_lock, SX_LOCKED); + + if (ident < 0 || ident >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (len) + *len = seg->len; + if (sysmem) + *sysmem = seg->sysmem; + if (objptr) + *objptr = seg->object; + return (0); +} + +void +vm_free_memseg(struct vm *vm, int ident) +{ + struct mem_seg *seg; + + KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, + ("%s: invalid memseg ident %d", __func__, ident)); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + vm_object_deallocate(seg->object); + bzero(seg, sizeof(struct mem_seg)); + } +} + +int +vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, + size_t len, int 
prot, int flags) +{ + struct mem_seg *seg; + struct mem_map *m, *map; + vm_ooffset_t last; + int i, error; + + dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len); + + if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) + return (EINVAL); + + if (flags & ~VM_MEMMAP_F_WIRED) + return (EINVAL); + + if (segid < 0 || segid >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[segid]; + if (seg->object == NULL) + return (EINVAL); + + last = first + len; + if (first < 0 || first >= last || last > seg->len) + return (EINVAL); + + if ((gpa | first | last) & PAGE_MASK) + return (EINVAL); + + map = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + m = &vm->mem_maps[i]; + if (m->len == 0) { + map = m; + break; + } + } + + if (map == NULL) + return (ENOSPC); + + error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, + len, 0, VMFS_NO_SPACE, prot, prot, 0); + if (error != KERN_SUCCESS) + return (EFAULT); + + vm_object_reference(seg->object); + + if (flags & VM_MEMMAP_F_WIRED) { + error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, + VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); + if (error != KERN_SUCCESS) { + vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); + return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : + EFAULT); + } + } + + map->gpa = gpa; + map->len = len; + map->segoff = first; + map->segid = segid; + map->prot = prot; + map->flags = flags; + return (0); +} + +int +vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) +{ + struct mem_map *m; + int i; + + dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len); + + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + m = &vm->mem_maps[i]; + if (m->gpa == gpa && m->len == len) { + vm_free_memmap(vm, i); + return (0); + } + } + + return (EINVAL); +} + +int +vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) +{ + struct mem_map *mm, *mmnext; + int i; + + mmnext = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len == 0 || mm->gpa < *gpa) + continue; + if (mmnext == NULL || mm->gpa < mmnext->gpa) + mmnext = mm; + } + + if (mmnext != NULL) { + *gpa = mmnext->gpa; + if (segid) + *segid = mmnext->segid; + if (segoff) + *segoff = mmnext->segoff; + if (len) + *len = mmnext->len; + if (prot) + *prot = mmnext->prot; + if (flags) + *flags = mmnext->flags; + return (0); + } else { + return (ENOENT); + } +} + +static void +vm_free_memmap(struct vm *vm, int ident) +{ + struct mem_map *mm; + int error __diagused; + + mm = &vm->mem_maps[ident]; + if (mm->len) { + error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, + mm->gpa + mm->len); + KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", + __func__, error)); + bzero(mm, sizeof(struct mem_map)); + } +} + +static __inline bool +sysmem_mapping(struct vm *vm, struct mem_map *mm) +{ + + if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) + return (true); + else + return (false); +} + +vm_paddr_t +vmm_sysmem_maxaddr(struct vm *vm) +{ + struct mem_map *mm; + vm_paddr_t maxaddr; + int i; + + maxaddr = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm)) { + if (maxaddr < mm->gpa + mm->len) + maxaddr = mm->gpa + mm->len; + } + } + return (maxaddr); +} + +int +vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault) +{ + int error; + + error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); + + return (error); +} + +void 
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, + mem_region_read_t mmio_read, mem_region_write_t mmio_write) +{ + int i; + + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start == 0 && + vm->mmio_region[i].end == 0) { + vm->mmio_region[i].start = start; + vm->mmio_region[i].end = start + size; + vm->mmio_region[i].read = mmio_read; + vm->mmio_region[i].write = mmio_write; + return; + } + } + + panic("%s: No free MMIO region", __func__); +} + +void +vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) +{ + int i; + + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start == start && + vm->mmio_region[i].end == start + size) { + memset(&vm->mmio_region[i], 0, + sizeof(vm->mmio_region[i])); + return; + } + } + + panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, + start + size); +} + +static int +vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm; + struct vm_exit *vme; + struct vie *vie; + struct hyp *hyp; + uint64_t fault_ipa; + struct vm_guest_paging *paging; + struct vmm_mmio_region *vmr; + int error, i; + + vm = vcpu->vm; + hyp = vm->cookie; + if (!hyp->aplic_attached) + goto out_user; + + vme = &vcpu->exitinfo; + vie = &vme->u.inst_emul.vie; + paging = &vme->u.inst_emul.paging; + + fault_ipa = vme->u.inst_emul.gpa; + + vmr = NULL; + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start <= fault_ipa && + vm->mmio_region[i].end > fault_ipa) { + vmr = &vm->mmio_region[i]; + break; + } + } + if (vmr == NULL) + goto out_user; + + error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, + vmr->read, vmr->write, retu); + return (error); + +out_user: + *retu = true; + return (0); +} + +int +vm_suspend(struct vm *vm, enum vm_suspend_how how) +{ + int i; + + if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) + return (EINVAL); + + if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { + VM_CTR2(vm, "virtual machine already suspended %d/%d", + vm->suspend, how); + return (EALREADY); + } + + VM_CTR1(vm, "virtual machine successfully suspended %d", how); + + /* + * Notify all active vcpus that they are now suspended. 
+ */ + for (i = 0; i < vm->maxcpus; i++) { + if (CPU_ISSET(i, &vm->active_cpus)) + vcpu_notify_event(vm_vcpu(vm, i)); + } + + return (0); +} + +void +vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) +{ + struct vm *vm = vcpu->vm; + struct vm_exit *vmexit; + + KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, + ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); + + vmexit = vm_exitinfo(vcpu); + vmexit->pc = pc; + vmexit->inst_length = 4; + vmexit->exitcode = VM_EXITCODE_SUSPENDED; + vmexit->u.suspended.how = vm->suspend; +} + +void +vm_exit_debug(struct vcpu *vcpu, uint64_t pc) +{ + struct vm_exit *vmexit; + + vmexit = vm_exitinfo(vcpu); + vmexit->pc = pc; + vmexit->inst_length = 4; + vmexit->exitcode = VM_EXITCODE_DEBUG; +} + +int +vm_activate_cpu(struct vcpu *vcpu) +{ + struct vm *vm = vcpu->vm; + + if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) + return (EBUSY); + + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); + return (0); + +} + +int +vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) +{ + if (vcpu == NULL) { + vm->debug_cpus = vm->active_cpus; + for (int i = 0; i < vm->maxcpus; i++) { + if (CPU_ISSET(i, &vm->active_cpus)) + vcpu_notify_event(vm_vcpu(vm, i)); + } + } else { + if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) + return (EINVAL); + + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); + vcpu_notify_event(vcpu); + } + return (0); +} + +int +vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) +{ + + if (vcpu == NULL) { + CPU_ZERO(&vm->debug_cpus); + } else { + if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) + return (EINVAL); + + CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); + } + return (0); +} + +int +vcpu_debugged(struct vcpu *vcpu) +{ + + return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); +} + +cpuset_t +vm_active_cpus(struct vm *vm) +{ + + return (vm->active_cpus); +} + +cpuset_t +vm_debug_cpus(struct vm *vm) +{ + + return (vm->debug_cpus); +} + +cpuset_t +vm_suspended_cpus(struct vm *vm) +{ + + return (vm->suspended_cpus); +} + + +void * +vcpu_stats(struct vcpu *vcpu) +{ + + return (vcpu->stats); +} + +/* + * This function is called to ensure that a vcpu "sees" a pending event + * as soon as possible: + * - If the vcpu thread is sleeping then it is woken up. + * - If the vcpu is running on a different host_cpu then an IPI will be directed + * to the host_cpu to cause the vcpu to trap into the hypervisor. + */ +static void +vcpu_notify_event_locked(struct vcpu *vcpu) +{ + int hostcpu; + + hostcpu = vcpu->hostcpu; + if (vcpu->state == VCPU_RUNNING) { + KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); + if (hostcpu != curcpu) { + ipi_cpu(hostcpu, vmm_ipinum); + } else { + /* + * If the 'vcpu' is running on 'curcpu' then it must + * be sending a notification to itself (e.g. SELF_IPI). + * The pending event will be picked up when the vcpu + * transitions back to guest context. + */ + } + } else { + KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " + "with hostcpu %d", vcpu->state, hostcpu)); + if (vcpu->state == VCPU_SLEEPING) + wakeup_one(vcpu); + } +} + +void +vcpu_notify_event(struct vcpu *vcpu) +{ + vcpu_lock(vcpu); + vcpu_notify_event_locked(vcpu); + vcpu_unlock(vcpu); +} + +static void +restore_guest_fpustate(struct vcpu *vcpu) +{ + + /* Flush host state to the pcb. */ + fpe_state_save(curthread); + + /* Ensure the VFP state will be re-loaded when exiting the guest. 
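+	 * The host state was just flushed to the pcb above; clearing
+	 * fpcurthread makes the host reload it from the pcb on its next
+	 * FP access instead of assuming the registers are still live.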
*/ + PCPU_SET(fpcurthread, NULL); + + /* restore guest FPU state */ + fpe_enable(); + fpe_restore(vcpu->guestfpu); + + /* + * The FPU is now "dirty" with the guest's state so turn on emulation + * to trap any access to the FPU by the host. + */ + fpe_disable(); +} + +static void +save_guest_fpustate(struct vcpu *vcpu) +{ + + /* Save guest FPE state. */ + fpe_enable(); + fpe_store(vcpu->guestfpu); + fpe_disable(); + + KASSERT(PCPU_GET(fpcurthread) == NULL, + ("%s: fpcurthread set with guest registers", __func__)); +} + +static int +vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, + bool from_idle) +{ + int error; + + vcpu_assert_locked(vcpu); + + /* + * State transitions from the vmmdev_ioctl() must always begin from + * the VCPU_IDLE state. This guarantees that there is only a single + * ioctl() operating on a vcpu at any point. + */ + if (from_idle) { + while (vcpu->state != VCPU_IDLE) { + vcpu_notify_event_locked(vcpu); + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", + hz / 1000); + } + } else { + KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " + "vcpu idle state")); + } + + if (vcpu->state == VCPU_RUNNING) { + KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " + "mismatch for running vcpu", curcpu, vcpu->hostcpu)); + } else { + KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " + "vcpu that is not running", vcpu->hostcpu)); + } + + /* + * The following state transitions are allowed: + * IDLE -> FROZEN -> IDLE + * FROZEN -> RUNNING -> FROZEN + * FROZEN -> SLEEPING -> FROZEN + */ + switch (vcpu->state) { + case VCPU_IDLE: + case VCPU_RUNNING: + case VCPU_SLEEPING: + error = (newstate != VCPU_FROZEN); + break; + case VCPU_FROZEN: + error = (newstate == VCPU_FROZEN); + break; + default: + error = 1; + break; + } + + if (error) + return (EBUSY); + + vcpu->state = newstate; + if (newstate == VCPU_RUNNING) + vcpu->hostcpu = curcpu; + else + vcpu->hostcpu = NOCPU; + + if (newstate == VCPU_IDLE) + wakeup(&vcpu->state); + + return (0); +} + +static void +vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) +{ + int error; + + if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) + panic("Error %d setting state to %d\n", error, newstate); +} + +static void +vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) +{ + int error; + + if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) + panic("Error %d setting state to %d", error, newstate); +} + +int +vm_get_capability(struct vcpu *vcpu, int type, int *retval) +{ + + if (type < 0 || type >= VM_CAP_MAX) + return (EINVAL); + + return (vmmops_getcap(vcpu->cookie, type, retval)); +} + +int +vm_set_capability(struct vcpu *vcpu, int type, int val) +{ + + if (type < 0 || type >= VM_CAP_MAX) + return (EINVAL); + + return (vmmops_setcap(vcpu->cookie, type, val)); +} + +struct vm * +vcpu_vm(struct vcpu *vcpu) +{ + + return (vcpu->vm); +} + +int +vcpu_vcpuid(struct vcpu *vcpu) +{ + + return (vcpu->vcpuid); +} + +void * +vcpu_get_cookie(struct vcpu *vcpu) +{ + + return (vcpu->cookie); +} + +struct vcpu * +vm_vcpu(struct vm *vm, int vcpuid) +{ + + return (vm->vcpu[vcpuid]); +} + +int +vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) +{ + int error; + + vcpu_lock(vcpu); + error = vcpu_set_state_locked(vcpu, newstate, from_idle); + vcpu_unlock(vcpu); + + return (error); +} + +enum vcpu_state +vcpu_get_state(struct vcpu *vcpu, int *hostcpu) +{ + enum vcpu_state state; + + vcpu_lock(vcpu); + state = vcpu->state; + if (hostcpu != NULL) + 
*hostcpu = vcpu->hostcpu; + vcpu_unlock(vcpu); + + return (state); +} + +static void * +_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ + int i, count, pageoff; + struct mem_map *mm; + vm_page_t m; + + pageoff = gpa & PAGE_MASK; + if (len > PAGE_SIZE - pageoff) + panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); + + count = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && + gpa < mm->gpa + mm->len) { + count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, + trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); + break; + } + } + + if (count == 1) { + *cookie = m; + return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); + } else { + *cookie = NULL; + return (NULL); + } +} + +void * +vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ +#ifdef INVARIANTS + /* + * The current vcpu should be frozen to ensure 'vm_memmap[]' + * stability. + */ + int state = vcpu_get_state(vcpu, NULL); + KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", + __func__, state)); +#endif + return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); +} + +void * +vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ + sx_assert(&vm->mem_segs_lock, SX_LOCKED); + return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); +} + +void +vm_gpa_release(void *cookie) +{ + vm_page_t m = cookie; + + vm_page_unwire(m, PQ_ACTIVE); +} + +int +vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) +{ + + if (reg >= VM_REG_LAST) + return (EINVAL); + + return (vmmops_getreg(vcpu->cookie, reg, retval)); +} + +int +vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) +{ + int error; + + if (reg >= VM_REG_LAST) + return (EINVAL); + error = vmmops_setreg(vcpu->cookie, reg, val); + if (error || reg != VM_REG_GUEST_SEPC) + return (error); + + vcpu->nextpc = val; + + return (0); +} + +void * +vm_get_cookie(struct vm *vm) +{ + + return (vm->cookie); +} + +int +vm_inject_exception(struct vcpu *vcpu, uint64_t scause) +{ + + return (vmmops_exception(vcpu->cookie, scause)); +} + +int +vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr) +{ + + return (aplic_attach_to_vm(vm->cookie, descr)); +} + +int +vm_assert_irq(struct vm *vm, uint32_t irq) +{ + + return (aplic_inject_irq(vm->cookie, -1, irq, true)); +} + +int +vm_deassert_irq(struct vm *vm, uint32_t irq) +{ + + return (aplic_inject_irq(vm->cookie, -1, irq, false)); +} + +int +vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, + int func) +{ + + return (aplic_inject_msi(vm->cookie, msg, addr)); +} + +static int +vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) +{ + + vcpu_lock(vcpu); + + while (1) { + if (aplic_check_pending(vcpu->cookie)) + break; + + if (riscv_check_ipi(vcpu->cookie, false)) + break; + + if (vcpu_should_yield(vcpu)) + break; + + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + /* + * XXX msleep_spin() cannot be interrupted by signals so + * wake up periodically to check pending signals. 
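+	 * vcpu_notify_event() wakes us immediately when an interrupt or IPI is
+	 * injected; the periodic timeout only covers pending signals.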
+ */ + msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000); + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + } + vcpu_unlock(vcpu); + + *retu = false; + + return (0); +} + +static int +vm_handle_paging(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm; + struct vm_exit *vme; + struct vm_map *map; + uint64_t addr; + pmap_t pmap; + int ftype, rv; + + vm = vcpu->vm; + vme = &vcpu->exitinfo; + + pmap = vmspace_pmap(vm->vmspace); + addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); + + dprintf("%s: %lx\n", __func__, addr); + + switch (vme->scause) { + case SCAUSE_STORE_GUEST_PAGE_FAULT: + ftype = VM_PROT_WRITE; + break; + case SCAUSE_FETCH_GUEST_PAGE_FAULT: + ftype = VM_PROT_EXECUTE; + break; + case SCAUSE_LOAD_GUEST_PAGE_FAULT: + ftype = VM_PROT_READ; + break; + default: + panic("unknown page trap: %lu", vme->scause); + } + + /* The page exists, but the page table needs to be updated. */ + if (pmap_fault(pmap, addr, ftype)) + return (0); + + map = &vm->vmspace->vm_map; + rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); + if (rv != KERN_SUCCESS) { + printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", + __func__, addr, ftype, rv); + return (EFAULT); + } + + return (0); +} + +static int +vm_handle_suspend(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm = vcpu->vm; + int error, i; + struct thread *td; + + error = 0; + td = curthread; + + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); + + /* + * Wait until all 'active_cpus' have suspended themselves. + * + * Since a VM may be suspended at any time including when one or + * more vcpus are doing a rendezvous we need to call the rendezvous + * handler while we are waiting to prevent a deadlock. + */ + vcpu_lock(vcpu); + while (error == 0) { + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) + break; + + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + if (td_ast_pending(td, TDA_SUSPEND)) { + vcpu_unlock(vcpu); + error = thread_check_susp(td, false); + vcpu_lock(vcpu); + } + } + vcpu_unlock(vcpu); + + /* + * Wakeup the other sleeping vcpus and return to userspace. + */ + for (i = 0; i < vm->maxcpus; i++) { + if (CPU_ISSET(i, &vm->suspended_cpus)) { + vcpu_notify_event(vm_vcpu(vm, i)); + } + } + + *retu = true; + return (error); +} + +int +vm_run(struct vcpu *vcpu) +{ + struct vm_eventinfo evinfo; + struct vm_exit *vme; + struct vm *vm; + pmap_t pmap; + int error; + int vcpuid; + bool retu; + + vm = vcpu->vm; + + dprintf("%s\n", __func__); + + vcpuid = vcpu->vcpuid; + + if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EINVAL); + + if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) + return (EINVAL); + + pmap = vmspace_pmap(vm->vmspace); + vme = &vcpu->exitinfo; + evinfo.rptr = NULL; + evinfo.sptr = &vm->suspend; + evinfo.iptr = NULL; +restart: + critical_enter(); + + restore_guest_fpustate(vcpu); + + vcpu_require_state(vcpu, VCPU_RUNNING); + error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); + vcpu_require_state(vcpu, VCPU_FROZEN); + + save_guest_fpustate(vcpu); + + critical_exit(); + + if (error == 0) { + retu = false; + switch (vme->exitcode) { + case VM_EXITCODE_INST_EMUL: + vcpu->nextpc = vme->pc + vme->inst_length; + error = vm_handle_inst_emul(vcpu, &retu); + break; + case VM_EXITCODE_WFI: + vcpu->nextpc = vme->pc + vme->inst_length; + error = vm_handle_wfi(vcpu, vme, &retu); + break; + case VM_EXITCODE_ECALL: + /* Handle in userland. 
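+			 * The ecall arguments were already copied into
+			 * vme->u.ecall.args by riscv_handle_world_switch().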
*/ + vcpu->nextpc = vme->pc + vme->inst_length; + retu = true; + break; + case VM_EXITCODE_PAGING: + vcpu->nextpc = vme->pc; + error = vm_handle_paging(vcpu, &retu); + break; + case VM_EXITCODE_BOGUS: + vcpu->nextpc = vme->pc; + retu = false; + error = 0; + break; + case VM_EXITCODE_SUSPENDED: + vcpu->nextpc = vme->pc; + error = vm_handle_suspend(vcpu, &retu); + break; + default: + /* Handle in userland. */ + vcpu->nextpc = vme->pc; + retu = true; + break; + } + } + + if (error == 0 && retu == false) + goto restart; + + return (error); +} Index: sys/riscv/vmm/vmm_aplic.h =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_aplic.h @@ -0,0 +1,54 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_APLIC_H_ +#define _VMM_APLIC_H_ + +struct hyp; +struct hypctx; +struct vm_aplic_descr; + +int aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr); +void aplic_detach_from_vm(struct hyp *hyp); +int aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level); +int aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr); +void aplic_vminit(struct hyp *hyp); +void aplic_vmcleanup(struct hyp *hyp); +int aplic_check_pending(struct hypctx *hypctx); + +void aplic_cpuinit(struct hypctx *hypctx); +void aplic_cpucleanup(struct hypctx *hypctx); +void aplic_flush_hwstate(struct hypctx *hypctx); +void aplic_sync_hwstate(struct hypctx *hypctx); +int aplic_max_cpu_count(struct hyp *hyp); + +#endif /* !_VMM_APLIC_H_ */ Index: sys/riscv/vmm/vmm_aplic.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_aplic.c @@ -0,0 +1,523 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC"); + +#define APLIC_DOMAINCFG 0x0000 +#define DOMAINCFG_IE (1 << 8) /* Interrupt Enable. */ +#define DOMAINCFG_DM (1 << 2) /* Direct Mode. */ +#define DOMAINCFG_BE (1 << 0) /* Big-Endian. */ +#define APLIC_SOURCECFG(x) (0x0004 + ((x) - 1) * 4) +#define SOURCECFG_D (1 << 10) /* D - Delegate. */ +/* If D == 0. */ +#define SOURCECFG_SM_S (0) +#define SOURCECFG_SM_M (0x7 << SOURCECFG_SM_S) +#define SOURCECFG_SM_INACTIVE (0) /* Not delegated. */ +#define SOURCECFG_SM_DETACHED (1) +#define SOURCECFG_SM_RESERVED (2) +#define SOURCECFG_SM_RESERVED1 (3) +#define SOURCECFG_SM_EDGE1 (4) /* Rising edge. */ +#define SOURCECFG_SM_EDGE0 (5) /* Falling edge. */ +#define SOURCECFG_SM_LEVEL1 (6) /* High. */ +#define SOURCECFG_SM_LEVEL0 (7) /* Low. */ +/* If D == 1. 
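+ * The source is delegated to a child interrupt domain and the low bits
+ * hold the child index.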
*/ +#define SOURCECFG_CHILD_INDEX_S (0) +#define SOURCECFG_CHILD_INDEX_M (0x3ff << SOURCECFG_CHILD_INDEX_S) +#define APLIC_SETIPNUM 0x1cdc +#define APLIC_CLRIPNUM 0x1ddc +#define APLIC_SETIENUM 0x1edc +#define APLIC_CLRIENUM 0x1fdc +#define APLIC_GENMSI 0x3000 +#define APLIC_TARGET(x) (0x3004 + ((x) - 1) * 4) +#define TARGET_HART_S 18 +#define TARGET_HART_M 0x3fff +#define APLIC_IDC(x) (0x4000 + (x) * 32) +#define IDC_IDELIVERY(x) (APLIC_IDC(x) + 0x0) +#define IDC_IFORCE(x) (APLIC_IDC(x) + 0x4) +#define IDC_ITHRESHOLD(x) (APLIC_IDC(x) + 0x8) +#define IDC_TOPI(x) (APLIC_IDC(x) + 0x18) +#define IDC_CLAIMI(x) (APLIC_IDC(x) + 0x1C) +#define CLAIMI_IRQ_S (16) +#define CLAIMI_IRQ_M (0x3ff << CLAIMI_IRQ_S) +#define CLAIMI_PRIO_S (0) +#define CLAIMI_PRIO_M (0xff << CLAIMI_PRIO_S) + +#define APLIC_NIRQS 63 + +struct aplic_irq { + uint32_t sourcecfg; + uint32_t state; +#define APLIC_IRQ_STATE_PENDING (1 << 0) +#define APLIC_IRQ_STATE_ENABLED (1 << 1) + uint32_t target; + uint32_t target_hart; +}; + +struct aplic { + uint32_t mem_start; + uint32_t mem_end; + struct mtx mtx; + struct aplic_irq *irqs; + int nirqs; + uint32_t domaincfg; +}; + +static int +aplic_handle_sourcecfg(struct aplic *aplic, int i, bool write, uint64_t *val) +{ + struct aplic_irq *irq; + + if (i <= 0 || i > aplic->nirqs) + return (ENOENT); + + mtx_lock_spin(&aplic->mtx); + irq = &aplic->irqs[i]; + if (write) + irq->sourcecfg = *val; + else + *val = irq->sourcecfg; + mtx_unlock_spin(&aplic->mtx); + + return (0); +} + +static int +aplic_set_enabled(struct aplic *aplic, bool write, uint64_t *val, bool enabled) +{ + struct aplic_irq *irq; + int i; + + if (!write) { + *val = 0; + return (0); + } + + i = *val; + if (i <= 0 || i > aplic->nirqs) + return (-1); + + irq = &aplic->irqs[i]; + + mtx_lock_spin(&aplic->mtx); + if (enabled) + irq->state |= APLIC_IRQ_STATE_ENABLED; + else + irq->state &= ~APLIC_IRQ_STATE_ENABLED; + mtx_unlock_spin(&aplic->mtx); + + return (0); +} + +static int +aplic_handle_target(struct aplic *aplic, int i, bool write, uint64_t *val) +{ + struct aplic_irq *irq; + + mtx_lock_spin(&aplic->mtx); + irq = &aplic->irqs[i]; + if (write) { + irq->target = *val; + irq->target_hart = (irq->target >> TARGET_HART_S); + } else + *val = irq->target; + mtx_unlock_spin(&aplic->mtx); + + return (0); +} + +static int +aplic_handle_idc_claimi(struct hyp *hyp, struct aplic *aplic, int cpu_id, + bool write, uint64_t *val) +{ + struct aplic_irq *irq; + int i; + + /* Writes to claimi are ignored. 
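+	 * A read returns the lowest-numbered pending interrupt targeted at
+	 * this hart and clears its pending bit.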
*/ + if (write) + return (-1); + + mtx_lock_spin(&aplic->mtx); + for (i = 0; i < aplic->nirqs; i++) { + irq = &aplic->irqs[i]; + if (irq->target_hart != cpu_id) + continue; + if (irq->state & APLIC_IRQ_STATE_PENDING) { + *val = (i << CLAIMI_IRQ_S) | (0 << CLAIMI_PRIO_S); + irq->state &= ~APLIC_IRQ_STATE_PENDING; + mtx_unlock_spin(&aplic->mtx); + return (0); + } + } + + dprintf("%s: claimi without pending, cpu_id %d", __func__, cpu_id); + + return (0); +} + +static int +aplic_handle_idc(struct hyp *hyp, struct aplic *aplic, int cpu, int reg, + bool write, uint64_t *val) +{ + int error; + + switch (reg + APLIC_IDC(0)) { + case IDC_IDELIVERY(0): + case IDC_IFORCE(0): + case IDC_ITHRESHOLD(0): + case IDC_TOPI(0): + error = 0; + break; + case IDC_CLAIMI(0): + error = aplic_handle_idc_claimi(hyp, aplic, cpu, write, val); + break; + default: + error = ENOENT; + } + + return (error); +} + +static int +aplic_mmio_access(struct hyp *hyp, struct aplic *aplic, uint64_t reg, + bool write, uint64_t *val) +{ + int error; + int cpu; + int r; + int i; + + if ((reg >= APLIC_SOURCECFG(1)) && + (reg <= APLIC_SOURCECFG(aplic->nirqs))) { + i = ((reg - APLIC_SOURCECFG(1)) >> 2) + 1; + error = aplic_handle_sourcecfg(aplic, i, write, val); + return (error); + } + + if ((reg >= APLIC_TARGET(1)) && (reg <= APLIC_TARGET(aplic->nirqs))) { + i = ((reg - APLIC_TARGET(1)) >> 2) + 1; + error = aplic_handle_target(aplic, i, write, val); + return (error); + } + + if ((reg >= APLIC_IDC(0)) && (reg < APLIC_IDC(mp_ncpus))) { + cpu = (reg - APLIC_IDC(0)) >> 5; + r = (reg - APLIC_IDC(0)) % 32; + error = aplic_handle_idc(hyp, aplic, cpu, r, write, val); + return (error); + } + + switch (reg) { + case APLIC_DOMAINCFG: + aplic->domaincfg = *val & DOMAINCFG_IE; + error = 0; + break; + case APLIC_SETIENUM: + error = aplic_set_enabled(aplic, write, val, true); + break; + case APLIC_CLRIENUM: + error = aplic_set_enabled(aplic, write, val, false); + break; + default: + dprintf("%s: unknown reg %lx", __func__, reg); + error = ENOENT; + break; + }; + + return (error); +} + +static int +mem_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval, int size, + void *arg) +{ + struct hypctx *hypctx; + struct hyp *hyp; + struct aplic *aplic; + uint64_t reg; + uint64_t val; + int error; + + hypctx = vcpu_get_cookie(vcpu); + hyp = hypctx->hyp; + aplic = hyp->aplic; + + dprintf("%s: fault_ipa %lx size %d\n", __func__, fault_ipa, size); + + if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end) + return (EINVAL); + + reg = fault_ipa - aplic->mem_start; + + error = aplic_mmio_access(hyp, aplic, reg, false, &val); + if (error == 0) + *rval = val; + + return (error); +} + +static int +mem_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval, int size, + void *arg) +{ + struct hypctx *hypctx; + struct hyp *hyp; + struct aplic *aplic; + uint64_t reg; + uint64_t val; + int error; + + hypctx = vcpu_get_cookie(vcpu); + hyp = hypctx->hyp; + aplic = hyp->aplic; + + dprintf("%s: fault_ipa %lx wval %lx size %d\n", __func__, fault_ipa, + wval, size); + + if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end) + return (EINVAL); + + reg = fault_ipa - aplic->mem_start; + + val = wval; + + error = aplic_mmio_access(hyp, aplic, reg, true, &val); + + return (error); +} + +void +aplic_vminit(struct hyp *hyp) +{ + struct aplic *aplic; + + hyp->aplic = malloc(sizeof(*hyp->aplic), M_APLIC, + M_WAITOK | M_ZERO); + aplic = hyp->aplic; + + mtx_init(&aplic->mtx, "APLIC lock", NULL, MTX_SPIN); +} + +void +aplic_vmcleanup(struct hyp 
*hyp) +{ + struct aplic *aplic; + + aplic = hyp->aplic; + + mtx_destroy(&aplic->mtx); + + free(hyp->aplic, M_APLIC); +} + +int +aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr) +{ + struct aplic *aplic; + struct vm *vm; + + vm = hyp->vm; + + dprintf("%s\n", __func__); + + vm_register_inst_handler(vm, descr->mem_start, descr->mem_size, + mem_read, mem_write); + + aplic = hyp->aplic; + aplic->nirqs = APLIC_NIRQS; + aplic->mem_start = descr->mem_start; + aplic->mem_end = descr->mem_start + descr->mem_size; + aplic->irqs = malloc(sizeof(struct aplic_irq) * aplic->nirqs, M_APLIC, + M_WAITOK | M_ZERO); + + hyp->aplic_attached = true; + + return (0); +} + +void +aplic_detach_from_vm(struct hyp *hyp) +{ + struct aplic *aplic; + + aplic = hyp->aplic; + + dprintf("%s\n", __func__); + + if (hyp->aplic_attached) { + hyp->aplic_attached = false; + free(aplic->irqs, M_APLIC); + } +} + +int +aplic_check_pending(struct hypctx *hypctx) +{ + struct aplic_irq *irq; + struct aplic *aplic; + struct hyp *hyp; + int i; + + hyp = hypctx->hyp; + aplic = hyp->aplic; + + mtx_lock_spin(&aplic->mtx); + if ((aplic->domaincfg & DOMAINCFG_IE) == 0) { + mtx_unlock_spin(&aplic->mtx); + return (0); + } + + for (i = 0; i < aplic->nirqs; i++) { + irq = &aplic->irqs[i]; + if (irq->target_hart != hypctx->cpu_id) + continue; + if ((irq->state & APLIC_IRQ_STATE_ENABLED) && + (irq->state & APLIC_IRQ_STATE_PENDING)) { + mtx_unlock_spin(&aplic->mtx); + /* Found. */ + return (1); + } + } + mtx_unlock_spin(&aplic->mtx); + + return (0); +} + +int +aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level) +{ + struct aplic_irq *irq; + struct aplic *aplic; + bool notify; + int error; + + aplic = hyp->aplic; + + error = 0; + + mtx_lock_spin(&aplic->mtx); + if ((aplic->domaincfg & DOMAINCFG_IE) == 0) { + mtx_unlock_spin(&aplic->mtx); + return (error); + } + + irq = &aplic->irqs[irqid]; + if (irq->sourcecfg & SOURCECFG_D) { + mtx_unlock_spin(&aplic->mtx); + return (error); + } + + notify = false; + switch (irq->sourcecfg & SOURCECFG_SM_M) { + case SOURCECFG_SM_EDGE1: + if (level) { + irq->state |= APLIC_IRQ_STATE_PENDING; + if (irq->state & APLIC_IRQ_STATE_ENABLED) + notify = true; + } else + irq->state &= ~APLIC_IRQ_STATE_PENDING; + break; + case SOURCECFG_SM_DETACHED: + break; + default: + /* TODO. */ + dprintf("sourcecfg %d\n", irq->sourcecfg & SOURCECFG_SM_M); + error = ENXIO; + break; + } + mtx_unlock_spin(&aplic->mtx); + + if (notify) + vcpu_notify_event(vm_vcpu(hyp->vm, irq->target_hart)); + + return (error); +} + +int +aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr) +{ + + /* TODO. */ + + return (ENXIO); +} + +void +aplic_cpuinit(struct hypctx *hypctx) +{ + +} + +void +aplic_cpucleanup(struct hypctx *hypctx) +{ + +} + +void +aplic_flush_hwstate(struct hypctx *hypctx) +{ + +} + +void +aplic_sync_hwstate(struct hypctx *hypctx) +{ + +} + +int +aplic_max_cpu_count(struct hyp *hyp) +{ + int16_t max_count; + + max_count = vm_get_maxcpus(hyp->vm); + + return (max_count); +} Index: sys/riscv/vmm/vmm_dev_machdep.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_dev_machdep.c @@ -0,0 +1,126 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include "vmm_aplic.h" + +const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = { + VMMDEV_IOCTL(VM_RUN, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GLA2GPA_NOFAULT, VMMDEV_IOCTL_LOCK_ONE_VCPU), + + VMMDEV_IOCTL(VM_ATTACH_APLIC, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + + VMMDEV_IOCTL(VM_RAISE_MSI, 0), + VMMDEV_IOCTL(VM_ASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_DEASSERT_IRQ, 0), +}; +const size_t vmmdev_machdep_ioctl_count = nitems(vmmdev_machdep_ioctls); + +int +vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, + int fflag, struct thread *td) +{ + struct vm_run *vmrun; + struct vm_aplic_descr *aplic; + struct vm_irq *vi; + struct vm_exception *vmexc; + struct vm_gla2gpa *gg; + struct vm_msi *vmsi; + int error; + + error = 0; + switch (cmd) { + case VM_RUN: { + struct vm_exit *vme; + + vmrun = (struct vm_run *)data; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error != 0) + break; + + error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); + break; + } + case VM_INJECT_EXCEPTION: + vmexc = (struct vm_exception *)data; + error = vm_inject_exception(vcpu, vmexc->scause); + break; + case VM_GLA2GPA_NOFAULT: + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + case VM_ATTACH_APLIC: + aplic = (struct vm_aplic_descr *)data; + error = vm_attach_aplic(vm, aplic); + break; + case VM_RAISE_MSI: + vmsi = (struct vm_msi *)data; + error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus, + vmsi->slot, vmsi->func); + break; + case VM_ASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_assert_irq(vm, vi->irq); + break; + case VM_DEASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_deassert_irq(vm, vi->irq); + break; + default: + error = ENOTTY; + break; + } + + return (error); +} Index: sys/riscv/vmm/vmm_instruction_emul.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_instruction_emul.c @@ -0,0 +1,109 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 
Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef _KERNEL +#include +#include +#include +#include + +#include + +#include +#include +#else +#include +#include +#include + +#include + +#include +#include +#include +#include +#endif + +#include + +int +vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging __unused, mem_region_read_t memread, + mem_region_write_t memwrite, void *memarg) +{ + uint64_t val; + int error; + + if (vie->dir == VM_DIR_READ) { + error = memread(vcpu, gpa, &val, vie->access_size, memarg); + if (error) + goto out; + if ((vie->sign_extend == 0) && (vie->access_size < 8)) + val &= (1ul << (vie->access_size * 8)) - 1; + error = vm_set_register(vcpu, vie->reg, val); + } else { + error = vm_get_register(vcpu, vie->reg, &val); + if (error) + goto out; + /* Mask any unneeded bits from the register */ + if (vie->access_size < 8) + val &= (1ul << (vie->access_size * 8)) - 1; + error = memwrite(vcpu, gpa, val, vie->access_size, memarg); + } + +out: + return (error); +} + +int +vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread, + reg_write_t regwrite, void *regarg) +{ + uint64_t val; + int error; + + if (vre->dir == VM_DIR_READ) { + error = regread(vcpu, &val, regarg); + if (error) + goto out; + error = vm_set_register(vcpu, vre->reg, val); + } else { + error = vm_get_register(vcpu, vre->reg, &val); + if (error) + goto out; + error = regwrite(vcpu, val, regarg); + } + +out: + return (error); +} Index: sys/riscv/vmm/vmm_riscv.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_riscv.c @@ -0,0 +1,922 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * 
Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "riscv.h" +#include "vmm_aplic.h" +#include "vmm_stat.h" + +MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP"); + +DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); + +static int +m_op(uint32_t insn, int match, int mask) +{ + + if (((insn ^ match) & mask) == 0) + return (1); + + return (0); +} + +static inline void +riscv_set_active_vcpu(struct hypctx *hypctx) +{ + + DPCPU_SET(vcpu, hypctx); +} + +struct hypctx * +riscv_get_active_vcpu(void) +{ + + return (DPCPU_GET(vcpu)); +} + +int +vmmops_modinit(void) +{ + + if (!has_hyp) { + printf("vmm: riscv hart doesn't support H-extension.\n"); + return (ENXIO); + } + + if (!has_sstc) { + printf("vmm: riscv hart doesn't support SSTC extension.\n"); + return (ENXIO); + } + + return (0); +} + +int +vmmops_modcleanup(void) +{ + + return (0); +} + +void * +vmmops_init(struct vm *vm, pmap_t pmap) +{ + struct hyp *hyp; + vm_size_t size; + + size = round_page(sizeof(struct hyp) + + sizeof(struct hypctx *) * vm_get_maxcpus(vm)); + hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + hyp->vm = vm; + hyp->aplic_attached = false; + + aplic_vminit(hyp); + + return (hyp); +} + +static void +vmmops_delegate(void) +{ + uint64_t hedeleg; + uint64_t hideleg; + + hedeleg = (1UL << SCAUSE_INST_MISALIGNED); + hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION); + hedeleg |= (1UL << SCAUSE_BREAKPOINT); + hedeleg |= (1UL << SCAUSE_ECALL_USER); + hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT); + hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT); + hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT); + csr_write(hedeleg, hedeleg); + + hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR); + hideleg |= (1UL << IRQ_TIMER_HYPERVISOR); + hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR); + csr_write(hideleg, hideleg); +} + +static void +vmmops_vcpu_restore_csrs(struct hypctx *hypctx) +{ + struct hypcsr *csrs; + + csrs = &hypctx->guest_csrs; + + csr_write(vsstatus, 
csrs->vsstatus); + csr_write(vsie, csrs->vsie); + csr_write(vstvec, csrs->vstvec); + csr_write(vsscratch, csrs->vsscratch); + csr_write(vsepc, csrs->vsepc); + csr_write(vscause, csrs->vscause); + csr_write(vstval, csrs->vstval); + csr_write(hvip, csrs->hvip); + csr_write(vsatp, csrs->vsatp); +} + +static void +vmmops_vcpu_save_csrs(struct hypctx *hypctx) +{ + struct hypcsr *csrs; + + csrs = &hypctx->guest_csrs; + + csrs->vsstatus = csr_read(vsstatus); + csrs->vsie = csr_read(vsie); + csrs->vstvec = csr_read(vstvec); + csrs->vsscratch = csr_read(vsscratch); + csrs->vsepc = csr_read(vsepc); + csrs->vscause = csr_read(vscause); + csrs->vstval = csr_read(vstval); + csrs->hvip = csr_read(hvip); + csrs->vsatp = csr_read(vsatp); +} + +void * +vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) +{ + struct hypctx *hypctx; + struct hyp *hyp; + vm_size_t size; + + hyp = vmi; + + dprintf("%s: hyp %p\n", __func__, hyp); + + KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), + ("%s: Invalid vcpuid %d", __func__, vcpuid)); + + size = round_page(sizeof(struct hypctx)); + + hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + hypctx->hyp = hyp; + hypctx->vcpu = vcpu1; + hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM; + + /* sstatus */ + hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE; + hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL; + + /* hstatus */ + hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW; + hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP; + + hypctx->cpu_id = vcpuid; + hyp->ctx[vcpuid] = hypctx; + + aplic_cpuinit(hypctx); + + return (hypctx); +} + +static int +riscv_vmm_pinit(pmap_t pmap) +{ + + dprintf("%s: pmap %p\n", __func__, pmap); + + pmap_pinit_stage(pmap, PM_STAGE2); + + return (1); +} + +struct vmspace * +vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) +{ + + return (vmspace_alloc(min, max, riscv_vmm_pinit)); +} + +void +vmmops_vmspace_free(struct vmspace *vmspace) +{ + + pmap_remove_pages(vmspace_pmap(vmspace)); + vmspace_free(vmspace); +} + +static void +riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data, + struct hyptrap *trap) +{ + register struct hyptrap * htrap asm("a0"); + uintptr_t old_hstatus; + uintptr_t old_stvec; + uintptr_t entry; + uint64_t val; + uint64_t tmp; + int intr; + + entry = (uintptr_t)&vmm_unpriv_trap; + htrap = trap; + + intr = intr_disable(); + + old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus); + /* + * Setup a temporary exception vector, so that if hlvx.hu raises + * an exception we catch it in the vmm_unpriv_trap(). + */ + old_stvec = csr_swap(stvec, entry); + + /* + * Read first two bytes of instruction assuming it could be a + * compressed one. + */ + __asm __volatile(".option push\n" + ".option norvc\n" + "hlvx.hu %[val], (%[addr])\n" + ".option pop\n" + : [val] "=r" (val) + : [addr] "r" (guest_addr), "r" (htrap) + : "a1", "memory"); + + /* + * Check if previous hlvx.hu did not raise an exception, and then + * read the rest of instruction if it is a full-length one. 
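+	 * An encoding whose two least-significant bits are 0b11 is a full
+	 * 32-bit (non-compressed) instruction.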
+ */ + if (trap->scause == -1 && (val & 0x3) == 0x3) { + guest_addr += 2; + __asm __volatile(".option push\n" + ".option norvc\n" + "hlvx.hu %[tmp], (%[addr])\n" + ".option pop\n" + : [tmp] "=r" (tmp) + : [addr] "r" (guest_addr), "r" (htrap) + : "a1", "memory"); + val |= (tmp << 16); + } + + csr_write(hstatus, old_hstatus); + csr_write(stvec, old_stvec); + + intr_restore(intr); + + *data = val; +} + +static int +riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret, + struct hyptrap *trap) +{ + uintptr_t guest_addr; + struct vie *vie; + uint64_t insn; + int reg_num; + int rs2, rd; + int direction; + int sign_extend; + int access_size; + + guest_addr = vme_ret->sepc; + + KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT || + vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT || + vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT, + ("Invalid scause")); + + direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ? + VM_DIR_WRITE : VM_DIR_READ; + + sign_extend = 1; + + bzero(trap, sizeof(struct hyptrap)); + trap->scause = -1; + riscv_unpriv_read(hypctx, guest_addr, &insn, trap); + if (trap->scause != -1) + return (-1); + + if ((insn & 0x3) == 0x3) { + rs2 = (insn & RS2_MASK) >> RS2_SHIFT; + rd = (insn & RD_MASK) >> RD_SHIFT; + + if (direction == VM_DIR_WRITE) { + if (m_op(insn, MATCH_SB, MASK_SB)) + access_size = 1; + else if (m_op(insn, MATCH_SH, MASK_SH)) + access_size = 2; + else if (m_op(insn, MATCH_SW, MASK_SW)) + access_size = 4; + else if (m_op(insn, MATCH_SD, MASK_SD)) + access_size = 8; + else { + printf("unknown store instr at %lx", + guest_addr); + return (-2); + } + reg_num = rs2; + } else { + if (m_op(insn, MATCH_LB, MASK_LB)) + access_size = 1; + else if (m_op(insn, MATCH_LH, MASK_LH)) + access_size = 2; + else if (m_op(insn, MATCH_LW, MASK_LW)) + access_size = 4; + else if (m_op(insn, MATCH_LD, MASK_LD)) + access_size = 8; + else if (m_op(insn, MATCH_LBU, MASK_LBU)) { + access_size = 1; + sign_extend = 0; + } else if (m_op(insn, MATCH_LHU, MASK_LHU)) { + access_size = 2; + sign_extend = 0; + } else if (m_op(insn, MATCH_LWU, MASK_LWU)) { + access_size = 4; + sign_extend = 0; + } else { + printf("unknown load instr at %lx", + guest_addr); + return (-3); + } + reg_num = rd; + } + vme_ret->inst_length = 4; + } else { + rs2 = (insn >> 7) & 0x7; + rs2 += 0x8; + rd = (insn >> 2) & 0x7; + rd += 0x8; + + if (direction == VM_DIR_WRITE) { + if (m_op(insn, MATCH_C_SW, MASK_C_SW)) + access_size = 4; + else if (m_op(insn, MATCH_C_SD, MASK_C_SD)) + access_size = 8; + else { + printf("unknown compressed store instr at %lx", + guest_addr); + return (-4); + } + } else { + if (m_op(insn, MATCH_C_LW, MASK_C_LW)) + access_size = 4; + else if (m_op(insn, MATCH_C_LD, MASK_C_LD)) + access_size = 8; + else { + printf("unknown load instr at %lx", guest_addr); + return (-5); + } + } + reg_num = rd; + vme_ret->inst_length = 2; + } + + vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) | + (vme_ret->stval & 0x3); + + dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn, + reg_num, vme_ret->u.inst_emul.gpa); + + vie = &vme_ret->u.inst_emul.vie; + vie->dir = direction; + vie->reg = reg_num; + vie->sign_extend = sign_extend; + vie->access_size = access_size; + + return (0); +} + +static bool +riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme, + pmap_t pmap) +{ + struct hyptrap trap; + uint64_t insn; + uint64_t gpa; + bool handled; + bool retu; + int ret; + int i; + + handled = false; + + if (vme->scause & SCAUSE_INTR) { + /* + * Host interrupt? 
Leave critical section to handle. + */ + vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + vme->inst_length = 0; + return (handled); + } + + switch (vme->scause) { + case SCAUSE_FETCH_GUEST_PAGE_FAULT: + case SCAUSE_LOAD_GUEST_PAGE_FAULT: + case SCAUSE_STORE_GUEST_PAGE_FAULT: + gpa = (vme->htval << 2) | (vme->stval & 0x3); + if (vm_mem_allocated(hypctx->vcpu, gpa)) { + vme->exitcode = VM_EXITCODE_PAGING; + vme->inst_length = 0; + vme->u.paging.gpa = gpa; + } else { + ret = riscv_gen_inst_emul_data(hypctx, vme, &trap); + if (ret != 0) { + vme->exitcode = VM_EXITCODE_HYP; + vme->u.hyp.scause = trap.scause; + break; + } + vme->exitcode = VM_EXITCODE_INST_EMUL; + } + break; + case SCAUSE_ILLEGAL_INSTRUCTION: + /* + * TODO: handle illegal instruction properly. + */ + printf("%s: Illegal instruction at %lx stval 0x%lx htval " + "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval); + vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + case SCAUSE_VIRTUAL_SUPERVISOR_ECALL: + retu = false; + vmm_sbi_ecall(hypctx->vcpu, &retu); + if (retu == false) { + handled = true; + break; + } + for (i = 0; i < nitems(vme->u.ecall.args); i++) + vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i]; + vme->exitcode = VM_EXITCODE_ECALL; + handled = false; + break; + case SCAUSE_VIRTUAL_INSTRUCTION: + insn = vme->stval; + if (m_op(insn, MATCH_WFI, MASK_WFI)) + vme->exitcode = VM_EXITCODE_WFI; + else + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + default: + printf("unknown scause %lx\n", vme->scause); + vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + } + + return (handled); +} + +int +vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, + int prot, uint64_t *gpa, int *is_fault) +{ + + /* Implement me. */ + + return (ENOSYS); +} + +void +riscv_send_ipi(struct hypctx *hypctx, int hart_id) +{ + struct hyp *hyp; + struct vm *vm; + + hyp = hypctx->hyp; + vm = hyp->vm; + + atomic_set_32(&hypctx->ipi_pending, 1); + + vcpu_notify_event(vm_vcpu(vm, hart_id)); +} + +int +riscv_check_ipi(struct hypctx *hypctx, bool clear) +{ + int val; + + if (clear) + val = atomic_swap_32(&hypctx->ipi_pending, 0); + else + val = hypctx->ipi_pending; + + return (val); +} + +static void +riscv_sync_interrupts(struct hypctx *hypctx) +{ + int pending; + + pending = aplic_check_pending(hypctx); + + if (pending) + hypctx->guest_csrs.hvip |= HVIP_VSEIP; + else + hypctx->guest_csrs.hvip &= ~HVIP_VSEIP; + + csr_write(hvip, hypctx->guest_csrs.hvip); +} + +static void +riscv_sync_ipi(struct hypctx *hypctx) +{ + + /* Guest clears VSSIP bit manually. */ + if (riscv_check_ipi(hypctx, true)) + hypctx->guest_csrs.hvip |= HVIP_VSSIP; + + csr_write(hvip, hypctx->guest_csrs.hvip); +} + +int +vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) +{ + struct hypctx *hypctx; + struct vm_exit *vme; + struct vcpu *vcpu; + register_t val; + bool handled; + + hypctx = (struct hypctx *)vcpui; + vcpu = hypctx->vcpu; + vme = vm_exitinfo(vcpu); + + hypctx->guest_regs.hyp_sepc = (uint64_t)pc; + + vmmops_delegate(); + + /* + * From The RISC-V Instruction Set Manual + * Volume II: RISC-V Privileged Architectures + * + * If the new virtual machine's guest physical page tables + * have been modified, it may be necessary to execute an HFENCE.GVMA + * instruction (see Section 5.3.2) before or after writing hgatp. 
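+	 * We issue HFENCE.GVMA unconditionally before loading the new hgatp
+	 * below.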
+ */ + __asm __volatile("hfence.gvma" ::: "memory"); + + csr_write(hgatp, pmap->pm_satp); + csr_write(henvcfg, HENVCFG_STCE); + csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE); + /* TODO: should we trap rdcycle / rdtime? */ + csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM); + + vmmops_vcpu_restore_csrs(hypctx); + + for (;;) { + dprintf("%s: pc %lx\n", __func__, pc); + + if (hypctx->has_exception) { + hypctx->has_exception = false; + /* + * TODO: implement exception injection. + */ + } + + val = intr_disable(); + + /* Check if the vcpu is suspended */ + if (vcpu_suspended(evinfo)) { + intr_restore(val); + vm_exit_suspended(vcpu, pc); + break; + } + + if (vcpu_debugged(vcpu)) { + intr_restore(val); + vm_exit_debug(vcpu, pc); + break; + } + + /* + * TODO: What happens if a timer interrupt is asserted exactly + * here, but for the previous VM? + */ + riscv_set_active_vcpu(hypctx); + aplic_flush_hwstate(hypctx); + + riscv_sync_interrupts(hypctx); + riscv_sync_ipi(hypctx); + + dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n", + __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus, + hypctx->guest_regs.hyp_hstatus); + + vmm_switch(hypctx); + + dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__, + hypctx->guest_regs.hyp_hstatus); + + aplic_sync_hwstate(hypctx); + riscv_sync_interrupts(hypctx); + + /* + * TODO: deactivate stage 2 pmap here if needed. + */ + + vme->scause = csr_read(scause); + vme->sepc = csr_read(sepc); + vme->stval = csr_read(stval); + vme->htval = csr_read(htval); + vme->htinst = csr_read(htinst); + + intr_restore(val); + + vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); + vme->pc = hypctx->guest_regs.hyp_sepc; + vme->inst_length = INSN_SIZE; + + handled = riscv_handle_world_switch(hypctx, vme, pmap); + if (handled == false) + /* Exit loop to emulate instruction. */ + break; + else { + /* Resume guest execution from the next instruction. */ + hypctx->guest_regs.hyp_sepc += vme->inst_length; + } + } + + vmmops_vcpu_save_csrs(hypctx); + + return (0); +} + +static void +riscv_pcpu_vmcleanup(void *arg) +{ + struct hyp *hyp; + int i, maxcpus; + + hyp = arg; + maxcpus = vm_get_maxcpus(hyp->vm); + for (i = 0; i < maxcpus; i++) { + if (riscv_get_active_vcpu() == hyp->ctx[i]) { + riscv_set_active_vcpu(NULL); + break; + } + } +} + +void +vmmops_vcpu_cleanup(void *vcpui) +{ + struct hypctx *hypctx; + + hypctx = vcpui; + + dprintf("%s\n", __func__); + + aplic_cpucleanup(hypctx); + + free(hypctx, M_HYP); +} + +void +vmmops_cleanup(void *vmi) +{ + struct hyp *hyp; + + hyp = vmi; + + dprintf("%s\n", __func__); + + aplic_vmcleanup(hyp); + + smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp); + + free(hyp, M_HYP); +} + +/* + * Return register value. Registers have different sizes and an explicit cast + * must be made to ensure proper conversion. 
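+ * A NULL return indicates a register that is not exposed by this backend.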
+ */ +static uint64_t * +hypctx_regptr(struct hypctx *hypctx, int reg) +{ + + switch (reg) { + case VM_REG_GUEST_RA: + return (&hypctx->guest_regs.hyp_ra); + case VM_REG_GUEST_SP: + return (&hypctx->guest_regs.hyp_sp); + case VM_REG_GUEST_GP: + return (&hypctx->guest_regs.hyp_gp); + case VM_REG_GUEST_TP: + return (&hypctx->guest_regs.hyp_tp); + case VM_REG_GUEST_T0: + return (&hypctx->guest_regs.hyp_t[0]); + case VM_REG_GUEST_T1: + return (&hypctx->guest_regs.hyp_t[1]); + case VM_REG_GUEST_T2: + return (&hypctx->guest_regs.hyp_t[2]); + case VM_REG_GUEST_S0: + return (&hypctx->guest_regs.hyp_s[0]); + case VM_REG_GUEST_S1: + return (&hypctx->guest_regs.hyp_s[1]); + case VM_REG_GUEST_A0: + return (&hypctx->guest_regs.hyp_a[0]); + case VM_REG_GUEST_A1: + return (&hypctx->guest_regs.hyp_a[1]); + case VM_REG_GUEST_A2: + return (&hypctx->guest_regs.hyp_a[2]); + case VM_REG_GUEST_A3: + return (&hypctx->guest_regs.hyp_a[3]); + case VM_REG_GUEST_A4: + return (&hypctx->guest_regs.hyp_a[4]); + case VM_REG_GUEST_A5: + return (&hypctx->guest_regs.hyp_a[5]); + case VM_REG_GUEST_A6: + return (&hypctx->guest_regs.hyp_a[6]); + case VM_REG_GUEST_A7: + return (&hypctx->guest_regs.hyp_a[7]); + case VM_REG_GUEST_S2: + return (&hypctx->guest_regs.hyp_s[2]); + case VM_REG_GUEST_S3: + return (&hypctx->guest_regs.hyp_s[3]); + case VM_REG_GUEST_S4: + return (&hypctx->guest_regs.hyp_s[4]); + case VM_REG_GUEST_S5: + return (&hypctx->guest_regs.hyp_s[5]); + case VM_REG_GUEST_S6: + return (&hypctx->guest_regs.hyp_s[6]); + case VM_REG_GUEST_S7: + return (&hypctx->guest_regs.hyp_s[7]); + case VM_REG_GUEST_S8: + return (&hypctx->guest_regs.hyp_s[8]); + case VM_REG_GUEST_S9: + return (&hypctx->guest_regs.hyp_s[9]); + case VM_REG_GUEST_S10: + return (&hypctx->guest_regs.hyp_s[10]); + case VM_REG_GUEST_S11: + return (&hypctx->guest_regs.hyp_s[11]); + case VM_REG_GUEST_T3: + return (&hypctx->guest_regs.hyp_t[3]); + case VM_REG_GUEST_T4: + return (&hypctx->guest_regs.hyp_t[4]); + case VM_REG_GUEST_T5: + return (&hypctx->guest_regs.hyp_t[5]); + case VM_REG_GUEST_T6: + return (&hypctx->guest_regs.hyp_t[6]); + case VM_REG_GUEST_SEPC: + return (&hypctx->guest_regs.hyp_sepc); + default: + break; + } + + return (NULL); +} + +int +vmmops_getreg(void *vcpui, int reg, uint64_t *retval) +{ + uint64_t *regp; + int running, hostcpu; + struct hypctx *hypctx; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + if (reg == VM_REG_GUEST_ZERO) { + *retval = 0; + return (0); + } + + regp = hypctx_regptr(hypctx, reg); + if (regp == NULL) + return (EINVAL); + + *retval = *regp; + + return (0); +} + +int +vmmops_setreg(void *vcpui, int reg, uint64_t val) +{ + struct hypctx *hypctx; + int running, hostcpu; + uint64_t *regp; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + regp = hypctx_regptr(hypctx, reg); + if (regp == NULL) + return (EINVAL); + + *regp = val; + + return (0); +} + +int +vmmops_exception(void *vcpui, uint64_t scause) +{ + struct hypctx *hypctx; + int running, hostcpu; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + /* TODO: implement me. 
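+	 * Until then no exception state is modified and ENOSYS is returned.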
*/ + + return (ENOSYS); +} + +int +vmmops_getcap(void *vcpui, int num, int *retval) +{ + int ret; + + ret = ENOENT; + + switch (num) { + case VM_CAP_UNRESTRICTED_GUEST: + *retval = 1; + ret = 0; + break; + default: + break; + } + + return (ret); +} + +int +vmmops_setcap(void *vcpui, int num, int val) +{ + + return (ENOENT); +} Index: sys/riscv/vmm/vmm_sbi.c =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_sbi.c @@ -0,0 +1,179 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "riscv.h" + +static int +vmm_sbi_handle_rfnc(struct vcpu *vcpu, struct hypctx *hypctx) +{ + uint64_t hart_mask __unused; + uint64_t start __unused; + uint64_t size __unused; + uint64_t asid __unused; + uint64_t func_id; + + func_id = hypctx->guest_regs.hyp_a[6]; + hart_mask = hypctx->guest_regs.hyp_a[0]; + start = hypctx->guest_regs.hyp_a[2]; + size = hypctx->guest_regs.hyp_a[3]; + asid = hypctx->guest_regs.hyp_a[4]; + + dprintf("%s: %ld hart_mask %lx start %lx size %lx\n", __func__, + func_id, hart_mask, start, size); + + /* TODO: implement remote sfence. 
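+	 * For now every RFNC call reports success (a0 = 0) back to the guest.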
*/ + + switch (func_id) { + case SBI_RFNC_REMOTE_FENCE_I: + break; + case SBI_RFNC_REMOTE_SFENCE_VMA: + break; + case SBI_RFNC_REMOTE_SFENCE_VMA_ASID: + break; + default: + break; + } + + hypctx->guest_regs.hyp_a[0] = 0; + + return (0); +} + +static int +vmm_sbi_handle_ipi(struct vcpu *vcpu, struct hypctx *hypctx) +{ + struct hypctx *target_hypctx; + struct vcpu *target_vcpu __unused; + cpuset_t active_cpus; + struct hyp *hyp; + uint64_t hart_mask; + uint64_t func_id; + int hart_id; + int bit; + int ret; + + func_id = hypctx->guest_regs.hyp_a[6]; + hart_mask = hypctx->guest_regs.hyp_a[0]; + + dprintf("%s: hart_mask %lx\n", __func__, hart_mask); + + hyp = hypctx->hyp; + + active_cpus = vm_active_cpus(hyp->vm); + + switch (func_id) { + case SBI_IPI_SEND_IPI: + while ((bit = ffs(hart_mask))) { + hart_id = (bit - 1); + hart_mask &= ~(1u << hart_id); + if (CPU_ISSET(hart_id, &active_cpus)) { + /* TODO. */ + target_vcpu = vm_vcpu(hyp->vm, hart_id); + target_hypctx = hypctx->hyp->ctx[hart_id]; + riscv_send_ipi(target_hypctx, hart_id); + } + } + ret = 0; + break; + default: + printf("%s: unknown func %ld\n", __func__, func_id); + ret = -1; + break; + } + + hypctx->guest_regs.hyp_a[0] = ret; + + return (0); +} + +int +vmm_sbi_ecall(struct vcpu *vcpu, bool *retu) +{ + int sbi_extension_id __unused; + struct hypctx *hypctx; + + hypctx = riscv_get_active_vcpu(); + sbi_extension_id = hypctx->guest_regs.hyp_a[7]; + + dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__, + hypctx->guest_regs.hyp_a[0], + hypctx->guest_regs.hyp_a[1], + hypctx->guest_regs.hyp_a[2], + hypctx->guest_regs.hyp_a[3], + hypctx->guest_regs.hyp_a[4], + hypctx->guest_regs.hyp_a[5], + hypctx->guest_regs.hyp_a[6], + hypctx->guest_regs.hyp_a[7]); + + switch (sbi_extension_id) { + case SBI_EXT_ID_RFNC: + vmm_sbi_handle_rfnc(vcpu, hypctx); + break; + case SBI_EXT_ID_TIME: + break; + case SBI_EXT_ID_IPI: + vmm_sbi_handle_ipi(vcpu, hypctx); + break; + default: + *retu = true; + break; + } + + return (0); +} Index: sys/riscv/vmm/vmm_stat.h =================================================================== --- /dev/null +++ sys/riscv/vmm/vmm_stat.h @@ -0,0 +1,43 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+#include
+
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_WFI);
+VMM_STAT_DECLARE(VMEXIT_IRQ);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED);
+
+#endif
Index: sys/riscv/vmm/vmm_switch.S
===================================================================
--- /dev/null
+++ sys/riscv/vmm/vmm_switch.S
@@ -0,0 +1,220 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#include "assym.inc"
+
+	.text
+
+/*
+ * a0 == hypctx *
+ */
+ENTRY(vmm_switch)
+	sd	ra, (HYP_H_RA)(a0)
+	sd	sp, (HYP_H_SP)(a0)
+	sd	tp, (HYP_H_TP)(a0)
+	sd	gp, (HYP_H_GP)(a0)
+	sd	s0, (HYP_H_S + 0 * 8)(a0)
+	sd	s1, (HYP_H_S + 1 * 8)(a0)
+	sd	s2, (HYP_H_S + 2 * 8)(a0)
+	sd	s3, (HYP_H_S + 3 * 8)(a0)
+	sd	s4, (HYP_H_S + 4 * 8)(a0)
+	sd	s5, (HYP_H_S + 5 * 8)(a0)
+	sd	s6, (HYP_H_S + 6 * 8)(a0)
+	sd	s7, (HYP_H_S + 7 * 8)(a0)
+	sd	s8, (HYP_H_S + 8 * 8)(a0)
+	sd	s9, (HYP_H_S + 9 * 8)(a0)
+	sd	s10, (HYP_H_S + 10 * 8)(a0)
+	sd	s11, (HYP_H_S + 11 * 8)(a0)
+
+	sd	a1, (HYP_H_A + 1 * 8)(a0)
+	sd	a2, (HYP_H_A + 2 * 8)(a0)
+	sd	a3, (HYP_H_A + 3 * 8)(a0)
+	sd	a4, (HYP_H_A + 4 * 8)(a0)
+	sd	a5, (HYP_H_A + 5 * 8)(a0)
+	sd	a6, (HYP_H_A + 6 * 8)(a0)
+	sd	a7, (HYP_H_A + 7 * 8)(a0)
+
+	ld	t0, (HYP_G_SSTATUS)(a0)
+	ld	t1, (HYP_G_HSTATUS)(a0)
+	ld	t2, (HYP_G_SCOUNTEREN)(a0)
+	la	t4, .Lswitch_return
+	ld	t5, (HYP_G_SEPC)(a0)
+
+	csrrw	t0, sstatus, t0
+	csrrw	t1, hstatus, t1
+	csrrw	t2, scounteren, t2
+	csrrw	t3, sscratch, a0
+	csrrw	t4, stvec, t4
+	csrw	sepc, t5
+
+	sd	t0, (HYP_H_SSTATUS)(a0)
+	sd	t1, (HYP_H_HSTATUS)(a0)
+	sd	t2, (HYP_H_SCOUNTEREN)(a0)
+	sd	t3, (HYP_H_SSCRATCH)(a0)
+	sd	t4, (HYP_H_STVEC)(a0)
+
+	ld	ra, (HYP_G_RA)(a0)
+	ld	sp, (HYP_G_SP)(a0)
+	ld	gp, (HYP_G_GP)(a0)
+	ld	tp, (HYP_G_TP)(a0)
+	ld	t0, (HYP_G_T + 0 * 8)(a0)
+	ld	t1, (HYP_G_T + 1 * 8)(a0)
+	ld	t2, (HYP_G_T + 2 * 8)(a0)
+	ld	t3, (HYP_G_T + 3 * 8)(a0)
+	ld	t4, (HYP_G_T + 4 * 8)(a0)
+	ld	t5, (HYP_G_T + 5 * 8)(a0)
+	ld	t6, (HYP_G_T + 6 * 8)(a0)
+	ld	s0, (HYP_G_S + 0 * 8)(a0)
+	ld	s1, (HYP_G_S + 1 * 8)(a0)
+	ld	s2, (HYP_G_S + 2 * 8)(a0)
+	ld	s3, (HYP_G_S + 3 * 8)(a0)
+	ld	s4, (HYP_G_S + 4 * 8)(a0)
+	ld	s5, (HYP_G_S + 5 * 8)(a0)
+	ld	s6, (HYP_G_S + 6 * 8)(a0)
+	ld	s7, (HYP_G_S + 7 * 8)(a0)
+	ld	s8, (HYP_G_S + 8 * 8)(a0)
+	ld	s9, (HYP_G_S + 9 * 8)(a0)
+	ld	s10, (HYP_G_S + 10 * 8)(a0)
+	ld	s11, (HYP_G_S + 11 * 8)(a0)
+	/* skip a0 for now. */
+	ld	a1, (HYP_G_A + 1 * 8)(a0)
+	ld	a2, (HYP_G_A + 2 * 8)(a0)
+	ld	a3, (HYP_G_A + 3 * 8)(a0)
+	ld	a4, (HYP_G_A + 4 * 8)(a0)
+	ld	a5, (HYP_G_A + 5 * 8)(a0)
+	ld	a6, (HYP_G_A + 6 * 8)(a0)
+	ld	a7, (HYP_G_A + 7 * 8)(a0)
+	/* now load a0. */
+	ld	a0, (HYP_G_A + 0 * 8)(a0)
+
+	sret
+
+	.align 2
+.Lswitch_return:
+
+	csrrw	a0, sscratch, a0
+	sd	ra, (HYP_G_RA)(a0)
+	sd	sp, (HYP_G_SP)(a0)
+	sd	gp, (HYP_G_GP)(a0)
+	sd	tp, (HYP_G_TP)(a0)
+	sd	t0, (HYP_G_T + 0 * 8)(a0)
+	sd	t1, (HYP_G_T + 1 * 8)(a0)
+	sd	t2, (HYP_G_T + 2 * 8)(a0)
+	sd	t3, (HYP_G_T + 3 * 8)(a0)
+	sd	t4, (HYP_G_T + 4 * 8)(a0)
+	sd	t5, (HYP_G_T + 5 * 8)(a0)
+	sd	t6, (HYP_G_T + 6 * 8)(a0)
+	sd	s0, (HYP_G_S + 0 * 8)(a0)
+	sd	s1, (HYP_G_S + 1 * 8)(a0)
+	sd	s2, (HYP_G_S + 2 * 8)(a0)
+	sd	s3, (HYP_G_S + 3 * 8)(a0)
+	sd	s4, (HYP_G_S + 4 * 8)(a0)
+	sd	s5, (HYP_G_S + 5 * 8)(a0)
+	sd	s6, (HYP_G_S + 6 * 8)(a0)
+	sd	s7, (HYP_G_S + 7 * 8)(a0)
+	sd	s8, (HYP_G_S + 8 * 8)(a0)
+	sd	s9, (HYP_G_S + 9 * 8)(a0)
+	sd	s10, (HYP_G_S + 10 * 8)(a0)
+	sd	s11, (HYP_G_S + 11 * 8)(a0)
+	/* skip a0 */
+	sd	a1, (HYP_G_A + 1 * 8)(a0)
+	sd	a2, (HYP_G_A + 2 * 8)(a0)
+	sd	a3, (HYP_G_A + 3 * 8)(a0)
+	sd	a4, (HYP_G_A + 4 * 8)(a0)
+	sd	a5, (HYP_G_A + 5 * 8)(a0)
+	sd	a6, (HYP_G_A + 6 * 8)(a0)
+	sd	a7, (HYP_G_A + 7 * 8)(a0)
+
+	ld	t1, (HYP_H_STVEC)(a0)
+	ld	t2, (HYP_H_SSCRATCH)(a0)
+	ld	t3, (HYP_H_SCOUNTEREN)(a0)
+	ld	t4, (HYP_H_HSTATUS)(a0)
+	ld	t5, (HYP_H_SSTATUS)(a0)
+
+	csrr	t0, sepc
+	csrw	stvec, t1
+	csrrw	t2, sscratch, t2
+	csrrw	t3, scounteren, t3
+	csrrw	t4, hstatus, t4
+	csrrw	t5, sstatus, t5
+
+	sd	t0, (HYP_G_SEPC)(a0)
+	sd	t2, (HYP_G_A + 0 * 8)(a0)
+	sd	t3, (HYP_G_SCOUNTEREN)(a0)
+	sd	t4, (HYP_G_HSTATUS)(a0)
+	sd	t5, (HYP_G_SSTATUS)(a0)
+
+	ld	ra, (HYP_H_RA)(a0)
+	ld	sp, (HYP_H_SP)(a0)
+	ld	tp, (HYP_H_TP)(a0)
+	ld	gp, (HYP_H_GP)(a0)
+	ld	s0, (HYP_H_S + 0 * 8)(a0)
+	ld	s1, (HYP_H_S + 1 * 8)(a0)
+	ld	s2, (HYP_H_S + 2 * 8)(a0)
+	ld	s3, (HYP_H_S + 3 * 8)(a0)
+	ld	s4, (HYP_H_S + 4 * 8)(a0)
+	ld	s5, (HYP_H_S + 5 * 8)(a0)
+	ld	s6, (HYP_H_S + 6 * 8)(a0)
+	ld	s7, (HYP_H_S + 7 * 8)(a0)
+	ld	s8, (HYP_H_S + 8 * 8)(a0)
+	ld	s9, (HYP_H_S + 9 * 8)(a0)
+	ld	s10, (HYP_H_S + 10 * 8)(a0)
+	ld	s11, (HYP_H_S + 11 * 8)(a0)
+
+	ld	a1, (HYP_H_A + 1 * 8)(a0)
+	ld	a2, (HYP_H_A + 2 * 8)(a0)
+	ld	a3, (HYP_H_A + 3 * 8)(a0)
+	ld	a4, (HYP_H_A + 4 * 8)(a0)
+	ld	a5, (HYP_H_A + 5 * 8)(a0)
+	ld	a6, (HYP_H_A + 6 * 8)(a0)
+	ld	a7, (HYP_H_A + 7 * 8)(a0)
+
+	ret
+
+END(vmm_switch)
+
+ENTRY(vmm_unpriv_trap)
+	csrr	a1, sepc
+	sd	a1, HYP_TRAP_SEPC(a0)
+	addi	a1, a1, 4	/* Next instruction after hlvx.hu */
+	csrw	sepc, a1
+	csrr	a1, scause
+	sd	a1, HYP_TRAP_SCAUSE(a0)
+	csrr	a1, stval
+	sd	a1, HYP_TRAP_STVAL(a0)
+	csrr	a1, htval
+	sd	a1, HYP_TRAP_HTVAL(a0)
+	csrr	a1, htinst
+	sd	a1, HYP_TRAP_HTINST(a0)
+	sret
+END(vmm_unpriv_trap)
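
A note on the SBI interface exercised by vmm_sbi.c above: vmm_sbi_ecall() relies on the standard SBI calling convention, in which the guest kernel loads the extension ID (EID) into a7, the function ID (FID) into a6 and arguments into a0-a5 before executing an ecall; from VS-mode the ecall traps into the hypervisor with SCAUSE_VIRTUAL_SUPERVISOR_ECALL, and the handlers write the SBI error code back into the guest's a0. The fragment below is an illustrative guest-side sketch only, not part of this patch; the wrapper names are invented, and the EID/FID values follow the published SBI specification.

#include <stdint.h>

struct sbi_ret {
	long error;	/* returned in a0: 0 on success, negative SBI error otherwise */
	long value;	/* returned in a1: call-specific value */
};

/* Generic SBI call: EID in a7, FID in a6, arguments in a0/a1 (a2-a5 unused here). */
static inline struct sbi_ret
guest_sbi_call(uint64_t eid, uint64_t fid, uint64_t arg0, uint64_t arg1)
{
	register uintptr_t a0 __asm ("a0") = (uintptr_t)arg0;
	register uintptr_t a1 __asm ("a1") = (uintptr_t)arg1;
	register uintptr_t a6 __asm ("a6") = (uintptr_t)fid;
	register uintptr_t a7 __asm ("a7") = (uintptr_t)eid;

	/* Traps out of the guest; the hypervisor dispatches on a7 (EID) and a6 (FID). */
	__asm __volatile("ecall"
	    : "+r" (a0), "+r" (a1)
	    : "r" (a6), "r" (a7)
	    : "memory");

	return ((struct sbi_ret){ .error = (long)a0, .value = (long)a1 });
}

/*
 * sbi_send_ipi(hart_mask, hart_mask_base): IPI extension (SBI_EXT_ID_IPI,
 * EID 0x735049), FID 0.  A set bit N in hart_mask requests an IPI to hart
 * (hart_mask_base + N); the mask in a0 is what vmm_sbi_handle_ipi() walks.
 */
static inline long
guest_send_ipi(uint64_t hart_mask)
{

	return (guest_sbi_call(0x735049, 0, hart_mask, 0).error);
}

With that convention in mind, the dispatch on hyp_a[7]/hyp_a[6] in vmm_sbi_ecall() and the per-extension handlers map directly onto the EID/FID pairs a guest like this would generate; anything not handled in the kernel is pushed to userspace via *retu.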