diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv index 89fb6f3cbe5a..f75fee72fde2 100644 --- a/sys/conf/files.riscv +++ b/sys/conf/files.riscv @@ -1,77 +1,87 @@ cddl/dev/dtrace/riscv/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/riscv/dtrace_isa.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/dtrace/riscv/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/dtrace/riscv/instr_size.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/riscv/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/des/des_enc.c optional netsmb dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofw_pcib.c optional pci fdt dev/pci/pci_dw.c optional pci fdt dev/pci/pci_dw_if.m optional pci fdt dev/pci/pci_host_generic.c optional pci dev/pci/pci_host_generic_fdt.c optional pci fdt dev/uart/uart_cpu_fdt.c optional uart fdt dev/uart/uart_dev_lowrisc.c optional uart_lowrisc +dev/vmm/vmm_dev.c optional vmm +dev/vmm/vmm_stat.c optional vmm dev/xilinx/axi_quad_spi.c optional xilinx_spi dev/xilinx/axidma.c optional axidma xdma dev/xilinx/if_xae.c optional xae dev/xilinx/xlnx_pcib.c optional pci fdt xlnx_pcib kern/msi_if.m standard kern/pic_if.m standard kern/subr_devmap.c standard kern/subr_dummy_vdso_tc.c standard kern/subr_intr.c standard kern/subr_physmem.c standard libkern/bcopy.c standard libkern/memcmp.c standard libkern/memset.c standard libkern/strcmp.c standard libkern/strlen.c standard libkern/strncmp.c standard riscv/riscv/aplic.c standard riscv/riscv/autoconf.c standard riscv/riscv/bus_machdep.c standard riscv/riscv/bus_space_asm.S standard riscv/riscv/busdma_bounce.c standard riscv/riscv/busdma_machdep.c standard riscv/riscv/clock.c standard riscv/riscv/copyinout.S standard riscv/riscv/cpufunc_asm.S standard riscv/riscv/db_disasm.c optional ddb riscv/riscv/db_interface.c optional ddb riscv/riscv/db_trace.c optional ddb riscv/riscv/dump_machdep.c standard riscv/riscv/elf_machdep.c standard riscv/riscv/exception.S standard riscv/riscv/exec_machdep.c standard +riscv/riscv/fpe.c optional vmm riscv/riscv/gdb_machdep.c optional gdb riscv/riscv/intc.c standard riscv/riscv/identcpu.c standard riscv/riscv/locore.S standard no-obj riscv/riscv/machdep.c standard riscv/riscv/minidump_machdep.c standard riscv/riscv/mp_machdep.c optional smp riscv/riscv/mem.c standard riscv/riscv/nexus.c standard riscv/riscv/ofw_machdep.c optional fdt riscv/riscv/plic.c standard riscv/riscv/pmap.c standard riscv/riscv/ptrace_machdep.c standard riscv/riscv/riscv_console.c optional rcons riscv/riscv/riscv_syscon.c optional syscon riscv_syscon fdt riscv/riscv/sigtramp.S standard riscv/riscv/sbi.c standard riscv/riscv/sbi_ipi.c optional smp riscv/riscv/sdt_machdep.c optional kdtrace_hooks riscv/riscv/stack_machdep.c optional ddb | stack riscv/riscv/support.S standard riscv/riscv/swtch.S standard riscv/riscv/sys_machdep.c standard riscv/riscv/trap.c standard riscv/riscv/timer.c standard riscv/riscv/uio_machdep.c standard riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack riscv/riscv/vm_machdep.c standard +riscv/vmm/vmm.c optional vmm +riscv/vmm/vmm_aplic.c optional vmm +riscv/vmm/vmm_dev_machdep.c optional vmm +riscv/vmm/vmm_instruction_emul.c optional vmm +riscv/vmm/vmm_riscv.c optional vmm +riscv/vmm/vmm_sbi.c optional vmm +riscv/vmm/vmm_switch.S optional vmm # Zstd contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C} diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk index 4c3014f2abb6..2f451f9286a6 100644 --- a/sys/conf/kern.mk +++ 
b/sys/conf/kern.mk @@ -1,334 +1,334 @@ # # Warning flags for compiling the kernel and components of the kernel: # CWARNFLAGS?= -Wall -Wstrict-prototypes \ -Wmissing-prototypes -Wpointer-arith -Wcast-qual \ -Wundef -Wno-pointer-sign ${FORMAT_EXTENSIONS} \ -Wmissing-include-dirs -fdiagnostics-show-option \ -Wno-unknown-pragmas -Wswitch \ ${CWARNEXTRA} # # The following flags are next up for working on: # -Wextra # Disable a few warnings for clang, since there are several places in the # kernel where fixing them is more trouble than it is worth, or where there is # a false positive. .if ${COMPILER_TYPE} == "clang" NO_WCONSTANT_CONVERSION= -Wno-error=constant-conversion NO_WSHIFT_COUNT_NEGATIVE= -Wno-shift-count-negative NO_WSHIFT_COUNT_OVERFLOW= -Wno-shift-count-overflow NO_WSELF_ASSIGN= -Wno-self-assign NO_WUNNEEDED_INTERNAL_DECL= -Wno-error=unneeded-internal-declaration NO_WSOMETIMES_UNINITIALIZED= -Wno-error=sometimes-uninitialized NO_WCAST_QUAL= -Wno-error=cast-qual NO_WTAUTOLOGICAL_POINTER_COMPARE= -Wno-tautological-pointer-compare .if ${COMPILER_VERSION} >= 100000 NO_WMISLEADING_INDENTATION= -Wno-misleading-indentation .endif .if ${COMPILER_VERSION} >= 130000 NO_WUNUSED_BUT_SET_VARIABLE= -Wno-unused-but-set-variable .endif .if ${COMPILER_VERSION} >= 140000 NO_WBITWISE_INSTEAD_OF_LOGICAL= -Wno-bitwise-instead-of-logical .endif .if ${COMPILER_VERSION} >= 150000 NO_WSTRICT_PROTOTYPES= -Wno-strict-prototypes NO_WDEPRECATED_NON_PROTOTYPE= -Wno-deprecated-non-prototype .endif # Several other warnings which might be useful in some cases, but not severe # enough to error out the whole kernel build. Display them anyway, so there is # some incentive to fix them eventually. CWARNEXTRA?= -Wno-error=tautological-compare -Wno-error=empty-body \ -Wno-error=parentheses-equality -Wno-error=unused-function \ -Wno-error=pointer-sign CWARNEXTRA+= -Wno-error=shift-negative-value CWARNEXTRA+= -Wno-address-of-packed-member .endif # clang .if ${COMPILER_TYPE} == "gcc" # Catch-all for all the things that are in our tree, but for which we're # not yet ready for this compiler. NO_WUNUSED_BUT_SET_VARIABLE=-Wno-unused-but-set-variable CWARNEXTRA?= -Wno-error=address \ -Wno-error=aggressive-loop-optimizations \ -Wno-error=array-bounds \ -Wno-error=attributes \ -Wno-error=cast-qual \ -Wno-error=enum-compare \ -Wno-error=maybe-uninitialized \ -Wno-error=misleading-indentation \ -Wno-error=nonnull-compare \ -Wno-error=overflow \ -Wno-error=sequence-point \ -Wno-error=shift-overflow \ -Wno-error=tautological-compare \ -Wno-error=unused-function .if ${COMPILER_VERSION} >= 70100 CWARNEXTRA+= -Wno-error=stringop-overflow .endif .if ${COMPILER_VERSION} >= 70200 CWARNEXTRA+= -Wno-error=memset-elt-size .endif .if ${COMPILER_VERSION} >= 80000 CWARNEXTRA+= -Wno-error=packed-not-aligned .endif .if ${COMPILER_VERSION} >= 90100 CWARNEXTRA+= -Wno-address-of-packed-member \ -Wno-alloc-size-larger-than \ -Wno-error=alloca-larger-than= .if ${COMPILER_VERSION} >= 120100 CWARNEXTRA+= -Wno-error=nonnull \ -Wno-dangling-pointer \ -Wno-zero-length-bounds NO_WINFINITE_RECURSION= -Wno-infinite-recursion NO_WSTRINGOP_OVERREAD= -Wno-stringop-overread .endif .endif # GCC produces false positives for functions that switch on an # enum (GCC bug 87950) CWARNFLAGS+= -Wno-return-type .endif # gcc # This warning is utter nonsense CWARNFLAGS+= -Wno-format-zero-length # External compilers may not support our format extensions. Allow them # to be disabled. WARNING: format checking is disabled in this case. 
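#
# As a hedged illustration of what the remap below buys: with
# -D__printf__=__freebsd_kprintf__ in effect, a declaration such as
#
#	void log_state(const char *fmt, ...) __printflike(1, 2);
#
# is format-checked against the kernel's printf(9), so kernel-only
# conversions like %b and %D pass -Wformat instead of tripping it.
# (log_state is a hypothetical example, not a kernel symbol.)
#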
.if ${MK_FORMAT_EXTENSIONS} == "no" FORMAT_EXTENSIONS= -Wno-format .elif ${COMPILER_TYPE} == "clang" || \ (${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 120100) FORMAT_EXTENSIONS= -D__printf__=__freebsd_kprintf__ .else FORMAT_EXTENSIONS= -fformat-extensions .endif # # On i386, do not align the stack to 16-byte boundaries. Otherwise GCC 2.95 # and above adds code to the entry and exit point of every function to align the # stack to 16-byte boundaries -- thus wasting approximately 12 bytes of stack # per function call. While the 16-byte alignment may benefit micro benchmarks, # it is probably an overall loss as it makes the code bigger (less efficient # use of code cache tag lines) and uses more stack (less efficient use of data # cache tag lines). Explicitly prohibit the use of FPU, SSE and other SIMD # operations inside the kernel itself. These operations are exclusively # reserved for user applications. # # gcc: # Setting -mno-mmx implies -mno-3dnow # Setting -mno-sse implies -mno-sse2, -mno-sse3 and -mno-ssse3 # # clang: # Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa # Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and -mno-sse42 # .if ${MACHINE_CPUARCH} == "i386" CFLAGS.gcc+= -mpreferred-stack-boundary=2 CFLAGS.clang+= -mno-aes -mno-avx CFLAGS+= -mno-mmx -mno-sse -msoft-float INLINE_LIMIT?= 8000 .endif .if ${MACHINE_CPUARCH} == "arm" INLINE_LIMIT?= 8000 .endif .if ${MACHINE_CPUARCH} == "aarch64" # We generally don't want fpu instructions in the kernel. CFLAGS += -mgeneral-regs-only # Reserve x18 for pcpu data CFLAGS += -ffixed-x18 # Build with BTI+PAC CFLAGS += -mbranch-protection=standard .if ${LINKER_FEATURES:Mbti-report} LDFLAGS += -Wl,-zbti-report=error .endif # TODO: support outline atomics CFLAGS += -mno-outline-atomics INLINE_LIMIT?= 8000 .endif # # For RISC-V we specify the soft-float ABI (lp64) to avoid the use of floating # point registers within the kernel. However, we include the F and D extensions # in -march so we can have limited floating point support in context switching # code. This is different than userland where we use a hard-float ABI (lp64d). # # We also specify the "medium" code model, which generates code suitable for a # 2GiB addressing range located at any offset, allowing modules to be located # anywhere in the 64-bit address space. Note that clang and GCC refer to this # code model as "medium" and "medany" respectively. # .if ${MACHINE_CPUARCH} == "riscv" -CFLAGS+= -march=rv64imafdc +CFLAGS+= -march=rv64imafdch CFLAGS+= -mabi=lp64 CFLAGS.clang+= -mcmodel=medium CFLAGS.gcc+= -mcmodel=medany INLINE_LIMIT?= 8000 .if ${LINKER_FEATURES:Mriscv-relaxations} == "" CFLAGS+= -mno-relax .endif .endif # # For AMD64, we explicitly prohibit the use of FPU, SSE and other SIMD # operations inside the kernel itself. These operations are exclusively # reserved for user applications. # # gcc: # Setting -mno-mmx implies -mno-3dnow # Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3 and -mfpmath=387 # # clang: # Setting -mno-mmx implies -mno-3dnow and -mno-3dnowa # Setting -mno-sse implies -mno-sse2, -mno-sse3, -mno-ssse3, -mno-sse41 and -mno-sse42 # (-mfpmath= is not supported) # .if ${MACHINE_CPUARCH} == "amd64" CFLAGS.clang+= -mno-aes -mno-avx CFLAGS+= -mcmodel=kernel -mno-red-zone -mno-mmx -mno-sse -msoft-float \ -fno-asynchronous-unwind-tables INLINE_LIMIT?= 8000 .endif # # For PowerPC we tell gcc to use floating point emulation. This avoids using # floating point registers for integer operations which it has a tendency to do. 
# Also explicitly disable Altivec instructions inside the kernel.
#
.if ${MACHINE_CPUARCH} == "powerpc"
CFLAGS+=	-mno-altivec -msoft-float
INLINE_LIMIT?=	15000
.endif

.if ${MACHINE_ARCH} == "powerpcspe"
CFLAGS.gcc+=	-mno-spe
.endif

#
# Use dot symbols (or, better, the V2 ELF ABI) on powerpc64 to make
# DDB happy. ELFv2, if available, has some other efficiency benefits.
#
.if ${MACHINE_ARCH:Mpowerpc64*} != "" && \
    ${COMPILER_TYPE} == "clang" && ${COMPILER_VERSION} < 160000
CFLAGS+=	-mabi=elfv2
.endif

#
# GCC 3.0 and above like to do certain optimizations based on the
# assumption that the program is linked against libc. Stop this.
#
CFLAGS+=	-ffreestanding

#
# The C standard leaves signed integer overflow behavior undefined.
# gcc and clang optimizers take advantage of this. The kernel makes
# use of signed integer wraparound mechanics so we need the compiler
# to treat it as a wraparound and not take shortcuts.
#
CFLAGS+=	-fwrapv

#
# GCC SSP support
#
.if ${MK_SSP} != "no"
CFLAGS+=	-fstack-protector
.endif

#
# Retpoline speculative execution vulnerability mitigation (CVE-2017-5715)
#
.if defined(COMPILER_FEATURES) && ${COMPILER_FEATURES:Mretpoline} != "" && \
    ${MK_KERNEL_RETPOLINE} != "no"
CFLAGS+=	-mretpoline
.endif

#
# Initialize stack variables on function entry
#
.if ${OPT_INIT_ALL} != "none"
.if ${COMPILER_FEATURES:Minit-all}
CFLAGS+= -ftrivial-auto-var-init=${OPT_INIT_ALL}
.if ${OPT_INIT_ALL} == "zero" && ${COMPILER_TYPE} == "clang" && ${COMPILER_VERSION} < 160000
CFLAGS+= -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang
.endif
.else
.warning INIT_ALL (${OPT_INIT_ALL}) requested but not supported by compiler
.endif
.endif

#
# Some newer toolchains default to DWARF 5, which isn't supported by some build
# tools yet.
#
.if (${CFLAGS:M-g} != "" || ${CFLAGS:M-g[0-3]} != "") && ${CFLAGS:M-gdwarf*} == ""
CFLAGS+=	-gdwarf-4
.endif

CFLAGS+= ${CWARNFLAGS:M*} ${CWARNFLAGS.${.IMPSRC:T}}
CFLAGS+= ${CWARNFLAGS.${COMPILER_TYPE}}
CFLAGS+= ${CFLAGS.${COMPILER_TYPE}} ${CFLAGS.${.IMPSRC:T}}

# Tell bmake not to mistake standard targets for things to be searched for
# or expect to ever be up-to-date.
PHONY_NOTMAIN =	afterdepend afterinstall all beforedepend beforeinstall \
		beforelinking build build-tools buildfiles buildincludes \
		checkdpadd clean cleandepend cleandir cleanobj configure \
		depend distclean distribute exe \
		html includes install installfiles installincludes \
		obj objlink objs objwarn \
		realinstall regress \
		tags whereobj

.PHONY: ${PHONY_NOTMAIN}
.NOTMAIN: ${PHONY_NOTMAIN}

CSTD?=		gnu99

# c99/gnu99 is the minimum C standard version supported for kernel build
.if ${CSTD} == "k&r" || ${CSTD} == "c89" || ${CSTD} == "c90" || \
    ${CSTD} == "c94" || ${CSTD} == "c95"
.error "Only c99/gnu99 or later is supported"
.else # CSTD
CFLAGS+=	-std=${CSTD}
.endif # CSTD

NOSAN_CFLAGS=	${CFLAGS:N-fsanitize*:N-fno-sanitize*:N-fasan-shadow-offset*}

# Please keep this if in sync with bsd.sys.mk
.if ${LD} != "ld" && (${CC:[1]:H} != ${LD:[1]:H} || ${LD:[1]:T} != "ld")
# Add -fuse-ld=${LD} if $LD is in a different directory or not called "ld".
.if ${COMPILER_TYPE} == "clang"
# Note: Clang does not like relative paths for ld so we map ld.lld -> lld.
.if ${COMPILER_VERSION} >= 120000
CCLDFLAGS+=	--ld-path=${LD:[1]:S/^ld.//1W}
.else
CCLDFLAGS+=	-fuse-ld=${LD:[1]:S/^ld.//1W}
.endif
.else
# GCC does not support an absolute path for -fuse-ld so we just print this
# warning instead and let the user add the required symlinks.
# However, we can avoid this warning if -B is set appropriately (e.g.
for # CROSS_TOOLCHAIN=...-gcc). .if !(${LD:[1]:T} == "ld" && ${CC:tw:M-B${LD:[1]:H}/}) .warning LD (${LD}) is not the default linker for ${CC} but -fuse-ld= is not supported .endif .endif .endif # Set target-specific linker emulation name. LD_EMULATION_aarch64=aarch64elf LD_EMULATION_amd64=elf_x86_64_fbsd LD_EMULATION_arm=armelf_fbsd LD_EMULATION_armv7=armelf_fbsd LD_EMULATION_i386=elf_i386_fbsd LD_EMULATION_powerpc= elf32ppc_fbsd LD_EMULATION_powerpcspe= elf32ppc_fbsd LD_EMULATION_powerpc64= elf64ppc_fbsd LD_EMULATION_powerpc64le= elf64lppc_fbsd LD_EMULATION_riscv64= elf64lriscv LD_EMULATION=${LD_EMULATION_${MACHINE_ARCH}} diff --git a/sys/riscv/include/cpu.h b/sys/riscv/include/cpu.h index cbf660dcda0c..0c33adb2abcd 100644 --- a/sys/riscv/include/cpu.h +++ b/sys/riscv/include/cpu.h @@ -1,110 +1,110 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _MACHINE_CPU_H_ #define _MACHINE_CPU_H_ #include #include #include #define TRAPF_PC(tfp) ((tfp)->tf_sepc) #define TRAPF_USERMODE(tfp) (((tfp)->tf_sstatus & SSTATUS_SPP) == 0) #define cpu_getstack(td) ((td)->td_frame->tf_sp) #define cpu_setstack(td, sp) ((td)->td_frame->tf_sp = (sp)) #define cpu_spinwait() /* nothing */ #define cpu_lock_delay() DELAY(1) -#ifdef _KERNEL - /* * Core manufacturer IDs, as reported by the mvendorid CSR. */ #define MVENDORID_UNIMPL 0x0 #define MVENDORID_SIFIVE 0x489 #define MVENDORID_THEAD 0x5b7 /* * Micro-architecture ID register, marchid. * * IDs for open-source implementations are allocated globally. Commercial IDs * will have the most-significant bit set. */ #define MARCHID_UNIMPL 0x0 #define MARCHID_MSB (1ul << (XLEN - 1)) #define MARCHID_OPENSOURCE(v) (v) #define MARCHID_COMMERCIAL(v) (MARCHID_MSB | (v)) #define MARCHID_IS_OPENSOURCE(m) (((m) & MARCHID_MSB) == 0) /* * Open-source marchid values. 
 *
 * https://github.com/riscv/riscv-isa-manual/blob/master/marchid.md
 */
#define	MARCHID_UCB_ROCKET	MARCHID_OPENSOURCE(1)
#define	MARCHID_UCB_BOOM	MARCHID_OPENSOURCE(2)
#define	MARCHID_UCB_SPIKE	MARCHID_OPENSOURCE(5)
#define	MARCHID_UCAM_RVBS	MARCHID_OPENSOURCE(10)

/* SiFive marchid values */
#define	MARCHID_SIFIVE_U7	MARCHID_COMMERCIAL(7)

/*
 * MMU virtual-addressing modes. Support for each level implies the previous,
 * so Sv48-enabled systems MUST support Sv39, etc.
 */
#define	MMU_SV39	0x1	/* 3-level paging */
#define	MMU_SV48	0x2	/* 4-level paging */
#define	MMU_SV57	0x4	/* 5-level paging */

+#ifdef _KERNEL
+
extern char btext[];
extern char etext[];

void	cpu_halt(void) __dead2;
void	cpu_reset(void) __dead2;
void	fork_trampoline(void);
void	identify_cpu(u_int cpu);
void	printcpuinfo(u_int cpu);

static __inline uint64_t
get_cyclecount(void)
{

	return (rdcycle());
}

#endif

#endif /* !_MACHINE_CPU_H_ */
diff --git a/sys/riscv/include/elf.h b/sys/riscv/include/elf.h
index a14d6859902b..78788abe1e57 100644
--- a/sys/riscv/include/elf.h
+++ b/sys/riscv/include/elf.h
@@ -1,87 +1,88 @@
/*-
 * Copyright (c) 1996-1997 John D. Polstra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef _MACHINE_ELF_H_
#define	_MACHINE_ELF_H_

/*
 * ELF definitions for the RISC-V architecture.
 */

#include <sys/elf32.h>	/* Definitions common to all 32 bit architectures. */
#include <sys/elf64.h>	/* Definitions common to all 64 bit architectures. */

#define	__ELF_WORD_SIZE	64	/* Used by <sys/elf_generic.h> */
#include <sys/elf_generic.h>

/*
 * Auxiliary vector entries for passing information to the interpreter.
 */

typedef struct {	/* Auxiliary vector entry on initial stack */
	int	a_type;			/* Entry type. */
	union {
		int	a_val;		/* Integer value. */
	} a_un;
} Elf32_Auxinfo;

typedef struct {	/* Auxiliary vector entry on initial stack */
	long	a_type;			/* Entry type. */
	union {
		long	a_val;		/* Integer value. */
		void	*a_ptr;		/* Address. */
		void	(*a_fcn)(void);	/* Function pointer (not used).
*/ } a_un; } Elf64_Auxinfo; __ElfType(Auxinfo); #define ELF_ARCH EM_RISCV #define ELF_MACHINE_OK(x) ((x) == (ELF_ARCH)) /* Define "machine" characteristics */ #define ELF_TARG_CLASS ELFCLASS64 #define ELF_TARG_DATA ELFDATA2LSB #define ELF_TARG_MACH EM_RISCV #define ELF_TARG_VER 1 /* TODO: set correct value */ #define ET_DYN_LOAD_ADDR 0x100000 /* Flags passed in AT_HWCAP */ #define HWCAP_ISA_BIT(c) (1 << ((c) - 'a')) #define HWCAP_ISA_I HWCAP_ISA_BIT('i') #define HWCAP_ISA_M HWCAP_ISA_BIT('m') #define HWCAP_ISA_A HWCAP_ISA_BIT('a') #define HWCAP_ISA_F HWCAP_ISA_BIT('f') #define HWCAP_ISA_D HWCAP_ISA_BIT('d') #define HWCAP_ISA_C HWCAP_ISA_BIT('c') +#define HWCAP_ISA_H HWCAP_ISA_BIT('h') #define HWCAP_ISA_G \ (HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D) #define HWCAP_ISA_B HWCAP_ISA_BIT('b') #endif /* !_MACHINE_ELF_H_ */ diff --git a/sys/riscv/include/md_var.h b/sys/riscv/include/md_var.h index d9404db914a2..85a51e30b4a7 100644 --- a/sys/riscv/include/md_var.h +++ b/sys/riscv/include/md_var.h @@ -1,54 +1,55 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: FreeBSD: src/sys/i386/include/md_var.h,v 1.40 2001/07/12 */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ extern long Maxmem; extern char sigcode[]; extern int szsigcode; extern u_long elf_hwcap; extern register_t mvendorid; extern register_t marchid; extern register_t mimpid; extern u_int mmu_caps; /* Supervisor-mode extension support */ +extern bool has_hyp; extern bool has_sstc; extern bool has_sscofpmf; extern bool has_svpbmt; struct dumperinfo; struct minidumpstate; int cpu_minidumpsys(struct dumperinfo *, const struct minidumpstate *); #endif /* !_MACHINE_MD_VAR_H_ */ diff --git a/sys/riscv/include/riscvreg.h b/sys/riscv/include/riscvreg.h index e1ad09acedc8..23feb419d04c 100644 --- a/sys/riscv/include/riscvreg.h +++ b/sys/riscv/include/riscvreg.h @@ -1,248 +1,286 @@ /*- * Copyright (c) 2015-2024 Ruslan Bukin * All rights reserved. 
* * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _MACHINE_RISCVREG_H_ #define _MACHINE_RISCVREG_H_ #define SCAUSE_INTR (1ul << 63) #define SCAUSE_CODE (~SCAUSE_INTR) #define SCAUSE_INST_MISALIGNED 0 #define SCAUSE_INST_ACCESS_FAULT 1 #define SCAUSE_ILLEGAL_INSTRUCTION 2 #define SCAUSE_BREAKPOINT 3 #define SCAUSE_LOAD_MISALIGNED 4 #define SCAUSE_LOAD_ACCESS_FAULT 5 #define SCAUSE_STORE_MISALIGNED 6 #define SCAUSE_STORE_ACCESS_FAULT 7 #define SCAUSE_ECALL_USER 8 #define SCAUSE_ECALL_SUPERVISOR 9 +#define SCAUSE_VIRTUAL_SUPERVISOR_ECALL 10 +#define SCAUSE_MACHINE_ECALL 11 #define SCAUSE_INST_PAGE_FAULT 12 #define SCAUSE_LOAD_PAGE_FAULT 13 #define SCAUSE_STORE_PAGE_FAULT 15 +#define SCAUSE_FETCH_GUEST_PAGE_FAULT 20 +#define SCAUSE_LOAD_GUEST_PAGE_FAULT 21 +#define SCAUSE_VIRTUAL_INSTRUCTION 22 +#define SCAUSE_STORE_GUEST_PAGE_FAULT 23 #define SSTATUS_UIE (1 << 0) #define SSTATUS_SIE (1 << 1) #define SSTATUS_UPIE (1 << 4) #define SSTATUS_SPIE (1 << 5) #define SSTATUS_SPIE_SHIFT 5 #define SSTATUS_SPP (1 << 8) #define SSTATUS_SPP_SHIFT 8 #define SSTATUS_FS_SHIFT 13 #define SSTATUS_FS_OFF (0x0 << SSTATUS_FS_SHIFT) #define SSTATUS_FS_INITIAL (0x1 << SSTATUS_FS_SHIFT) #define SSTATUS_FS_CLEAN (0x2 << SSTATUS_FS_SHIFT) #define SSTATUS_FS_DIRTY (0x3 << SSTATUS_FS_SHIFT) #define SSTATUS_FS_MASK (0x3 << SSTATUS_FS_SHIFT) #define SSTATUS_XS_SHIFT 15 #define SSTATUS_XS_MASK (0x3 << SSTATUS_XS_SHIFT) #define SSTATUS_SUM (1 << 18) #if __riscv_xlen == 64 #define SSTATUS_SD (1ul << 63) #else #define SSTATUS_SD (1 << 31) #endif #define MSTATUS_UIE (1 << 0) #define MSTATUS_SIE (1 << 1) #define MSTATUS_HIE (1 << 2) #define MSTATUS_MIE (1 << 3) #define MSTATUS_UPIE (1 << 4) #define MSTATUS_SPIE (1 << 5) #define MSTATUS_SPIE_SHIFT 5 #define MSTATUS_HPIE (1 << 6) #define MSTATUS_MPIE (1 << 7) #define MSTATUS_MPIE_SHIFT 7 #define MSTATUS_SPP (1 << 8) #define MSTATUS_SPP_SHIFT 8 #define MSTATUS_HPP_MASK 0x3 
#define MSTATUS_HPP_SHIFT 9 #define MSTATUS_MPP_MASK 0x3 #define MSTATUS_MPP_SHIFT 11 #define MSTATUS_FS_MASK 0x3 #define MSTATUS_FS_SHIFT 13 #define MSTATUS_XS_MASK 0x3 #define MSTATUS_XS_SHIFT 15 #define MSTATUS_MPRV (1 << 17) #define MSTATUS_PUM (1 << 18) #define MSTATUS_VM_MASK 0x1f #define MSTATUS_VM_SHIFT 24 #define MSTATUS_VM_MBARE 0 #define MSTATUS_VM_MBB 1 #define MSTATUS_VM_MBBID 2 #define MSTATUS_VM_SV32 8 #define MSTATUS_VM_SV39 9 #define MSTATUS_VM_SV48 10 #define MSTATUS_VM_SV57 11 #define MSTATUS_VM_SV64 12 #if __riscv_xlen == 64 #define MSTATUS_SD (1ul << 63) #else #define MSTATUS_SD (1 << 31) #endif #define MSTATUS_PRV_U 0 /* user */ #define MSTATUS_PRV_S 1 /* supervisor */ #define MSTATUS_PRV_H 2 /* hypervisor */ #define MSTATUS_PRV_M 3 /* machine */ +#define HSTATUS_VSBE (1 << 5) +#define HSTATUS_GVA (1 << 6) +#define HSTATUS_SPV (1 << 7) +#define HSTATUS_SPVP (1 << 8) +#define HSTATUS_HU (1 << 9) +#define HSTATUS_VGEIN_S 12 +#define HSTATUS_VGEIN_M (0xf << HSTATUS_VGEIN_S) +#define HSTATUS_VTVM (1 << 20) +#define HSTATUS_VTW (1 << 21) +#define HSTATUS_VTSR (1 << 22) + #define MIE_USIE (1 << 0) #define MIE_SSIE (1 << 1) #define MIE_HSIE (1 << 2) #define MIE_MSIE (1 << 3) #define MIE_UTIE (1 << 4) #define MIE_STIE (1 << 5) #define MIE_HTIE (1 << 6) #define MIE_MTIE (1 << 7) #define MIP_USIP (1 << 0) #define MIP_SSIP (1 << 1) #define MIP_HSIP (1 << 2) #define MIP_MSIP (1 << 3) #define MIP_UTIP (1 << 4) #define MIP_STIP (1 << 5) #define MIP_HTIP (1 << 6) #define MIP_MTIP (1 << 7) #define SIE_USIE (1 << 0) #define SIE_SSIE (1 << 1) #define SIE_UTIE (1 << 4) #define SIE_STIE (1 << 5) #define SIE_UEIE (1 << 8) #define SIE_SEIE (1 << 9) #define MIP_SEIP (1 << 9) +#define HVIP_VSSIP (1 << 2) +#define HVIP_VSTIP (1 << 6) +#define HVIP_VSEIP (1 << 10) + +#define HIE_VSSIE (1 << 2) +#define HIE_VSTIE (1 << 6) +#define HIE_VSEIE (1 << 10) +#define HIE_SGEIE (1 << 12) + /* Note: sip register has no SIP_STIP bit in Spike simulator */ #define SIP_SSIP (1 << 1) #define SIP_STIP (1 << 5) +#define HENVCFG_STCE (1UL << 63) +#define HENVCFG_PBMTE (1UL << 62) +#define HENVCFG_CBZE (1UL << 7) +#define HENVCFG_CBCFE (1UL << 6) +#define HENVCFG_CBIE_S 4 +#define HENVCFG_CBIE_M (0x3 << HENVCFG_CBIE_S) +#define HENVCFG_FIOM (1UL << 0) + +#define HCOUNTEREN_CY (1UL << 0) /* Cycle */ +#define HCOUNTEREN_TM (1UL << 1) /* Time */ +#define HCOUNTEREN_IR (1UL << 2) /* Instret */ + #define SATP_PPN_S 0 #define SATP_PPN_M (0xfffffffffffUL << SATP_PPN_S) #define SATP_ASID_S 44 #define SATP_ASID_M (0xffffUL << SATP_ASID_S) #define SATP_MODE_S 60 #define SATP_MODE_M (0xfUL << SATP_MODE_S) #define SATP_MODE_SV39 (8ULL << SATP_MODE_S) #define SATP_MODE_SV48 (9ULL << SATP_MODE_S) #define XLEN __riscv_xlen #define XLEN_BYTES (XLEN / 8) #define INSN_SIZE 4 #define INSN_C_SIZE 2 #define X_RA 1 #define X_SP 2 #define X_GP 3 #define X_TP 4 #define X_T0 5 #define X_T1 6 #define X_T2 7 #define X_T3 28 #define RD_SHIFT 7 #define RD_MASK (0x1f << RD_SHIFT) #define RS1_SHIFT 15 #define RS1_MASK (0x1f << RS1_SHIFT) #define RS1_SP (X_SP << RS1_SHIFT) #define RS2_SHIFT 20 #define RS2_MASK (0x1f << RS2_SHIFT) #define RS2_RA (X_RA << RS2_SHIFT) #define IMM_SHIFT 20 #define IMM_MASK (0xfff << IMM_SHIFT) #define RS2_C_SHIFT 2 #define RS2_C_MASK (0x1f << RS2_C_SHIFT) #define RS2_C_RA (X_RA << RS2_C_SHIFT) #define CSR_ZIMM(val) \ (__builtin_constant_p(val) && ((u_long)(val) < 32)) #define csr_swap(csr, val) \ ({ u_long ret; \ if (CSR_ZIMM(val)) \ __asm __volatile("csrrwi %0, " #csr ", %1" \ : "=r" (ret) : "i" (val)); \ else \ 
__asm __volatile("csrrw %0, " #csr ", %1" \ : "=r" (ret) : "r" (val)); \ ret; \ }) #define csr_write(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrwi " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrw " #csr ", %0" :: "r" (val)); \ }) #define csr_set(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrsi " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrs " #csr ", %0" :: "r" (val)); \ }) #define csr_clear(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrci " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrc " #csr ", %0" :: "r" (val)); \ }) #define csr_read(csr) \ ({ u_long val; \ __asm __volatile("csrr %0, " #csr : "=r" (val)); \ val; \ }) #if __riscv_xlen == 32 #define csr_read64(csr) \ ({ uint64_t val; \ uint32_t high, low; \ __asm __volatile("1: " \ "csrr t0, " #csr "h\n" \ "csrr %0, " #csr "\n" \ "csrr %1, " #csr "h\n" \ "bne t0, %1, 1b" \ : "=r" (low), "=r" (high) \ : \ : "t0"); \ val = (low | ((uint64_t)high << 32)); \ val; \ }) #else #define csr_read64(csr) ((uint64_t)csr_read(csr)) #endif #endif /* !_MACHINE_RISCVREG_H_ */ diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h new file mode 100644 index 000000000000..e148cd95b522 --- /dev/null +++ b/sys/riscv/include/vmm.h @@ -0,0 +1,328 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_H_ +#define _VMM_H_ + +#include +#include +#include +#include + +#include "pte.h" +#include "pmap.h" + +struct vcpu; + +enum vm_suspend_how { + VM_SUSPEND_NONE, + VM_SUSPEND_RESET, + VM_SUSPEND_POWEROFF, + VM_SUSPEND_HALT, + VM_SUSPEND_LAST +}; + +/* + * Identifiers for architecturally defined registers. 
+ */ +enum vm_reg_name { + VM_REG_GUEST_ZERO = 0, + VM_REG_GUEST_RA, + VM_REG_GUEST_SP, + VM_REG_GUEST_GP, + VM_REG_GUEST_TP, + VM_REG_GUEST_T0, + VM_REG_GUEST_T1, + VM_REG_GUEST_T2, + VM_REG_GUEST_S0, + VM_REG_GUEST_S1, + VM_REG_GUEST_A0, + VM_REG_GUEST_A1, + VM_REG_GUEST_A2, + VM_REG_GUEST_A3, + VM_REG_GUEST_A4, + VM_REG_GUEST_A5, + VM_REG_GUEST_A6, + VM_REG_GUEST_A7, + VM_REG_GUEST_S2, + VM_REG_GUEST_S3, + VM_REG_GUEST_S4, + VM_REG_GUEST_S5, + VM_REG_GUEST_S6, + VM_REG_GUEST_S7, + VM_REG_GUEST_S8, + VM_REG_GUEST_S9, + VM_REG_GUEST_S10, + VM_REG_GUEST_S11, + VM_REG_GUEST_T3, + VM_REG_GUEST_T4, + VM_REG_GUEST_T5, + VM_REG_GUEST_T6, + VM_REG_GUEST_SEPC, + VM_REG_LAST +}; + +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) +#define VM_INTINFO_DEL_ERRCODE 0x800 +#define VM_INTINFO_RSVD 0x7ffff000 +#define VM_INTINFO_VALID 0x80000000 +#define VM_INTINFO_TYPE 0x700 +#define VM_INTINFO_HWINTR (0 << 8) +#define VM_INTINFO_NMI (2 << 8) +#define VM_INTINFO_HWEXCEPTION (3 << 8) +#define VM_INTINFO_SWINTR (4 << 8) + +#define VM_MAX_SUFFIXLEN 15 + +#ifdef _KERNEL + +#define VM_MAX_NAMELEN 32 + +struct vm; +struct vm_exception; +struct vm_exit; +struct vm_run; +struct vm_object; +struct vm_guest_paging; +struct vm_aplic_descr; +struct pmap; + +struct vm_eventinfo { + void *rptr; /* rendezvous cookie */ + int *sptr; /* suspend cookie */ + int *iptr; /* reqidle cookie */ +}; + +int vm_create(const char *name, struct vm **retvm); +struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); +void vm_disable_vcpu_creation(struct vm *vm); +void vm_slock_vcpus(struct vm *vm); +void vm_unlock_vcpus(struct vm *vm); +void vm_destroy(struct vm *vm); +int vm_reinit(struct vm *vm); +const char *vm_name(struct vm *vm); + +/* + * APIs that modify the guest memory map require all vcpus to be frozen. + */ +void vm_slock_memsegs(struct vm *vm); +void vm_xlock_memsegs(struct vm *vm); +void vm_unlock_memsegs(struct vm *vm); +int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, + size_t len, int prot, int flags); +int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len); +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); +void vm_free_memseg(struct vm *vm, int ident); + +/* + * APIs that inspect the guest memory map require only a *single* vcpu to + * be frozen. This acts like a read lock on the guest memory map since any + * modification requires *all* vcpus to be frozen. 
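+ *
+ * A hedged sketch of an inspection pass as the ioctl layer might issue it
+ * (error handling omitted; illustrative only):
+ *
+ *	vm_slock_memsegs(vm);
+ *	error = vm_mmap_getnext(vm, &gpa, &segid, &off, &len, &prot, &flags);
+ *	vm_unlock_memsegs(vm);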
+ */ +int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); +int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + struct vm_object **objptr); +vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm); +void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, + int prot, void **cookie); +void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, + int prot, void **cookie); +void vm_gpa_release(void *cookie); +bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa); + +int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault); + +uint16_t vm_get_maxcpus(struct vm *vm); +void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); +int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); +int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval); +int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val); +int vm_run(struct vcpu *vcpu); +int vm_suspend(struct vm *vm, enum vm_suspend_how how); +void* vm_get_cookie(struct vm *vm); +int vcpu_vcpuid(struct vcpu *vcpu); +void *vcpu_get_cookie(struct vcpu *vcpu); +struct vm *vcpu_vm(struct vcpu *vcpu); +struct vcpu *vm_vcpu(struct vm *vm, int cpu); +int vm_get_capability(struct vcpu *vcpu, int type, int *val); +int vm_set_capability(struct vcpu *vcpu, int type, int val); +int vm_activate_cpu(struct vcpu *vcpu); +int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu); +int vm_inject_exception(struct vcpu *vcpu, uint64_t scause); +int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr); +int vm_assert_irq(struct vm *vm, uint32_t irq); +int vm_deassert_irq(struct vm *vm, uint32_t irq); +int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, + int func); +struct vm_exit *vm_exitinfo(struct vcpu *vcpu); +void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc); +void vm_exit_debug(struct vcpu *vcpu, uint64_t pc); +void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc); +void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc); + +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_debug_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); + +static __inline int +vcpu_rendezvous_pending(struct vm_eventinfo *info) +{ + + return (*((uintptr_t *)(info->rptr)) != 0); +} + +static __inline int +vcpu_suspended(struct vm_eventinfo *info) +{ + + return (*info->sptr); +} + +int vcpu_debugged(struct vcpu *vcpu); + +enum vcpu_state { + VCPU_IDLE, + VCPU_FROZEN, + VCPU_RUNNING, + VCPU_SLEEPING, +}; + +int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); +enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); + +static int __inline +vcpu_is_running(struct vcpu *vcpu, int *hostcpu) +{ + return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING); +} + +#ifdef _SYS_PROC_H_ +static int __inline +vcpu_should_yield(struct vcpu *vcpu) +{ + struct thread *td; + + td = curthread; + return (td->td_ast != 0 || td->td_owepreempt != 0); +} +#endif + +void *vcpu_stats(struct vcpu *vcpu); +void vcpu_notify_event(struct vcpu *vcpu); + +enum vm_reg_name vm_segment_name(int seg_encoding); + +#endif /* _KERNEL */ + +#define VM_DIR_READ 0 +#define VM_DIR_WRITE 1 + +#define VM_GP_M_MASK 0x1f +#define VM_GP_MMU_ENABLED (1 << 5) + +struct vm_guest_paging { + int flags; + int padding; 
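+	/*
+	 * 'flags' carries the guest privilege mode in VM_GP_M_MASK and has
+	 * VM_GP_MMU_ENABLED set while guest translation is active (see the
+	 * definitions above); 'padding' only keeps the layout stable.
+	 */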
+}; + +struct vie { + uint8_t access_size:4, sign_extend:1, dir:1, unused:2; + enum vm_reg_name reg; +}; + +struct vre { + uint32_t inst_syndrome; + uint8_t dir:1, unused:7; + enum vm_reg_name reg; +}; + +/* + * Identifiers for optional vmm capabilities + */ +enum vm_cap_type { + VM_CAP_UNRESTRICTED_GUEST, + VM_CAP_MAX +}; + +enum vm_exitcode { + VM_EXITCODE_BOGUS, + VM_EXITCODE_ECALL, + VM_EXITCODE_HYP, + VM_EXITCODE_PAGING, + VM_EXITCODE_SUSPENDED, + VM_EXITCODE_DEBUG, + VM_EXITCODE_INST_EMUL, + VM_EXITCODE_WFI, + VM_EXITCODE_MAX +}; + +struct vm_exit { + uint64_t scause; + uint64_t sepc; + uint64_t stval; + uint64_t htval; + uint64_t htinst; + enum vm_exitcode exitcode; + int inst_length; + uint64_t pc; + union { + struct { + uint64_t gpa; + } paging; + + struct { + uint64_t gpa; + struct vm_guest_paging paging; + struct vie vie; + } inst_emul; + + struct { + uint64_t args[8]; + } ecall; + + struct { + enum vm_suspend_how how; + } suspended; + + struct { + uint64_t scause; + } hyp; + } u; +}; + +#endif /* _VMM_H_ */ diff --git a/sys/riscv/include/vmm_dev.h b/sys/riscv/include/vmm_dev.h new file mode 100644 index 000000000000..a21528a8dc68 --- /dev/null +++ b/sys/riscv/include/vmm_dev.h @@ -0,0 +1,258 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_DEV_H_ +#define _VMM_DEV_H_ + +struct vm_memmap { + vm_paddr_t gpa; + int segid; /* memory segment */ + vm_ooffset_t segoff; /* offset into memory segment */ + size_t len; /* mmap length */ + int prot; /* RWX */ + int flags; +}; +#define VM_MEMMAP_F_WIRED 0x01 + +struct vm_munmap { + vm_paddr_t gpa; + size_t len; +}; + +#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? 
(m)->name : NULL) +struct vm_memseg { + int segid; + size_t len; + char name[VM_MAX_SUFFIXLEN + 1]; +}; + +struct vm_register { + int cpuid; + int regnum; /* enum vm_reg_name */ + uint64_t regval; +}; + +struct vm_register_set { + int cpuid; + unsigned int count; + const int *regnums; /* enum vm_reg_name */ + uint64_t *regvals; +}; + +struct vm_run { + int cpuid; + cpuset_t *cpuset; /* CPU set storage */ + size_t cpusetsize; + struct vm_exit *vm_exit; +}; + +struct vm_exception { + int cpuid; + uint64_t scause; +}; + +struct vm_msi { + uint64_t msg; + uint64_t addr; + int bus; + int slot; + int func; +}; + +struct vm_capability { + int cpuid; + enum vm_cap_type captype; + int capval; + int allcpus; +}; + +#define MAX_VM_STATS 64 +struct vm_stats { + int cpuid; /* in */ + int index; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; +struct vm_stat_desc { + int index; /* in */ + char desc[128]; /* out */ +}; + +struct vm_suspend { + enum vm_suspend_how how; +}; + +struct vm_gla2gpa { + int vcpuid; /* inputs */ + int prot; /* PROT_READ or PROT_WRITE */ + uint64_t gla; + struct vm_guest_paging paging; + int fault; /* outputs */ + uint64_t gpa; +}; + +struct vm_activate_cpu { + int vcpuid; +}; + +struct vm_cpuset { + int which; + int cpusetsize; + cpuset_t *cpus; +}; +#define VM_ACTIVE_CPUS 0 +#define VM_SUSPENDED_CPUS 1 +#define VM_DEBUG_CPUS 2 + +struct vm_aplic_descr { + uint64_t mem_start; + uint64_t mem_size; +}; + +struct vm_irq { + uint32_t irq; +}; + +struct vm_cpu_topology { + uint16_t sockets; + uint16_t cores; + uint16_t threads; + uint16_t maxcpus; +}; + +enum { + /* general routines */ + IOCNUM_ABIVERS = 0, + IOCNUM_RUN = 1, + IOCNUM_SET_CAPABILITY = 2, + IOCNUM_GET_CAPABILITY = 3, + IOCNUM_SUSPEND = 4, + IOCNUM_REINIT = 5, + + /* memory apis */ + IOCNUM_GET_GPA_PMAP = 12, + IOCNUM_GLA2GPA_NOFAULT = 13, + IOCNUM_ALLOC_MEMSEG = 14, + IOCNUM_GET_MEMSEG = 15, + IOCNUM_MMAP_MEMSEG = 16, + IOCNUM_MMAP_GETNEXT = 17, + IOCNUM_MUNMAP_MEMSEG = 18, + + /* register/state accessors */ + IOCNUM_SET_REGISTER = 20, + IOCNUM_GET_REGISTER = 21, + IOCNUM_SET_REGISTER_SET = 24, + IOCNUM_GET_REGISTER_SET = 25, + + /* statistics */ + IOCNUM_VM_STATS = 50, + IOCNUM_VM_STAT_DESC = 51, + + /* CPU Topology */ + IOCNUM_SET_TOPOLOGY = 63, + IOCNUM_GET_TOPOLOGY = 64, + + /* interrupt injection */ + IOCNUM_ASSERT_IRQ = 80, + IOCNUM_DEASSERT_IRQ = 81, + IOCNUM_RAISE_MSI = 82, + IOCNUM_INJECT_EXCEPTION = 83, + + /* vm_cpuset */ + IOCNUM_ACTIVATE_CPU = 90, + IOCNUM_GET_CPUSET = 91, + IOCNUM_SUSPEND_CPU = 92, + IOCNUM_RESUME_CPU = 93, + + /* vm_attach_aplic */ + IOCNUM_ATTACH_APLIC = 110, +}; + +#define VM_RUN \ + _IOWR('v', IOCNUM_RUN, struct vm_run) +#define VM_SUSPEND \ + _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) +#define VM_REINIT \ + _IO('v', IOCNUM_REINIT) +#define VM_ALLOC_MEMSEG \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) +#define VM_GET_MEMSEG \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) +#define VM_MMAP_MEMSEG \ + _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) +#define VM_MMAP_GETNEXT \ + _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) +#define VM_MUNMAP_MEMSEG \ + _IOW('v', IOCNUM_MUNMAP_MEMSEG, struct vm_munmap) +#define VM_SET_REGISTER \ + _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) +#define VM_GET_REGISTER \ + _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) +#define VM_SET_REGISTER_SET \ + _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) +#define VM_GET_REGISTER_SET \ + _IOWR('v', IOCNUM_GET_REGISTER_SET, struct 
vm_register_set) +#define VM_SET_CAPABILITY \ + _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) +#define VM_GET_CAPABILITY \ + _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) +#define VM_STATS \ + _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) +#define VM_STAT_DESC \ + _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) +#define VM_ASSERT_IRQ \ + _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq) +#define VM_DEASSERT_IRQ \ + _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq) +#define VM_RAISE_MSI \ + _IOW('v', IOCNUM_RAISE_MSI, struct vm_msi) +#define VM_INJECT_EXCEPTION \ + _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) +#define VM_SET_TOPOLOGY \ + _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GET_TOPOLOGY \ + _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GLA2GPA_NOFAULT \ + _IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa) +#define VM_ACTIVATE_CPU \ + _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) +#define VM_GET_CPUS \ + _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_SUSPEND_CPU \ + _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) +#define VM_RESUME_CPU \ + _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) +#define VM_ATTACH_APLIC \ + _IOW('v', IOCNUM_ATTACH_APLIC, struct vm_aplic_descr) +#endif diff --git a/sys/riscv/include/vmm_instruction_emul.h b/sys/riscv/include/vmm_instruction_emul.h new file mode 100644 index 000000000000..bee63d2f86ba --- /dev/null +++ b/sys/riscv/include/vmm_instruction_emul.h @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +/* + * Callback functions to read and write memory regions. + */ +typedef int (*mem_region_read_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t *rval, int rsize, void *arg); +typedef int (*mem_region_write_t)(struct vcpu *vcpu, uint64_t gpa, + uint64_t wval, int wsize, void *arg); + +/* + * Callback functions to read and write registers. 
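+ *
+ * A hedged, hypothetical example of a conforming read callback (not part
+ * of this change; the stashed-value scheme is illustrative only):
+ *
+ *	static int
+ *	dummy_reg_read(struct vcpu *vcpu, uint64_t *rval, void *arg)
+ *	{
+ *		*rval = *(uint64_t *)arg;
+ *		return (0);
+ *	}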
+ */
+typedef int (*reg_read_t)(struct vcpu *vcpu, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(struct vcpu *vcpu, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ */
+int vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie,
+    struct vm_guest_paging *paging, mem_region_read_t mrr,
+    mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vre'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ */
+int vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread,
+    reg_write_t regwrite, void *regarg);
+
+#ifdef _KERNEL
+void vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
+    reg_read_t reg_read, reg_write_t reg_write, void *arg);
+void vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask);
+
+void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
+    mem_region_read_t mmio_read, mem_region_write_t mmio_write);
+void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size);
+#endif
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/sys/riscv/include/vmm_snapshot.h b/sys/riscv/include/vmm_snapshot.h
new file mode 100644
index 000000000000..da23dbe43a4f
--- /dev/null
+++ b/sys/riscv/include/vmm_snapshot.h
@@ -0,0 +1 @@
+/* $FreeBSD$ */
diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c
index 637510db242e..74b70858edab 100644
--- a/sys/riscv/riscv/genassym.c
+++ b/sys/riscv/riscv/genassym.c
@@ -1,106 +1,140 @@
/*-
 * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + ASSYM(KERNBASE, KERNBASE); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); ASSYM(PMAP_MAPDEV_EARLY_SIZE, PMAP_MAPDEV_EARLY_SIZE); ASSYM(PM_SATP, offsetof(struct pmap, pm_satp)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); ASSYM(PCB_GP, offsetof(struct pcb, pcb_gp)); ASSYM(PCB_TP, offsetof(struct pcb, pcb_tp)); ASSYM(PCB_S, offsetof(struct pcb, pcb_s)); ASSYM(PCB_X, offsetof(struct pcb, pcb_x)); ASSYM(PCB_FCSR, offsetof(struct pcb, pcb_fcsr)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_AST, offsetof(struct thread, td_ast)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TF_SIZE, roundup2(sizeof(struct trapframe), STACKALIGNBYTES + 1)); ASSYM(TF_RA, offsetof(struct trapframe, tf_ra)); ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); ASSYM(TF_GP, offsetof(struct trapframe, tf_gp)); ASSYM(TF_TP, offsetof(struct trapframe, tf_tp)); ASSYM(TF_T, offsetof(struct trapframe, tf_t)); ASSYM(TF_S, offsetof(struct trapframe, tf_s)); ASSYM(TF_A, offsetof(struct trapframe, tf_a)); ASSYM(TF_SEPC, offsetof(struct trapframe, tf_sepc)); ASSYM(TF_STVAL, offsetof(struct trapframe, tf_stval)); ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause)); ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus)); +ASSYM(HYP_H_RA, offsetof(struct hypctx, host_regs.hyp_ra)); +ASSYM(HYP_H_SP, offsetof(struct hypctx, host_regs.hyp_sp)); +ASSYM(HYP_H_GP, offsetof(struct hypctx, host_regs.hyp_gp)); +ASSYM(HYP_H_TP, offsetof(struct hypctx, host_regs.hyp_tp)); +ASSYM(HYP_H_T, offsetof(struct hypctx, host_regs.hyp_t)); +ASSYM(HYP_H_S, offsetof(struct hypctx, host_regs.hyp_s)); +ASSYM(HYP_H_A, offsetof(struct hypctx, host_regs.hyp_a)); +ASSYM(HYP_H_SEPC, offsetof(struct hypctx, host_regs.hyp_sepc)); +ASSYM(HYP_H_SSTATUS, offsetof(struct hypctx, host_regs.hyp_sstatus)); +ASSYM(HYP_H_HSTATUS, offsetof(struct hypctx, host_regs.hyp_hstatus)); +ASSYM(HYP_H_SSCRATCH, offsetof(struct hypctx, host_sscratch)); +ASSYM(HYP_H_STVEC, offsetof(struct hypctx, host_stvec)); +ASSYM(HYP_H_SCOUNTEREN, offsetof(struct hypctx, host_scounteren)); + +ASSYM(HYP_G_RA, offsetof(struct hypctx, guest_regs.hyp_ra)); +ASSYM(HYP_G_SP, offsetof(struct hypctx, guest_regs.hyp_sp)); +ASSYM(HYP_G_GP, offsetof(struct hypctx, guest_regs.hyp_gp)); +ASSYM(HYP_G_TP, offsetof(struct hypctx, 
guest_regs.hyp_tp)); +ASSYM(HYP_G_T, offsetof(struct hypctx, guest_regs.hyp_t)); +ASSYM(HYP_G_S, offsetof(struct hypctx, guest_regs.hyp_s)); +ASSYM(HYP_G_A, offsetof(struct hypctx, guest_regs.hyp_a)); +ASSYM(HYP_G_SEPC, offsetof(struct hypctx, guest_regs.hyp_sepc)); +ASSYM(HYP_G_SSTATUS, offsetof(struct hypctx, guest_regs.hyp_sstatus)); +ASSYM(HYP_G_HSTATUS, offsetof(struct hypctx, guest_regs.hyp_hstatus)); +ASSYM(HYP_G_SCOUNTEREN, offsetof(struct hypctx, guest_scounteren)); + +ASSYM(HYP_TRAP_SEPC, offsetof(struct hyptrap, sepc)); +ASSYM(HYP_TRAP_SCAUSE, offsetof(struct hyptrap, scause)); +ASSYM(HYP_TRAP_STVAL, offsetof(struct hyptrap, stval)); +ASSYM(HYP_TRAP_HTVAL, offsetof(struct hyptrap, htval)); +ASSYM(HYP_TRAP_HTINST, offsetof(struct hyptrap, htinst)); + ASSYM(RISCV_BOOTPARAMS_SIZE, sizeof(struct riscv_bootparams)); ASSYM(RISCV_BOOTPARAMS_KERN_PHYS, offsetof(struct riscv_bootparams, kern_phys)); ASSYM(RISCV_BOOTPARAMS_KERN_STACK, offsetof(struct riscv_bootparams, kern_stack)); ASSYM(RISCV_BOOTPARAMS_DTBP_PHYS, offsetof(struct riscv_bootparams, dtbp_phys)); ASSYM(RISCV_BOOTPARAMS_MODULEP, offsetof(struct riscv_bootparams, modulep)); diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c index 54eb302982f1..7823830c3136 100644 --- a/sys/riscv/riscv/identcpu.c +++ b/sys/riscv/riscv/identcpu.c @@ -1,531 +1,535 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * Copyright (c) 2022 Mitchell Horne * Copyright (c) 2023 The FreeBSD Foundation * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Portions of this software were developed by Mitchell Horne * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif const char machine[] = "riscv"; SYSCTL_CONST_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD | CTLFLAG_CAPRD, machine, "Machine class"); /* Hardware implementation info. These values may be empty. */ register_t mvendorid; /* The CPU's JEDEC vendor ID */ register_t marchid; /* The architecture ID */ register_t mimpid; /* The implementation ID */ u_int mmu_caps; /* Supervisor-mode extension support. */ +bool has_hyp; bool __read_frequently has_sstc; bool __read_frequently has_sscofpmf; bool has_svpbmt; struct cpu_desc { const char *cpu_mvendor_name; const char *cpu_march_name; u_int isa_extensions; /* Single-letter extensions. */ u_int mmu_caps; u_int smode_extensions; #define SV_SSTC (1 << 0) #define SV_SVNAPOT (1 << 1) #define SV_SVPBMT (1 << 2) #define SV_SVINVAL (1 << 3) #define SV_SSCOFPMF (1 << 4) }; struct cpu_desc cpu_desc[MAXCPU]; /* * Micro-architecture tables. */ struct marchid_entry { register_t march_id; const char *march_name; }; #define MARCHID_END { -1ul, NULL } /* Open-source RISC-V architecture IDs; globally allocated. */ static const struct marchid_entry global_marchids[] = { { MARCHID_UCB_ROCKET, "UC Berkeley Rocket" }, { MARCHID_UCB_BOOM, "UC Berkeley Boom" }, { MARCHID_UCB_SPIKE, "UC Berkeley Spike" }, { MARCHID_UCAM_RVBS, "University of Cambridge RVBS" }, MARCHID_END }; static const struct marchid_entry sifive_marchids[] = { { MARCHID_SIFIVE_U7, "6/7/P200/X200-Series Processor" }, MARCHID_END }; /* * Known CPU vendor/manufacturer table. */ static const struct { register_t mvendor_id; const char *mvendor_name; const struct marchid_entry *marchid_table; } mvendor_ids[] = { { MVENDORID_UNIMPL, "Unspecified", NULL }, { MVENDORID_SIFIVE, "SiFive", sifive_marchids }, { MVENDORID_THEAD, "T-Head", NULL }, }; /* * The ISA string describes the complete set of instructions supported by a * RISC-V CPU. The string begins with a small prefix (e.g. rv64) indicating the * base ISA. It is followed first by single-letter ISA extensions, and then * multi-letter ISA extensions. * * Underscores are used mainly to separate consecutive multi-letter extensions, * but may optionally appear between any two extensions. An extension may be * followed by a version number, in the form of 'Mpm', where M is the * extension's major version number, and 'm' is the minor version number. * * The format is described in detail by the "ISA Extension Naming Conventions" * chapter of the unprivileged spec. */ #define ISA_PREFIX ("rv" __XSTRING(__riscv_xlen)) #define ISA_PREFIX_LEN (sizeof(ISA_PREFIX) - 1) static __inline int parse_ext_s(struct cpu_desc *desc, char *isa, int idx, int len) { #define CHECK_S_EXT(str, flag) \ do { \ if (strncmp(&isa[idx], (str), \ MIN(strlen(str), len - idx)) == 0) { \ desc->smode_extensions |= flag; \ return (idx + strlen(str)); \ } \ } while (0) /* Check for known/supported extensions. */ CHECK_S_EXT("sstc", SV_SSTC); CHECK_S_EXT("svnapot", SV_SVNAPOT); CHECK_S_EXT("svpbmt", SV_SVPBMT); CHECK_S_EXT("svinval", SV_SVINVAL); CHECK_S_EXT("sscofpmf", SV_SSCOFPMF); #undef CHECK_S_EXT /* * Proceed to the next multi-letter extension or the end of the * string. */ while (isa[idx] != '_' && idx < len) { idx++; } return (idx); } static __inline int parse_ext_x(struct cpu_desc *desc __unused, char *isa, int idx, int len) { /* * Proceed to the next multi-letter extension or the end of the * string. 
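 *
 * For example, given the hypothetical string "rv64imac_xfoo_sstc", this
 * helper is entered at the 'x' of the vendor extension "xfoo" and stops
 * at the '_' that precedes "sstc".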
*/ while (isa[idx] != '_' && idx < len) { idx++; } return (idx); } static __inline int parse_ext_z(struct cpu_desc *desc __unused, char *isa, int idx, int len) { /* * Proceed to the next multi-letter extension or the end of the * string. * * TODO: parse some of these. */ while (isa[idx] != '_' && idx < len) { idx++; } return (idx); } static __inline int parse_ext_version(char *isa, int idx, u_int *majorp __unused, u_int *minorp __unused) { /* Major version. */ while (isdigit(isa[idx])) idx++; if (isa[idx] != 'p') return (idx); else idx++; /* Minor version. */ while (isdigit(isa[idx])) idx++; return (idx); } /* * Parse the ISA string, building up the set of HWCAP bits as they are found. */ static int parse_riscv_isa(struct cpu_desc *desc, char *isa, int len) { int i; /* Check the string prefix. */ if (strncmp(isa, ISA_PREFIX, ISA_PREFIX_LEN) != 0) { printf("%s: Unrecognized ISA string: %s\n", __func__, isa); return (-1); } i = ISA_PREFIX_LEN; while (i < len) { switch(isa[i]) { case 'a': case 'b': case 'c': case 'd': case 'f': + case 'h': case 'i': case 'm': desc->isa_extensions |= HWCAP_ISA_BIT(isa[i]); i++; break; case 'g': desc->isa_extensions |= HWCAP_ISA_G; i++; break; case 's': /* * XXX: older versions of this string erroneously * indicated supervisor and user mode support as * single-letter extensions. Detect and skip both 's' * and 'u'. */ if (isa[i - 1] != '_' && isa[i + 1] == 'u') { i += 2; continue; } /* * Supervisor-level extension namespace. */ i = parse_ext_s(desc, isa, i, len); break; case 'x': /* * Custom extension namespace. For now, we ignore * these. */ i = parse_ext_x(desc, isa, i, len); break; case 'z': /* * Multi-letter standard extension namespace. */ i = parse_ext_z(desc, isa, i, len); break; case '_': i++; continue; default: /* Unrecognized/unsupported. */ i++; break; } i = parse_ext_version(isa, i, NULL, NULL); } return (0); } #ifdef FDT static void parse_mmu_fdt(struct cpu_desc *desc, phandle_t node) { char mmu[16]; desc->mmu_caps |= MMU_SV39; if (OF_getprop(node, "mmu-type", mmu, sizeof(mmu)) > 0) { if (strcmp(mmu, "riscv,sv48") == 0) desc->mmu_caps |= MMU_SV48; else if (strcmp(mmu, "riscv,sv57") == 0) desc->mmu_caps |= MMU_SV48 | MMU_SV57; } } static void identify_cpu_features_fdt(u_int cpu, struct cpu_desc *desc) { char isa[1024]; phandle_t node; ssize_t len; pcell_t reg; u_int hart; node = OF_finddevice("/cpus"); if (node == -1) { printf("%s: could not find /cpus node in FDT\n", __func__); return; } hart = pcpu_find(cpu)->pc_hart; /* * Locate our current CPU's node in the device-tree, and parse its * contents to detect supported CPU/ISA features and extensions. */ for (node = OF_child(node); node > 0; node = OF_peer(node)) { /* Skip any non-CPU nodes, such as cpu-map. */ if (!ofw_bus_node_is_compatible(node, "riscv")) continue; /* Find this CPU */ if (OF_getencprop(node, "reg", ®, sizeof(reg)) <= 0 || reg != hart) continue; len = OF_getprop(node, "riscv,isa", isa, sizeof(isa)); KASSERT(len <= sizeof(isa), ("ISA string truncated")); if (len == -1) { printf("%s: could not find 'riscv,isa' property " "for CPU %d, hart %u\n", __func__, cpu, hart); return; } /* * The string is specified to be lowercase, but let's be * certain. */ for (int i = 0; i < len; i++) isa[i] = tolower(isa[i]); if (parse_riscv_isa(desc, isa, len) != 0) return; /* Check MMU features. */ parse_mmu_fdt(desc, node); /* We are done. 
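 * Each hart appears in at most one CPU node, so stop searching.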
*/ break; } if (node <= 0) { printf("%s: could not find FDT node for CPU %u, hart %u\n", __func__, cpu, hart); } } #endif static void identify_cpu_features(u_int cpu, struct cpu_desc *desc) { #ifdef FDT identify_cpu_features_fdt(cpu, desc); #endif } /* * Update kernel/user global state based on the feature parsing results, stored * in desc. * * We keep only the subset of values common to all CPUs. */ static void update_global_capabilities(u_int cpu, struct cpu_desc *desc) { #define UPDATE_CAP(t, v) \ do { \ if (cpu == 0) { \ (t) = (v); \ } else { \ (t) &= (v); \ } \ } while (0) /* Update the capabilities exposed to userspace via AT_HWCAP. */ UPDATE_CAP(elf_hwcap, (u_long)desc->isa_extensions); /* * MMU capabilities, e.g. Sv48. */ UPDATE_CAP(mmu_caps, desc->mmu_caps); /* Supervisor-mode extension support. */ + UPDATE_CAP(has_hyp, (desc->isa_extensions & HWCAP_ISA_H) != 0); UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0); UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0); UPDATE_CAP(has_svpbmt, (desc->smode_extensions & SV_SVPBMT) != 0); #undef UPDATE_CAP } static void identify_cpu_ids(struct cpu_desc *desc) { const struct marchid_entry *table = NULL; int i; desc->cpu_mvendor_name = "Unknown"; desc->cpu_march_name = "Unknown"; /* * Search for a recognized vendor, and possibly obtain the secondary * table for marchid lookup. */ for (i = 0; i < nitems(mvendor_ids); i++) { if (mvendorid == mvendor_ids[i].mvendor_id) { desc->cpu_mvendor_name = mvendor_ids[i].mvendor_name; table = mvendor_ids[i].marchid_table; break; } } if (marchid == MARCHID_UNIMPL) { desc->cpu_march_name = "Unspecified"; return; } if (MARCHID_IS_OPENSOURCE(marchid)) { table = global_marchids; } else if (table == NULL) return; for (i = 0; table[i].march_name != NULL; i++) { if (marchid == table[i].march_id) { desc->cpu_march_name = table[i].march_name; break; } } } void identify_cpu(u_int cpu) { struct cpu_desc *desc = &cpu_desc[cpu]; identify_cpu_ids(desc); identify_cpu_features(cpu, desc); update_global_capabilities(cpu, desc); } void printcpuinfo(u_int cpu) { struct cpu_desc *desc; u_int hart; desc = &cpu_desc[cpu]; hart = pcpu_find(cpu)->pc_hart; /* XXX: check this here so we are guaranteed to have console output. */ KASSERT(desc->isa_extensions != 0, ("Empty extension set for CPU %u, did parsing fail?", cpu)); /* * Suppress the output of some fields in the common case of identical * CPU features. */ #define SHOULD_PRINT(_field) \ (cpu == 0 || desc[0]._field != desc[-1]._field) /* Always print summary line. */ printf("CPU %-3u: Vendor=%s Core=%s (Hart %u)\n", cpu, desc->cpu_mvendor_name, desc->cpu_march_name, hart); /* These values are global. 
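 * Print them once, for CPU 0 only.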
*/ if (cpu == 0) printf(" marchid=%#lx, mimpid=%#lx\n", marchid, mimpid); if (SHOULD_PRINT(mmu_caps)) { printf(" MMU: %#b\n", desc->mmu_caps, "\020" "\01Sv39" "\02Sv48" "\03Sv57"); } if (SHOULD_PRINT(isa_extensions)) { printf(" ISA: %#b\n", desc->isa_extensions, "\020" "\01Atomic" "\03Compressed" "\04Double" "\06Float" + "\10Hypervisor" "\15Mult/Div"); } if (SHOULD_PRINT(smode_extensions)) { printf(" S-mode Extensions: %#b\n", desc->smode_extensions, "\020" "\01Sstc" "\02Svnapot" "\03Svpbmt" "\04Svinval" "\05Sscofpmf"); } #undef SHOULD_PRINT } diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h new file mode 100644 index 000000000000..ed4b65003f94 --- /dev/null +++ b/sys/riscv/vmm/riscv.h @@ -0,0 +1,132 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_RISCV_H_ +#define _VMM_RISCV_H_ + +#include +#include +#include + +struct hypregs { + uint64_t hyp_ra; + uint64_t hyp_sp; + uint64_t hyp_gp; + uint64_t hyp_tp; + uint64_t hyp_t[7]; + uint64_t hyp_s[12]; + uint64_t hyp_a[8]; + uint64_t hyp_sepc; + uint64_t hyp_sstatus; + uint64_t hyp_hstatus; +}; + +struct hypcsr { + uint64_t hvip; + uint64_t vsstatus; + uint64_t vsie; + uint64_t vstvec; + uint64_t vsscratch; + uint64_t vsepc; + uint64_t vscause; + uint64_t vstval; + uint64_t vsatp; + uint64_t scounteren; + uint64_t senvcfg; +}; + +struct hypctx { + struct hypregs host_regs; + struct hypregs guest_regs; + struct hypcsr guest_csrs; + uint64_t host_sscratch; + uint64_t host_stvec; + uint64_t host_scounteren; + uint64_t guest_scounteren; + struct hyp *hyp; + struct vcpu *vcpu; + bool has_exception; + int cpu_id; + int ipi_pending; +}; + +struct hyp { + struct vm *vm; + uint64_t vmid_generation; + bool aplic_attached; + struct aplic *aplic; + struct hypctx *ctx[]; +}; + +struct hyptrap { + uint64_t sepc; + uint64_t scause; + uint64_t stval; + uint64_t htval; + uint64_t htinst; +}; + +#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ + ret_type vmmops_##opname args; + +DEFINE_VMMOPS_IFUNC(int, modinit, (void)) +DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) +DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) +DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) +DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, + struct vm_eventinfo *info)) +DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) +DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)) +DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) +DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) +DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) +DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) +DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) +DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) +DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, + vm_offset_t max)) +DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) + +#define dprintf(fmt, ...) + +struct hypctx *riscv_get_active_vcpu(void); +void vmm_switch(struct hypctx *); +void vmm_unpriv_trap(struct hyptrap *, uint64_t tmp); +int vmm_sbi_ecall(struct vcpu *, bool *); + +void riscv_send_ipi(struct hypctx *hypctx, int hart_id); +int riscv_check_ipi(struct hypctx *hypctx, bool clear); + +#endif /* !_VMM_RISCV_H_ */ diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c new file mode 100644 index 000000000000..52e881dd220c --- /dev/null +++ b/sys/riscv/vmm/vmm.c @@ -0,0 +1,1606 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "vmm_stat.h" +#include "riscv.h" + +#include "vmm_aplic.h" + +struct vcpu { + int flags; + enum vcpu_state state; + struct mtx mtx; + int hostcpu; /* host cpuid this vcpu last ran on */ + int vcpuid; + void *stats; + struct vm_exit exitinfo; + uint64_t nextpc; /* (x) next instruction to execute */ + struct vm *vm; /* (o) */ + void *cookie; /* (i) cpu-specific data */ + struct fpreg *guestfpu; /* (a,i) guest fpu state */ +}; + +#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) +#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) +#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) +#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) +#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) +#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) + +struct mem_seg { + uint64_t gpa; + size_t len; + bool wired; + bool sysmem; + vm_object_t object; +}; +#define VM_MAX_MEMSEGS 3 + +struct mem_map { + vm_paddr_t gpa; + size_t len; + vm_ooffset_t segoff; + int segid; + int prot; + int flags; +}; +#define VM_MAX_MEMMAPS 4 + +struct vmm_mmio_region { + uint64_t start; + uint64_t end; + mem_region_read_t read; + mem_region_write_t write; +}; +#define VM_MAX_MMIO_REGIONS 4 + +/* + * Initialization: + * (o) initialized the first time the VM is created + * (i) initialized when VM is created and when it is reinitialized + * (x) initialized before use + */ +struct vm { + void *cookie; /* (i) cpu-specific data */ + volatile cpuset_t active_cpus; /* (i) active vcpus */ + volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/ + int suspend; /* (i) stop VM execution */ + bool dying; /* (o) is dying */ + volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ + volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ + struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ + struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ + struct vmspace *vmspace; /* (o) guest's address space */ + char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + struct vcpu **vcpu; /* (i) guest vcpus */ + struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; + /* (o) guest MMIO regions */ + 
/* The following describe the vm cpu topology */ + uint16_t sockets; /* (o) num of sockets */ + uint16_t cores; /* (o) num of cores/socket */ + uint16_t threads; /* (o) num of threads/core */ + uint16_t maxcpus; /* (o) max pluggable cpus */ + struct sx mem_segs_lock; /* (o) */ + struct sx vcpus_init_lock; /* (o) */ +}; + +static bool vmm_initialized = false; + +static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); + +/* statistics */ +static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); + +SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); + +static int vmm_ipinum; +SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, + "IPI vector used for vcpu notifications"); + +u_int vm_maxcpu; +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &vm_maxcpu, 0, "Maximum number of vCPUs"); + +static void vm_free_memmap(struct vm *vm, int ident); +static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); +static void vcpu_notify_event_locked(struct vcpu *vcpu); + +/* + * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this + * is a safe value for now. + */ +#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) + +static void +vcpu_cleanup(struct vcpu *vcpu, bool destroy) +{ + vmmops_vcpu_cleanup(vcpu->cookie); + vcpu->cookie = NULL; + if (destroy) { + vmm_stat_free(vcpu->stats); + fpu_save_area_free(vcpu->guestfpu); + vcpu_lock_destroy(vcpu); + } +} + +static struct vcpu * +vcpu_alloc(struct vm *vm, int vcpu_id) +{ + struct vcpu *vcpu; + + KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, + ("vcpu_alloc: invalid vcpu %d", vcpu_id)); + + vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); + vcpu_lock_init(vcpu); + vcpu->state = VCPU_IDLE; + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; + vcpu->vm = vm; + vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->stats = vmm_stat_alloc(); + return (vcpu); +} + +static void +vcpu_init(struct vcpu *vcpu) +{ + vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); + MPASS(vcpu->cookie != NULL); + fpu_save_area_reset(vcpu->guestfpu); + vmm_stat_init(vcpu->stats); +} + +struct vm_exit * +vm_exitinfo(struct vcpu *vcpu) +{ + return (&vcpu->exitinfo); +} + +static int +vmm_init(void) +{ + + vm_maxcpu = mp_ncpus; + + TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); + + if (vm_maxcpu > VM_MAXCPU) { + printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); + vm_maxcpu = VM_MAXCPU; + } + + if (vm_maxcpu == 0) + vm_maxcpu = 1; + + return (vmmops_modinit()); +} + +static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + /* TODO: check if has_hyp here? */ + vmmdev_init(); + error = vmm_init(); + if (error == 0) + vmm_initialized = true; + break; + case MOD_UNLOAD: + /* TODO: check if has_hyp here? */ + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = vmmops_modcleanup(); + if (error) + vmm_initialized = false; + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - HYP initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). 
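+ *
+ * Hence DECLARE_MODULE() below uses SI_SUB_SMP + 1.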
+ */ +DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +static void +vm_init(struct vm *vm, bool create) +{ + int i; + + vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + MPASS(vm->cookie != NULL); + + CPU_ZERO(&vm->active_cpus); + CPU_ZERO(&vm->debug_cpus); + + vm->suspend = 0; + CPU_ZERO(&vm->suspended_cpus); + + memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); + + if (!create) { + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_init(vm->vcpu[i]); + } + } +} + +void +vm_disable_vcpu_creation(struct vm *vm) +{ + sx_xlock(&vm->vcpus_init_lock); + vm->dying = true; + sx_xunlock(&vm->vcpus_init_lock); +} + +struct vcpu * +vm_alloc_vcpu(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) + return (NULL); + + /* Some interrupt controllers may have a CPU limit */ + if (vcpuid >= aplic_max_cpu_count(vm->cookie)) + return (NULL); + + vcpu = (struct vcpu *) + atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); + if (__predict_true(vcpu != NULL)) + return (vcpu); + + sx_xlock(&vm->vcpus_init_lock); + vcpu = vm->vcpu[vcpuid]; + if (vcpu == NULL && !vm->dying) { + vcpu = vcpu_alloc(vm, vcpuid); + vcpu_init(vcpu); + + /* + * Ensure vCPU is fully created before updating pointer + * to permit unlocked reads above. + */ + atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], + (uintptr_t)vcpu); + } + sx_xunlock(&vm->vcpus_init_lock); + return (vcpu); +} + +void +vm_slock_vcpus(struct vm *vm) +{ + sx_slock(&vm->vcpus_init_lock); +} + +void +vm_unlock_vcpus(struct vm *vm) +{ + sx_unlock(&vm->vcpus_init_lock); +} + +int +vm_create(const char *name, struct vm **retvm) +{ + struct vm *vm; + struct vmspace *vmspace; + + /* + * If vmm.ko could not be successfully initialized then don't attempt + * to create the virtual machine. + */ + if (!vmm_initialized) + return (ENXIO); + + if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) + return (EINVAL); + + vmspace = vmmops_vmspace_alloc(0, 1ul << 39); + if (vmspace == NULL) + return (ENOMEM); + + vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + strcpy(vm->name, name); + vm->vmspace = vmspace; + sx_init(&vm->mem_segs_lock, "vm mem_segs"); + sx_init(&vm->vcpus_init_lock, "vm vcpus"); + + vm->sockets = 1; + vm->cores = 1; /* XXX backwards compatibility */ + vm->threads = 1; /* XXX backwards compatibility */ + vm->maxcpus = vm_maxcpu; + + vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, + M_WAITOK | M_ZERO); + + vm_init(vm, true); + + *retvm = vm; + return (0); +} + +void +vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus) +{ + *sockets = vm->sockets; + *cores = vm->cores; + *threads = vm->threads; + *maxcpus = vm->maxcpus; +} + +uint16_t +vm_get_maxcpus(struct vm *vm) +{ + return (vm->maxcpus); +} + +int +vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus) +{ + /* Ignore maxcpus. */ + if ((sockets * cores * threads) > vm->maxcpus) + return (EINVAL); + vm->sockets = sockets; + vm->cores = cores; + vm->threads = threads; + return(0); +} + +static void +vm_cleanup(struct vm *vm, bool destroy) +{ + struct mem_map *mm; + int i; + + aplic_detach_from_vm(vm->cookie); + + for (i = 0; i < vm->maxcpus; i++) { + if (vm->vcpu[i] != NULL) + vcpu_cleanup(vm->vcpu[i], destroy); + } + + vmmops_cleanup(vm->cookie); + + /* + * System memory is removed from the guest address space only when + * the VM is destroyed. 
This is because the mapping remains the same + * across VM reset. + * + * Device memory can be relocated by the guest (e.g. using PCI BARs) + * so those mappings are removed on a VM reset. + */ + if (!destroy) { + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (destroy || !sysmem_mapping(vm, mm)) + vm_free_memmap(vm, i); + } + } + + if (destroy) { + for (i = 0; i < VM_MAX_MEMSEGS; i++) + vm_free_memseg(vm, i); + + vmmops_vmspace_free(vm->vmspace); + vm->vmspace = NULL; + + for (i = 0; i < vm->maxcpus; i++) + free(vm->vcpu[i], M_VMM); + free(vm->vcpu, M_VMM); + sx_destroy(&vm->vcpus_init_lock); + sx_destroy(&vm->mem_segs_lock); + } +} + +void +vm_destroy(struct vm *vm) +{ + + vm_cleanup(vm, true); + + free(vm, M_VMM); +} + +int +vm_reinit(struct vm *vm) +{ + int error; + + /* + * A virtual machine can be reset only if all vcpus are suspended. + */ + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { + vm_cleanup(vm, false); + vm_init(vm, false); + error = 0; + } else { + error = EBUSY; + } + + return (error); +} + +const char * +vm_name(struct vm *vm) +{ + return (vm->name); +} + +void +vm_slock_memsegs(struct vm *vm) +{ + sx_slock(&vm->mem_segs_lock); +} + +void +vm_xlock_memsegs(struct vm *vm) +{ + sx_xlock(&vm->mem_segs_lock); +} + +void +vm_unlock_memsegs(struct vm *vm) +{ + sx_unlock(&vm->mem_segs_lock); +} + +/* + * Return 'true' if 'gpa' is allocated in the guest address space. + * + * This function is called in the context of a running vcpu which acts as + * an implicit lock on 'vm->mem_maps[]'. + */ +bool +vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) +{ + struct vm *vm = vcpu->vm; + struct mem_map *mm; + int i; + +#ifdef INVARIANTS + int hostcpu, state; + state = vcpu_get_state(vcpu, &hostcpu); + KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, + ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); +#endif + + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) + return (true); /* 'gpa' is sysmem or devmem */ + } + + return (false); +} + +int +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) +{ + struct mem_seg *seg; + vm_object_t obj; + + sx_assert(&vm->mem_segs_lock, SX_XLOCKED); + + if (ident < 0 || ident >= VM_MAX_MEMSEGS) + return (EINVAL); + + if (len == 0 || (len & PAGE_MASK)) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + if (seg->len == len && seg->sysmem == sysmem) + return (EEXIST); + else + return (EINVAL); + } + + obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); + if (obj == NULL) + return (ENOMEM); + + seg->len = len; + seg->object = obj; + seg->sysmem = sysmem; + return (0); +} + +int +vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + vm_object_t *objptr) +{ + struct mem_seg *seg; + + sx_assert(&vm->mem_segs_lock, SX_LOCKED); + + if (ident < 0 || ident >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (len) + *len = seg->len; + if (sysmem) + *sysmem = seg->sysmem; + if (objptr) + *objptr = seg->object; + return (0); +} + +void +vm_free_memseg(struct vm *vm, int ident) +{ + struct mem_seg *seg; + + KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, + ("%s: invalid memseg ident %d", __func__, ident)); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + vm_object_deallocate(seg->object); + bzero(seg, sizeof(struct mem_seg)); + } +} + +int +vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, + size_t len, int 
prot, int flags) +{ + struct mem_seg *seg; + struct mem_map *m, *map; + vm_ooffset_t last; + int i, error; + + dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len); + + if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) + return (EINVAL); + + if (flags & ~VM_MEMMAP_F_WIRED) + return (EINVAL); + + if (segid < 0 || segid >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[segid]; + if (seg->object == NULL) + return (EINVAL); + + last = first + len; + if (first < 0 || first >= last || last > seg->len) + return (EINVAL); + + if ((gpa | first | last) & PAGE_MASK) + return (EINVAL); + + map = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + m = &vm->mem_maps[i]; + if (m->len == 0) { + map = m; + break; + } + } + + if (map == NULL) + return (ENOSPC); + + error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, + len, 0, VMFS_NO_SPACE, prot, prot, 0); + if (error != KERN_SUCCESS) + return (EFAULT); + + vm_object_reference(seg->object); + + if (flags & VM_MEMMAP_F_WIRED) { + error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, + VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); + if (error != KERN_SUCCESS) { + vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); + return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : + EFAULT); + } + } + + map->gpa = gpa; + map->len = len; + map->segoff = first; + map->segid = segid; + map->prot = prot; + map->flags = flags; + return (0); +} + +int +vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) +{ + struct mem_map *m; + int i; + + dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len); + + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + m = &vm->mem_maps[i]; + if (m->gpa == gpa && m->len == len) { + vm_free_memmap(vm, i); + return (0); + } + } + + return (EINVAL); +} + +int +vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) +{ + struct mem_map *mm, *mmnext; + int i; + + mmnext = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len == 0 || mm->gpa < *gpa) + continue; + if (mmnext == NULL || mm->gpa < mmnext->gpa) + mmnext = mm; + } + + if (mmnext != NULL) { + *gpa = mmnext->gpa; + if (segid) + *segid = mmnext->segid; + if (segoff) + *segoff = mmnext->segoff; + if (len) + *len = mmnext->len; + if (prot) + *prot = mmnext->prot; + if (flags) + *flags = mmnext->flags; + return (0); + } else { + return (ENOENT); + } +} + +static void +vm_free_memmap(struct vm *vm, int ident) +{ + struct mem_map *mm; + int error __diagused; + + mm = &vm->mem_maps[ident]; + if (mm->len) { + error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, + mm->gpa + mm->len); + KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", + __func__, error)); + bzero(mm, sizeof(struct mem_map)); + } +} + +static __inline bool +sysmem_mapping(struct vm *vm, struct mem_map *mm) +{ + + if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) + return (true); + else + return (false); +} + +vm_paddr_t +vmm_sysmem_maxaddr(struct vm *vm) +{ + struct mem_map *mm; + vm_paddr_t maxaddr; + int i; + + maxaddr = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm)) { + if (maxaddr < mm->gpa + mm->len) + maxaddr = mm->gpa + mm->len; + } + } + return (maxaddr); +} + +int +vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault) +{ + int error; + + error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); + + return (error); +} + +void 
+vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, + mem_region_read_t mmio_read, mem_region_write_t mmio_write) +{ + int i; + + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start == 0 && + vm->mmio_region[i].end == 0) { + vm->mmio_region[i].start = start; + vm->mmio_region[i].end = start + size; + vm->mmio_region[i].read = mmio_read; + vm->mmio_region[i].write = mmio_write; + return; + } + } + + panic("%s: No free MMIO region", __func__); +} + +void +vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) +{ + int i; + + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start == start && + vm->mmio_region[i].end == start + size) { + memset(&vm->mmio_region[i], 0, + sizeof(vm->mmio_region[i])); + return; + } + } + + panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, + start + size); +} + +static int +vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm; + struct vm_exit *vme; + struct vie *vie; + struct hyp *hyp; + uint64_t fault_ipa; + struct vm_guest_paging *paging; + struct vmm_mmio_region *vmr; + int error, i; + + vm = vcpu->vm; + hyp = vm->cookie; + if (!hyp->aplic_attached) + goto out_user; + + vme = &vcpu->exitinfo; + vie = &vme->u.inst_emul.vie; + paging = &vme->u.inst_emul.paging; + + fault_ipa = vme->u.inst_emul.gpa; + + vmr = NULL; + for (i = 0; i < nitems(vm->mmio_region); i++) { + if (vm->mmio_region[i].start <= fault_ipa && + vm->mmio_region[i].end > fault_ipa) { + vmr = &vm->mmio_region[i]; + break; + } + } + if (vmr == NULL) + goto out_user; + + error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, + vmr->read, vmr->write, retu); + return (error); + +out_user: + *retu = true; + return (0); +} + +int +vm_suspend(struct vm *vm, enum vm_suspend_how how) +{ + int i; + + if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) + return (EINVAL); + + if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { + VM_CTR2(vm, "virtual machine already suspended %d/%d", + vm->suspend, how); + return (EALREADY); + } + + VM_CTR1(vm, "virtual machine successfully suspended %d", how); + + /* + * Notify all active vcpus that they are now suspended. 
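+ * This wakes up vcpus sleeping in wfi and kicks running vcpus out of
+ * the guest so that they can observe 'vm->suspend'.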
+ */
+	for (i = 0; i < vm->maxcpus; i++) {
+		if (CPU_ISSET(i, &vm->active_cpus))
+			vcpu_notify_event(vm_vcpu(vm, i));
+	}
+
+	return (0);
+}
+
+void
+vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
+{
+	struct vm *vm = vcpu->vm;
+	struct vm_exit *vmexit;
+
+	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
+	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
+
+	vmexit = vm_exitinfo(vcpu);
+	vmexit->pc = pc;
+	vmexit->inst_length = 4;
+	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
+	vmexit->u.suspended.how = vm->suspend;
+}
+
+void
+vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
+{
+	struct vm_exit *vmexit;
+
+	vmexit = vm_exitinfo(vcpu);
+	vmexit->pc = pc;
+	vmexit->inst_length = 4;
+	vmexit->exitcode = VM_EXITCODE_DEBUG;
+}
+
+int
+vm_activate_cpu(struct vcpu *vcpu)
+{
+	struct vm *vm = vcpu->vm;
+
+	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+		return (EBUSY);
+
+	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
+	return (0);
+}
+
+int
+vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+	if (vcpu == NULL) {
+		vm->debug_cpus = vm->active_cpus;
+		for (int i = 0; i < vm->maxcpus; i++) {
+			if (CPU_ISSET(i, &vm->active_cpus))
+				vcpu_notify_event(vm_vcpu(vm, i));
+		}
+	} else {
+		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
+			return (EINVAL);
+
+		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+		vcpu_notify_event(vcpu);
+	}
+	return (0);
+}
+
+int
+vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
+{
+
+	if (vcpu == NULL) {
+		CPU_ZERO(&vm->debug_cpus);
+	} else {
+		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
+			return (EINVAL);
+
+		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
+	}
+	return (0);
+}
+
+int
+vcpu_debugged(struct vcpu *vcpu)
+{
+
+	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+	return (vm->active_cpus);
+}
+
+cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+
+	return (vm->debug_cpus);
+}
+
+cpuset_t
+vm_suspended_cpus(struct vm *vm)
+{
+
+	return (vm->suspended_cpus);
+}
+
+void *
+vcpu_stats(struct vcpu *vcpu)
+{
+
+	return (vcpu->stats);
+}
+
+/*
+ * This function is called to ensure that a vcpu "sees" a pending event
+ * as soon as possible:
+ * - If the vcpu thread is sleeping then it is woken up.
+ * - If the vcpu is running on a different host_cpu then an IPI will be
+ *   directed to the host_cpu to cause the vcpu to trap into the hypervisor.
+ */
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu)
+{
+	int hostcpu;
+
+	hostcpu = vcpu->hostcpu;
+	if (vcpu->state == VCPU_RUNNING) {
+		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
+		if (hostcpu != curcpu) {
+			ipi_cpu(hostcpu, vmm_ipinum);
+		} else {
+			/*
+			 * If the 'vcpu' is running on 'curcpu' then it must
+			 * be sending a notification to itself (e.g. SELF_IPI).
+			 * The pending event will be picked up when the vcpu
+			 * transitions back to guest context.
+			 */
+		}
+	} else {
+		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
+		    "with hostcpu %d", vcpu->state, hostcpu));
+		if (vcpu->state == VCPU_SLEEPING)
+			wakeup_one(vcpu);
+	}
+}
+
+void
+vcpu_notify_event(struct vcpu *vcpu)
+{
+	vcpu_lock(vcpu);
+	vcpu_notify_event_locked(vcpu);
+	vcpu_unlock(vcpu);
+}
+
+static void
+restore_guest_fpustate(struct vcpu *vcpu)
+{
+
+	/* Flush host state to the pcb. */
+	fpe_state_save(curthread);
+
+	/* Ensure the FPU state will be re-loaded when exiting the guest.
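+ * Clearing fpcurthread below marks the FPU contents as belonging to
+ * no host thread.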
*/ + PCPU_SET(fpcurthread, NULL); + + /* restore guest FPU state */ + fpe_enable(); + fpe_restore(vcpu->guestfpu); + + /* + * The FPU is now "dirty" with the guest's state so turn on emulation + * to trap any access to the FPU by the host. + */ + fpe_disable(); +} + +static void +save_guest_fpustate(struct vcpu *vcpu) +{ + + /* Save guest FPE state. */ + fpe_enable(); + fpe_store(vcpu->guestfpu); + fpe_disable(); + + KASSERT(PCPU_GET(fpcurthread) == NULL, + ("%s: fpcurthread set with guest registers", __func__)); +} + +static int +vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, + bool from_idle) +{ + int error; + + vcpu_assert_locked(vcpu); + + /* + * State transitions from the vmmdev_ioctl() must always begin from + * the VCPU_IDLE state. This guarantees that there is only a single + * ioctl() operating on a vcpu at any point. + */ + if (from_idle) { + while (vcpu->state != VCPU_IDLE) { + vcpu_notify_event_locked(vcpu); + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", + hz / 1000); + } + } else { + KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " + "vcpu idle state")); + } + + if (vcpu->state == VCPU_RUNNING) { + KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " + "mismatch for running vcpu", curcpu, vcpu->hostcpu)); + } else { + KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " + "vcpu that is not running", vcpu->hostcpu)); + } + + /* + * The following state transitions are allowed: + * IDLE -> FROZEN -> IDLE + * FROZEN -> RUNNING -> FROZEN + * FROZEN -> SLEEPING -> FROZEN + */ + switch (vcpu->state) { + case VCPU_IDLE: + case VCPU_RUNNING: + case VCPU_SLEEPING: + error = (newstate != VCPU_FROZEN); + break; + case VCPU_FROZEN: + error = (newstate == VCPU_FROZEN); + break; + default: + error = 1; + break; + } + + if (error) + return (EBUSY); + + vcpu->state = newstate; + if (newstate == VCPU_RUNNING) + vcpu->hostcpu = curcpu; + else + vcpu->hostcpu = NOCPU; + + if (newstate == VCPU_IDLE) + wakeup(&vcpu->state); + + return (0); +} + +static void +vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) +{ + int error; + + if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) + panic("Error %d setting state to %d\n", error, newstate); +} + +static void +vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) +{ + int error; + + if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) + panic("Error %d setting state to %d", error, newstate); +} + +int +vm_get_capability(struct vcpu *vcpu, int type, int *retval) +{ + + if (type < 0 || type >= VM_CAP_MAX) + return (EINVAL); + + return (vmmops_getcap(vcpu->cookie, type, retval)); +} + +int +vm_set_capability(struct vcpu *vcpu, int type, int val) +{ + + if (type < 0 || type >= VM_CAP_MAX) + return (EINVAL); + + return (vmmops_setcap(vcpu->cookie, type, val)); +} + +struct vm * +vcpu_vm(struct vcpu *vcpu) +{ + + return (vcpu->vm); +} + +int +vcpu_vcpuid(struct vcpu *vcpu) +{ + + return (vcpu->vcpuid); +} + +void * +vcpu_get_cookie(struct vcpu *vcpu) +{ + + return (vcpu->cookie); +} + +struct vcpu * +vm_vcpu(struct vm *vm, int vcpuid) +{ + + return (vm->vcpu[vcpuid]); +} + +int +vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) +{ + int error; + + vcpu_lock(vcpu); + error = vcpu_set_state_locked(vcpu, newstate, from_idle); + vcpu_unlock(vcpu); + + return (error); +} + +enum vcpu_state +vcpu_get_state(struct vcpu *vcpu, int *hostcpu) +{ + enum vcpu_state state; + + vcpu_lock(vcpu); + state = vcpu->state; + if (hostcpu != NULL) + 
*hostcpu = vcpu->hostcpu; + vcpu_unlock(vcpu); + + return (state); +} + +static void * +_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ + int i, count, pageoff; + struct mem_map *mm; + vm_page_t m; + + pageoff = gpa & PAGE_MASK; + if (len > PAGE_SIZE - pageoff) + panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); + + count = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && + gpa < mm->gpa + mm->len) { + count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, + trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); + break; + } + } + + if (count == 1) { + *cookie = m; + return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); + } else { + *cookie = NULL; + return (NULL); + } +} + +void * +vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ +#ifdef INVARIANTS + /* + * The current vcpu should be frozen to ensure 'vm_memmap[]' + * stability. + */ + int state = vcpu_get_state(vcpu, NULL); + KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", + __func__, state)); +#endif + return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); +} + +void * +vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, + void **cookie) +{ + sx_assert(&vm->mem_segs_lock, SX_LOCKED); + return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); +} + +void +vm_gpa_release(void *cookie) +{ + vm_page_t m = cookie; + + vm_page_unwire(m, PQ_ACTIVE); +} + +int +vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) +{ + + if (reg >= VM_REG_LAST) + return (EINVAL); + + return (vmmops_getreg(vcpu->cookie, reg, retval)); +} + +int +vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) +{ + int error; + + if (reg >= VM_REG_LAST) + return (EINVAL); + error = vmmops_setreg(vcpu->cookie, reg, val); + if (error || reg != VM_REG_GUEST_SEPC) + return (error); + + vcpu->nextpc = val; + + return (0); +} + +void * +vm_get_cookie(struct vm *vm) +{ + + return (vm->cookie); +} + +int +vm_inject_exception(struct vcpu *vcpu, uint64_t scause) +{ + + return (vmmops_exception(vcpu->cookie, scause)); +} + +int +vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr) +{ + + return (aplic_attach_to_vm(vm->cookie, descr)); +} + +int +vm_assert_irq(struct vm *vm, uint32_t irq) +{ + + return (aplic_inject_irq(vm->cookie, -1, irq, true)); +} + +int +vm_deassert_irq(struct vm *vm, uint32_t irq) +{ + + return (aplic_inject_irq(vm->cookie, -1, irq, false)); +} + +int +vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, + int func) +{ + + return (aplic_inject_msi(vm->cookie, msg, addr)); +} + +static int +vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) +{ + + vcpu_lock(vcpu); + + while (1) { + if (aplic_check_pending(vcpu->cookie)) + break; + + if (riscv_check_ipi(vcpu->cookie, false)) + break; + + if (vcpu_should_yield(vcpu)) + break; + + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + /* + * XXX msleep_spin() cannot be interrupted by signals so + * wake up periodically to check pending signals. 
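+ * The short timeout passed to msleep_spin() below bounds how long a
+ * pending signal can go unnoticed.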
+ */ + msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000); + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + } + vcpu_unlock(vcpu); + + *retu = false; + + return (0); +} + +static int +vm_handle_paging(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm; + struct vm_exit *vme; + struct vm_map *map; + uint64_t addr; + pmap_t pmap; + int ftype, rv; + + vm = vcpu->vm; + vme = &vcpu->exitinfo; + + pmap = vmspace_pmap(vm->vmspace); + addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); + + dprintf("%s: %lx\n", __func__, addr); + + switch (vme->scause) { + case SCAUSE_STORE_GUEST_PAGE_FAULT: + ftype = VM_PROT_WRITE; + break; + case SCAUSE_FETCH_GUEST_PAGE_FAULT: + ftype = VM_PROT_EXECUTE; + break; + case SCAUSE_LOAD_GUEST_PAGE_FAULT: + ftype = VM_PROT_READ; + break; + default: + panic("unknown page trap: %lu", vme->scause); + } + + /* The page exists, but the page table needs to be updated. */ + if (pmap_fault(pmap, addr, ftype)) + return (0); + + map = &vm->vmspace->vm_map; + rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); + if (rv != KERN_SUCCESS) { + printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", + __func__, addr, ftype, rv); + return (EFAULT); + } + + return (0); +} + +static int +vm_handle_suspend(struct vcpu *vcpu, bool *retu) +{ + struct vm *vm = vcpu->vm; + int error, i; + struct thread *td; + + error = 0; + td = curthread; + + CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); + + /* + * Wait until all 'active_cpus' have suspended themselves. + * + * Since a VM may be suspended at any time including when one or + * more vcpus are doing a rendezvous we need to call the rendezvous + * handler while we are waiting to prevent a deadlock. + */ + vcpu_lock(vcpu); + while (error == 0) { + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) + break; + + vcpu_require_state_locked(vcpu, VCPU_SLEEPING); + msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); + vcpu_require_state_locked(vcpu, VCPU_FROZEN); + if (td_ast_pending(td, TDA_SUSPEND)) { + vcpu_unlock(vcpu); + error = thread_check_susp(td, false); + vcpu_lock(vcpu); + } + } + vcpu_unlock(vcpu); + + /* + * Wakeup the other sleeping vcpus and return to userspace. + */ + for (i = 0; i < vm->maxcpus; i++) { + if (CPU_ISSET(i, &vm->suspended_cpus)) { + vcpu_notify_event(vm_vcpu(vm, i)); + } + } + + *retu = true; + return (error); +} + +int +vm_run(struct vcpu *vcpu) +{ + struct vm_eventinfo evinfo; + struct vm_exit *vme; + struct vm *vm; + pmap_t pmap; + int error; + int vcpuid; + bool retu; + + vm = vcpu->vm; + + dprintf("%s\n", __func__); + + vcpuid = vcpu->vcpuid; + + if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EINVAL); + + if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) + return (EINVAL); + + pmap = vmspace_pmap(vm->vmspace); + vme = &vcpu->exitinfo; + evinfo.rptr = NULL; + evinfo.sptr = &vm->suspend; + evinfo.iptr = NULL; +restart: + critical_enter(); + + restore_guest_fpustate(vcpu); + + vcpu_require_state(vcpu, VCPU_RUNNING); + error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); + vcpu_require_state(vcpu, VCPU_FROZEN); + + save_guest_fpustate(vcpu); + + critical_exit(); + + if (error == 0) { + retu = false; + switch (vme->exitcode) { + case VM_EXITCODE_INST_EMUL: + vcpu->nextpc = vme->pc + vme->inst_length; + error = vm_handle_inst_emul(vcpu, &retu); + break; + case VM_EXITCODE_WFI: + vcpu->nextpc = vme->pc + vme->inst_length; + error = vm_handle_wfi(vcpu, vme, &retu); + break; + case VM_EXITCODE_ECALL: + /* Handle in userland. 
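+ * (SBI calls that are not emulated by the in-kernel handler reach
+ * the user process this way)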
*/ + vcpu->nextpc = vme->pc + vme->inst_length; + retu = true; + break; + case VM_EXITCODE_PAGING: + vcpu->nextpc = vme->pc; + error = vm_handle_paging(vcpu, &retu); + break; + case VM_EXITCODE_BOGUS: + vcpu->nextpc = vme->pc; + retu = false; + error = 0; + break; + case VM_EXITCODE_SUSPENDED: + vcpu->nextpc = vme->pc; + error = vm_handle_suspend(vcpu, &retu); + break; + default: + /* Handle in userland. */ + vcpu->nextpc = vme->pc; + retu = true; + break; + } + } + + if (error == 0 && retu == false) + goto restart; + + return (error); +} diff --git a/sys/riscv/vmm/vmm_aplic.c b/sys/riscv/vmm/vmm_aplic.c new file mode 100644 index 000000000000..60da6b4a27fb --- /dev/null +++ b/sys/riscv/vmm/vmm_aplic.c @@ -0,0 +1,528 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +MALLOC_DEFINE(M_APLIC, "RISC-V VMM APLIC", "RISC-V AIA APLIC"); + +#define APLIC_DOMAINCFG 0x0000 +#define DOMAINCFG_IE (1 << 8) /* Interrupt Enable. */ +#define DOMAINCFG_DM (1 << 2) /* Direct Mode. */ +#define DOMAINCFG_BE (1 << 0) /* Big-Endian. */ +#define APLIC_SOURCECFG(x) (0x0004 + ((x) - 1) * 4) +#define SOURCECFG_D (1 << 10) /* D - Delegate. */ +/* If D == 0. */ +#define SOURCECFG_SM_S (0) +#define SOURCECFG_SM_M (0x7 << SOURCECFG_SM_S) +#define SOURCECFG_SM_INACTIVE (0) /* Not delegated. */ +#define SOURCECFG_SM_DETACHED (1) +#define SOURCECFG_SM_RESERVED (2) +#define SOURCECFG_SM_RESERVED1 (3) +#define SOURCECFG_SM_EDGE1 (4) /* Rising edge. */ +#define SOURCECFG_SM_EDGE0 (5) /* Falling edge. */ +#define SOURCECFG_SM_LEVEL1 (6) /* High. */ +#define SOURCECFG_SM_LEVEL0 (7) /* Low. */ +/* If D == 1. 
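+ * (the interrupt source is delegated to the child domain selected by
+ * the Child Index field below)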
*/ +#define SOURCECFG_CHILD_INDEX_S (0) +#define SOURCECFG_CHILD_INDEX_M (0x3ff << SOURCECFG_CHILD_INDEX_S) +#define APLIC_SETIPNUM 0x1cdc +#define APLIC_CLRIPNUM 0x1ddc +#define APLIC_SETIENUM 0x1edc +#define APLIC_CLRIENUM 0x1fdc +#define APLIC_GENMSI 0x3000 +#define APLIC_TARGET(x) (0x3004 + ((x) - 1) * 4) +#define TARGET_HART_S 18 +#define TARGET_HART_M 0x3fff +#define APLIC_IDC(x) (0x4000 + (x) * 32) +#define IDC_IDELIVERY(x) (APLIC_IDC(x) + 0x0) +#define IDC_IFORCE(x) (APLIC_IDC(x) + 0x4) +#define IDC_ITHRESHOLD(x) (APLIC_IDC(x) + 0x8) +#define IDC_TOPI(x) (APLIC_IDC(x) + 0x18) +#define IDC_CLAIMI(x) (APLIC_IDC(x) + 0x1C) +#define CLAIMI_IRQ_S (16) +#define CLAIMI_IRQ_M (0x3ff << CLAIMI_IRQ_S) +#define CLAIMI_PRIO_S (0) +#define CLAIMI_PRIO_M (0xff << CLAIMI_PRIO_S) + +#define APLIC_NIRQS 63 + +struct aplic_irq { + uint32_t sourcecfg; + uint32_t state; +#define APLIC_IRQ_STATE_PENDING (1 << 0) +#define APLIC_IRQ_STATE_ENABLED (1 << 1) + uint32_t target; + uint32_t target_hart; +}; + +struct aplic { + uint32_t mem_start; + uint32_t mem_end; + struct mtx mtx; + struct aplic_irq *irqs; + int nirqs; + uint32_t domaincfg; +}; + +static int +aplic_handle_sourcecfg(struct aplic *aplic, int i, bool write, uint64_t *val) +{ + struct aplic_irq *irq; + + if (i <= 0 || i > aplic->nirqs) + return (ENOENT); + + mtx_lock_spin(&aplic->mtx); + irq = &aplic->irqs[i]; + if (write) + irq->sourcecfg = *val; + else + *val = irq->sourcecfg; + mtx_unlock_spin(&aplic->mtx); + + return (0); +} + +static int +aplic_set_enabled(struct aplic *aplic, bool write, uint64_t *val, bool enabled) +{ + struct aplic_irq *irq; + int i; + + if (!write) { + *val = 0; + return (0); + } + + i = *val; + if (i <= 0 || i > aplic->nirqs) + return (-1); + + irq = &aplic->irqs[i]; + + mtx_lock_spin(&aplic->mtx); + if (enabled) + irq->state |= APLIC_IRQ_STATE_ENABLED; + else + irq->state &= ~APLIC_IRQ_STATE_ENABLED; + mtx_unlock_spin(&aplic->mtx); + + return (0); +} + +static int +aplic_handle_target(struct aplic *aplic, int i, bool write, uint64_t *val) +{ + struct aplic_irq *irq; + + mtx_lock_spin(&aplic->mtx); + irq = &aplic->irqs[i]; + if (write) { + irq->target = *val; + irq->target_hart = (irq->target >> TARGET_HART_S); + } else + *val = irq->target; + mtx_unlock_spin(&aplic->mtx); + + return (0); +} + +static int +aplic_handle_idc_claimi(struct hyp *hyp, struct aplic *aplic, int cpu_id, + bool write, uint64_t *val) +{ + struct aplic_irq *irq; + bool found; + int i; + + /* Writes to claimi are ignored. 
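+ * (a write does not modify any interrupt state)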
+	 */
+	if (write)
+		return (-1);
+
+	found = false;
+
+	mtx_lock_spin(&aplic->mtx);
+	for (i = 0; i < aplic->nirqs; i++) {
+		irq = &aplic->irqs[i];
+		if (irq->target_hart != cpu_id)
+			continue;
+		if (irq->state & APLIC_IRQ_STATE_PENDING) {
+			*val = (i << CLAIMI_IRQ_S) | (0 << CLAIMI_PRIO_S);
+			irq->state &= ~APLIC_IRQ_STATE_PENDING;
+			found = true;
+			break;
+		}
+	}
+	mtx_unlock_spin(&aplic->mtx);
+
+	if (found == false)
+		*val = 0;
+
+	return (0);
+}
+
+static int
+aplic_handle_idc(struct hyp *hyp, struct aplic *aplic, int cpu, int reg,
+    bool write, uint64_t *val)
+{
+	int error;
+
+	switch (reg + APLIC_IDC(0)) {
+	case IDC_IDELIVERY(0):
+	case IDC_IFORCE(0):
+	case IDC_ITHRESHOLD(0):
+	case IDC_TOPI(0):
+		error = 0;
+		break;
+	case IDC_CLAIMI(0):
+		error = aplic_handle_idc_claimi(hyp, aplic, cpu, write, val);
+		break;
+	default:
+		error = ENOENT;
+	}
+
+	return (error);
+}
+
+static int
+aplic_mmio_access(struct hyp *hyp, struct aplic *aplic, uint64_t reg,
+    bool write, uint64_t *val)
+{
+	int error;
+	int cpu;
+	int r;
+	int i;
+
+	if ((reg >= APLIC_SOURCECFG(1)) &&
+	    (reg <= APLIC_SOURCECFG(aplic->nirqs))) {
+		i = ((reg - APLIC_SOURCECFG(1)) >> 2) + 1;
+		error = aplic_handle_sourcecfg(aplic, i, write, val);
+		return (error);
+	}
+
+	if ((reg >= APLIC_TARGET(1)) && (reg <= APLIC_TARGET(aplic->nirqs))) {
+		i = ((reg - APLIC_TARGET(1)) >> 2) + 1;
+		error = aplic_handle_target(aplic, i, write, val);
+		return (error);
+	}
+
+	if ((reg >= APLIC_IDC(0)) && (reg < APLIC_IDC(mp_ncpus))) {
+		cpu = (reg - APLIC_IDC(0)) >> 5;
+		r = (reg - APLIC_IDC(0)) % 32;
+		error = aplic_handle_idc(hyp, aplic, cpu, r, write, val);
+		return (error);
+	}
+
+	switch (reg) {
+	case APLIC_DOMAINCFG:
+		if (write)
+			aplic->domaincfg = *val & DOMAINCFG_IE;
+		else
+			*val = aplic->domaincfg;
+		error = 0;
+		break;
+	case APLIC_SETIENUM:
+		error = aplic_set_enabled(aplic, write, val, true);
+		break;
+	case APLIC_CLRIENUM:
+		error = aplic_set_enabled(aplic, write, val, false);
+		break;
+	default:
+		dprintf("%s: unknown reg %lx\n", __func__, reg);
+		error = ENOENT;
+		break;
+	}
+
+	return (error);
+}
+
+static int
+mem_read(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t *rval, int size,
+    void *arg)
+{
+	struct hypctx *hypctx;
+	struct hyp *hyp;
+	struct aplic *aplic;
+	uint64_t reg;
+	uint64_t val;
+	int error;
+
+	hypctx = vcpu_get_cookie(vcpu);
+	hyp = hypctx->hyp;
+	aplic = hyp->aplic;
+
+	dprintf("%s: fault_ipa %lx size %d\n", __func__, fault_ipa, size);
+
+	if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+		return (EINVAL);
+
+	reg = fault_ipa - aplic->mem_start;
+
+	error = aplic_mmio_access(hyp, aplic, reg, false, &val);
+	if (error == 0)
+		*rval = val;
+
+	return (error);
+}
+
+static int
+mem_write(struct vcpu *vcpu, uint64_t fault_ipa, uint64_t wval, int size,
+    void *arg)
+{
+	struct hypctx *hypctx;
+	struct hyp *hyp;
+	struct aplic *aplic;
+	uint64_t reg;
+	uint64_t val;
+	int error;
+
+	hypctx = vcpu_get_cookie(vcpu);
+	hyp = hypctx->hyp;
+	aplic = hyp->aplic;
+
+	dprintf("%s: fault_ipa %lx wval %lx size %d\n", __func__, fault_ipa,
+	    wval, size);
+
+	if (fault_ipa < aplic->mem_start || fault_ipa + size > aplic->mem_end)
+		return (EINVAL);
+
+	reg = fault_ipa - aplic->mem_start;
+
+	val = wval;
+
+	error = aplic_mmio_access(hyp, aplic, reg, true, &val);
+
+	return (error);
+}
+
+void
+aplic_vminit(struct hyp *hyp)
+{
+	struct aplic *aplic;
+
+	hyp->aplic = malloc(sizeof(*hyp->aplic), M_APLIC,
+	    M_WAITOK | M_ZERO);
+	aplic = hyp->aplic;
+
+	mtx_init(&aplic->mtx, "APLIC lock", NULL, MTX_SPIN);
+}
+
+void
+aplic_vmcleanup(struct hyp *hyp)
+{
+	struct aplic *aplic;
+
+	aplic = hyp->aplic;
+
+	mtx_destroy(&aplic->mtx);
+
+	free(hyp->aplic, M_APLIC);
+}
+
+int
+aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr)
+{
+	struct aplic *aplic;
+	struct vm *vm;
+
+	vm = hyp->vm;
+
+	dprintf("%s\n", __func__);
+
+	vm_register_inst_handler(vm, descr->mem_start, descr->mem_size,
+	    mem_read, mem_write);
+
+	aplic = hyp->aplic;
+	aplic->nirqs = APLIC_NIRQS;
+	aplic->mem_start = descr->mem_start;
+	aplic->mem_end = descr->mem_start + descr->mem_size;
+	aplic->irqs = malloc(sizeof(struct aplic_irq) * aplic->nirqs, M_APLIC,
+	    M_WAITOK | M_ZERO);
+
+	hyp->aplic_attached = true;
+
+	return (0);
+}
+
+void
+aplic_detach_from_vm(struct hyp *hyp)
+{
+	struct aplic *aplic;
+
+	aplic = hyp->aplic;
+
+	dprintf("%s\n", __func__);
+
+	if (hyp->aplic_attached) {
+		hyp->aplic_attached = false;
+		free(aplic->irqs, M_APLIC);
+	}
+}
+
+int
+aplic_check_pending(struct hypctx *hypctx)
+{
+	struct aplic_irq *irq;
+	struct aplic *aplic;
+	struct hyp *hyp;
+	int i;
+
+	hyp = hypctx->hyp;
+	aplic = hyp->aplic;
+
+	mtx_lock_spin(&aplic->mtx);
+	if ((aplic->domaincfg & DOMAINCFG_IE) == 0) {
+		mtx_unlock_spin(&aplic->mtx);
+		return (0);
+	}
+
+	for (i = 0; i < aplic->nirqs; i++) {
+		irq = &aplic->irqs[i];
+		if (irq->target_hart != hypctx->cpu_id)
+			continue;
+		if ((irq->state & APLIC_IRQ_STATE_ENABLED) &&
+		    (irq->state & APLIC_IRQ_STATE_PENDING)) {
+			mtx_unlock_spin(&aplic->mtx);
+			/* Found. */
+			return (1);
+		}
+	}
+	mtx_unlock_spin(&aplic->mtx);
+
+	return (0);
+}
+
+int
+aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level)
+{
+	struct aplic_irq *irq;
+	struct aplic *aplic;
+	bool notify;
+	int error;
+
+	aplic = hyp->aplic;
+
+	/* irqid arrives from an ioctl; bound it before indexing irqs[]. */
+	if (irqid >= aplic->nirqs)
+		return (EINVAL);
+
+	error = 0;
+
+	mtx_lock_spin(&aplic->mtx);
+	if ((aplic->domaincfg & DOMAINCFG_IE) == 0) {
+		mtx_unlock_spin(&aplic->mtx);
+		return (error);
+	}
+
+	irq = &aplic->irqs[irqid];
+	if (irq->sourcecfg & SOURCECFG_D) {
+		mtx_unlock_spin(&aplic->mtx);
+		return (error);
+	}
+
+	notify = false;
+	switch (irq->sourcecfg & SOURCECFG_SM_M) {
+	case SOURCECFG_SM_EDGE1:
+		if (level) {
+			irq->state |= APLIC_IRQ_STATE_PENDING;
+			if (irq->state & APLIC_IRQ_STATE_ENABLED)
+				notify = true;
+		} else
+			irq->state &= ~APLIC_IRQ_STATE_PENDING;
+		break;
+	case SOURCECFG_SM_DETACHED:
+		break;
+	default:
+		/* TODO. */
+		dprintf("sourcecfg %d\n", irq->sourcecfg & SOURCECFG_SM_M);
+		error = ENXIO;
+		break;
+	}
+	mtx_unlock_spin(&aplic->mtx);
+
+	if (notify)
+		vcpu_notify_event(vm_vcpu(hyp->vm, irq->target_hart));
+
+	return (error);
+}
+
+int
+aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr)
+{
+
+	/* TODO. */
+
+	return (ENXIO);
+}
+
+void
+aplic_cpuinit(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_cpucleanup(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_flush_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+void
+aplic_sync_hwstate(struct hypctx *hypctx)
+{
+
+}
+
+int
+aplic_max_cpu_count(struct hyp *hyp)
+{
+	int16_t max_count;
+
+	max_count = vm_get_maxcpus(hyp->vm);
+
+	return (max_count);
+}
diff --git a/sys/riscv/vmm/vmm_aplic.h b/sys/riscv/vmm/vmm_aplic.h
new file mode 100644
index 000000000000..49510221b419
--- /dev/null
+++ b/sys/riscv/vmm/vmm_aplic.h
@@ -0,0 +1,54 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_APLIC_H_ +#define _VMM_APLIC_H_ + +struct hyp; +struct hypctx; +struct vm_aplic_descr; + +int aplic_attach_to_vm(struct hyp *hyp, struct vm_aplic_descr *descr); +void aplic_detach_from_vm(struct hyp *hyp); +int aplic_inject_irq(struct hyp *hyp, int vcpuid, uint32_t irqid, bool level); +int aplic_inject_msi(struct hyp *hyp, uint64_t msg, uint64_t addr); +void aplic_vminit(struct hyp *hyp); +void aplic_vmcleanup(struct hyp *hyp); +int aplic_check_pending(struct hypctx *hypctx); + +void aplic_cpuinit(struct hypctx *hypctx); +void aplic_cpucleanup(struct hypctx *hypctx); +void aplic_flush_hwstate(struct hypctx *hypctx); +void aplic_sync_hwstate(struct hypctx *hypctx); +int aplic_max_cpu_count(struct hyp *hyp); + +#endif /* !_VMM_APLIC_H_ */ diff --git a/sys/riscv/vmm/vmm_dev_machdep.c b/sys/riscv/vmm/vmm_dev_machdep.c new file mode 100644 index 000000000000..889d83f0ce2e --- /dev/null +++ b/sys/riscv/vmm/vmm_dev_machdep.c @@ -0,0 +1,126 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2011 NetApp, Inc. + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include "vmm_aplic.h" + +const struct vmmdev_ioctl vmmdev_machdep_ioctls[] = { + VMMDEV_IOCTL(VM_RUN, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_GLA2GPA_NOFAULT, VMMDEV_IOCTL_LOCK_ONE_VCPU), + + VMMDEV_IOCTL(VM_ATTACH_APLIC, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), + + VMMDEV_IOCTL(VM_RAISE_MSI, 0), + VMMDEV_IOCTL(VM_ASSERT_IRQ, 0), + VMMDEV_IOCTL(VM_DEASSERT_IRQ, 0), +}; +const size_t vmmdev_machdep_ioctl_count = nitems(vmmdev_machdep_ioctls); + +int +vmmdev_machdep_ioctl(struct vm *vm, struct vcpu *vcpu, u_long cmd, caddr_t data, + int fflag, struct thread *td) +{ + struct vm_run *vmrun; + struct vm_aplic_descr *aplic; + struct vm_irq *vi; + struct vm_exception *vmexc; + struct vm_gla2gpa *gg; + struct vm_msi *vmsi; + int error; + + error = 0; + switch (cmd) { + case VM_RUN: { + struct vm_exit *vme; + + vmrun = (struct vm_run *)data; + vme = vm_exitinfo(vcpu); + + error = vm_run(vcpu); + if (error != 0) + break; + + error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); + break; + } + case VM_INJECT_EXCEPTION: + vmexc = (struct vm_exception *)data; + error = vm_inject_exception(vcpu, vmexc->scause); + break; + case VM_GLA2GPA_NOFAULT: + gg = (struct vm_gla2gpa *)data; + error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, + ("%s: vm_gla2gpa unknown error %d", __func__, error)); + break; + case VM_ATTACH_APLIC: + aplic = (struct vm_aplic_descr *)data; + error = vm_attach_aplic(vm, aplic); + break; + case VM_RAISE_MSI: + vmsi = (struct vm_msi *)data; + error = vm_raise_msi(vm, vmsi->msg, vmsi->addr, vmsi->bus, + vmsi->slot, vmsi->func); + break; + case VM_ASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_assert_irq(vm, vi->irq); + break; + case VM_DEASSERT_IRQ: + vi = (struct vm_irq *)data; + error = vm_deassert_irq(vm, vi->irq); + break; + default: + error = ENOTTY; + break; + } + + return (error); +} diff --git a/sys/riscv/vmm/vmm_instruction_emul.c b/sys/riscv/vmm/vmm_instruction_emul.c new file mode 100644 index 000000000000..dc663f0a2040 --- /dev/null +++ b/sys/riscv/vmm/vmm_instruction_emul.c @@ -0,0 +1,109 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2015 Mihai Carabas + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef _KERNEL +#include +#include +#include +#include + +#include + +#include +#include +#else +#include +#include +#include + +#include + +#include +#include +#include +#include +#endif + +#include + +int +vmm_emulate_instruction(struct vcpu *vcpu, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging __unused, mem_region_read_t memread, + mem_region_write_t memwrite, void *memarg) +{ + uint64_t val; + int error; + + if (vie->dir == VM_DIR_READ) { + error = memread(vcpu, gpa, &val, vie->access_size, memarg); + if (error) + goto out; + if ((vie->sign_extend == 0) && (vie->access_size < 8)) + val &= (1ul << (vie->access_size * 8)) - 1; + error = vm_set_register(vcpu, vie->reg, val); + } else { + error = vm_get_register(vcpu, vie->reg, &val); + if (error) + goto out; + /* Mask any unneeded bits from the register */ + if (vie->access_size < 8) + val &= (1ul << (vie->access_size * 8)) - 1; + error = memwrite(vcpu, gpa, val, vie->access_size, memarg); + } + +out: + return (error); +} + +int +vmm_emulate_register(struct vcpu *vcpu, struct vre *vre, reg_read_t regread, + reg_write_t regwrite, void *regarg) +{ + uint64_t val; + int error; + + if (vre->dir == VM_DIR_READ) { + error = regread(vcpu, &val, regarg); + if (error) + goto out; + error = vm_set_register(vcpu, vre->reg, val); + } else { + error = vm_get_register(vcpu, vre->reg, &val); + if (error) + goto out; + error = regwrite(vcpu, val, regarg); + } + +out: + return (error); +} diff --git a/sys/riscv/vmm/vmm_riscv.c b/sys/riscv/vmm/vmm_riscv.c new file mode 100644 index 000000000000..6a76f8cf4f26 --- /dev/null +++ b/sys/riscv/vmm/vmm_riscv.c @@ -0,0 +1,922 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "riscv.h" +#include "vmm_aplic.h" +#include "vmm_stat.h" + +MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP"); + +DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); + +static int +m_op(uint32_t insn, int match, int mask) +{ + + if (((insn ^ match) & mask) == 0) + return (1); + + return (0); +} + +static inline void +riscv_set_active_vcpu(struct hypctx *hypctx) +{ + + DPCPU_SET(vcpu, hypctx); +} + +struct hypctx * +riscv_get_active_vcpu(void) +{ + + return (DPCPU_GET(vcpu)); +} + +int +vmmops_modinit(void) +{ + + if (!has_hyp) { + printf("vmm: riscv hart doesn't support H-extension.\n"); + return (ENXIO); + } + + if (!has_sstc) { + printf("vmm: riscv hart doesn't support SSTC extension.\n"); + return (ENXIO); + } + + return (0); +} + +int +vmmops_modcleanup(void) +{ + + return (0); +} + +void * +vmmops_init(struct vm *vm, pmap_t pmap) +{ + struct hyp *hyp; + vm_size_t size; + + size = round_page(sizeof(struct hyp) + + sizeof(struct hypctx *) * vm_get_maxcpus(vm)); + hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + hyp->vm = vm; + hyp->aplic_attached = false; + + aplic_vminit(hyp); + + return (hyp); +} + +static void +vmmops_delegate(void) +{ + uint64_t hedeleg; + uint64_t hideleg; + + hedeleg = (1UL << SCAUSE_INST_MISALIGNED); + hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION); + hedeleg |= (1UL << SCAUSE_BREAKPOINT); + hedeleg |= (1UL << SCAUSE_ECALL_USER); + hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT); + hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT); + hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT); + csr_write(hedeleg, hedeleg); + + hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR); + hideleg |= (1UL << IRQ_TIMER_HYPERVISOR); + hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR); + csr_write(hideleg, hideleg); +} + +static void +vmmops_vcpu_restore_csrs(struct hypctx *hypctx) +{ + struct hypcsr *csrs; + + csrs = &hypctx->guest_csrs; + + csr_write(vsstatus, csrs->vsstatus); + csr_write(vsie, csrs->vsie); + csr_write(vstvec, csrs->vstvec); + csr_write(vsscratch, csrs->vsscratch); + csr_write(vsepc, csrs->vsepc); + csr_write(vscause, csrs->vscause); + csr_write(vstval, csrs->vstval); + csr_write(hvip, csrs->hvip); + csr_write(vsatp, csrs->vsatp); +} + +static void 
+vmmops_vcpu_save_csrs(struct hypctx *hypctx)
+{
+	struct hypcsr *csrs;
+
+	csrs = &hypctx->guest_csrs;
+
+	csrs->vsstatus = csr_read(vsstatus);
+	csrs->vsie = csr_read(vsie);
+	csrs->vstvec = csr_read(vstvec);
+	csrs->vsscratch = csr_read(vsscratch);
+	csrs->vsepc = csr_read(vsepc);
+	csrs->vscause = csr_read(vscause);
+	csrs->vstval = csr_read(vstval);
+	csrs->hvip = csr_read(hvip);
+	csrs->vsatp = csr_read(vsatp);
+}
+
+void *
+vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
+{
+	struct hypctx *hypctx;
+	struct hyp *hyp;
+	vm_size_t size;
+
+	hyp = vmi;
+
+	dprintf("%s: hyp %p\n", __func__, hyp);
+
+	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
+	    ("%s: Invalid vcpuid %d", __func__, vcpuid));
+
+	size = round_page(sizeof(struct hypctx));
+
+	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+	hypctx->hyp = hyp;
+	hypctx->vcpu = vcpu1;
+	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
+
+	/* sstatus */
+	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
+	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
+
+	/* hstatus */
+	hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
+	hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
+
+	hypctx->cpu_id = vcpuid;
+	hyp->ctx[vcpuid] = hypctx;
+
+	aplic_cpuinit(hypctx);
+
+	return (hypctx);
+}
+
+static int
+riscv_vmm_pinit(pmap_t pmap)
+{
+
+	dprintf("%s: pmap %p\n", __func__, pmap);
+
+	pmap_pinit_stage(pmap, PM_STAGE2);
+
+	return (1);
+}
+
+struct vmspace *
+vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
+{
+
+	return (vmspace_alloc(min, max, riscv_vmm_pinit));
+}
+
+void
+vmmops_vmspace_free(struct vmspace *vmspace)
+{
+
+	pmap_remove_pages(vmspace_pmap(vmspace));
+	vmspace_free(vmspace);
+}
+
+static void
+riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
+    struct hyptrap *trap)
+{
+	register struct hyptrap *htrap asm("a0");
+	uintptr_t old_hstatus;
+	uintptr_t old_stvec;
+	uintptr_t entry;
+	uint64_t val;
+	uint64_t tmp;
+	int intr;
+
+	entry = (uintptr_t)&vmm_unpriv_trap;
+	htrap = trap;
+
+	intr = intr_disable();
+
+	old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
+	/*
+	 * Set up a temporary exception vector so that, if hlvx.hu raises
+	 * an exception, we catch it in vmm_unpriv_trap().
+	 */
+	old_stvec = csr_swap(stvec, entry);
+
+	/*
+	 * Read the first two bytes of the instruction, since it could be
+	 * a compressed one.
+	 */
+	__asm __volatile(".option push\n"
+			".option norvc\n"
+			"hlvx.hu %[val], (%[addr])\n"
+			".option pop\n"
+	    : [val] "=r" (val)
+	    : [addr] "r" (guest_addr), "r" (htrap)
+	    : "a1", "memory");
+
+	/*
+	 * Check that the previous hlvx.hu did not raise an exception, and
+	 * then read the rest of the instruction if it is a full-length one.
+	 */
+	if (trap->scause == -1 && (val & 0x3) == 0x3) {
+		guest_addr += 2;
+		__asm __volatile(".option push\n"
+				".option norvc\n"
+				"hlvx.hu %[tmp], (%[addr])\n"
+				".option pop\n"
+		    : [tmp] "=r" (tmp)
+		    : [addr] "r" (guest_addr), "r" (htrap)
+		    : "a1", "memory");
+		val |= (tmp << 16);
+	}
+
+	csr_write(hstatus, old_hstatus);
+	csr_write(stvec, old_stvec);
+
+	intr_restore(intr);
+
+	*data = val;
+}
+
+static int
+riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
+    struct hyptrap *trap)
+{
+	uintptr_t guest_addr;
+	struct vie *vie;
+	uint64_t insn;
+	int reg_num;
+	int rs2, rd;
+	int direction;
+	int sign_extend;
+	int access_size;
+
+	guest_addr = vme_ret->sepc;
+
+	KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
+	    vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
+	    vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
+	    ("Invalid scause"));
+
+	direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
+	    VM_DIR_WRITE : VM_DIR_READ;
+
+	sign_extend = 1;
+
+	bzero(trap, sizeof(struct hyptrap));
+	trap->scause = -1;
+	riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
+	if (trap->scause != -1)
+		return (-1);
+
+	if ((insn & 0x3) == 0x3) {
+		rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
+		rd = (insn & RD_MASK) >> RD_SHIFT;
+
+		if (direction == VM_DIR_WRITE) {
+			if (m_op(insn, MATCH_SB, MASK_SB))
+				access_size = 1;
+			else if (m_op(insn, MATCH_SH, MASK_SH))
+				access_size = 2;
+			else if (m_op(insn, MATCH_SW, MASK_SW))
+				access_size = 4;
+			else if (m_op(insn, MATCH_SD, MASK_SD))
+				access_size = 8;
+			else {
+				printf("unknown store instr at %lx\n",
+				    guest_addr);
+				return (-2);
+			}
+			reg_num = rs2;
+		} else {
+			if (m_op(insn, MATCH_LB, MASK_LB))
+				access_size = 1;
+			else if (m_op(insn, MATCH_LH, MASK_LH))
+				access_size = 2;
+			else if (m_op(insn, MATCH_LW, MASK_LW))
+				access_size = 4;
+			else if (m_op(insn, MATCH_LD, MASK_LD))
+				access_size = 8;
+			else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
+				access_size = 1;
+				sign_extend = 0;
+			} else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
+				access_size = 2;
+				sign_extend = 0;
+			} else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
+				access_size = 4;
+				sign_extend = 0;
+			} else {
+				printf("unknown load instr at %lx\n",
+				    guest_addr);
+				return (-3);
+			}
+			reg_num = rd;
+		}
+		vme_ret->inst_length = 4;
+	} else {
+		rs2 = (insn >> 7) & 0x7;
+		rs2 += 0x8;
+		rd = (insn >> 2) & 0x7;
+		rd += 0x8;
+
+		if (direction == VM_DIR_WRITE) {
+			if (m_op(insn, MATCH_C_SW, MASK_C_SW))
+				access_size = 4;
+			else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
+				access_size = 8;
+			else {
+				printf("unknown compressed store instr at %lx\n",
+				    guest_addr);
+				return (-4);
+			}
+		} else {
+			if (m_op(insn, MATCH_C_LW, MASK_C_LW))
+				access_size = 4;
+			else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
+				access_size = 8;
+			else {
+				printf("unknown compressed load instr at %lx\n",
+				    guest_addr);
+				return (-5);
+			}
+		}
+		reg_num = rd;
+		vme_ret->inst_length = 2;
+	}
+
+	vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
+	    (vme_ret->stval & 0x3);
+
+	dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
+	    reg_num, vme_ret->u.inst_emul.gpa);
+
+	vie = &vme_ret->u.inst_emul.vie;
+	vie->dir = direction;
+	vie->reg = reg_num;
+	vie->sign_extend = sign_extend;
+	vie->access_size = access_size;
+
+	return (0);
+}
+
+static bool
+riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
+    pmap_t pmap)
+{
+	struct hyptrap trap;
+	uint64_t insn;
+	uint64_t gpa;
+	bool handled;
+	bool retu;
+	int ret;
+	int i;
+
+	handled = false;
+
+	if (vme->scause & SCAUSE_INTR) {
+		/*
+		 * Host interrupt?
Leave critical section to handle. + */ + vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + vme->inst_length = 0; + return (handled); + } + + switch (vme->scause) { + case SCAUSE_FETCH_GUEST_PAGE_FAULT: + case SCAUSE_LOAD_GUEST_PAGE_FAULT: + case SCAUSE_STORE_GUEST_PAGE_FAULT: + gpa = (vme->htval << 2) | (vme->stval & 0x3); + if (vm_mem_allocated(hypctx->vcpu, gpa)) { + vme->exitcode = VM_EXITCODE_PAGING; + vme->inst_length = 0; + vme->u.paging.gpa = gpa; + } else { + ret = riscv_gen_inst_emul_data(hypctx, vme, &trap); + if (ret != 0) { + vme->exitcode = VM_EXITCODE_HYP; + vme->u.hyp.scause = trap.scause; + break; + } + vme->exitcode = VM_EXITCODE_INST_EMUL; + } + break; + case SCAUSE_ILLEGAL_INSTRUCTION: + /* + * TODO: handle illegal instruction properly. + */ + printf("%s: Illegal instruction at %lx stval 0x%lx htval " + "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval); + vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + case SCAUSE_VIRTUAL_SUPERVISOR_ECALL: + retu = false; + vmm_sbi_ecall(hypctx->vcpu, &retu); + if (retu == false) { + handled = true; + break; + } + for (i = 0; i < nitems(vme->u.ecall.args); i++) + vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i]; + vme->exitcode = VM_EXITCODE_ECALL; + handled = false; + break; + case SCAUSE_VIRTUAL_INSTRUCTION: + insn = vme->stval; + if (m_op(insn, MATCH_WFI, MASK_WFI)) + vme->exitcode = VM_EXITCODE_WFI; + else + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + default: + printf("unknown scause %lx\n", vme->scause); + vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); + vme->exitcode = VM_EXITCODE_BOGUS; + handled = false; + break; + } + + return (handled); +} + +int +vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, + int prot, uint64_t *gpa, int *is_fault) +{ + + /* Implement me. */ + + return (ENOSYS); +} + +void +riscv_send_ipi(struct hypctx *hypctx, int hart_id) +{ + struct hyp *hyp; + struct vm *vm; + + hyp = hypctx->hyp; + vm = hyp->vm; + + atomic_set_32(&hypctx->ipi_pending, 1); + + vcpu_notify_event(vm_vcpu(vm, hart_id)); +} + +int +riscv_check_ipi(struct hypctx *hypctx, bool clear) +{ + int val; + + if (clear) + val = atomic_swap_32(&hypctx->ipi_pending, 0); + else + val = hypctx->ipi_pending; + + return (val); +} + +static void +riscv_sync_interrupts(struct hypctx *hypctx) +{ + int pending; + + pending = aplic_check_pending(hypctx); + + if (pending) + hypctx->guest_csrs.hvip |= HVIP_VSEIP; + else + hypctx->guest_csrs.hvip &= ~HVIP_VSEIP; + + csr_write(hvip, hypctx->guest_csrs.hvip); +} + +static void +riscv_sync_ipi(struct hypctx *hypctx) +{ + + /* Guest clears VSSIP bit manually. */ + if (riscv_check_ipi(hypctx, true)) + hypctx->guest_csrs.hvip |= HVIP_VSSIP; + + csr_write(hvip, hypctx->guest_csrs.hvip); +} + +int +vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) +{ + struct hypctx *hypctx; + struct vm_exit *vme; + struct vcpu *vcpu; + register_t val; + bool handled; + + hypctx = (struct hypctx *)vcpui; + vcpu = hypctx->vcpu; + vme = vm_exitinfo(vcpu); + + hypctx->guest_regs.hyp_sepc = (uint64_t)pc; + + vmmops_delegate(); + + /* + * From The RISC-V Instruction Set Manual + * Volume II: RISC-V Privileged Architectures + * + * If the new virtual machine's guest physical page tables + * have been modified, it may be necessary to execute an HFENCE.GVMA + * instruction (see Section 5.3.2) before or after writing hgatp. 
+ */ + __asm __volatile("hfence.gvma" ::: "memory"); + + csr_write(hgatp, pmap->pm_satp); + csr_write(henvcfg, HENVCFG_STCE); + csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE); + /* TODO: should we trap rdcycle / rdtime? */ + csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM); + + vmmops_vcpu_restore_csrs(hypctx); + + for (;;) { + dprintf("%s: pc %lx\n", __func__, pc); + + if (hypctx->has_exception) { + hypctx->has_exception = false; + /* + * TODO: implement exception injection. + */ + } + + val = intr_disable(); + + /* Check if the vcpu is suspended */ + if (vcpu_suspended(evinfo)) { + intr_restore(val); + vm_exit_suspended(vcpu, pc); + break; + } + + if (vcpu_debugged(vcpu)) { + intr_restore(val); + vm_exit_debug(vcpu, pc); + break; + } + + /* + * TODO: What happens if a timer interrupt is asserted exactly + * here, but for the previous VM? + */ + riscv_set_active_vcpu(hypctx); + aplic_flush_hwstate(hypctx); + + riscv_sync_interrupts(hypctx); + riscv_sync_ipi(hypctx); + + dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n", + __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus, + hypctx->guest_regs.hyp_hstatus); + + vmm_switch(hypctx); + + dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__, + hypctx->guest_regs.hyp_hstatus); + + aplic_sync_hwstate(hypctx); + riscv_sync_interrupts(hypctx); + + /* + * TODO: deactivate stage 2 pmap here if needed. + */ + + vme->scause = csr_read(scause); + vme->sepc = csr_read(sepc); + vme->stval = csr_read(stval); + vme->htval = csr_read(htval); + vme->htinst = csr_read(htinst); + + intr_restore(val); + + vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); + vme->pc = hypctx->guest_regs.hyp_sepc; + vme->inst_length = INSN_SIZE; + + handled = riscv_handle_world_switch(hypctx, vme, pmap); + if (handled == false) + /* Exit loop to emulate instruction. */ + break; + else { + /* Resume guest execution from the next instruction. */ + hypctx->guest_regs.hyp_sepc += vme->inst_length; + } + } + + vmmops_vcpu_save_csrs(hypctx); + + return (0); +} + +static void +riscv_pcpu_vmcleanup(void *arg) +{ + struct hyp *hyp; + int i, maxcpus; + + hyp = arg; + maxcpus = vm_get_maxcpus(hyp->vm); + for (i = 0; i < maxcpus; i++) { + if (riscv_get_active_vcpu() == hyp->ctx[i]) { + riscv_set_active_vcpu(NULL); + break; + } + } +} + +void +vmmops_vcpu_cleanup(void *vcpui) +{ + struct hypctx *hypctx; + + hypctx = vcpui; + + dprintf("%s\n", __func__); + + aplic_cpucleanup(hypctx); + + free(hypctx, M_HYP); +} + +void +vmmops_cleanup(void *vmi) +{ + struct hyp *hyp; + + hyp = vmi; + + dprintf("%s\n", __func__); + + aplic_vmcleanup(hyp); + + smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp); + + free(hyp, M_HYP); +} + +/* + * Return register value. Registers have different sizes and an explicit cast + * must be made to ensure proper conversion. 
+ */ +static uint64_t * +hypctx_regptr(struct hypctx *hypctx, int reg) +{ + + switch (reg) { + case VM_REG_GUEST_RA: + return (&hypctx->guest_regs.hyp_ra); + case VM_REG_GUEST_SP: + return (&hypctx->guest_regs.hyp_sp); + case VM_REG_GUEST_GP: + return (&hypctx->guest_regs.hyp_gp); + case VM_REG_GUEST_TP: + return (&hypctx->guest_regs.hyp_tp); + case VM_REG_GUEST_T0: + return (&hypctx->guest_regs.hyp_t[0]); + case VM_REG_GUEST_T1: + return (&hypctx->guest_regs.hyp_t[1]); + case VM_REG_GUEST_T2: + return (&hypctx->guest_regs.hyp_t[2]); + case VM_REG_GUEST_S0: + return (&hypctx->guest_regs.hyp_s[0]); + case VM_REG_GUEST_S1: + return (&hypctx->guest_regs.hyp_s[1]); + case VM_REG_GUEST_A0: + return (&hypctx->guest_regs.hyp_a[0]); + case VM_REG_GUEST_A1: + return (&hypctx->guest_regs.hyp_a[1]); + case VM_REG_GUEST_A2: + return (&hypctx->guest_regs.hyp_a[2]); + case VM_REG_GUEST_A3: + return (&hypctx->guest_regs.hyp_a[3]); + case VM_REG_GUEST_A4: + return (&hypctx->guest_regs.hyp_a[4]); + case VM_REG_GUEST_A5: + return (&hypctx->guest_regs.hyp_a[5]); + case VM_REG_GUEST_A6: + return (&hypctx->guest_regs.hyp_a[6]); + case VM_REG_GUEST_A7: + return (&hypctx->guest_regs.hyp_a[7]); + case VM_REG_GUEST_S2: + return (&hypctx->guest_regs.hyp_s[2]); + case VM_REG_GUEST_S3: + return (&hypctx->guest_regs.hyp_s[3]); + case VM_REG_GUEST_S4: + return (&hypctx->guest_regs.hyp_s[4]); + case VM_REG_GUEST_S5: + return (&hypctx->guest_regs.hyp_s[5]); + case VM_REG_GUEST_S6: + return (&hypctx->guest_regs.hyp_s[6]); + case VM_REG_GUEST_S7: + return (&hypctx->guest_regs.hyp_s[7]); + case VM_REG_GUEST_S8: + return (&hypctx->guest_regs.hyp_s[8]); + case VM_REG_GUEST_S9: + return (&hypctx->guest_regs.hyp_s[9]); + case VM_REG_GUEST_S10: + return (&hypctx->guest_regs.hyp_s[10]); + case VM_REG_GUEST_S11: + return (&hypctx->guest_regs.hyp_s[11]); + case VM_REG_GUEST_T3: + return (&hypctx->guest_regs.hyp_t[3]); + case VM_REG_GUEST_T4: + return (&hypctx->guest_regs.hyp_t[4]); + case VM_REG_GUEST_T5: + return (&hypctx->guest_regs.hyp_t[5]); + case VM_REG_GUEST_T6: + return (&hypctx->guest_regs.hyp_t[6]); + case VM_REG_GUEST_SEPC: + return (&hypctx->guest_regs.hyp_sepc); + default: + break; + } + + return (NULL); +} + +int +vmmops_getreg(void *vcpui, int reg, uint64_t *retval) +{ + uint64_t *regp; + int running, hostcpu; + struct hypctx *hypctx; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + if (reg == VM_REG_GUEST_ZERO) { + *retval = 0; + return (0); + } + + regp = hypctx_regptr(hypctx, reg); + if (regp == NULL) + return (EINVAL); + + *retval = *regp; + + return (0); +} + +int +vmmops_setreg(void *vcpui, int reg, uint64_t val) +{ + struct hypctx *hypctx; + int running, hostcpu; + uint64_t *regp; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + regp = hypctx_regptr(hypctx, reg); + if (regp == NULL) + return (EINVAL); + + *regp = val; + + return (0); +} + +int +vmmops_exception(void *vcpui, uint64_t scause) +{ + struct hypctx *hypctx; + int running, hostcpu; + + hypctx = vcpui; + + running = vcpu_is_running(hypctx->vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), + vcpu_vcpuid(hypctx->vcpu)); + + /* TODO: implement me. 
*/ + + return (ENOSYS); +} + +int +vmmops_getcap(void *vcpui, int num, int *retval) +{ + int ret; + + ret = ENOENT; + + switch (num) { + case VM_CAP_UNRESTRICTED_GUEST: + *retval = 1; + ret = 0; + break; + default: + break; + } + + return (ret); +} + +int +vmmops_setcap(void *vcpui, int num, int val) +{ + + return (ENOENT); +} diff --git a/sys/riscv/vmm/vmm_sbi.c b/sys/riscv/vmm/vmm_sbi.c new file mode 100644 index 000000000000..6444b8c9e396 --- /dev/null +++ b/sys/riscv/vmm/vmm_sbi.c @@ -0,0 +1,179 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "riscv.h" + +static int +vmm_sbi_handle_rfnc(struct vcpu *vcpu, struct hypctx *hypctx) +{ + uint64_t hart_mask __unused; + uint64_t start __unused; + uint64_t size __unused; + uint64_t asid __unused; + uint64_t func_id; + + func_id = hypctx->guest_regs.hyp_a[6]; + hart_mask = hypctx->guest_regs.hyp_a[0]; + start = hypctx->guest_regs.hyp_a[2]; + size = hypctx->guest_regs.hyp_a[3]; + asid = hypctx->guest_regs.hyp_a[4]; + + dprintf("%s: %ld hart_mask %lx start %lx size %lx\n", __func__, + func_id, hart_mask, start, size); + + /* TODO: implement remote sfence. 
+	 */
+
+	switch (func_id) {
+	case SBI_RFNC_REMOTE_FENCE_I:
+		break;
+	case SBI_RFNC_REMOTE_SFENCE_VMA:
+		break;
+	case SBI_RFNC_REMOTE_SFENCE_VMA_ASID:
+		break;
+	default:
+		break;
+	}
+
+	hypctx->guest_regs.hyp_a[0] = 0;
+
+	return (0);
+}
+
+static int
+vmm_sbi_handle_ipi(struct vcpu *vcpu, struct hypctx *hypctx)
+{
+	struct hypctx *target_hypctx;
+	struct vcpu *target_vcpu __unused;
+	cpuset_t active_cpus;
+	struct hyp *hyp;
+	uint64_t hart_mask;
+	uint64_t func_id;
+	int hart_id;
+	int bit;
+	int ret;
+
+	func_id = hypctx->guest_regs.hyp_a[6];
+	hart_mask = hypctx->guest_regs.hyp_a[0];
+
+	dprintf("%s: hart_mask %lx\n", __func__, hart_mask);
+
+	hyp = hypctx->hyp;
+
+	active_cpus = vm_active_cpus(hyp->vm);
+
+	switch (func_id) {
+	case SBI_IPI_SEND_IPI:
+		/* hart_mask is 64 bits wide, so use ffsl() and a long mask. */
+		while ((bit = ffsl(hart_mask))) {
+			hart_id = (bit - 1);
+			hart_mask &= ~(1UL << hart_id);
+			if (CPU_ISSET(hart_id, &active_cpus)) {
+				/* TODO. */
+				target_vcpu = vm_vcpu(hyp->vm, hart_id);
+				target_hypctx = hypctx->hyp->ctx[hart_id];
+				riscv_send_ipi(target_hypctx, hart_id);
+			}
+		}
+		ret = 0;
+		break;
+	default:
+		printf("%s: unknown func %ld\n", __func__, func_id);
+		ret = -1;
+		break;
+	}
+
+	hypctx->guest_regs.hyp_a[0] = ret;
+
+	return (0);
+}
+
+int
+vmm_sbi_ecall(struct vcpu *vcpu, bool *retu)
+{
+	int sbi_extension_id __unused;
+	struct hypctx *hypctx;
+
+	hypctx = riscv_get_active_vcpu();
+	sbi_extension_id = hypctx->guest_regs.hyp_a[7];
+
+	dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__,
+	    hypctx->guest_regs.hyp_a[0],
+	    hypctx->guest_regs.hyp_a[1],
+	    hypctx->guest_regs.hyp_a[2],
+	    hypctx->guest_regs.hyp_a[3],
+	    hypctx->guest_regs.hyp_a[4],
+	    hypctx->guest_regs.hyp_a[5],
+	    hypctx->guest_regs.hyp_a[6],
+	    hypctx->guest_regs.hyp_a[7]);
+
+	switch (sbi_extension_id) {
+	case SBI_EXT_ID_RFNC:
+		vmm_sbi_handle_rfnc(vcpu, hypctx);
+		break;
+	case SBI_EXT_ID_TIME:
+		break;
+	case SBI_EXT_ID_IPI:
+		vmm_sbi_handle_ipi(vcpu, hypctx);
+		break;
+	default:
+		*retu = true;
+		break;
+	}
+
+	return (0);
+}
diff --git a/sys/riscv/include/md_var.h b/sys/riscv/vmm/vmm_stat.h
similarity index 61%
copy from sys/riscv/include/md_var.h
copy to sys/riscv/vmm/vmm_stat.h
index d9404db914a2..575c9e9f0a31 100644
--- a/sys/riscv/include/md_var.h
+++ b/sys/riscv/vmm/vmm_stat.h
@@ -1,54 +1,43 @@
 /*-
- * Copyright (c) 1995 Bruce D. Evans.
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * from: FreeBSD: src/sys/i386/include/md_var.h,v 1.40 2001/07/12
  */

-#ifndef _MACHINE_MD_VAR_H_
-#define _MACHINE_MD_VAR_H_
-
-extern long Maxmem;
-extern char sigcode[];
-extern int szsigcode;
-extern u_long elf_hwcap;
-extern register_t mvendorid;
-extern register_t marchid;
-extern register_t mimpid;
-extern u_int mmu_caps;
-
-/* Supervisor-mode extension support */
-extern bool has_sstc;
-extern bool has_sscofpmf;
-extern bool has_svpbmt;
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_

-struct dumperinfo;
-struct minidumpstate;
+#include

-int cpu_minidumpsys(struct dumperinfo *, const struct minidumpstate *);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_WFI);
+VMM_STAT_DECLARE(VMEXIT_IRQ);
+VMM_STAT_DECLARE(VMEXIT_UNHANDLED);

-#endif /* !_MACHINE_MD_VAR_H_ */
+#endif
diff --git a/sys/riscv/vmm/vmm_switch.S b/sys/riscv/vmm/vmm_switch.S
new file mode 100644
index 000000000000..8dcc6c5c2a47
--- /dev/null
+++ b/sys/riscv/vmm/vmm_switch.S
@@ -0,0 +1,220 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Ruslan Bukin
+ *
+ * This software was developed by the University of Cambridge Computer
+ * Laboratory (Department of Computer Science and Technology) under Innovate
+ * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
+ * Prototype".
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +#include +#include "assym.inc" + + .text + +/* + * a0 == hypctx * + */ +ENTRY(vmm_switch) + sd ra, (HYP_H_RA)(a0) + sd sp, (HYP_H_SP)(a0) + sd tp, (HYP_H_TP)(a0) + sd gp, (HYP_H_GP)(a0) + sd s0, (HYP_H_S + 0 * 8)(a0) + sd s1, (HYP_H_S + 1 * 8)(a0) + sd s2, (HYP_H_S + 2 * 8)(a0) + sd s3, (HYP_H_S + 3 * 8)(a0) + sd s4, (HYP_H_S + 4 * 8)(a0) + sd s5, (HYP_H_S + 5 * 8)(a0) + sd s6, (HYP_H_S + 6 * 8)(a0) + sd s7, (HYP_H_S + 7 * 8)(a0) + sd s8, (HYP_H_S + 8 * 8)(a0) + sd s9, (HYP_H_S + 9 * 8)(a0) + sd s10, (HYP_H_S + 10 * 8)(a0) + sd s11, (HYP_H_S + 11 * 8)(a0) + + sd a1, (HYP_H_A + 1 * 8)(a0) + sd a2, (HYP_H_A + 2 * 8)(a0) + sd a3, (HYP_H_A + 3 * 8)(a0) + sd a4, (HYP_H_A + 4 * 8)(a0) + sd a5, (HYP_H_A + 5 * 8)(a0) + sd a6, (HYP_H_A + 6 * 8)(a0) + sd a7, (HYP_H_A + 7 * 8)(a0) + + ld t0, (HYP_G_SSTATUS)(a0) + ld t1, (HYP_G_HSTATUS)(a0) + ld t2, (HYP_G_SCOUNTEREN)(a0) + la t4, .Lswitch_return + ld t5, (HYP_G_SEPC)(a0) + + csrrw t0, sstatus, t0 + csrrw t1, hstatus, t1 + csrrw t2, scounteren, t2 + csrrw t3, sscratch, a0 + csrrw t4, stvec, t4 + csrw sepc, t5 + + sd t0, (HYP_H_SSTATUS)(a0) + sd t1, (HYP_H_HSTATUS)(a0) + sd t2, (HYP_H_SCOUNTEREN)(a0) + sd t3, (HYP_H_SSCRATCH)(a0) + sd t4, (HYP_H_STVEC)(a0) + + ld ra, (HYP_G_RA)(a0) + ld sp, (HYP_G_SP)(a0) + ld gp, (HYP_G_GP)(a0) + ld tp, (HYP_G_TP)(a0) + ld t0, (HYP_G_T + 0 * 8)(a0) + ld t1, (HYP_G_T + 1 * 8)(a0) + ld t2, (HYP_G_T + 2 * 8)(a0) + ld t3, (HYP_G_T + 3 * 8)(a0) + ld t4, (HYP_G_T + 4 * 8)(a0) + ld t5, (HYP_G_T + 5 * 8)(a0) + ld t6, (HYP_G_T + 6 * 8)(a0) + ld s0, (HYP_G_S + 0 * 8)(a0) + ld s1, (HYP_G_S + 1 * 8)(a0) + ld s2, (HYP_G_S + 2 * 8)(a0) + ld s3, (HYP_G_S + 3 * 8)(a0) + ld s4, (HYP_G_S + 4 * 8)(a0) + ld s5, (HYP_G_S + 5 * 8)(a0) + ld s6, (HYP_G_S + 6 * 8)(a0) + ld s7, (HYP_G_S + 7 * 8)(a0) + ld s8, (HYP_G_S + 8 * 8)(a0) + ld s9, (HYP_G_S + 9 * 8)(a0) + ld s10, (HYP_G_S + 10 * 8)(a0) + ld s11, (HYP_G_S + 11 * 8)(a0) + /* skip a0 for now. */ + ld a1, (HYP_G_A + 1 * 8)(a0) + ld a2, (HYP_G_A + 2 * 8)(a0) + ld a3, (HYP_G_A + 3 * 8)(a0) + ld a4, (HYP_G_A + 4 * 8)(a0) + ld a5, (HYP_G_A + 5 * 8)(a0) + ld a6, (HYP_G_A + 6 * 8)(a0) + ld a7, (HYP_G_A + 7 * 8)(a0) + /* now load a0. 
*/ + ld a0, (HYP_G_A + 0 * 8)(a0) + + sret + + .align 2 +.Lswitch_return: + + csrrw a0, sscratch, a0 + sd ra, (HYP_G_RA)(a0) + sd sp, (HYP_G_SP)(a0) + sd gp, (HYP_G_GP)(a0) + sd tp, (HYP_G_TP)(a0) + sd t0, (HYP_G_T + 0 * 8)(a0) + sd t1, (HYP_G_T + 1 * 8)(a0) + sd t2, (HYP_G_T + 2 * 8)(a0) + sd t3, (HYP_G_T + 3 * 8)(a0) + sd t4, (HYP_G_T + 4 * 8)(a0) + sd t5, (HYP_G_T + 5 * 8)(a0) + sd t6, (HYP_G_T + 6 * 8)(a0) + sd s0, (HYP_G_S + 0 * 8)(a0) + sd s1, (HYP_G_S + 1 * 8)(a0) + sd s2, (HYP_G_S + 2 * 8)(a0) + sd s3, (HYP_G_S + 3 * 8)(a0) + sd s4, (HYP_G_S + 4 * 8)(a0) + sd s5, (HYP_G_S + 5 * 8)(a0) + sd s6, (HYP_G_S + 6 * 8)(a0) + sd s7, (HYP_G_S + 7 * 8)(a0) + sd s8, (HYP_G_S + 8 * 8)(a0) + sd s9, (HYP_G_S + 9 * 8)(a0) + sd s10, (HYP_G_S + 10 * 8)(a0) + sd s11, (HYP_G_S + 11 * 8)(a0) + /* skip a0 */ + sd a1, (HYP_G_A + 1 * 8)(a0) + sd a2, (HYP_G_A + 2 * 8)(a0) + sd a3, (HYP_G_A + 3 * 8)(a0) + sd a4, (HYP_G_A + 4 * 8)(a0) + sd a5, (HYP_G_A + 5 * 8)(a0) + sd a6, (HYP_G_A + 6 * 8)(a0) + sd a7, (HYP_G_A + 7 * 8)(a0) + + ld t1, (HYP_H_STVEC)(a0) + ld t2, (HYP_H_SSCRATCH)(a0) + ld t3, (HYP_H_SCOUNTEREN)(a0) + ld t4, (HYP_H_HSTATUS)(a0) + ld t5, (HYP_H_SSTATUS)(a0) + + csrr t0, sepc + csrw stvec, t1 + csrrw t2, sscratch, t2 + csrrw t3, scounteren, t3 + csrrw t4, hstatus, t4 + csrrw t5, sstatus, t5 + + sd t0, (HYP_G_SEPC)(a0) + sd t2, (HYP_G_A + 0 * 8)(a0) + sd t3, (HYP_G_SCOUNTEREN)(a0) + sd t4, (HYP_G_HSTATUS)(a0) + sd t5, (HYP_G_SSTATUS)(a0) + + ld ra, (HYP_H_RA)(a0) + ld sp, (HYP_H_SP)(a0) + ld tp, (HYP_H_TP)(a0) + ld gp, (HYP_H_GP)(a0) + ld s0, (HYP_H_S + 0 * 8)(a0) + ld s1, (HYP_H_S + 1 * 8)(a0) + ld s2, (HYP_H_S + 2 * 8)(a0) + ld s3, (HYP_H_S + 3 * 8)(a0) + ld s4, (HYP_H_S + 4 * 8)(a0) + ld s5, (HYP_H_S + 5 * 8)(a0) + ld s6, (HYP_H_S + 6 * 8)(a0) + ld s7, (HYP_H_S + 7 * 8)(a0) + ld s8, (HYP_H_S + 8 * 8)(a0) + ld s9, (HYP_H_S + 9 * 8)(a0) + ld s10, (HYP_H_S + 10 * 8)(a0) + ld s11, (HYP_H_S + 11 * 8)(a0) + + ld a1, (HYP_H_A + 1 * 8)(a0) + ld a2, (HYP_H_A + 2 * 8)(a0) + ld a3, (HYP_H_A + 3 * 8)(a0) + ld a4, (HYP_H_A + 4 * 8)(a0) + ld a5, (HYP_H_A + 5 * 8)(a0) + ld a6, (HYP_H_A + 6 * 8)(a0) + ld a7, (HYP_H_A + 7 * 8)(a0) + + ret + +END(vmm_switch) + +ENTRY(vmm_unpriv_trap) + csrr a1, sepc + sd a1, HYP_TRAP_SEPC(a0) + addi a1, a1, 4 /* Next instruction after hlvx.hu */ + csrw sepc, a1 + csrr a1, scause + sd a1, HYP_TRAP_SCAUSE(a0) + csrr a1, stval + sd a1, HYP_TRAP_STVAL(a0) + csrr a1, htval + sd a1, HYP_TRAP_HTVAL(a0) + csrr a1, htinst + sd a1, HYP_TRAP_HTINST(a0) + sret +END(vmm_unpriv_trap)
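For context, the VM_RUN path added in vmm_dev_machdep.c above is driven from userland by looping on the ioctl and dispatching on the exit code copied out into the caller's struct vm_exit. The sketch below illustrates that control flow only; the device fd, the cpuid field of struct vm_run, and the header names are assumptions for illustration and are not part of this change. Only the vm_exit pointer and the VM_EXITCODE_* values appear in the diff above.

/*
 * Hypothetical userland loop for the VM_RUN ioctl (sketch only).
 * Assumed: the vmm device fd passed in, the header paths, and the
 * cpuid field name; vm_exit and the exit codes come from this change.
 */
#include <sys/types.h>
#include <sys/ioctl.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <err.h>
#include <string.h>

static void
run_vcpu(int vmfd)
{
	struct vm_exit vme;
	struct vm_run vmrun;

	memset(&vmrun, 0, sizeof(vmrun));
	vmrun.cpuid = 0;	/* assumed field name */
	vmrun.vm_exit = &vme;	/* kernel copies the exit info out here */

	for (;;) {
		if (ioctl(vmfd, VM_RUN, &vmrun) != 0)
			err(1, "VM_RUN");

		switch (vme.exitcode) {
		case VM_EXITCODE_INST_EMUL:
			/* MMIO access decoded by riscv_gen_inst_emul_data(). */
			break;
		case VM_EXITCODE_ECALL:
			/* SBI call passed up; arguments in vme.u.ecall.args. */
			break;
		case VM_EXITCODE_WFI:
			/* Guest executed WFI; block until an interrupt. */
			break;
		case VM_EXITCODE_SUSPENDED:
			return;
		default:
			break;
		}
	}
}

A real monitor would dispatch each such exit to its device models and write any emulated load result back before re-entering the guest; the kernel side resumes at vcpu->nextpc as set in the vm_run exit handling above.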