diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv index 6186ae9b3371..534fe5013c56 100644 --- a/sys/conf/files.riscv +++ b/sys/conf/files.riscv @@ -1,90 +1,91 @@ cddl/dev/dtrace/riscv/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/riscv/dtrace_isa.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/dtrace/riscv/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/dtrace/riscv/instr_size.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/riscv/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/des/des_enc.c optional netsmb dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofw_pcib.c optional pci fdt dev/pci/pci_dw.c optional pci fdt dev/pci/pci_dw_if.m optional pci fdt dev/pci/pci_host_generic.c optional pci dev/pci/pci_host_generic_fdt.c optional pci fdt dev/uart/uart_cpu_fdt.c optional uart fdt dev/uart/uart_dev_lowrisc.c optional uart_lowrisc dev/vmm/vmm_dev.c optional vmm dev/vmm/vmm_stat.c optional vmm dev/xilinx/axi_quad_spi.c optional xilinx_spi dev/xilinx/axidma.c optional axidma xdma dev/xilinx/if_xae.c optional xae dev/xilinx/xlnx_pcib.c optional pci fdt xlnx_pcib kern/msi_if.m standard kern/pic_if.m standard kern/subr_devmap.c standard kern/subr_dummy_vdso_tc.c standard kern/subr_intr.c standard kern/subr_physmem.c standard libkern/bcopy.c standard libkern/memcmp.c standard libkern/memset.c standard libkern/strcmp.c standard libkern/strlen.c standard libkern/strncmp.c standard riscv/riscv/aplic.c standard riscv/riscv/autoconf.c standard riscv/riscv/bus_machdep.c standard riscv/riscv/bus_space_asm.S standard riscv/riscv/busdma_bounce.c standard riscv/riscv/busdma_machdep.c standard riscv/riscv/cache.c standard riscv/riscv/clock.c standard riscv/riscv/copyinout.S standard riscv/riscv/cpufunc_asm.S standard riscv/riscv/db_disasm.c optional ddb riscv/riscv/db_interface.c optional ddb riscv/riscv/db_trace.c optional ddb riscv/riscv/dump_machdep.c standard riscv/riscv/elf_machdep.c standard riscv/riscv/exception.S standard riscv/riscv/exec_machdep.c standard riscv/riscv/fpe.c standard riscv/riscv/gdb_machdep.c optional gdb riscv/riscv/intc.c standard riscv/riscv/identcpu.c standard riscv/riscv/locore.S standard no-obj riscv/riscv/machdep.c standard riscv/riscv/minidump_machdep.c standard riscv/riscv/mp_machdep.c optional smp riscv/riscv/mem.c standard riscv/riscv/nexus.c standard riscv/riscv/ofw_machdep.c optional fdt riscv/riscv/plic.c standard riscv/riscv/pmap.c standard riscv/riscv/ptrace_machdep.c standard riscv/riscv/riscv_console.c optional rcons riscv/riscv/riscv_syscon.c optional syscon riscv_syscon fdt riscv/riscv/sigtramp.S standard riscv/riscv/sbi.c standard riscv/riscv/sbi_ipi.c optional smp riscv/riscv/sdt_machdep.c optional kdtrace_hooks riscv/riscv/stack_machdep.c optional ddb | stack riscv/riscv/support.S standard riscv/riscv/swtch.S standard riscv/riscv/sys_machdep.c standard riscv/riscv/trap.c standard riscv/riscv/timer.c standard riscv/riscv/uio_machdep.c standard riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack riscv/riscv/vm_machdep.c standard riscv/vmm/vmm.c optional vmm riscv/vmm/vmm_aplic.c optional vmm riscv/vmm/vmm_dev_machdep.c optional vmm riscv/vmm/vmm_instruction_emul.c optional vmm riscv/vmm/vmm_riscv.c optional vmm riscv/vmm/vmm_sbi.c optional vmm riscv/vmm/vmm_switch.S optional vmm +riscv/vmm/vmm_vtimer.c optional vmm riscv/thead/thead.c standard # Zstd contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C} diff --git a/sys/modules/vmm/Makefile 
b/sys/modules/vmm/Makefile index 0ec1147d0d4b..7c34dd92939b 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -1,161 +1,162 @@ .include KMOD= vmm .if ${MACHINE_CPUARCH} == "amd64" .endif SRCS+= acpi_if.h bus_if.h device_if.h pci_if.h pcib_if.h vnode_if.h CFLAGS+= -DVMM_KEEP_STATS CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm # generic vmm support .PATH: ${SRCTOP}/sys/dev/vmm ${SRCTOP}/sys/${MACHINE}/vmm SRCS+= vmm.c \ vmm_dev.c \ vmm_dev_machdep.c \ vmm_instruction_emul.c \ vmm_stat.c .if ${MACHINE_CPUARCH} == "aarch64" CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io DPSRCS+= assym.inc # TODO: Add the new EL2 code SRCS+= vmm_arm64.c \ vmm_reset.c \ vmm_call.S \ vmm_handlers.c \ vmm_mmu.c \ vmm_vhe_exception.S \ vmm_vhe.c \ vmm_hyp_el2.S .PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io SRCS+= vgic.c \ vgic_if.h \ vgic_if.c \ vgic_v3.c \ vtimer.c CLEANFILES+= vmm_nvhe_exception.o vmm_nvhe.o CLEANFILES+= vmm_hyp_blob.elf.full CLEANFILES+= vmm_hyp_blob.elf vmm_hyp_blob.bin vmm_nvhe_exception.o: vmm_nvhe_exception.S ${CC} -c -x assembler-with-cpp -DLOCORE \ ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} -o ${.TARGET} -fpie vmm_nvhe.o: vmm_nvhe.c ${CC} -c ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} \ -o ${.TARGET} -fpie vmm_hyp_blob.elf.full: vmm_nvhe_exception.o vmm_nvhe.o ${LD} -m ${LD_EMULATION} -Bdynamic -L ${SYSDIR}/conf -T ${SYSDIR}/conf/ldscript.arm64 \ ${_LDFLAGS:N-zbti-report*} --no-warn-mismatch --warn-common --export-dynamic \ --dynamic-linker /red/herring -X -o ${.TARGET} ${.ALLSRC} \ --defsym=_start='0x0' --defsym=text_start='0x0' vmm_hyp_blob.elf: vmm_hyp_blob.elf.full ${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET} vmm_hyp_blob.bin: vmm_hyp_blob.elf ${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET} vmm_hyp_el2.o: vmm_hyp_blob.bin .elif ${MACHINE_CPUARCH} == "amd64" CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io DPSRCS+= vmx_assym.h svm_assym.h DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd SRCS+= opt_acpi.h \ opt_bhyve_snapshot.h \ opt_ddb.h SRCS+= vmm_host.c \ vmm_ioport.c \ vmm_lapic.c \ vmm_mem.c \ vmm_util.c \ x86.c .PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io SRCS+= iommu.c \ ppt.c \ vatpic.c \ vatpit.c \ vhpet.c \ vioapic.c \ vlapic.c \ vpmtmr.c \ vrtc.c # intel-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/intel SRCS+= ept.c \ vmcs.c \ vmx_msr.c \ vmx_support.S \ vmx.c \ vtd.c # amd-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/amd SRCS+= vmcb.c \ amdviiommu.c \ ivhd_if.c \ ivhd_if.h \ svm.c \ svm_support.S \ npt.c \ ivrs_drv.c \ amdvi_hw.c \ svm_msr.c SRCS.BHYVE_SNAPSHOT= vmm_snapshot.c CLEANFILES+= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h vmx_assym.h: vmx_genassym.o sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET} svm_assym.h: svm_genassym.o sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET} vmx_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} svm_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} hyp_genassym.o: offset.inc ${CC} -c ${NOSAN_CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} vmx_genassym.o: offset.inc ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC} svm_genassym.o: offset.inc ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC} .elif ${MACHINE_CPUARCH} == "riscv" SRCS+= vmm_aplic.c \ vmm_riscv.c \ vmm_sbi.c \ - vmm_switch.S + vmm_switch.S \ + 
vmm_vtimer.c .endif .include diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h index ed4b65003f94..f3665d33a386 100644 --- a/sys/riscv/vmm/riscv.h +++ b/sys/riscv/vmm/riscv.h @@ -1,132 +1,137 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015 Mihai Carabas * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _VMM_RISCV_H_ #define _VMM_RISCV_H_ #include #include #include +#include + struct hypregs { uint64_t hyp_ra; uint64_t hyp_sp; uint64_t hyp_gp; uint64_t hyp_tp; uint64_t hyp_t[7]; uint64_t hyp_s[12]; uint64_t hyp_a[8]; uint64_t hyp_sepc; uint64_t hyp_sstatus; uint64_t hyp_hstatus; }; struct hypcsr { uint64_t hvip; uint64_t vsstatus; uint64_t vsie; uint64_t vstvec; uint64_t vsscratch; uint64_t vsepc; uint64_t vscause; uint64_t vstval; uint64_t vsatp; uint64_t scounteren; uint64_t senvcfg; }; struct hypctx { struct hypregs host_regs; struct hypregs guest_regs; struct hypcsr guest_csrs; uint64_t host_sscratch; uint64_t host_stvec; uint64_t host_scounteren; uint64_t guest_scounteren; struct hyp *hyp; struct vcpu *vcpu; bool has_exception; int cpu_id; int ipi_pending; + int interrupts_pending; + struct vtimer vtimer; }; struct hyp { struct vm *vm; uint64_t vmid_generation; bool aplic_attached; struct aplic *aplic; struct hypctx *ctx[]; }; struct hyptrap { uint64_t sepc; uint64_t scause; uint64_t stval; uint64_t htval; uint64_t htinst; }; #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ ret_type vmmops_##opname args; DEFINE_VMMOPS_IFUNC(int, modinit, (void)) DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, struct vm_eventinfo *info)) DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, int vcpu_id)) DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, vm_offset_t max)) DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) #define dprintf(fmt, ...) struct hypctx *riscv_get_active_vcpu(void); void vmm_switch(struct hypctx *); void vmm_unpriv_trap(struct hyptrap *, uint64_t tmp); int vmm_sbi_ecall(struct vcpu *, bool *); void riscv_send_ipi(struct hypctx *hypctx, int hart_id); int riscv_check_ipi(struct hypctx *hypctx, bool clear); +bool riscv_check_interrupts_pending(struct hypctx *hypctx); #endif /* !_VMM_RISCV_H_ */ diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index 33a0cb5fe420..0596e0de2e43 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -1,1612 +1,1615 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015 Mihai Carabas * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_stat.h" #include "riscv.h" #include "vmm_aplic.h" struct vcpu { int flags; enum vcpu_state state; struct mtx mtx; int hostcpu; /* host cpuid this vcpu last ran on */ int vcpuid; void *stats; struct vm_exit exitinfo; uint64_t nextpc; /* (x) next instruction to execute */ struct vm *vm; /* (o) */ void *cookie; /* (i) cpu-specific data */ struct fpreg *guestfpu; /* (a,i) guest fpu state */ }; #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) struct mem_seg { uint64_t gpa; size_t len; bool wired; bool sysmem; vm_object_t object; }; #define VM_MAX_MEMSEGS 3 struct mem_map { vm_paddr_t gpa; size_t len; vm_ooffset_t segoff; int segid; int prot; int flags; }; #define VM_MAX_MEMMAPS 4 struct vmm_mmio_region { uint64_t start; uint64_t end; mem_region_read_t read; mem_region_write_t write; }; #define VM_MAX_MMIO_REGIONS 4 /* * Initialization: * (o) initialized the first time the VM is created * (i) initialized when VM is created and when it is reinitialized * (x) initialized before use */ struct vm { void *cookie; /* (i) cpu-specific data */ volatile cpuset_t active_cpus; /* (i) active vcpus */ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/ int suspend; /* (i) stop VM execution */ bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ /* The following describe the vm cpu topology */ uint16_t sockets; /* (o) num of sockets */ uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* 
(o) max pluggable cpus */ struct sx mem_segs_lock; /* (o) */ struct sx vcpus_init_lock; /* (o) */ }; static bool vmm_initialized = false; static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); /* statistics */ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); static int vmm_ipinum; SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); u_int vm_maxcpu; SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &vm_maxcpu, 0, "Maximum number of vCPUs"); static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); /* * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this * is a safe value for now. */ #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) static void vcpu_cleanup(struct vcpu *vcpu, bool destroy) { vmmops_vcpu_cleanup(vcpu->cookie); vcpu->cookie = NULL; if (destroy) { vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); } } static struct vcpu * vcpu_alloc(struct vm *vm, int vcpu_id) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_alloc: invalid vcpu %d", vcpu_id)); vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); vcpu_lock_init(vcpu); vcpu->state = VCPU_IDLE; vcpu->hostcpu = NOCPU; vcpu->vcpuid = vcpu_id; vcpu->vm = vm; vcpu->guestfpu = fpu_save_area_alloc(); vcpu->stats = vmm_stat_alloc(); return (vcpu); } static void vcpu_init(struct vcpu *vcpu) { vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); MPASS(vcpu->cookie != NULL); fpu_save_area_reset(vcpu->guestfpu); vmm_stat_init(vcpu->stats); } struct vm_exit * vm_exitinfo(struct vcpu *vcpu) { return (&vcpu->exitinfo); } static int vmm_init(void) { vm_maxcpu = mp_ncpus; TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); if (vm_maxcpu > VM_MAXCPU) { printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); vm_maxcpu = VM_MAXCPU; } if (vm_maxcpu == 0) vm_maxcpu = 1; return (vmmops_modinit()); } static int vmm_handler(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: /* TODO: check if has_hyp here? */ error = vmmdev_init(); if (error != 0) break; error = vmm_init(); if (error == 0) vmm_initialized = true; break; case MOD_UNLOAD: /* TODO: check if has_hyp here? */ error = vmmdev_cleanup(); if (error == 0 && vmm_initialized) { error = vmmops_modcleanup(); if (error) vmm_initialized = false; } break; default: error = 0; break; } return (error); } static moduledata_t vmm_kmod = { "vmm", vmm_handler, NULL }; /* * vmm initialization has the following dependencies: * * - vmm device initialization requires an initialized devfs. 
*/ DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { int i; vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); MPASS(vm->cookie != NULL); CPU_ZERO(&vm->active_cpus); CPU_ZERO(&vm->debug_cpus); vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); if (!create) { for (i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_init(vm->vcpu[i]); } } } void vm_disable_vcpu_creation(struct vm *vm) { sx_xlock(&vm->vcpus_init_lock); vm->dying = true; sx_xunlock(&vm->vcpus_init_lock); } struct vcpu * vm_alloc_vcpu(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); /* Some interrupt controllers may have a CPU limit */ if (vcpuid >= aplic_max_cpu_count(vm->cookie)) return (NULL); vcpu = (struct vcpu *) atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) return (vcpu); sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); /* * Ensure vCPU is fully created before updating pointer * to permit unlocked reads above. */ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], (uintptr_t)vcpu); } sx_xunlock(&vm->vcpus_init_lock); return (vcpu); } void vm_slock_vcpus(struct vm *vm) { sx_slock(&vm->vcpus_init_lock); } void vm_unlock_vcpus(struct vm *vm) { sx_unlock(&vm->vcpus_init_lock); } int vm_create(const char *name, struct vm **retvm) { struct vm *vm; struct vmspace *vmspace; /* * If vmm.ko could not be successfully initialized then don't attempt * to create the virtual machine. */ if (!vmm_initialized) return (ENXIO); if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); vmspace = vmmops_vmspace_alloc(0, 1ul << 39); if (vmspace == NULL) return (ENOMEM); vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->vmspace = vmspace; sx_init(&vm->mem_segs_lock, "vm mem_segs"); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; vm->cores = 1; /* XXX backwards compatibility */ vm->threads = 1; /* XXX backwards compatibility */ vm->maxcpus = vm_maxcpu; vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, M_WAITOK | M_ZERO); vm_init(vm, true); *retvm = vm; return (0); } void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus) { *sockets = vm->sockets; *cores = vm->cores; *threads = vm->threads; *maxcpus = vm->maxcpus; } uint16_t vm_get_maxcpus(struct vm *vm) { return (vm->maxcpus); } int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus) { /* Ignore maxcpus. */ if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); vm->sockets = sockets; vm->cores = cores; vm->threads = threads; return(0); } static void vm_cleanup(struct vm *vm, bool destroy) { struct mem_map *mm; int i; aplic_detach_from_vm(vm->cookie); for (i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_cleanup(vm->vcpu[i], destroy); } vmmops_cleanup(vm->cookie); /* * System memory is removed from the guest address space only when * the VM is destroyed. This is because the mapping remains the same * across VM reset. * * Device memory can be relocated by the guest (e.g. using PCI BARs) * so those mappings are removed on a VM reset. 
*/ if (!destroy) { for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (destroy || !sysmem_mapping(vm, mm)) vm_free_memmap(vm, i); } } if (destroy) { for (i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); vmmops_vmspace_free(vm->vmspace); vm->vmspace = NULL; for (i = 0; i < vm->maxcpus; i++) free(vm->vcpu[i], M_VMM); free(vm->vcpu, M_VMM); sx_destroy(&vm->vcpus_init_lock); sx_destroy(&vm->mem_segs_lock); } } void vm_destroy(struct vm *vm) { vm_cleanup(vm, true); free(vm, M_VMM); } int vm_reinit(struct vm *vm) { int error; /* * A virtual machine can be reset only if all vcpus are suspended. */ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { vm_cleanup(vm, false); vm_init(vm, false); error = 0; } else { error = EBUSY; } return (error); } const char * vm_name(struct vm *vm) { return (vm->name); } void vm_slock_memsegs(struct vm *vm) { sx_slock(&vm->mem_segs_lock); } void vm_xlock_memsegs(struct vm *vm) { sx_xlock(&vm->mem_segs_lock); } void vm_unlock_memsegs(struct vm *vm) { sx_unlock(&vm->mem_segs_lock); } /* * Return 'true' if 'gpa' is allocated in the guest address space. * * This function is called in the context of a running vcpu which acts as * an implicit lock on 'vm->mem_maps[]'. */ bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) { struct vm *vm = vcpu->vm; struct mem_map *mm; int i; #ifdef INVARIANTS int hostcpu, state; state = vcpu_get_state(vcpu, &hostcpu); KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); #endif for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) return (true); /* 'gpa' is sysmem or devmem */ } return (false); } int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) { struct mem_seg *seg; vm_object_t obj; sx_assert(&vm->mem_segs_lock, SX_XLOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); if (len == 0 || (len & PAGE_MASK)) return (EINVAL); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { if (seg->len == len && seg->sysmem == sysmem) return (EEXIST); else return (EINVAL); } obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); if (obj == NULL) return (ENOMEM); seg->len = len; seg->object = obj; seg->sysmem = sysmem; return (0); } int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, vm_object_t *objptr) { struct mem_seg *seg; sx_assert(&vm->mem_segs_lock, SX_LOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[ident]; if (len) *len = seg->len; if (sysmem) *sysmem = seg->sysmem; if (objptr) *objptr = seg->object; return (0); } void vm_free_memseg(struct vm *vm, int ident) { struct mem_seg *seg; KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, ("%s: invalid memseg ident %d", __func__, ident)); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { vm_object_deallocate(seg->object); bzero(seg, sizeof(struct mem_seg)); } } int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, size_t len, int prot, int flags) { struct mem_seg *seg; struct mem_map *m, *map; vm_ooffset_t last; int i, error; dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len); if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) return (EINVAL); if (flags & ~VM_MEMMAP_F_WIRED) return (EINVAL); if (segid < 0 || segid >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[segid]; if (seg->object == NULL) return (EINVAL); last = first + len; if (first < 0 || first >= last || last > seg->len) return (EINVAL); 
if ((gpa | first | last) & PAGE_MASK) return (EINVAL); map = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->len == 0) { map = m; break; } } if (map == NULL) return (ENOSPC); error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, len, 0, VMFS_NO_SPACE, prot, prot, 0); if (error != KERN_SUCCESS) return (EFAULT); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : EFAULT); } } map->gpa = gpa; map->len = len; map->segoff = first; map->segid = segid; map->prot = prot; map->flags = flags; return (0); } int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) { struct mem_map *m; int i; dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len); for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->gpa == gpa && m->len == len) { vm_free_memmap(vm, i); return (0); } } return (EINVAL); } int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) { struct mem_map *mm, *mmnext; int i; mmnext = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len == 0 || mm->gpa < *gpa) continue; if (mmnext == NULL || mm->gpa < mmnext->gpa) mmnext = mm; } if (mmnext != NULL) { *gpa = mmnext->gpa; if (segid) *segid = mmnext->segid; if (segoff) *segoff = mmnext->segoff; if (len) *len = mmnext->len; if (prot) *prot = mmnext->prot; if (flags) *flags = mmnext->flags; return (0); } else { return (ENOENT); } } static void vm_free_memmap(struct vm *vm, int ident) { struct mem_map *mm; int error __diagused; mm = &vm->mem_maps[ident]; if (mm->len) { error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, mm->gpa + mm->len); KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", __func__, error)); bzero(mm, sizeof(struct mem_map)); } } static __inline bool sysmem_mapping(struct vm *vm, struct mem_map *mm) { if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) return (true); else return (false); } vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm) { struct mem_map *mm; vm_paddr_t maxaddr; int i; maxaddr = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (sysmem_mapping(vm, mm)) { if (maxaddr < mm->gpa + mm->len) maxaddr = mm->gpa + mm->len; } } return (maxaddr); } int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault) { int error; error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); return (error); } void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, mem_region_read_t mmio_read, mem_region_write_t mmio_write) { int i; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start == 0 && vm->mmio_region[i].end == 0) { vm->mmio_region[i].start = start; vm->mmio_region[i].end = start + size; vm->mmio_region[i].read = mmio_read; vm->mmio_region[i].write = mmio_write; return; } } panic("%s: No free MMIO region", __func__); } void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) { int i; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start == start && vm->mmio_region[i].end == start + size) { memset(&vm->mmio_region[i], 0, sizeof(vm->mmio_region[i])); return; } } panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, start + size); } static int 
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) { struct vm *vm; struct vm_exit *vme; struct vie *vie; struct hyp *hyp; uint64_t fault_ipa; struct vm_guest_paging *paging; struct vmm_mmio_region *vmr; int error, i; vm = vcpu->vm; hyp = vm->cookie; if (!hyp->aplic_attached) goto out_user; vme = &vcpu->exitinfo; vie = &vme->u.inst_emul.vie; paging = &vme->u.inst_emul.paging; fault_ipa = vme->u.inst_emul.gpa; vmr = NULL; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start <= fault_ipa && vm->mmio_region[i].end > fault_ipa) { vmr = &vm->mmio_region[i]; break; } } if (vmr == NULL) goto out_user; error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, vmr->read, vmr->write, retu); return (error); out_user: *retu = true; return (0); } int vm_suspend(struct vm *vm, enum vm_suspend_how how) { int i; if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); } VM_CTR1(vm, "virtual machine successfully suspended %d", how); /* * Notify all active vcpus that they are now suspended. */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i)); } return (0); } void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) { struct vm *vm = vcpu->vm; struct vm_exit *vmexit; KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); vmexit = vm_exitinfo(vcpu); vmexit->pc = pc; vmexit->inst_length = 4; vmexit->exitcode = VM_EXITCODE_SUSPENDED; vmexit->u.suspended.how = vm->suspend; } void vm_exit_debug(struct vcpu *vcpu, uint64_t pc) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vcpu); vmexit->pc = pc; vmexit->inst_length = 4; vmexit->exitcode = VM_EXITCODE_DEBUG; } int vm_activate_cpu(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EBUSY); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); return (0); } int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { vm->debug_cpus = vm->active_cpus; for (int i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i)); } } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EINVAL); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); vcpu_notify_event(vcpu); } return (0); } int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { CPU_ZERO(&vm->debug_cpus); } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) return (EINVAL); CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); } return (0); } int vcpu_debugged(struct vcpu *vcpu) { return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); } cpuset_t vm_active_cpus(struct vm *vm) { return (vm->active_cpus); } cpuset_t vm_debug_cpus(struct vm *vm) { return (vm->debug_cpus); } cpuset_t vm_suspended_cpus(struct vm *vm) { return (vm->suspended_cpus); } void * vcpu_stats(struct vcpu *vcpu) { return (vcpu->stats); } /* * This function is called to ensure that a vcpu "sees" a pending event * as soon as possible: * - If the vcpu thread is sleeping then it is woken up. * - If the vcpu is running on a different host_cpu then an IPI will be directed * to the host_cpu to cause the vcpu to trap into the hypervisor. 
*/ static void vcpu_notify_event_locked(struct vcpu *vcpu) { int hostcpu; hostcpu = vcpu->hostcpu; if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { ipi_cpu(hostcpu, vmm_ipinum); } else { /* * If the 'vcpu' is running on 'curcpu' then it must * be sending a notification to itself (e.g. SELF_IPI). * The pending event will be picked up when the vcpu * transitions back to guest context. */ } } else { KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " "with hostcpu %d", vcpu->state, hostcpu)); if (vcpu->state == VCPU_SLEEPING) wakeup_one(vcpu); } } void vcpu_notify_event(struct vcpu *vcpu) { vcpu_lock(vcpu); vcpu_notify_event_locked(vcpu); vcpu_unlock(vcpu); } static void restore_guest_fpustate(struct vcpu *vcpu) { /* Flush host state to the pcb. */ fpe_state_save(curthread); /* Ensure the VFP state will be re-loaded when exiting the guest. */ PCPU_SET(fpcurthread, NULL); /* restore guest FPU state */ fpe_enable(); fpe_restore(vcpu->guestfpu); /* * The FPU is now "dirty" with the guest's state so turn on emulation * to trap any access to the FPU by the host. */ fpe_disable(); } static void save_guest_fpustate(struct vcpu *vcpu) { /* Save guest FPE state. */ fpe_enable(); fpe_store(vcpu->guestfpu); fpe_disable(); KASSERT(PCPU_GET(fpcurthread) == NULL, ("%s: fpcurthread set with guest registers", __func__)); } static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_assert_locked(vcpu); /* * State transitions from the vmmdev_ioctl() must always begin from * the VCPU_IDLE state. This guarantees that there is only a single * ioctl() operating on a vcpu at any point. */ if (from_idle) { while (vcpu->state != VCPU_IDLE) { vcpu_notify_event_locked(vcpu); msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz / 1000); } } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); } if (vcpu->state == VCPU_RUNNING) { KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " "mismatch for running vcpu", curcpu, vcpu->hostcpu)); } else { KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " "vcpu that is not running", vcpu->hostcpu)); } /* * The following state transitions are allowed: * IDLE -> FROZEN -> IDLE * FROZEN -> RUNNING -> FROZEN * FROZEN -> SLEEPING -> FROZEN */ switch (vcpu->state) { case VCPU_IDLE: case VCPU_RUNNING: case VCPU_SLEEPING: error = (newstate != VCPU_FROZEN); break; case VCPU_FROZEN: error = (newstate == VCPU_FROZEN); break; default: error = 1; break; } if (error) return (EBUSY); vcpu->state = newstate; if (newstate == VCPU_RUNNING) vcpu->hostcpu = curcpu; else vcpu->hostcpu = NOCPU; if (newstate == VCPU_IDLE) wakeup(&vcpu->state); return (0); } static void vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d\n", error, newstate); } static void vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d", error, newstate); } int vm_get_capability(struct vcpu *vcpu, int type, int *retval) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_getcap(vcpu->cookie, type, retval)); } int vm_set_capability(struct vcpu *vcpu, int type, int val) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_setcap(vcpu->cookie, type, val)); } struct vm * 
vcpu_vm(struct vcpu *vcpu) { return (vcpu->vm); } int vcpu_vcpuid(struct vcpu *vcpu) { return (vcpu->vcpuid); } void * vcpu_get_cookie(struct vcpu *vcpu) { return (vcpu->cookie); } struct vcpu * vm_vcpu(struct vm *vm, int vcpuid) { return (vm->vcpu[vcpuid]); } int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_lock(vcpu); error = vcpu_set_state_locked(vcpu, newstate, from_idle); vcpu_unlock(vcpu); return (error); } enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu) { enum vcpu_state state; vcpu_lock(vcpu); state = vcpu->state; if (hostcpu != NULL) *hostcpu = vcpu->hostcpu; vcpu_unlock(vcpu); return (state); } static void * _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { int i, count, pageoff; struct mem_map *mm; vm_page_t m; pageoff = gpa & PAGE_MASK; if (len > PAGE_SIZE - pageoff) panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); count = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && gpa < mm->gpa + mm->len) { count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); break; } } if (count == 1) { *cookie = m; return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); } else { *cookie = NULL; return (NULL); } } void * vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { #ifdef INVARIANTS /* * The current vcpu should be frozen to ensure 'vm_memmap[]' * stability. */ int state = vcpu_get_state(vcpu, NULL); KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", __func__, state)); #endif return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); } void * vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { sx_assert(&vm->mem_segs_lock, SX_LOCKED); return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); } void vm_gpa_release(void *cookie) { vm_page_t m = cookie; vm_page_unwire(m, PQ_ACTIVE); } int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { if (reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); } int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; if (reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_SEPC) return (error); vcpu->nextpc = val; return (0); } void * vm_get_cookie(struct vm *vm) { return (vm->cookie); } int vm_inject_exception(struct vcpu *vcpu, uint64_t scause) { return (vmmops_exception(vcpu->cookie, scause)); } int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr) { return (aplic_attach_to_vm(vm->cookie, descr)); } int vm_assert_irq(struct vm *vm, uint32_t irq) { return (aplic_inject_irq(vm->cookie, -1, irq, true)); } int vm_deassert_irq(struct vm *vm, uint32_t irq) { return (aplic_inject_irq(vm->cookie, -1, irq, false)); } int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, int func) { return (aplic_inject_msi(vm->cookie, msg, addr)); } static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { vcpu_lock(vcpu); while (1) { if (aplic_check_pending(vcpu->cookie)) break; if (riscv_check_ipi(vcpu->cookie, false)) break; + if (riscv_check_interrupts_pending(vcpu->cookie)) + break; + if (vcpu_should_yield(vcpu)) break; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); /* * XXX msleep_spin() cannot be interrupted by signals so * wake up periodically to check pending signals. 
*/ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000); vcpu_require_state_locked(vcpu, VCPU_FROZEN); } vcpu_unlock(vcpu); *retu = false; return (0); } static int vm_handle_paging(struct vcpu *vcpu, bool *retu) { struct vm *vm; struct vm_exit *vme; struct vm_map *map; uint64_t addr; pmap_t pmap; int ftype, rv; vm = vcpu->vm; vme = &vcpu->exitinfo; pmap = vmspace_pmap(vm->vmspace); addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); dprintf("%s: %lx\n", __func__, addr); switch (vme->scause) { case SCAUSE_STORE_GUEST_PAGE_FAULT: ftype = VM_PROT_WRITE; break; case SCAUSE_FETCH_GUEST_PAGE_FAULT: ftype = VM_PROT_EXECUTE; break; case SCAUSE_LOAD_GUEST_PAGE_FAULT: ftype = VM_PROT_READ; break; default: panic("unknown page trap: %lu", vme->scause); } /* The page exists, but the page table needs to be updated. */ if (pmap_fault(pmap, addr, ftype)) return (0); map = &vm->vmspace->vm_map; rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); if (rv != KERN_SUCCESS) { printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", __func__, addr, ftype, rv); return (EFAULT); } return (0); } static int vm_handle_suspend(struct vcpu *vcpu, bool *retu) { struct vm *vm = vcpu->vm; int error, i; struct thread *td; error = 0; td = curthread; CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); /* * Wait until all 'active_cpus' have suspended themselves. * * Since a VM may be suspended at any time including when one or * more vcpus are doing a rendezvous we need to call the rendezvous * handler while we are waiting to prevent a deadlock. */ vcpu_lock(vcpu); while (error == 0) { if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) break; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); if (td_ast_pending(td, TDA_SUSPEND)) { vcpu_unlock(vcpu); error = thread_check_susp(td, false); vcpu_lock(vcpu); } } vcpu_unlock(vcpu); /* * Wakeup the other sleeping vcpus and return to userspace. */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { vcpu_notify_event(vm_vcpu(vm, i)); } } *retu = true; return (error); } int vm_run(struct vcpu *vcpu) { struct vm_eventinfo evinfo; struct vm_exit *vme; struct vm *vm; pmap_t pmap; int error; int vcpuid; bool retu; vm = vcpu->vm; dprintf("%s\n", __func__); vcpuid = vcpu->vcpuid; if (!CPU_ISSET(vcpuid, &vm->active_cpus)) return (EINVAL); if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); pmap = vmspace_pmap(vm->vmspace); vme = &vcpu->exitinfo; evinfo.rptr = NULL; evinfo.sptr = &vm->suspend; evinfo.iptr = NULL; restart: critical_enter(); restore_guest_fpustate(vcpu); vcpu_require_state(vcpu, VCPU_RUNNING); error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); vcpu_require_state(vcpu, VCPU_FROZEN); save_guest_fpustate(vcpu); critical_exit(); if (error == 0) { retu = false; switch (vme->exitcode) { case VM_EXITCODE_INST_EMUL: vcpu->nextpc = vme->pc + vme->inst_length; error = vm_handle_inst_emul(vcpu, &retu); break; case VM_EXITCODE_WFI: vcpu->nextpc = vme->pc + vme->inst_length; error = vm_handle_wfi(vcpu, vme, &retu); break; case VM_EXITCODE_ECALL: /* Handle in userland. */ vcpu->nextpc = vme->pc + vme->inst_length; retu = true; break; case VM_EXITCODE_PAGING: vcpu->nextpc = vme->pc; error = vm_handle_paging(vcpu, &retu); break; case VM_EXITCODE_BOGUS: vcpu->nextpc = vme->pc; retu = false; error = 0; break; case VM_EXITCODE_SUSPENDED: vcpu->nextpc = vme->pc; error = vm_handle_suspend(vcpu, &retu); break; default: /* Handle in userland. 
*/ vcpu->nextpc = vme->pc; retu = true; break; } } if (error == 0 && retu == false) goto restart; return (error); } diff --git a/sys/riscv/vmm/vmm_riscv.c b/sys/riscv/vmm/vmm_riscv.c index e276f8583e37..6ac945dfa1d0 100644 --- a/sys/riscv/vmm/vmm_riscv.c +++ b/sys/riscv/vmm/vmm_riscv.c @@ -1,926 +1,939 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "riscv.h" #include "vmm_aplic.h" #include "vmm_stat.h" MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP"); DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); static int m_op(uint32_t insn, int match, int mask) { if (((insn ^ match) & mask) == 0) return (1); return (0); } static inline void riscv_set_active_vcpu(struct hypctx *hypctx) { DPCPU_SET(vcpu, hypctx); } struct hypctx * riscv_get_active_vcpu(void) { return (DPCPU_GET(vcpu)); } int vmmops_modinit(void) { if (!has_hyp) { printf("vmm: riscv hart doesn't support H-extension.\n"); return (ENXIO); } - if (!has_sstc) { - printf("vmm: riscv hart doesn't support SSTC extension.\n"); - return (ENXIO); - } - return (0); } int vmmops_modcleanup(void) { return (0); } void * vmmops_init(struct vm *vm, pmap_t pmap) { struct hyp *hyp; vm_size_t size; size = round_page(sizeof(struct hyp) + sizeof(struct hypctx *) * vm_get_maxcpus(vm)); hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); hyp->vm = vm; hyp->aplic_attached = false; aplic_vminit(hyp); return (hyp); } static void vmmops_delegate(void) { uint64_t hedeleg; uint64_t hideleg; hedeleg = (1UL << SCAUSE_INST_MISALIGNED); hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION); hedeleg |= (1UL << SCAUSE_BREAKPOINT); hedeleg |= (1UL << SCAUSE_ECALL_USER); hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT); hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT); hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT); csr_write(hedeleg, hedeleg); hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR); hideleg |= (1UL << IRQ_TIMER_HYPERVISOR); hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR); csr_write(hideleg, hideleg); } static void vmmops_vcpu_restore_csrs(struct hypctx *hypctx) { struct hypcsr *csrs; csrs = &hypctx->guest_csrs; csr_write(vsstatus, csrs->vsstatus); csr_write(vsie, csrs->vsie); csr_write(vstvec, csrs->vstvec); csr_write(vsscratch, csrs->vsscratch); csr_write(vsepc, csrs->vsepc); csr_write(vscause, csrs->vscause); csr_write(vstval, csrs->vstval); csr_write(hvip, csrs->hvip); csr_write(vsatp, csrs->vsatp); } static void vmmops_vcpu_save_csrs(struct hypctx *hypctx) { struct hypcsr *csrs; csrs = &hypctx->guest_csrs; csrs->vsstatus = csr_read(vsstatus); csrs->vsie = csr_read(vsie); csrs->vstvec = csr_read(vstvec); csrs->vsscratch = csr_read(vsscratch); csrs->vsepc = csr_read(vsepc); csrs->vscause = csr_read(vscause); csrs->vstval = csr_read(vstval); csrs->hvip = csr_read(hvip); csrs->vsatp = csr_read(vsatp); } void * vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) { struct hypctx *hypctx; struct hyp *hyp; vm_size_t size; hyp = vmi; dprintf("%s: hyp %p\n", __func__, hyp); KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), ("%s: Invalid vcpuid %d", __func__, vcpuid)); size = round_page(sizeof(struct hypctx)); hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); hypctx->hyp = hyp; hypctx->vcpu = vcpu1; hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM; /* sstatus */ hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE; hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL; /* hstatus */ hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW; hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP; hypctx->cpu_id = vcpuid; hyp->ctx[vcpuid] = hypctx; aplic_cpuinit(hypctx); + 
vtimer_cpuinit(hypctx); return (hypctx); } static int riscv_vmm_pinit(pmap_t pmap) { dprintf("%s: pmap %p\n", __func__, pmap); pmap_pinit_stage(pmap, PM_STAGE2); return (1); } struct vmspace * vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) { return (vmspace_alloc(min, max, riscv_vmm_pinit)); } void vmmops_vmspace_free(struct vmspace *vmspace) { pmap_remove_pages(vmspace_pmap(vmspace)); vmspace_free(vmspace); } static void riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data, struct hyptrap *trap) { register struct hyptrap * htrap asm("a0"); uintptr_t old_hstatus; uintptr_t old_stvec; uintptr_t entry; uint64_t val; uint64_t tmp; int intr; entry = (uintptr_t)&vmm_unpriv_trap; htrap = trap; intr = intr_disable(); old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus); /* * Setup a temporary exception vector, so that if hlvx.hu raises * an exception we catch it in the vmm_unpriv_trap(). */ old_stvec = csr_swap(stvec, entry); /* * Read first two bytes of instruction assuming it could be a * compressed one. */ __asm __volatile(".option push\n" ".option norvc\n" "hlvx.hu %[val], (%[addr])\n" ".option pop\n" : [val] "=r" (val) : [addr] "r" (guest_addr), "r" (htrap) : "a1", "memory"); /* * Check if previous hlvx.hu did not raise an exception, and then * read the rest of instruction if it is a full-length one. */ if (trap->scause == -1 && (val & 0x3) == 0x3) { guest_addr += 2; __asm __volatile(".option push\n" ".option norvc\n" "hlvx.hu %[tmp], (%[addr])\n" ".option pop\n" : [tmp] "=r" (tmp) : [addr] "r" (guest_addr), "r" (htrap) : "a1", "memory"); val |= (tmp << 16); } csr_write(hstatus, old_hstatus); csr_write(stvec, old_stvec); intr_restore(intr); *data = val; } static int riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret, struct hyptrap *trap) { uintptr_t guest_addr; struct vie *vie; uint64_t insn; int reg_num; int rs2, rd; int direction; int sign_extend; int access_size; guest_addr = vme_ret->sepc; KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT || vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT || vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT, ("Invalid scause")); direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ? 
VM_DIR_WRITE : VM_DIR_READ; sign_extend = 1; bzero(trap, sizeof(struct hyptrap)); trap->scause = -1; riscv_unpriv_read(hypctx, guest_addr, &insn, trap); if (trap->scause != -1) return (-1); if ((insn & 0x3) == 0x3) { rs2 = (insn & RS2_MASK) >> RS2_SHIFT; rd = (insn & RD_MASK) >> RD_SHIFT; if (direction == VM_DIR_WRITE) { if (m_op(insn, MATCH_SB, MASK_SB)) access_size = 1; else if (m_op(insn, MATCH_SH, MASK_SH)) access_size = 2; else if (m_op(insn, MATCH_SW, MASK_SW)) access_size = 4; else if (m_op(insn, MATCH_SD, MASK_SD)) access_size = 8; else { printf("unknown store instr at %lx", guest_addr); return (-2); } reg_num = rs2; } else { if (m_op(insn, MATCH_LB, MASK_LB)) access_size = 1; else if (m_op(insn, MATCH_LH, MASK_LH)) access_size = 2; else if (m_op(insn, MATCH_LW, MASK_LW)) access_size = 4; else if (m_op(insn, MATCH_LD, MASK_LD)) access_size = 8; else if (m_op(insn, MATCH_LBU, MASK_LBU)) { access_size = 1; sign_extend = 0; } else if (m_op(insn, MATCH_LHU, MASK_LHU)) { access_size = 2; sign_extend = 0; } else if (m_op(insn, MATCH_LWU, MASK_LWU)) { access_size = 4; sign_extend = 0; } else { printf("unknown load instr at %lx", guest_addr); return (-3); } reg_num = rd; } vme_ret->inst_length = 4; } else { rs2 = (insn >> 7) & 0x7; rs2 += 0x8; rd = (insn >> 2) & 0x7; rd += 0x8; if (direction == VM_DIR_WRITE) { if (m_op(insn, MATCH_C_SW, MASK_C_SW)) access_size = 4; else if (m_op(insn, MATCH_C_SD, MASK_C_SD)) access_size = 8; else { printf("unknown compressed store instr at %lx", guest_addr); return (-4); } } else { if (m_op(insn, MATCH_C_LW, MASK_C_LW)) access_size = 4; else if (m_op(insn, MATCH_C_LD, MASK_C_LD)) access_size = 8; else { printf("unknown load instr at %lx", guest_addr); return (-5); } } reg_num = rd; vme_ret->inst_length = 2; } vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) | (vme_ret->stval & 0x3); dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn, reg_num, vme_ret->u.inst_emul.gpa); vie = &vme_ret->u.inst_emul.vie; vie->dir = direction; vie->reg = reg_num; vie->sign_extend = sign_extend; vie->access_size = access_size; return (0); } static bool riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme, pmap_t pmap) { struct hyptrap trap; uint64_t insn; uint64_t gpa; bool handled; bool retu; int ret; int i; handled = false; if (vme->scause & SCAUSE_INTR) { /* * Host interrupt? Leave critical section to handle. */ vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1); vme->exitcode = VM_EXITCODE_BOGUS; vme->inst_length = 0; return (handled); } switch (vme->scause) { case SCAUSE_FETCH_GUEST_PAGE_FAULT: case SCAUSE_LOAD_GUEST_PAGE_FAULT: case SCAUSE_STORE_GUEST_PAGE_FAULT: gpa = (vme->htval << 2) | (vme->stval & 0x3); if (vm_mem_allocated(hypctx->vcpu, gpa)) { vme->exitcode = VM_EXITCODE_PAGING; vme->inst_length = 0; vme->u.paging.gpa = gpa; } else { ret = riscv_gen_inst_emul_data(hypctx, vme, &trap); if (ret != 0) { vme->exitcode = VM_EXITCODE_HYP; vme->u.hyp.scause = trap.scause; break; } vme->exitcode = VM_EXITCODE_INST_EMUL; } break; case SCAUSE_ILLEGAL_INSTRUCTION: /* * TODO: handle illegal instruction properly. 
*/ printf("%s: Illegal instruction at %lx stval 0x%lx htval " "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval); vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; case SCAUSE_VIRTUAL_SUPERVISOR_ECALL: retu = false; vmm_sbi_ecall(hypctx->vcpu, &retu); if (retu == false) { handled = true; break; } for (i = 0; i < nitems(vme->u.ecall.args); i++) vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i]; vme->exitcode = VM_EXITCODE_ECALL; handled = false; break; case SCAUSE_VIRTUAL_INSTRUCTION: insn = vme->stval; if (m_op(insn, MATCH_WFI, MASK_WFI)) vme->exitcode = VM_EXITCODE_WFI; else vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; default: printf("unknown scause %lx\n", vme->scause); vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; } return (handled); } int vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault) { /* Implement me. */ return (ENOSYS); } void riscv_send_ipi(struct hypctx *hypctx, int hart_id) { struct hyp *hyp; struct vm *vm; hyp = hypctx->hyp; vm = hyp->vm; atomic_set_32(&hypctx->ipi_pending, 1); vcpu_notify_event(vm_vcpu(vm, hart_id)); } int riscv_check_ipi(struct hypctx *hypctx, bool clear) { int val; if (clear) val = atomic_swap_32(&hypctx->ipi_pending, 0); else val = hypctx->ipi_pending; return (val); } +bool +riscv_check_interrupts_pending(struct hypctx *hypctx) +{ + + if (hypctx->interrupts_pending) + return (true); + + return (false); +} + static void riscv_sync_interrupts(struct hypctx *hypctx) { int pending; pending = aplic_check_pending(hypctx); - if (pending) hypctx->guest_csrs.hvip |= HVIP_VSEIP; else hypctx->guest_csrs.hvip &= ~HVIP_VSEIP; - csr_write(hvip, hypctx->guest_csrs.hvip); -} - -static void -riscv_sync_ipi(struct hypctx *hypctx) -{ - /* Guest clears VSSIP bit manually. */ if (riscv_check_ipi(hypctx, true)) hypctx->guest_csrs.hvip |= HVIP_VSSIP; + if (riscv_check_interrupts_pending(hypctx)) + hypctx->guest_csrs.hvip |= HVIP_VSTIP; + else + hypctx->guest_csrs.hvip &= ~HVIP_VSTIP; + csr_write(hvip, hypctx->guest_csrs.hvip); } int vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) { struct hypctx *hypctx; struct vm_exit *vme; struct vcpu *vcpu; register_t val; + uint64_t hvip; bool handled; hypctx = (struct hypctx *)vcpui; vcpu = hypctx->vcpu; vme = vm_exitinfo(vcpu); hypctx->guest_regs.hyp_sepc = (uint64_t)pc; vmmops_delegate(); /* * From The RISC-V Instruction Set Manual * Volume II: RISC-V Privileged Architectures * * If the new virtual machine's guest physical page tables * have been modified, it may be necessary to execute an HFENCE.GVMA * instruction (see Section 5.3.2) before or after writing hgatp. */ __asm __volatile("hfence.gvma" ::: "memory"); csr_write(hgatp, pmap->pm_satp); - csr_write(henvcfg, HENVCFG_STCE); + if (has_sstc) + csr_write(henvcfg, HENVCFG_STCE); csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE); /* TODO: should we trap rdcycle / rdtime? */ csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM); vmmops_vcpu_restore_csrs(hypctx); for (;;) { dprintf("%s: pc %lx\n", __func__, pc); if (hypctx->has_exception) { hypctx->has_exception = false; /* * TODO: implement exception injection. 
*/ } val = intr_disable(); /* Check if the vcpu is suspended */ if (vcpu_suspended(evinfo)) { intr_restore(val); vm_exit_suspended(vcpu, pc); break; } if (vcpu_debugged(vcpu)) { intr_restore(val); vm_exit_debug(vcpu, pc); break; } /* * TODO: What happens if a timer interrupt is asserted exactly * here, but for the previous VM? */ riscv_set_active_vcpu(hypctx); aplic_flush_hwstate(hypctx); - riscv_sync_interrupts(hypctx); - riscv_sync_ipi(hypctx); dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n", __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus, hypctx->guest_regs.hyp_hstatus); vmm_switch(hypctx); dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__, hypctx->guest_regs.hyp_hstatus); + /* Guest can clear VSSIP. It can't clear VSTIP or VSEIP. */ + hvip = csr_read(hvip); + if ((hypctx->guest_csrs.hvip ^ hvip) & HVIP_VSSIP) { + if (hvip & HVIP_VSSIP) { + /* TODO: VSSIP was set by guest. */ + } else { + /* VSSIP was cleared by guest. */ + hypctx->guest_csrs.hvip &= ~HVIP_VSSIP; + } + } + aplic_sync_hwstate(hypctx); - riscv_sync_interrupts(hypctx); /* * TODO: deactivate stage 2 pmap here if needed. */ vme->scause = csr_read(scause); vme->sepc = csr_read(sepc); vme->stval = csr_read(stval); vme->htval = csr_read(htval); vme->htinst = csr_read(htinst); intr_restore(val); vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); vme->pc = hypctx->guest_regs.hyp_sepc; vme->inst_length = INSN_SIZE; handled = riscv_handle_world_switch(hypctx, vme, pmap); if (handled == false) /* Exit loop to emulate instruction. */ break; else { /* Resume guest execution from the next instruction. */ hypctx->guest_regs.hyp_sepc += vme->inst_length; } } vmmops_vcpu_save_csrs(hypctx); return (0); } static void riscv_pcpu_vmcleanup(void *arg) { struct hyp *hyp; int i, maxcpus; hyp = arg; maxcpus = vm_get_maxcpus(hyp->vm); for (i = 0; i < maxcpus; i++) { if (riscv_get_active_vcpu() == hyp->ctx[i]) { riscv_set_active_vcpu(NULL); break; } } } void vmmops_vcpu_cleanup(void *vcpui) { struct hypctx *hypctx; hypctx = vcpui; dprintf("%s\n", __func__); aplic_cpucleanup(hypctx); free(hypctx, M_HYP); } void vmmops_cleanup(void *vmi) { struct hyp *hyp; hyp = vmi; dprintf("%s\n", __func__); aplic_vmcleanup(hyp); smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp); free(hyp, M_HYP); } /* * Return register value. Registers have different sizes and an explicit cast * must be made to ensure proper conversion. 
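 * hypctx_regptr() returns a pointer into the saved guest register file, or
 * NULL for registers it does not track; VM_REG_GUEST_ZERO is special-cased
 * in vmmops_getreg().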
*/ static uint64_t * hypctx_regptr(struct hypctx *hypctx, int reg) { switch (reg) { case VM_REG_GUEST_RA: return (&hypctx->guest_regs.hyp_ra); case VM_REG_GUEST_SP: return (&hypctx->guest_regs.hyp_sp); case VM_REG_GUEST_GP: return (&hypctx->guest_regs.hyp_gp); case VM_REG_GUEST_TP: return (&hypctx->guest_regs.hyp_tp); case VM_REG_GUEST_T0: return (&hypctx->guest_regs.hyp_t[0]); case VM_REG_GUEST_T1: return (&hypctx->guest_regs.hyp_t[1]); case VM_REG_GUEST_T2: return (&hypctx->guest_regs.hyp_t[2]); case VM_REG_GUEST_S0: return (&hypctx->guest_regs.hyp_s[0]); case VM_REG_GUEST_S1: return (&hypctx->guest_regs.hyp_s[1]); case VM_REG_GUEST_A0: return (&hypctx->guest_regs.hyp_a[0]); case VM_REG_GUEST_A1: return (&hypctx->guest_regs.hyp_a[1]); case VM_REG_GUEST_A2: return (&hypctx->guest_regs.hyp_a[2]); case VM_REG_GUEST_A3: return (&hypctx->guest_regs.hyp_a[3]); case VM_REG_GUEST_A4: return (&hypctx->guest_regs.hyp_a[4]); case VM_REG_GUEST_A5: return (&hypctx->guest_regs.hyp_a[5]); case VM_REG_GUEST_A6: return (&hypctx->guest_regs.hyp_a[6]); case VM_REG_GUEST_A7: return (&hypctx->guest_regs.hyp_a[7]); case VM_REG_GUEST_S2: return (&hypctx->guest_regs.hyp_s[2]); case VM_REG_GUEST_S3: return (&hypctx->guest_regs.hyp_s[3]); case VM_REG_GUEST_S4: return (&hypctx->guest_regs.hyp_s[4]); case VM_REG_GUEST_S5: return (&hypctx->guest_regs.hyp_s[5]); case VM_REG_GUEST_S6: return (&hypctx->guest_regs.hyp_s[6]); case VM_REG_GUEST_S7: return (&hypctx->guest_regs.hyp_s[7]); case VM_REG_GUEST_S8: return (&hypctx->guest_regs.hyp_s[8]); case VM_REG_GUEST_S9: return (&hypctx->guest_regs.hyp_s[9]); case VM_REG_GUEST_S10: return (&hypctx->guest_regs.hyp_s[10]); case VM_REG_GUEST_S11: return (&hypctx->guest_regs.hyp_s[11]); case VM_REG_GUEST_T3: return (&hypctx->guest_regs.hyp_t[3]); case VM_REG_GUEST_T4: return (&hypctx->guest_regs.hyp_t[4]); case VM_REG_GUEST_T5: return (&hypctx->guest_regs.hyp_t[5]); case VM_REG_GUEST_T6: return (&hypctx->guest_regs.hyp_t[6]); case VM_REG_GUEST_SEPC: return (&hypctx->guest_regs.hyp_sepc); default: break; } return (NULL); } int vmmops_getreg(void *vcpui, int reg, uint64_t *retval) { uint64_t *regp; int running, hostcpu; struct hypctx *hypctx; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); if (reg == VM_REG_GUEST_ZERO) { *retval = 0; return (0); } regp = hypctx_regptr(hypctx, reg); if (regp == NULL) return (EINVAL); *retval = *regp; return (0); } int vmmops_setreg(void *vcpui, int reg, uint64_t val) { struct hypctx *hypctx; int running, hostcpu; uint64_t *regp; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); regp = hypctx_regptr(hypctx, reg); if (regp == NULL) return (EINVAL); *regp = val; return (0); } int vmmops_exception(void *vcpui, uint64_t scause) { struct hypctx *hypctx; int running, hostcpu; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); /* TODO: implement me. 
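	 * Exception injection is not wired up yet (see the matching TODO in
	 * vmmops_run()), so the request is rejected with ENOSYS.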
*/ return (ENOSYS); } int vmmops_getcap(void *vcpui, int num, int *retval) { int ret; ret = ENOENT; switch (num) { case VM_CAP_SSTC: *retval = has_sstc; ret = 0; break; case VM_CAP_UNRESTRICTED_GUEST: *retval = 1; ret = 0; break; default: break; } return (ret); } int vmmops_setcap(void *vcpui, int num, int val) { return (ENOENT); } diff --git a/sys/riscv/vmm/vmm_sbi.c b/sys/riscv/vmm/vmm_sbi.c index 6444b8c9e396..63dcf9b4a7ae 100644 --- a/sys/riscv/vmm/vmm_sbi.c +++ b/sys/riscv/vmm/vmm_sbi.c @@ -1,179 +1,205 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "riscv.h" static int vmm_sbi_handle_rfnc(struct vcpu *vcpu, struct hypctx *hypctx) { uint64_t hart_mask __unused; uint64_t start __unused; uint64_t size __unused; uint64_t asid __unused; uint64_t func_id; func_id = hypctx->guest_regs.hyp_a[6]; hart_mask = hypctx->guest_regs.hyp_a[0]; start = hypctx->guest_regs.hyp_a[2]; size = hypctx->guest_regs.hyp_a[3]; asid = hypctx->guest_regs.hyp_a[4]; dprintf("%s: %ld hart_mask %lx start %lx size %lx\n", __func__, func_id, hart_mask, start, size); /* TODO: implement remote sfence. 
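	 * The calls below are acknowledged with SBI_SUCCESS without doing
	 * any flushing; a conservative implementation could, for example,
	 * execute fence.i / hfence.vvma on every hart that runs a vcpu of
	 * this VM.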
*/ switch (func_id) { case SBI_RFNC_REMOTE_FENCE_I: break; case SBI_RFNC_REMOTE_SFENCE_VMA: break; case SBI_RFNC_REMOTE_SFENCE_VMA_ASID: break; default: break; } hypctx->guest_regs.hyp_a[0] = 0; return (0); } +static int +vmm_sbi_handle_time(struct vcpu *vcpu, struct hypctx *hypctx) +{ + uint64_t func_id; + uint64_t next_val; + int ret; + + func_id = hypctx->guest_regs.hyp_a[6]; + next_val = hypctx->guest_regs.hyp_a[0]; + + switch (func_id) { + case SBI_TIME_SET_TIMER: + vtimer_set_timer(hypctx, next_val); + ret = 0; + break; + default: + ret = -1; + break; + } + + hypctx->guest_regs.hyp_a[0] = ret; + + return (0); +} + static int vmm_sbi_handle_ipi(struct vcpu *vcpu, struct hypctx *hypctx) { struct hypctx *target_hypctx; struct vcpu *target_vcpu __unused; cpuset_t active_cpus; struct hyp *hyp; uint64_t hart_mask; uint64_t func_id; int hart_id; int bit; int ret; func_id = hypctx->guest_regs.hyp_a[6]; hart_mask = hypctx->guest_regs.hyp_a[0]; dprintf("%s: hart_mask %lx\n", __func__, hart_mask); hyp = hypctx->hyp; active_cpus = vm_active_cpus(hyp->vm); switch (func_id) { case SBI_IPI_SEND_IPI: while ((bit = ffs(hart_mask))) { hart_id = (bit - 1); hart_mask &= ~(1u << hart_id); if (CPU_ISSET(hart_id, &active_cpus)) { /* TODO. */ target_vcpu = vm_vcpu(hyp->vm, hart_id); target_hypctx = hypctx->hyp->ctx[hart_id]; riscv_send_ipi(target_hypctx, hart_id); } } ret = 0; break; default: printf("%s: unknown func %ld\n", __func__, func_id); ret = -1; break; } hypctx->guest_regs.hyp_a[0] = ret; return (0); } int vmm_sbi_ecall(struct vcpu *vcpu, bool *retu) { int sbi_extension_id __unused; struct hypctx *hypctx; hypctx = riscv_get_active_vcpu(); sbi_extension_id = hypctx->guest_regs.hyp_a[7]; dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__, hypctx->guest_regs.hyp_a[0], hypctx->guest_regs.hyp_a[1], hypctx->guest_regs.hyp_a[2], hypctx->guest_regs.hyp_a[3], hypctx->guest_regs.hyp_a[4], hypctx->guest_regs.hyp_a[5], hypctx->guest_regs.hyp_a[6], hypctx->guest_regs.hyp_a[7]); switch (sbi_extension_id) { case SBI_EXT_ID_RFNC: vmm_sbi_handle_rfnc(vcpu, hypctx); break; case SBI_EXT_ID_TIME: + vmm_sbi_handle_time(vcpu, hypctx); break; case SBI_EXT_ID_IPI: vmm_sbi_handle_ipi(vcpu, hypctx); break; default: *retu = true; break; } return (0); } diff --git a/sys/riscv/vmm/vmm_vtimer.c b/sys/riscv/vmm/vmm_vtimer.c new file mode 100644 index 000000000000..0dadc962114f --- /dev/null +++ b/sys/riscv/vmm/vmm_vtimer.c @@ -0,0 +1,117 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "riscv.h" + +#define VTIMER_DEFAULT_FREQ 1000000 + +static int +vtimer_get_timebase(uint32_t *freq) +{ + phandle_t node; + int len; + + node = OF_finddevice("/cpus"); + if (node == -1) + return (ENXIO); + + len = OF_getproplen(node, "timebase-frequency"); + if (len != 4) + return (ENXIO); + + OF_getencprop(node, "timebase-frequency", freq, len); + + return (0); +} + +void +vtimer_cpuinit(struct hypctx *hypctx) +{ + struct vtimer *vtimer; + uint32_t freq; + int error; + + vtimer = &hypctx->vtimer; + mtx_init(&vtimer->mtx, "vtimer callout mutex", NULL, MTX_DEF); + callout_init_mtx(&vtimer->callout, &vtimer->mtx, 0); + + error = vtimer_get_timebase(&freq); + if (error) + freq = VTIMER_DEFAULT_FREQ; + + vtimer->freq = freq; +} + +static void +vtimer_inject_irq_callout(void *arg) +{ + struct hypctx *hypctx; + struct hyp *hyp; + + hypctx = arg; + hyp = hypctx->hyp; + + atomic_set_32(&hypctx->interrupts_pending, HVIP_VSTIP); + vcpu_notify_event(vm_vcpu(hyp->vm, hypctx->cpu_id)); +} + +int +vtimer_set_timer(struct hypctx *hypctx, uint64_t next_val) +{ + struct vtimer *vtimer; + sbintime_t time; + uint64_t curtime; + uint64_t delta; + + vtimer = &hypctx->vtimer; + + curtime = rdtime(); + if (curtime < next_val) { + delta = next_val - curtime; + time = delta * SBT_1S / vtimer->freq; + atomic_clear_32(&hypctx->interrupts_pending, HVIP_VSTIP); + callout_reset_sbt(&vtimer->callout, time, 0, + vtimer_inject_irq_callout, hypctx, 0); + } else + atomic_set_32(&hypctx->interrupts_pending, HVIP_VSTIP); + + return (0); +} diff --git a/sys/riscv/vmm/vmm_vtimer.h b/sys/riscv/vmm/vmm_vtimer.h new file mode 100644 index 000000000000..6deca322ce99 --- /dev/null +++ b/sys/riscv/vmm/vmm_vtimer.h @@ -0,0 +1,47 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_VTIMER_H_ +#define _VMM_VTIMER_H_ + +struct hypctx; + +struct vtimer { + struct callout callout; + struct mtx mtx; + uint32_t freq; +}; + +void vtimer_cpuinit(struct hypctx *hypctx); +int vtimer_set_timer(struct hypctx *hypctx, uint64_t next_val); + +#endif /* !_VMM_VTIMER_H_ */ diff --git a/usr.sbin/bhyve/riscv/fdt.c b/usr.sbin/bhyve/riscv/fdt.c index bef3f64b0c64..81296714e013 100644 --- a/usr.sbin/bhyve/riscv/fdt.c +++ b/usr.sbin/bhyve/riscv/fdt.c @@ -1,327 +1,328 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * Copyright (c) 2024 Ruslan Bukin * * This software was developed by Andrew Turner under sponsorship from * the FreeBSD Foundation. * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include "config.h" #include "bhyverun.h" #include "fdt.h" #define SET_PROP_U32(prop, idx, val) \ ((uint32_t *)(prop))[(idx)] = cpu_to_fdt32(val) #define SET_PROP_U64(prop, idx, val) \ ((uint64_t *)(prop))[(idx)] = cpu_to_fdt64(val) #define IRQ_TYPE_LEVEL_HIGH 4 #define IRQ_TYPE_LEVEL_LOW 8 static void *fdtroot; static uint32_t aplic_phandle = 0; static uint32_t intc0_phandle = 0; static uint32_t assign_phandle(void *fdt) { static uint32_t next_phandle = 1; uint32_t phandle; phandle = next_phandle; next_phandle++; fdt_property_u32(fdt, "phandle", phandle); return (phandle); } static void set_single_reg(void *fdt, uint64_t start, uint64_t len) { void *reg; fdt_property_placeholder(fdt, "reg", 2 * sizeof(uint64_t), ®); SET_PROP_U64(reg, 0, start); SET_PROP_U64(reg, 1, len); } static void add_cpu(void *fdt, int cpuid, const char *isa) { char node_name[16]; snprintf(node_name, sizeof(node_name), "cpu@%d", cpuid); fdt_begin_node(fdt, node_name); fdt_property_string(fdt, "device_type", "cpu"); fdt_property_string(fdt, "compatible", "riscv"); fdt_property_u32(fdt, "reg", cpuid); fdt_property_string(fdt, "riscv,isa", isa); fdt_property_string(fdt, "mmu-type", "riscv,sv39"); fdt_property_string(fdt, "clock-frequency", "1000000000"); fdt_begin_node(fdt, "interrupt-controller"); intc0_phandle = assign_phandle(fdt); fdt_property_u32(fdt, "#address-cells", 2); fdt_property_u32(fdt, "#interrupt-cells", 1); fdt_property(fdt, "interrupt-controller", NULL, 0); fdt_property_string(fdt, "compatible", "riscv,cpu-intc"); fdt_end_node(fdt); fdt_end_node(fdt); } static void add_cpus(void *fdt, int ncpu, const char *isa) { int cpuid; fdt_begin_node(fdt, "cpus"); /* XXX: Needed given the root #address-cells? */ fdt_property_u32(fdt, "#address-cells", 1); fdt_property_u32(fdt, "#size-cells", 0); - fdt_property_u32(fdt, "timebase-frequency", 10000000); + /* TODO: take timebase from kernel? */ + fdt_property_u32(fdt, "timebase-frequency", 1000000); for (cpuid = 0; cpuid < ncpu; cpuid++) add_cpu(fdt, cpuid, isa); fdt_end_node(fdt); } int fdt_init(struct vmctx *ctx, int ncpu, vm_paddr_t fdtaddr, vm_size_t fdtsize, const char *isa) { void *fdt; const char *bootargs; fdt = paddr_guest2host(ctx, fdtaddr, fdtsize); if (fdt == NULL) return (EFAULT); fdt_create(fdt, (int)fdtsize); /* Add the memory reserve map (needed even if none is reserved) */ fdt_finish_reservemap(fdt); /* Create the root node */ fdt_begin_node(fdt, ""); fdt_property_string(fdt, "compatible", "freebsd,bhyve"); fdt_property_u32(fdt, "#address-cells", 2); fdt_property_u32(fdt, "#size-cells", 2); fdt_begin_node(fdt, "chosen"); fdt_property_string(fdt, "stdout-path", "serial0:115200n8"); bootargs = get_config_value("fdt.bootargs"); if (bootargs != NULL) fdt_property_string(fdt, "bootargs", bootargs); fdt_end_node(fdt); fdt_begin_node(fdt, "memory"); fdt_property_string(fdt, "device_type", "memory"); /* There is no lowmem on riscv. */ assert(vm_get_lowmem_size(ctx) == 0); set_single_reg(fdt, vm_get_highmem_base(ctx), vm_get_highmem_size(ctx)); fdt_end_node(fdt); add_cpus(fdt, ncpu, isa); /* Finalized by fdt_finalized(). 
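	 * (i.e. by fdt_finalize(), which closes the root node once the
	 * aplic, uart and pcie nodes below have been added).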
*/ fdtroot = fdt; return (0); } void fdt_add_aplic(uint64_t mem_base, uint64_t mem_size) { char node_name[32]; void *fdt, *prop; fdt = fdtroot; snprintf(node_name, sizeof(node_name), "interrupt-controller@%lx", (unsigned long)mem_base); fdt_begin_node(fdt, node_name); aplic_phandle = assign_phandle(fdt); fdt_property_string(fdt, "compatible", "riscv,aplic"); fdt_property(fdt, "interrupt-controller", NULL, 0); #if notyet fdt_property(fdt, "msi-controller", NULL, 0); #endif /* XXX: Needed given the root #address-cells? */ fdt_property_u32(fdt, "#address-cells", 2); fdt_property_u32(fdt, "#interrupt-cells", 2); fdt_property_placeholder(fdt, "reg", 2 * sizeof(uint64_t), &prop); SET_PROP_U64(prop, 0, mem_base); SET_PROP_U64(prop, 1, mem_size); fdt_property_placeholder(fdt, "interrupts-extended", 2 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, intc0_phandle); SET_PROP_U32(prop, 1, 9); fdt_property_u32(fdt, "riscv,num-sources", 63); fdt_end_node(fdt); fdt_property_u32(fdt, "interrupt-parent", aplic_phandle); } void fdt_add_uart(uint64_t uart_base, uint64_t uart_size, int intr) { void *fdt, *interrupts; char node_name[32]; assert(aplic_phandle != 0); fdt = fdtroot; snprintf(node_name, sizeof(node_name), "serial@%lx", uart_base); fdt_begin_node(fdt, node_name); fdt_property_string(fdt, "compatible", "ns16550"); set_single_reg(fdt, uart_base, uart_size); fdt_property_u32(fdt, "interrupt-parent", aplic_phandle); fdt_property_placeholder(fdt, "interrupts", 2 * sizeof(uint32_t), &interrupts); SET_PROP_U32(interrupts, 0, intr); SET_PROP_U32(interrupts, 1, IRQ_TYPE_LEVEL_HIGH); fdt_end_node(fdt); snprintf(node_name, sizeof(node_name), "/serial@%lx", uart_base); fdt_begin_node(fdt, "aliases"); fdt_property_string(fdt, "serial0", node_name); fdt_end_node(fdt); } void fdt_add_pcie(int intrs[static 4]) { void *fdt, *prop; int slot, pin, intr, i; assert(aplic_phandle != 0); fdt = fdtroot; fdt_begin_node(fdt, "pcie@1f0000000"); fdt_property_string(fdt, "compatible", "pci-host-ecam-generic"); fdt_property_u32(fdt, "#address-cells", 3); fdt_property_u32(fdt, "#size-cells", 2); fdt_property_string(fdt, "device_type", "pci"); fdt_property_u64(fdt, "bus-range", (0ul << 32) | 1); set_single_reg(fdt, 0xe0000000, 0x10000000); fdt_property_placeholder(fdt, "ranges", 2 * 7 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, 0x01000000); SET_PROP_U32(prop, 1, 0); SET_PROP_U32(prop, 2, 0xdf000000); SET_PROP_U32(prop, 3, 0); SET_PROP_U32(prop, 4, 0xdf000000); SET_PROP_U32(prop, 5, 0); SET_PROP_U32(prop, 6, 0x01000000); SET_PROP_U32(prop, 7, 0x02000000); SET_PROP_U32(prop, 8, 0); SET_PROP_U32(prop, 9, 0xa0000000); SET_PROP_U32(prop, 10, 0); SET_PROP_U32(prop, 11, 0xa0000000); SET_PROP_U32(prop, 12, 0); SET_PROP_U32(prop, 13, 0x3f000000); #if notyet fdt_property_placeholder(fdt, "msi-map", 4 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, 0); /* RID base */ SET_PROP_U32(prop, 1, aplic_phandle); /* MSI parent */ SET_PROP_U32(prop, 2, 0); /* MSI base */ SET_PROP_U32(prop, 3, 0x10000); /* RID length */ fdt_property_u32(fdt, "msi-parent", aplic_phandle); #endif fdt_property_u32(fdt, "#interrupt-cells", 1); fdt_property_u32(fdt, "interrupt-parent", aplic_phandle); /* * Describe standard swizzled interrupts routing (pins rotated by one * for each consecutive slot). Must match pci_irq_route(). 
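	 * For example, pin INTA# of slot 0 maps to intrs[0] while INTA# of
	 * slot 1 maps to intrs[1], per intr = intrs[(pin + slot) % 4] below.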
*/ fdt_property_placeholder(fdt, "interrupt-map-mask", 4 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, 3 << 11); SET_PROP_U32(prop, 1, 0); SET_PROP_U32(prop, 2, 0); SET_PROP_U32(prop, 3, 7); fdt_property_placeholder(fdt, "interrupt-map", 16 * 9 * sizeof(uint32_t), &prop); for (i = 0; i < 16; ++i) { pin = i % 4; slot = i / 4; intr = intrs[(pin + slot) % 4]; SET_PROP_U32(prop, 10 * i + 0, slot << 11); SET_PROP_U32(prop, 10 * i + 1, 0); SET_PROP_U32(prop, 10 * i + 2, 0); SET_PROP_U32(prop, 10 * i + 3, pin + 1); SET_PROP_U32(prop, 10 * i + 4, aplic_phandle); SET_PROP_U32(prop, 10 * i + 5, 0); SET_PROP_U32(prop, 10 * i + 6, 0); SET_PROP_U32(prop, 10 * i + 7, intr); SET_PROP_U32(prop, 10 * i + 8, IRQ_TYPE_LEVEL_HIGH); } fdt_end_node(fdt); } void fdt_finalize(void) { fdt_end_node(fdtroot); fdt_finish(fdtroot); }