diff --git a/lib/libvmmapi/riscv/vmmapi_machdep.c b/lib/libvmmapi/riscv/vmmapi_machdep.c
index 9c70185942c9..4da2fb909f61 100644
--- a/lib/libvmmapi/riscv/vmmapi_machdep.c
+++ b/lib/libvmmapi/riscv/vmmapi_machdep.c
@@ -1,117 +1,118 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #include "vmmapi.h"
 #include "internal.h"
 
 const char *vm_capstrmap[] = {
+	[VM_CAP_SSTC] = "sstc",
 	[VM_CAP_MAX] = NULL,
 };
 
 #define VM_MD_IOCTLS \
 	VM_ATTACH_APLIC, \
 	VM_ASSERT_IRQ, \
 	VM_DEASSERT_IRQ, \
 	VM_RAISE_MSI
 
 const cap_ioctl_t vm_ioctl_cmds[] = {
 	VM_COMMON_IOCTLS,
 	VM_MD_IOCTLS,
 };
 size_t vm_ioctl_ncmds = nitems(vm_ioctl_cmds);
 
 int
 vm_attach_aplic(struct vmctx *ctx, uint64_t mem_start, size_t mem_size)
 {
 	struct vm_aplic_descr aplic;
 
 	bzero(&aplic, sizeof(aplic));
 	aplic.mem_start = mem_start;
 	aplic.mem_size = mem_size;
 
 	return (ioctl(ctx->fd, VM_ATTACH_APLIC, &aplic));
 }
 
 int
 vm_assert_irq(struct vmctx *ctx, uint32_t irq)
 {
 	struct vm_irq vi;
 
 	bzero(&vi, sizeof(vi));
 	vi.irq = irq;
 
 	return (ioctl(ctx->fd, VM_ASSERT_IRQ, &vi));
 }
 
 int
 vm_deassert_irq(struct vmctx *ctx, uint32_t irq)
 {
 	struct vm_irq vi;
 
 	bzero(&vi, sizeof(vi));
 	vi.irq = irq;
 
 	return (ioctl(ctx->fd, VM_DEASSERT_IRQ, &vi));
 }
 
 int
 vm_raise_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg, int bus, int slot,
     int func)
 {
 	struct vm_msi vmsi;
 
 	bzero(&vmsi, sizeof(vmsi));
 	vmsi.addr = addr;
 	vmsi.msg = msg;
 	vmsi.bus = bus;
 	vmsi.slot = slot;
 	vmsi.func = func;
 
 	return (ioctl(ctx->fd, VM_RAISE_MSI, &vmsi));
 }
 
 int
 vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
 {
 	struct vm_exception vmexc;
 
 	bzero(&vmexc, sizeof(vmexc));
 	vmexc.scause = scause;
 
 	return (vcpu_ioctl(vcpu, VM_INJECT_EXCEPTION, &vmexc));
 }
diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h
index 1093e1cd0096..6c027f50e97a 100644
--- a/sys/riscv/include/vmm.h
+++ b/sys/riscv/include/vmm.h
@@ -1,327 +1,328 @@
 /*
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2015 Mihai Carabas
  * Copyright (c) 2024 Ruslan Bukin
  *
  * This software was developed by the University of Cambridge Computer
  * Laboratory (Department of Computer Science and Technology) under Innovate
  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
  * Prototype".
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _VMM_H_
 #define _VMM_H_
 
 #include
 #include
 #include
 #include
 
 #include "pte.h"
 #include "pmap.h"
 
 struct vcpu;
 
 enum vm_suspend_how {
 	VM_SUSPEND_NONE,
 	VM_SUSPEND_RESET,
 	VM_SUSPEND_POWEROFF,
 	VM_SUSPEND_HALT,
 	VM_SUSPEND_LAST
 };
 
 /*
  * Identifiers for architecturally defined registers.
  */
 enum vm_reg_name {
 	VM_REG_GUEST_ZERO = 0,
 	VM_REG_GUEST_RA,
 	VM_REG_GUEST_SP,
 	VM_REG_GUEST_GP,
 	VM_REG_GUEST_TP,
 	VM_REG_GUEST_T0,
 	VM_REG_GUEST_T1,
 	VM_REG_GUEST_T2,
 	VM_REG_GUEST_S0,
 	VM_REG_GUEST_S1,
 	VM_REG_GUEST_A0,
 	VM_REG_GUEST_A1,
 	VM_REG_GUEST_A2,
 	VM_REG_GUEST_A3,
 	VM_REG_GUEST_A4,
 	VM_REG_GUEST_A5,
 	VM_REG_GUEST_A6,
 	VM_REG_GUEST_A7,
 	VM_REG_GUEST_S2,
 	VM_REG_GUEST_S3,
 	VM_REG_GUEST_S4,
 	VM_REG_GUEST_S5,
 	VM_REG_GUEST_S6,
 	VM_REG_GUEST_S7,
 	VM_REG_GUEST_S8,
 	VM_REG_GUEST_S9,
 	VM_REG_GUEST_S10,
 	VM_REG_GUEST_S11,
 	VM_REG_GUEST_T3,
 	VM_REG_GUEST_T4,
 	VM_REG_GUEST_T5,
 	VM_REG_GUEST_T6,
 	VM_REG_GUEST_SEPC,
 	VM_REG_LAST
 };
 
 #define VM_INTINFO_VECTOR(info)	((info) & 0xff)
 #define VM_INTINFO_DEL_ERRCODE	0x800
 #define VM_INTINFO_RSVD		0x7ffff000
 #define VM_INTINFO_VALID	0x80000000
 #define VM_INTINFO_TYPE		0x700
 #define VM_INTINFO_HWINTR	(0 << 8)
 #define VM_INTINFO_NMI		(2 << 8)
 #define VM_INTINFO_HWEXCEPTION	(3 << 8)
 #define VM_INTINFO_SWINTR	(4 << 8)
 
 #define VM_MAX_NAMELEN	32
 #define VM_MAX_SUFFIXLEN 15
 
 #ifdef _KERNEL
 
 struct vm;
 struct vm_exception;
 struct vm_exit;
 struct vm_run;
 struct vm_object;
 struct vm_guest_paging;
 struct vm_aplic_descr;
 struct pmap;
 
 struct vm_eventinfo {
 	void *rptr;	/* rendezvous cookie */
 	int *sptr;	/* suspend cookie */
 	int *iptr;	/* reqidle cookie */
 };
 
 int vm_create(const char *name, struct vm **retvm);
 struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
 void vm_disable_vcpu_creation(struct vm *vm);
 void vm_slock_vcpus(struct vm *vm);
 void vm_unlock_vcpus(struct vm *vm);
 void vm_destroy(struct vm *vm);
 int vm_reinit(struct vm *vm);
 const char *vm_name(struct vm *vm);
 
 /*
  * APIs that modify the guest memory map require all vcpus to be frozen.
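  * Freezing all vcpus acts as the write side of that lock: no vcpu can
  * be inspecting or faulting on the memory map while it is modified.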
  */
 void vm_slock_memsegs(struct vm *vm);
 void vm_xlock_memsegs(struct vm *vm);
 void vm_unlock_memsegs(struct vm *vm);
 int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
     size_t len, int prot, int flags);
 int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
 int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
 void vm_free_memseg(struct vm *vm, int ident);
 
 /*
  * APIs that inspect the guest memory map require only a *single* vcpu to
  * be frozen. This acts like a read lock on the guest memory map since any
  * modification requires *all* vcpus to be frozen.
  */
 int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
 int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
     struct vm_object **objptr);
 vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
 void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int prot,
     void **cookie);
 void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int prot,
     void **cookie);
 void vm_gpa_release(void *cookie);
 bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa);
 
 int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
 
 uint16_t vm_get_maxcpus(struct vm *vm);
 void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
     uint16_t *threads, uint16_t *maxcpus);
 int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
     uint16_t threads, uint16_t maxcpus);
 int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval);
 int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val);
 int vm_run(struct vcpu *vcpu);
 int vm_suspend(struct vm *vm, enum vm_suspend_how how);
 
 void* vm_get_cookie(struct vm *vm);
 int vcpu_vcpuid(struct vcpu *vcpu);
 void *vcpu_get_cookie(struct vcpu *vcpu);
 struct vm *vcpu_vm(struct vcpu *vcpu);
 struct vcpu *vm_vcpu(struct vm *vm, int cpu);
 int vm_get_capability(struct vcpu *vcpu, int type, int *val);
 int vm_set_capability(struct vcpu *vcpu, int type, int val);
 int vm_activate_cpu(struct vcpu *vcpu);
 int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu);
 int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu);
 int vm_inject_exception(struct vcpu *vcpu, uint64_t scause);
 int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr);
 int vm_assert_irq(struct vm *vm, uint32_t irq);
 int vm_deassert_irq(struct vm *vm, uint32_t irq);
 int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
     int func);
 struct vm_exit *vm_exitinfo(struct vcpu *vcpu);
 void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc);
 void vm_exit_debug(struct vcpu *vcpu, uint64_t pc);
 void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc);
 void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc);
 
 cpuset_t vm_active_cpus(struct vm *vm);
 cpuset_t vm_debug_cpus(struct vm *vm);
 cpuset_t vm_suspended_cpus(struct vm *vm);
 
 static __inline int
 vcpu_rendezvous_pending(struct vm_eventinfo *info)
 {
 	return (*((uintptr_t *)(info->rptr)) != 0);
 }
 
 static __inline int
 vcpu_suspended(struct vm_eventinfo *info)
 {
 	return (*info->sptr);
 }
 
 int vcpu_debugged(struct vcpu *vcpu);
 
 enum vcpu_state {
 	VCPU_IDLE,
 	VCPU_FROZEN,
 	VCPU_RUNNING,
 	VCPU_SLEEPING,
 };
 
 int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
 enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
 
 static int __inline
 vcpu_is_running(struct vcpu *vcpu, int *hostcpu)
 {
 	return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING);
 }
 
 #ifdef _SYS_PROC_H_
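 /*
  * True when the host thread has pending work to service: an AST or an
  * owed preemption.
  */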
 static int __inline
 vcpu_should_yield(struct vcpu *vcpu)
 {
 	struct thread *td;
 
 	td = curthread;
 	return (td->td_ast != 0 || td->td_owepreempt != 0);
 }
 #endif
 
 void *vcpu_stats(struct vcpu *vcpu);
 void vcpu_notify_event(struct vcpu *vcpu);
 
 enum vm_reg_name vm_segment_name(int seg_encoding);
 
 #endif /* _KERNEL */
 
 #define VM_DIR_READ	0
 #define VM_DIR_WRITE	1
 
 #define VM_GP_M_MASK		0x1f
 #define VM_GP_MMU_ENABLED	(1 << 5)
 
 struct vm_guest_paging {
 	int flags;
 	int padding;
 };
 
 struct vie {
 	uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
 	enum vm_reg_name reg;
 };
 
 struct vre {
 	uint32_t inst_syndrome;
 	uint8_t dir:1, unused:7;
 	enum vm_reg_name reg;
 };
 
 /*
  * Identifiers for optional vmm capabilities
  */
 enum vm_cap_type {
 	VM_CAP_UNRESTRICTED_GUEST,
+	VM_CAP_SSTC,
 	VM_CAP_MAX
 };
 
 enum vm_exitcode {
 	VM_EXITCODE_BOGUS,
 	VM_EXITCODE_ECALL,
 	VM_EXITCODE_HYP,
 	VM_EXITCODE_PAGING,
 	VM_EXITCODE_SUSPENDED,
 	VM_EXITCODE_DEBUG,
 	VM_EXITCODE_INST_EMUL,
 	VM_EXITCODE_WFI,
 	VM_EXITCODE_MAX
 };
 
 struct vm_exit {
 	uint64_t scause;
 	uint64_t sepc;
 	uint64_t stval;
 	uint64_t htval;
 	uint64_t htinst;
 	enum vm_exitcode exitcode;
 	int inst_length;
 	uint64_t pc;
 	union {
 		struct {
 			uint64_t gpa;
 		} paging;
 
 		struct {
 			uint64_t gpa;
 			struct vm_guest_paging paging;
 			struct vie vie;
 		} inst_emul;
 
 		struct {
 			uint64_t args[8];
 		} ecall;
 
 		struct {
 			enum vm_suspend_how how;
 		} suspended;
 
 		struct {
 			uint64_t scause;
 		} hyp;
 	} u;
 };
 
 #endif /* _VMM_H_ */
diff --git a/sys/riscv/vmm/vmm_riscv.c b/sys/riscv/vmm/vmm_riscv.c
index 6a76f8cf4f26..e276f8583e37 100644
--- a/sys/riscv/vmm/vmm_riscv.c
+++ b/sys/riscv/vmm/vmm_riscv.c
@@ -1,922 +1,926 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2024 Ruslan Bukin
  *
  * This software was developed by the University of Cambridge Computer
  * Laboratory (Department of Computer Science and Technology) under Innovate
  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
  * Prototype".
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #include "riscv.h"
 #include "vmm_aplic.h"
 #include "vmm_stat.h"
 
 MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
 
 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
 
 static int
 m_op(uint32_t insn, int match, int mask)
 {
 	if (((insn ^ match) & mask) == 0)
 		return (1);
 
 	return (0);
 }
 
 static inline void
 riscv_set_active_vcpu(struct hypctx *hypctx)
 {
 	DPCPU_SET(vcpu, hypctx);
 }
 
 struct hypctx *
 riscv_get_active_vcpu(void)
 {
 	return (DPCPU_GET(vcpu));
 }
 
 int
 vmmops_modinit(void)
 {
 	if (!has_hyp) {
 		printf("vmm: riscv hart doesn't support H-extension.\n");
 		return (ENXIO);
 	}
 
 	if (!has_sstc) {
 		printf("vmm: riscv hart doesn't support SSTC extension.\n");
 		return (ENXIO);
 	}
 
 	return (0);
 }
 
 int
 vmmops_modcleanup(void)
 {
 	return (0);
 }
 
 void *
 vmmops_init(struct vm *vm, pmap_t pmap)
 {
 	struct hyp *hyp;
 	vm_size_t size;
 
 	size = round_page(sizeof(struct hyp) +
 	    sizeof(struct hypctx *) * vm_get_maxcpus(vm));
 	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
 	hyp->vm = vm;
 	hyp->aplic_attached = false;
 
 	aplic_vminit(hyp);
 
 	return (hyp);
 }
 
 static void
 vmmops_delegate(void)
 {
 	uint64_t hedeleg;
 	uint64_t hideleg;
 
 	hedeleg  = (1UL << SCAUSE_INST_MISALIGNED);
 	hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
 	hedeleg |= (1UL << SCAUSE_BREAKPOINT);
 	hedeleg |= (1UL << SCAUSE_ECALL_USER);
 	hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
 	hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
 	hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
 	csr_write(hedeleg, hedeleg);
 
 	hideleg  = (1UL << IRQ_SOFTWARE_HYPERVISOR);
 	hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
 	hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
 	csr_write(hideleg, hideleg);
 }
 
 static void
 vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
 {
 	struct hypcsr *csrs;
 
 	csrs = &hypctx->guest_csrs;
 
 	csr_write(vsstatus, csrs->vsstatus);
 	csr_write(vsie, csrs->vsie);
 	csr_write(vstvec, csrs->vstvec);
 	csr_write(vsscratch, csrs->vsscratch);
 	csr_write(vsepc, csrs->vsepc);
 	csr_write(vscause, csrs->vscause);
 	csr_write(vstval, csrs->vstval);
 	csr_write(hvip, csrs->hvip);
 	csr_write(vsatp, csrs->vsatp);
 }
 
 static void
 vmmops_vcpu_save_csrs(struct hypctx *hypctx)
 {
 	struct hypcsr *csrs;
 
 	csrs = &hypctx->guest_csrs;
 
 	csrs->vsstatus = csr_read(vsstatus);
 	csrs->vsie = csr_read(vsie);
 	csrs->vstvec = csr_read(vstvec);
 	csrs->vsscratch = csr_read(vsscratch);
 	csrs->vsepc = csr_read(vsepc);
 	csrs->vscause = csr_read(vscause);
 	csrs->vstval = csr_read(vstval);
 	csrs->hvip = csr_read(hvip);
 	csrs->vsatp = csr_read(vsatp);
 }
 
 void *
 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
 {
 	struct hypctx *hypctx;
 	struct hyp *hyp;
 	vm_size_t size;
 
 	hyp = vmi;
 
 	dprintf("%s: hyp %p\n", __func__, hyp);
 
 	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
 	    ("%s: Invalid vcpuid %d", __func__, vcpuid));
 
 	size = round_page(sizeof(struct hypctx));
 
 	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
 	hypctx->hyp = hyp;
 	hypctx->vcpu = vcpu1;
 	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
 
 	/* sstatus */
 	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
 	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
 
 	/* hstatus */
 	hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
 	hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
 
 	hypctx->cpu_id = vcpuid;
 	hyp->ctx[vcpuid] = hypctx;
 
 	aplic_cpuinit(hypctx);
 
 	return (hypctx);
 }
 
 static int
 riscv_vmm_pinit(pmap_t pmap)
 {
 	dprintf("%s: pmap %p\n", __func__, pmap);
 
 	pmap_pinit_stage(pmap, PM_STAGE2);
 
 	return (1);
 }
 
 struct vmspace *
 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
 {
 	return (vmspace_alloc(min, max, riscv_vmm_pinit));
 }
 
 void
 vmmops_vmspace_free(struct vmspace *vmspace)
 {
 	pmap_remove_pages(vmspace_pmap(vmspace));
 	vmspace_free(vmspace);
 }
 
 static void
 riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
     struct hyptrap *trap)
 {
 	register struct hyptrap * htrap asm("a0");
 	uintptr_t old_hstatus;
 	uintptr_t old_stvec;
 	uintptr_t entry;
 	uint64_t val;
 	uint64_t tmp;
 	int intr;
 
 	entry = (uintptr_t)&vmm_unpriv_trap;
 	htrap = trap;
 
 	intr = intr_disable();
 
 	old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
 	/*
 	 * Set up a temporary exception vector, so that if hlvx.hu raises
 	 * an exception we catch it in the vmm_unpriv_trap().
 	 */
 	old_stvec = csr_swap(stvec, entry);
 
 	/*
 	 * Read the first two bytes of the instruction, assuming it could be
 	 * a compressed one.
 	 */
 	__asm __volatile(".option push\n"
 			".option norvc\n"
 			"hlvx.hu %[val], (%[addr])\n"
 			".option pop\n"
 	    : [val] "=r" (val)
 	    : [addr] "r" (guest_addr), "r" (htrap)
 	    : "a1", "memory");
 
 	/*
 	 * Check that the previous hlvx.hu did not raise an exception, and
 	 * then read the rest of the instruction if it is a full-length one.
 	 */
 	if (trap->scause == -1 && (val & 0x3) == 0x3) {
 		guest_addr += 2;
 		__asm __volatile(".option push\n"
 				".option norvc\n"
 				"hlvx.hu %[tmp], (%[addr])\n"
 				".option pop\n"
 		    : [tmp] "=r" (tmp)
 		    : [addr] "r" (guest_addr), "r" (htrap)
 		    : "a1", "memory");
 		val |= (tmp << 16);
 	}
 
 	csr_write(hstatus, old_hstatus);
 	csr_write(stvec, old_stvec);
 	intr_restore(intr);
 
 	*data = val;
 }
 
 static int
 riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
     struct hyptrap *trap)
 {
 	uintptr_t guest_addr;
 	struct vie *vie;
 	uint64_t insn;
 	int reg_num;
 	int rs2, rd;
 	int direction;
 	int sign_extend;
 	int access_size;
 
 	guest_addr = vme_ret->sepc;
 
 	KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
 	    vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
 	    vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
 	    ("Invalid scause"));
 
 	direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
 	    VM_DIR_WRITE : VM_DIR_READ;
 
 	sign_extend = 1;
 
 	bzero(trap, sizeof(struct hyptrap));
 	trap->scause = -1;
 	riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
 	if (trap->scause != -1)
 		return (-1);
 
 	if ((insn & 0x3) == 0x3) {
 		rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
 		rd = (insn & RD_MASK) >> RD_SHIFT;
 
 		if (direction == VM_DIR_WRITE) {
 			if (m_op(insn, MATCH_SB, MASK_SB))
 				access_size = 1;
 			else if (m_op(insn, MATCH_SH, MASK_SH))
 				access_size = 2;
 			else if (m_op(insn, MATCH_SW, MASK_SW))
 				access_size = 4;
 			else if (m_op(insn, MATCH_SD, MASK_SD))
 				access_size = 8;
 			else {
 				printf("unknown store instr at %lx",
 				    guest_addr);
 				return (-2);
 			}
 			reg_num = rs2;
 		} else {
 			if (m_op(insn, MATCH_LB, MASK_LB))
 				access_size = 1;
 			else if (m_op(insn, MATCH_LH, MASK_LH))
 				access_size = 2;
 			else if (m_op(insn, MATCH_LW, MASK_LW))
 				access_size = 4;
 			else if (m_op(insn, MATCH_LD, MASK_LD))
 				access_size = 8;
 			else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
 				access_size = 1;
 				sign_extend = 0;
 			} else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
 				access_size = 2;
 				sign_extend = 0;
 			} else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
 				access_size = 4;
 				sign_extend = 0;
 			} else {
 				printf("unknown load instr at %lx",
 				    guest_addr);
 				return (-3);
 			}
 			reg_num = rd;
 		}
 		vme_ret->inst_length = 4;
 	} else {
 		rs2 = (insn >> 7) & 0x7;
 		rs2 += 0x8;
 		rd = (insn >> 2) & 0x7;
 		rd += 0x8;
 
 		if (direction == VM_DIR_WRITE) {
 			if (m_op(insn, MATCH_C_SW, MASK_C_SW))
 				access_size = 4;
 			else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
 				access_size = 8;
 			else {
 				printf("unknown compressed store instr at %lx",
 				    guest_addr);
 				return (-4);
 			}
 		} else {
 			if (m_op(insn, MATCH_C_LW, MASK_C_LW))
 				access_size = 4;
 			else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
 				access_size = 8;
 			else {
 				printf("unknown compressed load instr at %lx",
 				    guest_addr);
 				return (-5);
 			}
 		}
 		reg_num = rd;
 		vme_ret->inst_length = 2;
 	}
 
 	vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
 	    (vme_ret->stval & 0x3);
 
 	dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
 	    reg_num, vme_ret->u.inst_emul.gpa);
 
 	vie = &vme_ret->u.inst_emul.vie;
 	vie->dir = direction;
 	vie->reg = reg_num;
 	vie->sign_extend = sign_extend;
 	vie->access_size = access_size;
 
 	return (0);
 }
 
 static bool
 riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
     pmap_t pmap)
 {
 	struct hyptrap trap;
 	uint64_t insn;
 	uint64_t gpa;
 	bool handled;
 	bool retu;
 	int ret;
 	int i;
 
 	handled = false;
 
 	if (vme->scause & SCAUSE_INTR) {
 		/*
 		 * Host interrupt? Leave critical section to handle.
 		 */
 		vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
 		vme->exitcode = VM_EXITCODE_BOGUS;
 		vme->inst_length = 0;
 		return (handled);
 	}
 
 	switch (vme->scause) {
 	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
 	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
 	case SCAUSE_STORE_GUEST_PAGE_FAULT:
 		gpa = (vme->htval << 2) | (vme->stval & 0x3);
 		if (vm_mem_allocated(hypctx->vcpu, gpa)) {
 			vme->exitcode = VM_EXITCODE_PAGING;
 			vme->inst_length = 0;
 			vme->u.paging.gpa = gpa;
 		} else {
 			ret = riscv_gen_inst_emul_data(hypctx, vme, &trap);
 			if (ret != 0) {
 				vme->exitcode = VM_EXITCODE_HYP;
 				vme->u.hyp.scause = trap.scause;
 				break;
 			}
 			vme->exitcode = VM_EXITCODE_INST_EMUL;
 		}
 		break;
 	case SCAUSE_ILLEGAL_INSTRUCTION:
 		/*
 		 * TODO: handle illegal instruction properly.
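 		 * For now the fault is just counted and reported to
 		 * userspace as a bogus exit.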
 		 */
 		printf("%s: Illegal instruction at %lx stval 0x%lx htval "
 		    "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval);
 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
 		vme->exitcode = VM_EXITCODE_BOGUS;
 		handled = false;
 		break;
 	case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
 		retu = false;
 		vmm_sbi_ecall(hypctx->vcpu, &retu);
 		if (retu == false) {
 			handled = true;
 			break;
 		}
 		for (i = 0; i < nitems(vme->u.ecall.args); i++)
 			vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i];
 		vme->exitcode = VM_EXITCODE_ECALL;
 		handled = false;
 		break;
 	case SCAUSE_VIRTUAL_INSTRUCTION:
 		insn = vme->stval;
 		if (m_op(insn, MATCH_WFI, MASK_WFI))
 			vme->exitcode = VM_EXITCODE_WFI;
 		else
 			vme->exitcode = VM_EXITCODE_BOGUS;
 		handled = false;
 		break;
 	default:
 		printf("unknown scause %lx\n", vme->scause);
 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
 		vme->exitcode = VM_EXITCODE_BOGUS;
 		handled = false;
 		break;
 	}
 
 	return (handled);
 }
 
 int
 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
     int prot, uint64_t *gpa, int *is_fault)
 {
 	/* Implement me. */
 
 	return (ENOSYS);
 }
 
 void
 riscv_send_ipi(struct hypctx *hypctx, int hart_id)
 {
 	struct hyp *hyp;
 	struct vm *vm;
 
 	hyp = hypctx->hyp;
 	vm = hyp->vm;
 
 	atomic_set_32(&hypctx->ipi_pending, 1);
 
 	vcpu_notify_event(vm_vcpu(vm, hart_id));
 }
 
 int
 riscv_check_ipi(struct hypctx *hypctx, bool clear)
 {
 	int val;
 
 	if (clear)
 		val = atomic_swap_32(&hypctx->ipi_pending, 0);
 	else
 		val = hypctx->ipi_pending;
 
 	return (val);
 }
 
 static void
 riscv_sync_interrupts(struct hypctx *hypctx)
 {
 	int pending;
 
 	pending = aplic_check_pending(hypctx);
 
 	if (pending)
 		hypctx->guest_csrs.hvip |= HVIP_VSEIP;
 	else
 		hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;
 
 	csr_write(hvip, hypctx->guest_csrs.hvip);
 }
 
 static void
 riscv_sync_ipi(struct hypctx *hypctx)
 {
 	/* Guest clears VSSIP bit manually. */
 	if (riscv_check_ipi(hypctx, true))
 		hypctx->guest_csrs.hvip |= HVIP_VSSIP;
 
 	csr_write(hvip, hypctx->guest_csrs.hvip);
 }
 
 int
 vmmops_run(void *vcpui, register_t pc, pmap_t pmap,
     struct vm_eventinfo *evinfo)
 {
 	struct hypctx *hypctx;
 	struct vm_exit *vme;
 	struct vcpu *vcpu;
 	register_t val;
 	bool handled;
 
 	hypctx = (struct hypctx *)vcpui;
 	vcpu = hypctx->vcpu;
 	vme = vm_exitinfo(vcpu);
 
 	hypctx->guest_regs.hyp_sepc = (uint64_t)pc;
 
 	vmmops_delegate();
 
 	/*
 	 * From The RISC-V Instruction Set Manual
 	 * Volume II: RISC-V Privileged Architectures
 	 *
 	 * If the new virtual machine's guest physical page tables
 	 * have been modified, it may be necessary to execute an HFENCE.GVMA
 	 * instruction (see Section 5.3.2) before or after writing hgatp.
 	 */
 	__asm __volatile("hfence.gvma" ::: "memory");
 
 	csr_write(hgatp, pmap->pm_satp);
 	csr_write(henvcfg, HENVCFG_STCE);
 	csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE);
 	/* TODO: should we trap rdcycle / rdtime? */
 	csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);
 
 	vmmops_vcpu_restore_csrs(hypctx);
 
 	for (;;) {
 		dprintf("%s: pc %lx\n", __func__, pc);
 
 		if (hypctx->has_exception) {
 			hypctx->has_exception = false;
 			/*
 			 * TODO: implement exception injection.
 			 */
 		}
 
 		val = intr_disable();
 
 		/* Check if the vcpu is suspended */
 		if (vcpu_suspended(evinfo)) {
 			intr_restore(val);
 			vm_exit_suspended(vcpu, pc);
 			break;
 		}
 
 		if (vcpu_debugged(vcpu)) {
 			intr_restore(val);
 			vm_exit_debug(vcpu, pc);
 			break;
 		}
 
 		/*
 		 * TODO: What happens if a timer interrupt is asserted exactly
 		 * here, but for the previous VM?
 		 */
 		riscv_set_active_vcpu(hypctx);
 		aplic_flush_hwstate(hypctx);
 		riscv_sync_interrupts(hypctx);
 		riscv_sync_ipi(hypctx);
 
 		dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
 		    __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
 		    hypctx->guest_regs.hyp_hstatus);
 
 		vmm_switch(hypctx);
 
 		dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__,
 		    hypctx->guest_regs.hyp_hstatus);
 
 		aplic_sync_hwstate(hypctx);
 		riscv_sync_interrupts(hypctx);
 
 		/*
 		 * TODO: deactivate stage 2 pmap here if needed.
 		 */
 
 		vme->scause = csr_read(scause);
 		vme->sepc = csr_read(sepc);
 		vme->stval = csr_read(stval);
 		vme->htval = csr_read(htval);
 		vme->htinst = csr_read(htinst);
 
 		intr_restore(val);
 
 		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
 
 		vme->pc = hypctx->guest_regs.hyp_sepc;
 		vme->inst_length = INSN_SIZE;
 
 		handled = riscv_handle_world_switch(hypctx, vme, pmap);
 		if (handled == false)
 			/* Exit loop to emulate instruction. */
 			break;
 		else {
 			/* Resume guest execution from the next instruction. */
 			hypctx->guest_regs.hyp_sepc += vme->inst_length;
 		}
 	}
 
 	vmmops_vcpu_save_csrs(hypctx);
 
 	return (0);
 }
 
 static void
 riscv_pcpu_vmcleanup(void *arg)
 {
 	struct hyp *hyp;
 	int i, maxcpus;
 
 	hyp = arg;
 	maxcpus = vm_get_maxcpus(hyp->vm);
 	for (i = 0; i < maxcpus; i++) {
 		if (riscv_get_active_vcpu() == hyp->ctx[i]) {
 			riscv_set_active_vcpu(NULL);
 			break;
 		}
 	}
 }
 
 void
 vmmops_vcpu_cleanup(void *vcpui)
 {
 	struct hypctx *hypctx;
 
 	hypctx = vcpui;
 
 	dprintf("%s\n", __func__);
 
 	aplic_cpucleanup(hypctx);
 
 	free(hypctx, M_HYP);
 }
 
 void
 vmmops_cleanup(void *vmi)
 {
 	struct hyp *hyp;
 
 	hyp = vmi;
 
 	dprintf("%s\n", __func__);
 
 	aplic_vmcleanup(hyp);
 
 	smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);
 
 	free(hyp, M_HYP);
 }
 
 /*
  * Return register value. Registers have different sizes and an explicit cast
  * must be made to ensure proper conversion.
  */
 static uint64_t *
 hypctx_regptr(struct hypctx *hypctx, int reg)
 {
 	switch (reg) {
 	case VM_REG_GUEST_RA:
 		return (&hypctx->guest_regs.hyp_ra);
 	case VM_REG_GUEST_SP:
 		return (&hypctx->guest_regs.hyp_sp);
 	case VM_REG_GUEST_GP:
 		return (&hypctx->guest_regs.hyp_gp);
 	case VM_REG_GUEST_TP:
 		return (&hypctx->guest_regs.hyp_tp);
 	case VM_REG_GUEST_T0:
 		return (&hypctx->guest_regs.hyp_t[0]);
 	case VM_REG_GUEST_T1:
 		return (&hypctx->guest_regs.hyp_t[1]);
 	case VM_REG_GUEST_T2:
 		return (&hypctx->guest_regs.hyp_t[2]);
 	case VM_REG_GUEST_S0:
 		return (&hypctx->guest_regs.hyp_s[0]);
 	case VM_REG_GUEST_S1:
 		return (&hypctx->guest_regs.hyp_s[1]);
 	case VM_REG_GUEST_A0:
 		return (&hypctx->guest_regs.hyp_a[0]);
 	case VM_REG_GUEST_A1:
 		return (&hypctx->guest_regs.hyp_a[1]);
 	case VM_REG_GUEST_A2:
 		return (&hypctx->guest_regs.hyp_a[2]);
 	case VM_REG_GUEST_A3:
 		return (&hypctx->guest_regs.hyp_a[3]);
 	case VM_REG_GUEST_A4:
 		return (&hypctx->guest_regs.hyp_a[4]);
 	case VM_REG_GUEST_A5:
 		return (&hypctx->guest_regs.hyp_a[5]);
 	case VM_REG_GUEST_A6:
 		return (&hypctx->guest_regs.hyp_a[6]);
 	case VM_REG_GUEST_A7:
 		return (&hypctx->guest_regs.hyp_a[7]);
 	case VM_REG_GUEST_S2:
 		return (&hypctx->guest_regs.hyp_s[2]);
 	case VM_REG_GUEST_S3:
 		return (&hypctx->guest_regs.hyp_s[3]);
 	case VM_REG_GUEST_S4:
 		return (&hypctx->guest_regs.hyp_s[4]);
 	case VM_REG_GUEST_S5:
 		return (&hypctx->guest_regs.hyp_s[5]);
 	case VM_REG_GUEST_S6:
 		return (&hypctx->guest_regs.hyp_s[6]);
 	case VM_REG_GUEST_S7:
 		return (&hypctx->guest_regs.hyp_s[7]);
 	case VM_REG_GUEST_S8:
 		return (&hypctx->guest_regs.hyp_s[8]);
 	case VM_REG_GUEST_S9:
 		return (&hypctx->guest_regs.hyp_s[9]);
 	case VM_REG_GUEST_S10:
 		return (&hypctx->guest_regs.hyp_s[10]);
 	case VM_REG_GUEST_S11:
 		return (&hypctx->guest_regs.hyp_s[11]);
 	case VM_REG_GUEST_T3:
 		return (&hypctx->guest_regs.hyp_t[3]);
 	case VM_REG_GUEST_T4:
 		return (&hypctx->guest_regs.hyp_t[4]);
 	case VM_REG_GUEST_T5:
 		return (&hypctx->guest_regs.hyp_t[5]);
 	case VM_REG_GUEST_T6:
 		return (&hypctx->guest_regs.hyp_t[6]);
 	case VM_REG_GUEST_SEPC:
 		return (&hypctx->guest_regs.hyp_sepc);
 	default:
 		break;
 	}
 
 	return (NULL);
 }
 
 int
 vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
 {
 	uint64_t *regp;
 	int running, hostcpu;
 	struct hypctx *hypctx;
 
 	hypctx = vcpui;
 
 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
 	if (running && hostcpu != curcpu)
 		panic("%s: %s%d is running", __func__,
 		    vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu));
 
 	if (reg == VM_REG_GUEST_ZERO) {
 		*retval = 0;
 		return (0);
 	}
 
 	regp = hypctx_regptr(hypctx, reg);
 	if (regp == NULL)
 		return (EINVAL);
 
 	*retval = *regp;
 
 	return (0);
 }
 
 int
 vmmops_setreg(void *vcpui, int reg, uint64_t val)
 {
 	struct hypctx *hypctx;
 	int running, hostcpu;
 	uint64_t *regp;
 
 	hypctx = vcpui;
 
 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
 	if (running && hostcpu != curcpu)
 		panic("%s: %s%d is running", __func__,
 		    vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu));
 
 	regp = hypctx_regptr(hypctx, reg);
 	if (regp == NULL)
 		return (EINVAL);
 
 	*regp = val;
 
 	return (0);
 }
 
 int
 vmmops_exception(void *vcpui, uint64_t scause)
 {
 	struct hypctx *hypctx;
 	int running, hostcpu;
 
 	hypctx = vcpui;
 
 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
 	if (running && hostcpu != curcpu)
 		panic("%s: %s%d is running", __func__,
 		    vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu));
 
 	/* TODO: implement me. */
 
 	return (ENOSYS);
 }
 
 int
 vmmops_getcap(void *vcpui, int num, int *retval)
 {
 	int ret;
 
 	ret = ENOENT;
 
 	switch (num) {
+	case VM_CAP_SSTC:
+		*retval = has_sstc;
+		ret = 0;
+		break;
 	case VM_CAP_UNRESTRICTED_GUEST:
 		*retval = 1;
 		ret = 0;
 		break;
 	default:
 		break;
 	}
 
 	return (ret);
 }
 
 int
 vmmops_setcap(void *vcpui, int num, int val)
 {
 	return (ENOENT);
 }
diff --git a/usr.sbin/bhyve/riscv/bhyverun_machdep.c b/usr.sbin/bhyve/riscv/bhyverun_machdep.c
index 39d6a7cdf231..d06b517a6624 100644
--- a/usr.sbin/bhyve/riscv/bhyverun_machdep.c
+++ b/usr.sbin/bhyve/riscv/bhyverun_machdep.c
@@ -1,357 +1,364 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  * Copyright (c) 2024 Ruslan Bukin
  *
  * This software was developed by the University of Cambridge Computer
  * Laboratory (Department of Computer Science and Technology) under Innovate
  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
  * Prototype".
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #include "bhyverun.h"
 #include "config.h"
 #include "debug.h"
 #include "fdt.h"
 #include "mem.h"
 #include "pci_emul.h"
 #include "pci_irq.h"
 #include "uart_emul.h"
 #include "riscv.h"
 
 #define FDT_SIZE	(64 * 1024)
 #define FDT_DTB_ALIGN	8
 
 /* Start of lowmem + 64K */
 #define UART_MMIO_BASE	0x10000
 #define UART_MMIO_SIZE	0x1000
 #define UART_INTR	1
 
 #define APLIC_MEM_BASE	0x2f000000
 #define APLIC_MEM_SIZE	0x10000
 
 #define PCIE_INTA	2
 #define PCIE_INTB	3
 #define PCIE_INTC	4
 #define PCIE_INTD	5
 
 void
 bhyve_init_config(void)
 {
 	init_config();
 
 	/* Set default values prior to option parsing. */
 	set_config_bool("acpi_tables", false);
 	set_config_bool("acpi_tables_in_memory", false);
 	set_config_value("memory.size", "256M");
 }
 
 void
 bhyve_usage(int code)
 {
 	const char *progname;
 
 	progname = getprogname();
 
 	fprintf(stderr,
 	    "Usage: %s [-CDHhSW]\n"
 	    "       %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
 	    "       %*s [-k config_file] [-m mem] [-o var=value]\n"
 	    "       %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n"
 	    "       -C: include guest memory in core file\n"
 	    "       -c: number of CPUs and/or topology specification\n"
 	    "       -D: destroy on power-off\n"
 	    "       -h: help\n"
 	    "       -k: key=value flat config file\n"
 	    "       -m: memory size\n"
 	    "       -o: set config 'var' to 'value'\n"
 	    "       -p: pin 'vcpu' to 'hostcpu'\n"
 	    "       -S: guest memory cannot be swapped\n"
 	    "       -s: PCI slot config\n"
 	    "       -U: UUID\n"
 	    "       -W: force virtio to use single-vector MSI\n",
 	    progname, (int)strlen(progname), "", (int)strlen(progname), "",
 	    (int)strlen(progname), "");
 	exit(code);
 }
 
 void
 bhyve_optparse(int argc, char **argv)
 {
 	const char *optstr;
 	int c;
 
 	optstr = "hCDSWk:f:o:p:c:s:m:U:";
 	while ((c = getopt(argc, argv, optstr)) != -1) {
 		switch (c) {
 		case 'c':
 			if (bhyve_topology_parse(optarg) != 0) {
 				errx(EX_USAGE, "invalid cpu topology '%s'",
 				    optarg);
 			}
 			break;
 		case 'C':
 			set_config_bool("memory.guest_in_core", true);
 			break;
 		case 'D':
 			set_config_bool("destroy_on_poweroff", true);
 			break;
 		case 'k':
 			bhyve_parse_simple_config_file(optarg);
 			break;
 		case 'm':
 			set_config_value("memory.size", optarg);
 			break;
 		case 'o':
 			if (!bhyve_parse_config_option(optarg)) {
 				errx(EX_USAGE,
 				    "invalid configuration option '%s'",
 				    optarg);
 			}
 			break;
 		case 'p':
 			if (bhyve_pincpu_parse(optarg) != 0) {
 				errx(EX_USAGE,
 				    "invalid vcpu pinning configuration '%s'",
 				    optarg);
 			}
 			break;
 		case 's':
 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
 				pci_print_supported_devices();
 				exit(0);
 			} else if (pci_parse_slot(optarg) != 0)
 				exit(4);
 			else
 				break;
 		case 'S':
 			set_config_bool("memory.wired", true);
 			break;
 		case 'U':
 			set_config_value("uuid", optarg);
 			break;
 		case 'W':
 			set_config_bool("virtio_msix", false);
 			break;
 		case 'h':
 			bhyve_usage(0);
 		default:
 			bhyve_usage(1);
 		}
 	}
 }
 
 void
 bhyve_init_vcpu(struct vcpu *vcpu __unused)
 {
 }
 
 void
 bhyve_start_vcpu(struct vcpu *vcpu, bool bsp __unused)
 {
 	int error;
 
 	/*
 	 * Set hart ID.
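 	 * By RISC-V boot convention, register a0 carries the hart ID at
 	 * the guest entry point.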
 	 */
 	error = vm_set_register(vcpu, VM_REG_GUEST_A0, vcpu_id(vcpu));
 	assert(error == 0);
 
 	fbsdrun_addcpu(vcpu_id(vcpu));
 }
 
 /*
  * Load the specified boot code at the beginning of high memory.
  */
 static void
 load_bootrom(struct vmctx *ctx, const char *path, uint64_t *elrp,
     uint64_t *lenp)
 {
 	struct stat sb;
 	void *data, *gptr;
 	vm_paddr_t loadaddr;
 	off_t size;
 	int fd;
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0)
 		err(1, "open(%s)", path);
 	if (fstat(fd, &sb) != 0)
 		err(1, "fstat(%s)", path);
 
 	size = sb.st_size;
 
 	loadaddr = vm_get_highmem_base(ctx);
 	gptr = vm_map_gpa(ctx, loadaddr, round_page(size));
 
 	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
 	if (data == MAP_FAILED)
 		err(1, "mmap(%s)", path);
 	(void)close(fd);
 	memcpy(gptr, data, size);
 	if (munmap(data, size) != 0)
 		err(1, "munmap(%s)", path);
 
 	*elrp = loadaddr;
 	*lenp = size;
 }
 
 static void
 mmio_uart_intr_assert(void *arg)
 {
 	struct vmctx *ctx = arg;
 
 	vm_assert_irq(ctx, UART_INTR);
 }
 
 static void
 mmio_uart_intr_deassert(void *arg)
 {
 	struct vmctx *ctx = arg;
 
 	vm_deassert_irq(ctx, UART_INTR);
 }
 
 static int
 mmio_uart_mem_handler(struct vcpu *vcpu __unused, int dir, uint64_t addr,
     int size __unused, uint64_t *val, void *arg1, long arg2)
 {
 	struct uart_ns16550_softc *sc = arg1;
 	long reg;
 
 	reg = addr - arg2;
 	if (dir == MEM_F_WRITE)
 		uart_ns16550_write(sc, reg, *val);
 	else
 		*val = uart_ns16550_read(sc, reg);
 
 	return (0);
 }
 
 static bool
 init_mmio_uart(struct vmctx *ctx)
 {
 	struct uart_ns16550_softc *sc;
 	struct mem_range mr;
 	const char *path;
 	int error;
 
 	path = get_config_value("console");
 	if (path == NULL)
 		return (false);
 
 	sc = uart_ns16550_init(mmio_uart_intr_assert, mmio_uart_intr_deassert,
 	    ctx);
 	if (uart_ns16550_tty_open(sc, path) != 0) {
 		EPRINTLN("Unable to initialize backend '%s' for mmio uart",
 		    path);
 		assert(0);
 	}
 
 	bzero(&mr, sizeof(struct mem_range));
 	mr.name = "uart";
 	mr.base = UART_MMIO_BASE;
 	mr.size = UART_MMIO_SIZE;
 	mr.flags = MEM_F_RW;
 	mr.handler = mmio_uart_mem_handler;
 	mr.arg1 = sc;
 	mr.arg2 = mr.base;
 	error = register_mem(&mr);
 	assert(error == 0);
 
 	return (true);
 }
 
 int
 bhyve_init_platform(struct vmctx *ctx, struct vcpu *bsp)
 {
 	const char *bootrom;
 	uint64_t elr;
 	uint64_t len;
 	int error;
 	int pcie_intrs[4] = {PCIE_INTA, PCIE_INTB, PCIE_INTC, PCIE_INTD};
 	vm_paddr_t fdt_gpa;
+	char isa[32];
+	int retval;
 
 	bootrom = get_config_value("bootrom");
 	if (bootrom == NULL) {
 		warnx("no bootrom specified");
 		return (ENOENT);
 	}
 	load_bootrom(ctx, bootrom, &elr, &len);
 	error = vm_set_register(bsp, VM_REG_GUEST_SEPC, elr);
 	if (error != 0) {
 		warn("vm_set_register(GUEST_SEPC)");
 		return (error);
 	}
 
+	error = vm_get_capability(bsp, VM_CAP_SSTC, &retval);
+	assert(error == 0);
+	snprintf(isa, sizeof(isa), "%s%s", "rv64imafdc",
+	    retval == 1 ? "_sstc" : "");
+
 	fdt_gpa = vm_get_highmem_base(ctx) + roundup2(len, FDT_DTB_ALIGN);
-	error = fdt_init(ctx, guest_ncpus, fdt_gpa, FDT_SIZE);
+	error = fdt_init(ctx, guest_ncpus, fdt_gpa, FDT_SIZE, isa);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Set FDT base address to the bootable hart.
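 	 * By RISC-V boot convention, register a1 carries the physical
 	 * address of the device tree blob.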
 	 */
 	error = vm_set_register(bsp, VM_REG_GUEST_A1, fdt_gpa);
 	assert(error == 0);
 
 	fdt_add_aplic(APLIC_MEM_BASE, APLIC_MEM_SIZE);
 	error = vm_attach_aplic(ctx, APLIC_MEM_BASE, APLIC_MEM_SIZE);
 	if (error != 0) {
 		warn("vm_attach_aplic()");
 		return (error);
 	}
 
 	if (init_mmio_uart(ctx))
 		fdt_add_uart(UART_MMIO_BASE, UART_MMIO_SIZE, UART_INTR);
 
 	pci_irq_init(pcie_intrs);
 	fdt_add_pcie(pcie_intrs);
 
 	vmexit_set_bsp(vcpu_id(bsp));
 
 	return (0);
 }
 
 int
 bhyve_init_platform_late(struct vmctx *ctx __unused, struct vcpu *bsp __unused)
 {
 	fdt_finalize();
 
 	return (0);
 }
diff --git a/usr.sbin/bhyve/riscv/fdt.c b/usr.sbin/bhyve/riscv/fdt.c
index 54b75c68ea76..bef3f64b0c64 100644
--- a/usr.sbin/bhyve/riscv/fdt.c
+++ b/usr.sbin/bhyve/riscv/fdt.c
@@ -1,326 +1,327 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2022 The FreeBSD Foundation
  * Copyright (c) 2024 Ruslan Bukin
  *
  * This software was developed by Andrew Turner under sponsorship from
  * the FreeBSD Foundation.
  *
  * This software was developed by the University of Cambridge Computer
  * Laboratory (Department of Computer Science and Technology) under Innovate
  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
  * Prototype".
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #include "config.h"
 #include "bhyverun.h"
 #include "fdt.h"
 
 #define SET_PROP_U32(prop, idx, val)	\
     ((uint32_t *)(prop))[(idx)] = cpu_to_fdt32(val)
 #define SET_PROP_U64(prop, idx, val)	\
     ((uint64_t *)(prop))[(idx)] = cpu_to_fdt64(val)
 
 #define IRQ_TYPE_LEVEL_HIGH	4
 #define IRQ_TYPE_LEVEL_LOW	8
 
 static void *fdtroot;
 static uint32_t aplic_phandle = 0;
 static uint32_t intc0_phandle = 0;
 
 static uint32_t
 assign_phandle(void *fdt)
 {
 	static uint32_t next_phandle = 1;
 	uint32_t phandle;
 
 	phandle = next_phandle;
 	next_phandle++;
 	fdt_property_u32(fdt, "phandle", phandle);
 
 	return (phandle);
 }
 
 static void
 set_single_reg(void *fdt, uint64_t start, uint64_t len)
 {
 	void *reg;
 
 	fdt_property_placeholder(fdt, "reg", 2 * sizeof(uint64_t), &reg);
 	SET_PROP_U64(reg, 0, start);
 	SET_PROP_U64(reg, 1, len);
 }
 
 static void
-add_cpu(void *fdt, int cpuid)
+add_cpu(void *fdt, int cpuid, const char *isa)
 {
 	char node_name[16];
 
 	snprintf(node_name, sizeof(node_name), "cpu@%d", cpuid);
 
 	fdt_begin_node(fdt, node_name);
 	fdt_property_string(fdt, "device_type", "cpu");
 	fdt_property_string(fdt, "compatible", "riscv");
 	fdt_property_u32(fdt, "reg", cpuid);
-	fdt_property_string(fdt, "riscv,isa", "rv64imafdc_sstc");
+	fdt_property_string(fdt, "riscv,isa", isa);
 	fdt_property_string(fdt, "mmu-type", "riscv,sv39");
 	fdt_property_string(fdt, "clock-frequency", "1000000000");
 
 	fdt_begin_node(fdt, "interrupt-controller");
 	intc0_phandle = assign_phandle(fdt);
 	fdt_property_u32(fdt, "#address-cells", 2);
 	fdt_property_u32(fdt, "#interrupt-cells", 1);
 	fdt_property(fdt, "interrupt-controller", NULL, 0);
 	fdt_property_string(fdt, "compatible", "riscv,cpu-intc");
 	fdt_end_node(fdt);
 
 	fdt_end_node(fdt);
 }
 
 static void
-add_cpus(void *fdt, int ncpu)
+add_cpus(void *fdt, int ncpu, const char *isa)
 {
 	int cpuid;
 
 	fdt_begin_node(fdt, "cpus");
 	/* XXX: Needed given the root #address-cells? */
 	fdt_property_u32(fdt, "#address-cells", 1);
 	fdt_property_u32(fdt, "#size-cells", 0);
 	fdt_property_u32(fdt, "timebase-frequency", 10000000);
 
-	for (cpuid = 0; cpuid < ncpu; cpuid++) {
-		add_cpu(fdt, cpuid);
-	}
+	for (cpuid = 0; cpuid < ncpu; cpuid++)
+		add_cpu(fdt, cpuid, isa);
+
 	fdt_end_node(fdt);
 }
 
 int
-fdt_init(struct vmctx *ctx, int ncpu, vm_paddr_t fdtaddr, vm_size_t fdtsize)
+fdt_init(struct vmctx *ctx, int ncpu, vm_paddr_t fdtaddr, vm_size_t fdtsize,
+    const char *isa)
 {
 	void *fdt;
 	const char *bootargs;
 
 	fdt = paddr_guest2host(ctx, fdtaddr, fdtsize);
 	if (fdt == NULL)
 		return (EFAULT);
 
 	fdt_create(fdt, (int)fdtsize);
 
 	/* Add the memory reserve map (needed even if none is reserved) */
 	fdt_finish_reservemap(fdt);
 
 	/* Create the root node */
 	fdt_begin_node(fdt, "");
 
 	fdt_property_string(fdt, "compatible", "freebsd,bhyve");
 	fdt_property_u32(fdt, "#address-cells", 2);
 	fdt_property_u32(fdt, "#size-cells", 2);
 
 	fdt_begin_node(fdt, "chosen");
 	fdt_property_string(fdt, "stdout-path", "serial0:115200n8");
 	bootargs = get_config_value("fdt.bootargs");
 	if (bootargs != NULL)
 		fdt_property_string(fdt, "bootargs", bootargs);
 	fdt_end_node(fdt);
 
 	fdt_begin_node(fdt, "memory");
 	fdt_property_string(fdt, "device_type", "memory");
 	/* There is no lowmem on riscv. */
 	assert(vm_get_lowmem_size(ctx) == 0);
 	set_single_reg(fdt, vm_get_highmem_base(ctx),
 	    vm_get_highmem_size(ctx));
 	fdt_end_node(fdt);
 
-	add_cpus(fdt, ncpu);
+	add_cpus(fdt, ncpu, isa);
 
 	/* Finalized by fdt_finalize(). */
 	fdtroot = fdt;
 
 	return (0);
 }
 
 void
 fdt_add_aplic(uint64_t mem_base, uint64_t mem_size)
 {
 	char node_name[32];
 	void *fdt, *prop;
 
 	fdt = fdtroot;
 
 	snprintf(node_name, sizeof(node_name), "interrupt-controller@%lx",
 	    (unsigned long)mem_base);
 	fdt_begin_node(fdt, node_name);
 
 	aplic_phandle = assign_phandle(fdt);
 	fdt_property_string(fdt, "compatible", "riscv,aplic");
 	fdt_property(fdt, "interrupt-controller", NULL, 0);
 #if notyet
 	fdt_property(fdt, "msi-controller", NULL, 0);
 #endif
 	/* XXX: Needed given the root #address-cells? */
 	fdt_property_u32(fdt, "#address-cells", 2);
 	fdt_property_u32(fdt, "#interrupt-cells", 2);
 	fdt_property_placeholder(fdt, "reg", 2 * sizeof(uint64_t), &prop);
 	SET_PROP_U64(prop, 0, mem_base);
 	SET_PROP_U64(prop, 1, mem_size);
 	fdt_property_placeholder(fdt, "interrupts-extended",
 	    2 * sizeof(uint32_t), &prop);
 	SET_PROP_U32(prop, 0, intc0_phandle);
 	SET_PROP_U32(prop, 1, 9);
 	fdt_property_u32(fdt, "riscv,num-sources", 63);
 	fdt_end_node(fdt);
 
 	fdt_property_u32(fdt, "interrupt-parent", aplic_phandle);
 }
 
 void
 fdt_add_uart(uint64_t uart_base, uint64_t uart_size, int intr)
 {
 	void *fdt, *interrupts;
 	char node_name[32];
 
 	assert(aplic_phandle != 0);
 
 	fdt = fdtroot;
 
 	snprintf(node_name, sizeof(node_name), "serial@%lx", uart_base);
 	fdt_begin_node(fdt, node_name);
 	fdt_property_string(fdt, "compatible", "ns16550");
 	set_single_reg(fdt, uart_base, uart_size);
 	fdt_property_u32(fdt, "interrupt-parent", aplic_phandle);
 	fdt_property_placeholder(fdt, "interrupts", 2 * sizeof(uint32_t),
 	    &interrupts);
 	SET_PROP_U32(interrupts, 0, intr);
 	SET_PROP_U32(interrupts, 1, IRQ_TYPE_LEVEL_HIGH);
 	fdt_end_node(fdt);
 
 	snprintf(node_name, sizeof(node_name), "/serial@%lx", uart_base);
 	fdt_begin_node(fdt, "aliases");
 	fdt_property_string(fdt, "serial0", node_name);
 	fdt_end_node(fdt);
 }
 
 void
 fdt_add_pcie(int intrs[static 4])
 {
 	void *fdt, *prop;
 	int slot, pin, intr, i;
 
 	assert(aplic_phandle != 0);
 
 	fdt = fdtroot;
 
 	fdt_begin_node(fdt, "pcie@1f0000000");
 	fdt_property_string(fdt, "compatible", "pci-host-ecam-generic");
 	fdt_property_u32(fdt, "#address-cells", 3);
 	fdt_property_u32(fdt, "#size-cells", 2);
 	fdt_property_string(fdt, "device_type", "pci");
 	fdt_property_u64(fdt, "bus-range", (0ul << 32) | 1);
 	set_single_reg(fdt, 0xe0000000, 0x10000000);
 	fdt_property_placeholder(fdt, "ranges", 2 * 7 * sizeof(uint32_t),
 	    &prop);
 	SET_PROP_U32(prop, 0, 0x01000000);
 	SET_PROP_U32(prop, 1, 0);
 	SET_PROP_U32(prop, 2, 0xdf000000);
 	SET_PROP_U32(prop, 3, 0);
 	SET_PROP_U32(prop, 4, 0xdf000000);
 	SET_PROP_U32(prop, 5, 0);
 	SET_PROP_U32(prop, 6, 0x01000000);
 	SET_PROP_U32(prop, 7, 0x02000000);
 	SET_PROP_U32(prop, 8, 0);
 	SET_PROP_U32(prop, 9, 0xa0000000);
 	SET_PROP_U32(prop, 10, 0);
 	SET_PROP_U32(prop, 11, 0xa0000000);
 	SET_PROP_U32(prop, 12, 0);
 	SET_PROP_U32(prop, 13, 0x3f000000);
 #if notyet
 	fdt_property_placeholder(fdt, "msi-map", 4 * sizeof(uint32_t),
 	    &prop);
 	SET_PROP_U32(prop, 0, 0);		/* RID base */
 	SET_PROP_U32(prop, 1, aplic_phandle);	/* MSI parent */
 	SET_PROP_U32(prop, 2, 0);		/* MSI base */
 	SET_PROP_U32(prop, 3, 0x10000);		/* RID length */
 	fdt_property_u32(fdt, "msi-parent", aplic_phandle);
 #endif
 	fdt_property_u32(fdt, "#interrupt-cells", 1);
 	fdt_property_u32(fdt, "interrupt-parent", aplic_phandle);
 
 	/*
 	 * Describe standard swizzled interrupt routing (pins rotated by one
 	 * for each consecutive slot). Must match pci_irq_route().
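 	 * For example, pin INTA# on slot 0 routes to intrs[0], while INTA#
 	 * on slot 1 rotates to intrs[1].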
 	 */
 	fdt_property_placeholder(fdt, "interrupt-map-mask",
 	    4 * sizeof(uint32_t), &prop);
 	SET_PROP_U32(prop, 0, 3 << 11);
 	SET_PROP_U32(prop, 1, 0);
 	SET_PROP_U32(prop, 2, 0);
 	SET_PROP_U32(prop, 3, 7);
 	fdt_property_placeholder(fdt, "interrupt-map",
 	    16 * 9 * sizeof(uint32_t), &prop);
 	for (i = 0; i < 16; ++i) {
 		pin = i % 4;
 		slot = i / 4;
 		intr = intrs[(pin + slot) % 4];
 
 		/*
 		 * Each map entry is 9 cells wide (3 child address cells, 1
 		 * child interrupt cell, 1 phandle, 2 parent address cells,
 		 * 2 parent interrupt cells), matching the 16 * 9 allocation
 		 * above.
 		 */
 		SET_PROP_U32(prop, 9 * i + 0, slot << 11);
 		SET_PROP_U32(prop, 9 * i + 1, 0);
 		SET_PROP_U32(prop, 9 * i + 2, 0);
 		SET_PROP_U32(prop, 9 * i + 3, pin + 1);
 		SET_PROP_U32(prop, 9 * i + 4, aplic_phandle);
 		SET_PROP_U32(prop, 9 * i + 5, 0);
 		SET_PROP_U32(prop, 9 * i + 6, 0);
 		SET_PROP_U32(prop, 9 * i + 7, intr);
 		SET_PROP_U32(prop, 9 * i + 8, IRQ_TYPE_LEVEL_HIGH);
 	}
 	fdt_end_node(fdt);
 }
 
 void
 fdt_finalize(void)
 {
 	fdt_end_node(fdtroot);
 	fdt_finish(fdtroot);
 }
diff --git a/usr.sbin/bhyve/riscv/fdt.h b/usr.sbin/bhyve/riscv/fdt.h
index 9bebe6ffa29d..60140a82a211 100644
--- a/usr.sbin/bhyve/riscv/fdt.h
+++ b/usr.sbin/bhyve/riscv/fdt.h
@@ -1,45 +1,45 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2022 The FreeBSD Foundation
  *
  * This software was developed by Andrew Turner under sponsorship from
  * the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef _FDT_H_
 #define _FDT_H_
 
 #include
 
 struct vmctx;
 
 int	fdt_init(struct vmctx *ctx, int ncpu, vm_paddr_t addrp,
-	    vm_size_t size);
+	    vm_size_t size, const char *isa);
 void	fdt_add_aplic(uint64_t dist_base, uint64_t dist_size);
 void	fdt_add_pcie(int intrs[static 4]);
 void	fdt_add_uart(uint64_t uart_base, uint64_t uart_size, int intr);
 void	fdt_finalize(void);
 
 #endif /* _FDT_H_ */