diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h index 793c61534cee..870d0d6c5cd1 100644 --- a/sys/riscv/vmm/riscv.h +++ b/sys/riscv/vmm/riscv.h @@ -1,159 +1,159 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015 Mihai Carabas * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _VMM_RISCV_H_ #define _VMM_RISCV_H_ #include #include #include #include struct hypregs { uint64_t hyp_ra; uint64_t hyp_sp; uint64_t hyp_gp; uint64_t hyp_tp; uint64_t hyp_t[7]; uint64_t hyp_s[12]; uint64_t hyp_a[8]; uint64_t hyp_sepc; uint64_t hyp_sstatus; uint64_t hyp_hstatus; }; struct hypcsr { uint64_t hvip; uint64_t vsstatus; uint64_t vsie; uint64_t vstvec; uint64_t vsscratch; uint64_t vsepc; uint64_t vscause; uint64_t vstval; uint64_t vsatp; uint64_t scounteren; uint64_t senvcfg; }; enum vmm_fence_type { VMM_RISCV_FENCE_INVALID = 0, VMM_RISCV_FENCE_I, VMM_RISCV_FENCE_VMA, VMM_RISCV_FENCE_VMA_ASID, }; struct vmm_fence { enum vmm_fence_type type; size_t start; size_t size; uint64_t asid; }; struct hypctx { struct hypregs host_regs; struct hypregs guest_regs; struct hypcsr guest_csrs; uint64_t host_sscratch; uint64_t host_stvec; uint64_t host_scounteren; uint64_t guest_scounteren; struct hyp *hyp; struct vcpu *vcpu; bool has_exception; int cpu_id; int ipi_pending; int interrupts_pending; struct vtimer vtimer; struct vmm_fence *fence_queue; struct mtx fence_queue_mtx; int fence_queue_head; int fence_queue_tail; #define FENCE_REQ_I (1 << 0) #define FENCE_REQ_VMA (1 << 1) int fence_req; }; struct hyp { struct vm *vm; uint64_t vmid_generation; bool aplic_attached; struct aplic *aplic; struct hypctx *ctx[]; }; struct hyptrap { uint64_t sepc; uint64_t scause; uint64_t stval; uint64_t htval; uint64_t htinst; }; #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ ret_type vmmops_##opname args; DEFINE_VMMOPS_IFUNC(int, modinit, (void)) DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, struct vm_eventinfo *info)) DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, int vcpu_id)) DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, vm_offset_t max)) DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) #define dprintf(fmt, ...) struct hypctx *riscv_get_active_vcpu(void); void vmm_switch(struct hypctx *); void vmm_unpriv_trap(struct hyptrap *, uint64_t tmp); -int vmm_sbi_ecall(struct vcpu *, bool *); +bool vmm_sbi_ecall(struct vcpu *); -void riscv_send_ipi(struct hypctx *hypctx, int hart_id); +void riscv_send_ipi(struct hyp *hyp, cpuset_t *cpus); int riscv_check_ipi(struct hypctx *hypctx, bool clear); bool riscv_check_interrupts_pending(struct hypctx *hypctx); #endif /* !_VMM_RISCV_H_ */ diff --git a/sys/riscv/vmm/vmm_riscv.c b/sys/riscv/vmm/vmm_riscv.c index 78250ae7c440..ca2ef50dbd24 100644 --- a/sys/riscv/vmm/vmm_riscv.c +++ b/sys/riscv/vmm/vmm_riscv.c @@ -1,948 +1,949 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2024 Ruslan Bukin + * Copyright (c) 2024-2025 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "riscv.h" #include "vmm_aplic.h" #include "vmm_fence.h" #include "vmm_stat.h" MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP"); DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); static int m_op(uint32_t insn, int match, int mask) { if (((insn ^ match) & mask) == 0) return (1); return (0); } static inline void riscv_set_active_vcpu(struct hypctx *hypctx) { DPCPU_SET(vcpu, hypctx); } struct hypctx * riscv_get_active_vcpu(void) { return (DPCPU_GET(vcpu)); } int vmmops_modinit(void) { if (!has_hyp) { printf("vmm: riscv hart doesn't support H-extension.\n"); return (ENXIO); } return (0); } int vmmops_modcleanup(void) { return (0); } void * vmmops_init(struct vm *vm, pmap_t pmap) { struct hyp *hyp; vm_size_t size; size = round_page(sizeof(struct hyp) + sizeof(struct hypctx *) * vm_get_maxcpus(vm)); hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); hyp->vm = vm; hyp->aplic_attached = false; aplic_vminit(hyp); return (hyp); } static void vmmops_delegate(void) { uint64_t hedeleg; uint64_t hideleg; hedeleg = (1UL << SCAUSE_INST_MISALIGNED); hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION); hedeleg |= (1UL << SCAUSE_BREAKPOINT); hedeleg |= (1UL << SCAUSE_ECALL_USER); hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT); hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT); hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT); csr_write(hedeleg, hedeleg); hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR); hideleg |= (1UL << IRQ_TIMER_HYPERVISOR); hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR); csr_write(hideleg, hideleg); } static void vmmops_vcpu_restore_csrs(struct hypctx *hypctx) { struct hypcsr *csrs; csrs = &hypctx->guest_csrs; csr_write(vsstatus, csrs->vsstatus); csr_write(vsie, csrs->vsie); csr_write(vstvec, csrs->vstvec); csr_write(vsscratch, csrs->vsscratch); csr_write(vsepc, csrs->vsepc); csr_write(vscause, csrs->vscause); csr_write(vstval, csrs->vstval); csr_write(hvip, csrs->hvip); csr_write(vsatp, csrs->vsatp); } static void vmmops_vcpu_save_csrs(struct hypctx *hypctx) { struct hypcsr *csrs; csrs = &hypctx->guest_csrs; csrs->vsstatus = csr_read(vsstatus); csrs->vsie = csr_read(vsie); csrs->vstvec = csr_read(vstvec); csrs->vsscratch = csr_read(vsscratch); csrs->vsepc = csr_read(vsepc); csrs->vscause = csr_read(vscause); csrs->vstval = csr_read(vstval); csrs->hvip = csr_read(hvip); csrs->vsatp = csr_read(vsatp); } void * vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) { struct hypctx *hypctx; struct hyp *hyp; vm_size_t size; hyp = vmi; dprintf("%s: hyp %p\n", __func__, hyp); KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), ("%s: Invalid vcpuid %d", __func__, vcpuid)); size = round_page(sizeof(struct hypctx)); hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); hypctx->hyp = hyp; hypctx->vcpu = vcpu1; hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM; /* Fence queue. */ hypctx->fence_queue = mallocarray(VMM_FENCE_QUEUE_SIZE, sizeof(struct vmm_fence), M_HYP, M_WAITOK | M_ZERO); mtx_init(&hypctx->fence_queue_mtx, "fence queue", NULL, MTX_SPIN); /* sstatus */ hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE; hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL; /* hstatus */ hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW; hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP; hypctx->cpu_id = vcpuid; hyp->ctx[vcpuid] = hypctx; aplic_cpuinit(hypctx); vtimer_cpuinit(hypctx); return (hypctx); } static int riscv_vmm_pinit(pmap_t pmap) { dprintf("%s: pmap %p\n", __func__, pmap); pmap_pinit_stage(pmap, PM_STAGE2); return (1); } struct vmspace * vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) { return (vmspace_alloc(min, max, riscv_vmm_pinit)); } void vmmops_vmspace_free(struct vmspace *vmspace) { pmap_remove_pages(vmspace_pmap(vmspace)); vmspace_free(vmspace); } static void riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data, struct hyptrap *trap) { register struct hyptrap * htrap asm("a0"); uintptr_t old_hstatus; uintptr_t old_stvec; uintptr_t entry; uint64_t val; uint64_t tmp; int intr; entry = (uintptr_t)&vmm_unpriv_trap; htrap = trap; intr = intr_disable(); old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus); /* * Setup a temporary exception vector, so that if hlvx.hu raises * an exception we catch it in the vmm_unpriv_trap(). */ old_stvec = csr_swap(stvec, entry); /* * Read first two bytes of instruction assuming it could be a * compressed one. */ __asm __volatile(".option push\n" ".option norvc\n" "hlvx.hu %[val], (%[addr])\n" ".option pop\n" : [val] "=r" (val) : [addr] "r" (guest_addr), "r" (htrap) : "a1", "memory"); /* * Check if previous hlvx.hu did not raise an exception, and then * read the rest of instruction if it is a full-length one. */ if (trap->scause == -1 && (val & 0x3) == 0x3) { guest_addr += 2; __asm __volatile(".option push\n" ".option norvc\n" "hlvx.hu %[tmp], (%[addr])\n" ".option pop\n" : [tmp] "=r" (tmp) : [addr] "r" (guest_addr), "r" (htrap) : "a1", "memory"); val |= (tmp << 16); } csr_write(hstatus, old_hstatus); csr_write(stvec, old_stvec); intr_restore(intr); *data = val; } static int riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret, struct hyptrap *trap) { uintptr_t guest_addr; struct vie *vie; uint64_t insn; int reg_num; int rs2, rd; int direction; int sign_extend; int access_size; guest_addr = vme_ret->sepc; KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT || vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT || vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT, ("Invalid scause")); direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ? VM_DIR_WRITE : VM_DIR_READ; sign_extend = 1; bzero(trap, sizeof(struct hyptrap)); trap->scause = -1; riscv_unpriv_read(hypctx, guest_addr, &insn, trap); if (trap->scause != -1) return (-1); if ((insn & 0x3) == 0x3) { rs2 = (insn & RS2_MASK) >> RS2_SHIFT; rd = (insn & RD_MASK) >> RD_SHIFT; if (direction == VM_DIR_WRITE) { if (m_op(insn, MATCH_SB, MASK_SB)) access_size = 1; else if (m_op(insn, MATCH_SH, MASK_SH)) access_size = 2; else if (m_op(insn, MATCH_SW, MASK_SW)) access_size = 4; else if (m_op(insn, MATCH_SD, MASK_SD)) access_size = 8; else { printf("unknown store instr at %lx", guest_addr); return (-2); } reg_num = rs2; } else { if (m_op(insn, MATCH_LB, MASK_LB)) access_size = 1; else if (m_op(insn, MATCH_LH, MASK_LH)) access_size = 2; else if (m_op(insn, MATCH_LW, MASK_LW)) access_size = 4; else if (m_op(insn, MATCH_LD, MASK_LD)) access_size = 8; else if (m_op(insn, MATCH_LBU, MASK_LBU)) { access_size = 1; sign_extend = 0; } else if (m_op(insn, MATCH_LHU, MASK_LHU)) { access_size = 2; sign_extend = 0; } else if (m_op(insn, MATCH_LWU, MASK_LWU)) { access_size = 4; sign_extend = 0; } else { printf("unknown load instr at %lx", guest_addr); return (-3); } reg_num = rd; } vme_ret->inst_length = 4; } else { rs2 = (insn >> 7) & 0x7; rs2 += 0x8; rd = (insn >> 2) & 0x7; rd += 0x8; if (direction == VM_DIR_WRITE) { if (m_op(insn, MATCH_C_SW, MASK_C_SW)) access_size = 4; else if (m_op(insn, MATCH_C_SD, MASK_C_SD)) access_size = 8; else { printf("unknown compressed store instr at %lx", guest_addr); return (-4); } } else { if (m_op(insn, MATCH_C_LW, MASK_C_LW)) access_size = 4; else if (m_op(insn, MATCH_C_LD, MASK_C_LD)) access_size = 8; else { printf("unknown load instr at %lx", guest_addr); return (-5); } } reg_num = rd; vme_ret->inst_length = 2; } vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) | (vme_ret->stval & 0x3); dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn, reg_num, vme_ret->u.inst_emul.gpa); vie = &vme_ret->u.inst_emul.vie; vie->dir = direction; vie->reg = reg_num; vie->sign_extend = sign_extend; vie->access_size = access_size; return (0); } static bool riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme, pmap_t pmap) { struct hyptrap trap; uint64_t insn; uint64_t gpa; bool handled; - bool retu; int ret; int i; handled = false; if (vme->scause & SCAUSE_INTR) { /* * Host interrupt? Leave critical section to handle. */ vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1); vme->exitcode = VM_EXITCODE_BOGUS; vme->inst_length = 0; return (handled); } switch (vme->scause) { case SCAUSE_FETCH_GUEST_PAGE_FAULT: case SCAUSE_LOAD_GUEST_PAGE_FAULT: case SCAUSE_STORE_GUEST_PAGE_FAULT: gpa = (vme->htval << 2) | (vme->stval & 0x3); if (vm_mem_allocated(hypctx->vcpu, gpa)) { vme->exitcode = VM_EXITCODE_PAGING; vme->inst_length = 0; vme->u.paging.gpa = gpa; } else { ret = riscv_gen_inst_emul_data(hypctx, vme, &trap); if (ret != 0) { vme->exitcode = VM_EXITCODE_HYP; vme->u.hyp.scause = trap.scause; break; } vme->exitcode = VM_EXITCODE_INST_EMUL; } break; case SCAUSE_ILLEGAL_INSTRUCTION: /* * TODO: handle illegal instruction properly. */ printf("%s: Illegal instruction at %lx stval 0x%lx htval " "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval); vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; case SCAUSE_VIRTUAL_SUPERVISOR_ECALL: - retu = false; - vmm_sbi_ecall(hypctx->vcpu, &retu); - if (retu == false) { - handled = true; + handled = vmm_sbi_ecall(hypctx->vcpu); + if (handled == true) break; - } for (i = 0; i < nitems(vme->u.ecall.args); i++) vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i]; vme->exitcode = VM_EXITCODE_ECALL; - handled = false; break; case SCAUSE_VIRTUAL_INSTRUCTION: insn = vme->stval; if (m_op(insn, MATCH_WFI, MASK_WFI)) vme->exitcode = VM_EXITCODE_WFI; else vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; default: printf("unknown scause %lx\n", vme->scause); vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; } return (handled); } int vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault) { /* Implement me. */ return (ENOSYS); } void -riscv_send_ipi(struct hypctx *hypctx, int hart_id) +riscv_send_ipi(struct hyp *hyp, cpuset_t *cpus) { - struct hyp *hyp; + struct hypctx *hypctx; struct vm *vm; + uint16_t maxcpus; + int i; - hyp = hypctx->hyp; vm = hyp->vm; - atomic_set_32(&hypctx->ipi_pending, 1); - - vcpu_notify_event(vm_vcpu(vm, hart_id)); + maxcpus = vm_get_maxcpus(hyp->vm); + for (i = 0; i < maxcpus; i++) { + if (!CPU_ISSET(i, cpus)) + continue; + hypctx = hyp->ctx[i]; + atomic_set_32(&hypctx->ipi_pending, 1); + vcpu_notify_event(vm_vcpu(vm, i)); + } } int riscv_check_ipi(struct hypctx *hypctx, bool clear) { int val; if (clear) val = atomic_swap_32(&hypctx->ipi_pending, 0); else val = hypctx->ipi_pending; return (val); } bool riscv_check_interrupts_pending(struct hypctx *hypctx) { if (hypctx->interrupts_pending) return (true); return (false); } static void riscv_sync_interrupts(struct hypctx *hypctx) { int pending; pending = aplic_check_pending(hypctx); if (pending) hypctx->guest_csrs.hvip |= HVIP_VSEIP; else hypctx->guest_csrs.hvip &= ~HVIP_VSEIP; /* Guest clears VSSIP bit manually. */ if (riscv_check_ipi(hypctx, true)) hypctx->guest_csrs.hvip |= HVIP_VSSIP; if (riscv_check_interrupts_pending(hypctx)) hypctx->guest_csrs.hvip |= HVIP_VSTIP; else hypctx->guest_csrs.hvip &= ~HVIP_VSTIP; csr_write(hvip, hypctx->guest_csrs.hvip); } int vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) { struct hypctx *hypctx; struct vm_exit *vme; struct vcpu *vcpu; register_t val; uint64_t hvip; bool handled; hypctx = (struct hypctx *)vcpui; vcpu = hypctx->vcpu; vme = vm_exitinfo(vcpu); hypctx->guest_regs.hyp_sepc = (uint64_t)pc; vmmops_delegate(); /* * From The RISC-V Instruction Set Manual * Volume II: RISC-V Privileged Architectures * * If the new virtual machine's guest physical page tables * have been modified, it may be necessary to execute an HFENCE.GVMA * instruction (see Section 5.3.2) before or after writing hgatp. */ __asm __volatile("hfence.gvma" ::: "memory"); csr_write(hgatp, pmap->pm_satp); if (has_sstc) csr_write(henvcfg, HENVCFG_STCE); csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE); /* TODO: should we trap rdcycle / rdtime? */ csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM); vmmops_vcpu_restore_csrs(hypctx); for (;;) { dprintf("%s: pc %lx\n", __func__, pc); if (hypctx->has_exception) { hypctx->has_exception = false; /* * TODO: implement exception injection. */ } val = intr_disable(); /* Check if the vcpu is suspended */ if (vcpu_suspended(evinfo)) { intr_restore(val); vm_exit_suspended(vcpu, pc); break; } if (vcpu_debugged(vcpu)) { intr_restore(val); vm_exit_debug(vcpu, pc); break; } /* * TODO: What happens if a timer interrupt is asserted exactly * here, but for the previous VM? */ riscv_set_active_vcpu(hypctx); aplic_flush_hwstate(hypctx); riscv_sync_interrupts(hypctx); vmm_fence_process(hypctx); dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n", __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus, hypctx->guest_regs.hyp_hstatus); vmm_switch(hypctx); dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__, hypctx->guest_regs.hyp_hstatus); /* Guest can clear VSSIP. It can't clear VSTIP or VSEIP. */ hvip = csr_read(hvip); if ((hypctx->guest_csrs.hvip ^ hvip) & HVIP_VSSIP) { if (hvip & HVIP_VSSIP) { /* TODO: VSSIP was set by guest. */ } else { /* VSSIP was cleared by guest. */ hypctx->guest_csrs.hvip &= ~HVIP_VSSIP; } } aplic_sync_hwstate(hypctx); /* * TODO: deactivate stage 2 pmap here if needed. */ vme->scause = csr_read(scause); vme->sepc = csr_read(sepc); vme->stval = csr_read(stval); vme->htval = csr_read(htval); vme->htinst = csr_read(htinst); intr_restore(val); vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); vme->pc = hypctx->guest_regs.hyp_sepc; vme->inst_length = INSN_SIZE; handled = riscv_handle_world_switch(hypctx, vme, pmap); if (handled == false) /* Exit loop to emulate instruction. */ break; else { /* Resume guest execution from the next instruction. */ hypctx->guest_regs.hyp_sepc += vme->inst_length; } } vmmops_vcpu_save_csrs(hypctx); return (0); } static void riscv_pcpu_vmcleanup(void *arg) { struct hyp *hyp; int i, maxcpus; hyp = arg; maxcpus = vm_get_maxcpus(hyp->vm); for (i = 0; i < maxcpus; i++) { if (riscv_get_active_vcpu() == hyp->ctx[i]) { riscv_set_active_vcpu(NULL); break; } } } void vmmops_vcpu_cleanup(void *vcpui) { struct hypctx *hypctx; hypctx = vcpui; dprintf("%s\n", __func__); aplic_cpucleanup(hypctx); mtx_destroy(&hypctx->fence_queue_mtx); free(hypctx->fence_queue, M_HYP); free(hypctx, M_HYP); } void vmmops_cleanup(void *vmi) { struct hyp *hyp; hyp = vmi; dprintf("%s\n", __func__); aplic_vmcleanup(hyp); smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp); free(hyp, M_HYP); } /* * Return register value. Registers have different sizes and an explicit cast * must be made to ensure proper conversion. */ static uint64_t * hypctx_regptr(struct hypctx *hypctx, int reg) { switch (reg) { case VM_REG_GUEST_RA: return (&hypctx->guest_regs.hyp_ra); case VM_REG_GUEST_SP: return (&hypctx->guest_regs.hyp_sp); case VM_REG_GUEST_GP: return (&hypctx->guest_regs.hyp_gp); case VM_REG_GUEST_TP: return (&hypctx->guest_regs.hyp_tp); case VM_REG_GUEST_T0: return (&hypctx->guest_regs.hyp_t[0]); case VM_REG_GUEST_T1: return (&hypctx->guest_regs.hyp_t[1]); case VM_REG_GUEST_T2: return (&hypctx->guest_regs.hyp_t[2]); case VM_REG_GUEST_S0: return (&hypctx->guest_regs.hyp_s[0]); case VM_REG_GUEST_S1: return (&hypctx->guest_regs.hyp_s[1]); case VM_REG_GUEST_A0: return (&hypctx->guest_regs.hyp_a[0]); case VM_REG_GUEST_A1: return (&hypctx->guest_regs.hyp_a[1]); case VM_REG_GUEST_A2: return (&hypctx->guest_regs.hyp_a[2]); case VM_REG_GUEST_A3: return (&hypctx->guest_regs.hyp_a[3]); case VM_REG_GUEST_A4: return (&hypctx->guest_regs.hyp_a[4]); case VM_REG_GUEST_A5: return (&hypctx->guest_regs.hyp_a[5]); case VM_REG_GUEST_A6: return (&hypctx->guest_regs.hyp_a[6]); case VM_REG_GUEST_A7: return (&hypctx->guest_regs.hyp_a[7]); case VM_REG_GUEST_S2: return (&hypctx->guest_regs.hyp_s[2]); case VM_REG_GUEST_S3: return (&hypctx->guest_regs.hyp_s[3]); case VM_REG_GUEST_S4: return (&hypctx->guest_regs.hyp_s[4]); case VM_REG_GUEST_S5: return (&hypctx->guest_regs.hyp_s[5]); case VM_REG_GUEST_S6: return (&hypctx->guest_regs.hyp_s[6]); case VM_REG_GUEST_S7: return (&hypctx->guest_regs.hyp_s[7]); case VM_REG_GUEST_S8: return (&hypctx->guest_regs.hyp_s[8]); case VM_REG_GUEST_S9: return (&hypctx->guest_regs.hyp_s[9]); case VM_REG_GUEST_S10: return (&hypctx->guest_regs.hyp_s[10]); case VM_REG_GUEST_S11: return (&hypctx->guest_regs.hyp_s[11]); case VM_REG_GUEST_T3: return (&hypctx->guest_regs.hyp_t[3]); case VM_REG_GUEST_T4: return (&hypctx->guest_regs.hyp_t[4]); case VM_REG_GUEST_T5: return (&hypctx->guest_regs.hyp_t[5]); case VM_REG_GUEST_T6: return (&hypctx->guest_regs.hyp_t[6]); case VM_REG_GUEST_SEPC: return (&hypctx->guest_regs.hyp_sepc); default: break; } return (NULL); } int vmmops_getreg(void *vcpui, int reg, uint64_t *retval) { uint64_t *regp; int running, hostcpu; struct hypctx *hypctx; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); if (reg == VM_REG_GUEST_ZERO) { *retval = 0; return (0); } regp = hypctx_regptr(hypctx, reg); if (regp == NULL) return (EINVAL); *retval = *regp; return (0); } int vmmops_setreg(void *vcpui, int reg, uint64_t val) { struct hypctx *hypctx; int running, hostcpu; uint64_t *regp; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); regp = hypctx_regptr(hypctx, reg); if (regp == NULL) return (EINVAL); *regp = val; return (0); } int vmmops_exception(void *vcpui, uint64_t scause) { struct hypctx *hypctx; int running, hostcpu; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); /* TODO: implement me. */ return (ENOSYS); } int vmmops_getcap(void *vcpui, int num, int *retval) { int ret; ret = ENOENT; switch (num) { case VM_CAP_SSTC: *retval = has_sstc; ret = 0; break; case VM_CAP_UNRESTRICTED_GUEST: *retval = 1; ret = 0; break; default: break; } return (ret); } int vmmops_setcap(void *vcpui, int num, int val) { return (ENOENT); } diff --git a/sys/riscv/vmm/vmm_sbi.c b/sys/riscv/vmm/vmm_sbi.c index 3ba90e349b3c..426276444357 100644 --- a/sys/riscv/vmm/vmm_sbi.c +++ b/sys/riscv/vmm/vmm_sbi.c @@ -1,237 +1,218 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2024 Ruslan Bukin + * Copyright (c) 2024-2025 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include - -#include -#include -#include -#include -#include #include #include "riscv.h" #include "vmm_fence.h" static int vmm_sbi_handle_rfnc(struct vcpu *vcpu, struct hypctx *hypctx) { struct vmm_fence fence; + cpuset_t active_cpus; uint64_t hart_mask; uint64_t hart_mask_base; uint64_t func_id; struct hyp *hyp; uint16_t maxcpus; cpuset_t cpus; - int vcpu_id; int i; func_id = hypctx->guest_regs.hyp_a[6]; hart_mask = hypctx->guest_regs.hyp_a[0]; hart_mask_base = hypctx->guest_regs.hyp_a[1]; /* Construct vma_fence. */ fence.start = hypctx->guest_regs.hyp_a[2]; fence.size = hypctx->guest_regs.hyp_a[3]; fence.asid = hypctx->guest_regs.hyp_a[4]; switch (func_id) { case SBI_RFNC_REMOTE_FENCE_I: fence.type = VMM_RISCV_FENCE_I; break; case SBI_RFNC_REMOTE_SFENCE_VMA: fence.type = VMM_RISCV_FENCE_VMA; break; case SBI_RFNC_REMOTE_SFENCE_VMA_ASID: fence.type = VMM_RISCV_FENCE_VMA_ASID; break; default: - return (-1); + return (SBI_ERR_NOT_SUPPORTED); } /* Construct cpuset_t from the mask supplied. */ - CPU_ZERO(&cpus); hyp = hypctx->hyp; + active_cpus = vm_active_cpus(hyp->vm); maxcpus = vm_get_maxcpus(hyp->vm); for (i = 0; i < maxcpus; i++) { vcpu = vm_vcpu(hyp->vm, i); if (vcpu == NULL) continue; - vcpu_id = vcpu_vcpuid(vcpu); if (hart_mask_base != -1UL) { - if (vcpu_id < hart_mask_base) + if (i < hart_mask_base) continue; - if (!(hart_mask & (1UL << (vcpu_id - hart_mask_base)))) + if (!(hart_mask & (1UL << (i - hart_mask_base)))) continue; } + /* + * If either hart_mask_base or at least one hartid from + * hart_mask is not valid, then return error. + */ + if (!CPU_ISSET(i, &active_cpus)) + return (SBI_ERR_INVALID_PARAM); CPU_SET(i, &cpus); } + if (CPU_EMPTY(&cpus)) + return (SBI_ERR_INVALID_PARAM); + vmm_fence_add(hyp->vm, &cpus, &fence); - return (0); + return (SBI_SUCCESS); } static int vmm_sbi_handle_time(struct vcpu *vcpu, struct hypctx *hypctx) { uint64_t func_id; uint64_t next_val; - int ret; func_id = hypctx->guest_regs.hyp_a[6]; next_val = hypctx->guest_regs.hyp_a[0]; switch (func_id) { case SBI_TIME_SET_TIMER: vtimer_set_timer(hypctx, next_val); - ret = 0; break; default: - ret = -1; - break; + return (SBI_ERR_NOT_SUPPORTED); } - hypctx->guest_regs.hyp_a[0] = ret; - - return (0); + return (SBI_SUCCESS); } static int vmm_sbi_handle_ipi(struct vcpu *vcpu, struct hypctx *hypctx) { - struct hypctx *target_hypctx; - struct vcpu *target_vcpu __unused; cpuset_t active_cpus; struct hyp *hyp; uint64_t hart_mask; uint64_t hart_mask_base; uint64_t func_id; + cpuset_t cpus; int hart_id; int bit; - int ret; func_id = hypctx->guest_regs.hyp_a[6]; hart_mask = hypctx->guest_regs.hyp_a[0]; hart_mask_base = hypctx->guest_regs.hyp_a[1]; dprintf("%s: hart_mask %lx\n", __func__, hart_mask); hyp = hypctx->hyp; active_cpus = vm_active_cpus(hyp->vm); + CPU_ZERO(&cpus); switch (func_id) { case SBI_IPI_SEND_IPI: while ((bit = ffs(hart_mask))) { hart_id = (bit - 1); hart_mask &= ~(1u << hart_id); if (hart_mask_base != -1) hart_id += hart_mask_base; - if (CPU_ISSET(hart_id, &active_cpus)) { - /* TODO. */ - target_vcpu = vm_vcpu(hyp->vm, hart_id); - target_hypctx = hypctx->hyp->ctx[hart_id]; - riscv_send_ipi(target_hypctx, hart_id); - } + if (!CPU_ISSET(hart_id, &active_cpus)) + return (SBI_ERR_INVALID_PARAM); + CPU_SET(hart_id, &cpus); } - ret = 0; break; default: - printf("%s: unknown func %ld\n", __func__, func_id); - ret = -1; - break; + dprintf("%s: unknown func %ld\n", __func__, func_id); + return (SBI_ERR_NOT_SUPPORTED); } - hypctx->guest_regs.hyp_a[0] = ret; + if (CPU_EMPTY(&cpus)) + return (SBI_ERR_INVALID_PARAM); - return (0); + riscv_send_ipi(hyp, &cpus); + + return (SBI_SUCCESS); } -int -vmm_sbi_ecall(struct vcpu *vcpu, bool *retu) +bool +vmm_sbi_ecall(struct vcpu *vcpu) { - int sbi_extension_id __unused; + int sbi_extension_id; struct hypctx *hypctx; int error; hypctx = riscv_get_active_vcpu(); sbi_extension_id = hypctx->guest_regs.hyp_a[7]; dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__, hypctx->guest_regs.hyp_a[0], hypctx->guest_regs.hyp_a[1], hypctx->guest_regs.hyp_a[2], hypctx->guest_regs.hyp_a[3], hypctx->guest_regs.hyp_a[4], hypctx->guest_regs.hyp_a[5], hypctx->guest_regs.hyp_a[6], hypctx->guest_regs.hyp_a[7]); switch (sbi_extension_id) { case SBI_EXT_ID_RFNC: error = vmm_sbi_handle_rfnc(vcpu, hypctx); - hypctx->guest_regs.hyp_a[0] = error; break; case SBI_EXT_ID_TIME: - vmm_sbi_handle_time(vcpu, hypctx); + error = vmm_sbi_handle_time(vcpu, hypctx); break; case SBI_EXT_ID_IPI: - vmm_sbi_handle_ipi(vcpu, hypctx); + error = vmm_sbi_handle_ipi(vcpu, hypctx); break; default: - *retu = true; - break; + /* Return to handle in userspace. */ + return (false); } - return (0); + hypctx->guest_regs.hyp_a[0] = error; + + /* Request is handled in kernel mode. */ + return (true); }