diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index 8e2c9c868635..c06d2ad947e4 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -1,362 +1,366 @@ /* * Copyright (C) 2015 Mihai Carabas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _VMM_H_ #define _VMM_H_ #include #include #include #include #include "pte.h" #include "pmap.h" struct vcpu; enum vm_suspend_how { VM_SUSPEND_NONE, VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, VM_SUSPEND_LAST }; /* * Identifiers for architecturally defined registers. 
*/ enum vm_reg_name { VM_REG_GUEST_X0 = 0, VM_REG_GUEST_X1, VM_REG_GUEST_X2, VM_REG_GUEST_X3, VM_REG_GUEST_X4, VM_REG_GUEST_X5, VM_REG_GUEST_X6, VM_REG_GUEST_X7, VM_REG_GUEST_X8, VM_REG_GUEST_X9, VM_REG_GUEST_X10, VM_REG_GUEST_X11, VM_REG_GUEST_X12, VM_REG_GUEST_X13, VM_REG_GUEST_X14, VM_REG_GUEST_X15, VM_REG_GUEST_X16, VM_REG_GUEST_X17, VM_REG_GUEST_X18, VM_REG_GUEST_X19, VM_REG_GUEST_X20, VM_REG_GUEST_X21, VM_REG_GUEST_X22, VM_REG_GUEST_X23, VM_REG_GUEST_X24, VM_REG_GUEST_X25, VM_REG_GUEST_X26, VM_REG_GUEST_X27, VM_REG_GUEST_X28, VM_REG_GUEST_X29, VM_REG_GUEST_LR, VM_REG_GUEST_SP, VM_REG_GUEST_PC, VM_REG_GUEST_CPSR, VM_REG_GUEST_SCTLR_EL1, VM_REG_GUEST_TTBR0_EL1, VM_REG_GUEST_TTBR1_EL1, VM_REG_GUEST_TCR_EL1, VM_REG_GUEST_TCR2_EL1, VM_REG_LAST }; #define VM_INTINFO_VECTOR(info) ((info) & 0xff) #define VM_INTINFO_DEL_ERRCODE 0x800 #define VM_INTINFO_RSVD 0x7ffff000 #define VM_INTINFO_VALID 0x80000000 #define VM_INTINFO_TYPE 0x700 #define VM_INTINFO_HWINTR (0 << 8) #define VM_INTINFO_NMI (2 << 8) #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) #define VM_MAX_SUFFIXLEN 15 #define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */ #ifdef _KERNEL #define VM_MAX_NAMELEN 32 struct vm; struct vm_exception; struct vm_exit; struct vm_run; struct vm_object; struct vm_guest_paging; struct vm_vgic_descr; struct pmap; struct vm_eventinfo { void *rptr; /* rendezvous cookie */ int *sptr; /* suspend cookie */ int *iptr; /* reqidle cookie */ }; int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_slock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); /* * APIs that modify the guest memory map require all vcpus to be frozen. */ void vm_slock_memsegs(struct vm *vm); void vm_xlock_memsegs(struct vm *vm); void vm_unlock_memsegs(struct vm *vm); int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, size_t len, int prot, int flags); int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len); int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); void vm_free_memseg(struct vm *vm, int ident); /* * APIs that inspect the guest memory map require only a *single* vcpu to * be frozen. This acts like a read lock on the guest memory map since any * modification requires *all* vcpus to be frozen. 
*/ int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, struct vm_object **objptr); vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm); void *vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int prot, void **cookie); void *vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int prot, void **cookie); void vm_gpa_release(void *cookie); bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa); int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault); uint16_t vm_get_maxcpus(struct vm *vm); void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus); int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus); int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval); int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val); int vm_run(struct vcpu *vcpu); int vm_suspend(struct vm *vm, enum vm_suspend_how how); void* vm_get_cookie(struct vm *vm); int vcpu_vcpuid(struct vcpu *vcpu); void *vcpu_get_cookie(struct vcpu *vcpu); struct vm *vcpu_vm(struct vcpu *vcpu); struct vcpu *vm_vcpu(struct vm *vm, int cpu); int vm_get_capability(struct vcpu *vcpu, int type, int *val); int vm_set_capability(struct vcpu *vcpu, int type, int val); int vm_activate_cpu(struct vcpu *vcpu); int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu); int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu); int vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far); int vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr); int vm_assert_irq(struct vm *vm, uint32_t irq); int vm_deassert_irq(struct vm *vm, uint32_t irq); int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, int func); struct vm_exit *vm_exitinfo(struct vcpu *vcpu); void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc); void vm_exit_debug(struct vcpu *vcpu, uint64_t pc); void vm_exit_rendezvous(struct vcpu *vcpu, uint64_t pc); void vm_exit_astpending(struct vcpu *vcpu, uint64_t pc); cpuset_t vm_active_cpus(struct vm *vm); cpuset_t vm_debug_cpus(struct vm *vm); cpuset_t vm_suspended_cpus(struct vm *vm); static __inline bool virt_enabled(void) { return (has_hyp()); } static __inline int vcpu_rendezvous_pending(struct vm_eventinfo *info) { return (*((uintptr_t *)(info->rptr)) != 0); } static __inline int vcpu_suspended(struct vm_eventinfo *info) { return (*info->sptr); } int vcpu_debugged(struct vcpu *vcpu); enum vcpu_state { VCPU_IDLE, VCPU_FROZEN, VCPU_RUNNING, VCPU_SLEEPING, }; int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); static int __inline vcpu_is_running(struct vcpu *vcpu, int *hostcpu) { return (vcpu_get_state(vcpu, hostcpu) == VCPU_RUNNING); } #ifdef _SYS_PROC_H_ static int __inline vcpu_should_yield(struct vcpu *vcpu) { struct thread *td; td = curthread; return (td->td_ast != 0 || td->td_owepreempt != 0); } #endif void *vcpu_stats(struct vcpu *vcpu); void vcpu_notify_event(struct vcpu *vcpu); enum vm_reg_name vm_segment_name(int seg_encoding); struct vm_copyinfo { uint64_t gpa; size_t len; void *hva; void *cookie; }; #endif /* _KERNEL */ #define VM_DIR_READ 0 #define VM_DIR_WRITE 1 #define VM_GP_M_MASK 0x1f #define VM_GP_MMU_ENABLED (1 << 5) struct vm_guest_paging { uint64_t ttbr0_addr; 
uint64_t ttbr1_addr; uint64_t tcr_el1; uint64_t tcr2_el1; int flags; int padding; }; struct vie { uint8_t access_size:4, sign_extend:1, dir:1, unused:2; enum vm_reg_name reg; }; struct vre { uint32_t inst_syndrome; uint8_t dir:1, unused:7; enum vm_reg_name reg; }; /* * Identifiers for optional vmm capabilities */ enum vm_cap_type { VM_CAP_HALT_EXIT, - VM_CAP_MTRAP_EXIT, VM_CAP_PAUSE_EXIT, VM_CAP_UNRESTRICTED_GUEST, + VM_CAP_BRK_EXIT, + VM_CAP_SS_EXIT, + VM_CAP_MASK_HWINTR, VM_CAP_MAX }; enum vm_exitcode { VM_EXITCODE_BOGUS, VM_EXITCODE_INST_EMUL, VM_EXITCODE_REG_EMUL, VM_EXITCODE_HVC, VM_EXITCODE_SUSPENDED, VM_EXITCODE_HYP, VM_EXITCODE_WFI, VM_EXITCODE_PAGING, VM_EXITCODE_SMCCC, VM_EXITCODE_DEBUG, + VM_EXITCODE_BRK, + VM_EXITCODE_SS, VM_EXITCODE_MAX }; struct vm_exit { enum vm_exitcode exitcode; int inst_length; uint64_t pc; union { /* * ARM specific payload. */ struct { uint32_t exception_nr; uint32_t pad; uint64_t esr_el2; /* Exception Syndrome Register */ uint64_t far_el2; /* Fault Address Register */ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */ } hyp; struct { struct vre vre; } reg_emul; struct { uint64_t gpa; uint64_t esr; } paging; struct { uint64_t gpa; struct vm_guest_paging paging; struct vie vie; } inst_emul; /* * A SMCCC call, e.g. starting a core via PSCI. * Further arguments can be read by asking the kernel for * all register values. */ struct { uint64_t func_id; uint64_t args[7]; } smccc_call; struct { enum vm_suspend_how how; } suspended; } u; }; #endif /* _VMM_H_ */ diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h index 43459d14e143..8cfe77dcde6f 100644 --- a/sys/arm64/vmm/arm64.h +++ b/sys/arm64/vmm/arm64.h @@ -1,165 +1,174 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (C) 2015 Mihai Carabas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _VMM_ARM64_H_ #define _VMM_ARM64_H_ #include #include #include #include "mmu.h" #include "io/vgic_v3.h" #include "io/vtimer.h" struct vgic_v3; struct vgic_v3_cpu; +/* + * Per-vCPU hypervisor state. + */ struct hypctx { struct trapframe tf; /* * EL1 control registers. 
*/ uint64_t elr_el1; /* Exception Link Register */ uint64_t sp_el0; /* Stack pointer */ uint64_t tpidr_el0; /* EL0 Software ID Register */ uint64_t tpidrro_el0; /* Read-only Thread ID Register */ uint64_t tpidr_el1; /* EL1 Software ID Register */ uint64_t vbar_el1; /* Vector Base Address Register */ uint64_t actlr_el1; /* Auxiliary Control Register */ uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */ uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */ uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */ uint64_t contextidr_el1; /* Current Process Identifier */ uint64_t cpacr_el1; /* Architectural Feature Access Control Register */ uint64_t csselr_el1; /* Cache Size Selection Register */ uint64_t esr_el1; /* Exception Syndrome Register */ uint64_t far_el1; /* Fault Address Register */ uint64_t mair_el1; /* Memory Attribute Indirection Register */ uint64_t mdccint_el1; /* Monitor DCC Interrupt Enable Register */ uint64_t mdscr_el1; /* Monitor Debug System Control Register */ uint64_t par_el1; /* Physical Address Register */ uint64_t sctlr_el1; /* System Control Register */ uint64_t tcr_el1; /* Translation Control Register */ uint64_t tcr2_el1; /* Translation Control Register 2 */ uint64_t ttbr0_el1; /* Translation Table Base Register 0 */ uint64_t ttbr1_el1; /* Translation Table Base Register 1 */ uint64_t spsr_el1; /* Saved Program Status Register */ uint64_t pmcr_el0; /* Performance Monitors Control Register */ uint64_t pmccntr_el0; uint64_t pmccfiltr_el0; uint64_t pmcntenset_el0; uint64_t pmintenset_el1; uint64_t pmovsset_el0; uint64_t pmselr_el0; uint64_t pmuserenr_el0; uint64_t pmevcntr_el0[31]; uint64_t pmevtyper_el0[31]; uint64_t dbgbcr_el1[16]; /* Debug Breakpoint Control Registers */ uint64_t dbgbvr_el1[16]; /* Debug Breakpoint Value Registers */ uint64_t dbgwcr_el1[16]; /* Debug Watchpoint Control Registers */ uint64_t dbgwvr_el1[16]; /* Debug Watchpoint Value Registers */ /* EL2 control registers */ uint64_t cptr_el2; /* Architectural Feature Trap Register */ uint64_t hcr_el2; /* Hypervisor Configuration Register */ uint64_t mdcr_el2; /* Monitor Debug Configuration Register */ uint64_t vpidr_el2; /* Virtualization Processor ID Register */ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */ uint64_t el2_addr; /* The address of this in el2 space */ struct hyp *hyp; struct vcpu *vcpu; struct { uint64_t far_el2; /* Fault Address Register */ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */ } exit_info; struct vtimer_cpu vtimer_cpu; + uint64_t setcaps; /* Currently enabled capabilities. */ + + /* vCPU state used to handle guest debugging. 
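+	 * debug_spsr and debug_mdscr preserve the guest's own SPSR and
+	 * MDSCR_EL1 debug bits while a capability such as single-step or
+	 * interrupt masking forces them, so the original values can be
+	 * restored when the capability is cleared.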
*/ + uint64_t debug_spsr; /* Saved guest SPSR */ + uint64_t debug_mdscr; /* Saved guest MDSCR */ + struct vgic_v3_regs vgic_v3_regs; struct vgic_v3_cpu *vgic_cpu; bool has_exception; }; struct hyp { struct vm *vm; struct vtimer vtimer; uint64_t vmid_generation; uint64_t vttbr_el2; uint64_t el2_addr; /* The address of this in el2 space */ bool vgic_attached; struct vgic_v3 *vgic; struct hypctx *ctx[]; }; #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ ret_type vmmops_##opname args; DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum)) DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, struct vm_eventinfo *info)) DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, int vcpu_id)) DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far)) DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, vm_offset_t max)) DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) #ifdef notyet #ifdef BHYVE_SNAPSHOT DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta)) DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui, struct vm_snapshot_meta *meta)) DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now)) #endif #endif uint64_t vmm_call_hyp(uint64_t, ...); #if 0 #define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__) #else #define eprintf(fmt, ...) do {} while(0) #endif struct hypctx *arm64_get_active_vcpu(void); void raise_data_insn_abort(struct hypctx *, uint64_t, bool, int); #endif /* !_VMM_ARM64_H_ */ diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c index e71761f9ccef..e0547bcef914 100644 --- a/sys/arm64/vmm/vmm_arm64.c +++ b/sys/arm64/vmm/vmm_arm64.c @@ -1,1337 +1,1412 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (C) 2015 Mihai Carabas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu.h" #include "arm64.h" #include "hyp.h" #include "reset.h" #include "io/vgic.h" #include "io/vgic_v3.h" #include "io/vtimer.h" #include "vmm_stat.h" #define HANDLED 1 #define UNHANDLED 0 /* Number of bits in an EL2 virtual address */ #define EL2_VIRT_BITS 48 CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS); /* TODO: Move the host hypctx off the stack */ #define VMM_STACK_PAGES 4 #define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE) static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits; /* Register values passed to arm_setup_vectors to set in the hypervisor */ struct vmm_init_regs { uint64_t tcr_el2; uint64_t vtcr_el2; }; MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP"); extern char hyp_init_vectors[]; extern char hyp_vectors[]; extern char hyp_stub_vectors[]; static vm_paddr_t hyp_code_base; static size_t hyp_code_len; static char *stack[MAXCPU]; static vm_offset_t stack_hyp_va[MAXCPU]; static vmem_t *el2_mem_alloc; static void arm_setup_vectors(void *arg); static void vmm_pmap_clean_stage2_tlbi(void); static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool); static void vmm_pmap_invalidate_all(uint64_t); DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); static inline void arm64_set_active_vcpu(struct hypctx *hypctx) { DPCPU_SET(vcpu, hypctx); } struct hypctx * arm64_get_active_vcpu(void) { return (DPCPU_GET(vcpu)); } static void arm_setup_vectors(void *arg) { struct vmm_init_regs *el2_regs; uintptr_t stack_top; uint32_t sctlr_el2; register_t daif; el2_regs = arg; arm64_set_active_vcpu(NULL); daif = intr_disable(); /* * Install the temporary vectors which will be responsible for * initializing the VMM when we next trap into EL2. * * x0: the exception vector table responsible for hypervisor * initialization on the next call. */ vmm_call_hyp(vtophys(&vmm_hyp_code)); /* Create and map the hypervisor stack */ stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE; /* * Configure the system control register for EL2: * * SCTLR_EL2_M: MMU on * SCTLR_EL2_C: Data cacheability not affected * SCTLR_EL2_I: Instruction cacheability not affected * SCTLR_EL2_A: Instruction alignment check * SCTLR_EL2_SA: Stack pointer alignment check * SCTLR_EL2_WXN: Treat writable memory as execute never * ~SCTLR_EL2_EE: Data accesses are little-endian */ sctlr_el2 = SCTLR_EL2_RES1; sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I; sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA; sctlr_el2 |= SCTLR_EL2_WXN; sctlr_el2 &= ~SCTLR_EL2_EE; /* Special call to initialize EL2 */ vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2, sctlr_el2, el2_regs->vtcr_el2); intr_restore(daif); } static void arm_teardown_vectors(void *arg) { register_t daif; /* * vmm_cleanup() will disable the MMU. 
For the next few instructions, * before the hardware disables the MMU, one of the following is * possible: * * a. The instruction addresses are fetched with the MMU disabled, * and they must represent the actual physical addresses. This will work * because we call the vmm_cleanup() function by its physical address. * * b. The instruction addresses are fetched using the old translation * tables. This will work because we have an identity mapping in place * in the translation tables and vmm_cleanup() is called by its physical * address. */ daif = intr_disable(); /* TODO: Invalidate the cache */ vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors)); intr_restore(daif); arm64_set_active_vcpu(NULL); } static uint64_t vmm_vtcr_el2_sl(u_int levels) { #if PAGE_SIZE == PAGE_SIZE_4K switch (levels) { case 2: return (VTCR_EL2_SL0_4K_LVL2); case 3: return (VTCR_EL2_SL0_4K_LVL1); case 4: return (VTCR_EL2_SL0_4K_LVL0); default: panic("%s: Invalid number of page table levels %u", __func__, levels); } #elif PAGE_SIZE == PAGE_SIZE_16K switch (levels) { case 2: return (VTCR_EL2_SL0_16K_LVL2); case 3: return (VTCR_EL2_SL0_16K_LVL1); case 4: return (VTCR_EL2_SL0_16K_LVL0); default: panic("%s: Invalid number of page table levels %u", __func__, levels); } #else #error Unsupported page size #endif } int vmmops_modinit(int ipinum) { struct vmm_init_regs el2_regs; vm_offset_t next_hyp_va; vm_paddr_t vmm_base; uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field; uint64_t cnthctl_el2; register_t daif; int cpu, i; bool rv __diagused; if (!virt_enabled()) { printf( "vmm: Processor doesn't have support for virtualization\n"); return (ENXIO); } /* TODO: Support VHE */ if (in_vhe()) { printf("vmm: VHE is unsupported\n"); return (ENXIO); } if (!vgic_present()) { printf("vmm: No vgic found\n"); return (ENODEV); } if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) { printf("vmm: Unable to read ID_AA64MMFR0_EL1\n"); return (ENXIO); } pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1); /* * Use 3 levels to give us up to 39 bits with 4k pages, or * 47 bits with 16k pages. */ /* TODO: Check the number of levels for 64k pages */ vmm_pmap_levels = 3; switch (pa_range_field) { case ID_AA64MMFR0_PARange_4G: printf("vmm: Not enough physical address bits\n"); return (ENXIO); case ID_AA64MMFR0_PARange_64G: vmm_virt_bits = 36; #if PAGE_SIZE == PAGE_SIZE_16K vmm_pmap_levels = 2; #endif break; default: vmm_virt_bits = 39; break; } pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT; /* Initialise the EL2 MMU */ if (!vmmpmap_init()) { printf("vmm: Failed to init the EL2 MMU\n"); return (ENOMEM); } /* Set up the stage 2 pmap callbacks */ MPASS(pmap_clean_stage2_tlbi == NULL); pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi; pmap_stage2_invalidate_range = vmm_pmap_invalidate_range; pmap_stage2_invalidate_all = vmm_pmap_invalidate_all; /* * Create an allocator for the virtual address space used by EL2. * EL2 code is identity-mapped; the allocator is used to find space for * VM structures. */ el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK); /* Create the mappings for the hypervisor translation table. 
*/ hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code); /* We need an physical identity mapping for when we activate the MMU */ hyp_code_base = vmm_base = vtophys(&vmm_hyp_code); rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base, VM_PROT_READ | VM_PROT_EXECUTE); MPASS(rv); next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE); /* Create a per-CPU hypervisor stack */ CPU_FOREACH(cpu) { stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO); stack_hyp_va[cpu] = next_hyp_va; for (i = 0; i < VMM_STACK_PAGES; i++) { rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i), PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)), VM_PROT_READ | VM_PROT_WRITE); MPASS(rv); } next_hyp_va += L2_SIZE; } el2_regs.tcr_el2 = TCR_EL2_RES1; el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT, TCR_EL2_PS_52BITS); el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS); el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA; #if PAGE_SIZE == PAGE_SIZE_4K el2_regs.tcr_el2 |= TCR_EL2_TG0_4K; #elif PAGE_SIZE == PAGE_SIZE_16K el2_regs.tcr_el2 |= TCR_EL2_TG0_16K; #else #error Unsupported page size #endif #ifdef SMP el2_regs.tcr_el2 |= TCR_EL2_SH0_IS; #endif switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) { case TCR_EL2_PS_32BITS: vmm_max_ipa_bits = 32; break; case TCR_EL2_PS_36BITS: vmm_max_ipa_bits = 36; break; case TCR_EL2_PS_40BITS: vmm_max_ipa_bits = 40; break; case TCR_EL2_PS_42BITS: vmm_max_ipa_bits = 42; break; case TCR_EL2_PS_44BITS: vmm_max_ipa_bits = 44; break; case TCR_EL2_PS_48BITS: vmm_max_ipa_bits = 48; break; case TCR_EL2_PS_52BITS: default: vmm_max_ipa_bits = 52; break; } /* * Configure the Stage 2 translation control register: * * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable * normal memory * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable * normal memory * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner * shareable */ el2_regs.vtcr_el2 = VTCR_EL2_RES1; el2_regs.vtcr_el2 |= min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT); el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA; el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits); el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels); #if PAGE_SIZE == PAGE_SIZE_4K el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K; #elif PAGE_SIZE == PAGE_SIZE_16K el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K; #else #error Unsupported page size #endif #ifdef SMP el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS; #endif smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs); /* Add memory to the vmem allocator (checking there is space) */ if (vmm_base > (L2_SIZE + PAGE_SIZE)) { /* * Ensure there is an L2 block before the vmm code to check * for buffer overflows on earlier data. Include the PAGE_SIZE * of the minimum we can allocate. */ vmm_base -= L2_SIZE + PAGE_SIZE; vmm_base = rounddown2(vmm_base, L2_SIZE); /* * Check there is memory before the vmm code to add. * * Reserve the L2 block at address 0 so NULL dereference will * raise an exception. */ if (vmm_base > L2_SIZE) vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE, M_WAITOK); } /* * Add the memory after the stacks. There is most of an L2 block * between the last stack and the first allocation so this should * be safe without adding more padding. 
*/ if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE) vmem_add(el2_mem_alloc, next_hyp_va, HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK); daif = intr_disable(); cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL); intr_restore(daif); vgic_init(); vtimer_init(cnthctl_el2); return (0); } int vmmops_modcleanup(void) { int cpu; smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL); CPU_FOREACH(cpu) { vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE, false); } vmmpmap_remove(hyp_code_base, hyp_code_len, false); vtimer_cleanup(); vmmpmap_fini(); CPU_FOREACH(cpu) free(stack[cpu], M_HYP); pmap_clean_stage2_tlbi = NULL; pmap_stage2_invalidate_range = NULL; pmap_stage2_invalidate_all = NULL; return (0); } static vm_size_t el2_hyp_size(struct vm *vm) { return (round_page(sizeof(struct hyp) + sizeof(struct hypctx *) * vm_get_maxcpus(vm))); } static vm_size_t el2_hypctx_size(void) { return (round_page(sizeof(struct hypctx))); } static vm_offset_t el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot) { vmem_addr_t addr; int err __diagused; bool rv __diagused; err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr); MPASS(err == 0); rv = vmmpmap_enter(addr, size, vtophys(data), prot); MPASS(rv); return (addr); } void * vmmops_init(struct vm *vm, pmap_t pmap) { struct hyp *hyp; vm_size_t size; size = el2_hyp_size(vm); hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); hyp->vm = vm; hyp->vgic_attached = false; vtimer_vminit(hyp); vgic_vminit(hyp); hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size, VM_PROT_READ | VM_PROT_WRITE); return (hyp); } void * vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) { struct hyp *hyp = vmi; struct hypctx *hypctx; vm_size_t size; size = el2_hypctx_size(); hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), ("%s: Invalid vcpuid %d", __func__, vcpuid)); hyp->ctx[vcpuid] = hypctx; hypctx->hyp = hyp; hypctx->vcpu = vcpu1; reset_vm_el01_regs(hypctx); reset_vm_el2_regs(hypctx); vtimer_cpuinit(hypctx); vgic_cpuinit(hypctx); hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size, VM_PROT_READ | VM_PROT_WRITE); return (hypctx); } static int arm_vmm_pinit(pmap_t pmap) { pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels); return (1); } struct vmspace * vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) { return (vmspace_alloc(min, max, arm_vmm_pinit)); } void vmmops_vmspace_free(struct vmspace *vmspace) { pmap_remove_pages(vmspace_pmap(vmspace)); vmspace_free(vmspace); } static void vmm_pmap_clean_stage2_tlbi(void) { vmm_call_hyp(HYP_CLEAN_S2_TLBI); } static void vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva, bool final_only) { MPASS(eva > sva); vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only); } static void vmm_pmap_invalidate_all(uint64_t vttbr) { vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr); } static inline void arm64_print_hyp_regs(struct vm_exit *vme) { printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2); printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2); printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2); printf("elr_el2: 0x%016lx\n", vme->pc); } static void arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss, struct vm_exit *vme_ret) { struct vm_guest_paging *paging; struct vie *vie; uint32_t esr_sas, reg_num; /* * Get the page address from HPFAR_EL2. 
*/ vme_ret->u.inst_emul.gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); /* Bits [11:0] are the same as bits [11:0] from the virtual address. */ vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 & FAR_EL2_HPFAR_PAGE_MASK; esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT; reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT; vie = &vme_ret->u.inst_emul.vie; vie->access_size = 1 << esr_sas; vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0; vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ; vie->reg = reg_num; paging = &vme_ret->u.inst_emul.paging; paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP); paging->tcr_el1 = hypctx->tcr_el1; paging->tcr2_el1 = hypctx->tcr2_el1; paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); if ((hypctx->sctlr_el1 & SCTLR_M) != 0) paging->flags |= VM_GP_MMU_ENABLED; } static void arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret) { uint32_t reg_num; struct vre *vre; /* u.hyp member will be replaced by u.reg_emul */ vre = &vme_ret->u.reg_emul.vre; vre->inst_syndrome = esr_iss; /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */ vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE; reg_num = ISS_MSR_Rt(esr_iss); vre->reg = reg_num; } void raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc) { uint64_t esr; if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t) esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT; else esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT; /* Set the bit that changes from insn -> data abort */ if (dabort) esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT; /* Set the IL bit if set by hardware */ esr |= hypctx->tf.tf_esr & ESR_ELx_IL; vmmops_exception(hypctx, esr | fsc, far); } static int handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret, pmap_t pmap) { uint64_t gpa; uint32_t esr_ec, esr_iss; esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr); esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK; switch (esr_ec) { case EXCP_UNKNOWN: vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1); arm64_print_hyp_regs(vme_ret); vme_ret->exitcode = VM_EXITCODE_HYP; break; case EXCP_TRAP_WFI_WFE: if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */ vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1); vme_ret->exitcode = VM_EXITCODE_WFI; } else { vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1); vme_ret->exitcode = VM_EXITCODE_HYP; } break; case EXCP_HVC: vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1); vme_ret->exitcode = VM_EXITCODE_HVC; break; case EXCP_MSR: vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1); arm64_gen_reg_emul_data(esr_iss, vme_ret); vme_ret->exitcode = VM_EXITCODE_REG_EMUL; break; - + case EXCP_BRK: + vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1); + vme_ret->exitcode = VM_EXITCODE_BRK; + break; + case EXCP_SOFTSTP_EL0: + vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1); + vme_ret->exitcode = VM_EXITCODE_SS; + break; case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ? 
VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1); switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) { case ISS_DATA_DFSC_TF_L0: case ISS_DATA_DFSC_TF_L1: case ISS_DATA_DFSC_TF_L2: case ISS_DATA_DFSC_TF_L3: case ISS_DATA_DFSC_AFF_L1: case ISS_DATA_DFSC_AFF_L2: case ISS_DATA_DFSC_AFF_L3: case ISS_DATA_DFSC_PF_L1: case ISS_DATA_DFSC_PF_L2: case ISS_DATA_DFSC_PF_L3: gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2); /* Check the IPA is valid */ if (gpa >= (1ul << vmm_max_ipa_bits)) { raise_data_insn_abort(hypctx, hypctx->exit_info.far_el2, esr_ec == EXCP_DATA_ABORT_L, ISS_DATA_DFSC_ASF_L0); vme_ret->inst_length = 0; return (HANDLED); } if (vm_mem_allocated(hypctx->vcpu, gpa)) { vme_ret->exitcode = VM_EXITCODE_PAGING; vme_ret->inst_length = 0; vme_ret->u.paging.esr = hypctx->tf.tf_esr; vme_ret->u.paging.gpa = gpa; } else if (esr_ec == EXCP_INSN_ABORT_L) { /* * Raise an external abort. Device memory is * not executable */ raise_data_insn_abort(hypctx, hypctx->exit_info.far_el2, false, ISS_DATA_DFSC_EXT); vme_ret->inst_length = 0; return (HANDLED); } else { arm64_gen_inst_emul_data(hypctx, esr_iss, vme_ret); vme_ret->exitcode = VM_EXITCODE_INST_EMUL; } break; default: arm64_print_hyp_regs(vme_ret); vme_ret->exitcode = VM_EXITCODE_HYP; break; } break; default: vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1); arm64_print_hyp_regs(vme_ret); vme_ret->exitcode = VM_EXITCODE_HYP; break; } /* We don't don't do any instruction emulation here */ return (UNHANDLED); } static int arm64_handle_world_switch(struct hypctx *hypctx, int excp_type, struct vm_exit *vme, pmap_t pmap) { int handled; switch (excp_type) { case EXCP_TYPE_EL1_SYNC: /* The exit code will be set by handle_el1_sync_excp(). */ handled = handle_el1_sync_excp(hypctx, vme, pmap); break; case EXCP_TYPE_EL1_IRQ: case EXCP_TYPE_EL1_FIQ: /* The host kernel will handle IRQs and FIQs. */ vmm_stat_incr(hypctx->vcpu, excp_type == EXCP_TYPE_EL1_IRQ ? 
VMEXIT_IRQ : VMEXIT_FIQ,1); vme->exitcode = VM_EXITCODE_BOGUS; handled = UNHANDLED; break; case EXCP_TYPE_EL1_ERROR: case EXCP_TYPE_EL2_SYNC: case EXCP_TYPE_EL2_IRQ: case EXCP_TYPE_EL2_FIQ: case EXCP_TYPE_EL2_ERROR: vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = UNHANDLED; break; default: vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = UNHANDLED; break; } return (handled); } static void ptp_release(void **cookie) { if (*cookie != NULL) { vm_gpa_release(*cookie); *cookie = NULL; } } static void * ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) { void *ptr; ptp_release(cookie); ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie); return (ptr); } /* log2 of the number of bytes in a page table entry */ #define PTE_SHIFT 3 int vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault) { struct hypctx *hypctx; void *cookie; uint64_t mask, *ptep, pte, pte_addr; int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz; bool is_el0; /* Check if the MMU is off */ if ((paging->flags & VM_GP_MMU_ENABLED) == 0) { *is_fault = 0; *gpa = gla; return (0); } is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t; if (ADDR_IS_KERNEL(gla)) { /* If address translation is disabled raise an exception */ if ((paging->tcr_el1 & TCR_EPD1) != 0) { *is_fault = 1; return (0); } if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) { *is_fault = 1; return (0); } pte_addr = paging->ttbr1_addr; tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT; /* Clear the top byte if TBI is on */ if ((paging->tcr_el1 & TCR_TBI1) != 0) gla |= (0xfful << 56); switch (paging->tcr_el1 & TCR_TG1_MASK) { case TCR_TG1_4K: granule_shift = PAGE_SHIFT_4K; break; case TCR_TG1_16K: granule_shift = PAGE_SHIFT_16K; break; case TCR_TG1_64K: granule_shift = PAGE_SHIFT_64K; break; default: *is_fault = 1; return (EINVAL); } } else { /* If address translation is disabled raise an exception */ if ((paging->tcr_el1 & TCR_EPD0) != 0) { *is_fault = 1; return (0); } if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) { *is_fault = 1; return (0); } pte_addr = paging->ttbr0_addr; tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT; /* Clear the top byte if TBI is on */ if ((paging->tcr_el1 & TCR_TBI0) != 0) gla &= ~(0xfful << 56); switch (paging->tcr_el1 & TCR_TG0_MASK) { case TCR_TG0_4K: granule_shift = PAGE_SHIFT_4K; break; case TCR_TG0_16K: granule_shift = PAGE_SHIFT_16K; break; case TCR_TG0_64K: granule_shift = PAGE_SHIFT_64K; break; default: *is_fault = 1; return (EINVAL); } } /* * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2 * for larger values. */ switch (granule_shift) { case PAGE_SHIFT_4K: case PAGE_SHIFT_16K: /* * See "Table D8-11 4KB granule, determining stage 1 initial * lookup level" and "Table D8-21 16KB granule, determining * stage 1 initial lookup level" from the "Arm Architecture * Reference Manual for A-Profile architecture" revision I.a * for the minimum and maximum values. * * TODO: Support less than 16 when FEAT_LPA2 is implemented * and TCR_EL1.DS == 1 * TODO: Support more than 39 when FEAT_TTST is implemented */ if (tsz < 16 || tsz > 39) { *is_fault = 1; return (EINVAL); } break; case PAGE_SHIFT_64K: /* TODO: Support 64k granule. It will probably work, but is untested */ default: *is_fault = 1; return (EINVAL); } /* * Calculate the input address bits. These are 64 bit in an address * with the top tsz bits being all 0 or all 1. 
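	 * For example, with a 4KB granule (granule_shift == PAGE_SHIFT_4K)
	 * and tsz == 25 this gives ia_bits = 39; address_bits below is then
	 * 39 - 12 = 27 and the walk needs howmany(27, 12 - 3) = 3 levels.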
*/ ia_bits = 64 - tsz; /* * Calculate the number of address bits used in the page table * calculation. This is ia_bits minus the bottom granule_shift * bits that are passed to the output address. */ address_bits = ia_bits - granule_shift; /* * Calculate the number of levels. Each level uses * granule_shift - PTE_SHIFT bits of the input address. * This is because the table is 1 << granule_shift and each * entry is 1 << PTE_SHIFT bytes. */ levels = howmany(address_bits, granule_shift - PTE_SHIFT); /* Mask of the upper unused bits in the virtual address */ gla &= (1ul << ia_bits) - 1; hypctx = (struct hypctx *)vcpui; cookie = NULL; /* TODO: Check if the level supports block descriptors */ for (;levels > 0; levels--) { int idx; pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) + granule_shift; idx = (gla >> pte_shift) & ((1ul << (granule_shift - PTE_SHIFT)) - 1); while (idx > PAGE_SIZE / sizeof(pte)) { idx -= PAGE_SIZE / sizeof(pte); pte_addr += PAGE_SIZE; } ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie); if (ptep == NULL) goto error; pte = ptep[idx]; /* Calculate the level we are looking at */ switch (levels) { default: goto fault; /* TODO: Level -1 when FEAT_LPA2 is implemented */ case 4: /* Level 0 */ if ((pte & ATTR_DESCR_MASK) != L0_TABLE) goto fault; /* FALLTHROUGH */ case 3: /* Level 1 */ case 2: /* Level 2 */ switch (pte & ATTR_DESCR_MASK) { /* Use L1 macro as all levels are the same */ case L1_TABLE: /* Check if EL0 can access this address space */ if (is_el0 && (pte & TATTR_AP_TABLE_NO_EL0) != 0) goto fault; /* Check if the address space is writable */ if ((prot & PROT_WRITE) != 0 && (pte & TATTR_AP_TABLE_RO) != 0) goto fault; if ((prot & PROT_EXEC) != 0) { /* Check the table exec attribute */ if ((is_el0 && (pte & TATTR_UXN_TABLE) != 0) || (!is_el0 && (pte & TATTR_PXN_TABLE) != 0)) goto fault; } pte_addr = pte & ~ATTR_MASK; break; case L1_BLOCK: goto done; default: goto fault; } break; case 1: /* Level 3 */ if ((pte & ATTR_DESCR_MASK) == L3_PAGE) goto done; goto fault; } } done: /* Check if EL0 has access to the block/page */ if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0) goto fault; if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0) goto fault; if ((prot & PROT_EXEC) != 0) { if ((is_el0 && (pte & ATTR_S1_UXN) != 0) || (!is_el0 && (pte & ATTR_S1_PXN) != 0)) goto fault; } mask = (1ul << pte_shift) - 1; *gpa = (pte & ~ATTR_MASK) | (gla & mask); *is_fault = 0; ptp_release(&cookie); return (0); error: ptp_release(&cookie); return (EFAULT); fault: *is_fault = 1; ptp_release(&cookie); return (0); } int vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) { uint64_t excp_type; int handled; register_t daif; struct hyp *hyp; struct hypctx *hypctx; struct vcpu *vcpu; struct vm_exit *vme; int mode; hypctx = (struct hypctx *)vcpui; hyp = hypctx->hyp; vcpu = hypctx->vcpu; vme = vm_exitinfo(vcpu); hypctx->tf.tf_elr = (uint64_t)pc; for (;;) { if (hypctx->has_exception) { hypctx->has_exception = false; hypctx->elr_el1 = hypctx->tf.tf_elr; mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32); if (mode == PSR_M_EL1t) { hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0; } else if (mode == PSR_M_EL1h) { hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200; } else if ((mode & PSR_M_32) == PSR_M_64) { /* 64-bit EL0 */ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400; } else { /* 32-bit EL0 */ hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600; } /* Set the new spsr */ hypctx->spsr_el1 = hypctx->tf.tf_spsr; /* Set the new cpsr */ hypctx->tf.tf_spsr = 
hypctx->spsr_el1 & PSR_FLAGS; hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h; /* * Update fields that may change on exeption entry * based on how sctlr_el1 is configured. */ if ((hypctx->sctlr_el1 & SCTLR_SPAN) != 0) hypctx->tf.tf_spsr |= PSR_PAN; if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0) hypctx->tf.tf_spsr &= ~PSR_SSBS; else hypctx->tf.tf_spsr |= PSR_SSBS; } daif = intr_disable(); /* Check if the vcpu is suspended */ if (vcpu_suspended(evinfo)) { intr_restore(daif); vm_exit_suspended(vcpu, pc); break; } if (vcpu_debugged(vcpu)) { intr_restore(daif); vm_exit_debug(vcpu, pc); break; } /* Activate the stage2 pmap so the vmid is valid */ pmap_activate_vm(pmap); hyp->vttbr_el2 = pmap_to_ttbr0(pmap); /* * TODO: What happens if a timer interrupt is asserted exactly * here, but for the previous VM? */ arm64_set_active_vcpu(hypctx); vgic_flush_hwstate(hypctx); /* Call into EL2 to switch to the guest */ excp_type = vmm_call_hyp(HYP_ENTER_GUEST, hyp->el2_addr, hypctx->el2_addr); vgic_sync_hwstate(hypctx); vtimer_sync_hwstate(hypctx); /* * Deactivate the stage2 pmap. vmm_pmap_clean_stage2_tlbi * depends on this meaning we activate the VM before entering * the vm again */ PCPU_SET(curvmpmap, NULL); intr_restore(daif); vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); if (excp_type == EXCP_TYPE_MAINT_IRQ) continue; vme->pc = hypctx->tf.tf_elr; vme->inst_length = INSN_SIZE; vme->u.hyp.exception_nr = excp_type; vme->u.hyp.esr_el2 = hypctx->tf.tf_esr; vme->u.hyp.far_el2 = hypctx->exit_info.far_el2; vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2; handled = arm64_handle_world_switch(hypctx, excp_type, vme, pmap); if (handled == UNHANDLED) /* Exit loop to emulate instruction. */ break; else /* Resume guest execution from the next instruction. */ hypctx->tf.tf_elr += vme->inst_length; } return (0); } static void arm_pcpu_vmcleanup(void *arg) { struct hyp *hyp; int i, maxcpus; hyp = arg; maxcpus = vm_get_maxcpus(hyp->vm); for (i = 0; i < maxcpus; i++) { if (arm64_get_active_vcpu() == hyp->ctx[i]) { arm64_set_active_vcpu(NULL); break; } } } void vmmops_vcpu_cleanup(void *vcpui) { struct hypctx *hypctx = vcpui; vtimer_cpucleanup(hypctx); vgic_cpucleanup(hypctx); vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true); free(hypctx, M_HYP); } void vmmops_cleanup(void *vmi) { struct hyp *hyp = vmi; vtimer_vmcleanup(hyp); vgic_vmcleanup(hyp); smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp); vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true); free(hyp, M_HYP); } /* * Return register value. Registers have different sizes and an explicit cast * must be made to ensure proper conversion. */ static uint64_t * hypctx_regptr(struct hypctx *hypctx, int reg) { switch (reg) { case VM_REG_GUEST_X0 ... 
VM_REG_GUEST_X29:
 		return (&hypctx->tf.tf_x[reg]);
 	case VM_REG_GUEST_LR:
 		return (&hypctx->tf.tf_lr);
 	case VM_REG_GUEST_SP:
 		return (&hypctx->tf.tf_sp);
 	case VM_REG_GUEST_CPSR:
 		return (&hypctx->tf.tf_spsr);
 	case VM_REG_GUEST_PC:
 		return (&hypctx->tf.tf_elr);
 	case VM_REG_GUEST_SCTLR_EL1:
 		return (&hypctx->sctlr_el1);
 	case VM_REG_GUEST_TTBR0_EL1:
 		return (&hypctx->ttbr0_el1);
 	case VM_REG_GUEST_TTBR1_EL1:
 		return (&hypctx->ttbr1_el1);
 	case VM_REG_GUEST_TCR_EL1:
 		return (&hypctx->tcr_el1);
 	case VM_REG_GUEST_TCR2_EL1:
 		return (&hypctx->tcr2_el1);
 	default:
 		break;
 	}
 	return (NULL);
 }
 
 int
 vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
 {
 	uint64_t *regp;
 	int running, hostcpu;
 	struct hypctx *hypctx = vcpui;
 
 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
 	if (running && hostcpu != curcpu)
 		panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
 		    vcpu_vcpuid(hypctx->vcpu));
 
 	regp = hypctx_regptr(hypctx, reg);
 	if (regp == NULL)
 		return (EINVAL);
 
 	*retval = *regp;
 	return (0);
 }
 
 int
 vmmops_setreg(void *vcpui, int reg, uint64_t val)
 {
 	uint64_t *regp;
 	struct hypctx *hypctx = vcpui;
 	int running, hostcpu;
 
 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
 	if (running && hostcpu != curcpu)
 		panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
 		    vcpu_vcpuid(hypctx->vcpu));
 
 	regp = hypctx_regptr(hypctx, reg);
 	if (regp == NULL)
 		return (EINVAL);
 
 	*regp = val;
 	return (0);
 }
 
 int
 vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
 {
 	struct hypctx *hypctx = vcpui;
 	int running, hostcpu;
 
 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
 	if (running && hostcpu != curcpu)
 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
 		    vcpu_vcpuid(hypctx->vcpu));
 
 	hypctx->far_el1 = far;
 	hypctx->esr_el1 = esr;
 	hypctx->has_exception = true;
 
 	return (0);
 }
 
 int
 vmmops_getcap(void *vcpui, int num, int *retval)
 {
+	struct hypctx *hypctx = vcpui;
 	int ret;
 
 	ret = ENOENT;
 
 	switch (num) {
 	case VM_CAP_UNRESTRICTED_GUEST:
 		*retval = 1;
 		ret = 0;
 		break;
+	case VM_CAP_BRK_EXIT:
+	case VM_CAP_SS_EXIT:
+	case VM_CAP_MASK_HWINTR:
+		*retval = (hypctx->setcaps & (1ul << num)) != 0;
+		ret = 0;
+		break;
 	default:
 		break;
 	}
 
 	return (ret);
 }
 
 int
 vmmops_setcap(void *vcpui, int num, int val)
 {
+	struct hypctx *hypctx = vcpui;
+	int ret;
+
+	ret = 0;
-	return (ENOENT);
+	switch (num) {
+	case VM_CAP_BRK_EXIT:
+		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+			break;
+		if (val != 0)
+			hypctx->mdcr_el2 |= MDCR_EL2_TDE;
+		else
+			hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+		break;
+	case VM_CAP_SS_EXIT:
+		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+			break;
+
+		if (val != 0) {
+			hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
+			hypctx->debug_mdscr |= hypctx->mdscr_el1 &
+			    (MDSCR_SS | MDSCR_KDE);
+
+			hypctx->tf.tf_spsr |= PSR_SS;
+			hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
+			hypctx->mdcr_el2 |= MDCR_EL2_TDE;
+		} else {
+			hypctx->tf.tf_spsr &= ~PSR_SS;
+			hypctx->tf.tf_spsr |= hypctx->debug_spsr;
+			hypctx->debug_spsr &= ~PSR_SS;
+			hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
+			hypctx->mdscr_el1 |= hypctx->debug_mdscr;
+			hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
+			hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+		}
+		break;
+	case VM_CAP_MASK_HWINTR:
+		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+			break;
+
+		if (val != 0) {
+			hypctx->debug_spsr |= (hypctx->tf.tf_spsr &
+			    (PSR_I | PSR_F));
+			hypctx->tf.tf_spsr |= PSR_I | PSR_F;
+		} else {
+			hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F);
+			hypctx->tf.tf_spsr |= (hypctx->debug_spsr &
+			    (PSR_I | PSR_F));
+			hypctx->debug_spsr &= ~(PSR_I | PSR_F);
+		}
+		break;
+	default:
+		ret = ENOENT;
+		break;
+ } + + if (ret == 0) { + if (val == 0) + hypctx->setcaps &= ~(1ul << num); + else + hypctx->setcaps |= (1ul << num); + } + + return (ret); } diff --git a/sys/arm64/vmm/vmm_stat.c b/sys/arm64/vmm/vmm_stat.c index 858ce980843a..05ece6f30579 100644 --- a/sys/arm64/vmm/vmm_stat.c +++ b/sys/arm64/vmm/vmm_stat.c @@ -1,165 +1,167 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "vmm_stat.h" /* * 'vst_num_elems' is the total number of addressable statistic elements * 'vst_num_types' is the number of unique statistic types * * It is always true that 'vst_num_elems' is greater than or equal to * 'vst_num_types'. This is because a stat type may represent more than * one element (for e.g. VMM_STAT_ARRAY). 
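 * For example, a counter created with VMM_STAT_ARRAY(EXAMPLE, 4, ...) adds
 * one type but four addressable elements to each vcpu's stats buffer.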
*/ static int vst_num_elems, vst_num_types; static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS]; static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat"); #define vst_size ((size_t)vst_num_elems * sizeof(uint64_t)) void vmm_stat_register(void *arg) { struct vmm_stat_type *vst = arg; /* We require all stats to identify themselves with a description */ if (vst->desc == NULL) return; if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) { printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc); return; } vst->index = vst_num_elems; vst_num_elems += vst->nelems; vsttab[vst_num_types++] = vst; } int vmm_stat_copy(struct vcpu *vcpu, int index, int count, int *num_stats, uint64_t *buf) { struct vmm_stat_type *vst; uint64_t *stats; int i, tocopy; if (index < 0 || count < 0) return (EINVAL); if (index > vst_num_elems) return (ENOENT); if (index == vst_num_elems) { *num_stats = 0; return (0); } tocopy = min(vst_num_elems - index, count); /* Let stats functions update their counters */ for (i = 0; i < vst_num_types; i++) { vst = vsttab[i]; if (vst->func != NULL) (*vst->func)(vcpu, vst); } /* Copy over the stats */ stats = vcpu_stats(vcpu); memcpy(buf, stats + index, tocopy * sizeof(stats[0])); *num_stats = tocopy; return (0); } void * vmm_stat_alloc(void) { return (malloc(vst_size, M_VMM_STAT, M_WAITOK)); } void vmm_stat_init(void *vp) { bzero(vp, vst_size); } void vmm_stat_free(void *vp) { free(vp, M_VMM_STAT); } int vmm_stat_desc_copy(int index, char *buf, int bufsize) { int i; struct vmm_stat_type *vst; for (i = 0; i < vst_num_types; i++) { vst = vsttab[i]; if (index >= vst->index && index < vst->index + vst->nelems) { if (vst->nelems > 1) { snprintf(buf, bufsize, "%s[%d]", vst->desc, index - vst->index); } else { strlcpy(buf, vst->desc, bufsize); } return (0); /* found it */ } } return (EINVAL); } /* global statistics */ VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception"); VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted"); VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted"); VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted"); VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted"); VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort"); VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort"); VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception"); VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt"); +VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception"); +VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception"); VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); diff --git a/sys/arm64/vmm/vmm_stat.h b/sys/arm64/vmm/vmm_stat.h index b0a06ef79253..402fa2f1b1e4 100644 --- a/sys/arm64/vmm/vmm_stat.h +++ b/sys/arm64/vmm/vmm_stat.h @@ -1,145 +1,147 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _VMM_STAT_H_ #define _VMM_STAT_H_ struct vm; #define MAX_VMM_STAT_ELEMS 64 /* arbitrary */ enum vmm_stat_scope { VMM_STAT_SCOPE_ANY, }; struct vmm_stat_type; typedef void (*vmm_stat_func_t)(struct vcpu *vcpu, struct vmm_stat_type *stat); struct vmm_stat_type { int index; /* position in the stats buffer */ int nelems; /* standalone or array */ const char *desc; /* description of statistic */ vmm_stat_func_t func; enum vmm_stat_scope scope; }; void vmm_stat_register(void *arg); #define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \ struct vmm_stat_type type[1] = { \ { -1, nelems, desc, func, scope } \ }; \ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type) #define VMM_STAT_DEFINE(type, nelems, desc, scope) \ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope) #define VMM_STAT_DECLARE(type) \ extern struct vmm_stat_type type[1] #define VMM_STAT(type, desc) \ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY) #define VMM_STAT_FUNC(type, desc, func) \ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY) #define VMM_STAT_ARRAY(type, nelems, desc) \ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY) void *vmm_stat_alloc(void); void vmm_stat_init(void *vp); void vmm_stat_free(void *vp); int vmm_stat_copy(struct vcpu *vcpu, int index, int count, int *num_stats, uint64_t *buf); int vmm_stat_desc_copy(int index, char *buf, int buflen); static void __inline vmm_stat_array_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx, uint64_t x) { #ifdef VMM_KEEP_STATS uint64_t *stats; stats = vcpu_stats(vcpu); if (vst->index >= 0 && statidx < vst->nelems) stats[vst->index + statidx] += x; #endif } static void __inline vmm_stat_array_set(struct vcpu *vcpu, struct vmm_stat_type *vst, int statidx, uint64_t val) { #ifdef VMM_KEEP_STATS uint64_t *stats; stats = vcpu_stats(vcpu); if (vst->index >= 0 && statidx < vst->nelems) stats[vst->index + statidx] = val; #endif } static void __inline vmm_stat_incr(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t x) { #ifdef VMM_KEEP_STATS vmm_stat_array_incr(vcpu, vst, 0, x); #endif } static void __inline vmm_stat_set(struct vcpu *vcpu, struct vmm_stat_type *vst, uint64_t val) { #ifdef VMM_KEEP_STATS vmm_stat_array_set(vcpu, vst, 0, val); #endif } VMM_STAT_DECLARE(VMEXIT_COUNT); VMM_STAT_DECLARE(VMEXIT_UNKNOWN); VMM_STAT_DECLARE(VMEXIT_WFI); VMM_STAT_DECLARE(VMEXIT_WFE); 
VMM_STAT_DECLARE(VMEXIT_HVC); VMM_STAT_DECLARE(VMEXIT_MSR); VMM_STAT_DECLARE(VMEXIT_DATA_ABORT); VMM_STAT_DECLARE(VMEXIT_INSN_ABORT); VMM_STAT_DECLARE(VMEXIT_UNHANDLED_SYNC); VMM_STAT_DECLARE(VMEXIT_IRQ); VMM_STAT_DECLARE(VMEXIT_FIQ); +VMM_STAT_DECLARE(VMEXIT_BRK); +VMM_STAT_DECLARE(VMEXIT_SS); VMM_STAT_DECLARE(VMEXIT_UNHANDLED_EL2); VMM_STAT_DECLARE(VMEXIT_UNHANDLED); #endif
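For illustration only, a minimal sketch of how an in-kernel consumer could drive the new capabilities and exit codes through the interfaces declared in sys/arm64/include/vmm.h above. The helper name debug_stub_step() and its one-shot enable/disable policy are assumptions made for this example, not part of the change; a userland debugger front end would instead reach the same vmmops_getcap()/vmmops_setcap() paths through the vmm device ioctls.

#include <sys/param.h>
#include <sys/systm.h>
#include <machine/vmm.h>

/*
 * Run one debug step of a vCPU: stop on a guest BRK or after a single-stepped
 * instruction, with hardware interrupts masked so the step is not diverted
 * into the guest's interrupt handler.  (Hypothetical helper, for illustration.)
 */
static int
debug_stub_step(struct vcpu *vcpu)
{
	struct vm_exit *vme;
	uint64_t pc;
	int error;

	if ((error = vm_set_capability(vcpu, VM_CAP_BRK_EXIT, 1)) != 0 ||
	    (error = vm_set_capability(vcpu, VM_CAP_SS_EXIT, 1)) != 0 ||
	    (error = vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 1)) != 0)
		return (error);

	error = vm_run(vcpu);
	if (error == 0) {
		vme = vm_exitinfo(vcpu);
		switch (vme->exitcode) {
		case VM_EXITCODE_BRK:
		case VM_EXITCODE_SS:
			/* The guest stopped; report where. */
			if (vm_get_register(vcpu, VM_REG_GUEST_PC, &pc) == 0)
				printf("guest stopped at %#lx\n", pc);
			break;
		default:
			break;
		}
	}

	/* The step is one-shot in this sketch; restore normal execution. */
	vm_set_capability(vcpu, VM_CAP_SS_EXIT, 0);
	vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, 0);
	return (error);
}

In the patch itself, enabling VM_CAP_SS_EXIT forces PSR_SS and MDSCR_EL1.{SS,KDE} in the saved guest state while MDCR_EL2_TDE routes the resulting debug exceptions to EL2, which handle_el1_sync_excp() then surfaces as VM_EXITCODE_SS (and BRK exceptions as VM_EXITCODE_BRK).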