Index: lib/Makefile =================================================================== --- lib/Makefile +++ lib/Makefile @@ -174,6 +174,10 @@ SUBDIR.${MK_BHYVE}+= libvmmapi .endif +.if ${MACHINE_CPUARCH} == "arm" +SUBDIR.${MK_BHYVE}+= libvmmapiarm +.endif + .if ${MACHINE_CPUARCH} != "sparc64" _libproc= libproc _librtld_db= librtld_db Index: lib/libvmmapiarm/Makefile =================================================================== --- lib/libvmmapiarm/Makefile +++ lib/libvmmapiarm/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +LIB= vmmapiarm +SRCS= vmmapi.c +INCS= vmmapi.h + +WARNS?= 2 + +CFLAGS+= -I${.CURDIR} + +.include Index: lib/libvmmapiarm/vmmapi.h =================================================================== --- lib/libvmmapiarm/vmmapi.h +++ lib/libvmmapiarm/vmmapi.h @@ -0,0 +1,75 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VMMAPI_H_ +#define _VMMAPI_H_ + +struct vmctx; + +/* + * Different styles of mapping the memory assigned to a VM into the address + * space of the controlling process. + */ +enum vm_mmap_style { + VM_MMAP_NONE, /* no mapping */ + VM_MMAP_ALL, /* fully and statically mapped */ + VM_MMAP_SPARSE, /* mappings created on-demand */ +}; + +int vm_create(const char *name); +struct vmctx *vm_open(const char *name); +void vm_destroy(struct vmctx *ctx); +int vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len); +int vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t len, enum vm_mmap_style s); +void *vm_map_gpa(struct vmctx *ctx, uint64_t gaddr, size_t len); +uint32_t vm_get_mem_limit(struct vmctx *ctx); +void vm_set_mem_limit(struct vmctx *ctx, uint32_t limit); +int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); +int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); +int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, + struct vm_exit *ret_vmexit); +const char *vm_capability_type2name(int type); +int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int *retval); +int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int val); + +/* + * Return a pointer to the statistics buffer. 
Note that this is not MT-safe. + */ +uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, + int *ret_entries); +const char *vm_get_stat_desc(struct vmctx *ctx, int index); + +/* Reset vcpu register state */ +int vcpu_reset(struct vmctx *ctx, int vcpu); + +int vm_attach_vgic(struct vmctx *ctx, uint64_t distributor_paddr, uint64_t cpu_int_paddr); + +#endif /* _VMMAPI_H_ */ Index: lib/libvmmapiarm/vmmapi.c =================================================================== --- lib/libvmmapiarm/vmmapi.c +++ lib/libvmmapiarm/vmmapi.c @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
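For context on how the new header is meant to be consumed, here is a minimal caller sketch, not part of this diff. It only uses functions declared in vmmapi.h; the guest name, memory base/size, entry point and VGIC addresses are illustrative placeholders, and <machine/vmm.h> is assumed to supply struct vm_exit and the exit codes.

#include <sys/types.h>
#include <machine/vmm.h>	/* assumed: struct vm_exit, VM_EXITCODE_* */
#include "vmmapi.h"

/* Sketch only: create, map and run a single-vcpu guest. */
static int
run_guest(const char *name, uint64_t membase, size_t memsize, uint64_t entry)
{
	struct vmctx *ctx;
	struct vm_exit vmexit;
	int error;

	if (vm_create(name) != 0)
		return (-1);
	if ((ctx = vm_open(name)) == NULL)
		return (-1);

	/* Back the guest RAM and map it into this process (VM_MMAP_ALL). */
	error = vm_setup_memory(ctx, membase, memsize, VM_MMAP_ALL);
	if (error != 0)
		goto done;

	/* Placeholder distributor/CPU-interface physical addresses. */
	error = vm_attach_vgic(ctx, 0x2c001000, 0x2c002000);
	if (error != 0)
		goto done;

	do {
		error = vm_run(ctx, 0, entry, &vmexit);
		/* A real loop would emulate INST_EMUL exits here. */
		entry = vmexit.pc + vmexit.inst_length;
	} while (error == 0 && vmexit.exitcode == VM_EXITCODE_INST_EMUL);
done:
	vm_destroy(ctx);
	return (error);
}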
+ */ + + + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "vmmapi.h" + +#define GB (1024 * 1024 * 1024UL) + +struct vmctx { + int fd; + uint32_t mem_limit; + enum vm_mmap_style vms; + size_t mem_size; + uint64_t mem_base; + char *mem_addr; + char *name; +}; + +#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) +#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) + +static int +vm_device_open(const char *name) +{ + int fd, len; + char *vmfile; + + len = strlen("/dev/vmm/") + strlen(name) + 1; + vmfile = malloc(len); + assert(vmfile != NULL); + snprintf(vmfile, len, "/dev/vmm/%s", name); + + /* Open the device file */ + fd = open(vmfile, O_RDWR, 0); + + free(vmfile); + return (fd); +} + +int +vm_create(const char *name) +{ + + return (CREATE((char *)name)); +} + +struct vmctx * +vm_open(const char *name) +{ + struct vmctx *vm; + + vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); + assert(vm != NULL); + + vm->fd = -1; + vm->mem_limit = 2 * GB; + vm->name = (char *)(vm + 1); + strcpy(vm->name, name); + + if ((vm->fd = vm_device_open(vm->name)) < 0) + goto err; + + return (vm); +err: + vm_destroy(vm); + return (NULL); +} + +void +vm_destroy(struct vmctx *vm) +{ + assert(vm != NULL); + + if (vm->fd >= 0) + close(vm->fd); + DESTROY(vm->name); + + free(vm); +} + +int +vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len) +{ + int error; + struct vm_memory_segment seg; + + bzero(&seg, sizeof(seg)); + seg.gpa = gpa; + error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); + *ret_len = seg.len; + return (error); +} + +uint32_t +vm_get_mem_limit(struct vmctx *ctx) +{ + + return (ctx->mem_limit); +} + +void +vm_set_mem_limit(struct vmctx *ctx, uint32_t limit) +{ + + ctx->mem_limit = limit; +} + +static int +setup_memory_segment(struct vmctx *ctx, uint64_t gpa, size_t len, char **addr) +{ + int error; + struct vm_memory_segment seg; + + /* + * Create and optionally map 'len' bytes of memory at guest + * physical address 'gpa' + */ + bzero(&seg, sizeof(seg)); + seg.gpa = gpa; + seg.len = len; + error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); + if (error == 0 && addr != NULL) { + *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, gpa); + } + return (error); +} + +int +vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t memsize, enum vm_mmap_style vms) +{ + char **addr; + int error; + + /* XXX VM_MMAP_SPARSE not implemented yet */ + assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL); + ctx->vms = vms; + + ctx->mem_base = membase; + + assert(memsize <= ctx->mem_limit); + ctx->mem_size = memsize; + + if (ctx->mem_size > 0) { + addr = (vms == VM_MMAP_ALL) ? 
&ctx->mem_addr : NULL; + error = setup_memory_segment(ctx, ctx->mem_base, ctx->mem_size, addr); + if (error) + return (error); + } + + return (0); +} + +void * +vm_map_gpa(struct vmctx *ctx, uint64_t gaddr, size_t len) +{ + + /* XXX VM_MMAP_SPARSE not implemented yet */ + assert(ctx->vms == VM_MMAP_ALL); + + return ((void *)(ctx->mem_addr + gaddr)); +} + + +int +vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) +{ + int error; + struct vm_register vmreg; + + bzero(&vmreg, sizeof(vmreg)); + vmreg.cpuid = vcpu; + vmreg.regnum = reg; + vmreg.regval = val; + + error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); + return (error); +} + +int +vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) +{ + int error; + struct vm_register vmreg; + + bzero(&vmreg, sizeof(vmreg)); + vmreg.cpuid = vcpu; + vmreg.regnum = reg; + + error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); + *ret_val = vmreg.regval; + return (error); +} + +int +vm_run(struct vmctx *ctx, int vcpu, uint64_t pc, struct vm_exit *vmexit) +{ + int error; + struct vm_run vmrun; + + bzero(&vmrun, sizeof(vmrun)); + vmrun.cpuid = vcpu; + vmrun.pc = pc; + + error = ioctl(ctx->fd, VM_RUN, &vmrun); + bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); + return (error); +} + +static struct { + const char *name; + int type; +} capstrmap[] = { + { "hlt_exit", VM_CAP_HALT_EXIT }, + { "mtrap_exit", VM_CAP_MTRAP_EXIT }, + { "pause_exit", VM_CAP_PAUSE_EXIT }, + { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, + { 0 } +}; + +int +vm_capability_name2type(const char *capname) +{ + int i; + + for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { + if (strcmp(capstrmap[i].name, capname) == 0) + return (capstrmap[i].type); + } + + return (-1); +} + +const char * +vm_capability_type2name(int type) +{ + int i; + + for (i = 0; capstrmap[i].name != NULL; i++) { + if (capstrmap[i].type == type) + return (capstrmap[i].name); + } + + return (NULL); +} + +int +vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, + int *retval) +{ + int error; + struct vm_capability vmcap; + + bzero(&vmcap, sizeof(vmcap)); + vmcap.cpuid = vcpu; + vmcap.captype = cap; + + error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); + *retval = vmcap.capval; + return (error); +} + +int +vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) +{ + struct vm_capability vmcap; + + bzero(&vmcap, sizeof(vmcap)); + vmcap.cpuid = vcpu; + vmcap.captype = cap; + vmcap.capval = val; + + return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); +} + +uint64_t * +vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, + int *ret_entries) +{ + int error; + + static struct vm_stats vmstats; + + vmstats.cpuid = vcpu; + + error = ioctl(ctx->fd, VM_STATS, &vmstats); + if (error == 0) { + if (ret_entries) + *ret_entries = vmstats.num_entries; + if (ret_tv) + *ret_tv = vmstats.tv; + return (vmstats.statbuf); + } else + return (NULL); +} + +const char * +vm_get_stat_desc(struct vmctx *ctx, int index) +{ + static struct vm_stat_desc statdesc; + + statdesc.index = index; + if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) + return (statdesc.desc); + else + return (NULL); +} + +int +vcpu_reset(struct vmctx *vmctx, int vcpu) +{ + return (ENXIO); +} + +int +vm_attach_vgic(struct vmctx *ctx, uint64_t distributor_paddr, uint64_t cpu_int_paddr) +{ + struct vm_attach_vgic vav; + + bzero(&vav, sizeof(vav)); + vav.distributor_paddr = distributor_paddr; + vav.cpu_int_paddr = cpu_int_paddr; + + return (ioctl(ctx->fd, VM_ATTACH_VGIC, 
&vav)); +} + + Index: sys/arm/arm/bitops.c =================================================================== --- sys/arm/arm/bitops.c +++ sys/arm/arm/bitops.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2017 Nicolae-Alexandru Ivan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +uint32_t find_next_bit(const uint32_t *addr, uint32_t size, + uint32_t offset) +{ + const uint32_t *p = addr + BIT_WORD(offset); + uint32_t result = offset & ~(BITS_PER_LONG-1); + uint32_t tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} + +/* + * This implementation of find_{first,next}_zero_bit was stolen from + * Linus' asm-alpha/bitops.h. + */ +uint32_t find_next_zero_bit(const uint32_t *addr, uint32_t size, + uint32_t offset) +{ + const uint32_t *p = addr + BIT_WORD(offset); + uint32_t result = offset & ~(BITS_PER_LONG-1); + uint32_t tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. 
*/ +found_middle: + return result + ffz(tmp); +} + +uint32_t find_first_bit(const uint32_t *addr, uint32_t size) +{ + const uint32_t *p = addr; + uint32_t result = 0; + uint32_t tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + __ffs(tmp); +} + +uint32_t find_first_zero_bit(const uint32_t *addr, uint32_t size) +{ + const uint32_t *p = addr; + uint32_t result = 0; + uint32_t tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) | (~0UL << size); + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found: + return result + ffz(tmp); +} + +void +bitmap_and(uint32_t *dst, const uint32_t *src1, const uint32_t *src2, int bits) +{ + int k; + int nr = bits / BITS_PER_LONG; + + for (k = 0; k < nr; k++) + dst[k] = src1[k] & src2[k]; +} + +void +bitmap_or(uint32_t *dst, const uint32_t *src1, const uint32_t *src2, int bits) +{ + int k; + int nr = bits / BITS_PER_LONG; + + for (k = 0; k < nr; k++) + dst[k] = src1[k] | src2[k]; +} Index: sys/arm/arm/gic.h =================================================================== --- sys/arm/arm/gic.h +++ sys/arm/arm/gic.h @@ -66,6 +66,10 @@ bus_space_tag_t gic_d_bst; bus_space_handle_t gic_c_bsh; bus_space_handle_t gic_d_bsh; +#ifdef VMM_ARM_VGIC + bus_space_tag_t gic_h_bst; + bus_space_handle_t gic_h_bsh; +#endif uint8_t ver; struct mtx mutex; uint32_t nirqs; @@ -103,4 +107,6 @@ int arm_gicv2m_attach(device_t); int arm_gic_intr(void *); +struct arm_gic_softc *get_arm_gic_sc(void); + #endif /* _ARM_GIC_H_ */ Index: sys/arm/arm/gic.c =================================================================== --- sys/arm/arm/gic.c +++ sys/arm/arm/gic.c @@ -182,6 +182,12 @@ } #endif +struct arm_gic_softc * +get_arm_gic_sc(void) +{ + return gic_sc; +} + static uint8_t gic_cpu_mask(struct arm_gic_softc *sc) { @@ -1099,7 +1105,7 @@ if (CPU_ISSET(i, &cpus)) val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT; - gic_d_write_4(sc, GICD_SGIR, val | gi->gi_irq); + gic_d_write_4(sc, GICD_SGIR(0), val | gi->gi_irq); } static int @@ -1232,7 +1238,7 @@ if (CPU_ISSET(i, &cpus)) val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT; - gic_d_write_4(sc, GICD_SGIR, val | ipi); + gic_d_write_4(sc, GICD_SGIR(0), val | ipi); } static int Index: sys/arm/arm/gic_common.h =================================================================== --- sys/arm/arm/gic_common.h +++ sys/arm/arm/gic_common.h @@ -73,6 +73,7 @@ #define GICD_ICENABLER(n) (0x0180 + (((n) >> 5) * 4)) /* v1 ICDICER */ #define GICD_ISPENDR(n) (0x0200 + (((n) >> 5) * 4)) /* v1 ICDISPR */ #define GICD_ICPENDR(n) (0x0280 + (((n) >> 5) * 4)) /* v1 ICDICPR */ +#define GICD_ISACTIVER(n) (0x0300 + ((n) * 4)) /* v1 ICDABR */ #define GICD_ICACTIVER(n) (0x0380 + (((n) >> 5) * 4)) /* v1 ICDABR */ #define GICD_IPRIORITYR(n) (0x0400 + (((n) >> 2) * 4)) /* v1 ICDIPR */ #define GICD_I_PER_IPRIORITYn 4 @@ -87,7 +88,8 @@ #define GICD_ICFGR_TRIG_LVL (0 << 1) #define GICD_ICFGR_TRIG_EDGE (1 << 1) #define GICD_ICFGR_TRIG_MASK 0x2 -#define GICD_SGIR 0x0F00 /* v1 ICDSGIR */ +#define GICD_SGIR(n) (0x0F00 + ((n) * 4)) /* v1 ICDSGIR */ + #define GICD_SGI_TARGET_SHIFT 16 #endif /* _GIC_COMMON_H_ */ Index: sys/arm/conf/GENERIC 
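The GICD_SGIR change above is mechanical: GICD_SGIR(0) expands to the old 0x0F00 offset and both touched call sites in gic.c pass 0, so behaviour is unchanged. For reference, a small illustration (not part of the diff) of the word those gic_d_write_4() calls compose, with the CPU target-list byte in bits 23:16 and the SGI ID in bits 3:0:

#include <stdint.h>

#define GICD_SGIR(n)		(0x0F00 + ((n) * 4))	/* as in gic_common.h */
#define GICD_SGI_TARGET_SHIFT	16

/* Compose the value written to GICD_SGIR for a software-generated interrupt. */
static inline uint32_t
gicd_sgir_value(uint8_t targets, uint8_t sgi)
{
	return (((uint32_t)targets << GICD_SGI_TARGET_SHIFT) | (sgi & 0xfu));
}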
=================================================================== --- sys/arm/conf/GENERIC +++ sys/arm/conf/GENERIC @@ -23,7 +23,7 @@ cpu CPU_CORTEXA options SMP_ON_UP machine arm armv6 -makeoptions CONF_CFLAGS="-march=armv7a" +makeoptions CONF_CFLAGS="-march=armv7a -mcpu=cortex-a7" makeoptions KERNVIRTADDR=0xc0000000 options KERNVIRTADDR=0xc0000000 @@ -228,6 +228,9 @@ # Extensible Firmware Interface options EFI +# VMM ARM VGIC +options VMM_ARM_VGIC + # Flattened Device Tree options FDT # Configure using FDT/DTB data makeoptions MODULES_EXTRA="dtb/allwinner dtb/am335x dtb/nvidia dtb/rpi dtb/omap4" Index: sys/arm/include/bitops.h =================================================================== --- sys/arm/include/bitops.h +++ sys/arm/include/bitops.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2017 Nicolae-Alexandru Ivan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _ARM_BITOPS_H_ +#define _ARM_BITOPS_H_ + +#define BITS_PER_LONG 32 +#define BIT(nr) (1UL << (nr)) +#define BIT_MASK(nr) (1UL << ((nr) & (BITS_PER_LONG-1))) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) + +inline void set_bit(int bit, uint32_t *addr) +{ + uint32_t mask = BIT_MASK(bit); + uint32_t *p = addr + BIT_WORD(bit); + + *p |= mask; +} + +inline void clear_bit(int bit, uint32_t *addr) +{ + uint32_t mask = BIT_MASK(bit); + uint32_t *p = addr + BIT_WORD(bit); + + *p &= ~mask; +} + +inline void change_bit(int bit, uint32_t *addr) +{ + uint32_t mask = BIT_MASK(bit); + uint32_t *p = addr + BIT_WORD(bit); + + *p ^= mask; +} + +inline int test_and_set_bit(int bit, uint32_t *addr) +{ + uint32_t mask = BIT_MASK(bit); + uint32_t *p = addr + BIT_WORD(bit); + uint32_t old = *p; + + *p = old | mask; + return ((old & mask) != 0); +} + +inline int test_and_clear_bit(int bit, uint32_t *addr) +{ + uint32_t mask = BIT_MASK(bit); + uint32_t *p = addr + BIT_WORD(bit); + uint32_t old = *p; + + *p = old & ~mask; + return ((old & mask) != 0); +} + +inline int test_and_change_bit(int bit, uint32_t *addr) +{ + uint32_t mask = BIT_MASK(bit); + uint32_t *p = addr + BIT_WORD(bit); + uint32_t old = *p; + + *p = old ^ mask; + return ((old & mask) != 0); +} + +inline int test_bit(int bit, const uint32_t *addr) +{ + return 1UL & (*(addr + BIT_WORD(bit)) + >> (bit & (BITS_PER_LONG-1))); +} + +inline int fls(int x) +{ + int ret; + + __asm("clz\t%0, %1" : "=r" (ret) : "r" (x)); + ret = BITS_PER_LONG - ret; + return ret; +} + +#define __fls(x) (fls(x) - 1) +#define ffs(x) ({ uint32_t __t = (x); fls(__t & -__t); }) +#define __ffs(x) (ffs(x) - 1) +#define ffz(x) __ffs( ~(x) ) + +uint32_t find_next_bit(const uint32_t *addr, uint32_t size, + uint32_t offset); + +uint32_t find_next_zero_bit(const uint32_t *addr, uint32_t size, + uint32_t offset); + +uint32_t find_first_bit(const uint32_t *addr, uint32_t size); + +uint32_t find_first_zero_bit(const uint32_t *addr, uint32_t size); + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +#define for_each_clear_bit(bit, addr, size) \ + for ((bit) = find_first_zero_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +/* same as for_each_clear_bit() but use bit as value to start with */ +#define for_each_clear_bit_from(bit, addr, size) \ + for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +void bitmap_and(uint32_t *dst, const uint32_t *src1, + const uint32_t *src2, int bits); + +void bitmap_or(uint32_t *dst, const uint32_t *src1, + const uint32_t *src2, int bits); + +#endif /* _ARM_BITOPS_H_ */ Index: sys/arm/include/vmm.h =================================================================== --- sys/arm/include/vmm.h +++ sys/arm/include/vmm.h @@ -0,0 +1,385 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
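A short usage sketch for the helpers declared above (note that BITS_PER_LONG is 32 here, matching the uint32_t word size, and none of these operations are atomic). The two-word, 64-bit map below is an arbitrary example, not something taken from this patch:

#include <sys/types.h>
#include <machine/bitops.h>	/* the header added above */

/* Walk and clear every set bit in a two-word (64-bit) pending map. */
static void
drain_pending(uint32_t pending[2])
{
	uint32_t bit;

	for_each_set_bit(bit, pending, 64)
		clear_bit(bit, pending);

	/* find_first_zero_bit() can then be used to claim a free slot: */
	bit = find_first_zero_bit(pending, 64);
	if (bit < 64)
		set_bit(bit, pending);
}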
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_H_ +#define _VMM_H_ + + +enum vm_suspend_how { + VM_SUSPEND_NONE, + VM_SUSPEND_RESET, + VM_SUSPEND_POWEROFF, + VM_SUSPEND_HALT, + VM_SUSPEND_TRIPLEFAULT, + VM_SUSPEND_LAST +}; + +/* + * Identifiers for architecturally defined registers. + */ +enum vm_reg_name { + VM_REG_GUEST_R0, + VM_REG_GUEST_R1, + VM_REG_GUEST_R2, + VM_REG_GUEST_R3, + VM_REG_GUEST_R4, + VM_REG_GUEST_R5, + VM_REG_GUEST_R6, + VM_REG_GUEST_R7, + VM_REG_GUEST_R8, + VM_REG_GUEST_R9, + VM_REG_GUEST_R10, + VM_REG_GUEST_R11, + VM_REG_GUEST_R12, + VM_REG_GUEST_SP, + VM_REG_GUEST_LR, + VM_REG_GUEST_PC, + VM_REG_GUEST_CPSR, + VM_REG_GUEST_SP_SVC, + VM_REG_GUEST_LR_SVC, + VM_REG_GUEST_SP_ABT, + VM_REG_GUEST_LR_ABT, + VM_REG_GUEST_SP_UND, + VM_REG_GUEST_LR_UND, + VM_REG_GUEST_SP_IRQ, + VM_REG_GUEST_LR_IRQ, + VM_REG_GUEST_R8_FIQ, + VM_REG_GUEST_R9_FIQ, + VM_REG_GUEST_R10_FIQ, + VM_REG_GUEST_R11_FIQ, + VM_REG_GUEST_R12_FIQ, + VM_REG_GUEST_SP_FIQ, + VM_REG_GUEST_LR_FIQ, + VM_REG_LAST +}; + +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) +#define VM_INTINFO_DEL_ERRCODE 0x800 +#define VM_INTINFO_RSVD 0x7ffff000 +#define VM_INTINFO_VALID 0x80000000 +#define VM_INTINFO_TYPE 0x700 +#define VM_INTINFO_HWINTR (0 << 8) +#define VM_INTINFO_NMI (2 << 8) +#define VM_INTINFO_HWEXCEPTION (3 << 8) +#define VM_INTINFO_SWINTR (4 << 8) + +#ifdef _KERNEL + +#define VM_MAX_NAMELEN 32 + +struct vm; +struct vm_exception; +struct vm_memory_segment; +struct vm_exit; +struct vm_run; +struct vm_object; +struct pmap; + +typedef int (*vmm_init_func_t)(int ipinum); +typedef int (*vmm_cleanup_func_t)(void); +typedef void (*vmm_resume_func_t)(void); +typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); +typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, + struct pmap *pmap, void *rendezvous_cookie, + void *suspend_cookie); +typedef void (*vmi_cleanup_func_t)(void *vmi); +typedef int (*vmi_mmap_set_func_t)(void *vmi, uint64_t gpa, + uint64_t hpa, size_t length, + int prot); +typedef uint64_t (*vmi_mmap_get_func_t)(void *vmi, uint64_t ipa); + +typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, + uint64_t *retval); +typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num, + uint64_t val); +typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); +typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); +typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, 
vm_offset_t max); +typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); +typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu); +typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); + +struct vmm_ops { + vmm_init_func_t init; /* module wide initialization */ + vmm_cleanup_func_t cleanup; + vmm_resume_func_t resume; + + vmi_init_func_t vminit; /* vm-specific initialization */ + vmi_run_func_t vmrun; + vmi_cleanup_func_t vmcleanup; + vmi_mmap_set_func_t vmmapset; + vmi_mmap_get_func_t vmmapget; + vmi_get_register_t vmgetreg; + vmi_set_register_t vmsetreg; + vmi_get_cap_t vmgetcap; + vmi_set_cap_t vmsetcap; +}; + +extern struct vmm_ops vmm_ops_arm; + +int vm_create(const char *name, struct vm **retvm); +void vm_destroy(struct vm *vm); +const char *vm_name(struct vm *vm); +int vm_malloc(struct vm *vm, uint64_t gpa, size_t len); +uint64_t vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t size); +int vm_gpabase2memseg(struct vm *vm, uint64_t gpabase, + struct vm_memory_segment *seg); +boolean_t vm_mem_allocated(struct vm *vm, uint64_t gpa); +int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); +int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val); +int vm_run(struct vm *vm, struct vm_run *vmrun); +void* vm_get_cookie(struct vm *vm); +int vm_get_capability(struct vm *vm, int vcpu, int type, int *val); +int vm_set_capability(struct vm *vm, int vcpu, int type, int val); +int vm_activate_cpu(struct vm *vm, int vcpu); +int vm_attach_vgic(struct vm *vm, uint64_t distributor_paddr, uint64_t cpu_int_paddr); +struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); +void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); +void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); +void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); + +#ifdef _SYS__CPUSET_H_ +/* + * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. + * The rendezvous 'func(arg)' is not allowed to do anything that will + * cause the thread to be put to sleep. + * + * If the rendezvous is being initiated from a vcpu context then the + * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. + * + * The caller cannot hold any locks when initiating the rendezvous. + * + * The implementation of this API may cause vcpus other than those specified + * by 'dest' to be stalled. The caller should not rely on any vcpus making + * forward progress when the rendezvous is in progress. 
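The ops table above mirrors the x86 vmm_ops contract. As a sketch of how the machine-independent side is expected to call into it (the generic vmm.c is not part of this diff, so the dispatch macro below is an assumption for illustration only):

#include <sys/errno.h>

static struct vmm_ops *ops = &vmm_ops_arm;

#define	VMGETREG(vmi, vcpu, num, rv)					\
	((ops != NULL) ? (*ops->vmgetreg)((vmi), (vcpu), (num), (rv)) : ENXIO)

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);
	if (reg >= VM_REG_LAST)
		return (EINVAL);

	/* vm_get_cookie() returns the per-VM 'vmi' created by vminit(). */
	return (VMGETREG(vm_get_cookie(vm), vcpu, reg, retval));
}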
+ */ +typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); +void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, + vm_rendezvous_func_t func, void *arg); +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); +#endif /* _SYS__CPUSET_H_ */ + +static __inline int +vcpu_rendezvous_pending(void *rendezvous_cookie) +{ + + return (*(uintptr_t *)rendezvous_cookie != 0); +} + +static __inline int +vcpu_suspended(void *suspend_cookie) +{ + + return (*(int *)suspend_cookie); +} + +enum vcpu_state { + VCPU_IDLE, + VCPU_FROZEN, + VCPU_RUNNING, + VCPU_SLEEPING, +}; + +int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state, + bool from_idle); +enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu); + +static int __inline +vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) +{ + return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING); +} + +#ifdef _SYS_PROC_H_ +static int __inline +vcpu_should_yield(struct vm *vm, int vcpu) +{ + + if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) + return (1); + else if (curthread->td_owepreempt) + return (1); + else + return (0); +} +#endif + +void *vcpu_stats(struct vm *vm, int vcpu); +void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr); + +/* + * This function is called after a VM-exit that occurred during exception or + * interrupt delivery through the IDT. The format of 'intinfo' is described + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. + * + * If a VM-exit handler completes the event delivery successfully then it + * should call vm_exit_intinfo() to extinguish the pending event. For e.g., + * if the task switch emulation is triggered via a task gate then it should + * call this function with 'intinfo=0' to indicate that the external event + * is not pending anymore. + * + * Return value is 0 on success and non-zero on failure. + */ +int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); + +/* + * This function is called before every VM-entry to retrieve a pending + * event that should be injected into the guest. This function combines + * nested events into a double or triple fault. + * + * Returns 0 if there are no events that need to be injected into the guest + * and non-zero otherwise. 
+ */ +int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); + +int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); + +enum vm_reg_name vm_segment_name(int seg_encoding); + +struct vm_copyinfo { + uint64_t gpa; + size_t len; + void *hva; + void *cookie; +}; + +int vcpu_trace_exceptions(struct vm *vm, int vcpuid); +#endif /* KERNEL */ + +#define VM_MAXCPU 16 /* maximum virtual cpus */ + +struct vie { + uint8_t access_size:4, sign_extend:1, dir:1, unused:2; + enum vm_reg_name reg; +}; + +/* + * Identifiers for optional vmm capabilities + */ +enum vm_cap_type { + VM_CAP_HALT_EXIT, + VM_CAP_MTRAP_EXIT, + VM_CAP_PAUSE_EXIT, + VM_CAP_UNRESTRICTED_GUEST, + VM_CAP_MAX +}; +enum vm_exitcode { + VM_EXITCODE_BOGUS, + VM_EXITCODE_INST_EMUL, + VM_EXITCODE_HYP, + VM_EXITCODE_MAX +}; + +enum task_switch_reason { + TSR_CALL, + TSR_IRET, + TSR_JMP, + TSR_IDT_GATE, /* task gate in IDT */ +}; + +struct vm_task_switch { + uint16_t tsssel; /* new TSS selector */ + int ext; /* task switch due to external event */ + uint32_t errcode; + int errcode_valid; /* push 'errcode' on the new stack */ + enum task_switch_reason reason; +}; + +struct vm_exit { + enum vm_exitcode exitcode; + int inst_length; + uint64_t pc; + union { + /* + * ARM specific payload. + */ + struct { + uint32_t exception_nr; + uint32_t hsr; /* Hyp Syndrome Register */ + uint32_t hdfar; /* VA at a Data Abort exception */ + uint32_t hifar; /* VA at a Prefetch Abort exception */ + uint32_t hpfar; /* IPA[39:12] at aborts on stage 2 address translations */ + uint32_t mode; + } hyp; + + struct { + uint64_t gpa; + int fault_type; + } paging; + struct { + uint64_t gpa; + struct vie vie; + } inst_emul; + /* + * VMX specific payload. Used when there is no "better" + * exitcode to represent the VM-exit. + */ + struct { + int status; /* vmx inst status */ + /* + * 'exit_reason' and 'exit_qualification' are valid + * only if 'status' is zero. + */ + uint32_t exit_reason; + uint64_t exit_qualification; + /* + * 'inst_error' and 'inst_type' are valid + * only if 'status' is non-zero. + */ + int inst_type; + int inst_error; + } vmx; + /* + * SVM specific payload. + */ + struct { + uint64_t exitcode; + uint64_t exitinfo1; + uint64_t exitinfo2; + } svm; + struct { + uint32_t code; /* ecx value */ + uint64_t wval; + } msr; + struct { + int vcpu; + uint64_t rip; + } spinup_ap; + struct { + uint64_t rflags; + } hlt; + struct { + int vector; + } ioapic_eoi; + struct { + enum vm_suspend_how how; + } suspended; + struct vm_task_switch task_switch; + } u; +}; + +#endif /* _VMM_H_ */ Index: sys/arm/include/vmm_dev.h =================================================================== --- sys/arm/include/vmm_dev.h +++ sys/arm/include/vmm_dev.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
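Tying the exit structure above back to the userland library: a device model receiving VM_EXITCODE_INST_EMUL would typically consume the decoded vie as in the sketch below. mmio_read()/mmio_write() are hypothetical device callbacks, and the sketch assumes vie.dir carries the HSR WnR bit (non-zero for guest writes), as filled in by hyp_handle_exception() later in this diff.

#include <stdint.h>
#include <machine/vmm.h>	/* struct vm_exit, struct vie */
#include "vmmapi.h"

/* Hypothetical device-model callbacks, not part of this patch. */
int mmio_read(uint64_t gpa, uint64_t *rval, int size);
int mmio_write(uint64_t gpa, uint64_t wval, int size);

static int
handle_inst_emul(struct vmctx *ctx, int vcpu, struct vm_exit *vme)
{
	struct vie *vie = &vme->u.inst_emul.vie;
	uint64_t gpa = vme->u.inst_emul.gpa;
	uint64_t val, sbit;
	int error;

	if (vie->dir) {
		/* Guest store: read the source register, emulate the write. */
		error = vm_get_register(ctx, vcpu, vie->reg, &val);
		if (error == 0)
			error = mmio_write(gpa, val, vie->access_size);
	} else {
		/* Guest load: emulate the read, sign-extend if requested. */
		error = mmio_read(gpa, &val, vie->access_size);
		if (error == 0) {
			if (vie->sign_extend && vie->access_size < 8) {
				sbit = 1ULL << (vie->access_size * 8 - 1);
				val &= (sbit << 1) - 1;
				if (val & sbit)
					val |= ~((sbit << 1) - 1);
			}
			error = vm_set_register(ctx, vcpu, vie->reg, val);
		}
	}
	return (error);
}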
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_DEV_H_ +#define _VMM_DEV_H_ + +#ifdef _KERNEL +void vmmdev_init(void); +int vmmdev_cleanup(void); +#endif + +struct vm_memory_segment { + uint64_t gpa; /* in */ + size_t len; + int wired; +}; + +struct vm_register { + int cpuid; + int regnum; /* enum vm_reg_name */ + uint64_t regval; +}; + +struct vm_run { + int cpuid; + uint64_t pc; + struct vm_exit vm_exit; + +}; + +struct vm_exception { + int cpuid; + int vector; + uint32_t error_code; + int error_code_valid; + int restart_instruction; +}; + +struct vm_capability { + int cpuid; + enum vm_cap_type captype; + int capval; + int allcpus; +}; + +#define MAX_VM_STATS 64 +struct vm_stats { + int cpuid; /* in */ + int num_entries; /* out */ + struct timeval tv; + uint64_t statbuf[MAX_VM_STATS]; +}; +struct vm_stat_desc { + int index; /* in */ + char desc[128]; /* out */ +}; + + +struct vm_suspend { + enum vm_suspend_how how; +}; + +struct vm_gla2gpa { + int vcpuid; /* inputs */ + int prot; /* PROT_READ or PROT_WRITE */ + uint64_t gla; + int fault; /* outputs */ + uint64_t gpa; +}; + +struct vm_activate_cpu { + int vcpuid; +}; + +struct vm_attach_vgic { + uint64_t distributor_paddr; + uint64_t cpu_int_paddr; +}; + +#define VM_ACTIVE_CPUS 0 +#define VM_SUSPENDED_CPUS 1 + +enum { + /* general routines */ + IOCNUM_ABIVERS = 0, + IOCNUM_RUN = 1, + IOCNUM_SET_CAPABILITY = 2, + IOCNUM_GET_CAPABILITY = 3, + IOCNUM_SUSPEND = 4, + IOCNUM_REINIT = 5, + + /* memory apis */ + IOCNUM_MAP_MEMORY = 10, + IOCNUM_GET_MEMORY_SEG = 11, + IOCNUM_GET_GPA_PMAP = 12, + IOCNUM_GLA2GPA = 13, + + /* register/state accessors */ + IOCNUM_SET_REGISTER = 20, + IOCNUM_GET_REGISTER = 21, + + /* statistics */ + IOCNUM_VM_STATS = 50, + IOCNUM_VM_STAT_DESC = 51, + + /* vm_cpuset */ + IOCNUM_ACTIVATE_CPU = 90, + IOCNUM_GET_CPUSET = 91, + + /* vm_attach_vgic */ + IOCNUM_ATTACH_VGIC = 110, +}; + +#define VM_RUN \ + _IOWR('v', IOCNUM_RUN, struct vm_run) +#define VM_SUSPEND \ + _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) +#define VM_REINIT \ + _IO('v', IOCNUM_REINIT) +#define VM_MAP_MEMORY \ + _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) +#define VM_GET_MEMORY_SEG \ + _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment) +#define VM_SET_REGISTER \ + _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) +#define VM_GET_REGISTER \ + _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) +#define VM_SET_CAPABILITY \ + _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) +#define VM_GET_CAPABILITY \ + _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) +#define VM_STATS \ + _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) +#define VM_STAT_DESC \ + _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) +#define VM_GLA2GPA \ + _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa) +#define VM_ACTIVATE_CPU 
\ + _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) +#define VM_GET_CPUS \ + _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_ATTACH_VGIC \ + _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_attach_vgic) + +#endif Index: sys/arm/include/vmm_instruction_emul.h =================================================================== --- sys/arm/include/vmm_instruction_emul.h +++ sys/arm/include/vmm_instruction_emul.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_INSTRUCTION_EMUL_H_ +#define _VMM_INSTRUCTION_EMUL_H_ + +#include + +/* + * Callback functions to read and write memory regions. + */ +typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa, + uint64_t *rval, int rsize, void *arg); + +typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa, + uint64_t wval, int wsize, void *arg); + +/* + * Emulate the decoded 'vie' instruction. + * + * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region + * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the + * callback functions. + * + * 'void *vm' should be 'struct vm *' when called from kernel context and + * 'struct vmctx *' when called from user context. + * s + */ +int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t mrr, mem_region_write_t mrw, void *mrarg); + +#endif /* _VMM_INSTRUCTION_EMUL_H_ */ Index: sys/arm/vmm/arm.h =================================================================== --- sys/arm/vmm/arm.h +++ sys/arm/vmm/arm.h @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
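A sketch of callbacks matching the mem_region_read_t/mem_region_write_t typedefs above, e.g. for the VGIC distributor region. vgic_mmio_read()/vgic_mmio_write() are hypothetical helpers, not functions defined in this diff; only the callback signatures come from the header.

#include <sys/types.h>

struct vgic_distributor;	/* per-VM distributor state (sys/arm/vmm/vgic.h) */
int vgic_mmio_read(struct vgic_distributor *, int, uint64_t, uint64_t *, int);	/* hypothetical */
int vgic_mmio_write(struct vgic_distributor *, int, uint64_t, uint64_t, int);	/* hypothetical */

static int
vgic_region_read(void *vm, int cpuid, uint64_t gpa, uint64_t *rval,
    int rsize, void *arg)
{
	struct vgic_distributor *dist = arg;

	return (vgic_mmio_read(dist, cpuid, gpa, rval, rsize));
}

static int
vgic_region_write(void *vm, int cpuid, uint64_t gpa, uint64_t wval,
    int wsize, void *arg)
{
	struct vgic_distributor *dist = arg;

	return (vgic_mmio_write(dist, cpuid, gpa, wval, wsize));
}

/*
 * These would be handed to vmm_emulate_instruction() together with the
 * vie decoded from a stage-2 data abort, with 'arg' pointing at the
 * per-VM distributor state.
 */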
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "mmu.h" +#include "vgic.h" +#include + +struct hypctx { + uint32_t vcpu; + struct hyp* hyp; + + uint32_t hcr; + + uint32_t midr; + uint32_t mpidr; + + struct reg regs; + + uint32_t sp_und; + uint32_t lr_und; + uint32_t spsr_und; + + uint32_t sp_svc; + uint32_t lr_svc; + uint32_t spsr_svc; + + uint32_t sp_abt; + uint32_t lr_abt; + uint32_t spsr_abt; + + uint32_t sp_irq; + uint32_t lr_irq; + uint32_t spsr_irq; + + uint32_t sp_fiq; + uint32_t lr_fiq; + uint32_t spsr_fiq; + uint32_t r8_fiq; + uint32_t r9_fiq; + uint32_t r10_fiq; + uint32_t r11_fiq; + uint32_t r12_fiq; + + uint32_t cp15_sctlr; + uint32_t cp15_cpacr; + uint32_t cp15_ttbcr; + uint32_t cp15_dacr; + uint64_t cp15_ttbr0; + uint64_t cp15_ttbr1; + uint32_t cp15_prrr; + uint32_t cp15_nmrr; + uint32_t cp15_csselr; + uint32_t cp15_cid; + uint32_t cp15_tid_urw; + uint32_t cp15_tid_uro; + uint32_t cp15_tid_priv; + uint32_t cp15_dfsr; + uint32_t cp15_ifsr; + uint32_t cp15_adfsr; + uint32_t cp15_aifsr; + uint32_t cp15_dfar; + uint32_t cp15_ifar; + uint32_t cp15_vbar; + uint32_t cp15_cntkctl; + uint64_t cp15_par; + uint32_t cp15_amair0; + uint32_t cp15_amair1; + struct { + uint32_t hsr; /* Hyp Syndrome Register */ + uint32_t hdfar; /* VA at a Data Abort exception */ + uint32_t hifar; /* VA at a Prefetch Abort exception */ + uint32_t hpfar; /* IPA[39:12] at aborts on stage 2 address translations */ + } exit_info; + struct vgic_cpu_int vgic_cpu_int; +}; + +struct hyp { + lpae_pd_entry_t l1pd[2 * LPAE_L1_ENTRIES]; + lpae_pd_entry_t vttbr; + uint64_t vmid_generation; + struct vm *vm; + lpae_pd_entry_t l1pd_phys; + struct hypctx ctx[VM_MAXCPU]; + bool vgic_attached; + struct vgic_distributor vgic_distributor; +}; +CTASSERT((offsetof(struct hyp, l1pd) & PAGE_MASK) == 0); + +uint64_t vmm_call_hyp(void *hyp_func_addr, ...); + +extern void vmm_stub_install(void *hypervisor_stub_vect); +extern int hyp_enter_guest(struct hypctx *hypctx); + +#define LOW(x) (x & 0xFFFFFFFF) +#define HIGH(x) LOW(x >> 32) + +#define VMID_GENERATION_MASK ((1UL<<8) - 1) +#define BUILD_VTTBR(VMID, PTADDR) ((VMID << 48) | PTADDR); + +#define MPIDR_SMP_MASK (0x3 << 30) +#define MPIDR_AFF1_LEVEL(x) ((x >> 2) << 8) +#define MPIDR_AFF0_LEVEL(x) ((x & 0x3) << 0) Index: sys/arm/vmm/arm.c =================================================================== --- sys/arm/vmm/arm.c +++ sys/arm/vmm/arm.c @@ -0,0 +1,682 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
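On the BUILD_VTTBR()/VMID_GENERATION_MASK macros in arm.h above: the intended layout is an 8-bit VMID in VTTBR bits 55:48 over the stage-2 translation table base, with the low 8 bits of a monotonically increasing generation counter serving as the VMID (the high bits act as an epoch that set_vttbr() in arm.c compares to decide when a vCPU must take a fresh VMID). Note the #define carries a trailing semicolon, so it only composes safely as a full statement. A plain-function equivalent, for illustration only:

#include <stdint.h>

#define	VMID_GENERATION_MASK	((1UL << 8) - 1)	/* as in arm.h */

static inline uint64_t
build_vttbr(uint64_t vmid_generation, uint64_t l1pd_phys)
{
	uint64_t vmid = vmid_generation & VMID_GENERATION_MASK;

	return ((vmid << 48) | l1pd_phys);	/* VMID in bits 55:48 */
}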
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + +#include +#include +#include +#include + +#include "mmu.h" +#include "arm.h" +#include "hyp.h" +#include "vgic.h" + +#define HANDLED 1 +#define UNHANDLED 0 + +MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP"); + +extern char init_hyp_vector[]; +extern char hyp_vector[]; +extern char hyp_code_start[]; +extern char hypervisor_stub_vect[]; +extern char hypmode_enabled[]; + +lpae_pd_entry_t *hyp_l1pd; +char *stack; + +static uint64_t vmid_generation = 0; +static struct mtx vmid_generation_mtx; + +static void set_vttbr(struct hyp* hyp) { + if (hyp->vmid_generation && + ((hyp->vmid_generation & ~VMID_GENERATION_MASK) != + (atomic_load_64(&vmid_generation) & ~VMID_GENERATION_MASK))) + goto out; + + mtx_lock(&vmid_generation_mtx); + + /* Another VCPU has change the VMID already */ + if (hyp->vmid_generation && + ((hyp->vmid_generation & ~VMID_GENERATION_MASK) != + (vmid_generation & ~VMID_GENERATION_MASK))) { + mtx_unlock(&vmid_generation_mtx); + goto out; + } + + vmid_generation++; + if (!(vmid_generation & VMID_GENERATION_MASK)) + vmid_generation++; + + hyp->vmid_generation = vmid_generation; + mtx_unlock(&vmid_generation_mtx); +out: + hyp->vttbr = BUILD_VTTBR((hyp->vmid_generation & VMID_GENERATION_MASK), hyp->l1pd_phys); +} + +static int +arm_init(int ipinum) +{ + char *stack_top; + lpae_vm_paddr_t phys_hyp_l1pd, phys_check; + + if (hypmode_enabled[0]) { + printf("arm_init: processor didn't boot in HYP-mode (no support)\n"); + return (ENXIO); + } + + mtx_init(&vmid_generation_mtx, "vmid_generation_mtx", NULL, MTX_DEF); + + stack = malloc(PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + stack_top = stack + PAGE_SIZE; + + hyp_l1pd = malloc(2 * LPAE_L1_ENTRIES * sizeof(lpae_pd_entry_t), + M_HYP, M_WAITOK | M_ZERO); + + lpae_vmmmap_set(NULL, + (lpae_vm_vaddr_t)stack, + (lpae_vm_paddr_t)vtophys(stack), + PAGE_SIZE, + VM_PROT_READ | VM_PROT_WRITE); + + printf("%s hyp_code_start: %p, phys_hyp_code_start: %p\n", __func__, + (void*) hyp_code_start, (void*)vtophys(hyp_code_start)); + + /* + * Create two mappings: + * - one identity - VA == PA + * - one normal mappings to HYP pagetable + */ + lpae_vmmmap_set(NULL, + (lpae_vm_vaddr_t)hyp_code_start, + (lpae_vm_paddr_t)vtophys(hyp_code_start), + PAGE_SIZE, + VM_PROT_READ | VM_PROT_WRITE); + + lpae_vmmmap_set(NULL, + (lpae_vm_vaddr_t)vtophys(hyp_code_start), + (lpae_vm_paddr_t)vtophys(hyp_code_start), + PAGE_SIZE, + VM_PROT_READ | VM_PROT_WRITE); + /* + * 
Flush all caches to be sure we tables in physical memory + */ + armv7_idcache_wbinv_all(); + cpu_l2cache_wbinv_all(); + + dump_lpae_mapping(NULL); + + phys_check = lpae_vmmmap_get(NULL, (lpae_vm_vaddr_t)hyp_code_start); + if (phys_check != (lpae_vm_vaddr_t)vtophys(hyp_code_start)) { + printf ("%s lpae_vmmmap_get returned %p instead of %p\n", + __func__, + (void*) phys_check, + (void*) vtophys(hyp_code_start)); + } + /* + * Install the temporary vector from which + * will do the initialization part of VMM + */ + printf("%s init_hyp_vector: %p\n", __func__, (void*) &init_hyp_vector[0]); + + vmm_call_hyp((void *)vtophys(&init_hyp_vector[0])); + + /* + * Special init call to activate the MMU + * and change the exception vector. + * - r0 - first parameter unused + * - r1 - stack pointer + * - r2 - lower 32 bits for the HTTBR + * - r3 - upper 32 bits for the HTTBR + */ + + phys_hyp_l1pd = (lpae_vm_paddr_t)vtophys(hyp_l1pd); + + printf("%s hyp_l1pd: %p, phys_hyp_l1pd %p\n", __func__, (void*) hyp_l1pd, (void*)phys_hyp_l1pd); + vmm_call_hyp(&hyp_vector[0], stack_top, LOW(phys_hyp_l1pd), HIGH(phys_hyp_l1pd)); + + /* Initialize VGIC infrastructure */ + if (vgic_hyp_init()) { + return (ENXIO); + } + + return 0; +} + +static int +arm_cleanup(void) +{ + printf("%s before vmm_call_hyp\n", __func__); + vmm_call_hyp((void *) vtophys(vmm_stub_install), (void *)vtophys(&hypervisor_stub_vect[0])); + printf("%s after vmm_call_hyp\n", __func__); + + printf("%s before freestack\n", __func__); + free(stack, M_HYP); + + printf("%s before lpae_vmcleanup\n", __func__); + lpae_vmcleanup(NULL); + + free(hyp_l1pd, M_HYP); + + mtx_destroy(&vmid_generation_mtx); + + return 0; +} + +static void +arm_restore(void) +{ + + ; +} + +static void * +arm_vminit(struct vm *vm, pmap_t pmap) +{ + struct hyp *hyp; + struct hypctx *hypctx; + int i; + + hyp = malloc(sizeof(struct hyp), M_HYP, M_WAITOK | M_ZERO); + if ((uintptr_t)hyp & PAGE_MASK) { + panic("malloc of struct hyp not aligned on %d byte boundary", + PAGE_SIZE); + } + hyp->vm = vm; + + hyp->vgic_attached = false; + + mtx_init(&hyp->vgic_distributor.distributor_lock, "Distributor Lock", "", MTX_SPIN); + + hyp->l1pd_phys = (lpae_pd_entry_t) vtophys(&hyp->l1pd[0]); + set_vttbr(hyp); + + for (i = 0; i < VM_MAXCPU; i++) { + hypctx = &hyp->ctx[i]; + hypctx->vcpu = i; + hypctx->hyp = hyp; + hypctx->hcr = HCR_GUEST_MASK & ~HCR_TSW & ~HCR_TAC & ~HCR_IMO & ~HCR_FMO; + hypctx->midr = cpu_ident(); + hypctx->mpidr = (cp15_mpidr_get() & MPIDR_SMP_MASK) | + MPIDR_AFF1_LEVEL(i) | + MPIDR_AFF0_LEVEL(i); + hypctx->regs.r_cpsr = PSR_SVC32_MODE | PSR_A | PSR_I | PSR_F; + } + + lpae_vmmmap_set(NULL, + (lpae_vm_vaddr_t)hyp, + (lpae_vm_paddr_t)vtophys(hyp), + sizeof(struct hyp), + VM_PROT_READ | VM_PROT_WRITE); + + /* Map Timer0 SP804 */ + lpae_vmmmap_set(hyp, + (lpae_vm_vaddr_t)0x1c110000, + (lpae_vm_paddr_t)0x1c110000, + PAGE_SIZE, + VM_PROT_READ | VM_PROT_WRITE); + + lpae_vmmmap_set(hyp, + (lpae_vm_vaddr_t)0x1c120000, + (lpae_vm_paddr_t)0x1c120000, + PAGE_SIZE, + VM_PROT_READ | VM_PROT_WRITE); + + + return (hyp); +} + +static enum vm_reg_name +get_vm_reg_name(uint32_t reg_nr, uint32_t mode) +{ + switch(reg_nr) { + case 0: + return VM_REG_GUEST_R0; + case 1: + return VM_REG_GUEST_R1; + case 2: + return VM_REG_GUEST_R2; + case 3: + return VM_REG_GUEST_R3; + case 4: + return VM_REG_GUEST_R4; + case 5: + return VM_REG_GUEST_R5; + case 6: + return VM_REG_GUEST_R6; + case 7: + return VM_REG_GUEST_R7; + case 8: + if (mode == PSR_FIQ32_MODE) + return VM_REG_GUEST_R8_FIQ; + else + return 
VM_REG_GUEST_R8; + case 9: + if (mode == PSR_FIQ32_MODE) + return VM_REG_GUEST_R9_FIQ; + else + return VM_REG_GUEST_R9; + case 10: + if (mode == PSR_FIQ32_MODE) + return VM_REG_GUEST_R10_FIQ; + else + return VM_REG_GUEST_R10; + case 11: + if (mode == PSR_FIQ32_MODE) + return VM_REG_GUEST_R11_FIQ; + else + return VM_REG_GUEST_R11; + case 12: + if (mode == PSR_FIQ32_MODE) + return VM_REG_GUEST_R12_FIQ; + else + return VM_REG_GUEST_R12; + case 13: + if (mode == PSR_FIQ32_MODE) + return VM_REG_GUEST_SP_FIQ; + else if (mode == PSR_SVC32_MODE) + return VM_REG_GUEST_SP_SVC; + else if (mode == PSR_ABT32_MODE) + return VM_REG_GUEST_SP_ABT; + else if (mode == PSR_UND32_MODE) + return VM_REG_GUEST_SP_UND; + else if (mode == PSR_IRQ32_MODE) + return VM_REG_GUEST_SP_IRQ; + else + return VM_REG_GUEST_SP; + case 14: + if (mode == PSR_FIQ32_MODE) + return VM_REG_GUEST_LR_FIQ; + else if (mode == PSR_SVC32_MODE) + return VM_REG_GUEST_LR_SVC; + else if (mode == PSR_ABT32_MODE) + return VM_REG_GUEST_LR_ABT; + else if (mode == PSR_UND32_MODE) + return VM_REG_GUEST_LR_UND; + else if (mode == PSR_IRQ32_MODE) + return VM_REG_GUEST_LR_IRQ; + else + return VM_REG_GUEST_LR; + } + return VM_REG_LAST; +} + +static int hyp_handle_exception(struct vm_exit *vmexit) +{ + int handled; + int hsr_ec, hsr_il, hsr_iss; + + handled = UNHANDLED; + hsr_ec = HSR_EC(vmexit->u.hyp.hsr); + hsr_il = HSR_IL(vmexit->u.hyp.hsr); + hsr_iss = HSR_ISS(vmexit->u.hyp.hsr); + + switch(hsr_ec) { + case HSR_EC_UNKN: + printf("%s:%d Unknown exception\n",__func__, __LINE__); + break; + case HSR_EC_WFI_WFE: + printf("%s:%d WFI/WFE exception - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_MCR_MRC_CP15: + printf("%s:%d MCR/MRC CP15 - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_MCRR_MRRC_CP15: + printf("%s:%d MCRR/MRRC CP15 - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_MCR_MRC_CP14: + printf("%s:%d MCR/MRC CP14 - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_LDC_STC_CP14: + printf("%s:%d LDC/STC CP14 - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_HCPTR_CP0_CP13: + printf("%s:%d MCR/MRC CP14 - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_MRC_VMRS_CP10: + printf("%s:%d MCR/VMRS CP14 - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_BXJ: + printf("%s:%d BXJ - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_MRRC_CP14: + printf("%s:%d MRRC CP14 - unimplemented\n", + __func__, __LINE__); + break; + case HSR_EC_SVC: + panic("%s:%d SVC called from hyp-mode\n", + __func__, __LINE__); + break; + case HSR_EC_HVC: + printf("%s:%d HVC called from hyp-mode - unsupported\n", + __func__, __LINE__); + break; + case HSR_EC_SMC: + printf("%s:%d SMC called from hyp-mode - unsupported\n", + __func__, __LINE__); + break; + case HSR_EC_PABT: + printf("%s:%d PABT from guest at address %x - unimplemented\n", + __func__, __LINE__, vmexit->u.hyp.hifar); + break; + case HSR_EC_PABT_HYP: + printf("%s:%d PABT taken from HYP mode at %x with HSR: %x\n", + __func__, __LINE__, vmexit->u.hyp.hifar, vmexit->u.hyp.hsr); + break; + case HSR_EC_DABT: + if (HSR_ISS_ISV(hsr_iss)) { + if (LPAE_TRANSLATION_FAULT(HSR_ISS_DFSC(hsr_iss))) { + /* + * The page is not mapped and a possible MMIO access + * Build the instruction info and return to user to emulate + */ + vmexit->exitcode = VM_EXITCODE_INST_EMUL; + vmexit->u.inst_emul.gpa = ((uint64_t)(vmexit->u.hyp.hpfar >> 4) << 12) | + (vmexit->u.hyp.hdfar & ((1 << 12) - 1)); + 
vmexit->u.inst_emul.vie.access_size = HSR_ISS_ACCESS_SIZE(HSR_ISS_SAS(hsr_iss)); + vmexit->u.inst_emul.vie.sign_extend = HSR_ISS_SSE(hsr_iss); + vmexit->u.inst_emul.vie.dir = HSR_ISS_WnR(hsr_iss); + vmexit->u.inst_emul.vie.reg = get_vm_reg_name(HSR_ISS_SRT(hsr_iss), + vmexit->u.hyp.mode); +// printf("%s:%d gpa: %llx, as: %d, se: %d, dir: %d, reg: %d\n",__func__, __LINE__, +// vmexit->u.inst_emul.gpa, vmexit->u.inst_emul.vie.access_size, vmexit->u.inst_emul.vie.sign_extend, +// vmexit->u.inst_emul.vie.dir, vmexit->u.inst_emul.vie.reg); + + } else { + printf("%s:%d DABT from guest at address %x with hsr %x with a stage-2 fault != translation\n", + __func__, __LINE__, vmexit->u.hyp.hdfar, vmexit->u.hyp.hsr); + } + } else { + printf("%s:%d DABT from guest at address %x with hsr %x, hpfar: %x without a stage-2 fault translation\n", + __func__, __LINE__, vmexit->u.hyp.hdfar, vmexit->u.hyp.hsr, vmexit->u.hyp.hpfar); + } + break; + case HSR_EC_DABT_HYP: + printf("%s:%d DABT taken from HYP mode at %x with HSR: %x\n", + __func__, __LINE__, vmexit->u.hyp.hdfar, vmexit->u.hyp.hsr); + break; + default: + printf("%s:%d Unknown HSR_EC code: %x\n",__func__, __LINE__, hsr_ec); + break; + } + return handled; +} + +static int +hyp_exit_process(struct hyp *hyp, int vcpu, struct vm_exit *vmexit) +{ + int handled; + struct hypctx *hypctx; + + hypctx = &hyp->ctx[vcpu]; + + handled = UNHANDLED; + + vmexit->exitcode = VM_EXITCODE_BOGUS; + + switch(vmexit->u.hyp.exception_nr) { + case EXCEPTION_UNDEF: + panic("%s undefined exception\n", __func__); + break; + case EXCEPTION_SVC: + panic("%s take SVC exception to hyp mode\n", __func__); + break; + /* The following are in the same category and are distinguished using HSR */ + case EXCEPTION_PABT: + case EXCEPTION_DABT: + case EXCEPTION_HVC: +// printf("%s PABT/DABT/HYP exception\n",__func__); +// printf("%s HSR: %x, HIFAR: %x, HDFAR: %x, HPFAR: %x\n", __func__, +// vmexit->u.hyp.hsr, vmexit->u.hyp.hifar, +// vmexit->u.hyp.hdfar, vmexit->u.hyp.hpfar); + + vmexit->exitcode = VM_EXITCODE_HYP; + handled = hyp_handle_exception(vmexit); + + break; + case EXCEPTION_FIQ: + printf("%s FIQ unsupported exception\n",__func__); + vmexit->exitcode = VM_EXITCODE_HYP; + break; + case EXCEPTION_IRQ: + printf("%s IRQ unsupported exception\n",__func__); + vmexit->exitcode = VM_EXITCODE_HYP; + break; + default: + printf("%s unknown exception: %d\n",__func__, vmexit->u.hyp.exception_nr); + vmexit->exitcode = VM_EXITCODE_HYP; + break; + } + return (handled); +} + +static int +arm_vmrun(void *arg, int vcpu, register_t pc, pmap_t pmap, + void *rend_cookie, void *suspended_cookie) +{ + int rc; + int handled; + struct hyp *hyp; + struct hypctx *hypctx; + struct vm *vm; + struct vm_exit *vmexit; + + hyp = arg; + hypctx = &hyp->ctx[vcpu]; + vm = hyp->vm; + vmexit = vm_exitinfo(vm, vcpu); + + hypctx->regs.r_pc = (uint32_t) pc; + + do { + handled = UNHANDLED; + + vgic_flush_hwstate(hypctx); + + rc = vmm_call_hyp((void *)hyp_enter_guest, hypctx); + + vmexit->pc = hypctx->regs.r_pc; + + vmexit->u.hyp.exception_nr = rc; + vmexit->inst_length = HSR_IL(hypctx->exit_info.hsr) ? 
4 : 2; + + vmexit->u.hyp.hsr = hypctx->exit_info.hsr; + vmexit->u.hyp.hifar = hypctx->exit_info.hifar; + vmexit->u.hyp.hdfar = hypctx->exit_info.hdfar; + vmexit->u.hyp.hpfar = hypctx->exit_info.hpfar; + vmexit->u.hyp.mode = hypctx->regs.r_cpsr & PSR_MODE; + + handled = hyp_exit_process(hyp, vcpu, vmexit); + + vgic_sync_hwstate(hypctx); + + } while(handled); + return 0; +} + +static void +arm_vmcleanup(void *arg) +{ + struct hyp *hyp = arg; + + /* Unmap the hyp structure from HYP-mode */ + lpae_vmmmap_set(NULL, + (lpae_vm_vaddr_t)hyp, + (lpae_vm_paddr_t)vtophys(hyp), + sizeof(struct hyp), + VM_PROT_NONE); + + lpae_vmcleanup(&(hyp->l1pd[0])); + free(hyp, M_HYP); +} + +static uint32_t * +hypctx_regptr(struct hypctx *hypctx, int reg) +{ + + switch (reg) { + case VM_REG_GUEST_R0: + return (&hypctx->regs.r[0]); + case VM_REG_GUEST_R1: + return (&hypctx->regs.r[1]); + case VM_REG_GUEST_R2: + return (&hypctx->regs.r[2]); + case VM_REG_GUEST_R3: + return (&hypctx->regs.r[3]); + case VM_REG_GUEST_R4: + return (&hypctx->regs.r[4]); + case VM_REG_GUEST_R5: + return (&hypctx->regs.r[5]); + case VM_REG_GUEST_R6: + return (&hypctx->regs.r[6]); + case VM_REG_GUEST_R7: + return (&hypctx->regs.r[7]); + case VM_REG_GUEST_R8: + return (&hypctx->regs.r[8]); + case VM_REG_GUEST_R9: + return (&hypctx->regs.r[9]); + case VM_REG_GUEST_R10: + return (&hypctx->regs.r[10]); + case VM_REG_GUEST_R11: + return (&hypctx->regs.r[11]); + case VM_REG_GUEST_R12: + return (&hypctx->regs.r[12]); + case VM_REG_GUEST_SP: + return (&hypctx->regs.r_sp); + case VM_REG_GUEST_LR: + return (&hypctx->regs.r_lr); + case VM_REG_GUEST_PC: + return (&hypctx->regs.r_pc); + case VM_REG_GUEST_CPSR: + return (&hypctx->regs.r_cpsr); + case VM_REG_GUEST_SP_SVC: + return (&hypctx->sp_svc); + case VM_REG_GUEST_LR_SVC: + return (&hypctx->lr_svc); + case VM_REG_GUEST_SP_ABT: + return (&hypctx->sp_abt); + case VM_REG_GUEST_LR_ABT: + return (&hypctx->lr_abt); + case VM_REG_GUEST_SP_UND: + return (&hypctx->sp_und); + case VM_REG_GUEST_LR_UND: + return (&hypctx->lr_und); + case VM_REG_GUEST_SP_IRQ: + return (&hypctx->sp_irq); + case VM_REG_GUEST_LR_IRQ: + return (&hypctx->lr_irq); + case VM_REG_GUEST_R8_FIQ: + return (&hypctx->r8_fiq); + case VM_REG_GUEST_R9_FIQ: + return (&hypctx->r9_fiq); + case VM_REG_GUEST_R10_FIQ: + return (&hypctx->r10_fiq); + case VM_REG_GUEST_R11_FIQ: + return (&hypctx->r11_fiq); + case VM_REG_GUEST_R12_FIQ: + return (&hypctx->r12_fiq); + case VM_REG_GUEST_SP_FIQ: + return (&hypctx->sp_fiq); + case VM_REG_GUEST_LR_FIQ: + return (&hypctx->lr_fiq); + default: + break; + } + return (NULL); +} + +static int +arm_getreg(void *arg, int vcpu, int reg, uint64_t *retval) +{ + uint32_t *regp; + int running, hostcpu; + struct hyp *hyp = arg; + + running = vcpu_is_running(hyp->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("arm_getreg: %s%d is running", vm_name(hyp->vm), vcpu); + + if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) { + *retval = *regp; + return (0); + } else + return (EINVAL); +} + +static int +arm_setreg(void *arg, int vcpu, int reg, uint64_t val) +{ + uint32_t *regp; + struct hyp *hyp = arg; + int running, hostcpu; + + running = vcpu_is_running(hyp->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("arm_setreg: %s%d is running", vm_name(hyp->vm), vcpu); + + if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) { + *regp = val; + return (0); + } else + return (EINVAL); +} + +struct vmm_ops vmm_ops_arm = { + arm_init, + arm_cleanup, + arm_restore, + arm_vminit, + arm_vmrun, + arm_vmcleanup, + lpae_vmmmap_set, + lpae_vmmmap_get, +
arm_getreg, + arm_setreg, + NULL, /* vmi_get_cap_t */ + NULL /* vmi_set_cap_t */ +}; Index: sys/arm/vmm/hyp.h =================================================================== --- sys/arm/vmm/hyp.h +++ sys/arm/vmm/hyp.h @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_HYP_H_ +#define _VMM_HYP_H_ + +/* Hyp Exceptions */ +#define EXCEPTION_RESET 0 +#define EXCEPTION_UNDEF 1 +#define EXCEPTION_SVC 2 +#define EXCEPTION_PABT 3 +#define EXCEPTION_DABT 4 +#define EXCEPTION_HVC 5 +#define EXCEPTION_FIQ 6 +#define EXCEPTION_IRQ 7 + + + +#define HSR_EC_SHIFT 26 +#define HSR_IL_SHIFT 25 +#define HSR_IL_MASK (1 << HSR_IL_SHIFT) +#define HSR_ISS_MASK ((1 << 25) - 1) + +#define HSR_EC(x) (x >> HSR_EC_SHIFT) +#define HSR_IL(x) ((x & HSR_IL_MASK) >> HSR_IL_SHIFT) +#define HSR_ISS(x) (x & HSR_ISS_MASK) + +#define HSR_EC_UNKN 0x00 +#define HSR_EC_WFI_WFE 0x01 +#define HSR_EC_MCR_MRC_CP15 0x03 +#define HSR_EC_MCRR_MRRC_CP15 0x04 +#define HSR_EC_MCR_MRC_CP14 0x05 +#define HSR_EC_LDC_STC_CP14 0x06 +#define HSR_EC_HCPTR_CP0_CP13 0x07 +#define HSR_EC_MRC_VMRS_CP10 0x08 +#define HSR_EC_BXJ 0x0A +#define HSR_EC_MRRC_CP14 0x0C + +#define HSR_EC_SVC 0x11 +#define HSR_EC_HVC 0x12 +#define HSR_EC_SMC 0x13 +#define HSR_EC_PABT 0x20 +#define HSR_EC_PABT_HYP 0x21 +#define HSR_EC_DABT 0x24 +#define HSR_EC_DABT_HYP 0x25 + +#define HSR_ISS_ISV(x) ((x >> 24) & 1) +#define HSR_ISS_SAS(x) ((x >> 22) & 3) +#define HSR_ISS_SSE(x) ((x >> 21) & 1) +#define HSR_ISS_SRT(x) ((x >> 16) & 0xf) +#define HSR_ISS_EA(x) ((x >> 9) & 1) +#define HSR_ISS_CM(x) ((x >> 8) & 1) +#define HSR_ISS_S1PTW(x) ((x >> 7) & 1) +#define HSR_ISS_WnR(x) ((x >> 6) & 1) +#define HSR_ISS_DFSC(x) ((x >> 0) & 0x3f) + +#define HSR_ISS_ACCESS_SIZE(x) ((x == 0) ? 1 : (x == 1) ? 
2 : 4) + + +#define VTTBR_VMID_SHIFT 16 +#define VTTBR_VMID_MASK 0xff + +/* Hyp System Control Register (HSCTLR) bits */ +#define HSCTLR_TE (1 << 30) +#define HSCTLR_EE (1 << 25) +#define HSCTLR_FI (1 << 21) +#define HSCTLR_WXN (1 << 19) +#define HSCTLR_I (1 << 12) +#define HSCTLR_C (1 << 2) +#define HSCTLR_A (1 << 1) +#define HSCTLR_M (1 << 0) +#define HSCTLR_MASK (HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I | HSCTLR_WXN | HSCTLR_FI | HSCTLR_EE | HSCTLR_TE) +/* Hyp Coprocessor Trap Register */ +#define HCPTR_TCP(x) (1 << x) +#define HCPTR_TCP_MASK (0x3fff) +#define HCPTR_TASE (1 << 15) +#define HCPTR_TTA (1 << 20) +#define HCPTR_TCPAC (1 << 31) + +/* TTBCR and HTCR Registers bits */ +#define TTBCR_EAE (1 << 31) +#define TTBCR_IMP (1 << 30) +#define TTBCR_SH1 (3 << 28) +#define TTBCR_ORGN1 (3 << 26) +#define TTBCR_IRGN1 (3 << 24) +#define TTBCR_EPD1 (1 << 23) +#define TTBCR_A1 (1 << 22) +#define TTBCR_T1SZ (7 << 16) +#define TTBCR_SH0 (3 << 12) +#define TTBCR_ORGN0 (3 << 10) +#define TTBCR_IRGN0 (3 << 8) +#define TTBCR_EPD0 (1 << 7) +#define TTBCR_T0SZ (7 << 0) +#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) + +/* Virtualization Translation Control Register (VTCR) bits */ +#define VTCR_RES (1 << 31) +#define VTCR_SH0 (3 << 12) +#define VTCR_ORGN0 (3 << 10) +#define VTCR_IRGN0 (3 << 8) +#define VTCR_SL0 (3 << 6) +#define VTCR_S (1 << 4) +#define VTCR_T0SZ (0xf) +#define VTCR_MASK (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0 | VTCR_SL0 | VTCR_S | VTCR_T0SZ) +#define VTCR_HTCR_SH (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0) +#define VTCR_SL_L1 (1 << 6) /* Starting-level: 1 */ +/* Stage 2 address input size is 2^(32-VTCR T0SZ) (ARM - B4.1.159) */ +#define VMM_IPA_LEN 32 +#define VMM_VTCR_T0SZ ((32 - VMM_IPA_LEN) & VTCR_T0SZ) +/* The sign bit VTCR.S = VTCR.T0SZ[4] */ +#define VMM_VTCR_S (((VMM_VTCR_T0SZ) << 1) & VTCR_S) + +/* Hyp Configuration Register (HCR) bits */ +#define HCR_TGE (1 << 27) +#define HCR_TVM (1 << 26) +#define HCR_TTLB (1 << 25) +#define HCR_TPU (1 << 24) +#define HCR_TPC (1 << 23) +#define HCR_TSW (1 << 22) +#define HCR_TAC (1 << 21) +#define HCR_TIDCP (1 << 20) +#define HCR_TSC (1 << 19) +#define HCR_TID3 (1 << 18) +#define HCR_TID2 (1 << 17) +#define HCR_TID1 (1 << 16) +#define HCR_TID0 (1 << 15) +#define HCR_TWE (1 << 14) +#define HCR_TWI (1 << 13) +#define HCR_DC (1 << 12) +#define HCR_BSU (3 << 10) +#define HCR_BSU_IS (1 << 10) +#define HCR_FB (1 << 9) +#define HCR_VA (1 << 8) +#define HCR_VI (1 << 7) +#define HCR_VF (1 << 6) +#define HCR_AMO (1 << 5) +#define HCR_IMO (1 << 4) +#define HCR_FMO (1 << 3) +#define HCR_PTW (1 << 2) +#define HCR_SWIO (1 << 1) +#define HCR_VM 1 +/* + * B4.1.65 HCR, Hyp Configuration Register, + * + * HCR_TSW - Trap set/way cache maintenance operations + * HCR_TAC - Trap ACTLR accessses + * HCR_TIDCP - Trap lockdown + * HCR_TSC - Trap SMC instruction + * HCR_TWE - Trap WFE instruction + * HCR_TWI - Trap WFI instruction + * HCR_BSU_IS - Barrier shareability upgrade + * HCR_FB - Force broadcast TLB/branch predictor/ cache invalidate across ISB + * HCR_AMO - Overrides the CPSR.A bit, and enables signaling by the VA bit + * HCR_IMO - Overrides the CPSR.I bit, and enables signaling by the VI bit + * HCR_FMO - Overrides the CPSR.F bit, and enables signaling by the VF bit + * HCR_SWIO - Set/way invalidation override + * HCR_VM - Virtualization MMU enable (stage 2) + */ +#define HCR_GUEST_MASK (HCR_TSW | HCR_TAC | HCR_TIDCP | \ + HCR_TSC | HCR_TWE | HCR_TWI | HCR_BSU_IS | HCR_FB | \ + HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO | HCR_VM) + +/* Hyp 
Coprocessor Trap Register */ +#define HCPTR_TCP(x) (1 << x) +#define HCPTR_TCP_MASK (0x3fff) +#define HCPTR_TASE (1 << 15) +#define HCPTR_TTA (1 << 20) +#define HCPTR_TCPAC (1 << 31) + +/* Hyp System Trap Register */ +#define HSTR_T(x) (1 << x) +#define HSTR_TTEE (1 << 16) +#define HSTR_TJDBX (1 << 17) + +/* + * Memory region attributes for LPAE (defined in pgtable-3level.h): + * + * n = AttrIndx[2:0] + * + * n MAIR + * UNCACHED 000 00000000 + * BUFFERABLE 001 01000100 + * DEV_WC 001 01000100 + * WRITETHROUGH 010 10101010 + * WRITEBACK 011 11101110 + * DEV_CACHED 011 11101110 + * DEV_SHARED 100 00000100 + * DEV_NONSHARED 100 00000100 + * unused 101 + * unused 110 + * WRITEALLOC 111 11111111 + */ +#define MAIR0 0xeeaa4400 +#define MAIR1 0xff000004 +#define HMAIR0 MAIR0 +#define HMAIR1 MAIR1 + +#define HYPCTX_REGS_R(x) (HYPCTX_REGS + x * 4) + +/* GIC Hypervisor specific registers */ +#define GICH_HCR 0x0 +#define GICH_VTR 0x4 +#define GICH_VMCR 0x8 +#define GICH_MISR 0x10 +#define GICH_EISR0 0x20 +#define GICH_EISR1 0x24 +#define GICH_ELSR0 0x30 +#define GICH_ELSR1 0x34 +#define GICH_APR 0xF0 +#define GICH_LR0 0x100 + +#define GICH_HCR_EN (1 << 0) +#define GICH_HCR_UIE (1 << 1) + +#define GICH_LR_VIRTID (0x3FF << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT 10 +#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT) +#define GICH_LR_STATE (3 << 28) +#define GICH_LR_PENDING (1 << 28) +#define GICH_LR_ACTIVE (1 << 29) +#define GICH_LR_EOI (1 << 19) + +#define GICH_MISR_EOI (1 << 0) +#define GICH_MISR_U (1 << 1) + +#endif + Index: sys/arm/vmm/hyp.S =================================================================== --- sys/arm/vmm/hyp.S +++ sys/arm/vmm/hyp.S @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "assym.s" +#include +#include +#include +#include +#include + +#include "hyp.h" +#include "hyp_assym.h" +#include "hyp_helpers.h" + +.text + .globl hyp_code_start + .globl hyp_code_end + .globl hyp_vector + .globl init_hyp_vector + + .p2align 12 +hyp_code_start: +ENTRY(vmm_call_hyp) + hvc #0 + bx lr +END(vmm_call_hyp) +/* + * int hyp_enter_guest(struct *hyp_vmxctx); + * - r0 pointer to the struct hyp_vmxctx + */ +ENTRY(hyp_enter_guest) + mcr p15, 4, r0, c13, c0, 2 @ Store hyp_vmxctx into HTPIDR + save_host_regs + + restore_vgic_regs + + /* Save HOST CP15 registers */ + load_cp15_regs_batch1 @ Load in r2-r12 CP15 regs + push {r2-r12} + load_cp15_regs_batch2 @ Load in r2-r12 CP15 regs + push {r2-r12} + load_cp15_regs_batch3 @ Load in r2-r6 CP15 regs + push {r2-r6} + + /* Load GUEST CP15 registers */ + load_guest_cp15_regs_batch1 + store_cp15_regs_batch1 + load_guest_cp15_regs_batch2 + store_cp15_regs_batch2 + load_guest_cp15_regs_batch3 + store_cp15_regs_batch3 + + /* Enable stage-2 MMU, trap interrupts */ + ldr r1, [r0, #HYPCTX_HCR] + mcr p15, 4, r1, c1, c1, 0 + + /* Set MIDR and MPIDR for the Guest */ + ldr r1, [r0, #HYPCTX_MIDR] + mcr p15, 4, r1, c0, c0, 0 + ldr r1, [r0, #HYPCTX_MPIDR] + mcr p15, 4, r1, c0, c0, 5 + + /* Set VTTBR for stage-2 translation */ + ldr r1, [r0, #HYPCTX_HYP] + add r1, r1, #HYP_VTTBR + ldrd r2, r3, [r1] + mcrr p15, 6, r2, r3, c2 + + /* Trap access to the CP10/CP11 [vfp/simd] */ + mrc p15, 4, r1, c1, c1, 2 + ldr r2, =(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + orr r1, r1, r2 + mcr p15, 4, r1, c1, c1, 2 + + restore_guest_regs + + eret +hyp_exit_guest: + /* + * r0 - hypctx pointer + * r1 - exception code + * guest r0-r2 saved on stack when trapping in HYP mode + */ + + /* Save exit status registers */ + mrc p15, 4, r2, c5, c2, 0 @ Load HSR + str r2, [r0, #HYPCTX_EXIT_INFO_HSR] + mrc p15, 4, r2, c6, c0, 2 @ Load HIFAR + str r2, [r0, #HYPCTX_EXIT_INFO_HIFAR] + mrc p15, 4, r2, c6, c0, 0 @ Load HDFAR + str r2, [r0, #HYPCTX_EXIT_INFO_HDFAR] + mrc p15, 4, r2, c6, c0, 4 @ Load HPFAR + str r2, [r0, #HYPCTX_EXIT_INFO_HPFAR] + + save_guest_regs + + /* Disable trap access to the CP10/CP11 [vfp/simd] */ + mrc p15, 4, r2, c1, c1, 2 + ldr r3, =(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + bic r2, r2, r3 + mcr p15, 4, r2, c1, c1, 2 + + /* Set VTTBR to 0 (VMID to 0) */ + mov r2, #0 + mov r3, #0 + mcrr p15, 6, r2, r3, c2 + + /* Set MIDR and MPIDR at hardware values */ + mrc p15, 0, r2, c0, c0, 0 + mcr p15, 4, r2, c0, c0, 0 + mrc p15, 0, r2, c0, c0, 5 + mcr p15, 4, r2, c0, c0, 5 + + /* Disable all traps - HCR */ + mov r2, #0 + mcr p15, 4, r2, c1, c1, 0 + + + /* Save guest CP15 registers */ + load_cp15_regs_batch1 + store_guest_cp15_regs_batch1 + load_cp15_regs_batch2 + store_guest_cp15_regs_batch2 + load_cp15_regs_batch3 + store_guest_cp15_regs_batch3 + + /* Load HOST CP15 registers in reverse order from the stack */ + pop {r2-r6} + store_cp15_regs_batch3 @ Load in r2-r6 CP15 regs + pop {r2-r12} + store_cp15_regs_batch2 @ Load in r2-r12 CP15 regs + pop {r2-r12} + store_cp15_regs_batch1 @ Load in r2-r12 CP15 regs + + save_vgic_regs + + restore_host_regs + + mov r0, r1 @ r0 must hold the return value + bx lr @ go back to the host ("Returned from function" comment) +END(hyp_enter_guest) + +/* + * void vmm_stub_install(void *hypervisor_stub_vect); + * - r0 - the pointer to the stub vector + */ +ENTRY(vmm_stub_install) + + /* Install hypervisor stub vectors. */ + mcr p15, 4, r0, c12, c0, 0 @ set HVBAR + + /* Disable all the traps in the hypervisor. 
*/ + mov r0, #0 + mcr p15, 4, r0, c1, c1, 0 @ HCR + mcr p15, 4, r0, c1, c1, 2 @ HCPTR + mcr p15, 4, r0, c1, c1, 3 @ HSTR + mcr p15, 4, r0, c1, c0, 0 @ HSCTLR + + /* Don't disable access to perf-mon from PL0,1 */ + mrc p15, 4, r0, c1, c1, 1 @ HDCR + and r0, #0x1f @ Preserve HPMN + mcr p15, 4, r0, c1, c1, 1 @ HDCR + + eret +END(vmm_stub_install) + +ENTRY(vmm_set_get_hvbar) + /* + * If the first parameter is -1 than return the + * exception vector (HVBAR), otherwise set it to + * the value of it. + */ + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + mcrne p15, 4, r0, c12, c0, 0 @ set HVBAR + bx lr +END(vmm_set_get_hvbar) + + .align 5 +init_hyp_vector: + .word 0 /* Reset */ + .word 0 /* undev */ + .word 0 /* SVC */ + .word 0 /* PABT */ + .word 0 /* DABT */ + b hyp_init_hvc /* HYP-Mode */ + .word 0 /* FIQ */ + .word 0 /* IRQ */ + +hyp_init_hvc: + mcr p15, 4, r0, c12, c0, 0 @ set HVBAR to the new vector + mov sp, r1 @ set SP. r1 contains the stack pointer + mcrr p15, 4, r2, r3, c2 @ set the HTTBR (r2 is the low word, r3 is the high word) + isb + + @ Set HTCR.T0SZ=0 so x=5 (ARM man: B4.1.76) + @ Set HTCR.ORGN0/.IRGN0/.SH0 to 0 to disable cacheability and shareability + @ HTCR_MASK contains all the above bits + mrc p15, 4, r0, c2, c0, 2 @ HTCR + ldr r1,=HTCR_MASK + bic r0, r0, r1 + mcr p15, 4, r0, c2, c0, 2 @ HTCR + + @ VTCR for supporting only 32 bit IPA [see VMM_VTCR_T0SZ in hyp.h] + ldr r0, =(VTCR_RES | VTCR_SL_L1 | VMM_VTCR_T0SZ | VMM_VTCR_S) + mcr p15, 4, r0, c2, c1, 2 @ VTCR + + @ Set the HMAIR0/1 (same as MAIR0/1) registers for AttrIndx[2:0] + ldr r0, =HMAIR0 + mcr p15, 4, r0, c10, c2, 0 + ldr r0, =HMAIR1 + mcr p15, 4, r0, c10, c2, 1 + + @ Flush the TLB entries from Hyp-Mode + mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH + dsb ish + + mrc p15, 4, r0, c1, c0, 0 @ Read current HSCTLR + ldr r2, =HSCTLR_MASK + bic r0, r0, r2 + + mrc p15, 0, r1, c1, c0, 0 @ Read the current SCTLR + ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) + and r1, r1, r2 + ldr r2, =(HSCTLR_M | HSCTLR_A) + orr r1, r1, r2 + orr r0, r0, r1 + isb + mcr p15, 4, r0, c1, c0, 0 @ Set the new HSCTLR + eret + + .align 5 +hyp_vector: + b hyp_reset /* Reset */ + b hyp_undef /* undef */ + b hyp_svc /* SVC */ + b hyp_pabt /* PABT */ + b hyp_dabt /* DABT */ + b hyp_hvc /* HYP-Mode */ + b hyp_fiq /* FIQ */ + b hyp_irq /* IRQ */ + .align + +hyp_reset: + b loop + + .align +hyp_undef: + ldr r0, =und_die_str + mov r1, #EXCEPTION_UNDEF + bl handle_bad_exception +und_die_str: + .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" + + .align +hyp_svc: + ldr r0, =svc_die_str + mov r1, #EXCEPTION_SVC + bl handle_bad_exception +svc_die_str: + .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" + + .align +hyp_pabt: + ldr r0, =pabt_die_str + mov r1, #EXCEPTION_PABT + bl handle_bad_exception +pabt_die_str: + .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" + + .align +hyp_dabt: + ldr r0, =dabt_die_str + mov r1, #EXCEPTION_DABT + bl handle_bad_exception +dabt_die_str: + .ascii "unexpected data abort in Hyp mode at: %#08x\n" + + .align +hyp_hvc: + push {r0, r1, r2} @ Save registers in order to use them + mrc p15, 4, r1, c5, c2, 0 @ Check HSR for explicit HVC call + lsr r0, r1, #HSR_EC_SHIFT + cmp r0, #HSR_EC_HVC + bne guest_trap + + mrrc p15, 6, r0, r1, c2 @ Check VMID=0 to be sure that host called HVC + lsr r1, r1, #VTTBR_VMID_SHIFT + and r1, r1, #VTTBR_VMID_MASK + cmp r1, #0 + bne guest_trap + +host_called_hyp: + pop {r0, r1, r2} @ Restore registers + push {lr} + mrs lr, SPSR + push {lr} + + /* Build param list 
for the function pointer in r0 */ + mov lr, r0 + mov r0, r1 + mov r1, r2 + mov r2, r3 + blx lr + /* Returned from function */ + pop {lr} + msr SPSR_csxf, lr + pop {lr} + eret + +guest_trap: + /* Load hypctx in r0 from HTPIDR */ + mrc p15, 4, r0, c13, c0, 2 + mov r1, #EXCEPTION_HVC + + b hyp_exit_guest + + .align +hyp_fiq: + b loop + + .align +hyp_irq: + push {r0, r1, r2} @ Save registers in order to use them + /* Load hypctx pointer to r0 */ + mrc p15, 4, r0, c13, c0, 2 + mov r1, #EXCEPTION_IRQ + b hyp_exit_guest + .align + +ENTRY(handle_bad_exception) + /* We have in r0 pointer to the panic string and in r1 the exception code */ + mrrc p15, 6, r3, r2, c2 @ Read VTTBR + lsr r2, r2, #16 + ands r2, r2, #0xff + bne guest_bad_exception + + mrs r2, cpsr + bic r2, r2, #PSR_MODE + orr r2, r2, #PSR_SVC32_MODE + msr spsr_cxsf, r2 + mrs r1, ELR_hyp @ We don't need anymore the exception code, we store 2nd param for panic */ + ldr r3, =panic + msr ELR_hyp, r3 + eret + +guest_bad_exception: + push {r0, r1, r2} @ Emulate a push to the stack to respect hyp_exit_guest restore convention + /* Load hypctx pointer to r0 */ + mrc p15, 4, r0, c13, c0, 2 + b hyp_exit_guest +END(handle_bad_exception) + +loop: + b loop + +hyp_code_end: Index: sys/arm/vmm/hyp_genassym.c =================================================================== --- sys/arm/vmm/hyp_genassym.c +++ sys/arm/vmm/hyp_genassym.c @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include "arm.h" + +ASSYM(HYPCTX_HYP, offsetof(struct hypctx, hyp)); +ASSYM(HYP_VTTBR, offsetof(struct hyp, vttbr)); + +ASSYM(HYPCTX_MIDR, offsetof(struct hypctx, midr)); +ASSYM(HYPCTX_MPIDR, offsetof(struct hypctx, mpidr)); +ASSYM(HYPCTX_HCR, offsetof(struct hypctx, hcr)); + +ASSYM(HYPCTX_SP_und, offsetof(struct hypctx, sp_und)); +ASSYM(HYPCTX_LR_und, offsetof(struct hypctx, lr_und)); +ASSYM(HYPCTX_SPSR_und, offsetof(struct hypctx, spsr_und)); +ASSYM(HYPCTX_SP_svc, offsetof(struct hypctx, sp_svc)); +ASSYM(HYPCTX_LR_svc, offsetof(struct hypctx, lr_svc)); +ASSYM(HYPCTX_SPSR_svc, offsetof(struct hypctx, spsr_svc)); +ASSYM(HYPCTX_SP_abt, offsetof(struct hypctx, sp_abt)); +ASSYM(HYPCTX_LR_abt, offsetof(struct hypctx, lr_abt)); +ASSYM(HYPCTX_SPSR_abt, offsetof(struct hypctx, spsr_abt)); +ASSYM(HYPCTX_SP_irq, offsetof(struct hypctx, sp_irq)); +ASSYM(HYPCTX_LR_irq, offsetof(struct hypctx, lr_irq)); +ASSYM(HYPCTX_SPSR_irq, offsetof(struct hypctx, spsr_irq)); +ASSYM(HYPCTX_SP_fiq, offsetof(struct hypctx, sp_fiq)); +ASSYM(HYPCTX_LR_fiq, offsetof(struct hypctx, lr_fiq)); +ASSYM(HYPCTX_SPSR_fiq, offsetof(struct hypctx, spsr_fiq)); +ASSYM(HYPCTX_r8_fiq, offsetof(struct hypctx, r8_fiq)); +ASSYM(HYPCTX_r9_fiq, offsetof(struct hypctx, r9_fiq)); +ASSYM(HYPCTX_r10_fiq, offsetof(struct hypctx, r10_fiq)); +ASSYM(HYPCTX_r11_fiq, offsetof(struct hypctx, r11_fiq)); +ASSYM(HYPCTX_r12_fiq, offsetof(struct hypctx, r12_fiq)); + +ASSYM(HYPCTX_REGS, offsetof(struct hypctx, regs)); +ASSYM(HYPCTX_REGS_LR, offsetof(struct hypctx, regs.r_lr)); +ASSYM(HYPCTX_REGS_SP, offsetof(struct hypctx, regs.r_sp)); +ASSYM(HYPCTX_REGS_PC, offsetof(struct hypctx, regs.r_pc)); +ASSYM(HYPCTX_REGS_CPSR, offsetof(struct hypctx, regs.r_cpsr)); + + +ASSYM(HYPCTX_CP15_SCTLR, offsetof(struct hypctx, cp15_sctlr)); +ASSYM(HYPCTX_CP15_CPACR, offsetof(struct hypctx, cp15_cpacr)); +ASSYM(HYPCTX_CP15_TTBCR, offsetof(struct hypctx, cp15_ttbcr)); +ASSYM(HYPCTX_CP15_DACR, offsetof(struct hypctx, cp15_dacr)); +ASSYM(HYPCTX_CP15_TTBR0, offsetof(struct hypctx, cp15_ttbr0)); +ASSYM(HYPCTX_CP15_TTBR1, offsetof(struct hypctx, cp15_ttbr1)); +ASSYM(HYPCTX_CP15_PRRR, offsetof(struct hypctx, cp15_prrr)); +ASSYM(HYPCTX_CP15_NMRR, offsetof(struct hypctx, cp15_nmrr)); +ASSYM(HYPCTX_CP15_CSSELR, offsetof(struct hypctx, cp15_csselr)); +ASSYM(HYPCTX_CP15_CID, offsetof(struct hypctx, cp15_cid)); +ASSYM(HYPCTX_CP15_TID_URW, offsetof(struct hypctx, cp15_tid_urw)); +ASSYM(HYPCTX_CP15_TID_URO, offsetof(struct hypctx, cp15_tid_uro)); +ASSYM(HYPCTX_CP15_TID_PRIV, offsetof(struct hypctx, cp15_tid_priv)); +ASSYM(HYPCTX_CP15_DFSR, offsetof(struct hypctx, cp15_dfsr)); +ASSYM(HYPCTX_CP15_IFSR, offsetof(struct hypctx, cp15_ifsr)); +ASSYM(HYPCTX_CP15_ADFSR, offsetof(struct hypctx, cp15_adfsr)); +ASSYM(HYPCTX_CP15_AIFSR, offsetof(struct hypctx, cp15_aifsr)); +ASSYM(HYPCTX_CP15_DFAR, offsetof(struct hypctx, cp15_dfar)); +ASSYM(HYPCTX_CP15_IFAR, offsetof(struct hypctx, cp15_ifar)); +ASSYM(HYPCTX_CP15_VBAR, offsetof(struct hypctx, cp15_vbar)); +ASSYM(HYPCTX_CP15_CNTKCTL, offsetof(struct hypctx, cp15_cntkctl)); +ASSYM(HYPCTX_CP15_PAR, offsetof(struct hypctx, cp15_par)); +ASSYM(HYPCTX_CP15_AMAIR0, offsetof(struct hypctx, cp15_amair0)); +ASSYM(HYPCTX_CP15_AMAIR1, offsetof(struct hypctx, cp15_amair1)); + +ASSYM(HYPCTX_EXIT_INFO_HSR, offsetof(struct hypctx, exit_info.hsr)); +ASSYM(HYPCTX_EXIT_INFO_HDFAR, offsetof(struct hypctx, exit_info.hdfar)); +ASSYM(HYPCTX_EXIT_INFO_HIFAR, offsetof(struct 
hypctx, exit_info.hifar)); +ASSYM(HYPCTX_EXIT_INFO_HPFAR, offsetof(struct hypctx, exit_info.hpfar)); + +ASSYM(HYPCTX_VGIC_INT_CTRL, offsetof(struct hypctx, vgic_cpu_int.virtual_int_ctrl)); +ASSYM(HYPCTX_VGIC_LR_NUM, offsetof(struct hypctx, vgic_cpu_int.lr_num)); +ASSYM(HYPCTX_VGIC_HCR, offsetof(struct hypctx, vgic_cpu_int.hcr)); +ASSYM(HYPCTX_VGIC_VMCR, offsetof(struct hypctx, vgic_cpu_int.vmcr)); +ASSYM(HYPCTX_VGIC_MISR, offsetof(struct hypctx, vgic_cpu_int.misr)); +ASSYM(HYPCTX_VGIC_EISR, offsetof(struct hypctx, vgic_cpu_int.eisr)); +ASSYM(HYPCTX_VGIC_ELSR, offsetof(struct hypctx, vgic_cpu_int.elsr)); +ASSYM(HYPCTX_VGIC_APR, offsetof(struct hypctx, vgic_cpu_int.apr)); +ASSYM(HYPCTX_VGIC_LR, offsetof(struct hypctx, vgic_cpu_int.lr)); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index: sys/arm/vmm/hyp_helpers.h =================================================================== --- sys/arm/vmm/hyp_helpers.h +++ sys/arm/vmm/hyp_helpers.h @@ -0,0 +1,347 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VMM_HYP_HELPERS_H_ +#define _VMM_HYP_HELPERS_H_ + +/* Banked registers */ +#define SAVE_GUEST_BANKED_REG(reg) \ + mrs r2, reg; \ + str r2, [r0, #HYPCTX_##reg] +#define SAVE_GUEST_BANKED_MODE(mode) \ + SAVE_GUEST_BANKED_REG(SP_##mode); \ + SAVE_GUEST_BANKED_REG(LR_##mode); \ + SAVE_GUEST_BANKED_REG(SPSR_##mode) + +#define RESTORE_GUEST_BANKED_REG(reg) \ + ldr r2, [r0, #HYPCTX_##reg]; \ + msr reg, r2 +#define RESTORE_GUEST_BANKED_MODE(mode) \ + RESTORE_GUEST_BANKED_REG(SP_##mode); \ + RESTORE_GUEST_BANKED_REG(LR_##mode); \ + RESTORE_GUEST_BANKED_REG(SPSR_##mode) + +#define save_guest_regs \ + /* r0 - address of the hypctx */ \ + add r2, r0, #HYPCTX_REGS_R(3); \ + stm r2, {r3-r12}; \ + pop {r3-r5}; /* Get r0-r2 from the stack */ \ + add r2, r0, #HYPCTX_REGS_R(0); \ + stm r2, {r3-r5}; \ + \ + str lr, [r0, #HYPCTX_REGS_LR]; \ + mrs r2, SP_usr; \ + str r2, [r0, #HYPCTX_REGS_SP]; \ + \ + mrs r2, ELR_hyp; \ + str r2, [r0, #HYPCTX_REGS_PC]; \ + mrs r2, spsr; \ + str r2, [r0, #HYPCTX_REGS_CPSR]; \ + \ + SAVE_GUEST_BANKED_MODE(svc); \ + SAVE_GUEST_BANKED_MODE(abt); \ + SAVE_GUEST_BANKED_MODE(und); \ + SAVE_GUEST_BANKED_MODE(irq); \ + SAVE_GUEST_BANKED_MODE(fiq); \ + SAVE_GUEST_BANKED_REG(r8_fiq); \ + SAVE_GUEST_BANKED_REG(r9_fiq); \ + SAVE_GUEST_BANKED_REG(r10_fiq); \ + SAVE_GUEST_BANKED_REG(r11_fiq); \ + SAVE_GUEST_BANKED_REG(r12_fiq) + +#define restore_guest_regs \ + /* r0 - address of the hypctx */ \ + RESTORE_GUEST_BANKED_MODE(svc); \ + RESTORE_GUEST_BANKED_MODE(abt); \ + RESTORE_GUEST_BANKED_MODE(und); \ + RESTORE_GUEST_BANKED_MODE(irq); \ + RESTORE_GUEST_BANKED_MODE(fiq); \ + RESTORE_GUEST_BANKED_REG(r8_fiq); \ + RESTORE_GUEST_BANKED_REG(r9_fiq); \ + RESTORE_GUEST_BANKED_REG(r10_fiq); \ + RESTORE_GUEST_BANKED_REG(r11_fiq); \ + RESTORE_GUEST_BANKED_REG(r12_fiq); \ + \ + ldr r2, [r0, #HYPCTX_REGS_PC]; \ + msr ELR_hyp, r2; \ + ldr r2, [r0, #HYPCTX_REGS_CPSR]; \ + msr SPSR_cxsf, r2; \ + \ + ldr lr, [r0, #HYPCTX_REGS_LR]; \ + ldr r2, [r0, #HYPCTX_REGS_SP]; \ + msr SP_usr, r2; \ + \ + add r2, r0, #HYPCTX_REGS_R(0); \ + ldm r2, {r0-r12} + + +#define SAVE_HOST_BANKED_REG(reg) \ + mrs r2, reg; \ + push {r2} +#define SAVE_HOST_BANKED_MODE(mode) \ + SAVE_HOST_BANKED_REG(SP_##mode); \ + SAVE_HOST_BANKED_REG(LR_##mode); \ + SAVE_HOST_BANKED_REG(SPSR_##mode) + +#define RESTORE_HOST_BANKED_REG(reg) \ + pop {r2}; \ + msr reg, r2 +#define RESTORE_HOST_BANKED_MODE(mode) \ + RESTORE_HOST_BANKED_REG(SPSR_##mode); \ + RESTORE_HOST_BANKED_REG(LR_##mode); \ + RESTORE_HOST_BANKED_REG(SP_##mode) + +#define save_host_regs \ + /* SPSR was saved when entered HYP mode */ \ + mrs r2, ELR_hyp; \ + push {r2}; \ + \ + push {r4-r12}; \ + mrs r2, SP_usr; \ + push {r2}; \ + push {lr}; \ + \ + SAVE_HOST_BANKED_MODE(svc); \ + SAVE_HOST_BANKED_MODE(abt); \ + SAVE_HOST_BANKED_MODE(und); \ + SAVE_HOST_BANKED_MODE(irq); \ + SAVE_HOST_BANKED_MODE(fiq); \ + SAVE_HOST_BANKED_REG(r8_fiq); \ + SAVE_HOST_BANKED_REG(r9_fiq); \ + SAVE_HOST_BANKED_REG(r10_fiq); \ + SAVE_HOST_BANKED_REG(r11_fiq); \ + SAVE_HOST_BANKED_REG(r12_fiq) + +#define restore_host_regs \ + RESTORE_HOST_BANKED_REG(r12_fiq); \ + RESTORE_HOST_BANKED_REG(r11_fiq); \ + RESTORE_HOST_BANKED_REG(r10_fiq); \ + RESTORE_HOST_BANKED_REG(r9_fiq); \ + RESTORE_HOST_BANKED_REG(r8_fiq); \ + RESTORE_HOST_BANKED_MODE(fiq); \ + RESTORE_HOST_BANKED_MODE(irq); \ + RESTORE_HOST_BANKED_MODE(und); \ + RESTORE_HOST_BANKED_MODE(abt); \ + RESTORE_HOST_BANKED_MODE(svc); \ + \ + pop {lr}; \ + pop {r2}; \ + msr SP_usr, r2; \ + pop {r4-r12}; \ + \ + pop {r2}; \ + msr ELR_hyp, r2 + 
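+/*
+ * CP15 world-switch convention used by hyp.S: load_cp15_regs_batchN reads
+ * the currently installed CP15 state into r2-r12, store_cp15_regs_batchN
+ * writes r2-r12 back into CP15, and the load_guest_/store_guest_ variants
+ * move the same set between r2-r12 and the hypctx save area.  Host CP15
+ * state is parked on the HYP stack (the push/pop around each batch in
+ * hyp_enter_guest/hyp_exit_guest), while guest state lives in struct hypctx.
+ */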
+#define load_cp15_regs_batch1 \ + mrc p15, 0, r2, c1, c0, 0; /* SCTLR */ \ + mrc p15, 0, r3, c1, c0, 2; /* CPACR */ \ + mrc p15, 0, r4, c2, c0, 2; /* TTBCR */ \ + mrc p15, 0, r5, c3, c0, 0; /* DACR */ \ + mrrc p15, 0, r6, r7, c2; /* TTBR 0 */ \ + mrrc p15, 1, r8, r9, c2; /* TTBR 1 */ \ + mrc p15, 0, r10, c10, c2, 0; /* PRRR */ \ + mrc p15, 0, r11, c10, c2, 1; /* NMRR */ \ + mrc p15, 2, r12, c0, c0, 0 /* CSSELR */ + +#define load_cp15_regs_batch2 \ + mrc p15, 0, r2, c13, c0, 1; /* CID */ \ + mrc p15, 0, r3, c13, c0, 2; /* TID_URW */ \ + mrc p15, 0, r4, c13, c0, 3; /* TID_URO */ \ + mrc p15, 0, r5, c13, c0, 4; /* TID_PRIV */ \ + mrc p15, 0, r6, c5, c0, 0; /* DFSR */ \ + mrc p15, 0, r7, c5, c0, 1; /* IFSR */ \ + mrc p15, 0, r8, c5, c1, 0; /* ADFSR */ \ + mrc p15, 0, r9, c5, c1, 1; /* AIFSR */ \ + mrc p15, 0, r10, c6, c0, 0; /* DFAR */ \ + mrc p15, 0, r11, c6, c0, 2; /* IFAR */ \ + mrc p15, 0, r12, c12, c0, 0 /* VBAR */ + +#define load_cp15_regs_batch3 \ + mrc p15, 0, r2, c14, c1, 0; /* CNTKCTL */ \ + mrrc p15, 0, r4, r5, c7; /* PAR */ \ + mrc p15, 0, r3, c10, c3, 0; /* AMAIR0 */ \ + mrc p15, 0, r6, c10, c3, 1 /* AMAIR1 */ + +#define store_cp15_regs_batch1 \ + mcr p15, 0, r2, c1, c0, 0; /* SCTLR */ \ + mcr p15, 0, r3, c1, c0, 2; /* CPACR */ \ + mcr p15, 0, r4, c2, c0, 2; /* TTBCR */ \ + mcr p15, 0, r5, c3, c0, 0; /* DACR */ \ + mcrr p15, 0, r6, r7, c2; /* TTBR 0 */ \ + mcrr p15, 1, r8, r9, c2; /* TTBR 1 */ \ + mcr p15, 0, r10, c10, c2, 0; /* PRRR */ \ + mcr p15, 0, r11, c10, c2, 1; /* NMRR */ \ + mcr p15, 2, r12, c0, c0, 0 /* CSSELR */ + +#define store_cp15_regs_batch2 \ + mcr p15, 0, r2, c13, c0, 1; /* CID */ \ + mcr p15, 0, r3, c13, c0, 2; /* TID_URW */ \ + mcr p15, 0, r4, c13, c0, 3; /* TID_URO */ \ + mcr p15, 0, r5, c13, c0, 4; /* TID_PRIV */ \ + mcr p15, 0, r6, c5, c0, 0; /* DFSR */ \ + mcr p15, 0, r7, c5, c0, 1; /* IFSR */ \ + mcr p15, 0, r8, c5, c1, 0; /* ADFSR */ \ + mcr p15, 0, r9, c5, c1, 1; /* AIFSR */ \ + mcr p15, 0, r10, c6, c0, 0; /* DFAR */ \ + mcr p15, 0, r11, c6, c0, 2; /* IFAR */ \ + mcr p15, 0, r12, c12, c0, 0 /* VBAR */ + +#define store_cp15_regs_batch3 \ + mcr p15, 0, r2, c14, c1, 0; /* CNTKCTL */ \ + mcrr p15, 0, r4, r5, c7; /* PAR */ \ + mcr p15, 0, r3, c10, c3, 0; /* AMAIR0 */ \ + mcr p15, 0, r6, c10, c3, 1 /* AMAIR1 */ + +#define store_guest_cp15_regs_batch1 \ + str r2, [r0, #HYPCTX_CP15_SCTLR]; \ + str r3, [r0, #HYPCTX_CP15_CPACR]; \ + str r4, [r0, #HYPCTX_CP15_TTBCR]; \ + str r5, [r0, #HYPCTX_CP15_DACR]; \ + add r2, r0, #HYPCTX_CP15_TTBR0; \ + strd r6, r7, [r2]; \ + add r2, r0, #HYPCTX_CP15_TTBR1; \ + strd r8, r9, [r2]; \ + str r10, [r0, #HYPCTX_CP15_PRRR]; \ + str r11, [r0, #HYPCTX_CP15_NMRR]; \ + str r12, [r0, #HYPCTX_CP15_CSSELR] + +#define store_guest_cp15_regs_batch2 \ + str r2, [r0, #HYPCTX_CP15_CID]; \ + str r3, [r0, #HYPCTX_CP15_TID_URW]; \ + str r4, [r0, #HYPCTX_CP15_TID_URO]; \ + str r5, [r0, #HYPCTX_CP15_TID_PRIV]; \ + str r6, [r0, #HYPCTX_CP15_DFSR]; \ + str r7, [r0, #HYPCTX_CP15_IFSR]; \ + str r8, [r0, #HYPCTX_CP15_ADFSR]; \ + str r9, [r0, #HYPCTX_CP15_AIFSR]; \ + str r10, [r0, #HYPCTX_CP15_DFAR]; \ + str r11, [r0, #HYPCTX_CP15_IFAR]; \ + str r12, [r0, #HYPCTX_CP15_VBAR] + +#define store_guest_cp15_regs_batch3 \ + str r2, [r0, #HYPCTX_CP15_CNTKCTL]; \ + add r2, r0, #HYPCTX_CP15_PAR; \ + strd r4, r5, [r2]; \ + str r3, [r0, #HYPCTX_CP15_AMAIR0]; \ + str r6, [r0, #HYPCTX_CP15_AMAIR1] + +#define load_guest_cp15_regs_batch1 \ + ldr r2, [r0, #HYPCTX_CP15_SCTLR]; \ + ldr r3, [r0, #HYPCTX_CP15_CPACR]; \ + ldr r4, [r0, #HYPCTX_CP15_TTBCR]; \ + ldr r5, [r0, 
#HYPCTX_CP15_DACR]; \ + add r10, r0, #HYPCTX_CP15_TTBR0; \ + ldrd r6, r7, [r10]; \ + add r10, r0, #HYPCTX_CP15_TTBR1; \ + ldrd r8, r9, [r10]; \ + ldr r10, [r0, #HYPCTX_CP15_PRRR]; \ + ldr r11, [r0, #HYPCTX_CP15_NMRR]; \ + ldr r12, [r0, #HYPCTX_CP15_CSSELR] + +#define load_guest_cp15_regs_batch2 \ + ldr r2, [r0, #HYPCTX_CP15_CID]; \ + ldr r3, [r0, #HYPCTX_CP15_TID_URW]; \ + ldr r4, [r0, #HYPCTX_CP15_TID_URO]; \ + ldr r5, [r0, #HYPCTX_CP15_TID_PRIV]; \ + ldr r6, [r0, #HYPCTX_CP15_DFSR]; \ + ldr r7, [r0, #HYPCTX_CP15_IFSR]; \ + ldr r8, [r0, #HYPCTX_CP15_ADFSR]; \ + ldr r9, [r0, #HYPCTX_CP15_AIFSR]; \ + ldr r10, [r0, #HYPCTX_CP15_DFAR]; \ + ldr r11, [r0, #HYPCTX_CP15_IFAR]; \ + ldr r12, [r0, #HYPCTX_CP15_VBAR] + +#define load_guest_cp15_regs_batch3 \ + ldr r2, [r0, #HYPCTX_CP15_CNTKCTL]; \ + add r3, r0, #HYPCTX_CP15_PAR; \ + ldrd r4, r5, [r3]; \ + ldr r3, [r0, #HYPCTX_CP15_AMAIR0]; \ + ldr r6, [r0, #HYPCTX_CP15_AMAIR1] + + +#define save_vgic_regs \ + ldr r2, [r0, #HYPCTX_VGIC_INT_CTRL]; \ + cmp r2, #0; \ + beq 1f; \ + \ + ldr r3, [r2, #GICH_HCR]; \ + str r3, [r0, #HYPCTX_VGIC_HCR]; \ + \ + mov r3, #0; \ + str r3, [r2, #GICH_HCR]; \ + \ + ldr r3, [r2, #GICH_VMCR]; \ + str r3, [r0, #HYPCTX_VGIC_VMCR]; \ + \ + ldr r3, [r2, #GICH_MISR]; \ + str r3, [r0, #HYPCTX_VGIC_MISR]; \ + \ + ldr r3, [r2, #GICH_EISR0]; \ + ldr r4, [r2, #GICH_EISR1]; \ + str r3, [r2, #HYPCTX_VGIC_EISR]; \ + str r4, [r2, #(HYPCTX_VGIC_EISR + 4)]; \ + \ + ldr r3, [r2, #GICH_ELSR0]; \ + ldr r4, [r2, #GICH_ELSR1]; \ + str r3, [r0, #HYPCTX_VGIC_ELSR]; \ + str r4, [r0, #(HYPCTX_VGIC_ELSR + 4)]; \ + \ + ldr r3, [r2, #GICH_APR]; \ + str r3, [r0, #HYPCTX_VGIC_APR]; \ + \ + ldr r3, [r0, #HYPCTX_VGIC_LR_NUM]; \ + add r4, r2, #GICH_LR0; \ + add r5, r0, #HYPCTX_VGIC_LR; \ +2: ldr r6, [r4], #4; \ + str r6, [r5], #4; \ + subs r3, r3, #1; \ + bne 2b; \ +1: + +#define restore_vgic_regs \ + ldr r2, [r0, #HYPCTX_VGIC_INT_CTRL]; \ + cmp r2, #0; \ + beq 3f; \ + \ + ldr r3, [r0, #HYPCTX_VGIC_HCR]; \ + str r3, [r2, #GICH_HCR]; \ + \ + ldr r3, [r0, #HYPCTX_VGIC_VMCR]; \ + str r3, [r2, #GICH_VMCR]; \ + \ + str r3, [r0, #HYPCTX_VGIC_APR]; \ + ldr r3, [r2, #GICH_APR]; \ + \ + ldr r3, [r0, #HYPCTX_VGIC_LR_NUM]; \ + add r4, r2, #GICH_LR0; \ + add r5, r0, #HYPCTX_VGIC_LR; \ +4: ldr r6, [r5], #4; \ + str r6, [r4], #4; \ + subs r3, r3, #1; \ + bne 4b; \ +3: + +#endif Index: sys/arm/vmm/mmu.h =================================================================== --- sys/arm/vmm/mmu.h +++ sys/arm/vmm/mmu.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_MMU_H_ +#define _VMM_MMU_H_ + +typedef uint64_t lpae_pd_entry_t; /* LPAE page directory entry */ +typedef uint64_t lpae_pt_entry_t; /* LPAE page table entry */ + +typedef uint64_t lpae_vm_paddr_t; /* LPAE VM paddr */ +typedef uint64_t lpae_vm_vaddr_t; /* LPAE VM vaddr */ + +int lpae_vmmmap_set(void *arg, + uint64_t virt_start, + uint64_t phys_start, + size_t len, + int prot); +uint64_t lpae_vmmmap_get(void *arg, + uint64_t ipa); +void lpae_vmcleanup(void *arg); + +/* Debug only */ +void dump_lpae_mapping(void *arg); + +#define LPAE_NLEVELS 3 + +#define LPAE_L1_TABLE_SIZE 0x1000 /* 4K */ +#define LPAE_L1_ENTRIES (LPAE_L1_TABLE_SIZE / 8) /* 512 */ + +#define LPAE_L2_TABLE_SIZE 0x1000 /* 4K */ +#define LPAE_L2_ENTRIES (LPAE_L2_TABLE_SIZE / 8) /* 512 */ + +#define LPAE_L3_TABLE_SIZE 0x1000 /* 4K */ +#define LPAE_L3_ENTRIES (LPAE_L3_TABLE_SIZE / 8) /* 512 */ + +#define LPAE_L1_SHIFT 30 +#define LPAE_L1_SIZE (1 << 30) +#define LPAE_L1_INDEX_MASK 0x3 +#define LPAE_L1_T_ADDR_MASK ((uint64_t)0xFFFFFFF000) /* phys address of L2 Table */ +#define LPAE_L1_B_ADDR_MASK ((uint64_t)0xFFC0000000) /* phys address of Phys Block */ + +#define LPAE_L2_SHIFT 21 +#define LPAE_L2_SIZE (1 << 21) +#define LPAE_L2_INDEX_MASK 0x1FF +#define LPAE_L2_T_ADDR_MASK ((uint64_t)0xFFFFFFF000)/* phys address of L3 Table */ +#define LPAE_L2_B_ADDR_MASK ((uint64_t)0xFFFFE00000)/* phys address of Phys Block */ + +#define LPAE_L3_SHIFT 12 +#define LPAE_L3_SIZE (1 << 12) +#define LPAE_L3_INDEX_MASK 0x1FF +#define LPAE_L3_B_ADDR_MASK ((uint64_t)0xFFFFFFF000)/* phys address of Phys Block */ + +#define LPAE_TYPE_LINK 0x03 +#define LPAE_L12_TYPE_BLOCK 0x01 +#define LPAE_L3_TYPE_BLOCK 0x03 +#define LPAE_TYPE_MASK 0x03 /* mask of type bits */ + +#define LPAE_AP_HYP_RW (0x01 << 6) /* RW permissions for PL-2 stage 1*/ +#define LPAE_AP_HYP_RDONLY (0x03 << 6) /* RD permissions for PL-2 stage 1 */ + +#define LPAE_HAP_READ (0x01 << 6) /* read permissions for stage 2 */ +#define LPAE_HAP_WRITE (0x02 << 6) /* write permissions for stage 2*/ + +#define LPAE_AF (0x1 << 10) /* Access Flag */ + +/* Table B3-24 Long-descriptor format FSR encodings */ +#define LPAE_TRANSLATION_FAULT(x) ((0b000111) & x) +#define LPAE_ACCESS_FLAG_FAULT(x) ((0b001011) & x) +#define LPAE_PERMISSION_FAULT(x) ((0b001111) & x) +#define LPAE_FAULT_LEVEL(x) (0x3 & x) +#endif Index: sys/arm/vmm/mmu.c =================================================================== --- sys/arm/vmm/mmu.c +++ sys/arm/vmm/mmu.c @@ -0,0 +1,302 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include "mmu.h" +#include "arm.h" + +MALLOC_DECLARE(M_HYP); +extern lpae_pd_entry_t *hyp_l1pd; +/* + * create_lpae_mapping + * - l1pd - the level 1 address of the PD (NULL for the HYP mode PD) + * - virt_start - a 32 bit virtual address to be mapped + * - phys_start - a 64 bit physical address to map to + * - len - the desired length mapping, but it will be truncated to the virt_start + * alignment + * - prot - the FreeBSD mapping permissions + * - returns the actual length of the mapping + * + * An l1pd or l2pd will have a size of 8K (2 * LPAE_Lx_ENTRIES * sizeof(lpae_pd_entry_t)). + * The first 4K will include the bits for the MMU (physical addresses and bit permissions) + * and the second 4K will be a mirror of the first one but will include the virtual + * addresses of allocated page tables needed for walking and clean-up. 
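+ * For example, a request that starts on a 1 GB boundary with len >= 1 GB is
+ * satisfied with a single level-1 block entry, a 2 MB-aligned request with
+ * len >= 2 MB with a level-2 block, and anything smaller falls back to a
+ * single 4 KB level-3 entry per call; lpae_vmmmap_set() loops on the
+ * returned length until the whole range is mapped.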
+ * + */ +static int create_lpae_mapping(lpae_pd_entry_t *l1pd, + lpae_vm_vaddr_t virt_start, + lpae_vm_paddr_t phys_start, + size_t len, + vm_prot_t prot) +{ + lpae_pd_entry_t *l2pd, *l3pd, *l1pd_shadow, *l2pd_shadow, *pd; + int l1_index, l2_index, l3_index; + int mapped_size = 0; + bool is_hyp_pd = false; + + if (l1pd == NULL) { + l1pd = &hyp_l1pd[0]; + is_hyp_pd = true; + } + + l1_index = (virt_start >> LPAE_L1_SHIFT) & LPAE_L1_INDEX_MASK; + l2_index = (virt_start >> LPAE_L2_SHIFT) & LPAE_L2_INDEX_MASK; + l3_index = (virt_start >> LPAE_L3_SHIFT) & LPAE_L3_INDEX_MASK; + + if ((virt_start & LPAE_L1_B_ADDR_MASK) == virt_start) { + if (len >= LPAE_L1_SIZE) { + mapped_size = LPAE_L1_SIZE; + } + } + if(!mapped_size && (virt_start & LPAE_L2_B_ADDR_MASK) == virt_start) { + if (len >= LPAE_L2_SIZE) { + mapped_size = LPAE_L2_SIZE; + } + } + if(!mapped_size) { + mapped_size = LPAE_L3_SIZE; + } + + if (mapped_size == LPAE_L1_SIZE) { + pd = &l1pd[l1_index]; + /* See if this PD is a link and fallback to the next level */ + if ((*pd & LPAE_TYPE_LINK) == LPAE_TYPE_LINK) + mapped_size = LPAE_L2_SIZE; + else + goto set_prot; + } + + l1pd_shadow = &l1pd[LPAE_L1_ENTRIES]; + + if (l1pd[l1_index] == 0) { + l2pd = malloc(2 * PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + l2pd_shadow = &l2pd[LPAE_L2_ENTRIES]; + + l1pd[l1_index] = (lpae_pd_entry_t) vtophys(l2pd); + l1pd[l1_index] |= LPAE_TYPE_LINK; + + l1pd_shadow[l1_index] = (lpae_pd_entry_t) l2pd; + + } else { + l2pd = (lpae_pd_entry_t *) (l1pd_shadow[l1_index]); + l2pd_shadow = &l2pd[LPAE_L2_ENTRIES]; + } + + if (mapped_size == LPAE_L2_SIZE) { + pd = &l2pd[l2_index]; + /* See if this PD is a link and fallback to the next level */ + if ((*pd & LPAE_TYPE_LINK) == LPAE_TYPE_LINK) + mapped_size = LPAE_L3_SIZE; + else + goto set_prot; + } + + if (l2pd[l2_index] == 0) { + l3pd = malloc(PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); + l2pd[l2_index] = vtophys(l3pd); + l2pd[l2_index] |= LPAE_TYPE_LINK; + + l2pd_shadow[l2_index] = (lpae_pd_entry_t) l3pd; + } else { + l3pd = (lpae_pd_entry_t *) (l2pd_shadow[l2_index]); + } + + pd = &l3pd[l3_index]; + +set_prot: + if (prot != VM_PROT_NONE) { + *pd = phys_start; + *pd |= LPAE_AF; + if (mapped_size == LPAE_L3_SIZE) + *pd |= LPAE_L3_TYPE_BLOCK; + else + *pd |= LPAE_L12_TYPE_BLOCK; + + if (is_hyp_pd) { /* PL-2 stage-1 table */ + if (prot & (VM_PROT_READ | VM_PROT_WRITE)) + *pd |= LPAE_AP_HYP_RW; + else /* Map read-only*/ + *pd |= LPAE_AP_HYP_RDONLY; + } else { /* VM stage-2 page table */ + if (prot & VM_PROT_READ) + *pd |= LPAE_HAP_READ; + if (prot & VM_PROT_WRITE) + *pd |= LPAE_HAP_WRITE; + } + } else { + *pd = 0; + } + + return mapped_size; +} + +void dump_lpae_mapping(void *arg) +{ + int i, j, k; + struct hyp *vm_hyp; + lpae_pd_entry_t *l1pd, *l1pd_shadow, *l2pd, *l2pd_shadow, *l3pd; + + vm_hyp = arg; + + if (arg) + l1pd = &vm_hyp->l1pd[0]; + else + l1pd = &hyp_l1pd[0]; + + l1pd_shadow = &l1pd[LPAE_L1_ENTRIES]; + + printf("l1pd = %x\n", vtophys(l1pd)); + + for (i = 0; i < LPAE_L1_ENTRIES; i++) { + if(l1pd_shadow[i]) { + printf("\t %d: l2pd = %llx\n", i, l1pd[i]); + l2pd = (lpae_pd_entry_t *) l1pd_shadow[i]; + l2pd_shadow = &l2pd[LPAE_L2_ENTRIES]; + for (j = 0; j < LPAE_L2_ENTRIES; j++) { + if (l2pd_shadow[j]) { + printf("\t\t %d: l3pd = %llx\n", j, l2pd[j]); + l3pd = (lpae_pd_entry_t *) l2pd_shadow[j]; + for (k = 0; k < LPAE_L3_ENTRIES; k++) { + if (l3pd[k]) + printf("\t\t\t %d: l3_entry = %llx\n", k, l3pd[k]); + } + } + } + } + } +} + +int lpae_vmmmap_set(void *arg, + uint64_t virt_start, + uint64_t phys_start, + size_t len, + int 
prot) +{ + size_t n; + struct hyp *vm_hyp; + lpae_pd_entry_t *l1pd = NULL; + vm_hyp = arg; + if (arg) + l1pd = &vm_hyp->l1pd[0]; + + while (1) { + n = create_lpae_mapping(l1pd, virt_start, phys_start, len, prot); + + if (len <= n) + break; + len -= n; + virt_start += n; + phys_start += n; + printf("%s n: %d %d\n", __func__, n, len); + } + return (0); +} + +uint64_t lpae_vmmmap_get(void *arg, uint64_t ipa) +{ + struct hyp *vm_hyp; + int l1_index, l2_index, l3_index; + lpae_pd_entry_t *l1pd, *l1pd_shadow, *l2pd, *l2pd_shadow, *l3pd; + + vm_hyp = arg; + + if (arg) + l1pd = &vm_hyp->l1pd[0]; + else + l1pd = &hyp_l1pd[0]; + + l1pd_shadow = &l1pd[LPAE_L1_ENTRIES]; + + /* Check if there is a connnection to a 2nd level PT */ + l1_index = (ipa >> LPAE_L1_SHIFT) & LPAE_L1_INDEX_MASK; + if ((l1pd[l1_index] & LPAE_TYPE_LINK) == LPAE_TYPE_LINK) { + + /* Grab the virtual address of the 2nd leel PT */ + l2pd = (lpae_pd_entry_t *) (l1pd_shadow[l1_index]); + l2pd_shadow = &l2pd[LPAE_L2_ENTRIES]; + + /* Check if there is a connect to a 3nd level PT */ + l2_index = (ipa >> LPAE_L2_SHIFT) & LPAE_L2_INDEX_MASK; + if ((l2pd[l2_index] & LPAE_TYPE_LINK) == LPAE_TYPE_LINK) { + + l3pd = (lpae_pd_entry_t *) (l2pd_shadow[l2_index]); + + l3_index = (ipa >> LPAE_L3_SHIFT) & LPAE_L3_INDEX_MASK; + return (l3pd[l3_index] & LPAE_L3_B_ADDR_MASK); + } else { + return (l2pd[l2_index] & LPAE_L2_B_ADDR_MASK); + } + } else { + return (l1pd[l1_index] & LPAE_L1_B_ADDR_MASK); + } + + return ((uint64_t)-1); +} + +void lpae_vmcleanup(void *arg) +{ + int i, j; + struct hyp *vm_hyp; + lpae_pd_entry_t *l1pd, *l1pd_shadow, *l2pd, *l2pd_shadow; + + vm_hyp = arg; + + if (arg) + l1pd = &vm_hyp->l1pd[0]; + else + l1pd = &hyp_l1pd[0]; + + l1pd_shadow = &l1pd[LPAE_L1_ENTRIES]; + + for (i = 0; i < LPAE_L1_ENTRIES; i++) { + if(l1pd_shadow[i]) { + l2pd = (lpae_pd_entry_t *) l1pd_shadow[i]; + l2pd_shadow = &l2pd[LPAE_L2_ENTRIES]; + for (j = 0; j < LPAE_L2_ENTRIES; j++) { + if (l2pd_shadow[j]) { + free((void *) l2pd_shadow[j], M_HYP); + } + } + free((void *) l1pd_shadow[i], M_HYP); + } + } +} Index: sys/arm/vmm/vgic.h =================================================================== --- sys/arm/vmm/vgic.h +++ sys/arm/vmm/vgic.h @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_VGIC_H_ +#define _VMM_VGIC_H_ + +#define VGIC_NR_IRQ 128 +#define VGIC_NR_SGI 16 +#define VGIC_NR_PPI 16 +#define VGIC_NR_PRV_IRQ (VGIC_NR_SGI + VGIC_NR_PPI) +#define VGIC_NR_SHR_IRQ (VGIC_NR_IRQ - VGIC_NR_PRV_IRQ) +#define VGIC_MAXCPU VM_MAXCPU +#define VGIC_LR_NUM 64 + +#define LR_EMPTY 0xff + +struct vm; +struct vm_exit; + +struct vgic_distributor { + struct mtx distributor_lock; + + uint64_t distributor_base; + uint64_t cpu_int_base; + + uint32_t enabled; + + /* Bitmaps for IRQ state in the distributor*/ + + /* Interrupt enabled */ + uint32_t irq_enabled_prv[VGIC_MAXCPU][VGIC_NR_PRV_IRQ / (sizeof(uint32_t) * 8)]; + uint32_t irq_enabled_shr[VGIC_NR_SHR_IRQ / (sizeof(uint32_t) * 8)]; + + /* Interrupt level */ + uint32_t irq_state_prv[VGIC_MAXCPU][VGIC_NR_PRV_IRQ / (sizeof(uint32_t) * 8)]; + uint32_t irq_state_shr[VGIC_NR_SHR_IRQ / (sizeof(uint32_t) * 8)]; + + /* Level interrupts in progress */ + uint32_t irq_active_prv[VGIC_MAXCPU][VGIC_NR_PRV_IRQ / (sizeof(uint32_t) * 8)]; + uint32_t irq_active_shr[VGIC_NR_SHR_IRQ / (sizeof(uint32_t) * 8)]; + + /* Configure type of IRQ: level or edge triggered */ + uint32_t irq_conf_prv[VGIC_MAXCPU][VGIC_NR_PRV_IRQ / (sizeof(uint32_t) * 8)]; + uint32_t irq_conf_shr[VGIC_NR_SHR_IRQ / (sizeof(uint32_t) * 8)]; + + /* Interrupt targets */ + uint32_t irq_target_shr[VGIC_NR_SHR_IRQ / sizeof(uint32_t)]; + + uint8_t irq_sgi_source[VGIC_MAXCPU][VGIC_NR_SGI]; + + uint32_t sgir; + + uint32_t irq_pending_on_cpu; +}; + +struct vgic_cpu_int { + /* Bitmaps for pending IRQs */ + uint32_t pending_prv[VGIC_NR_PRV_IRQ / (sizeof(uint32_t) * 8)]; + uint32_t pending_shr[VGIC_NR_SHR_IRQ / (sizeof(uint32_t) * 8)]; + + uint64_t virtual_int_ctrl; + uint32_t lr_num; + uint32_t hcr; + uint32_t vmcr; + uint32_t misr; + uint64_t eisr; + uint64_t elsr; + uint32_t apr; + uint32_t lr[VGIC_LR_NUM]; + uint8_t lr_used[VGIC_LR_NUM]; + uint8_t irq_to_lr[VGIC_NR_IRQ]; +}; + +int vgic_hyp_init(void); + +int vgic_emulate_distributor(void *arg, int vcpuid, + struct vm_exit *vme, bool *retu); + +int vgic_attach(void *arg, uint64_t distributor_paddr, + uint64_t cpu_int_paddr); + +void vgic_sync_hwstate(void *arg); + +void vgic_flush_hwstate(void *arg); + +int vgic_vcpu_pending_irq(void *arg); + +#define gic_h_read_4(_sc, _reg) \ + bus_space_read_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg)) +#define gic_h_write_4(_sc, _reg, _val) \ + bus_space_write_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg), (_val)) + +#endif Index: sys/arm/vmm/vgic.c =================================================================== --- sys/arm/vmm/vgic.c +++ sys/arm/vmm/vgic.c @@ -0,0 +1,955 @@ +/* + * Copyright (C) 2017 Nicolae-Alexandru Ivan + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hyp.h" +#include "mmu.h" +#include "vgic.h" +#include "arm.h" + +static struct arm_gic_softc *gic_sc; +static uint64_t virtual_int_ctrl_vaddr; +static uint64_t virtual_int_ctrl_paddr; +static uint32_t virtual_int_ctrl_size; + +static uint64_t virtual_cpu_int_paddr; +static uint32_t virtual_cpu_int_size; + +static uint32_t lr_num; + +static struct resource_spec arm_vgic_spec[] = { + { SYS_RES_MEMORY, 2, RF_ACTIVE }, /* Virtual Interface Control */ + { SYS_RES_MEMORY, 3, RF_ACTIVE }, /* Virtual CPU interface */ + { SYS_RES_IRQ, 0, RF_ACTIVE }, /* vGIC maintenance interrupt */ + { -1, 0 } +}; +#define VIRTUAL_INTERFACE_CONTROL 0 +#define VIRTUAL_CPU_INTERFACE 1 +#define MAINTENANCE_INTR 2 +static struct resource *arm_vgic_res[3]; +static void *arm_vgic_maintenance_intr_ihl[1]; + +static void vgic_update_state(struct hyp *hyp); +static void vgic_retire_disabled_irqs(struct hypctx *hypctx); +static void vgic_dispatch_sgi(struct hypctx *hypctx); + +static uint32_t vgic_dist_conf_expand(uint16_t val) +{ + uint32_t res; + int i; + + res = 0; + + for (i = 0; i < 16; ++i) { + res |= (val & 1) << (2 * i + 1); + val = val >> 1; + } + + return res; +} + +static uint16_t vgic_dist_conf_compress(uint32_t val) +{ + uint32_t res; + int i; + + res = 0; + + for (i = 0; i < 16; ++i) { + val = val >> 1; + res |= (val & 1) << i; + val = val >> 1; + } + + return res; +} + +static int +vgic_dist_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, + void *arg) +{ + uint64_t offset; + uint64_t base_offset; + uint64_t byte_offset; + uint64_t mask; + struct hyp *hyp; + struct vgic_distributor *dist; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_distributor; + + /* offset of distributor register */ + offset = gpa - dist->distributor_base; + base_offset = offset - (offset & 3); + byte_offset = (offset - base_offset) * 8; + mask = (1 << size * 8) - 1; + + if (base_offset >= GICD_CTLR && base_offset < GICD_TYPER) { + + *rval = (dist->enabled >> byte_offset) & mask; + + } else if (base_offset >= GICD_TYPER && base_offset < GICD_IIDR) { + + *rval = (((VGIC_MAXCPU - 1) << 5) | ((VGIC_NR_IRQ / 32) - 1) >> byte_offset) & mask; + + } else if 
(base_offset >= GICD_IIDR && base_offset < GICD_IGROUPR(0)) { + + *rval = (0x0000043B >> byte_offset) & mask; + + } else if (base_offset >= GICD_IGROUPR(0) && base_offset < GICD_ISENABLER(0)) { + + /* irq group control is RAZ */ + *rval = 0; + + } else if (base_offset >= GICD_ISENABLER(0) && base_offset < GICD_ISENABLER(1)) { + + /* private set-enable irq */ + *rval = (dist->irq_enabled_prv[vcpuid][0] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ISENABLER(1) && base_offset < GICD_ICENABLER(0)) { + + /* shared set-enable irq */ + *rval = (dist->irq_enabled_shr[(base_offset - GICD_ISENABLER(1)) / sizeof(uint32_t)] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ICENABLER(0) && base_offset < GICD_ICENABLER(1)) { + + /* private clear-enable irq */ + *rval = (dist->irq_enabled_prv[vcpuid][0] >> byte_offset) & mask; + + } else if (offset >= GICD_ICENABLER(1) && offset < GICD_ISPENDR(0)) { + + /* shared clear-enable irq */ + *rval = (dist->irq_enabled_shr[(base_offset - GICD_ICENABLER(1)) / sizeof(uint32_t)] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ISPENDR(0) && base_offset < GICD_ISPENDR(1)) { + + /* private set-pending irq */ + *rval = (dist->irq_state_prv[vcpuid][0] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ISPENDR(1) && base_offset < GICD_ICPENDR(0)) { + + /* shared set-pending irq */ + *rval = (dist->irq_state_shr[(base_offset - GICD_ISPENDR(1)) / sizeof(uint32_t)] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ICPENDR(0) && base_offset < GICD_ICPENDR(1)) { + + /* private clear-pending irq */ + *rval = (dist->irq_state_prv[vcpuid][0] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ICPENDR(1) && base_offset < GICD_ICACTIVER(0)) { + + /* shared clear-pending irq */ + *rval = (dist->irq_state_shr[(base_offset - GICD_ICPENDR(1)) / sizeof(uint32_t)] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ISACTIVER(0) && base_offset < GICD_IPRIORITYR(0)) { + + /* active irq is RAZ */ + *rval = 0; + + } else if (base_offset >= GICD_ITARGETSR(0) && base_offset < GICD_ITARGETSR(8)) { + + /* target for banked interrupts is read-only and returns the processor reading this register */ + *rval = (1 << vcpuid); + *rval |= *rval << 8; + *rval |= *rval << 16; + *rval = (*rval >> byte_offset) & mask; + + } else if (base_offset >= GICD_ITARGETSR(8) && base_offset < GICD_ICFGR(0)) { + + /* target for shared irqs */ + *rval = (dist->irq_target_shr[(base_offset - GICD_ITARGETSR(8)) / sizeof(uint32_t)] >> byte_offset) & mask; + + } else if (base_offset >= GICD_ICFGR(0) && base_offset < GICD_ICFGR(1)) { + + /* private configure irq */ + if (offset & 2) { + *rval = (vgic_dist_conf_expand(dist->irq_conf_prv[vcpuid][0] >> 16) >> byte_offset) & mask; + } else { + *rval = (vgic_dist_conf_expand(dist->irq_conf_prv[vcpuid][0] & 0xffff) >> byte_offset) & mask; + } + + } else if (base_offset >= GICD_ICFGR(1) && base_offset < GICD_SGIR(0)) { + + /* shared configure irq */ + if (offset & 2) { + *rval = (vgic_dist_conf_expand(dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] >> 16) >> byte_offset) & mask; + } else { + *rval = (vgic_dist_conf_expand(dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] & 0xffff) >> byte_offset) & mask; + } + + } + + printf("%s on cpu: %d with gpa: %llx size: %x\n", __func__, vcpuid, gpa, size); + return (0); +} + +static int +vgic_dist_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, + void *arg) +{ + uint64_t offset; + uint64_t base_offset; + 
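/* As in the read handler above: base_offset is the word-aligned register offset and byte_offset is the bit position of the byte lane being accessed, so sub-word writes land in the right lane. */ +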
uint64_t byte_offset; + uint64_t mask; + struct hyp *hyp; + struct vgic_distributor *dist; + + hyp = vm_get_cookie(vm); + dist = &hyp->vgic_distributor; + + offset = gpa - dist->distributor_base; + base_offset = offset - (offset & 3); + byte_offset = (offset - base_offset) * 8; + mask = (1 << size * 8) - 1; + + if (base_offset >= GICD_CTLR && base_offset < GICD_TYPER) { + + dist->enabled = ((val & mask) << byte_offset) & 1; + + } else if (base_offset >= GICD_IGROUPR(0) && base_offset < GICD_ISENABLER(0)) { + /* irq group control is WI */ + } else if (base_offset >= GICD_ISENABLER(0) && base_offset < GICD_ISENABLER(1)) { + + /* private set-enable irq */ + dist->irq_enabled_prv[vcpuid][0] |= (val & mask) << byte_offset; + + } else if (base_offset >= GICD_ISENABLER(1) && base_offset < GICD_ICENABLER(0)) { + + /* shared set-enable irq */ + dist->irq_enabled_shr[(base_offset - GICD_ISENABLER(1)) / sizeof(uint32_t)] |= (val & mask) << byte_offset; + + } else if (base_offset >= GICD_ICENABLER(0) && base_offset < GICD_ICENABLER(1)) { + + /* private clear-enable irq */ + dist->irq_enabled_prv[vcpuid][0] &= ~((val & mask) << byte_offset); + vgic_retire_disabled_irqs(&hyp->ctx[vcpuid]); + + } else if (offset >= GICD_ICENABLER(1) && offset < GICD_ISPENDR(0)) { + + /* shared clear-enable irq */ + dist->irq_enabled_shr[(base_offset - GICD_ICENABLER(1)) / sizeof(uint32_t)] &= ~((val & mask) << byte_offset); + vgic_retire_disabled_irqs(&hyp->ctx[vcpuid]); + + } else if (base_offset >= GICD_ISPENDR(0) && base_offset < GICD_ISPENDR(1)) { + + /* private set-pending irq */ + dist->irq_state_prv[vcpuid][0] |= (val & mask) << byte_offset; + + } else if (base_offset >= GICD_ISPENDR(1) && base_offset < GICD_ICPENDR(0)) { + + /* shared set-pending irq */ + dist->irq_state_shr[(base_offset - GICD_ISPENDR(1)) / sizeof(uint32_t)] |= (val & mask) << byte_offset; + + } else if (base_offset >= GICD_ICPENDR(0) && base_offset < GICD_ICPENDR(1)) { + + /* private clear-pending irq */ + dist->irq_state_prv[vcpuid][0] &= ~((val & mask) << byte_offset); + + } else if (base_offset >= GICD_ICPENDR(1) && base_offset < GICD_ICACTIVER(0)) { + + /* shared clear-pending irq */ + dist->irq_state_shr[(base_offset - GICD_ICPENDR(1)) / sizeof(uint32_t)] &= ~((val & mask) << byte_offset); + + } else if (base_offset >= GICD_ISACTIVER(0) && base_offset < GICD_IPRIORITYR(0)) { + /* active irq is WI */ + } else if (base_offset >= GICD_ITARGETSR(0) && base_offset < GICD_ITARGETSR(8)) { + /* target for banked interrupts is WI */ + } else if (base_offset >= GICD_ITARGETSR(8) && base_offset < GICD_ICFGR(0)) { + + /* target for shared irqs */ + dist->irq_target_shr[(base_offset - GICD_ITARGETSR(8)) / sizeof(uint32_t)] = + (dist->irq_target_shr[(base_offset - GICD_ITARGETSR(8)) / sizeof(uint32_t)] & ~(mask << byte_offset)) + | ((val & mask) << byte_offset); + + } else if (base_offset >= GICD_ICFGR(0) && base_offset < GICD_ICFGR(1)) { + + /* private configure irq */ + if (offset < 4) { + dist->irq_conf_prv[vcpuid][0] |= ~0U; + goto end; + } + + if (offset & 2) { + val = (vgic_dist_conf_expand(dist->irq_conf_prv[vcpuid][0] >> 16) & ~(mask << byte_offset)) + | ((val & mask) << byte_offset); + val = vgic_dist_conf_compress(val); + dist->irq_conf_prv[vcpuid][0] &= 0xffff; + dist->irq_conf_prv[vcpuid][0] |= val << 16; + + } else { + val = (vgic_dist_conf_expand(dist->irq_conf_prv[vcpuid][0] & 0xffff) & ~(mask << byte_offset)) + | ((val & mask) << byte_offset); + val = vgic_dist_conf_compress(val); + dist->irq_conf_prv[vcpuid][0] &= 0xffff << 16; + 
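/* The lower half was cleared just above; merge the re-compressed one-bit-per-IRQ edge/level configuration back in. */ +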
dist->irq_conf_prv[vcpuid][0] |= val; + } + + } else if (base_offset >= GICD_ICFGR(1) && base_offset < GICD_SGIR(0)) { + + /* shared configure irq */ + if (offset < 4) { + dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] |= ~0U; + goto end; + } + + if (offset & 2) { + val = (vgic_dist_conf_expand(dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] >> 16) & ~(mask << byte_offset)) + | ((val & mask) << byte_offset); + val = vgic_dist_conf_compress(val); + dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] &= 0xffff; + dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] |= val << 16; + } else { + val = (vgic_dist_conf_expand(dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] & 0xffff) & ~(mask << byte_offset)) + | ((val & mask) << byte_offset); + val = vgic_dist_conf_compress(val); + dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] &= 0xffff << 16; + dist->irq_conf_shr[(base_offset - GICD_ICFGR(1)) / sizeof(uint32_t) / 2] |= val; + } + + } else if (base_offset >= GICD_SGIR(0) && base_offset < GICD_SGIR(1)) { + + dist->sgir = (dist->sgir & ~(mask << byte_offset)) | ((val & mask) << byte_offset); + vgic_dispatch_sgi(&hyp->ctx[vcpuid]); + + } + +end: + vgic_update_state(hyp); + + printf("%s on cpu: %d with gpa: %llx size: %x with val: %llx\n", __func__, vcpuid, gpa, size, val); + return (0); +} + +int +vgic_emulate_distributor(void *arg, int vcpuid, struct vm_exit *vme, bool *retu) +{ + struct hyp *hyp; + int error; + + hyp = arg; + + if (vme->u.inst_emul.gpa < hyp->vgic_distributor.distributor_base || + vme->u.inst_emul.gpa > hyp->vgic_distributor.distributor_base + PAGE_SIZE || + !hyp->vgic_attached) { + + *retu = true; + return (0); + } + + *retu = false; + error = vmm_emulate_instruction(hyp->vm, vcpuid, vme->u.inst_emul.gpa, &vme->u.inst_emul.vie, + vgic_dist_mmio_read, vgic_dist_mmio_write, retu); + + return (error); +} + +int +vgic_attach(void *arg, uint64_t distributor_paddr, uint64_t cpu_int_paddr) +{ + struct hyp *hyp; + struct hypctx *hypctx; + int i; + + hyp = arg; + + /* + * Set the distributor address which will be + * emulated using the MMIO infrasctructure + * */ + hyp->vgic_distributor.distributor_base = distributor_paddr; + hyp->vgic_distributor.cpu_int_base = cpu_int_paddr; + hyp->vgic_attached = true; + /* + * Set the Virtual Interface Control address to + * save/restore registers at context switch. 
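+ * (virtual_int_ctrl_vaddr and lr_num below come from the GICH probe in vgic_hyp_init().)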
+ * Also set the number of LRs + * */ + for (i = 0; i < VM_MAXCPU; i++) { + hypctx = &hyp->ctx[i]; + hypctx->vgic_cpu_int.virtual_int_ctrl = virtual_int_ctrl_vaddr; + hypctx->vgic_cpu_int.lr_num = lr_num; + hypctx->vgic_cpu_int.hcr = GICH_HCR_EN; + hypctx->vgic_cpu_int.vmcr = 0; + } + + /* Map the CPU Interface over the Virtual CPU Interface */ + lpae_vmmmap_set(arg, + (lpae_vm_vaddr_t)cpu_int_paddr, + (lpae_vm_paddr_t)virtual_cpu_int_paddr, + virtual_cpu_int_size, + VM_PROT_READ | VM_PROT_WRITE); + + return (0); +} + +static int +vgic_bitmap_get_irq_val(uint32_t *irq_prv, uint32_t *irq_shr, int irq) +{ + if (irq < VGIC_NR_PRV_IRQ) + return test_bit(irq, irq_prv); + + return test_bit(irq - VGIC_NR_PRV_IRQ, irq_shr); +} + +static void +vgic_bitmap_set_irq_val(uint32_t *irq_prv, uint32_t *irq_shr, int irq, int val) +{ + uint32_t *reg; + + if (irq < VGIC_NR_PRV_IRQ) { + reg = irq_prv; + } else { + reg = irq_shr; + irq -= VGIC_NR_PRV_IRQ; + } + + if (val) + set_bit(irq, reg); + else + clear_bit(irq, reg); +} + +#define VGIC_CFG_LEVEL 0 +#define VGIC_CFG_EDGE 1 + +static bool +vgic_irq_is_edge(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + int irq_val; + + irq_val = vgic_bitmap_get_irq_val(vgic_distributor->irq_conf_prv[hypctx->vcpu], + vgic_distributor->irq_conf_shr, irq); + return irq_val == VGIC_CFG_EDGE; +} + +static int +vgic_irq_is_enabled(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + + return vgic_bitmap_get_irq_val(vgic_distributor->irq_enabled_prv[hypctx->vcpu], + vgic_distributor->irq_enabled_shr, irq); +} + +static int +vgic_irq_is_active(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + + return vgic_bitmap_get_irq_val(vgic_distributor->irq_active_prv[hypctx->vcpu], + vgic_distributor->irq_active_shr, irq); +} + +static void +vgic_irq_set_active(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + + vgic_bitmap_set_irq_val(vgic_distributor->irq_active_prv[hypctx->vcpu], + vgic_distributor->irq_active_shr, irq, 1); +} + +static void +vgic_irq_clear_active(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + + vgic_bitmap_set_irq_val(vgic_distributor->irq_active_prv[hypctx->vcpu], + vgic_distributor->irq_active_shr, irq, 0); +} + +static int +vgic_dist_irq_is_pending(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + + return vgic_bitmap_get_irq_val(vgic_distributor->irq_state_prv[hypctx->vcpu], + vgic_distributor->irq_state_shr, irq); +} + +static void +vgic_dist_irq_set(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + + vgic_bitmap_set_irq_val(vgic_distributor->irq_state_prv[hypctx->vcpu], + vgic_distributor->irq_state_shr, irq, 1); +} + +static void +vgic_dist_irq_clear(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + + vgic_bitmap_set_irq_val(vgic_distributor->irq_state_prv[hypctx->vcpu], + vgic_distributor->irq_state_shr, irq, 0); +} + +static void +vgic_cpu_irq_set(struct hypctx *hypctx, int irq) +{ + struct vgic_cpu_int *vgic_cpu_int = &hypctx->vgic_cpu_int; + + if (irq < VGIC_NR_PRV_IRQ) + set_bit(irq, vgic_cpu_int->pending_prv); + else + set_bit(irq - 
VGIC_NR_PRV_IRQ, vgic_cpu_int->pending_shr); +} + +static void +vgic_cpu_irq_clear(struct hypctx *hypctx, int irq) +{ + struct vgic_cpu_int *vgic_cpu_int = &hypctx->vgic_cpu_int; + + if (irq < VGIC_NR_PRV_IRQ) + clear_bit(irq, vgic_cpu_int->pending_prv); + else + clear_bit(irq - VGIC_NR_PRV_IRQ, vgic_cpu_int->pending_shr); +} + +static int +compute_pending_for_cpu(struct hyp *hyp, int vcpu) +{ + struct vgic_distributor *vgic_distributor = &hyp->vgic_distributor; + struct vgic_cpu_int *vgic_cpu_int = &hyp->ctx[vcpu].vgic_cpu_int; + + uint32_t *pending, *enabled, *pend_percpu, *pend_shared, *target; + uint32_t pending_private, pending_shared; + + pend_percpu = vgic_cpu_int->pending_prv; + pend_shared = vgic_cpu_int->pending_shr; + + pending = vgic_distributor->irq_state_prv[vcpu]; + enabled = vgic_distributor->irq_enabled_prv[vcpu]; + bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRV_IRQ); + + pending = vgic_distributor->irq_state_shr; + enabled = vgic_distributor->irq_enabled_shr; + target = vgic_distributor->irq_target_shr; + bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHR_IRQ); + bitmap_and(pend_shared, pend_shared, target, VGIC_NR_SHR_IRQ); + + pending_private = find_first_bit(pend_percpu, VGIC_NR_PRV_IRQ); + pending_shared = find_first_bit(pend_shared, VGIC_NR_SHR_IRQ); + return (pending_private < VGIC_NR_PRV_IRQ || + pending_shared < VGIC_NR_SHR_IRQ); +} + +static void +vgic_dispatch_sgi(struct hypctx *hypctx) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + // TODO Get actual number of cpus on current machine + int vcpu_num = VM_MAXCPU; + int sgi, mode, cpu; + uint8_t targets; + + sgi = vgic_distributor->sgir & 0xf; + targets = (vgic_distributor->sgir >> 16) & 0xff; + mode = (vgic_distributor->sgir >> 24) & 3; + + switch (mode) { + case 0: + if (!targets) + return; + + case 1: + targets = ((1 << vcpu_num) - 1) & ~(1 << hypctx->vcpu) & 0xff; + break; + + case 2: + targets = 1 << hypctx->vcpu; + break; + } + + for (cpu = 0; cpu < vcpu_num; ++cpu) { + if ((targets >> cpu) & 1) { + vgic_dist_irq_set(hypctx, sgi); + vgic_distributor->irq_sgi_source[cpu][sgi] |= 1 << hypctx->vcpu; + //printf("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); + } + } +} + +static void +vgic_update_state(struct hyp *hyp) +{ + struct vgic_distributor *vgic_distributor = &hyp->vgic_distributor; + int cpu; + + //mtx_lock_spin(&vgic_distributor->distributor_lock); + + if (!vgic_distributor->enabled) { + set_bit(0, &vgic_distributor->irq_pending_on_cpu); + goto end; + } + + // TODO Get actual number of cpus on current machine + for (cpu = 0; cpu < VM_MAXCPU; ++cpu) { + if (compute_pending_for_cpu(hyp, cpu)) { + printf("CPU%d has pending interrupts\n", cpu); + set_bit(cpu, &vgic_distributor->irq_pending_on_cpu); + } + } + +end: + ;//mtx_unlock_spin(&vgic_distributor->distributor_lock); +} + +#define LR_CPUID(lr) \ + (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) +#define MK_LR_PEND(src, irq) \ + (GICH_LR_PENDING | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) + +static void +vgic_retire_disabled_irqs(struct hypctx *hypctx) +{ + struct vgic_cpu_int *vgic_cpu_int = &hypctx->vgic_cpu_int; + int lr_idx; + + for_each_set_bit(lr_idx, (uint32_t *)vgic_cpu_int->lr_used, + vgic_cpu_int->lr_num) { + + int irq = vgic_cpu_int->lr[lr_idx] & GICH_LR_VIRTID; + + if (!vgic_irq_is_enabled(hypctx, irq)) { + vgic_cpu_int->irq_to_lr[irq] = LR_EMPTY; + clear_bit(lr_idx, (uint32_t *)vgic_cpu_int->lr_used); + vgic_cpu_int->lr[lr_idx] &= ~GICH_LR_STATE; + if 
(vgic_irq_is_active(hypctx, irq)) + vgic_irq_clear_active(hypctx, irq); + } + } +} + +static bool +vgic_queue_irq(struct hypctx *hypctx, uint8_t sgi_source_cpu, int irq) +{ + struct vgic_cpu_int *vgic_cpu_int = &hypctx->vgic_cpu_int; + int lr_idx; + + //printf("Queue IRQ%d\n", irq); + + lr_idx = vgic_cpu_int->irq_to_lr[irq]; + + if (lr_idx != LR_EMPTY && + (LR_CPUID(vgic_cpu_int->lr[lr_idx]) == sgi_source_cpu)) { + + //printf("LR%d piggyback for IRQ%d %x\n", lr, irq, vgic_cpu->vgic_lr[lr]); + + vgic_cpu_int->lr[lr_idx] |= GICH_LR_PENDING; + + goto end; + } + + lr_idx = find_first_zero_bit((uint32_t *)vgic_cpu_int->lr_used, + vgic_cpu_int->lr_num); + if (lr_idx >= vgic_cpu_int->lr_num) + return false; + + //printf("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); + vgic_cpu_int->lr[lr_idx] = MK_LR_PEND(sgi_source_cpu, irq); + vgic_cpu_int->irq_to_lr[irq] = lr_idx; + set_bit(lr_idx, (uint32_t *)vgic_cpu_int->lr_used); + +end: + if (!vgic_irq_is_edge(hypctx, irq)) + vgic_cpu_int->lr[lr_idx] |= GICH_LR_EOI; + + return true; +} + +static bool +vgic_queue_sgi(struct hypctx *hypctx, int irq) +{ + struct vgic_distributor *vgic_distributor = &hypctx->hyp->vgic_distributor; + uint8_t source, cpu; + + source = vgic_distributor->irq_sgi_source[hypctx->vcpu][irq]; + + for_each_set_bit(cpu, (uint32_t *)&source, VGIC_MAXCPU) { + if (vgic_queue_irq(hypctx, cpu, irq)) + clear_bit(cpu, (uint32_t *)&source); + } + + vgic_distributor->irq_sgi_source[hypctx->vcpu][irq] = source; + + if (!source) { + vgic_dist_irq_clear(hypctx, irq); + vgic_cpu_irq_clear(hypctx, irq); + return true; + } + + return false; +} + +static bool +vgic_queue_hwirq(struct hypctx *hypctx, int irq) +{ + if (vgic_irq_is_active(hypctx, irq)) + return true; /* already queued */ + + if (vgic_queue_irq(hypctx, 0, irq)) { + if (vgic_irq_is_edge(hypctx, irq)) { + vgic_dist_irq_clear(hypctx, irq); + vgic_cpu_irq_clear(hypctx, irq); + } else { + vgic_irq_set_active(hypctx, irq); + } + + return true; + } + + return false; +} + +static bool +vgic_process_maintenance(struct hypctx *hypctx) +{ + struct vgic_cpu_int *vgic_cpu_int = &hypctx->vgic_cpu_int; + int lr_idx, irq; + bool level_pending = false; + + //printf("MISR = %08x\n", vgic_cpu_int->misr); + + if (vgic_cpu_int->misr & GICH_MISR_EOI) { + + for_each_set_bit(lr_idx, (uint32_t *)&vgic_cpu_int->eisr, + vgic_cpu_int->lr_num) { + + irq = vgic_cpu_int->lr[lr_idx] & GICH_LR_VIRTID; + + vgic_irq_clear_active(hypctx, irq); + vgic_cpu_int->lr[lr_idx] &= ~GICH_LR_EOI; + + if (vgic_dist_irq_is_pending(hypctx, irq)) { + vgic_cpu_irq_set(hypctx, irq); + level_pending = true; + } else { + vgic_cpu_irq_clear(hypctx, irq); + } + } + } + + if (vgic_cpu_int->misr & GICH_MISR_U) + vgic_cpu_int->hcr &= ~GICH_HCR_UIE; + + return level_pending; +} + +void +vgic_flush_hwstate(void *arg) +{ + struct hypctx *hypctx; + struct vgic_cpu_int *vgic_cpu_int; + struct vgic_distributor *vgic_distributor; + int i, overflow = 0; + + hypctx = arg; + vgic_cpu_int = &hypctx->vgic_cpu_int; + vgic_distributor = &hypctx->hyp->vgic_distributor; + + //printf("vgic_flush_hwstate\n"); + + //mtx_lock_spin(&vgic_distributor->distributor_lock); + + if (!vgic_vcpu_pending_irq(hypctx)) { + //printf("CPU%d has no pending interrupt\n", hypctx->vcpu); + goto end; + } + + /* SGIs */ + for_each_set_bit(i, vgic_cpu_int->pending_prv, VGIC_NR_SGI) { + if (!vgic_queue_sgi(hypctx, i)) + overflow = 1; + } + + /* PPIs */ + for_each_set_bit_from(i, vgic_cpu_int->pending_prv, VGIC_NR_PRV_IRQ) { + if (!vgic_queue_hwirq(hypctx, i)) + overflow = 1; + 
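/* No free list register was available; GICH_HCR_UIE is set below to request an underflow maintenance interrupt so the remaining IRQs can be queued later. */ +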
} + + /* SPIs */ + for_each_set_bit(i, vgic_cpu_int->pending_shr, VGIC_NR_SHR_IRQ) { + if (!vgic_queue_hwirq(hypctx, i + VGIC_NR_PRV_IRQ)) + overflow = 1; + } + +end: + if (overflow) { + vgic_cpu_int->hcr |= GICH_HCR_UIE; + } else { + vgic_cpu_int->hcr &= ~GICH_HCR_UIE; + clear_bit(hypctx->vcpu, &vgic_distributor->irq_pending_on_cpu); + } + //mtx_unlock_spin(&vgic_distributor->distributor_lock); +} + +void +vgic_sync_hwstate(void *arg) +{ + struct hypctx *hypctx; + struct vgic_cpu_int *vgic_cpu_int; + struct vgic_distributor *vgic_distributor; + int lr_idx, pending, irq; + bool level_pending; + + hypctx = arg; + vgic_cpu_int = &hypctx->vgic_cpu_int; + vgic_distributor = &hypctx->hyp->vgic_distributor; + + //printf("vgic_sync_hwstate\n"); + + level_pending = vgic_process_maintenance(hypctx); + + for_each_set_bit(lr_idx, (uint32_t *)&vgic_cpu_int->elsr, + vgic_cpu_int->lr_num) { + + if (!test_and_clear_bit(lr_idx, (uint32_t *)vgic_cpu_int->lr_used)) + continue; + + irq = vgic_cpu_int->lr[lr_idx] & GICH_LR_VIRTID; + vgic_cpu_int->irq_to_lr[irq] = LR_EMPTY; + } + + pending = find_first_zero_bit((uint32_t *)&vgic_cpu_int->elsr, + vgic_cpu_int->lr_num); + if (level_pending || pending < vgic_cpu_int->lr_num) + set_bit(hypctx->vcpu, &vgic_distributor->irq_pending_on_cpu); +} + +int +vgic_vcpu_pending_irq(void *arg) +{ + struct hypctx *hypctx; + struct vgic_distributor *vgic_distributor; + + hypctx = arg; + vgic_distributor = &hypctx->hyp->vgic_distributor; + + return test_bit(hypctx->vcpu, &vgic_distributor->irq_pending_on_cpu); +} + +static int +vgic_maintenance_intr(void *arg) +{ + + static struct arm_gic_softc *sc; + int maintenance_intr; + + sc = (struct arm_gic_softc *)arg; + + maintenance_intr = gic_h_read_4(sc, GICH_MISR); + + //printf("%s: %x\n",__func__, maintenance_intr); + + return (FILTER_HANDLED); +} + +int +vgic_hyp_init(void) +{ + int error; + + if (!(gic_sc = get_arm_gic_sc())) { + //printf("vgic_hyp_init: GIC no present\n"); + return (ENXIO); + } + + if (bus_alloc_resources(gic_sc->gic_dev, arm_vgic_spec, arm_vgic_res)) { + //printf("vgic_hyp_init: Could not allocate IRQ resource\n"); + + return (ENXIO); + } + + if (arm_vgic_res[VIRTUAL_INTERFACE_CONTROL] == NULL || + arm_vgic_res[VIRTUAL_CPU_INTERFACE] == NULL) { + printf("vgic_hyp_init: Virtual CPU interface control" + " and registers not present in DTS\n"); + return (ENXIO); + } + + /* Virtual Interface Control */ + gic_sc->gic_h_bst = rman_get_bustag(arm_vgic_res[VIRTUAL_INTERFACE_CONTROL]); + gic_sc->gic_h_bsh = rman_get_bushandle(arm_vgic_res[VIRTUAL_INTERFACE_CONTROL]); + virtual_int_ctrl_vaddr = (uint64_t)rman_get_virtual(arm_vgic_res[VIRTUAL_INTERFACE_CONTROL]); + virtual_int_ctrl_paddr = (uint64_t)rman_get_start(arm_vgic_res[VIRTUAL_INTERFACE_CONTROL]); + virtual_int_ctrl_size = rman_get_size(arm_vgic_res[VIRTUAL_INTERFACE_CONTROL]); + + /* Virtual CPU Interface */ + virtual_cpu_int_paddr = rman_get_start(arm_vgic_res[VIRTUAL_CPU_INTERFACE]); + virtual_cpu_int_size = rman_get_size(arm_vgic_res[VIRTUAL_CPU_INTERFACE]); + + lr_num = (gic_h_read_4(gic_sc, GICH_VTR) & 0x3f) + 1; + + + /* Register the vGIC maintenance interrupt */ + error = bus_setup_intr(gic_sc->gic_dev, arm_vgic_res[MAINTENANCE_INTR], INTR_TYPE_CLK, + vgic_maintenance_intr, NULL, gic_sc, &arm_vgic_maintenance_intr_ihl[0]); + if (error) { + //printf("vgic_hyp_init: Unable to setup maintenance interrupt\n"); + return (ENXIO); + } + + lpae_vmmmap_set(NULL, + (lpae_vm_vaddr_t)virtual_int_ctrl_vaddr, + (lpae_vm_paddr_t)virtual_int_ctrl_paddr, + 
virtual_int_ctrl_size, + VM_PROT_READ | VM_PROT_WRITE); + + return (0); +} Index: sys/arm/vmm/vmm.c =================================================================== --- sys/arm/vmm/vmm.c +++ sys/arm/vmm/vmm.c @@ -0,0 +1,672 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "vmm_stat.h" +#include "vmm_mem.h" +#include "mmu.h" +#include "vgic.h" + +struct vcpu { + int flags; + enum vcpu_state state; + struct mtx mtx; + int hostcpu; /* host cpuid this vcpu last ran on */ + int vcpuid; + void *stats; + struct vm_exit exitinfo; + uint64_t nextpc; /* (x) next instruction to execute */ +}; + +#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) +#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) +#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) +#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) +#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) + +struct mem_seg { + uint64_t gpa; + size_t len; + boolean_t wired; + vm_object_t object; +}; +#define VM_MAX_MEMORY_SEGMENTS 2 + +struct vm { + void *cookie; /* processor-specific data */ + struct vcpu vcpu[VM_MAXCPU]; + int num_mem_segs; + struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS]; + char name[VM_MAX_NAMELEN]; + + /* + * Set of active vcpus. + * An active vcpu is one that has been started implicitly (BSP) or + * explicitly (AP) by sending it a startup ipi. + */ + cpuset_t active_cpus; +}; + + +static int vmm_initialized; + +static struct vmm_ops *ops; +#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) +#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) + +#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm, NULL): NULL) +#define VMRUN(vmi, vcpu, pc, pmap, rptr, sptr) \ + (ops != NULL ? (*ops->vmrun)(vmi, vcpu, pc, pmap, rptr, sptr) : ENXIO) +#define VMCLEANUP(vmi) (ops != NULL ? 
(*ops->vmcleanup)(vmi) : NULL) +#define VMMMAP_SET(vmi, gpa, hpa, len, prot) \ + (ops != NULL ? \ + (*ops->vmmapset)(vmi, gpa, hpa, len, prot) : ENXIO) +#define VMMMAP_GET(vmi, gpa) \ + (ops != NULL ? (*ops->vmmapget)(vmi, gpa) : ENXIO) +#define VMGETREG(vmi, vcpu, num, retval) \ + (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) +#define VMSETREG(vmi, vcpu, num, val) \ + (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) +#define VMGETCAP(vmi, vcpu, num, retval) \ + (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) +#define VMSETCAP(vmi, vcpu, num, val) \ + (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) + +#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) +#define fpu_stop_emulating() clts() + +static MALLOC_DEFINE(M_VM, "vm", "vm"); + +/* statistics */ +static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); + +SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); + +/* + * Halt the guest if all vcpus are executing a HLT instruction with + * interrupts disabled. + */ +static int halt_detection_enabled = 1; +SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, + &halt_detection_enabled, 0, + "Halt VM if all vcpus execute HLT with interrupts disabled"); + +static int vmm_ipinum; +SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, + "IPI vector used for vcpu notifications"); + +static int trace_guest_exceptions; +SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, + &trace_guest_exceptions, 0, + "Trap into hypervisor on all guest exceptions and reflect them back"); + +static void +vcpu_cleanup(struct vm *vm, int i, bool destroy) +{ +// struct vcpu *vcpu = &vm->vcpu[i]; + +} +static void +vcpu_init(struct vm *vm, uint32_t vcpu_id) +{ + struct vcpu *vcpu; + + vcpu = &vm->vcpu[vcpu_id]; + + vcpu_lock_init(vcpu); + vcpu->hostcpu = NOCPU; + vcpu->vcpuid = vcpu_id; +} + +struct vm_exit * +vm_exitinfo(struct vm *vm, int cpuid) +{ + struct vcpu *vcpu; + + if (cpuid < 0 || cpuid >= VM_MAXCPU) + panic("vm_exitinfo: invalid cpuid %d", cpuid); + + vcpu = &vm->vcpu[cpuid]; + + return (&vcpu->exitinfo); +} + +static int +vmm_init(void) +{ + ops = &vmm_ops_arm; + + return (VMM_INIT(0)); +} + +static int +vmm_handler(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + vmmdev_init(); + error = vmm_init(); + if (error == 0) + vmm_initialized = 1; + break; + case MOD_UNLOAD: + error = vmmdev_cleanup(); + if (error == 0 && vmm_initialized) { + error = VMM_CLEANUP(); + if (error) + vmm_initialized = 0; + } + break; + default: + error = 0; + break; + } + return (error); +} + +static moduledata_t vmm_kmod = { + "vmm", + vmm_handler, + NULL +}; + +/* + * vmm initialization has the following dependencies: + * + * - HYP initialization requires smp_rendezvous() and therefore must happen + * after SMP is fully functional (after SI_SUB_SMP). + */ +DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); +MODULE_VERSION(vmm, 1); + +int +vm_create(const char *name, struct vm **retvm) +{ + int i; + struct vm *vm; + uint64_t maxaddr; + + const int BSP = 0; + + /* + * If vmm.ko could not be successfully initialized then don't attempt + * to create the virtual machine. 
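+ * (vmm_initialized is set in vmm_handler() at MOD_LOAD time once VMM_INIT() succeeds.)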
+ */ + if (!vmm_initialized) + return (ENXIO); + + if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) + return (EINVAL); + + vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); + strcpy(vm->name, name); + vm->cookie = VMINIT(vm); + + /* TEMP - PL804 timer mapping */ + VMMMAP_SET(vm->cookie, 0x1c110000, 0x1c110000, PAGE_SIZE, + VM_PROT_ALL); + + for (i = 0; i < VM_MAXCPU; i++) { + vcpu_init(vm, i); + } + + maxaddr = vmm_mem_maxaddr(); + vm_activate_cpu(vm, BSP); + + *retvm = vm; + return (0); +} + +static void +vm_cleanup(struct vm *vm, bool destroy) +{ + VMCLEANUP(vm->cookie); +} + +void +vm_destroy(struct vm *vm) +{ + vm_cleanup(vm, true); + free(vm, M_VM); +} + +const char * +vm_name(struct vm *vm) +{ + return (vm->name); +} + +int +vm_run(struct vm *vm, struct vm_run *vmrun) +{ + int error, vcpuid; + uint32_t pc; + struct vcpu *vcpu; + struct vm_exit *vme; + bool retu; + void *rptr = NULL, *sptr = NULL; + + vcpuid = vmrun->cpuid; + pc = vmrun->pc; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EINVAL); + + vcpu = &vm->vcpu[vcpuid]; + vme = &vcpu->exitinfo; + +// printf("%s vcpuid: %d, nextpc: %x\n",__func__, vcpuid, pc); + +restart: + critical_enter(); + + error = VMRUN(vm->cookie, vcpuid, pc, NULL, rptr, sptr); + +// printf("%s VMRUN error: %d\n",__func__, error); + + critical_exit(); + + if (error == 0) { + switch (vme->exitcode) { + case VM_EXITCODE_INST_EMUL: + /* Check if we need to do in-kernel emulation */ + + pc = vme->pc + vme->inst_length; + retu = true; + error = vgic_emulate_distributor(vm->cookie, vcpuid, vme, &retu); + break; + default: + retu = true; /* handled in userland */ + break; + } + } + + if (error == 0 && retu == false) + goto restart; + + /* copy the exit information */ + bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); + + return (error); +} + +int +vm_activate_cpu(struct vm *vm, int vcpuid) +{ + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EBUSY); + + CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); + return (0); + +} + +cpuset_t +vm_active_cpus(struct vm *vm) +{ + + return (vm->active_cpus); +} + +void * +vcpu_stats(struct vm *vm, int vcpuid) +{ + + return (vm->vcpu[vcpuid].stats); +} + +static int +vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, + bool from_idle) +{ + int error; + + vcpu_assert_locked(vcpu); + + /* + * State transitions from the vmmdev_ioctl() must always begin from + * the VCPU_IDLE state. This guarantees that there is only a single + * ioctl() operating on a vcpu at any point. 
+ */ + if (from_idle) { + while (vcpu->state != VCPU_IDLE) + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); + } else { + KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " + "vcpu idle state")); + } + + if (vcpu->state == VCPU_RUNNING) { + KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " + "mismatch for running vcpu", curcpu, vcpu->hostcpu)); + } else { + KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " + "vcpu that is not running", vcpu->hostcpu)); + } + + /* + * The following state transitions are allowed: + * IDLE -> FROZEN -> IDLE + * FROZEN -> RUNNING -> FROZEN + * FROZEN -> SLEEPING -> FROZEN + */ + switch (vcpu->state) { + case VCPU_IDLE: + case VCPU_RUNNING: + case VCPU_SLEEPING: + error = (newstate != VCPU_FROZEN); + break; + case VCPU_FROZEN: + error = (newstate == VCPU_FROZEN); + break; + default: + error = 1; + break; + } + + if (error) + return (EBUSY); + + vcpu->state = newstate; + if (newstate == VCPU_RUNNING) + vcpu->hostcpu = curcpu; + else + vcpu->hostcpu = NOCPU; + + if (newstate == VCPU_IDLE) + wakeup(&vcpu->state); + + return (0); +} + +int +vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, + bool from_idle) +{ + int error; + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + panic("vm_set_run_state: invalid vcpuid %d", vcpuid); + + vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + error = vcpu_set_state_locked(vcpu, newstate, from_idle); + vcpu_unlock(vcpu); + + return (error); +} + +enum vcpu_state +vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) +{ + struct vcpu *vcpu; + enum vcpu_state state; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + panic("vm_get_run_state: invalid vcpuid %d", vcpuid); + + vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + state = vcpu->state; + if (hostcpu != NULL) + *hostcpu = vcpu->hostcpu; + vcpu_unlock(vcpu); + + return (state); +} + +uint64_t +vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t len) +{ + uint64_t nextpage; + + nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); + if (len > nextpage - gpa) + panic("vm_gpa2hpa: invalid gpa/len: 0x%016llx/%zu", gpa, len); + + return (VMMMAP_GET(vm->cookie, gpa)); +} + +int +vm_gpabase2memseg(struct vm *vm, uint64_t gpabase, + struct vm_memory_segment *seg) +{ + int i; + + for (i = 0; i < vm->num_mem_segs; i++) { + if (gpabase == vm->mem_segs[i].gpa) { + *seg = vm->mem_segs[i]; + return (0); + } + } + return (-1); +} + +int +vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) +{ + + if (vcpu < 0 || vcpu >= VM_MAXCPU) + return (EINVAL); + + if (reg >= VM_REG_LAST) + return (EINVAL); + + return (VMGETREG(vm->cookie, vcpu, reg, retval)); +} + +int +vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) +{ + struct vcpu *vcpu; + int error; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (reg >= VM_REG_LAST) + return (EINVAL); + error = (VMSETREG(vm->cookie, vcpuid, reg, val)); + if (error || reg != VM_REG_GUEST_PC) + return (error); + + vcpu = &vm->vcpu[vcpuid]; + vcpu->nextpc = val; + + return(0); +} + +void * +vm_get_cookie(struct vm *vm) +{ + return vm->cookie; +} + +static void +vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg) +{ + size_t len; + uint64_t hpa; + + len = 0; + while (len < seg->len) { + hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE); + if (hpa == (uint64_t)-1) { + panic("vm_free_mem_segs: cannot free hpa " + "associated with gpa 0x%016llx", seg->gpa + len); + } + + vmm_mem_free(hpa, PAGE_SIZE); + + len += PAGE_SIZE; + } + + bzero(seg, sizeof(struct 
vm_memory_segment)); +} + + +/* + * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise + */ +static boolean_t +vm_gpa_available(struct vm *vm, uint64_t gpa) +{ + int i; + uint64_t gpabase, gpalimit; + + if (gpa & PAGE_MASK) + panic("vm_gpa_available: gpa (0x%016llx) not page aligned", gpa); + + for (i = 0; i < vm->num_mem_segs; i++) { + gpabase = vm->mem_segs[i].gpa; + gpalimit = gpabase + vm->mem_segs[i].len; + if (gpa >= gpabase && gpa < gpalimit) + return (FALSE); + } + + return (TRUE); +} + +int +vm_malloc(struct vm *vm, uint64_t gpa, size_t len) +{ + int error, available, allocated; + struct vm_memory_segment *seg; + uint64_t g, hpa; + + if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) + return (EINVAL); + + available = allocated = 0; + g = gpa; + while (g < gpa + len) { + if (vm_gpa_available(vm, g)) + available++; + else + allocated++; + + g += PAGE_SIZE; + } + + /* + * If there are some allocated and some available pages in the address + * range then it is an error. + */ + if (allocated && available) + return (EINVAL); + + /* + * If the entire address range being requested has already been + * allocated then there isn't anything more to do. + */ + if (allocated && available == 0) + return (0); + + if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) + return (E2BIG); + + seg = &vm->mem_segs[vm->num_mem_segs]; + + error = 0; + seg->gpa = gpa; + seg->len = 0; + while (seg->len < len) { + hpa = vmm_mem_alloc(PAGE_SIZE); + if (hpa == 0) { + error = ENOMEM; + break; + } + + error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE, + VM_PROT_ALL); + if (error) + break; + + seg->len += PAGE_SIZE; + } + + if (error) { + vm_free_mem_seg(vm, seg); + return (error); + } + + vm->num_mem_segs++; + + return (0); +} + +int +vm_attach_vgic(struct vm *vm, uint64_t distributor_paddr, uint64_t cpu_int_paddr) +{ + return vgic_attach(vm->cookie, distributor_paddr, cpu_int_paddr); +} Index: sys/arm/vmm/vmm_dev.c =================================================================== --- sys/arm/vmm/vmm_dev.c +++ sys/arm/vmm/vmm_dev.c @@ -0,0 +1,391 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +struct vmmdev_softc { + struct vm *vm; /* vm instance cookie */ + struct cdev *cdev; + SLIST_ENTRY(vmmdev_softc) link; + int flags; +}; +#define VSC_LINKED 0x01 + +static SLIST_HEAD(, vmmdev_softc) head; + +static struct mtx vmmdev_mtx; + +static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); + +SYSCTL_DECL(_hw_vmm); + +static struct vmmdev_softc * +vmmdev_lookup(const char *name) +{ + struct vmmdev_softc *sc; + +#ifdef notyet /* XXX kernel is not compiled with invariants */ + mtx_assert(&vmmdev_mtx, MA_OWNED); +#endif + + SLIST_FOREACH(sc, &head, link) { + if (strcmp(name, vm_name(sc->vm)) == 0) + break; + } + + return (sc); +} + +static struct vmmdev_softc * +vmmdev_lookup2(struct cdev *cdev) +{ + + return (cdev->si_drv1); +} + +static int +vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) +{ + int error = 0; + + return (error); +} + +static int +vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, + struct thread *td) +{ + int error, vcpu, state_changed; + struct vmmdev_softc *sc; + struct vm_run *vmrun; + struct vm_memory_segment *seg; + struct vm_register *vmreg; + struct vm_activate_cpu *vac; + struct vm_attach_vgic *vav; + + sc = vmmdev_lookup2(cdev); + if (sc == NULL) + return (ENXIO); + + error = 0; + vcpu = -1; + state_changed = 0; + + /* + * Some VMM ioctls can operate only on vcpus that are not running. + */ + switch (cmd) { + case VM_RUN: + case VM_GET_REGISTER: + case VM_SET_REGISTER: + /* + * XXX fragile, handle with care + * Assumes that the first field of the ioctl data is the vcpu. + */ + vcpu = *(int *)data; + if (vcpu < 0 || vcpu >= VM_MAXCPU) { + error = EINVAL; + goto done; + } + + error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); + if (error) + goto done; + + state_changed = 1; + break; + + case VM_MAP_MEMORY: + case VM_ATTACH_VGIC: + /* + * ioctls that operate on the entire virtual machine must + * prevent all vcpus from running. 
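+ * Each vcpu is frozen in turn; on failure the already-frozen ones are put back to VCPU_IDLE before returning.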
+ */ + error = 0; + for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { + error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); + if (error) + break; + } + + if (error) { + while (--vcpu >= 0) + vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); + goto done; + } + + state_changed = 2; + break; + default: + break; + } + + switch(cmd) { + case VM_RUN: + vmrun = (struct vm_run *)data; + error = vm_run(sc->vm, vmrun); + break; + case VM_MAP_MEMORY: + seg = (struct vm_memory_segment *)data; + error = vm_malloc(sc->vm, seg->gpa, seg->len); + break; + case VM_GET_MEMORY_SEG: + seg = (struct vm_memory_segment *)data; + seg->len = 0; + (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); + error = 0; + break; + case VM_GET_REGISTER: + vmreg = (struct vm_register *)data; + error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, + &vmreg->regval); + break; + case VM_SET_REGISTER: + vmreg = (struct vm_register *)data; + error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, + vmreg->regval); + break; + case VM_ACTIVATE_CPU: + vac = (struct vm_activate_cpu *)data; + error = vm_activate_cpu(sc->vm, vac->vcpuid); + case VM_ATTACH_VGIC: + vav = (struct vm_attach_vgic *)data; + error = vm_attach_vgic(sc->vm, vav->distributor_paddr, + vav->cpu_int_paddr); + default: + error = ENOTTY; + break; + } + + if (state_changed == 1) { + vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); + } else if (state_changed == 2) { + for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) + vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); + } + +done: + /* Make sure that no handler returns a bogus value like ERESTART */ + KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error)); + return (error); +} + +static int +vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, + int nprot, vm_memattr_t *memattr) +{ + int error; + struct vmmdev_softc *sc; + + error = -1; + mtx_lock(&vmmdev_mtx); + + sc = vmmdev_lookup2(cdev); + if (sc != NULL && (nprot & PROT_EXEC) == 0) { + *paddr = (vm_paddr_t) vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE); + if (*paddr != (vm_paddr_t)-1) + error = 0; + } + + mtx_unlock(&vmmdev_mtx); + + return (error); +} + +static void +vmmdev_destroy(void *arg) +{ + + struct vmmdev_softc *sc = arg; + + if (sc->cdev != NULL) + destroy_dev(sc->cdev); + + if (sc->vm != NULL) + vm_destroy(sc->vm); + + if ((sc->flags & VSC_LINKED) != 0) { + mtx_lock(&vmmdev_mtx); + SLIST_REMOVE(&head, sc, vmmdev_softc, link); + mtx_unlock(&vmmdev_mtx); + } + + free(sc, M_VMMDEV); +} + +static int +sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) +{ + int error; + char buf[VM_MAX_NAMELEN]; + struct vmmdev_softc *sc; + struct cdev *cdev; + + strlcpy(buf, "beavis", sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(buf); + if (sc == NULL || sc->cdev == NULL) { + mtx_unlock(&vmmdev_mtx); + return (EINVAL); + } + + /* + * The 'cdev' will be destroyed asynchronously when 'si_threadcount' + * goes down to 0 so we should not do it again in the callback. + */ + cdev = sc->cdev; + sc->cdev = NULL; + mtx_unlock(&vmmdev_mtx); + + /* + * Schedule the 'cdev' to be destroyed: + * + * - any new operations on this 'cdev' will return an error (ENXIO). + * + * - when the 'si_threadcount' dwindles down to zero the 'cdev' will + * be destroyed and the callback will be invoked in a taskqueue + * context. 
+ */ + destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); + + return (0); +} +SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, + NULL, 0, sysctl_vmm_destroy, "A", NULL); + +static struct cdevsw vmmdevsw = { + .d_name = "vmmdev", + .d_version = D_VERSION, + .d_ioctl = vmmdev_ioctl, + .d_mmap = vmmdev_mmap, + .d_read = vmmdev_rw, + .d_write = vmmdev_rw, +}; + +static int +sysctl_vmm_create(SYSCTL_HANDLER_ARGS) +{ + int error; + struct vm *vm; + struct cdev *cdev; + struct vmmdev_softc *sc, *sc2; + char buf[VM_MAX_NAMELEN]; + + strlcpy(buf, "beavis", sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(buf); + mtx_unlock(&vmmdev_mtx); + if (sc != NULL) + return (EEXIST); + + error = vm_create(buf, &vm); + if (error != 0) + return (error); + + sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); + sc->vm = vm; + + /* + * Lookup the name again just in case somebody sneaked in when we + * dropped the lock. + */ + mtx_lock(&vmmdev_mtx); + sc2 = vmmdev_lookup(buf); + if (sc2 == NULL) { + SLIST_INSERT_HEAD(&head, sc, link); + sc->flags |= VSC_LINKED; + } + mtx_unlock(&vmmdev_mtx); + + if (sc2 != NULL) { + vmmdev_destroy(sc); + return (EEXIST); + } + + error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, + UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); + if (error != 0) { + vmmdev_destroy(sc); + return (error); + } + + mtx_lock(&vmmdev_mtx); + sc->cdev = cdev; + sc->cdev->si_drv1 = sc; + mtx_unlock(&vmmdev_mtx); + + return (0); +} +SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW, + NULL, 0, sysctl_vmm_create, "A", NULL); + +void +vmmdev_init(void) +{ + mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); +} + +int +vmmdev_cleanup(void) +{ + int error; + + if (SLIST_EMPTY(&head)) + error = 0; + else + error = EBUSY; + + return (error); +} Index: sys/arm/vmm/vmm_instruction_emul.c =================================================================== --- sys/arm/vmm/vmm_instruction_emul.c +++ sys/arm/vmm/vmm_instruction_emul.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifdef _KERNEL +#include +#include +#include +#include + +#include + +#include + +#else +#include +#include +#include +#include +#include + +#include + +#include +#include +#endif + +#include + +int +vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +{ + int error; + uint64_t val; + + if(vie->dir) { + error = vm_get_register(vm, vcpuid, vie->reg, &val); + if (error) + goto out; + + error = memwrite(vm, vcpuid, gpa, val, vie->access_size, memarg); + } else { + error = memread(vm, vcpuid, gpa, &val, vie->access_size, memarg); + if (error) + goto out; + error = vm_set_register(vm, vcpuid, vie->reg, val); + } +out: + return (error); +} Index: sys/arm/vmm/vmm_mem.h =================================================================== --- sys/arm/vmm/vmm_mem.h +++ sys/arm/vmm/vmm_mem.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_MEM_H_ +#define _VMM_MEM_H_ + +int vmm_mem_init(void); +vm_paddr_t vmm_mem_alloc(size_t size); +void vmm_mem_free(vm_paddr_t start, size_t size); +vm_paddr_t vmm_mem_maxaddr(void); + +#endif Index: sys/arm/vmm/vmm_mem.c =================================================================== --- sys/arm/vmm/vmm_mem.c +++ sys/arm/vmm/vmm_mem.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "vmm_mem.h" + +SYSCTL_DECL(_hw_vmm); + +static u_long pages_allocated; +SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD, + &pages_allocated, 0, "4KB pages allocated"); + +static void +update_pages_allocated(int howmany) +{ + pages_allocated += howmany; /* XXX locking? */ +} + +int +vmm_mem_init(void) +{ + + return (0); +} + +vm_paddr_t +vmm_mem_alloc(size_t size) +{ + + int flags; + vm_page_t m; + vm_paddr_t pa; + + if (size != PAGE_SIZE) + panic("vmm_mem_alloc: invalid allocation size %zu", size); + + flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO; + + while (1) { + /* + * XXX need policy to determine when to back off the allocation + */ + m = vm_page_alloc(NULL, 0, flags); + if (m == NULL) + VM_WAIT; + else + break; + } + + pa = VM_PAGE_TO_PHYS(m); + + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + + m->valid = VM_PAGE_BITS_ALL; + update_pages_allocated(1); + + return (pa); +} + +void +vmm_mem_free(vm_paddr_t base, size_t length) +{ + vm_page_t m; + + if (base & PAGE_MASK) { + panic("vmm_mem_free: base 0x%0x must be aligned on a " + "0x%0x boundary\n", base, PAGE_SIZE); + } + + if (length != PAGE_SIZE) + panic("vmm_mem_free: invalid length %zu", length); + + m = PHYS_TO_VM_PAGE(base); + m->wire_count--; + vm_page_free(m); + atomic_subtract_int(&vm_cnt.v_wire_count, 1); + + update_pages_allocated(-1); +} + +vm_paddr_t +vmm_mem_maxaddr(void) +{ + + return (ptoa(Maxmem)); +} Index: sys/arm/vmm/vmm_stat.h =================================================================== --- sys/arm/vmm/vmm_stat.h +++ sys/arm/vmm/vmm_stat.h @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_STAT_H_ +#define _VMM_STAT_H_ + +struct vm; + +#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */ + +enum vmm_stat_scope { + VMM_STAT_SCOPE_ANY, + VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */ + VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */ +}; + +struct vmm_stat_type; +typedef void (*vmm_stat_func_t)(struct vm *vm, int vcpu, + struct vmm_stat_type *stat); + +struct vmm_stat_type { + int index; /* position in the stats buffer */ + int nelems; /* standalone or array */ + const char *desc; /* description of statistic */ + vmm_stat_func_t func; + enum vmm_stat_scope scope; +}; + +void vmm_stat_register(void *arg); + +#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \ + struct vmm_stat_type type[1] = { \ + { -1, nelems, desc, func, scope } \ + }; \ + SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type) + +#define VMM_STAT_DEFINE(type, nelems, desc, scope) \ + VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope) + +#define VMM_STAT_DECLARE(type) \ + extern struct vmm_stat_type type[1] + +#define VMM_STAT(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY) +#define VMM_STAT_INTEL(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL) +#define VMM_STAT_AMD(type, desc) \ + VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD) + +#define VMM_STAT_FUNC(type, desc, func) \ + VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY) + +#define VMM_STAT_ARRAY(type, nelems, desc) \ + VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY) + +void *vmm_stat_alloc(void); +void vmm_stat_init(void *vp); +void vmm_stat_free(void *vp); + +/* + * 'buf' should be at least fit 'MAX_VMM_STAT_TYPES' entries + */ +int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf); +int vmm_stat_desc_copy(int index, char *buf, int buflen); + +static void __inline +vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, + int statidx, uint64_t x) +{ +#ifdef VMM_KEEP_STATS + uint64_t *stats; + + stats = vcpu_stats(vm, vcpu); + + if (vst->index >= 0 && statidx < vst->nelems) + stats[vst->index + statidx] += x; +#endif +} + +static void __inline +vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, + int statidx, uint64_t val) +{ +#ifdef VMM_KEEP_STATS + uint64_t *stats; + + stats = vcpu_stats(vm, vcpu); + + if (vst->index >= 0 && statidx < vst->nelems) + stats[vst->index + statidx] = val; +#endif +} + +static void __inline +vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x) +{ + +#ifdef VMM_KEEP_STATS + vmm_stat_array_incr(vm, vcpu, vst, 0, x); +#endif +} + +static void __inline +vmm_stat_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t val) +{ + +#ifdef VMM_KEEP_STATS + vmm_stat_array_set(vm, vcpu, vst, 0, val); +#endif +} + +VMM_STAT_DECLARE(VCPU_MIGRATIONS); +VMM_STAT_DECLARE(VMEXIT_COUNT); +VMM_STAT_DECLARE(VMEXIT_EXTINT); +VMM_STAT_DECLARE(VMEXIT_HLT); +VMM_STAT_DECLARE(VMEXIT_CR_ACCESS); +VMM_STAT_DECLARE(VMEXIT_RDMSR); 
+VMM_STAT_DECLARE(VMEXIT_WRMSR); +VMM_STAT_DECLARE(VMEXIT_MTRAP); +VMM_STAT_DECLARE(VMEXIT_PAUSE); +VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW); +VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW); +VMM_STAT_DECLARE(VMEXIT_INOUT); +VMM_STAT_DECLARE(VMEXIT_CPUID); +VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT); +VMM_STAT_DECLARE(VMEXIT_INST_EMUL); +VMM_STAT_DECLARE(VMEXIT_UNKNOWN); +VMM_STAT_DECLARE(VMEXIT_ASTPENDING); +VMM_STAT_DECLARE(VMEXIT_USERSPACE); +VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS); +VMM_STAT_DECLARE(VMEXIT_EXCEPTION); +#endif Index: sys/arm/vmm/vmm_stat.c =================================================================== --- sys/arm/vmm/vmm_stat.c +++ sys/arm/vmm/vmm_stat.c @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include + +#include +#include "vmm_stat.h" + +/* + * 'vst_num_elems' is the total number of addressable statistic elements + * 'vst_num_types' is the number of unique statistic types + * + * It is always true that 'vst_num_elems' is greater than or equal to + * 'vst_num_types'. This is because a stat type may represent more than + * one element (for e.g. VMM_STAT_ARRAY). 
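The macros and inline helpers above are the whole producer-side API: a counter is declared once with VMM_STAT(), which arranges registration through SYSINIT, and bumped with vmm_stat_incr(), which compiles to nothing unless VMM_KEEP_STATS is defined (the vmm-arm module Makefile later in this patch does define it). A hypothetical ARM-flavoured counter, purely for illustration (VMEXIT_WFI is not part of the patch):

#include "vmm_stat.h"

VMM_STAT(VMEXIT_WFI, "vm exits due to wfi instruction");

static void
arm_wfi_exit_seen(struct vm *vm, int vcpu)
{

    /* No-op unless the module is built with VMM_KEEP_STATS. */
    vmm_stat_incr(vm, vcpu, VMEXIT_WFI, 1);
}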
+ */ +static int vst_num_elems, vst_num_types; +static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS]; + +static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat"); + +#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t)) + +void +vmm_stat_register(void *arg) +{ + struct vmm_stat_type *vst = arg; + + /* We require all stats to identify themselves with a description */ + if (vst->desc == NULL) + return; + + if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) { + printf("Cannot accomodate vmm stat type \"%s\"!\n", vst->desc); + return; + } + + vst->index = vst_num_elems; + vst_num_elems += vst->nelems; + + vsttab[vst_num_types++] = vst; +} + +int +vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf) +{ + struct vmm_stat_type *vst; + uint64_t *stats; + int i; + + if (vcpu < 0 || vcpu >= VM_MAXCPU) + return (EINVAL); + + /* Let stats functions update their counters */ + for (i = 0; i < vst_num_types; i++) { + vst = vsttab[i]; + if (vst->func != NULL) + (*vst->func)(vm, vcpu, vst); + } + + /* Copy over the stats */ + stats = vcpu_stats(vm, vcpu); + for (i = 0; i < vst_num_elems; i++) + buf[i] = stats[i]; + *num_stats = vst_num_elems; + return (0); +} + +void * +vmm_stat_alloc(void) +{ + + return (malloc(vst_size, M_VMM_STAT, M_WAITOK)); +} + +void +vmm_stat_init(void *vp) +{ + + bzero(vp, vst_size); +} + +void +vmm_stat_free(void *vp) +{ + free(vp, M_VMM_STAT); +} + +int +vmm_stat_desc_copy(int index, char *buf, int bufsize) +{ + int i; + struct vmm_stat_type *vst; + + for (i = 0; i < vst_num_types; i++) { + vst = vsttab[i]; + if (index >= vst->index && index < vst->index + vst->nelems) { + if (vst->nelems > 1) { + snprintf(buf, bufsize, "%s[%d]", + vst->desc, index - vst->index); + } else { + strlcpy(buf, vst->desc, bufsize); + } + return (0); /* found it */ + } + } + + return (EINVAL); +} + +/* global statistics */ +VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus"); +VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); +VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt"); +VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted"); +VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted"); +VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted"); +VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted"); +VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits"); +VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted"); +VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening"); +VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening"); +VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted"); +VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted"); +VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault"); +VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); +VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); +VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); +VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); +VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); +VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions"); Index: sys/conf/options.arm =================================================================== --- sys/conf/options.arm +++ sys/conf/options.arm @@ -81,3 +81,4 @@ GFB_NO_MODE_CHANGE opt_gfb.h AT91C_MAIN_CLOCK opt_at91.h VFP opt_global.h +VMM_ARM_VGIC opt_global.h Index: sys/dev/bvm/bvm_console.c 
=================================================================== --- sys/dev/bvm/bvm_console.c +++ sys/dev/bvm/bvm_console.c @@ -38,6 +38,11 @@ #include #include +#if defined(__arm__) +#include +#include +#endif + #include #include @@ -64,9 +69,13 @@ static int alt_break_state; #endif +#if defined(__i386__) || defined(__amd64__) #define BVM_CONS_PORT 0x220 -static int bvm_cons_port = BVM_CONS_PORT; +#elif defined(__arm__) +#define BVM_CONS_PORT 0x1c090000 +#endif +static int bvm_cons_port = BVM_CONS_PORT; #define BVM_CONS_SIG ('b' << 8 | 'v') static void bvm_timeout(void *); @@ -85,8 +94,11 @@ bvm_rcons(u_char *ch) { int c; - +#if defined(__i386__) || defined(__amd64__) c = inl(bvm_cons_port); +#elif defined(__arm__) + c = (*(int *)bvm_cons_port); +#endif if (c != -1) { *ch = (u_char)c; return (0); @@ -97,8 +109,11 @@ static void bvm_wcons(u_char ch) { - +#if defined(__i386__) || defined(__amd64__) outl(bvm_cons_port, ch); +#elif defined(__arm__) + (*(int *)bvm_cons_port) = ch; +#endif } static void @@ -168,7 +183,7 @@ static void bvm_cnprobe(struct consdev *cp) { - int disabled, port; + int disabled; disabled = 0; cp->cn_pri = CN_DEAD; @@ -176,10 +191,15 @@ resource_int_value("bvmconsole", 0, "disabled", &disabled); if (!disabled) { + +#if defined(__i386__) || defined(__amd64__) if (resource_int_value("bvmconsole", 0, "port", &port) == 0) bvm_cons_port = port; - if (inw(bvm_cons_port) == BVM_CONS_SIG) +#elif defined(__arm__) + bvm_cons_port = (int) pmap_mapdev(bvm_cons_port, 0x1000); + if ((*(short *)bvm_cons_port) == BVM_CONS_SIG) +#endif cp->cn_pri = CN_REMOTE; } } Index: sys/modules/Makefile =================================================================== --- sys/modules/Makefile +++ sys/modules/Makefile @@ -385,6 +385,7 @@ videomode \ vkbd \ ${_vmm} \ + ${_vmm-arm} \ ${_vmware} \ ${_vpo} \ vr \ @@ -739,6 +740,11 @@ .if ${MACHINE_CPUARCH} == "arm" _cfi= cfi _cpsw= cpsw + +.if ${MK_BHYVE} != "no" || defined(ALL_MODULES) +_vmm-arm= vmm-arm +.endif + .endif .if ${MACHINE_CPUARCH} == "powerpc" Index: sys/modules/vmm-arm/Makefile =================================================================== --- sys/modules/vmm-arm/Makefile +++ sys/modules/vmm-arm/Makefile @@ -0,0 +1,33 @@ +KMOD= vmm-arm + +SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h +SRCS+= hyp_assym.h +DPSRCS= hyp_genassym.c + +CFLAGS+= -DVMM_KEEP_STATS -DSMP +CFLAGS+= -I${.CURDIR}/../../arm/vmm -I${.CURDIR}/../../arm/include + +# generic vmm support +.PATH: ${.CURDIR}/../../arm/vmm +SRCS+= vmm.c \ + vmm_dev.c \ + vmm_instruction_emul.c \ + vmm_mem.c \ + mmu.c \ + vmm_stat.c \ + arm.c \ + vgic.c \ + hyp.S + +.PATH: ${.CURDIR}/../../arm/arm +SRCS+= bitops.c + +CLEANFILES= hyp_assym.h hyp_genassym.o + +hyp_assym.h: hyp_genassym.o + sh ${SYSDIR}/kern/genassym.sh hyp_genassym.o > ${.TARGET} + +hyp_genassym.o: + ${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC} + +.include Index: usr.sbin/Makefile.arm =================================================================== --- usr.sbin/Makefile.arm +++ usr.sbin/Makefile.arm @@ -2,3 +2,8 @@ SUBDIR+= kgmon SUBDIR+= ofwdump + +.if ${MK_BHYVE} != "no" +SUBDIR+= bhyvearm +SUBDIR+= bhyveloadarm +.endif Index: usr.sbin/bhyvearm/Makefile =================================================================== --- usr.sbin/bhyvearm/Makefile +++ usr.sbin/bhyvearm/Makefile @@ -0,0 +1,24 @@ +# +# $FreeBSD$ +# + +PROG= bhyvearm + +DEBUG_FLAGS= -g -O0 + +SRCS= bhyverun.c block_if.c mem.c mevent.c consport.c + +.PATH: ${.CURDIR}/../../sys/arm/vmm + +MK_MAN=no + +.PATH: ${.CURDIR}/../../sys/arm/vmm 
+SRCS+= vmm_instruction_emul.c + +DPADD= ${LIBVMMAPIARM} +LDADD= -lvmmapiarm +LIBADD= md pthread + +WARNS?= 2 + +.include Index: usr.sbin/bhyvearm/bhyverun.h =================================================================== --- usr.sbin/bhyvearm/bhyverun.h +++ usr.sbin/bhyvearm/bhyverun.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _FBSDRUN_H_ +#define _FBSDRUN_H_ + +#ifndef CTASSERT /* Allow lint to override */ +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] +#endif + +struct vmctx; +extern int guest_ncpus; +extern char *vmname; + +void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len); + +void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip); +int fbsdrun_muxed(void); +int fbsdrun_vmexit_on_hlt(void); +int fbsdrun_vmexit_on_pause(void); +int fbsdrun_disable_x2apic(void); +#endif Index: usr.sbin/bhyvearm/bhyverun.c =================================================================== --- usr.sbin/bhyvearm/bhyverun.c +++ usr.sbin/bhyvearm/bhyverun.c @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bhyverun.h" +#include "mem.h" +#include "mevent.h" + +#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ + +#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ +#define VMEXIT_CONTINUE 1 /* continue from next instruction */ +#define VMEXIT_RESTART 2 /* restart current instruction */ +#define VMEXIT_ABORT 3 /* abort the vm run loop */ +#define VMEXIT_RESET 4 /* guest machine has reset */ + +#define MB (1024UL * 1024) +#define GB (1024UL * MB) + +typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); + +char *vmname; + +int guest_ncpus; + +static int pincpu = -1; + +static int foundcpus; + +static char *progname; +static const int BSP = 0; + +static int cpumask; + +static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc); + +struct vm_exit vmexit[VM_MAXCPU]; + +struct bhyvestats { + uint64_t vmexit_bogus; + uint64_t vmexit_inst_emul; +} stats; + +struct mt_vmm_info { + pthread_t mt_thr; + struct vmctx *mt_ctx; + int mt_vcpu; +} mt_vmm_info[VM_MAXCPU]; + +static void +usage(int code) +{ + + fprintf(stderr, + "Usage: %s [-b] [-c vcpus][-p pincpu]" + " \n" + " -b: use bvmconsole" + " -c: # cpus (default 1)\n" + " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n" + " -h: help\n", + progname); + + exit(code); +} + +void * +paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) +{ + + return (vm_map_gpa(ctx, gaddr, len)); +} + +static void * +fbsdrun_start_thread(void *param) +{ + char tname[MAXCOMLEN + 1]; + struct mt_vmm_info *mtp; + int vcpu; + + mtp = param; + vcpu = mtp->mt_vcpu; + + snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); + pthread_set_name_np(mtp->mt_thr, tname); + + vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].pc); + + /* not reached */ + exit(1); + return (NULL); +} + +void +fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t pc) +{ + int error; + + if (cpumask & (1 << vcpu)) { + fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", + vcpu); + exit(1); + } + + cpumask |= 1 << vcpu; + foundcpus++; + + /* + * Set up the vmexit struct to allow execution to start + * at the given RIP + */ + vmexit[vcpu].pc = pc; + vmexit[vcpu].inst_length = 0; + + if (vcpu == BSP) { + mt_vmm_info[vcpu].mt_ctx = ctx; + mt_vmm_info[vcpu].mt_vcpu = vcpu; + + error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, + fbsdrun_start_thread, &mt_vmm_info[vcpu]); + assert(error == 0); + } +} + +static int +fbsdrun_get_next_cpu(int curcpu) +{ + + /* + * Get the next available CPU. Assumes they arrive + * in ascending order with no gaps. 
+ */ + return ((curcpu + 1) % foundcpus); +} + +static int +vmexit_hyp(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + fprintf(stderr, "vm exit[%d]\n", *pvcpu); + fprintf(stderr, "\treason\t\tHYP\n"); + fprintf(stderr, "\tpc\t\t0x%016llx\n", vmexit->pc); + fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); + + return (VMEXIT_ABORT); +} + +static int +vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + stats.vmexit_bogus++; + + return (VMEXIT_RESTART); +} + +static int +vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + int err; + struct vie *vie; + + stats.vmexit_inst_emul++; + + vie = &vmexit->u.inst_emul.vie; + err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie); + + if (err) { + if (err == ESRCH) { + fprintf(stderr, "Unhandled memory access to 0x%llx\n", + vmexit->u.inst_emul.gpa); + } + + fprintf(stderr, "Failed to emulate instruction at 0x%llx\n", vmexit->pc); + return (VMEXIT_ABORT); + } + return (VMEXIT_CONTINUE); +} + + +static vmexit_handler_t handler[VM_EXITCODE_MAX] = { + [VM_EXITCODE_HYP] = vmexit_hyp, + [VM_EXITCODE_BOGUS] = vmexit_bogus, + [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, +}; + +static void +vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc) +{ + cpuset_t mask; + int error, rc, prevcpu; + enum vm_exitcode exitcode; + + if (pincpu >= 0) { + CPU_ZERO(&mask); + CPU_SET(pincpu + vcpu, &mask); + error = pthread_setaffinity_np(pthread_self(), + sizeof(mask), &mask); + assert(error == 0); + } + while (1) { + + error = vm_run(ctx, vcpu, pc, &vmexit[vcpu]); + + if (error != 0) { + /* + * It is possible that 'vmmctl' or some other process + * has transitioned the vcpu to CANNOT_RUN state right + * before we tried to transition it to RUNNING. + * + * This is expected to be temporary so just retry. 
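One point the handler table above fixes is the contract for exit handlers: each receives the per-vcpu vm_exit record and returns a VMEXIT_* code, which the run loop turns into the next guest PC (VMEXIT_CONTINUE steps past the trapped instruction, VMEXIT_RESTART re-executes it). A skeletal handler, with vmexit_wfi and VM_EXITCODE_WFI invented for illustration:

static int
vmexit_wfi(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{

    /* Nothing to emulate in this sketch; resume after the instruction. */
    return (VMEXIT_CONTINUE);
}

/* Hooking it up would be one more initializer: [VM_EXITCODE_WFI] = vmexit_wfi */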
+ */ + if (errno == EBUSY) + continue; + else + break; + } + + prevcpu = vcpu; + + exitcode = vmexit[vcpu].exitcode; + if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { + fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", + exitcode); + exit(1); + } + + rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); + + switch (rc) { + case VMEXIT_CONTINUE: + pc = vmexit[vcpu].pc + vmexit[vcpu].inst_length; + break; + case VMEXIT_RESTART: + pc = vmexit[vcpu].pc; + break; + case VMEXIT_RESET: + exit(0); + default: + exit(1); + } + } + fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); +} + +static int +num_vcpus_allowed(struct vmctx *ctx) +{ + /* Max one VCPU */ + return (1); +} + +int +main(int argc, char *argv[]) +{ + int c, error, bvmcons; + int max_vcpus; + struct vmctx *ctx; + uint64_t pc; + + bvmcons = 0; + progname = basename(argv[0]); + guest_ncpus = 1; + + while ((c = getopt(argc, argv, "abehAHIPp:g:c:s:S:m:")) != -1) { + switch (c) { + case 'b': + bvmcons = 1; + break; + case 'p': + pincpu = atoi(optarg); + break; + case 'c': + guest_ncpus = atoi(optarg); + break; + case 'h': + usage(0); + default: + usage(1); + } + } + argc -= optind; + argv += optind; + + if (argc != 1) + usage(1); + + vmname = argv[0]; + + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + max_vcpus = num_vcpus_allowed(ctx); + if (guest_ncpus > max_vcpus) { + fprintf(stderr, "%d vCPUs requested but only %d available\n", + guest_ncpus, max_vcpus); + exit(1); + } + + init_mem(); + + if (bvmcons) + init_bvmcons(); + + error = vm_get_register(ctx, BSP, VM_REG_GUEST_PC, &pc); + assert(error == 0); + /* + * Add CPU 0 + */ + fbsdrun_addcpu(ctx, BSP, pc); + + /* + * Head off to the main event dispatch loop + */ + mevent_dispatch(); + + exit(1); +} Index: usr.sbin/bhyvearm/block_if.h =================================================================== --- usr.sbin/bhyvearm/block_if.h +++ usr.sbin/bhyvearm/block_if.h @@ -0,0 +1,64 @@ +/*- + * Copyright (c) 2013 Peter Grehan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * The block API to be used by bhyve block-device emulations. 
The routines + * are thread safe, with no assumptions about the context of the completion + * callback - it may occur in the caller's context, or asynchronously in + * another thread. + */ + +#ifndef _BLOCK_IF_H_ +#define _BLOCK_IF_H_ + +#include +#include + +#define BLOCKIF_IOV_MAX 32 /* not practical to be IOV_MAX */ + +struct blockif_req { + struct iovec br_iov[BLOCKIF_IOV_MAX]; + int br_iovcnt; + off_t br_offset; + void (*br_callback)(struct blockif_req *req, int err); + void *br_param; +}; + +struct blockif_ctxt; +struct blockif_ctxt *blockif_open(const char *optstr, const char *ident); +off_t blockif_size(struct blockif_ctxt *bc); +int blockif_sectsz(struct blockif_ctxt *bc); +int blockif_queuesz(struct blockif_ctxt *bc); +int blockif_is_ro(struct blockif_ctxt *bc); +int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); +int blockif_close(struct blockif_ctxt *bc); + +#endif /* _BLOCK_IF_H_ */ Index: usr.sbin/bhyvearm/block_if.c =================================================================== --- usr.sbin/bhyvearm/block_if.c +++ usr.sbin/bhyvearm/block_if.c @@ -0,0 +1,426 @@ +/*- + * Copyright (c) 2013 Peter Grehan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
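Since the header above is the entire contract, a consumer sketch may help review: open a backing image (the optstring is path[,nocache][,sync][,ro], as parsed by blockif_open() further down), queue a single read, and take completion in a callback that may run on the blockif service thread. The demo_* names are invented, and the request structure has to stay valid until that callback fires, hence the statics.

#include <sys/uio.h>

#include <stdio.h>

#include "block_if.h"

static char sector[512];
static struct blockif_req breq;     /* must outlive the in-flight request */

static void
demo_done(struct blockif_req *br, int err)
{
    fprintf(stderr, "sector 0 read finished, error %d\n", err);
}

static int
demo_read_sector0(const char *path)
{
    struct blockif_ctxt *bc;

    bc = blockif_open(path, "demo");
    if (bc == NULL)
        return (-1);

    breq.br_iov[0].iov_base = sector;
    breq.br_iov[0].iov_len = sizeof(sector);
    breq.br_iovcnt = 1;
    breq.br_offset = 0;
    breq.br_callback = demo_done;
    breq.br_param = NULL;

    return (blockif_read(bc, &breq));   /* E2BIG once the request queue is full */
}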
+ * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bhyverun.h" +#include "block_if.h" + +#define BLOCKIF_SIG 0xb109b109 + +#define BLOCKIF_MAXREQ 16 + +enum blockop { + BOP_READ, + BOP_WRITE, + BOP_FLUSH, + BOP_CANCEL +}; + +enum blockstat { + BST_FREE, + BST_INUSE +}; + +struct blockif_elem { + TAILQ_ENTRY(blockif_elem) be_link; + struct blockif_req *be_req; + enum blockop be_op; + enum blockstat be_status; +}; + +struct blockif_ctxt { + int bc_magic; + int bc_fd; + int bc_rdonly; + off_t bc_size; + int bc_sectsz; + pthread_t bc_btid; + pthread_mutex_t bc_mtx; + pthread_cond_t bc_cond; + int bc_closing; + + /* Request elements and free/inuse queues */ + TAILQ_HEAD(, blockif_elem) bc_freeq; + TAILQ_HEAD(, blockif_elem) bc_inuseq; + u_int bc_req_count; + struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; +}; + +static int +blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, + enum blockop op) +{ + struct blockif_elem *be; + + assert(bc->bc_req_count < BLOCKIF_MAXREQ); + + be = TAILQ_FIRST(&bc->bc_freeq); + assert(be != NULL); + assert(be->be_status == BST_FREE); + + TAILQ_REMOVE(&bc->bc_freeq, be, be_link); + be->be_status = BST_INUSE; + be->be_req = breq; + be->be_op = op; + TAILQ_INSERT_TAIL(&bc->bc_inuseq, be, be_link); + + bc->bc_req_count++; + + return (0); +} + +static int +blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem *el) +{ + struct blockif_elem *be; + + if (bc->bc_req_count == 0) + return (ENOENT); + + be = TAILQ_FIRST(&bc->bc_inuseq); + assert(be != NULL); + assert(be->be_status == BST_INUSE); + *el = *be; + + TAILQ_REMOVE(&bc->bc_inuseq, be, be_link); + be->be_status = BST_FREE; + be->be_req = NULL; + TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); + + bc->bc_req_count--; + + return (0); +} + +static void +blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) +{ + struct blockif_req *br; + int err; + + br = be->be_req; + err = 0; + + switch (be->be_op) { + case BOP_READ: + if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, + br->br_offset) < 0) + err = errno; + break; + case BOP_WRITE: + if (bc->bc_rdonly) + err = EROFS; + else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, + br->br_offset) < 0) + err = errno; + break; + case BOP_FLUSH: + break; + case BOP_CANCEL: + err = EINTR; + break; + default: + err = EINVAL; + break; + } + + (*br->br_callback)(br, err); +} + +static void * +blockif_thr(void *arg) +{ + struct blockif_ctxt *bc; + struct blockif_elem req; + + bc = arg; + + for (;;) { + pthread_mutex_lock(&bc->bc_mtx); + while (!blockif_dequeue(bc, &req)) { + pthread_mutex_unlock(&bc->bc_mtx); + blockif_proc(bc, &req); + pthread_mutex_lock(&bc->bc_mtx); + } + pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); + pthread_mutex_unlock(&bc->bc_mtx); + + /* + * Check ctxt status here to see if exit requested + */ + if (bc->bc_closing) + pthread_exit(NULL); + } + + /* Not reached */ + return (NULL); +} + +struct blockif_ctxt * +blockif_open(const char *optstr, const char *ident) +{ + char tname[MAXCOMLEN + 1]; + char *nopt, *xopts; + struct blockif_ctxt *bc; + struct stat sbuf; + off_t size; + int extra, fd, i, sectsz; + int nocache, sync, ro; + + nocache = 0; + sync = 0; + ro = 0; + + /* + * The first element in the optstring is always a pathname. 
+ * Optional elements follow + */ + nopt = strdup(optstr); + for (xopts = strtok(nopt, ","); + xopts != NULL; + xopts = strtok(NULL, ",")) { + if (!strcmp(xopts, "nocache")) + nocache = 1; + else if (!strcmp(xopts, "sync")) + sync = 1; + else if (!strcmp(xopts, "ro")) + ro = 1; + } + + extra = 0; + if (nocache) + extra |= O_DIRECT; + if (sync) + extra |= O_SYNC; + + fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); + if (fd < 0 && !ro) { + /* Attempt a r/w fail with a r/o open */ + fd = open(nopt, O_RDONLY | extra); + ro = 1; + } + + if (fd < 0) { + perror("Could not open backing file"); + return (NULL); + } + + if (fstat(fd, &sbuf) < 0) { + perror("Could not stat backing file"); + close(fd); + return (NULL); + } + + /* + * Deal with raw devices + */ + size = sbuf.st_size; + sectsz = DEV_BSIZE; + if (S_ISCHR(sbuf.st_mode)) { + if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || + ioctl(fd, DIOCGSECTORSIZE, §sz)) { + perror("Could not fetch dev blk/sector size"); + close(fd); + return (NULL); + } + assert(size != 0); + assert(sectsz != 0); + } + + bc = malloc(sizeof(struct blockif_ctxt)); + if (bc == NULL) { + close(fd); + return (NULL); + } + + memset(bc, 0, sizeof(*bc)); + bc->bc_magic = BLOCKIF_SIG; + bc->bc_fd = fd; + bc->bc_size = size; + bc->bc_sectsz = sectsz; + pthread_mutex_init(&bc->bc_mtx, NULL); + pthread_cond_init(&bc->bc_cond, NULL); + TAILQ_INIT(&bc->bc_freeq); + TAILQ_INIT(&bc->bc_inuseq); + bc->bc_req_count = 0; + for (i = 0; i < BLOCKIF_MAXREQ; i++) { + bc->bc_reqs[i].be_status = BST_FREE; + TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); + } + + pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); + + snprintf(tname, sizeof(tname), "%s blk-%s", vmname, ident); + pthread_set_name_np(bc->bc_btid, tname); + + return (bc); +} + +static int +blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, + enum blockop op) +{ + int err; + + err = 0; + + pthread_mutex_lock(&bc->bc_mtx); + if (bc->bc_req_count < BLOCKIF_MAXREQ) { + /* + * Enqueue and inform the block i/o thread + * that there is work available + */ + blockif_enqueue(bc, breq, op); + pthread_cond_signal(&bc->bc_cond); + } else { + /* + * Callers are not allowed to enqueue more than + * the specified blockif queue limit. Return an + * error to indicate that the queue length has been + * exceeded. + */ + err = E2BIG; + } + pthread_mutex_unlock(&bc->bc_mtx); + + return (err); +} + +int +blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_READ)); +} + +int +blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_WRITE)); +} + +int +blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_FLUSH)); +} + +int +blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (blockif_request(bc, breq, BOP_CANCEL)); +} + +int +blockif_close(struct blockif_ctxt *bc) +{ + void *jval; + int err; + + err = 0; + + assert(bc->bc_magic == BLOCKIF_SIG); + + /* + * Stop the block i/o thread + */ + bc->bc_closing = 1; + pthread_cond_signal(&bc->bc_cond); + pthread_join(bc->bc_btid, &jval); + + /* XXX Cancel queued i/o's ??? 
*/ + + /* + * Release resources + */ + bc->bc_magic = 0; + close(bc->bc_fd); + free(bc); + + return (0); +} + +/* + * Accessors + */ +off_t +blockif_size(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_size); +} + +int +blockif_sectsz(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_sectsz); +} + +int +blockif_queuesz(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (BLOCKIF_MAXREQ); +} + +int +blockif_is_ro(struct blockif_ctxt *bc) +{ + + assert(bc->bc_magic == BLOCKIF_SIG); + return (bc->bc_rdonly); +} Index: usr.sbin/bhyvearm/consport.c =================================================================== --- usr.sbin/bhyvearm/consport.c +++ usr.sbin/bhyvearm/consport.c @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "mem.h" + +#define BVM_CONSOLE_PORT 0x1c090000 +#define BVM_CONS_SIG ('b' << 8 | 'v') + +static struct termios tio_orig, tio_new; + +static void +ttyclose(void) +{ + tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig); +} + +static void +ttyopen(void) +{ + tcgetattr(STDIN_FILENO, &tio_orig); + + cfmakeraw(&tio_new); + tcsetattr(STDIN_FILENO, TCSANOW, &tio_new); + + atexit(ttyclose); +} + +static bool +tty_char_available(void) +{ + fd_set rfds; + struct timeval tv; + + FD_ZERO(&rfds); + FD_SET(STDIN_FILENO, &rfds); + tv.tv_sec = 0; + tv.tv_usec = 0; + if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) { + return (true); + } else { + return (false); + } +} + +static int +ttyread(void) +{ + char rb; + + if (tty_char_available()) { + read(STDIN_FILENO, &rb, 1); + return (rb & 0xff); + } else { + return (-1); + } +} + +static void +ttywrite(unsigned char wb) +{ + (void) write(STDOUT_FILENO, &wb, 1); +} + +static int +console_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) +{ + static int opened; + + if (size == 2 && dir == MEM_F_READ) { + *val = BVM_CONS_SIG; + return (0); + } + + /* + * Guests might probe this port to look for old ISA devices + * using single-byte reads. Return 0xff for those. 
+ */ + if (size == 1 && dir == MEM_F_READ) { + *val = 0xff; + return (0); + } + + if (size != 4) + return (-1); + + if (!opened) { + ttyopen(); + opened = 1; + } + + if (dir == MEM_F_READ) + *val = ttyread(); + else + ttywrite(*val); + return (0); +} + +struct mem_range consport ={ + "bvmcons", + MEM_F_RW, + console_handler, + NULL, + 0, + BVM_CONSOLE_PORT, + sizeof(int) +}; + +void +init_bvmcons(void) +{ + register_mem(&consport); +} Index: usr.sbin/bhyvearm/mem.h =================================================================== --- usr.sbin/bhyvearm/mem.h +++ usr.sbin/bhyvearm/mem.h @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MEM_H_ +#define _MEM_H_ + +#include + +struct vmctx; + +typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int size, uint64_t *val, void *arg1, long arg2); + +struct mem_range { + const char *name; + int flags; + mem_func_t handler; + void *arg1; + long arg2; + uint64_t base; + uint64_t size; +}; +#define MEM_F_READ 0x1 +#define MEM_F_WRITE 0x2 +#define MEM_F_RW 0x3 + +void init_mem(void); +int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, void *vie); + +int register_mem(struct mem_range *memp); +int register_mem_fallback(struct mem_range *memp); +int unregister_mem(struct mem_range *memp); + +void init_bvmcons(void); +#endif /* _MEM_H_ */ Index: usr.sbin/bhyvearm/mem.c =================================================================== --- usr.sbin/bhyvearm/mem.c +++ usr.sbin/bhyvearm/mem.c @@ -0,0 +1,272 @@ +/*- + * Copyright (c) 2012 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
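The consport registration above together with mem.h shows the complete pattern for wiring an emulated device into the MMIO range tree; for comparison, a second, entirely hypothetical range (name, handler and base address are invented) would be registered the same way:

#include <stdint.h>

#include "mem.h"

static int
tick_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
    static uint64_t ticks;

    if (dir == MEM_F_READ)
        *val = ticks++;         /* reads return a monotonic counter */
    /* writes are accepted and ignored */
    return (0);
}

static struct mem_range tick_range = {
    "tickcnt",                  /* name */
    MEM_F_RW,                   /* flags */
    tick_handler,
    NULL,                       /* arg1 */
    0,                          /* arg2 */
    0x1c0a0000,                 /* base: hypothetical guest physical address */
    4                           /* size */
};

void
init_tickcnt(void)
{

    register_mem(&tick_range);
}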
+ * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Memory ranges are represented with an RB tree. On insertion, the range + * is checked for overlaps. On lookup, the key has the same base and limit + * so it can be searched within the range. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mem.h" + +struct mmio_rb_range { + RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ + struct mem_range mr_param; + uint64_t mr_base; + uint64_t mr_end; +}; + +struct mmio_rb_tree; +RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; + +/* + * Per-vCPU cache. Since most accesses from a vCPU will be to + * consecutive addresses in a range, it makes sense to cache the + * result of a lookup. + */ +static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; + +static pthread_rwlock_t mmio_rwlock; + +static int +mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) +{ + if (a->mr_end < b->mr_base) + return (-1); + else if (a->mr_base > b->mr_end) + return (1); + return (0); +} + +static int +mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr, + struct mmio_rb_range **entry) +{ + struct mmio_rb_range find, *res; + + find.mr_base = find.mr_end = addr; + + res = RB_FIND(mmio_rb_tree, rbt, &find); + + if (res != NULL) { + *entry = res; + return (0); + } + + return (ENOENT); +} + +static int +mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new) +{ + struct mmio_rb_range *overlap; + + overlap = RB_INSERT(mmio_rb_tree, rbt, new); + + if (overlap != NULL) { +#ifdef RB_DEBUG + printf("overlap detected: new %lx:%lx, tree %lx:%lx\n", + new->mr_base, new->mr_end, + overlap->mr_base, overlap->mr_end); +#endif + + return (EEXIST); + } + + return (0); +} + +#if 0 +static void +mmio_rb_dump(struct mmio_rb_tree *rbt) +{ + struct mmio_rb_range *np; + + pthread_rwlock_rdlock(&mmio_rwlock); + RB_FOREACH(np, mmio_rb_tree, rbt) { + printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, + np->mr_param.name); + } + pthread_rwlock_unlock(&mmio_rwlock); +} +#endif + +RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); + +static int +mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) +{ + int error; + struct mem_range *mr = arg; + + error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size, + rval, mr->arg1, mr->arg2); + return (error); +} + +static int +mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) +{ + int error; + struct mem_range *mr = arg; + + error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size, + &wval, mr->arg1, mr->arg2); + return (error); +} + +int +emulate_mem(struct vmctx *ctx, int 
vcpu, uint64_t paddr, void *vie) +{ + struct mmio_rb_range *entry; + int err; + + pthread_rwlock_rdlock(&mmio_rwlock); + /* + * First check the per-vCPU cache + */ + if (mmio_hint[vcpu] && + paddr >= mmio_hint[vcpu]->mr_base && + paddr <= mmio_hint[vcpu]->mr_end) { + entry = mmio_hint[vcpu]; + } else + entry = NULL; + + if (entry == NULL) { + if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { + /* Update the per-vCPU cache */ + mmio_hint[vcpu] = entry; + } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { + pthread_rwlock_unlock(&mmio_rwlock); + return (ESRCH); + } + } + + assert(entry != NULL); + assert(NULL == NULL); + err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, + mem_read, mem_write, &entry->mr_param); + + pthread_rwlock_unlock(&mmio_rwlock); + + return (err); +} + +static int +register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) +{ + struct mmio_rb_range *entry, *mrp; + int err; + + err = 0; + + mrp = malloc(sizeof(struct mmio_rb_range)); + + if (mrp != NULL) { + mrp->mr_param = *memp; + mrp->mr_base = memp->base; + mrp->mr_end = memp->base + memp->size - 1; + pthread_rwlock_wrlock(&mmio_rwlock); + if (mmio_rb_lookup(rbt, memp->base, &entry) != 0) + err = mmio_rb_add(rbt, mrp); + pthread_rwlock_unlock(&mmio_rwlock); + if (err) + free(mrp); + } else + err = ENOMEM; + + return (err); +} + +int +register_mem(struct mem_range *memp) +{ + + return (register_mem_int(&mmio_rb_root, memp)); +} + +int +register_mem_fallback(struct mem_range *memp) +{ + + return (register_mem_int(&mmio_rb_fallback, memp)); +} + +int +unregister_mem(struct mem_range *memp) +{ + struct mem_range *mr; + struct mmio_rb_range *entry = NULL; + int err, i; + + pthread_rwlock_wrlock(&mmio_rwlock); + err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); + if (err == 0) { + mr = &entry->mr_param; + assert(mr->name == memp->name); + assert(mr->base == memp->base && mr->size == memp->size); + RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); + + /* flush Per-vCPU cache */ + for (i=0; i < VM_MAXCPU; i++) { + if (mmio_hint[i] == entry) + mmio_hint[i] = NULL; + } + } + pthread_rwlock_unlock(&mmio_rwlock); + + if (entry) + free(entry); + + return (err); +} + +void +init_mem(void) +{ + + RB_INIT(&mmio_rb_root); + RB_INIT(&mmio_rb_fallback); + pthread_rwlock_init(&mmio_rwlock, NULL); +} Index: usr.sbin/bhyvearm/mevent.h =================================================================== --- usr.sbin/bhyvearm/mevent.h +++ usr.sbin/bhyvearm/mevent.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MEVENT_H_ +#define _MEVENT_H_ + +enum ev_type { + EVF_READ, + EVF_WRITE, + EVF_TIMER +}; + +struct mevent; + +struct mevent *mevent_add(int fd, enum ev_type type, + void (*func)(int, enum ev_type, void *), + void *param); +int mevent_enable(struct mevent *evp); +int mevent_disable(struct mevent *evp); +int mevent_delete(struct mevent *evp); +int mevent_delete_close(struct mevent *evp); + +void mevent_dispatch(void); + +#endif /* _MEVENT_H_ */ Index: usr.sbin/bhyvearm/mevent.c =================================================================== --- usr.sbin/bhyvearm/mevent.c +++ usr.sbin/bhyvearm/mevent.c @@ -0,0 +1,451 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Micro event library for FreeBSD, designed for a single i/o thread + * using kqueue, and having events be persistent by default. 
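Because the header above is all a caller ever sees, a minimal consumer sketch (names invented) may be useful: register a persistent read event and a millisecond timer, then hand the thread over to mevent_dispatch(), which does not return. One non-obvious requirement, visible in mevent.c below, is that the program must provide a global vmname for thread naming, exactly as mevent_test.c does at the end of this patch.

#include <stdio.h>
#include <unistd.h>

#include "mevent.h"

char *vmname = "mevent demo";       /* mevent.c uses this to name its thread */

static void
stdin_ready(int fd, enum ev_type type, void *param)
{
    char buf[64];
    ssize_t n;

    n = read(fd, buf, sizeof(buf));
    if (n > 0)
        (void) write(STDOUT_FILENO, buf, n);
}

static void
tick(int fd, enum ev_type type, void *param)
{
    fprintf(stderr, "timer fired\n");
}

int
main(void)
{

    mevent_add(STDIN_FILENO, EVF_READ, stdin_ready, NULL);
    mevent_add(1000, EVF_TIMER, tick, NULL);    /* first argument is msecs for timers */
    mevent_dispatch();                          /* never returns */
    return (0);
}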
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "mevent.h" + +#define MEVENT_MAX 64 + +#define MEV_ENABLE 1 +#define MEV_DISABLE 2 +#define MEV_DEL_PENDING 3 + +extern char *vmname; + +static pthread_t mevent_tid; +static int mevent_timid = 43; +static int mevent_pipefd[2]; +static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; + +struct mevent { + void (*me_func)(int, enum ev_type, void *); +#define me_msecs me_fd + int me_fd; + int me_timid; + enum ev_type me_type; + void *me_param; + int me_cq; + int me_state; + int me_closefd; + LIST_ENTRY(mevent) me_list; +}; + +static LIST_HEAD(listhead, mevent) global_head, change_head; + +static void +mevent_qlock(void) +{ + pthread_mutex_lock(&mevent_lmutex); +} + +static void +mevent_qunlock(void) +{ + pthread_mutex_unlock(&mevent_lmutex); +} + +static void +mevent_pipe_read(int fd, enum ev_type type, void *param) +{ + char buf[MEVENT_MAX]; + int status; + + /* + * Drain the pipe read side. The fd is non-blocking so this is + * safe to do. + */ + do { + status = read(fd, buf, sizeof(buf)); + } while (status == MEVENT_MAX); +} + +static void +mevent_notify(void) +{ + char c; + + /* + * If calling from outside the i/o thread, write a byte on the + * pipe to force the i/o thread to exit the blocking kevent call. + */ + if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { + write(mevent_pipefd[1], &c, 1); + } +} + +static int +mevent_kq_filter(struct mevent *mevp) +{ + int retval; + + retval = 0; + + if (mevp->me_type == EVF_READ) + retval = EVFILT_READ; + + if (mevp->me_type == EVF_WRITE) + retval = EVFILT_WRITE; + + if (mevp->me_type == EVF_TIMER) + retval = EVFILT_TIMER; + + return (retval); +} + +static int +mevent_kq_flags(struct mevent *mevp) +{ + int ret; + + switch (mevp->me_state) { + case MEV_ENABLE: + ret = EV_ADD; + if (mevp->me_type == EVF_TIMER) + ret |= EV_ENABLE; + break; + case MEV_DISABLE: + ret = EV_DISABLE; + break; + case MEV_DEL_PENDING: + ret = EV_DELETE; + break; + } + + return (ret); +} + +static int +mevent_kq_fflags(struct mevent *mevp) +{ + /* XXX nothing yet, perhaps EV_EOF for reads ? */ + return (0); +} + +static int +mevent_build(int mfd, struct kevent *kev) +{ + struct mevent *mevp, *tmpp; + int i; + + i = 0; + + mevent_qlock(); + + LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { + if (mevp->me_closefd) { + /* + * A close of the file descriptor will remove the + * event + */ + close(mevp->me_fd); + } else { + if (mevp->me_type == EVF_TIMER) { + kev[i].ident = mevp->me_timid; + kev[i].data = mevp->me_msecs; + } else { + kev[i].ident = mevp->me_fd; + kev[i].data = 0; + } + kev[i].filter = mevent_kq_filter(mevp); + kev[i].flags = mevent_kq_flags(mevp); + kev[i].fflags = mevent_kq_fflags(mevp); + kev[i].udata = mevp; + i++; + } + + mevp->me_cq = 0; + LIST_REMOVE(mevp, me_list); + + if (mevp->me_state == MEV_DEL_PENDING) { + free(mevp); + } else { + LIST_INSERT_HEAD(&global_head, mevp, me_list); + } + + assert(i < MEVENT_MAX); + } + + mevent_qunlock(); + + return (i); +} + +static void +mevent_handle(struct kevent *kev, int numev) +{ + struct mevent *mevp; + int i; + + for (i = 0; i < numev; i++) { + mevp = kev[i].udata; + + /* XXX check for EV_ERROR ? 
*/ + + (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); + } +} + +struct mevent * +mevent_add(int tfd, enum ev_type type, + void (*func)(int, enum ev_type, void *), void *param) +{ + struct mevent *lp, *mevp; + + if (tfd < 0 || func == NULL) { + return (NULL); + } + + mevp = NULL; + + mevent_qlock(); + + /* + * Verify that the fd/type tuple is not present in any list + */ + LIST_FOREACH(lp, &global_head, me_list) { + if (type != EVF_TIMER && lp->me_fd == tfd && + lp->me_type == type) { + goto exit; + } + } + + LIST_FOREACH(lp, &change_head, me_list) { + if (type != EVF_TIMER && lp->me_fd == tfd && + lp->me_type == type) { + goto exit; + } + } + + /* + * Allocate an entry, populate it, and add it to the change list. + */ + mevp = malloc(sizeof(struct mevent)); + if (mevp == NULL) { + goto exit; + } + + memset(mevp, 0, sizeof(struct mevent)); + if (type == EVF_TIMER) { + mevp->me_msecs = tfd; + mevp->me_timid = mevent_timid++; + } else + mevp->me_fd = tfd; + mevp->me_type = type; + mevp->me_func = func; + mevp->me_param = param; + + LIST_INSERT_HEAD(&change_head, mevp, me_list); + mevp->me_cq = 1; + mevp->me_state = MEV_ENABLE; + mevent_notify(); + +exit: + mevent_qunlock(); + + return (mevp); +} + +static int +mevent_update(struct mevent *evp, int newstate) +{ + /* + * It's not possible to enable/disable a deleted event + */ + if (evp->me_state == MEV_DEL_PENDING) + return (EINVAL); + + /* + * No update needed if state isn't changing + */ + if (evp->me_state == newstate) + return (0); + + mevent_qlock(); + + evp->me_state = newstate; + + /* + * Place the entry onto the changed list if not already there. + */ + if (evp->me_cq == 0) { + evp->me_cq = 1; + LIST_REMOVE(evp, me_list); + LIST_INSERT_HEAD(&change_head, evp, me_list); + mevent_notify(); + } + + mevent_qunlock(); + + return (0); +} + +int +mevent_enable(struct mevent *evp) +{ + + return (mevent_update(evp, MEV_ENABLE)); +} + +int +mevent_disable(struct mevent *evp) +{ + + return (mevent_update(evp, MEV_DISABLE)); +} + +static int +mevent_delete_event(struct mevent *evp, int closefd) +{ + mevent_qlock(); + + /* + * Place the entry onto the changed list if not already there, and + * mark as to be deleted. + */ + if (evp->me_cq == 0) { + evp->me_cq = 1; + LIST_REMOVE(evp, me_list); + LIST_INSERT_HEAD(&change_head, evp, me_list); + mevent_notify(); + } + evp->me_state = MEV_DEL_PENDING; + + if (closefd) + evp->me_closefd = 1; + + mevent_qunlock(); + + return (0); +} + +int +mevent_delete(struct mevent *evp) +{ + + return (mevent_delete_event(evp, 0)); +} + +int +mevent_delete_close(struct mevent *evp) +{ + + return (mevent_delete_event(evp, 1)); +} + +static void +mevent_set_name(void) +{ + char tname[MAXCOMLEN + 1]; + + snprintf(tname, sizeof(tname), "%s mevent", vmname); + pthread_set_name_np(mevent_tid, tname); +} + +void +mevent_dispatch(void) +{ + struct kevent changelist[MEVENT_MAX]; + struct kevent eventlist[MEVENT_MAX]; + struct mevent *pipev; + int mfd; + int numev; + int ret; + + mevent_tid = pthread_self(); + mevent_set_name(); + + mfd = kqueue(); + assert(mfd > 0); + + /* + * Open the pipe that will be used for other threads to force + * the blocking kqueue call to exit by writing to it. Set the + * descriptor to non-blocking. 
+ */ + ret = pipe(mevent_pipefd); + if (ret < 0) { + perror("pipe"); + exit(0); + } + + /* + * Add internal event handler for the pipe write fd + */ + pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); + assert(pipev != NULL); + + for (;;) { + /* + * Build changelist if required. + * XXX the changelist can be put into the blocking call + * to eliminate the extra syscall. Currently better for + * debug. + */ + numev = mevent_build(mfd, changelist); + if (numev) { + ret = kevent(mfd, changelist, numev, NULL, 0, NULL); + if (ret == -1) { + perror("Error return from kevent change"); + } + } + + /* + * Block awaiting events + */ + ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); + if (ret == -1) { + perror("Error return from kevent monitor"); + } + + /* + * Handle reported events + */ + mevent_handle(eventlist, ret); + } +} Index: usr.sbin/bhyvearm/mevent_test.c =================================================================== --- usr.sbin/bhyvearm/mevent_test.c +++ usr.sbin/bhyvearm/mevent_test.c @@ -0,0 +1,256 @@ +/*- + * Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Test program for the micro event library. Set up a simple TCP echo + * service. 
+ * + * cc mevent_test.c mevent.c -lpthread + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mevent.h" + +#define TEST_PORT 4321 + +static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER; + +static struct mevent *tevp; + +char *vmname = "test vm"; + + +#define MEVENT_ECHO + +/* Number of timer events to capture */ +#define TEVSZ 4096 +uint64_t tevbuf[TEVSZ]; + +static void +timer_print(void) +{ + uint64_t min, max, diff, sum, tsc_freq; + size_t len; + int j; + + min = UINT64_MAX; + max = 0; + sum = 0; + + len = sizeof(tsc_freq); + sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0); + + for (j = 1; j < TEVSZ; j++) { + /* Convert a tsc diff into microseconds */ + diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq; + sum += diff; + if (min > diff) + min = diff; + if (max < diff) + max = diff; + } + + printf("timers done: usecs, min %ld, max %ld, mean %ld\n", min, max, + sum/(TEVSZ - 1)); +} + +static void +timer_callback(int fd, enum ev_type type, void *param) +{ + static int i; + + if (i >= TEVSZ) + abort(); + + tevbuf[i++] = rdtsc(); + + if (i == TEVSZ) { + mevent_delete(tevp); + timer_print(); + } +} + + +#ifdef MEVENT_ECHO +struct esync { + pthread_mutex_t e_mt; + pthread_cond_t e_cond; +}; + +static void +echoer_callback(int fd, enum ev_type type, void *param) +{ + struct esync *sync = param; + + pthread_mutex_lock(&sync->e_mt); + pthread_cond_signal(&sync->e_cond); + pthread_mutex_unlock(&sync->e_mt); +} + +static void * +echoer(void *param) +{ + struct esync sync; + struct mevent *mev; + char buf[128]; + int fd = (int)(uintptr_t) param; + int len; + + pthread_mutex_init(&sync.e_mt, NULL); + pthread_cond_init(&sync.e_cond, NULL); + + pthread_mutex_lock(&sync.e_mt); + + mev = mevent_add(fd, EVF_READ, echoer_callback, &sync); + if (mev == NULL) { + printf("Could not allocate echoer event\n"); + exit(1); + } + + while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) { + len = read(fd, buf, sizeof(buf)); + if (len > 0) { + write(fd, buf, len); + write(0, buf, len); + } else { + break; + } + } + + mevent_delete_close(mev); + + pthread_mutex_unlock(&sync.e_mt); + pthread_mutex_destroy(&sync.e_mt); + pthread_cond_destroy(&sync.e_cond); + + return (NULL); +} + +#else + +static void * +echoer(void *param) +{ + char buf[128]; + int fd = (int)(uintptr_t) param; + int len; + + while ((len = read(fd, buf, sizeof(buf))) > 0) { + write(1, buf, len); + } + + return (NULL); +} +#endif /* MEVENT_ECHO */ + +static void +acceptor_callback(int fd, enum ev_type type, void *param) +{ + pthread_mutex_lock(&accept_mutex); + pthread_cond_signal(&accept_condvar); + pthread_mutex_unlock(&accept_mutex); +} + +static void * +acceptor(void *param) +{ + struct sockaddr_in sin; + pthread_t tid; + int news; + int s; + static int first; + + if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("socket"); + exit(1); + } + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(TEST_PORT); + + if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) { + perror("bind"); + exit(1); + } + + if (listen(s, 1) < 0) { + perror("listen"); + exit(1); + } + + (void) mevent_add(s, EVF_READ, acceptor_callback, NULL); + + pthread_mutex_lock(&accept_mutex); + + while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) { + news = accept(s, NULL, NULL); + if (news < 0) { + perror("accept error"); + } else { + static 
int first = 1; + + if (first) { + /* + * Start a timer + */ + first = 0; + tevp = mevent_add(1, EVF_TIMER, timer_callback, + NULL); + } + + printf("incoming connection, spawning thread\n"); + pthread_create(&tid, NULL, echoer, + (void *)(uintptr_t)news); + } + } + + return (NULL); +} + +main() +{ + pthread_t tid; + + pthread_create(&tid, NULL, acceptor, NULL); + + mevent_dispatch(); +} Index: usr.sbin/bhyveloadarm/Makefile =================================================================== --- usr.sbin/bhyveloadarm/Makefile +++ usr.sbin/bhyveloadarm/Makefile @@ -0,0 +1,15 @@ +# $FreeBSD$ + +PROG= bhyveloadarm +SRCS= bhyveloadarm.c + +.PATH: ${.CURDIR}/../../sys/arm/vmm + +MK_MAN=no + +DPADD+= ${LIBVMMAPIARM} +LDADD+= -lvmmapiarm + +WARNS?= 3 + +.include Index: usr.sbin/bhyveloadarm/bhyveloadarm.c =================================================================== --- usr.sbin/bhyveloadarm/bhyveloadarm.c +++ usr.sbin/bhyveloadarm/bhyveloadarm.c @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2015 Mihai Carabas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define MB (1024 * 1024UL)
+#define GB (1024 * 1024 * 1024UL)
+#define BSP 0
+#define KERNEL_IMAGE_NAME_LEN 32
+
+static char *vmname, *progname;
+static struct vmctx *ctx;
+
+
+/*
+ * Guest virtual machine
+ */
+static int
+guest_copyin(const void *from, uint64_t to, size_t size)
+{
+	char *ptr;
+
+	ptr = vm_map_gpa(ctx, to, size);
+	if (ptr == NULL)
+		return (EFAULT);
+
+	memcpy(ptr, from, size);
+	return (0);
+}
+
+static int
+guest_copyout(uint64_t from, void *to, size_t size)
+{
+	char *ptr;
+
+	ptr = vm_map_gpa(ctx, from, size);
+	if (ptr == NULL)
+		return (EFAULT);
+
+	memcpy(to, ptr, size);
+	return (0);
+}
+
+static void
+guest_setreg(enum vm_reg_name vmreg, uint64_t v)
+{
+	int error;
+
+	error = vm_set_register(ctx, BSP, vmreg, v);
+	if (error) {
+		perror("vm_set_register");
+	}
+}
+
+static void
+usage(void)
+{
+
+	fprintf(stderr,
+	    "usage: %s [-k kernel-image] [-l load-address] [-b base-address]\n"
+	    "       %*s [-m mem-size] [-p periphbase] <vmname>\n",
+	    progname,
+	    (int)strlen(progname), "");
+	exit(1);
+}
+
+int
+main(int argc, char **argv)
+{
+	uint64_t mem_size;
+	int opt, error;
+	int kernel_image_fd;
+	uint64_t kernel_load_address, memory_base_address;
+	uint64_t periphbase;
+	char kernel_image_name[KERNEL_IMAGE_NAME_LEN];
+	struct stat st;
+	void *addr;
+
+	progname = basename(argv[0]);
+
+	mem_size = 128 * MB;
+	kernel_load_address = 0xc0000000;
+	memory_base_address = 0xc0000000;
+	periphbase = 0x2c000000;
+	strlcpy(kernel_image_name, "kernel.bin", sizeof(kernel_image_name));
+
+	while ((opt = getopt(argc, argv, "k:l:b:m:p:")) != -1) {
+		switch (opt) {
+		case 'k':
+			strlcpy(kernel_image_name, optarg,
+			    sizeof(kernel_image_name));
+			break;
+		case 'l':
+			kernel_load_address = strtoul(optarg, NULL, 0);
+			break;
+		case 'b':
+			memory_base_address = strtoul(optarg, NULL, 0);
+			break;
+		case 'm':
+			mem_size = strtoul(optarg, NULL, 0) * MB;
+			break;
+		case 'p':
+			periphbase = strtoul(optarg, NULL, 0);
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 1)
+		usage();
+
+	vmname = argv[0];
+
+	kernel_image_fd = open(kernel_image_name, O_RDONLY);
+	if (kernel_image_fd == -1) {
+		perror("open kernel_image_name");
+		exit(1);
+	}
+
+	error = vm_create(vmname);
+	if (error) {
+		perror("vm_create");
+		exit(1);
+	}
+
+	ctx = vm_open(vmname);
+	if (ctx == NULL) {
+		perror("vm_open");
+		exit(1);
+	}
+
+	error = vm_setup_memory(ctx, memory_base_address, mem_size, VM_MMAP_ALL);
+	if (error) {
+		perror("vm_setup_memory");
+		exit(1);
+	}
+
+	error = fstat(kernel_image_fd, &st);
+	if (error) {
+		perror("fstat");
+		exit(1);
+	}
+
+	addr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, kernel_image_fd, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap kernel_image_fd");
+		exit(1);
+	}
+
+	if (guest_copyin(addr, kernel_load_address - memory_base_address,
+	    st.st_size)) {
+		perror("guest_copyin");
+		exit(1);
+	}
+
+	error = vm_attach_vgic(ctx, periphbase + 0x1000, periphbase + 0x2000);
+	if (error) {
+		perror("vm_attach_vgic");
+		exit(1);
+	}
+
+	munmap(addr, st.st_size);
+
+	guest_setreg(VM_REG_GUEST_PC, kernel_load_address);
+	return (0);
+}
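
With the option handling fixed as above, a representative invocation of the loader would look roughly like the following; this is purely illustrative, the values are simply the built-in defaults written out explicitly, and the VM name "armvm" is a placeholder:

	bhyveloadarm -k kernel.bin -m 128 -b 0xc0000000 -l 0xc0000000 -p 0x2c000000 armvm

The loader creates the VM, maps the requested amount of guest memory at the base address, copies the raw kernel image to the load address, attaches the virtual GIC (distributor at periphbase + 0x1000, CPU interface at periphbase + 0x2000) and finally points the guest PC at the load address; it never calls vm_run(), so actually running the guest is presumably left to the bhyvearm process itself.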
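
For readers unfamiliar with the mevent interface used by the bhyvearm userland, the following stand-alone sketch shows how a consumer drives it. It is not part of the patch: the function names and the vmname string are arbitrary, and it assumes only that "mevent.h" declares the mevent_add()/mevent_dispatch() prototypes and the EVF_READ/EVF_TIMER event types that mevent.c itself relies on. Note that mevent.c expects the program to provide the vmname symbol, which it uses to name its I/O thread.

	#include <stdio.h>
	#include <unistd.h>

	#include "mevent.h"

	char *vmname = "mevent example";	/* mevent.c names its thread after this */

	static void
	stdin_ready(int fd, enum ev_type type, void *param)
	{
		char buf[128];
		ssize_t n;

		/* Echo whatever arrives on stdin back to stdout. */
		n = read(fd, buf, sizeof(buf));
		if (n > 0)
			(void) write(STDOUT_FILENO, buf, n);
	}

	static void
	tick(int fd, enum ev_type type, void *param)
	{
		printf("timer fired\n");
	}

	int
	main(void)
	{
		struct mevent *rd, *tm;

		/* Register a read handler for stdin and a 1000 ms periodic timer. */
		rd = mevent_add(STDIN_FILENO, EVF_READ, stdin_ready, NULL);
		tm = mevent_add(1000, EVF_TIMER, tick, NULL);
		if (rd == NULL || tm == NULL)
			return (1);

		mevent_dispatch();	/* runs the kqueue loop; never returns */
		return (0);
	}

As in mevent_test.c, the first argument to mevent_add() is a file descriptor for EVF_READ/EVF_WRITE events but a period in milliseconds for EVF_TIMER, since me_msecs is defined as an alias for me_fd. Build it the same way the test program suggests, e.g. cc example.c mevent.c -lpthread.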
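
The mevent_test.c program can be exercised on its own, roughly as its header comment suggests (cc mevent_test.c mevent.c -lpthread). Once it is listening, each connection to TCP port 4321 (TEST_PORT), for example with nc(1), is served by a dedicated echoer thread, and the first accepted connection also arms the 1 ms EVF_TIMER whose min/max/mean latency is printed after TEVSZ samples. Note that the latency measurement uses rdtsc() and the machdep.tsc_freq sysctl, so that part of the test is x86-specific as written.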