D26976.id82061.diff

Index: lib/Makefile
===================================================================
--- lib/Makefile
+++ lib/Makefile
@@ -203,6 +203,9 @@
.if ${MACHINE_CPUARCH} == "amd64"
SUBDIR.${MK_PMC}+= libipt
+.endif
+
+.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "aarch64"
SUBDIR.${MK_BHYVE}+= libvmmapi
.endif
Index: lib/libvmmapi/Makefile
===================================================================
--- lib/libvmmapi/Makefile
+++ lib/libvmmapi/Makefile
@@ -1,14 +1,21 @@
# $FreeBSD$
-PACKAGE=lib${LIB}
-LIB= vmmapi
-SRCS= vmmapi.c vmmapi_freebsd.c
-INCS= vmmapi.h
+PACKAGE= lib${LIB}
+SHLIBDIR?= /lib
+LIB_SRCTOP?= ${.CURDIR}
+LIB= vmmapi
WARNS?= 2
-LIBADD= util
+.if exists(${LIB_SRCTOP}/${MACHINE})
+LIB_ARCH= ${MACHINE}
+.elif exists(${LIB_SRCTOP}/${MACHINE_ARCH})
+LIB_ARCH= ${MACHINE_ARCH}
+.else
+LIB_ARCH= ${MACHINE_CPUARCH}
+.endif
-CFLAGS+= -I${.CURDIR}
+CFLAGS+= -I${LIB_SRCTOP}/${LIB_ARCH}
+.include "${LIB_SRCTOP}/${LIB_ARCH}/Makefile.inc"
.include <bsd.lib.mk>
Index: lib/libvmmapi/amd64/Makefile.inc
===================================================================
--- /dev/null
+++ lib/libvmmapi/amd64/Makefile.inc
@@ -0,0 +1,7 @@
+# $FreeBSD$
+.PATH: ${LIB_SRCTOP}/amd64/
+
+SRCS= vmmapi.c vmmapi_freebsd.c
+INCS= vmmapi.h
+
+LIBADD= util
Index: lib/libvmmapi/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ lib/libvmmapi/arm64/Makefile.inc
@@ -0,0 +1,7 @@
+# $FreeBSD$
+.PATH: ${LIB_SRCTOP}/arm64/
+
+SRCS= vmmapi.c
+INCS= vmmapi.h
+
+LIBADD= util
Index: lib/libvmmapi/arm64/vmmapi.h
===================================================================
--- /dev/null
+++ lib/libvmmapi/arm64/vmmapi.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMMAPI_H_
+#define _VMMAPI_H_
+
+struct vmctx;
+struct vm_exit;
+enum vm_cap_type;
+
+/*
+ * Different styles of mapping the memory assigned to a VM into the address
+ * space of the controlling process.
+ */
+enum vm_mmap_style {
+ VM_MMAP_NONE, /* no mapping */
+ VM_MMAP_ALL, /* fully and statically mapped */
+ VM_MMAP_SPARSE, /* mappings created on-demand */
+};
+
+int vm_create(const char *name);
+struct vmctx *vm_open(const char *name);
+void vm_destroy(struct vmctx *ctx);
+int vm_parse_memsize(const char *optarg, size_t *memsize);
+int vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len);
+int vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t len, enum vm_mmap_style s);
+void *vm_map_ipa(struct vmctx *ctx, uint64_t gaddr, size_t len);
+uint32_t vm_get_mem_limit(struct vmctx *ctx);
+void vm_set_mem_limit(struct vmctx *ctx, uint32_t limit);
+int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
+int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
+int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
+ struct vm_exit *ret_vmexit);
+const char *vm_capability_type2name(int type);
+int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+ int *retval);
+int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+ int val);
+int vm_assert_irq(struct vmctx *ctx, uint32_t irq);
+int vm_deassert_irq(struct vmctx *ctx, uint32_t irq);
+
+/*
+ * Return a pointer to the statistics buffer. Note that this is not MT-safe.
+ */
+uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
+ int *ret_entries);
+const char *vm_get_stat_desc(struct vmctx *ctx, int index);
+
+/* Reset vcpu register state */
+int vcpu_reset(struct vmctx *ctx, int vcpu);
+
+int vm_attach_vgic(struct vmctx *ctx, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
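+
+/*
+ * Illustrative call sequence for a client of this library (the argument
+ * values below are placeholders, not part of the API):
+ *
+ *	vm_create("guest");
+ *	ctx = vm_open("guest");
+ *	vm_setup_memory(ctx, membase, memsize, VM_MMAP_ALL);
+ *	vm_attach_vgic(ctx, dist_start, dist_size, redist_start, redist_size);
+ *	vm_run(ctx, vcpu, pc, &vmexit);
+ *	vm_destroy(ctx);
+ */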
+#endif /* _VMMAPI_H_ */
Index: lib/libvmmapi/arm64/vmmapi.c
===================================================================
--- /dev/null
+++ lib/libvmmapi/arm64/vmmapi.c
@@ -0,0 +1,392 @@
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/errno.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <libutil.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include "vmmapi.h"
+
+#define MB (1024 * 1024UL)
+#define GB (1024 * 1024 * 1024UL)
+
+struct vmctx {
+ int fd;
+ uint32_t mem_limit;
+ enum vm_mmap_style vms;
+ size_t mem_size;
+ uint64_t mem_base;
+ char *mem_addr;
+ char *name;
+};
+
+#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
+#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
+
+static int
+vm_device_open(const char *name)
+{
+ int fd, len;
+ char *vmfile;
+
+ len = strlen("/dev/vmm/") + strlen(name) + 1;
+ vmfile = malloc(len);
+ assert(vmfile != NULL);
+ snprintf(vmfile, len, "/dev/vmm/%s", name);
+
+ /* Open the device file */
+ fd = open(vmfile, O_RDWR, 0);
+
+ free(vmfile);
+ return (fd);
+}
+
+int
+vm_create(const char *name)
+{
+
+ return (CREATE((char *)name));
+}
+
+struct vmctx *
+vm_open(const char *name)
+{
+ struct vmctx *vm;
+
+ vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
+ assert(vm != NULL);
+
+ vm->fd = -1;
+ vm->mem_limit = 2 * GB;
+ vm->name = (char *)(vm + 1);
+ strcpy(vm->name, name);
+
+ if ((vm->fd = vm_device_open(vm->name)) < 0)
+ goto err;
+
+ return (vm);
+err:
+ vm_destroy(vm);
+ return (NULL);
+}
+
+void
+vm_destroy(struct vmctx *vm)
+{
+ assert(vm != NULL);
+
+ if (vm->fd >= 0)
+ close(vm->fd);
+ DESTROY(vm->name);
+
+ free(vm);
+}
+
+int
+vm_parse_memsize(const char *optarg, size_t *ret_memsize)
+{
+ char *endptr;
+ size_t optval;
+ int error;
+
+ optval = strtoul(optarg, &endptr, 0);
+ if (*optarg != '\0' && *endptr == '\0') {
+ /*
+ * For the sake of backward compatibility if the memory size
+ * specified on the command line is less than a megabyte then
+ * it is interpreted as being in units of MB.
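+ * For example, "256" is taken to mean 256 MB, while a value of one
+ * megabyte or more (e.g. "268435456") is used as a byte count.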
+ */
+ if (optval < MB)
+ optval *= MB;
+ *ret_memsize = optval;
+ error = 0;
+ } else
+ error = expand_number(optarg, ret_memsize);
+
+ return (error);
+}
+
+int
+vm_get_memory_seg(struct vmctx *ctx, uint64_t gpa, size_t *ret_len)
+{
+ int error;
+ struct vm_memory_segment seg;
+
+ bzero(&seg, sizeof(seg));
+ seg.gpa = gpa;
+ error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
+ *ret_len = seg.len;
+ return (error);
+}
+
+uint32_t
+vm_get_mem_limit(struct vmctx *ctx)
+{
+
+ return (ctx->mem_limit);
+}
+
+void
+vm_set_mem_limit(struct vmctx *ctx, uint32_t limit)
+{
+
+ ctx->mem_limit = limit;
+}
+
+static int
+setup_memory_segment(struct vmctx *ctx, uint64_t gpa, size_t len, char **addr)
+{
+ int error;
+ struct vm_memory_segment seg;
+
+ /*
+ * Create and optionally map 'len' bytes of memory at guest
+ * physical address 'gpa'
+ */
+ bzero(&seg, sizeof(seg));
+ seg.gpa = gpa;
+ seg.len = len;
+ error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
+ if (error == 0 && addr != NULL) {
+ *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
+ ctx->fd, gpa);
+ }
+ return (error);
+}
+
+int
+vm_setup_memory(struct vmctx *ctx, uint64_t membase, size_t memsize, enum vm_mmap_style vms)
+{
+ int error;
+
+ /* XXX VM_MMAP_SPARSE not implemented yet */
+ assert(vms == VM_MMAP_ALL);
+
+ ctx->vms = vms;
+ ctx->mem_base = membase;
+
+ assert(memsize <= ctx->mem_limit);
+ ctx->mem_size = memsize;
+
+ if (ctx->mem_size > 0) {
+ error = setup_memory_segment(ctx, ctx->mem_base, ctx->mem_size,
+ &ctx->mem_addr);
+ if (error)
+ return (error);
+ }
+
+ return (0);
+}
+
+void *
+vm_map_ipa(struct vmctx *ctx, uint64_t iaddr, size_t len)
+{
+ /* XXX VM_MMAP_SPARSE not implemented yet */
+ assert(ctx->vms == VM_MMAP_ALL);
+
+ if (iaddr < ctx->mem_base)
+ return ((void *)(ctx->mem_addr + iaddr));
+ else
+ return ((void *)(ctx->mem_addr + (iaddr - ctx->mem_base)));
+}
+
+
+int
+vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
+{
+ int error;
+ struct vm_register vmreg;
+
+ bzero(&vmreg, sizeof(vmreg));
+ vmreg.cpuid = vcpu;
+ vmreg.regnum = reg;
+ vmreg.regval = val;
+
+ error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
+ return (error);
+}
+
+int
+vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
+{
+ int error;
+ struct vm_register vmreg;
+
+ bzero(&vmreg, sizeof(vmreg));
+ vmreg.cpuid = vcpu;
+ vmreg.regnum = reg;
+
+ error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
+ *ret_val = vmreg.regval;
+ return (error);
+}
+
+int
+vm_run(struct vmctx *ctx, int vcpu, uint64_t pc, struct vm_exit *vmexit)
+{
+ int error;
+ struct vm_run vmrun;
+
+ bzero(&vmrun, sizeof(vmrun));
+ vmrun.cpuid = vcpu;
+ vmrun.pc = pc;
+
+ error = ioctl(ctx->fd, VM_RUN, &vmrun);
+ bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
+ return (error);
+}
+
+static struct {
+ const char *name;
+ int type;
+} capstrmap[] = {
+ { "hlt_exit", VM_CAP_HALT_EXIT },
+ { "mtrap_exit", VM_CAP_MTRAP_EXIT },
+ { "pause_exit", VM_CAP_PAUSE_EXIT },
+ { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST },
+ { 0 }
+};
+
+int
+vm_capability_name2type(const char *capname)
+{
+ int i;
+
+ for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) {
+ if (strcmp(capstrmap[i].name, capname) == 0)
+ return (capstrmap[i].type);
+ }
+
+ return (-1);
+}
+
+const char *
+vm_capability_type2name(int type)
+{
+ int i;
+
+ for (i = 0; capstrmap[i].name != NULL; i++) {
+ if (capstrmap[i].type == type)
+ return (capstrmap[i].name);
+ }
+
+ return (NULL);
+}
+
+int
+vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
+ int *retval)
+{
+ int error;
+ struct vm_capability vmcap;
+
+ bzero(&vmcap, sizeof(vmcap));
+ vmcap.cpuid = vcpu;
+ vmcap.captype = cap;
+
+ error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
+ *retval = vmcap.capval;
+ return (error);
+}
+
+int
+vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
+{
+ struct vm_capability vmcap;
+
+ bzero(&vmcap, sizeof(vmcap));
+ vmcap.cpuid = vcpu;
+ vmcap.captype = cap;
+ vmcap.capval = val;
+
+ return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
+}
+
+uint64_t *
+vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
+ int *ret_entries)
+{
+ int error;
+
+ static struct vm_stats vmstats;
+
+ vmstats.cpuid = vcpu;
+
+ error = ioctl(ctx->fd, VM_STATS, &vmstats);
+ if (error == 0) {
+ if (ret_entries)
+ *ret_entries = vmstats.num_entries;
+ if (ret_tv)
+ *ret_tv = vmstats.tv;
+ return (vmstats.statbuf);
+ } else
+ return (NULL);
+}
+
+const char *
+vm_get_stat_desc(struct vmctx *ctx, int index)
+{
+ static struct vm_stat_desc statdesc;
+
+ statdesc.index = index;
+ if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
+ return (statdesc.desc);
+ else
+ return (NULL);
+}
+
+int
+vcpu_reset(struct vmctx *vmctx, int vcpu)
+{
+ return (ENXIO);
+}
+
+int
+vm_attach_vgic(struct vmctx *ctx, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size)
+{
+ struct vm_attach_vgic vav;
+
+ bzero(&vav, sizeof(vav));
+ vav.dist_start = dist_start;
+ vav.dist_size = dist_size;
+ vav.redist_start = redist_start;
+ vav.redist_size = redist_size;
+
+ return (ioctl(ctx->fd, VM_ATTACH_VGIC, &vav));
+}
+
+int
+vm_assert_irq(struct vmctx *ctx, uint32_t irq)
+{
+ struct vm_irq vi;
+
+ bzero(&vi, sizeof(vi));
+ vi.irq = irq;
+
+ return (ioctl(ctx->fd, VM_ASSERT_IRQ, &vi));
+}
+
+int
+vm_deassert_irq(struct vmctx *ctx, uint32_t irq)
+{
+ struct vm_irq vi;
+
+ bzero(&vi, sizeof(vi));
+ vi.irq = irq;
+
+ return (ioctl(ctx->fd, VM_DEASSERT_IRQ, &vi));
+}
Index: sys/arm/arm/generic_timer.h
===================================================================
--- /dev/null
+++ sys/arm/arm/generic_timer.h
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ARM_GENERIC_TIMER_H_
+#define _ARM_GENERIC_TIMER_H_
+
+#define GT_PHYS_SECURE 0
+#define GT_PHYS_NONSECURE 1
+#define GT_VIRT 2
+#define GT_HYP 3
+
+int arm_tmr_setup_intr(int gt_type, driver_filter_t filter,
+ driver_intr_t handler, void *arg);
+int arm_tmr_teardown_intr(int gt_type);
+
+#endif
Index: sys/arm/arm/generic_timer.c
===================================================================
--- sys/arm/arm/generic_timer.c
+++ sys/arm/arm/generic_timer.c
@@ -63,6 +63,10 @@
#include <machine/machdep.h> /* For arm_set_delay */
#endif
+#if defined(__aarch64__)
+#include <machine/vmm.h> /* For virt_enabled() */
+#endif
+
#ifdef FDT
#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_bus.h>
@@ -74,6 +78,8 @@
#include <dev/acpica/acpivar.h>
#endif
+#include "generic_timer.h"
+
#define GT_CTRL_ENABLE (1 << 0)
#define GT_CTRL_INT_MASK (1 << 1)
#define GT_CTRL_INT_STAT (1 << 2)
@@ -123,6 +129,8 @@
.tc_fill_vdso_timehands = arm_tmr_fill_vdso_timehands,
};
+static device_t arm_tmr_dev;
+
#ifdef __arm__
#define get_el0(x) cp15_## x ##_get()
#define get_el1(x) cp15_## x ##_get()
@@ -314,6 +322,39 @@
return (FILTER_HANDLED);
}
+int
+arm_tmr_setup_intr(int gt_type, driver_filter_t filter, driver_intr_t handler,
+ void *arg)
+{
+ if (gt_type != GT_PHYS_SECURE &&
+ gt_type != GT_PHYS_NONSECURE &&
+ gt_type != GT_VIRT &&
+ gt_type != GT_HYP)
+ return (ENXIO);
+
+ if (arm_tmr_sc->res[gt_type] == NULL)
+ return (ENXIO);
+
+ return (bus_setup_intr(arm_tmr_dev, arm_tmr_sc->res[gt_type],
+ INTR_TYPE_CLK, filter, handler, arg, &arm_tmr_sc->ihl[gt_type]));
+}
+
+int
+arm_tmr_teardown_intr(int gt_type)
+{
+ if (gt_type != GT_PHYS_SECURE &&
+ gt_type != GT_PHYS_NONSECURE &&
+ gt_type != GT_VIRT &&
+ gt_type != GT_HYP)
+ return (ENXIO);
+
+ if (arm_tmr_sc->res[gt_type] == NULL)
+ return (ENXIO);
+
+ return (bus_teardown_intr(arm_tmr_dev, arm_tmr_sc->res[gt_type],
+ arm_tmr_sc->ihl[gt_type]));
+}
+
#ifdef FDT
static int
arm_tmr_fdt_probe(device_t dev)
@@ -447,13 +488,26 @@
last_timer = 1;
}
+#ifdef __aarch64__
+ sc->physical |= virt_enabled();
+#endif
+
arm_tmr_sc = sc;
/* Setup secure, non-secure and virtual IRQs handler */
- for (i = first_timer; i <= last_timer; i++) {
+ for (i = GT_PHYS_SECURE; i <= GT_VIRT; i++) {
/* If we do not have the interrupt, skip it. */
if (sc->res[i] == NULL)
continue;
+#if defined(__aarch64__)
+ if (i == 2 && virt_enabled()) {
+ /*
+ * Do not install an interrupt handler for the virtual
+ * timer. This will be used by the VM.
+ */
+ continue;
+ }
+#endif
error = bus_setup_intr(dev, sc->res[i], INTR_TYPE_CLK,
arm_tmr_intr, NULL, sc, &sc->ihl[i]);
if (error) {
@@ -461,7 +515,6 @@
return (ENXIO);
}
}
-
/* Disable the virtual timer until we are ready */
if (sc->res[2] != NULL)
arm_tmr_disable(false);
@@ -488,6 +541,8 @@
arm_set_delay(arm_tmr_do_delay, sc);
#endif
+ arm_tmr_dev = dev;
+
return (0);
}
Index: sys/arm/arm/gic.h
===================================================================
--- sys/arm/arm/gic.h
+++ sys/arm/arm/gic.h
@@ -47,13 +47,16 @@
struct arm_gic_softc {
device_t gic_dev;
+ bool is_root;
void * gic_intrhand;
struct gic_irqsrc * gic_irqs;
- struct resource * gic_res[3];
+ struct resource * gic_res[6];
bus_space_tag_t gic_c_bst;
bus_space_tag_t gic_d_bst;
bus_space_handle_t gic_c_bsh;
bus_space_handle_t gic_d_bsh;
+ bus_space_tag_t gic_h_bst;
+ bus_space_handle_t gic_h_bsh;
uint8_t ver;
struct mtx mutex;
uint32_t nirqs;
Index: sys/arm/arm/gic.c
===================================================================
--- sys/arm/arm/gic.c
+++ sys/arm/arm/gic.c
@@ -128,10 +128,14 @@
static struct resource_spec arm_gic_spec[] = {
{ SYS_RES_MEMORY, 0, RF_ACTIVE }, /* Distributor registers */
{ SYS_RES_MEMORY, 1, RF_ACTIVE }, /* CPU Interrupt Intf. registers */
- { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* Parent interrupt */
+ { SYS_RES_MEMORY, 2, RF_ACTIVE | RF_OPTIONAL }, /* Virtual Interface Control */
+ { SYS_RES_MEMORY, 3, RF_ACTIVE | RF_OPTIONAL }, /* Virtual CPU interface */
+ { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* vGIC maintenance interrupt or parent interrupt */
{ -1, 0 }
};
+extern char hypmode_enabled[];
+
#if defined(__arm__) && defined(INVARIANTS)
static int gic_debug_spurious = 1;
#else
@@ -154,6 +158,22 @@
#define gic_d_write_4(_sc, _reg, _val) \
bus_space_write_4((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg), (_val))
+#define gic_h_read_4(_sc, _reg) \
+ bus_space_read_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg))
+#define gic_h_write_4(_sc, _reg, _val) \
+ bus_space_write_4((_sc)->gic_h_bst, (_sc)->gic_h_bsh, (_reg), (_val))
+
+struct arm_gic_softc *
+arm_gic_get_sc(void)
+{
+ return gic_sc;
+}
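+
+/*
+ * GICH_VTR.ListRegs (bits [5:0]) holds the number of implemented List
+ * Registers minus one, hence the "& 0x3f" and the "+ 1" below.
+ */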
+uint32_t
+arm_gic_get_lr_num(void)
+{
+ return ((gic_h_read_4(gic_sc, GICH_VTR) & 0x3f) + 1);
+}
+
static inline void
gic_irq_unmask(struct arm_gic_softc *sc, u_int irq)
{
@@ -322,12 +342,25 @@
mtx_init(&sc->mutex, "GIC lock", NULL, MTX_SPIN);
/* Distributor Interface */
- sc->gic_d_bst = rman_get_bustag(sc->gic_res[0]);
- sc->gic_d_bsh = rman_get_bushandle(sc->gic_res[0]);
+ sc->gic_d_bst = rman_get_bustag(sc->gic_res[DISTRIBUTOR_RES_IDX]);
+ sc->gic_d_bsh = rman_get_bushandle(sc->gic_res[DISTRIBUTOR_RES_IDX]);
/* CPU Interface */
- sc->gic_c_bst = rman_get_bustag(sc->gic_res[1]);
- sc->gic_c_bsh = rman_get_bushandle(sc->gic_res[1]);
+ sc->gic_c_bst = rman_get_bustag(sc->gic_res[CPU_INTERFACE_RES_IDX]);
+ sc->gic_c_bsh = rman_get_bushandle(sc->gic_res[CPU_INTERFACE_RES_IDX]);
+
+ /* Virtual Interface Control */
+ if (sc->is_root) {
+ if (sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX] == NULL) {
+ device_printf(dev, "Cannot find Virtual Interface Control Registers. Disabling Hyp-Mode...\n");
+ hypmode_enabled[0] = -1;
+ } else {
+ sc->gic_h_bst = rman_get_bustag(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ sc->gic_h_bsh = rman_get_bushandle(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ }
+ } else {
+ hypmode_enabled[0] = -1;
+ }
/* Disable interrupt forwarding to the CPU interface */
gic_d_write_4(sc, GICD_CTLR, 0x00);
@@ -507,6 +540,33 @@
("arm_gic_read_ivar: Invalid bus type %u", sc->gic_bus));
*result = sc->gic_bus;
return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_RES:
+ *result = (uintptr_t)sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX];
+ return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_VADDR:
+ *result = (uintptr_t)rman_get_virtual(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_PADDR:
+ *result = (uintptr_t)rman_get_start(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_INT_CTRL_SIZE:
+ *result = rman_get_size(sc->gic_res[VIRT_INTERFACE_CONTROL_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_CPU_INT_PADDR:
+ *result = rman_get_start(sc->gic_res[VIRT_CPU_INTERFACE_RES_IDX]);
+ return (0);
+ case GIC_IVAR_VIRTUAL_CPU_INT_SIZE:
+ *result = rman_get_size(sc->gic_res[VIRT_CPU_INTERFACE_RES_IDX]);
+ return (0);
+ case GIC_IVAR_LR_NUM:
+ *result = (gic_h_read_4(gic_sc, GICH_VTR) & 0x3f) + 1;
+ return (0);
+ case GIC_IVAR_MAINTENANCE_INTR_RES:
+ if (sc->is_root)
+ *result = (uintptr_t)sc->gic_res[MAINTENANCE_INTR_RES_IDX];
+ else
+ *result = (uintptr_t)NULL;
+ return (0);
}
return (ENOENT);
@@ -979,7 +1039,7 @@
if (CPU_ISSET(i, &cpus))
val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT;
- gic_d_write_4(sc, GICD_SGIR, val | gi->gi_irq);
+ gic_d_write_4(sc, GICD_SGIR(0), val | gi->gi_irq);
}
static int
Index: sys/arm/arm/gic_common.h
===================================================================
--- sys/arm/arm/gic_common.h
+++ sys/arm/arm/gic_common.h
@@ -32,8 +32,25 @@
#ifndef _GIC_COMMON_H_
#define _GIC_COMMON_H_
-#define GIC_IVAR_HW_REV 500
-#define GIC_IVAR_BUS 501
+#ifndef __ASSEMBLER__
+
+#define DISTRIBUTOR_RES_IDX 0
+#define CPU_INTERFACE_RES_IDX 1
+#define VIRT_INTERFACE_CONTROL_RES_IDX 2
+#define VIRT_CPU_INTERFACE_RES_IDX 3
+#define MAINTENANCE_INTR_RES_IDX 4
+#define INTRNG_RES_IDX 5
+
+#define GIC_IVAR_HW_REV 500
+#define GIC_IVAR_BUS 501
+#define GIC_IVAR_VIRTUAL_INT_CTRL_RES 502
+#define GIC_IVAR_VIRTUAL_INT_CTRL_VADDR 503
+#define GIC_IVAR_VIRTUAL_INT_CTRL_PADDR 505
+#define GIC_IVAR_VIRTUAL_INT_CTRL_SIZE 504
+#define GIC_IVAR_VIRTUAL_CPU_INT_PADDR 506
+#define GIC_IVAR_VIRTUAL_CPU_INT_SIZE 507
+#define GIC_IVAR_LR_NUM 508
+#define GIC_IVAR_MAINTENANCE_INTR_RES 509
/* GIC_IVAR_BUS values */
#define GIC_BUS_UNKNOWN 0
@@ -43,6 +60,19 @@
__BUS_ACCESSOR(gic, hw_rev, GIC, HW_REV, u_int);
__BUS_ACCESSOR(gic, bus, GIC, BUS, u_int);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_res, GIC, VIRTUAL_INT_CTRL_RES, struct resource *);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_vaddr, GIC, VIRTUAL_INT_CTRL_VADDR, uint64_t);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_paddr, GIC, VIRTUAL_INT_CTRL_PADDR, uint64_t);
+__BUS_ACCESSOR(gic, virtual_int_ctrl_size, GIC, VIRTUAL_INT_CTRL_SIZE, uint32_t);
+__BUS_ACCESSOR(gic, virtual_cpu_int_paddr, GIC, VIRTUAL_CPU_INT_PADDR, uint32_t);
+__BUS_ACCESSOR(gic, virtual_cpu_int_size, GIC, VIRTUAL_CPU_INT_SIZE, uint32_t);
+__BUS_ACCESSOR(gic, lr_num, GIC, LR_NUM, uint32_t);
+__BUS_ACCESSOR(gic, maintenance_intr_res, GIC, MAINTENANCE_INTR_RES, struct resource *);
+
+struct arm_gic_softc *arm_gic_get_sc(void);
+uint32_t arm_gic_get_lr_num(void);
+
+#endif /*__ASSEMBLER__ */
/* Software Generated Interrupts */
#define GIC_FIRST_SGI 0 /* Irqs 0-15 are SGIs/IPIs. */
@@ -56,7 +86,9 @@
/* Common register values */
#define GICD_CTLR 0x0000 /* v1 ICDDCR */
#define GICD_TYPER 0x0004 /* v1 ICDICTR */
-#define GICD_TYPER_I_NUM(n) ((((n) & 0x1F) + 1) * 32)
+#define GICD_TYPER_ITLINESNUM_MASK (0x1f)
+#define GICD_TYPER_I_NUM(n) \
+ ((((n) & GICD_TYPER_ITLINESNUM_MASK) + 1) * 32)
#define GICD_IIDR 0x0008 /* v1 ICDIIDR */
#define GICD_IIDR_PROD_SHIFT 24
#define GICD_IIDR_PROD_MASK 0xff000000
@@ -74,19 +106,30 @@
#define GICD_IIDR_IMPL_MASK 0x00000fff
#define GICD_IIDR_IMPL(x) \
(((x) & GICD_IIDR_IMPL_MASK) >> GICD_IIDR_IMPL_SHIFT)
-#define GICD_IGROUPR(n) (0x0080 + (((n) >> 5) * 4)) /* v1 ICDISER */
+#define GICD_IGROUPR_BASE (0x0080)
+#define GICD_IGROUPR(n) \
+ (GICD_IGROUPR_BASE + (((n) >> 5) * 4)) /* v1 ICDISER */
#define GICD_I_PER_IGROUPRn 32
-#define GICD_ISENABLER(n) (0x0100 + (((n) >> 5) * 4)) /* v1 ICDISER */
+#define GICD_ISENABLER_BASE (0x0100)
+#define GICD_ISENABLER(n) \
+ (GICD_ISENABLER_BASE + (((n) >> 5) * 4)) /* v1 ICDISER */
#define GICD_I_MASK(n) (1ul << ((n) & 0x1f))
#define GICD_I_PER_ISENABLERn 32
-#define GICD_ICENABLER(n) (0x0180 + (((n) >> 5) * 4)) /* v1 ICDICER */
+#define GICD_ICENABLER_BASE (0x0180)
+#define GICD_ICENABLER(n) \
+ (GICD_ICENABLER_BASE + (((n) >> 5) * 4)) /* v1 ICDICER */
#define GICD_ISPENDR(n) (0x0200 + (((n) >> 5) * 4)) /* v1 ICDISPR */
#define GICD_ICPENDR(n) (0x0280 + (((n) >> 5) * 4)) /* v1 ICDICPR */
+#define GICD_ISACTIVER(n) (0x0300 + (((n) >> 5) * 4)) /* v1 ICDABR */
#define GICD_ICACTIVER(n) (0x0380 + (((n) >> 5) * 4)) /* v1 ICDABR */
-#define GICD_IPRIORITYR(n) (0x0400 + (((n) >> 2) * 4)) /* v1 ICDIPR */
+#define GICD_IPRIORITYR_BASE (0x0400)
+#define GICD_IPRIORITYR(n) \
+ (GICD_IPRIORITYR_BASE + (((n) >> 2) * 4)) /* v1 ICDIPR */
#define GICD_I_PER_IPRIORITYn 4
#define GICD_ITARGETSR(n) (0x0800 + (((n) >> 2) * 4)) /* v1 ICDIPTR */
-#define GICD_ICFGR(n) (0x0C00 + (((n) >> 4) * 4)) /* v1 ICDICFR */
+#define GICD_ICFGR_BASE (0x0C00)
+#define GICD_ICFGR(n) \
+ (GICD_ICFGR_BASE + (((n) >> 4) * 4)) /* v1 ICDICFR */
#define GICD_I_PER_ICFGRn 16
/* First bit is a polarity bit (0 - low, 1 - high) */
#define GICD_ICFGR_POL_LOW (0 << 0)
@@ -96,7 +139,34 @@
#define GICD_ICFGR_TRIG_LVL (0 << 1)
#define GICD_ICFGR_TRIG_EDGE (1 << 1)
#define GICD_ICFGR_TRIG_MASK 0x2
-#define GICD_SGIR 0x0F00 /* v1 ICDSGIR */
+#define GICD_SGIR(n) (0x0F00 + ((n) * 4)) /* v1 ICDSGIR */
#define GICD_SGI_TARGET_SHIFT 16
+/* GIC Hypervisor specific registers */
+#define GICH_HCR 0x0
+#define GICH_VTR 0x4
+#define GICH_VMCR 0x8
+#define GICH_VMCR_VMGRP1EN (1 << 1)
+#define GICH_MISR 0x10
+#define GICH_EISR0 0x20
+#define GICH_EISR1 0x24
+#define GICH_ELSR0 0x30
+#define GICH_ELSR1 0x34
+#define GICH_APR 0xF0
+#define GICH_LR0 0x100
+
+#define GICH_HCR_EN (1 << 0)
+#define GICH_HCR_UIE (1 << 1)
+
+#define GICH_LR_VIRTID (0x3FF << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT 10
+#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT)
+#define GICH_LR_STATE (3 << 28)
+#define GICH_LR_PENDING (1 << 28)
+#define GICH_LR_ACTIVE (1 << 29)
+#define GICH_LR_EOI (1 << 19)
+
+#define GICH_MISR_EOI (1 << 0)
+#define GICH_MISR_U (1 << 1)
+
#endif /* _GIC_COMMON_H_ */
Index: sys/arm/arm/gic_fdt.c
===================================================================
--- sys/arm/arm/gic_fdt.c
+++ sys/arm/arm/gic_fdt.c
@@ -129,18 +129,25 @@
gic_fdt_attach(device_t dev)
{
struct arm_gic_fdt_softc *sc = device_get_softc(dev);
- phandle_t pxref;
- intptr_t xref;
+ phandle_t pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev));
+ intptr_t xref = OF_xref_from_node(ofw_bus_get_node(dev));
int err;
+ sc->base.is_root = false;
+ /*
+ * Controller is root if:
+ * - doesn't have interrupt parent
+ * - his interrupt parent is this controller
+ */
+ if (pxref == 0 || xref == pxref)
+ sc->base.is_root = true;
+
sc->base.gic_bus = GIC_BUS_FDT;
err = arm_gic_attach(dev);
if (err != 0)
return (err);
- xref = OF_xref_from_node(ofw_bus_get_node(dev));
-
/*
* Now, when everything is initialized, it's right time to
* register interrupt controller to interrupt framefork.
@@ -150,13 +157,7 @@
goto cleanup;
}
- /*
- * Controller is root if:
- * - doesn't have interrupt parent
- * - his interrupt parent is this controller
- */
- pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev));
- if (pxref == 0 || xref == pxref) {
+ if (sc->base.is_root) {
if (intr_pic_claim_root(dev, xref, arm_gic_intr, sc,
GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) {
device_printf(dev, "could not set PIC as a root\n");
Index: sys/arm64/arm64/gic_v3.c
===================================================================
--- sys/arm64/arm64/gic_v3.c
+++ sys/arm64/arm64/gic_v3.c
@@ -99,6 +99,11 @@
static u_int sgi_first_unused = GIC_FIRST_SGI;
#endif
+static struct resource *maint_res;
+static device_t gic_dev;
+static int maint_rid;
+static void *maint_cookie;
+
static device_method_t gic_v3_methods[] = {
/* Device interface */
DEVMETHOD(device_detach, gic_v3_detach),
@@ -366,12 +371,49 @@
return (0);
}
+void
+gic_v3_alloc_maint_res(device_t dev)
+{
+ gic_dev = dev;
+ maint_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &maint_rid,
+ RF_ACTIVE);
+ if (!maint_res)
+ device_printf(dev,
+ "Could not allocate resource for maintenance interrupt\n");
+}
+
+int
+gic_v3_setup_maint_intr(driver_filter_t filter, driver_intr_t handler,
+ void *arg)
+{
+ int flags;
+
+ if (!maint_res)
+ return (EINVAL);
+
+ flags = INTR_TYPE_MISC | INTR_MPSAFE;
+ return (bus_setup_intr(gic_dev, maint_res, flags, filter, handler,
+ arg, &maint_cookie));
+}
+
+int
+gic_v3_teardown_maint_intr(void)
+{
+ if (!maint_res)
+ return (EINVAL);
+
+ return (bus_teardown_intr(gic_dev, maint_res, maint_cookie));
+}
+
static int
gic_v3_get_domain(device_t dev, device_t child, int *domain)
{
struct gic_v3_devinfo *di;
di = device_get_ivars(child);
+ if (di == NULL)
+ return (0);
+
if (di->gic_domain < 0)
return (ENOENT);
@@ -978,22 +1020,25 @@
struct resource *res;
u_int cpuid;
size_t us_left = 1000000;
+ uint32_t rwp;
cpuid = PCPU_GET(cpuid);
switch (xdist) {
case DIST:
res = sc->gic_dist;
+ rwp = GICD_CTLR_RWP;
break;
case REDIST:
res = &sc->gic_redists.pcpu[cpuid]->res;
+ rwp = GICR_CTLR_RWP;
break;
default:
KASSERT(0, ("%s: Attempt to wait for unknown RWP", __func__));
return;
}
- while ((bus_read_4(res, GICD_CTLR) & GICD_CTLR_RWP) != 0) {
+ while ((bus_read_4(res, GICD_CTLR) & rwp) != 0) {
DELAY(1);
if (us_left-- == 0)
panic("GICD Register write pending for too long");
Index: sys/arm64/arm64/gic_v3_acpi.c
===================================================================
--- sys/arm64/arm64/gic_v3_acpi.c
+++ sys/arm64/arm64/gic_v3_acpi.c
@@ -338,6 +338,8 @@
if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) !=0)
sc->gic_nchildren = 0;
+ gic_v3_alloc_maint_res(dev);
+
return (0);
error:
Index: sys/arm64/arm64/gic_v3_fdt.c
===================================================================
--- sys/arm64/arm64/gic_v3_fdt.c
+++ sys/arm64/arm64/gic_v3_fdt.c
@@ -171,6 +171,8 @@
if (device_get_children(dev, &sc->gic_children, &sc->gic_nchildren) != 0)
sc->gic_nchildren = 0;
+ gic_v3_alloc_maint_res(dev);
+
return (err);
error:
@@ -194,12 +196,19 @@
static int
gic_v3_fdt_print_child(device_t bus, device_t child)
{
- struct gic_v3_ofw_devinfo *di = device_get_ivars(child);
- struct resource_list *rl = &di->di_rl;
+ struct gic_v3_ofw_devinfo *di;
+ struct resource_list *rl;
int retval = 0;
retval += bus_print_child_header(bus, child);
+
+ di = device_get_ivars(child);
+ if (di == NULL)
+ goto footer;
+ rl = &di->di_rl;
+
retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#jx");
+footer:
retval += bus_print_child_footer(bus, child);
return (retval);
@@ -280,6 +289,7 @@
size_cells = 2;
OF_getencprop(parent, "#size-cells", &size_cells,
sizeof(size_cells));
+
/* Iterate through all GIC subordinates */
for (node = OF_child(parent); node > 0; node = OF_peer(node)) {
/* Allocate and populate devinfo. */
Index: sys/arm64/arm64/gic_v3_reg.h
===================================================================
--- sys/arm64/arm64/gic_v3_reg.h
+++ sys/arm64/arm64/gic_v3_reg.h
@@ -56,14 +56,22 @@
#define GICD_CTLR_G1 (1 << 0)
#define GICD_CTLR_G1A (1 << 1)
#define GICD_CTLR_ARE_NS (1 << 4)
+#define GICD_CTLR_DS (1 << 6)
+#define GICD_CTLR_E1NWF (1 << 7)
#define GICD_CTLR_RWP (1 << 31)
/* GICD_TYPER */
#define GICD_TYPER_IDBITS(n) ((((n) >> 19) & 0x1F) + 1)
+#define GICD_TYPER_SECURITYEXTN \
+ (1 << 10)
+#define GICD_TYPER_DVIS (1 << 18)
+#define GICD_TYPER_LPIS (1 << 17)
/*
* Registers (v3)
*/
-#define GICD_IROUTER(n) (0x6000 + ((n) * 8))
+#define GICD_IROUTER_BASE (0x6000)
+#define GICD_IROUTER(n) (GICD_IROUTER_BASE + ((n) * 8))
+#define GICD_IROUTER_IRM (31)
#define GICD_PIDR4 0xFFD0
#define GICD_PIDR5 0xFFD4
@@ -84,7 +92,11 @@
/* Redistributor registers */
#define GICR_CTLR GICD_CTLR
-#define GICR_CTLR_LPI_ENABLE (1 << 0)
+#define GICR_CTLR_RWP (1 << 3)
+#define GICR_CTLR_UWP (1 << 31)
+#define GICR_CTLR_LPI_ENABLE (1 << 0)
+#define GICR_CTLR_DPG1NS (1 << 25)
+#define GICR_CTLR_DPG0 (1 << 24)
#define GICR_PIDR2 GICD_PIDR2
@@ -97,6 +109,10 @@
#define GICR_TYPER_CPUNUM(x) \
(((x) & GICR_TYPER_CPUNUM_MASK) >> GICR_TYPER_CPUNUM_SHIFT)
#define GICR_TYPER_AFF_SHIFT (32)
+#define GICR_TYPER_AFF0(x) ((x >> GICR_TYPER_AFF_SHIFT) & 0xff)
+#define GICR_TYPER_AFF1(x) ((x >> (GICR_TYPER_AFF_SHIFT + 8)) & 0xff)
+#define GICR_TYPER_AFF2(x) ((x >> (GICR_TYPER_AFF_SHIFT + 16)) & 0xff)
+#define GICR_TYPER_AFF3(x) ((x >> (GICR_TYPER_AFF_SHIFT + 24)) & 0xff)
#define GICR_WAKER (0x0014)
#define GICR_WAKER_PS (1 << 1) /* Processor sleep */
@@ -193,8 +209,12 @@
#define GICR_I_ENABLER_SGI_MASK (0x0000FFFF)
#define GICR_I_ENABLER_PPI_MASK (0xFFFF0000)
+#define GICR_IPRIORITYR_BASE (0x0400)
#define GICR_I_PER_IPRIORITYn (GICD_I_PER_IPRIORITYn)
+#define GICR_ICFGR0_BASE (0x0C00)
+#define GICR_ICFGR1_BASE (0x0C04)
+
/* ITS registers */
#define GITS_PIDR2 GICR_PIDR2
#define GITS_PIDR2_ARCH_MASK GICR_PIDR2_ARCH_MASK
Index: sys/arm64/arm64/gic_v3_var.h
===================================================================
--- sys/arm64/arm64/gic_v3_var.h
+++ sys/arm64/arm64/gic_v3_var.h
@@ -109,6 +109,10 @@
void gic_r_write_4(device_t, bus_size_t, uint32_t var);
void gic_r_write_8(device_t, bus_size_t, uint64_t var);
+void gic_v3_alloc_maint_res(device_t);
+int gic_v3_setup_maint_intr(driver_filter_t, driver_intr_t, void *);
+int gic_v3_teardown_maint_intr(void);
+
/*
* GIC Distributor accessors.
* Notice that only GIC sofc can be passed.
Index: sys/arm64/arm64/hyp_stub.S
===================================================================
--- /dev/null
+++ sys/arm64/arm64/hyp_stub.S
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+__FBSDID("$FreeBSD$");
+
+ .text
+
+/*
+ * Install a new exception vector table with the base address supplied by the
+ * parameter in register x0.
+ */
+ENTRY(handle_stub_el1h_sync)
+ msr vbar_el2, x0
+ eret
+END(handle_stub_el1h_sync)
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .align 11
+ .globl hyp_stub_vectors
+hyp_stub_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* SError EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* SError EL2h */
+
+ vector stub_el1h_sync /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* SError 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* SError 32-bit EL1 */
Index: sys/arm64/arm64/locore.S
===================================================================
--- sys/arm64/arm64/locore.S
+++ sys/arm64/arm64/locore.S
@@ -213,6 +213,11 @@
END(mpentry)
#endif
+ .align 3
+ .globl _C_LABEL(hypmode_enabled)
+_C_LABEL(hypmode_enabled):
+ .zero 8
+
/*
* If we are started in EL2, configure the required hypervisor
* registers and drop to EL1.
@@ -224,8 +229,22 @@
b.eq 1f
ret
1:
+ /*
+ * If the MMU is active, then it is using a page table where VA == PA.
+ * But the page table won't have entries for the hypervisor EL2
+ * initialization code which is loaded into memory with the vmm module.
+ *
+ * So we disable the MMU in EL2 to make the vmm hypervisor code run
+ * successfully.
+ */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, SCTLR_M
+ msr sctlr_el2, x2
+ isb
+
/* Configure the Hypervisor */
- mov x2, #(HCR_RW)
+ mov x2, #(HCR_RW & ~HCR_HCD)
msr hcr_el2, x2
/* Load the Virtualization Process ID Register */
@@ -256,10 +275,18 @@
msr cntvoff_el2, xzr
/* Hypervisor trap functions */
- adrp x2, hyp_vectors
- add x2, x2, :lo12:hyp_vectors
+ adrp x2, hyp_stub_vectors
msr vbar_el2, x2
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x2, #VTTBR_HOST
+ msr vttbr_el2, x2
+
+ /* Mark hypervisor mode as enabled */
+ mov x1, #1
+ adr x2, hypmode_enabled
+ str x1, [x2]
+
mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
msr spsr_el2, x2
@@ -288,6 +315,10 @@
.quad SCTLR_RES1
LEND(drop_to_el1)
+hcr:
+ /* Make sure the HVC instruction is not disabled */
+ .quad (HCR_RW & ~HCR_HCD)
+
#define VECT_EMPTY \
.align 7; \
1: b 1b
@@ -754,6 +785,8 @@
ENTRY(abort)
b abort
+
+ .align 12 /* 4KiB aligned */
END(abort)
.align 3
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -407,6 +407,8 @@
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
+static uint64_t pa_range_bits = 0;
+
/*
* These load the old table data and store the new value.
* They need to be atomic as the System MMU may write to the table at
@@ -431,9 +433,19 @@
memcpy(d, s, PAGE_SIZE);
}
+#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK)
+#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
+#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
+#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
+
+#define STAGE2_L1_ADDR_MASK ((1UL << (pa_range_bits - L1_SHIFT)) - 1)
+#define pmap_stage2_l1_index(va) (((va) >> L1_SHIFT) & STAGE2_L1_ADDR_MASK)
+
static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{
+ KASSERT(pmap->pm_stage != PM_STAGE2,
+ ("Level 0 table is invalid for PM_STAGE2 pmap"));
return (&pmap->pm_l0[pmap_l0_index(va)]);
}
@@ -450,6 +462,9 @@
static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
+ if (pmap->pm_stage == PM_STAGE2)
+ return (&pmap->pm_l0[pmap_stage2_l1_index(va)]);
+
pd_entry_t *l0;
l0 = pmap_l0(pmap, va);
@@ -459,6 +474,32 @@
return (pmap_l0_to_l1(l0, va));
}
+static __inline vm_page_t
+pmap_l1pg(pmap_t pmap, vm_offset_t va)
+{
+ if (pmap->pm_stage == PM_STAGE1) {
+ pd_entry_t *l0, tl0;
+
+ l0 = pmap_l0(pmap, va);
+ tl0 = pmap_load(l0);
+
+ return (PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK));
+ } else {
+ vm_paddr_t pa, pa_offset;
+
+ /*
+ * The offset will be the bits
+ * [pa_range_bits-1:L0_SHIFT]
+ */
+ va = va & ((1 << pa_range_bits) - 1);
+ pa_offset = va >> L0_SHIFT;
+ pa = DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0) + \
+ (pa_offset << PAGE_SHIFT);
+
+ return (PHYS_TO_VM_PAGE(pa));
+ }
+}
+
static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1p, vm_offset_t va)
{
@@ -519,18 +560,28 @@
{
pd_entry_t *l0, *l1, *l2, desc;
- l0 = pmap_l0(pmap, va);
- desc = pmap_load(l0) & ATTR_DESCR_MASK;
- if (desc != L0_TABLE) {
- *level = -1;
- return (NULL);
- }
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0 = pmap_l0(pmap, va);
+ desc = pmap_load(l0) & ATTR_DESCR_MASK;
+ if (desc != L0_TABLE) {
+ *level = -1;
+ return (NULL);
+ }
- l1 = pmap_l0_to_l1(l0, va);
- desc = pmap_load(l1) & ATTR_DESCR_MASK;
- if (desc != L1_TABLE) {
- *level = 0;
- return (l0);
+ l1 = pmap_l0_to_l1(l0, va);
+ desc = pmap_load(l1) & ATTR_DESCR_MASK;
+ if (desc != L1_TABLE) {
+ *level = 0;
+ return (l0);
+ }
+ } else {
+ l1 = pmap_l1(pmap, va);
+ desc = pmap_load(l1) & ATTR_DESCR_MASK;
+ if (desc != L1_TABLE) {
+ /* For PM_STAGE2 mappings the first level is level 1 */
+ *level = -1;
+ return (NULL);
+ }
}
l2 = pmap_l1_to_l2(l1, va);
@@ -607,13 +658,18 @@
if (pmap->pm_l0 == NULL)
return (false);
- l0p = pmap_l0(pmap, va);
- *l0 = l0p;
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0p = pmap_l0(pmap, va);
+ *l0 = l0p;
- if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
- return (false);
+ if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
+ return (false);
- l1p = pmap_l0_to_l1(l0p, va);
+ l1p = pmap_l0_to_l1(l0p, va);
+ } else {
+ *l0 = NULL;
+ l1p = pmap_l1(pmap, va);
+ }
*l1 = l1p;
if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
@@ -948,6 +1004,7 @@
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
vm_size_t kernlen)
{
+ uint64_t id_aa64mmfr0_el1;
vm_offset_t freemempos;
vm_offset_t dpcpu, msgbufpv;
vm_paddr_t start_pa, pa, min_pa;
@@ -1036,6 +1093,35 @@
physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);
+ id_aa64mmfr0_el1 = READ_SPECIALREG(id_aa64mmfr0_el1);
+ switch (ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1)) {
+ case ID_AA64MMFR0_PARange_4G:
+ pa_range_bits = 32;
+ break;
+ case ID_AA64MMFR0_PARange_64G:
+ pa_range_bits = 36;
+ break;
+ case ID_AA64MMFR0_PARange_1T:
+ pa_range_bits = 40;
+ break;
+ case ID_AA64MMFR0_PARange_4T:
+ pa_range_bits = 42;
+ break;
+ case ID_AA64MMFR0_PARange_16T:
+ pa_range_bits = 44;
+ break;
+ case ID_AA64MMFR0_PARange_256T:
+ pa_range_bits = 48;
+ break;
+ default:
+ /*
+ * Unknown PA range bits, will lead to a panic if a stage 2
+ * pmap starting at level 1 is created.
+ */
+ pa_range_bits = 0;
+ break;
+ }
+
cpu_tlb_flushID();
}
@@ -1619,10 +1705,12 @@
*/
if (m->pindex >= (NUL2E + NUL1E)) {
/* l1 page */
- pd_entry_t *l0;
+ if (pmap->pm_stage == PM_STAGE1) {
+ pd_entry_t *l0;
- l0 = pmap_l0(pmap, va);
- pmap_clear(l0);
+ l0 = pmap_l0(pmap, va);
+ pmap_clear(l0);
+ }
} else if (m->pindex >= NUL2E) {
/* l2 page */
pd_entry_t *l1;
@@ -1648,12 +1736,16 @@
pmap_unwire_l3(pmap, va, l2pg, free);
} else if (m->pindex < (NUL2E + NUL1E)) {
/* We just released an l2, unhold the matching l1 */
- pd_entry_t *l0, tl0;
vm_page_t l1pg;
+ pd_entry_t *l0, tl0;
- l0 = pmap_l0(pmap, va);
- tl0 = pmap_load(l0);
- l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0 = pmap_l0(pmap, va);
+ tl0 = pmap_load(l0);
+ l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ } else {
+ l1pg = pmap_l1pg(pmap, va);
+ }
pmap_unwire_l3(pmap, va, l1pg, free);
}
pmap_invalidate_page(pmap, va);
@@ -1728,12 +1820,48 @@
{
vm_page_t m;
+ KASSERT((stage == PM_STAGE1 || stage == PM_STAGE2),
+ ("Invalid pmap stage %d", stage));
+ KASSERT(!((stage == PM_STAGE2) && (pa_range_bits == 0)),
+ ("Unknown PARange bits"));
+
/*
* allocate the l0 page
*/
- while ((m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
- VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
- vm_wait(NULL);
+ if (stage == PM_STAGE1) {
+ while ((m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
+ vm_wait(NULL);
+ } else {
+ uint64_t npages;
+ uint64_t alignment;
+
+ if (pa_range_bits <= L0_SHIFT) {
+ /*
+ * The level 1 translation table is not larger than a
+ * PM_STAGE1 level 1 table, use only one page.
+ */
+ npages = 1;
+ alignment = PAGE_SIZE;
+ } else {
+ /*
+ * The level 1 translation table is larger than a
+ * regular PM_STAGE1 level 1 table, for every x bits
+ * that is larger we need 2^x pages and the table must
+ * be aligned at a 2^(x + 12) boundary.
+ *
+ * See Table D5-25 and Example D4-5 from the DDI0487B
+ * ARMv8 Architecture Manual for more information.
+ */
+ npages = 1 << (pa_range_bits - L0_SHIFT);
+ alignment = 1 << (PAGE_SHIFT + pa_range_bits - L0_SHIFT);
+ }
+ while ((m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO,
+ npages, DMAP_MIN_PHYSADDR, DMAP_MAX_PHYSADDR,
+ alignment, 0, VM_MEMATTR_DEFAULT)) == NULL)
+ vm_wait(NULL);
+ }
pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
@@ -1742,6 +1870,7 @@
pagezero(pmap->pm_l0);
pmap->pm_root.rt_root = 0;
+ pmap->pm_stage = stage;
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
@@ -1852,25 +1981,30 @@
pd_entry_t tl0;
l1index = ptepindex - NUL2E;
- l0index = l1index >> L0_ENTRIES_SHIFT;
-
- l0 = &pmap->pm_l0[l0index];
- tl0 = pmap_load(l0);
- if (tl0 == 0) {
- /* recurse for allocating page dir */
- if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
- lockp) == NULL) {
- vm_page_unwire_noq(m);
- vm_page_free_zero(m);
- return (NULL);
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0index = l1index >> L0_ENTRIES_SHIFT;
+ l0 = &pmap->pm_l0[l0index];
+ tl0 = pmap_load(l0);
+ if (tl0 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ l1pg->ref_count++;
}
+
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+ l1 = &l1[ptepindex & Ln_ADDR_MASK];
} else {
- l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+ l1pg = pmap_l1pg(pmap, l1index);
l1pg->ref_count++;
+ l1 = &pmap->pm_l0[l1index & STAGE2_L1_ADDR_MASK];
}
-
- l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
- l1 = &l1[ptepindex & Ln_ADDR_MASK];
pmap_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
} else {
vm_pindex_t l0index, l1index;
@@ -1878,24 +2012,40 @@
pd_entry_t tl0, tl1;
l1index = ptepindex >> Ln_ENTRIES_SHIFT;
- l0index = l1index >> L0_ENTRIES_SHIFT;
-
- l0 = &pmap->pm_l0[l0index];
- tl0 = pmap_load(l0);
- if (tl0 == 0) {
- /* recurse for allocating page dir */
- if (_pmap_alloc_l3(pmap, NUL2E + l1index,
- lockp) == NULL) {
- vm_page_unwire_noq(m);
- vm_page_free_zero(m);
- return (NULL);
- }
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0index = l1index >> L0_ENTRIES_SHIFT;
+ l0 = &pmap->pm_l0[l0index];
tl0 = pmap_load(l0);
- l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
- l1 = &l1[l1index & Ln_ADDR_MASK];
+ if (tl0 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ tl0 = pmap_load(l0);
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+ l1 = &l1[l1index & Ln_ADDR_MASK];
+ } else {
+ l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+ l1 = &l1[l1index & Ln_ADDR_MASK];
+ tl1 = pmap_load(l1);
+ if (tl1 == 0) {
+ /* recurse for allocating page dir */
+ if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+ lockp) == NULL) {
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ return (NULL);
+ }
+ } else {
+ l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+ l2pg->ref_count++;
+ }
+ }
} else {
- l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
- l1 = &l1[l1index & Ln_ADDR_MASK];
+ l1 = &pmap->pm_l0[l1index & STAGE2_L1_ADDR_MASK];
tl1 = pmap_load(l1);
if (tl1 == 0) {
/* recurse for allocating page dir */
@@ -2085,9 +2235,27 @@
mtx_unlock_spin(&set->asid_set_mutex);
}
- m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
- vm_page_unwire_noq(m);
- vm_page_free_zero(m);
+ if (pmap->pm_stage == PM_STAGE1) {
+ m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ } else {
+ uint64_t i, page_cnt;
+ vm_paddr_t pa;
+
+ if (pa_range_bits < L0_SHIFT)
+ page_cnt = 1;
+ else
+ page_cnt = 1 << (pa_range_bits - L0_SHIFT);
+
+ pa = DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0);
+ for (i = 0; i < page_cnt; i++) {
+ m = PHYS_TO_VM_PAGE(pa);
+ vm_page_unwire_noq(m);
+ vm_page_free_zero(m);
+ pa += PAGE_SIZE;
+ }
+ }
}
static int
@@ -2456,7 +2624,7 @@
vm_page_t m;
mtx_lock(&pv_chunks_mutex);
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
@@ -2942,7 +3110,7 @@
* released. Otherwise, a concurrent
* pmap_remove_all() on a physical page
* could return while a stale TLB entry
- * still provides access to that page.
+ * still provides access to that page.
*/
if (va != eva) {
pmap_invalidate_range(pmap, va,
@@ -3003,12 +3171,14 @@
if (pmap->pm_stats.resident_count == 0)
break;
- l0 = pmap_l0(pmap, sva);
- if (pmap_load(l0) == 0) {
- va_next = (sva + L0_SIZE) & ~L0_OFFSET;
- if (va_next < sva)
- va_next = eva;
- continue;
+ if (pmap->pm_stage == PM_STAGE1) {
+ l0 = pmap_l0(pmap, sva);
+ if (pmap_load(l0) == 0) {
+ va_next = (sva + L0_SIZE) & ~L0_OFFSET;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
}
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
@@ -3862,33 +4032,19 @@
new_l3 |= ATTR_S1_UXN;
if (pmap != kernel_pmap)
new_l3 |= ATTR_S1_nG;
- } else {
- /*
- * Clear the access flag on executable mappings, this will be
- * set later when the page is accessed. The fault handler is
- * required to invalidate the I-cache.
- *
- * TODO: Switch to the valid flag to allow hardware management
- * of the access flag. Much of the pmap code assumes the
- * valid flag is set and fails to destroy the old page tables
- * correctly if it is clear.
- */
- if (prot & VM_PROT_EXECUTE)
- new_l3 &= ~ATTR_AF;
- }
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- new_l3 |= ATTR_SW_MANAGED;
- if ((prot & VM_PROT_WRITE) != 0) {
- new_l3 |= ATTR_SW_DBM;
- if ((flags & VM_PROT_WRITE) == 0) {
- if (pmap->pm_stage == PM_STAGE1)
- new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
- else
- new_l3 &=
- ~ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ new_l3 |= ATTR_SW_MANAGED;
+ if ((prot & VM_PROT_WRITE) != 0) {
+ new_l3 |= ATTR_SW_DBM;
+ if ((flags & VM_PROT_WRITE) == 0)
+ new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
}
}
+ } else {
+ new_l3 = (pd_entry_t)(pa | ATTR_ST2_DEFAULT | L3_PAGE);
}
+ if ((flags & PMAP_ENTER_WIRED) != 0)
+ new_l3 |= ATTR_SW_WIRED;
CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
@@ -3942,6 +4098,7 @@
}
/* We need to allocate an L3 table. */
}
+
if (va < VM_MAXUSER_ADDRESS) {
nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
@@ -4776,7 +4933,7 @@
pmap_abort_ptp(dst_pmap, addr, dstmpte);
goto out;
}
- /* Have we copied all of the valid mappings? */
+ /* Have we copied all of the valid mappings? */
if (dstmpte->ref_count >= srcmpte->ref_count)
break;
}
@@ -5076,7 +5233,7 @@
switch(lvl) {
case 1:
pte = pmap_l1_to_l2(pde, pv->pv_va);
- tpte = pmap_load(pte);
+ tpte = pmap_load(pte);
KASSERT((tpte & ATTR_DESCR_MASK) ==
L2_BLOCK,
("Attempting to remove an invalid "
Index: sys/arm64/include/armreg.h
===================================================================
--- sys/arm64/include/armreg.h
+++ sys/arm64/include/armreg.h
@@ -209,7 +209,7 @@
#define ISS_DATA_DFSC_TLB_CONFLICT (0x30 << 0)
#define ESR_ELx_IL (0x01 << 25)
#define ESR_ELx_EC_SHIFT 26
-#define ESR_ELx_EC_MASK (0x3f << 26)
+#define ESR_ELx_EC_MASK (0x3f << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
#define EXCP_UNKNOWN 0x00 /* Unkwn exception */
#define EXCP_TRAP_WFI_WFE 0x01 /* Trapped WFI or WFE */
@@ -220,10 +220,10 @@
#define EXCP_HVC 0x16 /* HVC trap */
#define EXCP_MSR 0x18 /* MSR/MRS trap */
#define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */
-#define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */
+#define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */
#define EXCP_PC_ALIGN 0x22 /* PC alignment fault */
#define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */
-#define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */
+#define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */
#define EXCP_SP_ALIGN 0x26 /* SP slignment fault */
#define EXCP_TRAP_FP 0x2c /* Trapped FP exception */
#define EXCP_SERROR 0x2f /* SError interrupt */
Index: sys/arm64/include/bitops.h
===================================================================
--- /dev/null
+++ sys/arm64/include/bitops.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) TODO
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ARM_BITOPS_H_
+#define _ARM_BITOPS_H_
+
+#include <sys/bitstring.h>
+
+#define for_each_set_bit(bit, addr, size) \
+ for (bit_ffs((bitstr_t *)(addr), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffs_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for (bit_ffs_at((bitstr_t *)(addr), (bit), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffs_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
+#define for_each_clear_bit(bit, addr, size) \
+ for (bit_ffc((bitstr_t *)(addr), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffc_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
+/* same as for_each_clear_bit() but use bit as value to start with */
+#define for_each_clear_bit_from(bit, addr, size) \
+ for (bit_ffc_at((bitstr_t *)(addr), (bit), (size), (int *)&(bit)); \
+ (bit) != -1; \
+ bit_ffc_at((bitstr_t *)(addr), (bit) + 1, (size), (int *)&(bit)))
+
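+/*
+ * Usage sketch; handle_irq() is a hypothetical consumer:
+ *
+ *	uint64_t pending = 0x14;
+ *	int bit;
+ *
+ *	for_each_set_bit(bit, &pending, 64)
+ *		handle_irq(bit);
+ *
+ * which visits bits 2 and 4.
+ */
+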
+#endif /* _ARM_BITOPS_H_ */
Index: sys/arm64/include/cpu.h
===================================================================
--- sys/arm64/include/cpu.h
+++ sys/arm64/include/cpu.h
@@ -115,6 +115,7 @@
#define CPU_IMPL_TO_MIDR(val) (((val) & 0xff) << 24)
#define CPU_PART_TO_MIDR(val) (((val) & 0xfff) << 4)
+#define CPU_ARCH_TO_MIDR(val) (((val) & 0xf) << 16)
#define CPU_VAR_TO_MIDR(val) (((val) & 0xf) << 20)
#define CPU_REV_TO_MIDR(val) (((val) & 0xf) << 0)
Index: sys/arm64/include/hypervisor.h
===================================================================
--- sys/arm64/include/hypervisor.h
+++ sys/arm64/include/hypervisor.h
@@ -182,4 +182,35 @@
#define VTTBR_VMID_SHIFT 48
#define VTTBR_HOST 0x0000000000000000
+/* VTCR_EL2 - Virtualization Translation Control Register */
+#define VTCR_EL2_RES1 (0x1 << 31)
+#define VTCR_EL2_T0SZ_MASK 0x3f
+#define VTCR_EL2_SL0_SHIFT 6
+#define VTCR_EL2_SL0_4K_LVL2 (0x0 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_4K_LVL1 (0x1 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_4K_LVL0 (0x2 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_IRGN0_SHIFT 8
+#define VTCR_EL2_IRGN0_WBWA (0x1 << VTCR_EL2_IRGN0_SHIFT)
+#define VTCR_EL2_ORGN0_SHIFT 10
+#define VTCR_EL2_ORGN0_WBWA (0x1 << VTCR_EL2_ORGN0_SHIFT)
+#define VTCR_EL2_SH0_SHIFT 12
+#define VTCR_EL2_SH0_NS (0x0 << VTCR_EL2_SH0_SHIFT)
+#define VTCR_EL2_SH0_OS (0x2 << VTCR_EL2_SH0_SHIFT)
+#define VTCR_EL2_SH0_IS (0x3 << VTCR_EL2_SH0_SHIFT)
+#define VTCR_EL2_TG0_SHIFT 14
+#define VTCR_EL2_TG0_4K (0x0 << VTCR_EL2_TG0_SHIFT)
+#define VTCR_EL2_TG0_64K (0x1 << VTCR_EL2_TG0_SHIFT)
+#define VTCR_EL2_TG0_16K (0x2 << VTCR_EL2_TG0_SHIFT)
+#define VTCR_EL2_PS_SHIFT 16
+#define VTCR_EL2_PS_32BIT (0x0 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_36BIT (0x1 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_40BIT (0x2 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_42BIT (0x3 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_44BIT (0x4 << VTCR_EL2_PS_SHIFT)
+#define VTCR_EL2_PS_48BIT (0x5 << VTCR_EL2_PS_SHIFT)
+
+/* HPFAR_EL2 - Hypervisor IPA Fault Address Register */
+#define HPFAR_EL2_FIPA_SHIFT 4
+#define HPFAR_EL2_FIPA_MASK 0xfffffffff0
+
#endif /* !_MACHINE_HYPERVISOR_H_ */
Index: sys/arm64/include/pcpu.h
===================================================================
--- sys/arm64/include/pcpu.h
+++ sys/arm64/include/pcpu.h
@@ -43,6 +43,7 @@
u_int pc_acpi_id; /* ACPI CPU id */ \
u_int pc_midr; /* stored MIDR value */ \
uint64_t pc_clock; \
+ void *pc_vcpu; \
pcpu_bp_harden pc_bp_harden; \
pcpu_ssbd pc_ssbd; \
struct pmap *pc_curpmap; \
Index: sys/arm64/include/pmap.h
===================================================================
--- sys/arm64/include/pmap.h
+++ sys/arm64/include/pmap.h
@@ -188,6 +188,7 @@
pd_entry_t **, pt_entry_t **);
int pmap_fault(pmap_t, uint64_t, uint64_t);
+int pmap_pinit_type(pmap_t, enum pmap_stage);
/* System MMU (SMMU). */
int pmap_senter(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_prot_t prot,
Index: sys/arm64/include/pte.h
===================================================================
--- sys/arm64/include/pte.h
+++ sys/arm64/include/pte.h
@@ -99,6 +99,35 @@
#define ATTR_DESCR_TYPE_TABLE 2
#define ATTR_DESCR_TYPE_PAGE 2
#define ATTR_DESCR_TYPE_BLOCK 0
+/* Stage 2 translation Block and Page attributes */
+#define ATTR_ST2_AF ATTR_AF
+#define ATTR_ST2_SH(x) ATTR_SH(x)
+#define ATTR_ST2_SH_MASK ATTR_SH_MASK
+#define ATTR_ST2_SH_NS ATTR_SH_NS /* Non-shareable */
+#define ATTR_ST2_SH_OS ATTR_SH_OS /* Outer-shareable */
+#define ATTR_ST2_SH_IS ATTR_SH_IS /* Inner-shareable */
+#define ATTR_ST2_S2AP(x) ((x) << 6) /* Data access permissions */
+#define ATTR_ST2_S2AP_NONE (0 << 1)
+#define ATTR_ST2_S2AP_R0 (1 << 0)
+#define ATTR_ST2_S2AP_W0 (1 << 1)
+#define ATTR_ST2_S2AP_RW (3 << 0)
+#define ATTR_ST2_MEMATTR(x) ((x) << 2) /* Memory attributes */
+#define ATTR_ST2_MEM_DEV (0 << 2) /* Device memory */
+#define ATTR_ST2_MEM_DEV_nGnRnE (0 << 0)
+#define ATTR_ST2_MEM_DEV_nGnRE (1 << 0)
+#define ATTR_ST2_MEM_DEV_nGRE (1 << 1)
+#define ATTR_ST2_MEM_DEV_GRE (3 << 0)
+#define ATTR_ST2_MEM_ONC (1 << 2) /* Outer Non-cacheable */
+#define ATTR_ST2_MEM_OWT (1 << 2) /* Outer Write-Through Cacheable */
+#define ATTR_ST2_MEM_OWB (3 << 2) /* Outer Write-Back Cacheable */
+#define ATTR_ST2_MEM_INC (1 << 0) /* Inner Non-cacheable */
+#define ATTR_ST2_MEM_IWT (1 << 1) /* Inner Write-Through Cacheable */
+#define ATTR_ST2_MEM_IWB (3 << 0) /* Inner Write-Back Cacheable */
+
+#define ATTR_ST2_DEFAULT (ATTR_ST2_AF | ATTR_ST2_SH(ATTR_ST2_SH_IS) | \
+ ATTR_ST2_S2AP(ATTR_ST2_S2AP_RW) | \
+ ATTR_ST2_MEMATTR(ATTR_ST2_MEM_OWB | ATTR_ST2_MEM_IWB))
+
/* Level 0 table, 512GiB per entry */
#define L0_SHIFT 39
Index: sys/arm64/include/vmm.h
===================================================================
--- /dev/null
+++ sys/arm64/include/vmm.h
@@ -0,0 +1,428 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_H_
+#define _VMM_H_
+
+#include <sys/param.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "pte.h"
+#include "pmap.h"
+
+enum vm_suspend_how {
+ VM_SUSPEND_NONE,
+ VM_SUSPEND_RESET,
+ VM_SUSPEND_POWEROFF,
+ VM_SUSPEND_HALT,
+ VM_SUSPEND_TRIPLEFAULT,
+ VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+ VM_REG_GUEST_X0,
+ VM_REG_GUEST_X1,
+ VM_REG_GUEST_X2,
+ VM_REG_GUEST_X3,
+ VM_REG_GUEST_X4,
+ VM_REG_GUEST_X5,
+ VM_REG_GUEST_X6,
+ VM_REG_GUEST_X7,
+ VM_REG_GUEST_X8,
+ VM_REG_GUEST_X9,
+ VM_REG_GUEST_X10,
+ VM_REG_GUEST_X11,
+ VM_REG_GUEST_X12,
+ VM_REG_GUEST_X13,
+ VM_REG_GUEST_X14,
+ VM_REG_GUEST_X15,
+ VM_REG_GUEST_X16,
+ VM_REG_GUEST_X17,
+ VM_REG_GUEST_X18,
+ VM_REG_GUEST_X19,
+ VM_REG_GUEST_X20,
+ VM_REG_GUEST_X21,
+ VM_REG_GUEST_X22,
+ VM_REG_GUEST_X23,
+ VM_REG_GUEST_X24,
+ VM_REG_GUEST_X25,
+ VM_REG_GUEST_X26,
+ VM_REG_GUEST_X27,
+ VM_REG_GUEST_X28,
+ VM_REG_GUEST_X29,
+ VM_REG_GUEST_LR,
+ VM_REG_GUEST_SP,
+ VM_REG_GUEST_ELR,
+ VM_REG_GUEST_SPSR,
+ VM_REG_ELR_EL2,
+ VM_REG_LAST
+};
+
+#define VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define VM_INTINFO_DEL_ERRCODE 0x800
+#define VM_INTINFO_RSVD 0x7ffff000
+#define VM_INTINFO_VALID 0x80000000
+#define VM_INTINFO_TYPE 0x700
+#define VM_INTINFO_HWINTR (0 << 8)
+#define VM_INTINFO_NMI (2 << 8)
+#define VM_INTINFO_HWEXCEPTION (3 << 8)
+#define VM_INTINFO_SWINTR (4 << 8)
+
+#define VM_GUEST_BASE_IPA 0x80000000UL /* Guest kernel start ipa */
+
+#ifdef _KERNEL
+
+#define VM_MAX_NAMELEN 32
+
+struct vm;
+struct vm_exception;
+struct vm_memory_segment;
+struct vm_exit;
+struct vm_run;
+struct vm_object;
+struct pmap;
+struct hypctx;
+
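+/*
+ * Function types implemented by an architecture-specific VMM backend and
+ * collected in 'struct vmm_ops' below.  The 'vmi' argument passed to the
+ * per-VM hooks is the opaque cookie returned by the vminit hook.
+ */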
+typedef int (*vmm_init_func_t)(int ipinum);
+typedef int (*vmm_cleanup_func_t)(void);
+typedef void (*vmm_resume_func_t)(void);
+typedef void * (*vmi_init_func_t)(struct vm *vm);
+typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
+ struct pmap *pmap, void *rendezvous_cookie,
+ void *suspend_cookie);
+typedef void (*vmi_cleanup_func_t)(void *vmi);
+typedef void (*vmi_mmap_set_func_t)(void *arg, vm_offset_t va,
+ vm_offset_t pa, size_t len,
+ vm_prot_t prot);
+typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *arg, vm_offset_t va);
+typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
+ uint64_t *retval);
+typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
+ uint64_t val);
+typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
+typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
+typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
+typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+
+struct vmm_ops {
+ /* Module-wide functions */
+ vmm_init_func_t init;
+ vmm_cleanup_func_t cleanup;
+ vmm_resume_func_t resume;
+ /* VM specific functions */
+ vmi_init_func_t vminit;
+ vmi_run_func_t vmrun;
+ vmi_cleanup_func_t vmcleanup;
+ vmi_mmap_set_func_t vmmapset;
+ vmi_mmap_get_func_t vmmapget;
+ vmi_get_register_t vmgetreg;
+ vmi_set_register_t vmsetreg;
+ vmi_get_cap_t vmgetcap;
+ vmi_set_cap_t vmsetcap;
+};
+
+extern struct vmm_ops vmm_ops_arm;
+
+int vm_create(const char *name, struct vm **retvm);
+void vm_destroy(struct vm *vm);
+const char *vm_name(struct vm *vm);
+int vm_malloc(struct vm *vm, uint64_t gpa, size_t len);
+uint64_t vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t size);
+int vm_gpabase2memseg(struct vm *vm, uint64_t gpabase,
+ struct vm_memory_segment *seg);
+boolean_t vm_mem_allocated(struct vm *vm, uint64_t gpa);
+int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
+int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
+int vm_run(struct vm *vm, struct vm_run *vmrun);
+void* vm_get_cookie(struct vm *vm);
+uint16_t vm_get_maxcpus(struct vm *vm);
+int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
+int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
+int vm_activate_cpu(struct vm *vm, int vcpu);
+int vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
+int vm_assert_irq(struct vm *vm, uint32_t irq);
+int vm_deassert_irq(struct vm *vm, uint32_t irq);
+struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+
+#ifdef _SYS__CPUSET_H_
+/*
+ * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
+ * The rendezvous 'func(arg)' is not allowed to do anything that will
+ * cause the thread to be put to sleep.
+ *
+ * If the rendezvous is being initiated from a vcpu context then the
+ * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
+ *
+ * The caller cannot hold any locks when initiating the rendezvous.
+ *
+ * The implementation of this API may cause vcpus other than those specified
+ * by 'dest' to be stalled. The caller should not rely on any vcpus making
+ * forward progress when the rendezvous is in progress.
+ */
+typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
+void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
+ vm_rendezvous_func_t func, void *arg);
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+#endif /* _SYS__CPUSET_H_ */
+
+extern uint64_t hypmode_enabled;
+static __inline bool
+virt_enabled(void)
+{
+ return (hypmode_enabled != 0);
+}
+
+static __inline int
+vcpu_rendezvous_pending(void *rendezvous_cookie)
+{
+
+ return (*(uintptr_t *)rendezvous_cookie != 0);
+}
+
+static __inline int
+vcpu_suspended(void *suspend_cookie)
+{
+
+ return (*(int *)suspend_cookie);
+}
+
+enum vcpu_state {
+ VCPU_IDLE,
+ VCPU_FROZEN,
+ VCPU_RUNNING,
+ VCPU_SLEEPING,
+};
+
+int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
+ bool from_idle);
+enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+
+static int __inline
+vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
+{
+ return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
+}
+
+#ifdef _SYS_PROC_H_
+static int __inline
+vcpu_should_yield(struct vm *vm, int vcpu)
+{
+
+ if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED))
+ return (1);
+ else if (curthread->td_owepreempt)
+ return (1);
+ else
+ return (0);
+}
+#endif
+
+void *vcpu_stats(struct vm *vm, int vcpu);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
+
+/*
+ * This function is called after a VM-exit that occurred during exception or
+ * interrupt delivery through the IDT. The format of 'intinfo' is described
+ * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
+ *
+ * If a VM-exit handler completes the event delivery successfully then it
+ * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
+ * if the task switch emulation is triggered via a task gate then it should
+ * call this function with 'intinfo=0' to indicate that the external event
+ * is not pending anymore.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
+
+/*
+ * This function is called before every VM-entry to retrieve a pending
+ * event that should be injected into the guest. This function combines
+ * nested events into a double or triple fault.
+ *
+ * Returns 0 if there are no events that need to be injected into the guest
+ * and non-zero otherwise.
+ */
+int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
+
+int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+
+enum vm_reg_name vm_segment_name(int seg_encoding);
+
+struct vm_copyinfo {
+ uint64_t gpa;
+ size_t len;
+ void *hva;
+ void *cookie;
+};
+
+int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
+#endif /* _KERNEL */
+
+#define VM_MAXCPU 1
+
+#define VM_DIR_READ 0
+#define VM_DIR_WRITE 1
+
+struct vie {
+ uint8_t access_size:4, sign_extend:1, dir:1, unused:2;
+ enum vm_reg_name reg;
+};
+
+struct vre {
+ uint32_t inst_syndrome;
+ uint8_t dir:1, unused:7;
+ enum vm_reg_name reg;
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+ VM_CAP_HALT_EXIT,
+ VM_CAP_MTRAP_EXIT,
+ VM_CAP_PAUSE_EXIT,
+ VM_CAP_UNRESTRICTED_GUEST,
+ VM_CAP_MAX
+};
+enum vm_exitcode {
+ VM_EXITCODE_BOGUS,
+ VM_EXITCODE_INST_EMUL,
+ VM_EXITCODE_REG_EMUL,
+ VM_EXITCODE_HVC,
+ VM_EXITCODE_SUSPENDED,
+ VM_EXITCODE_HYP,
+ VM_EXITCODE_WFI,
+ VM_EXITCODE_MAX
+};
+
+enum task_switch_reason {
+ TSR_CALL,
+ TSR_IRET,
+ TSR_JMP,
+ TSR_IDT_GATE, /* task gate in IDT */
+};
+
+struct vm_task_switch {
+ uint16_t tsssel; /* new TSS selector */
+ int ext; /* task switch due to external event */
+ uint32_t errcode;
+ int errcode_valid; /* push 'errcode' on the new stack */
+ enum task_switch_reason reason;
+};
+
+struct vm_exit {
+ enum vm_exitcode exitcode;
+ int inst_length;
+ uint64_t pc;
+ union {
+ /*
+ * ARM specific payload.
+ */
+ struct {
+ uint32_t exception_nr;
+ uint32_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } hyp;
+ struct {
+ struct vre vre;
+ } reg_emul;
+ struct {
+ uint64_t gpa;
+ int fault_type;
+ } paging;
+ struct {
+ uint64_t gpa;
+ struct vie vie;
+ } inst_emul;
+
+ struct {
+ struct hypctx *hypctx;
+ } wfi;
+ /*
+ * VMX specific payload. Used when there is no "better"
+ * exitcode to represent the VM-exit.
+ */
+ struct {
+ int status; /* vmx inst status */
+ /*
+ * 'exit_reason' and 'exit_qualification' are valid
+ * only if 'status' is zero.
+ */
+ uint32_t exit_reason;
+ uint64_t exit_qualification;
+ /*
+ * 'inst_error' and 'inst_type' are valid
+ * only if 'status' is non-zero.
+ */
+ int inst_type;
+ int inst_error;
+ } vmx;
+ /*
+ * SVM specific payload.
+ */
+ struct {
+ uint64_t exitcode;
+ uint64_t exitinfo1;
+ uint64_t exitinfo2;
+ } svm;
+ struct {
+#ifdef __aarch64__
+#else
+ uint32_t code; /* ecx value */
+ uint64_t wval;
+#endif
+ } msr;
+ struct {
+ int vcpu;
+ uint64_t rip;
+ } spinup_ap;
+ struct {
+ uint64_t rflags;
+ } hlt;
+ struct {
+ int vector;
+ } ioapic_eoi;
+ struct {
+ enum vm_suspend_how how;
+ } suspended;
+ struct vm_task_switch task_switch;
+ } u;
+};
+
+#endif /* _VMM_H_ */
Index: sys/arm64/include/vmm_dev.h
===================================================================
--- /dev/null
+++ sys/arm64/include/vmm_dev.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_DEV_H_
+#define _VMM_DEV_H_
+
+#ifdef _KERNEL
+void vmmdev_init(void);
+int vmmdev_cleanup(void);
+#endif
+
+struct vm_memory_segment {
+ uint64_t gpa; /* in */
+ size_t len;
+ int wired;
+};
+
+struct vm_register {
+ int cpuid;
+ int regnum; /* enum vm_reg_name */
+ uint64_t regval;
+};
+
+struct vm_run {
+ int cpuid;
+ uint64_t pc;
+ struct vm_exit vm_exit;
+
+};
+
+struct vm_exception {
+ int cpuid;
+ int vector;
+ uint32_t error_code;
+ int error_code_valid;
+ int restart_instruction;
+};
+
+struct vm_capability {
+ int cpuid;
+ enum vm_cap_type captype;
+ int capval;
+ int allcpus;
+};
+
+#define MAX_VM_STATS 64
+struct vm_stats {
+ int cpuid; /* in */
+ int num_entries; /* out */
+ struct timeval tv;
+ uint64_t statbuf[MAX_VM_STATS];
+};
+struct vm_stat_desc {
+ int index; /* in */
+ char desc[128]; /* out */
+};
+
+
+struct vm_suspend {
+ enum vm_suspend_how how;
+};
+
+struct vm_gla2gpa {
+ int vcpuid; /* inputs */
+ int prot; /* PROT_READ or PROT_WRITE */
+ uint64_t gla;
+ int fault; /* outputs */
+ uint64_t gpa;
+};
+
+struct vm_activate_cpu {
+ int vcpuid;
+};
+
+struct vm_attach_vgic {
+ uint64_t dist_start;
+ size_t dist_size;
+ uint64_t redist_start;
+ size_t redist_size;
+};
+
+struct vm_irq {
+ uint32_t irq;
+};
+
+#define VM_ACTIVE_CPUS 0
+#define VM_SUSPENDED_CPUS 1
+
+enum {
+ /* general routines */
+ IOCNUM_ABIVERS = 0,
+ IOCNUM_RUN = 1,
+ IOCNUM_SET_CAPABILITY = 2,
+ IOCNUM_GET_CAPABILITY = 3,
+ IOCNUM_SUSPEND = 4,
+ IOCNUM_REINIT = 5,
+
+ /* memory apis */
+ IOCNUM_MAP_MEMORY = 10,
+ IOCNUM_GET_MEMORY_SEG = 11,
+ IOCNUM_GET_GPA_PMAP = 12,
+ IOCNUM_GLA2GPA = 13,
+
+ /* register/state accessors */
+ IOCNUM_SET_REGISTER = 20,
+ IOCNUM_GET_REGISTER = 21,
+
+ /* statistics */
+ IOCNUM_VM_STATS = 50,
+ IOCNUM_VM_STAT_DESC = 51,
+
+ /* interrupt injection */
+ IOCNUM_ASSERT_IRQ = 80,
+ IOCNUM_DEASSERT_IRQ = 81,
+
+ /* vm_cpuset */
+ IOCNUM_ACTIVATE_CPU = 90,
+ IOCNUM_GET_CPUSET = 91,
+
+ /* vm_attach_vgic */
+ IOCNUM_ATTACH_VGIC = 110,
+};
+
+#define VM_RUN \
+ _IOWR('v', IOCNUM_RUN, struct vm_run)
+#define VM_SUSPEND \
+ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
+#define VM_REINIT \
+ _IO('v', IOCNUM_REINIT)
+#define VM_MAP_MEMORY \
+ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
+#define VM_GET_MEMORY_SEG \
+ _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
+#define VM_SET_REGISTER \
+ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
+#define VM_GET_REGISTER \
+ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
+#define VM_SET_CAPABILITY \
+ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
+#define VM_GET_CAPABILITY \
+ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
+#define VM_STATS \
+ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
+#define VM_STAT_DESC \
+ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
+#define VM_ASSERT_IRQ \
+ _IOW('v', IOCNUM_ASSERT_IRQ, struct vm_irq)
+#define VM_DEASSERT_IRQ \
+ _IOW('v', IOCNUM_DEASSERT_IRQ, struct vm_irq)
+#define VM_GLA2GPA \
+ _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
+#define VM_ACTIVATE_CPU \
+ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
+#define VM_GET_CPUS \
+ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_ATTACH_VGIC \
+ _IOW('v', IOCNUM_ATTACH_VGIC, struct vm_attach_vgic)
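+
+/*
+ * Example (a sketch, not part of the interface): a userspace control program
+ * would typically open the VM's device node and drive a virtual CPU with the
+ * VM_RUN ioctl.  'vmfd' below is assumed to be such a descriptor; error
+ * handling is omitted.
+ *
+ *	struct vm_run vmrun;
+ *
+ *	memset(&vmrun, 0, sizeof(vmrun));
+ *	vmrun.cpuid = 0;
+ *	ioctl(vmfd, VM_RUN, &vmrun);
+ */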
+#endif
Index: sys/arm64/include/vmm_instruction_emul.h
===================================================================
--- /dev/null
+++ sys/arm64/include/vmm_instruction_emul.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_INSTRUCTION_EMUL_H_
+#define _VMM_INSTRUCTION_EMUL_H_
+
+/*
+ * Callback functions to read and write memory regions.
+ */
+typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int rsize, void *arg);
+typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int wsize, void *arg);
+
+/*
+ * Callback functions to read and write registers.
+ */
+typedef int (*reg_read_t)(void *vm, int cpuid, uint64_t *rval, void *arg);
+typedef int (*reg_write_t)(void *vm, int cpuid, uint64_t wval, void *arg);
+
+/*
+ * Emulate the decoded 'vie' instruction when it contains a memory operation.
+ *
+ * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
+ * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t mrr, mem_region_write_t mrw, void *mrarg);
+
+/*
+ * Emulate the decoded 'vre' instruction when it contains a register access.
+ *
+ * The callbacks 'regread' and 'regwrite' emulate reads and writes to the
+ * register from 'vie'. 'regarg' is an opaque argument that is passed into the
+ * callback functions.
+ *
+ * 'void *vm' should be 'struct vm *' when called from kernel context and
+ * 'struct vmctx *' when called from user context.
+ *
+ */
+int vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg);
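+
+/*
+ * Callback sketch (illustrative; the name is a placeholder): a reg_read_t
+ * implementation that returns a constant for an emulated register.
+ *
+ *	static int
+ *	emul_reg_read(void *vm, int cpuid, uint64_t *rval, void *arg)
+ *	{
+ *		*rval = 0;
+ *		return (0);
+ *	}
+ */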
+
+#endif /* _VMM_INSTRUCTION_EMUL_H_ */
Index: sys/arm64/vmm/arm64.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/arm64.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_ARM64_H_
+#define _VMM_ARM64_H_
+
+#include <machine/reg.h>
+#include <machine/vfp.h>
+#include <machine/hypervisor.h>
+#include <machine/pcpu.h>
+
+#include "mmu.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+struct hypctx {
+ struct reg regs;
+
+ /* EL1 control registers */
+ uint64_t actlr_el1; /* Auxiliary Control Register */
+ uint64_t afsr0_el1; /* Auxiliary Fault Status Register 0 */
+ uint64_t afsr1_el1; /* Auxiliary Fault Status Register 1 */
+ uint64_t amair_el1; /* Auxiliary Memory Attribute Indirection Register */
+ uint64_t contextidr_el1; /* Current Process Identifier */
+ uint64_t cpacr_el1; /* Architectural Feature Access Control Register */
+ uint64_t elr_el1; /* Exception Link Register */
+ uint64_t esr_el1; /* Exception Syndrome Register */
+ uint64_t far_el1; /* Fault Address Register */
+ uint64_t fp; /* Frame Pointer */
+ uint64_t mair_el1; /* Memory Attribute Indirection Register */
+ uint64_t par_el1; /* Physical Address Register */
+ uint64_t sctlr_el1; /* System Control Register */
+ uint64_t sp_el0; /* Stack Pointer */
+ uint64_t tcr_el1; /* Translation Control Register */
+ uint64_t tpidr_el0; /* EL0 Software ID Register */
+ uint64_t tpidrro_el0; /* Read-only Thread ID Register */
+ uint64_t tpidr_el1; /* EL1 Software ID Register */
+ uint64_t ttbr0_el1; /* Translation Table Base Register 0 */
+ uint64_t ttbr1_el1; /* Translation Table Base Register 1 */
+ uint64_t vbar_el1; /* Vector Base Address Register */
+ uint32_t spsr_el1; /* Saved Program Status Register */
+
+ /* EL2 control registers */
+ uint64_t cptr_el2; /* Architectural Feature Trap Register */
+ uint64_t elr_el2; /* Exception Link Register */
+ uint64_t hcr_el2; /* Hypervisor Configuration Register */
+ uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint32_t spsr_el2; /* Saved Program Status Register */
+
+ uint32_t vcpu;
+ struct hyp *hyp;
+ struct {
+ uint64_t esr_el2; /* Exception Syndrome Register */
+ uint64_t far_el2; /* Fault Address Register */
+ uint64_t hpfar_el2; /* Hypervisor IPA Fault Address Register */
+ } exit_info;
+
+ struct vtimer_cpu vtimer_cpu;
+ struct vgic_v3_cpu_if vgic_cpu_if;
+ struct vgic_v3_redist vgic_redist;
+#ifdef VFP
+ struct vfpstate vfpstate;
+#endif
+};
+
+struct hyp {
+ pmap_t stage2_map;
+ struct hypctx ctx[VM_MAXCPU];
+ struct vgic_mmio_region *vgic_mmio_regions;
+ size_t vgic_mmio_regions_num;
+ struct vgic_v3_dist vgic_dist;
+ struct vm *vm;
+ struct vtimer vtimer;
+ uint64_t vmid_generation;
+ uint64_t vttbr_el2;
+ bool vgic_attached;
+};
+
+uint64_t vmm_call_hyp(void *hyp_func_addr, ...);
+void vmm_cleanup(void *hyp_stub_vectors);
+uint64_t vmm_enter_guest(struct hypctx *hypctx);
+uint64_t vmm_read_ich_vtr_el2(void);
+uint64_t vmm_read_cnthctl_el2(void);
+uint64_t vmm_read_tcr_el2(void);
+
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+//#define eprintf(fmt, ...) do {} while(0)
+
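+/*
+ * VTTBR_EL2 layout used below: the 8-bit VMID is placed at VTTBR_VMID_SHIFT
+ * (bits [55:48]) and the physical address of the stage 2 level 0 translation
+ * table occupies the low bits.
+ */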
+#define VMID_GENERATION_MASK ((1UL<<8) - 1)
+#define build_vttbr(vmid, ptaddr) \
+ ((((vmid) & VMID_GENERATION_MASK) << VTTBR_VMID_SHIFT) | \
+ (uint64_t)(ptaddr))
+
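+/*
+ * Helpers to build the guest MPIDR affinity fields from what is presumed to
+ * be a virtual CPU number: CPUs are grouped four to a cluster, with the
+ * cluster number in Aff1 and the CPU index within the cluster in Aff0.
+ */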
+#define MPIDR_SMP_MASK (0x3 << 30)
+#define MPIDR_AFF1_LEVEL(x) (((x) >> 2) << 8)
+#define MPIDR_AFF0_LEVEL(x) (((x) & 0x3) << 0)
+
+/*
+ * Return true if the exception was caused by a translation fault in the stage 2
+ * translation regime. The DFSC encoding for a translation fault has the format
+ * 0b0001LL, where LL (bits [1:0]) represents the level where the fault occurred
+ * (page D7-2280 of the ARMv8 Architecture Manual).
+ */
+#define ISS_DATA_DFSC_TF(esr_iss) \
+ (!((esr_iss) & 0b111000) && ((esr_iss) & 0b000100))
+#define FAR_EL2_PAGE_OFFSET(x) ((x) & PAGE_MASK)
+
+#define DEBUG_ME 0
+
+#define arm64_get_active_vcpu() ((struct hypctx *)PCPU_GET(vcpu))
+
+#endif /* !_VMM_ARM64_H_ */
Index: sys/arm64/vmm/arm64.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/arm64.c
@@ -0,0 +1,804 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+
+#include <machine/armreg.h>
+#include <machine/vm.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/atomic.h>
+#include <machine/hypervisor.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+#include "hyp.h"
+#include "reset.h"
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define HANDLED 1
+#define UNHANDLED 0
+
+#define UNUSED 0
+
+MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");
+
+extern char hyp_init_vectors[];
+extern char hyp_vectors[];
+extern char hyp_code_start[];
+extern char hyp_code_end[];
+extern char hyp_stub_vectors[];
+
+char *stack[MAXCPU];
+pmap_t hyp_pmap;
+
+static uint64_t vmid_generation = 0;
+static struct mtx vmid_generation_mtx;
+
+static inline void
+arm64_set_active_vcpu(struct hypctx *hypctx)
+{
+ PCPU_SET(vcpu, hypctx);
+}
+
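+/*
+ * Allocate a VMID for the guest and construct its VTTBR_EL2 value.  The
+ * global 'vmid_generation' counter is incremented under 'vmid_generation_mtx';
+ * its low VMID_GENERATION_MASK bits become the VMID and the remaining bits act
+ * as a generation number so that a stale VMID can be detected and replaced.
+ */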
+static void
+arm64_set_vttbr(struct hyp *hyp)
+{
+ if (hyp->vmid_generation != 0 &&
+ ((hyp->vmid_generation & ~VMID_GENERATION_MASK) !=
+ (atomic_load_acq_64(&vmid_generation) & ~VMID_GENERATION_MASK)))
+ goto out;
+
+ mtx_lock(&vmid_generation_mtx);
+
+ /* Another VCPU has changed the VMID already */
+ if (hyp->vmid_generation &&
+ ((hyp->vmid_generation & ~VMID_GENERATION_MASK) !=
+ (vmid_generation & ~VMID_GENERATION_MASK))) {
+ mtx_unlock(&vmid_generation_mtx);
+ goto out;
+ }
+
+ vmid_generation++;
+ if (!(vmid_generation & VMID_GENERATION_MASK))
+ vmid_generation++;
+
+ hyp->vmid_generation = vmid_generation;
+ mtx_unlock(&vmid_generation_mtx);
+out:
+ hyp->vttbr_el2 = build_vttbr(hyp->vmid_generation,
+ vtophys(hyp->stage2_map->pm_l0));
+}
+
+static void
+arm_init_vectors(void *arg)
+{
+ char *stack_top;
+ uint64_t tcr_el1, tcr_el2;
+ uint32_t sctlr_el2;
+ uint32_t vtcr_el2;
+ uint64_t id_aa64mmfr0_el1;
+ uint64_t pa_range_bits;
+ register_t daif;
+
+ daif = intr_disable();
+
+ arm64_set_active_vcpu(NULL);
+
+ /*
+ * Install the temporary vectors which will be responsible for
+ * initializing the VMM when we next trap into EL2.
+ *
+ * x0: the exception vector table responsible for hypervisor
+ * initialization on the next call.
+ */
+ vmm_call_hyp((void *)vtophys(hyp_init_vectors));
+
+ /* Create and map the hypervisor stack */
+ stack_top = stack[PCPU_GET(cpuid)] + PAGE_SIZE;
+
+ /* Configure address translation at EL2 */
+ tcr_el1 = READ_SPECIALREG(tcr_el1);
+ tcr_el2 = TCR_EL2_RES1;
+
+ /* Set physical address size */
+ id_aa64mmfr0_el1 = READ_SPECIALREG(id_aa64mmfr0_el1);
+ pa_range_bits = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
+ tcr_el2 |= (pa_range_bits & 0x7) << TCR_EL2_PS_SHIFT;
+
+ /* Use the same address translation attributes as the host */
+ tcr_el2 |= tcr_el1 & TCR_T0SZ_MASK;
+ tcr_el2 |= tcr_el1 & (0xff << TCR_IRGN0_SHIFT);
+
+ /*
+ * Configure the system control register for EL2:
+ *
+ * SCTLR_EL2_M: MMU on
+ * SCTLR_EL2_C: Data cacheability not affected
+ * SCTLR_EL2_I: Instruction cacheability not affected
+ * SCTLR_EL2_A: Instruction alignment check
+ * SCTLR_EL2_SA: Stack pointer alignment check
+ * SCTLR_EL2_WXN: Treat writable memory as execute never
+ * ~SCTLR_EL2_EE: Data accesses are little-endian
+ */
+ sctlr_el2 = SCTLR_EL2_RES1;
+ sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
+ sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
+ sctlr_el2 |= SCTLR_EL2_WXN;
+ sctlr_el2 &= ~SCTLR_EL2_EE;
+
+ /*
+ * Configure the Stage 2 translation control register:
+ *
+ * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
+ * normal memory
+ * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
+ * normal memory
+ * VTCR_EL2_TG0_4K: Stage 2 uses 4K pages
+ * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
+ * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
+ * shareable
+ */
+ vtcr_el2 = VTCR_EL2_RES1;
+ vtcr_el2 |= (pa_range_bits & 0x7) << VTCR_EL2_PS_SHIFT;
+ vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
+ vtcr_el2 |= VTCR_EL2_TG0_4K;
+ vtcr_el2 |= VTCR_EL2_SH0_IS;
+ if (pa_range_bits == ID_AA64MMFR0_PARange_1T) {
+ /*
+ * 40 bits of physical addresses, use concatenated level 1
+ * tables
+ */
+ vtcr_el2 |= 24 & VTCR_EL2_T0SZ_MASK;
+ vtcr_el2 |= VTCR_EL2_SL0_4K_LVL1;
+ }
+
+ /* Special call to initialize EL2 */
+ vmm_call_hyp((void *)vtophys(hyp_vectors), vtophys(hyp_pmap->pm_l0),
+ ktohyp(stack_top), tcr_el2, sctlr_el2, vtcr_el2);
+
+ intr_restore(daif);
+}
+
+static void
+arm_cleanup_vectors(void *arg)
+{
+ register_t daif;
+
+ /*
+ * vmm_cleanup() will disable the MMU. For the next few instructions,
+ * before the hardware disables the MMU, one of the following is
+ * possible:
+ *
+ * a. The instruction addresses are fetched with the MMU disabled,
+ * and they must represent the actual physical addresses. This will work
+ * because we call the vmm_cleanup() function by its physical address.
+ *
+ * b. The instruction addresses are fetched using the old translation
+ * tables. This will work because we have an identity mapping in place
+ * in the translation tables and vmm_cleanup() is called by its physical
+ * address.
+ */
+ daif = intr_disable();
+ vmm_call_hyp((void *)vtophys(vmm_cleanup), vtophys(hyp_stub_vectors));
+ intr_restore(daif);
+
+ arm64_set_active_vcpu(NULL);
+}
+
+static int
+arm_init(int ipinum)
+{
+ size_t hyp_code_len;
+ uint64_t ich_vtr_el2;
+ uint64_t cnthctl_el2;
+ int cpu;
+ register_t daif;
+
+ if (!virt_enabled()) {
+ printf("arm_init: Processor doesn't have support for virtualization.\n");
+ return (ENXIO);
+ }
+
+ mtx_init(&vmid_generation_mtx, "vmid_generation_mtx", NULL, MTX_DEF);
+
+ /* Create the mappings for the hypervisor translation table. */
+ hyp_pmap = malloc(sizeof(*hyp_pmap), M_HYP, M_WAITOK | M_ZERO);
+ hypmap_init(hyp_pmap, PM_STAGE1);
+ hyp_code_len = (size_t)hyp_code_end - (size_t)hyp_code_start;
+ hypmap_map(hyp_pmap, (vm_offset_t)hyp_code_start, hyp_code_len, VM_PROT_EXECUTE);
+
+ /* We need an identity mapping for when we activate the MMU */
+ hypmap_map_identity(hyp_pmap, (vm_offset_t)hyp_code_start, hyp_code_len,
+ VM_PROT_EXECUTE);
+
+ /* Create a per-CPU hypervisor stack */
+ CPU_FOREACH(cpu) {
+ stack[cpu] = malloc(PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ hypmap_map(hyp_pmap, (vm_offset_t)stack[cpu], PAGE_SIZE,
+ VM_PROT_READ | VM_PROT_WRITE);
+ }
+
+ smp_rendezvous(NULL, arm_init_vectors, NULL, NULL);
+
+ daif = intr_disable();
+
+ ich_vtr_el2 = vmm_call_hyp((void *)ktohyp(vmm_read_ich_vtr_el2));
+ vgic_v3_init(ich_vtr_el2);
+
+ cnthctl_el2 = vmm_call_hyp((void *)ktohyp(vmm_read_cnthctl_el2));
+ vtimer_init(cnthctl_el2);
+
+ intr_restore(daif);
+
+ return (0);
+}
+
+static int
+arm_cleanup(void)
+{
+ int cpu;
+
+ smp_rendezvous(NULL, arm_cleanup_vectors, NULL, NULL);
+
+ vtimer_cleanup();
+
+ hypmap_cleanup(hyp_pmap);
+ free(hyp_pmap, M_HYP);
+ for (cpu = 0; cpu < nitems(stack); cpu++)
+ free(stack[cpu], M_HYP);
+
+ mtx_destroy(&vmid_generation_mtx);
+
+ return (0);
+}
+
+static void *
+arm_vminit(struct vm *vm)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ bool last_vcpu;
+ int i;
+
+ hyp = malloc(sizeof(struct hyp), M_HYP, M_WAITOK | M_ZERO);
+ hyp->vm = vm;
+ hyp->vgic_attached = false;
+
+ hyp->stage2_map = malloc(sizeof(*hyp->stage2_map),
+ M_HYP, M_WAITOK | M_ZERO);
+ hypmap_init(hyp->stage2_map, PM_STAGE2);
+ arm64_set_vttbr(hyp);
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ hypctx = &hyp->ctx[i];
+ hypctx->vcpu = i;
+ hypctx->hyp = hyp;
+
+ reset_vm_el01_regs(hypctx);
+ reset_vm_el2_regs(hypctx);
+ }
+
+ vtimer_vminit(hyp);
+ vgic_v3_vminit(hyp);
+ for (i = 0; i < VM_MAXCPU; i++) {
+ hypctx = &hyp->ctx[i];
+ vtimer_cpuinit(hypctx);
+ last_vcpu = (i == VM_MAXCPU - 1);
+ vgic_v3_cpuinit(hypctx, last_vcpu);
+ }
+
+ hypmap_map(hyp_pmap, (vm_offset_t)hyp, sizeof(struct hyp),
+ VM_PROT_READ | VM_PROT_WRITE);
+
+ return (hyp);
+}
+
+static enum vm_reg_name
+get_vm_reg_name(uint32_t reg_nr, uint32_t mode __attribute__((unused)))
+{
+ switch(reg_nr) {
+ case 0:
+ return VM_REG_GUEST_X0;
+ case 1:
+ return VM_REG_GUEST_X1;
+ case 2:
+ return VM_REG_GUEST_X2;
+ case 3:
+ return VM_REG_GUEST_X3;
+ case 4:
+ return VM_REG_GUEST_X4;
+ case 5:
+ return VM_REG_GUEST_X5;
+ case 6:
+ return VM_REG_GUEST_X6;
+ case 7:
+ return VM_REG_GUEST_X7;
+ case 8:
+ return VM_REG_GUEST_X8;
+ case 9:
+ return VM_REG_GUEST_X9;
+ case 10:
+ return VM_REG_GUEST_X10;
+ case 11:
+ return VM_REG_GUEST_X11;
+ case 12:
+ return VM_REG_GUEST_X12;
+ case 13:
+ return VM_REG_GUEST_X13;
+ case 14:
+ return VM_REG_GUEST_X14;
+ case 15:
+ return VM_REG_GUEST_X15;
+ case 16:
+ return VM_REG_GUEST_X16;
+ case 17:
+ return VM_REG_GUEST_X17;
+ case 18:
+ return VM_REG_GUEST_X18;
+ case 19:
+ return VM_REG_GUEST_X19;
+ case 20:
+ return VM_REG_GUEST_X20;
+ case 21:
+ return VM_REG_GUEST_X21;
+ case 22:
+ return VM_REG_GUEST_X22;
+ case 23:
+ return VM_REG_GUEST_X23;
+ case 24:
+ return VM_REG_GUEST_X24;
+ case 25:
+ return VM_REG_GUEST_X25;
+ case 26:
+ return VM_REG_GUEST_X26;
+ case 27:
+ return VM_REG_GUEST_X27;
+ case 28:
+ return VM_REG_GUEST_X28;
+ case 29:
+ return VM_REG_GUEST_X29;
+ case 30:
+ return VM_REG_GUEST_LR;
+ case 31:
+ return VM_REG_GUEST_SP;
+ case 32:
+ return VM_REG_GUEST_ELR;
+ case 33:
+ return VM_REG_GUEST_SPSR;
+ case 34:
+ return VM_REG_ELR_EL2;
+ default:
+ break;
+ }
+
+ return (VM_REG_LAST);
+}
+
+static inline void
+arm64_print_hyp_regs(struct vm_exit *vme)
+{
+ printf("esr_el2: 0x%08x\n", vme->u.hyp.esr_el2);
+ printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
+ printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
+}
+
+static void
+arm64_gen_inst_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+ struct vie *vie;
+ uint32_t esr_sas, reg_num;
+ uint64_t page_off;
+
+ /*
+ * Get bits [47:12] of the IPA from HPFAR_EL2.
+ * At this point the 'u.hyp' member will be replaced by 'u.inst_emul'.
+ */
+ vme_ret->u.inst_emul.gpa = \
+ (vme_ret->u.hyp.hpfar_el2) >> HPFAR_EL2_FIPA_SHIFT;
+ /* The IPA is the base address of a 4KB page, make bits [11:0] zero. */
+ vme_ret->u.inst_emul.gpa = (vme_ret->u.inst_emul.gpa) << PAGE_SHIFT;
+ /* Bits [11:0] are the same as bits [11:0] from the virtual address. */
+ page_off = FAR_EL2_PAGE_OFFSET(vme_ret->u.hyp.far_el2);
+ vme_ret->u.inst_emul.gpa = vme_ret->u.inst_emul.gpa + page_off;
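+	/*
+	 * Worked example with illustrative values: HPFAR_EL2 = 0x401230 gives
+	 * an IPA page base of 0x40123000, and with FAR_EL2[11:0] = 0x2a8 the
+	 * resulting guest physical address is 0x401232a8.
+	 */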
+
+ esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
+ reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
+
+ vie = &vme_ret->u.inst_emul.vie;
+ vie->access_size = 1 << esr_sas;
+ vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
+ vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
+ vie->reg = get_vm_reg_name(reg_num, UNUSED);
+}
+
+static void
+arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
+{
+ uint32_t reg_num;
+ struct vre *vre;
+
+ /* u.hyp member will be replaced by u.reg_emul */
+ vre = &vme_ret->u.reg_emul.vre;
+
+ vre->inst_syndrome = esr_iss;
+ /* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
+ vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
+ reg_num = ISS_MSR_Rt(esr_iss);
+ vre->reg = get_vm_reg_name(reg_num, UNUSED);
+}
+
+static int
+handle_el1_sync_excp(struct hyp *hyp, int vcpu, struct vm_exit *vme_ret)
+{
+ uint32_t esr_ec, esr_iss;
+
+ esr_ec = ESR_ELx_EXCEPTION(vme_ret->u.hyp.esr_el2);
+ esr_iss = vme_ret->u.hyp.esr_el2 & ESR_ELx_ISS_MASK;
+
+ switch(esr_ec) {
+ case EXCP_UNKNOWN:
+ eprintf("Unknown exception from guest\n");
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ case EXCP_HVC:
+ vme_ret->exitcode = VM_EXITCODE_HVC;
+ break;
+ case EXCP_MSR:
+ arm64_gen_reg_emul_data(esr_iss, vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
+ break;
+
+ case EXCP_DATA_ABORT_L:
+ /* Check if instruction syndrome is valid */
+ if (!(esr_iss & ISS_DATA_ISV)) {
+ eprintf("Data abort with invalid instruction syndrome\n");
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ /*
+ * Check if the data abort was caused by a translation fault.
+ * Any other type of data fault will be treated as an error.
+ */
+ if (!(ISS_DATA_DFSC_TF(esr_iss))) {
+ eprintf("Data abort not on a stage 2 translation\n");
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ arm64_gen_inst_emul_data(esr_iss, vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
+ break;
+
+ default:
+ eprintf("Unsupported synchronous exception from guest: 0x%x\n",
+ esr_ec);
+ arm64_print_hyp_regs(vme_ret);
+ vme_ret->exitcode = VM_EXITCODE_HYP;
+ break;
+ }
+
+ /* We don't do any instruction emulation here */
+ return (UNHANDLED);
+}
+
+static int
+arm64_handle_world_switch(struct hyp *hyp, int vcpu, struct vm_exit *vme)
+{
+ int excp_type;
+ int handled;
+
+ excp_type = vme->u.hyp.exception_nr;
+ switch (excp_type) {
+ case EXCP_TYPE_EL1_SYNC:
+ /* The exit code will be set by handle_el1_sync_excp(). */
+ handled = handle_el1_sync_excp(hyp, vcpu, vme);
+ break;
+
+ case EXCP_TYPE_EL1_IRQ:
+ case EXCP_TYPE_EL1_FIQ:
+ /* The host kernel will handle IRQs and FIQs. */
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ case EXCP_TYPE_EL1_ERROR:
+ case EXCP_TYPE_EL2_SYNC:
+ case EXCP_TYPE_EL2_IRQ:
+ case EXCP_TYPE_EL2_FIQ:
+ case EXCP_TYPE_EL2_ERROR:
+ eprintf("Unhandled exception type: %s\n", __STRING(excp_type));
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+
+ default:
+ eprintf("Unknown exception type: %d\n", excp_type);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ handled = UNHANDLED;
+ break;
+ }
+
+ return (handled);
+}
+
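+/*
+ * Enter the guest and loop on world switches.  Maintenance interrupts from
+ * the virtual GIC re-enter the guest directly; exits that cannot be handled
+ * here are recorded in 'vme' and the function returns so the caller can
+ * emulate the trapping instruction.
+ */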
+static int
+arm_vmrun(void *arg, int vcpu, register_t pc, pmap_t pmap,
+ void *rendezvous_cookie, void *suspend_cookie)
+{
+ uint64_t excp_type;
+ int handled;
+ register_t daif;
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vm *vm;
+ struct vm_exit *vme;
+
+ hyp = (struct hyp *)arg;
+ vm = hyp->vm;
+ vme = vm_exitinfo(vm, vcpu);
+
+ hypctx = &hyp->ctx[vcpu];
+ hypctx->elr_el2 = (uint64_t)pc;
+
+ for (;;) {
+ daif = intr_disable();
+ /*
+ * TODO: What happens if a timer interrupt is asserted exactly
+ * here, but for the previous VM?
+ */
+ arm64_set_active_vcpu(hypctx);
+ vgic_v3_sync_hwstate(hypctx);
+ excp_type = vmm_call_hyp((void *)ktohyp(vmm_enter_guest),
+ ktohyp(hypctx));
+ intr_restore(daif);
+
+ if (excp_type == EXCP_TYPE_MAINT_IRQ)
+ continue;
+
+ vme->pc = hypctx->elr_el2;
+ vme->inst_length = INSN_SIZE;
+ vme->u.hyp.exception_nr = excp_type;
+ vme->u.hyp.esr_el2 = hypctx->exit_info.esr_el2;
+ vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
+ vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
+
+ handled = arm64_handle_world_switch(hyp, vcpu, vme);
+ if (handled == UNHANDLED)
+ /* Exit loop to emulate instruction. */
+ break;
+ else
+ /* Resume guest execution from the next instruction. */
+ hypctx->elr_el2 += vme->inst_length;
+ }
+
+ return (0);
+}
+
+static void
+arm_deactivate_pcpu(void *arg)
+{
+ struct hyp *hyp = arg;
+ int maxcpu;
+ int i;
+
+ maxcpu = vm_get_maxcpus(hyp->vm);
+ for (i = 0; i < maxcpu; i++)
+ if (arm64_get_active_vcpu() == &hyp->ctx[i])
+ arm64_set_active_vcpu(NULL);
+}
+
+static void
+arm_vmcleanup(void *arg)
+{
+ struct hyp *hyp = arg;
+
+ smp_rendezvous(NULL, arm_deactivate_pcpu, NULL, hyp);
+
+ vtimer_vmcleanup(arg);
+ vgic_v3_detach_from_vm(arg);
+
+ /* Unmap the VM hyp struct from the hyp mode translation table */
+ hypmap_map(hyp_pmap, (vm_offset_t)hyp, sizeof(struct hyp),
+ VM_PROT_NONE);
+ hypmap_cleanup(hyp->stage2_map);
+ free(hyp->stage2_map, M_HYP);
+ free(hyp, M_HYP);
+}
+
+/*
+ * Return a pointer to the register's storage. Registers have different sizes,
+ * so the caller must cast the pointer to the appropriate type before
+ * dereferencing it.
+ */
+static void *
+hypctx_regptr(struct hypctx *hypctx, int reg)
+{
+ switch (reg) {
+ case VM_REG_GUEST_X0:
+ return (&hypctx->regs.x[0]);
+ case VM_REG_GUEST_X1:
+ return (&hypctx->regs.x[1]);
+ case VM_REG_GUEST_X2:
+ return (&hypctx->regs.x[2]);
+ case VM_REG_GUEST_X3:
+ return (&hypctx->regs.x[3]);
+ case VM_REG_GUEST_X4:
+ return (&hypctx->regs.x[4]);
+ case VM_REG_GUEST_X5:
+ return (&hypctx->regs.x[5]);
+ case VM_REG_GUEST_X6:
+ return (&hypctx->regs.x[6]);
+ case VM_REG_GUEST_X7:
+ return (&hypctx->regs.x[7]);
+ case VM_REG_GUEST_X8:
+ return (&hypctx->regs.x[8]);
+ case VM_REG_GUEST_X9:
+ return (&hypctx->regs.x[9]);
+ case VM_REG_GUEST_X10:
+ return (&hypctx->regs.x[10]);
+ case VM_REG_GUEST_X11:
+ return (&hypctx->regs.x[11]);
+ case VM_REG_GUEST_X12:
+ return (&hypctx->regs.x[12]);
+ case VM_REG_GUEST_X13:
+ return (&hypctx->regs.x[13]);
+ case VM_REG_GUEST_X14:
+ return (&hypctx->regs.x[14]);
+ case VM_REG_GUEST_X15:
+ return (&hypctx->regs.x[15]);
+ case VM_REG_GUEST_X16:
+ return (&hypctx->regs.x[16]);
+ case VM_REG_GUEST_X17:
+ return (&hypctx->regs.x[17]);
+ case VM_REG_GUEST_X18:
+ return (&hypctx->regs.x[18]);
+ case VM_REG_GUEST_X19:
+ return (&hypctx->regs.x[19]);
+ case VM_REG_GUEST_X20:
+ return (&hypctx->regs.x[20]);
+ case VM_REG_GUEST_X21:
+ return (&hypctx->regs.x[21]);
+ case VM_REG_GUEST_X22:
+ return (&hypctx->regs.x[22]);
+ case VM_REG_GUEST_X23:
+ return (&hypctx->regs.x[23]);
+ case VM_REG_GUEST_X24:
+ return (&hypctx->regs.x[24]);
+ case VM_REG_GUEST_X25:
+ return (&hypctx->regs.x[25]);
+ case VM_REG_GUEST_X26:
+ return (&hypctx->regs.x[26]);
+ case VM_REG_GUEST_X27:
+ return (&hypctx->regs.x[27]);
+ case VM_REG_GUEST_X28:
+ return (&hypctx->regs.x[28]);
+ case VM_REG_GUEST_X29:
+ return (&hypctx->regs.x[29]);
+ case VM_REG_GUEST_LR:
+ return (&hypctx->regs.lr);
+ case VM_REG_GUEST_SP:
+ return (&hypctx->regs.sp);
+ case VM_REG_GUEST_ELR:
+ return (&hypctx->regs.elr);
+ case VM_REG_GUEST_SPSR:
+ return (&hypctx->regs.spsr);
+ case VM_REG_ELR_EL2:
+ return (&hypctx->elr_el2);
+ default:
+ break;
+ }
+ return (NULL);
+}
+
+static int
+arm_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
+{
+ void *regp;
+ int running, hostcpu;
+ struct hyp *hyp = arg;
+
+ running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("arm_getreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+ if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+ if (reg == VM_REG_GUEST_SPSR)
+ *retval = *(uint32_t *)regp;
+ else
+ *retval = *(uint64_t *)regp;
+ return (0);
+ } else {
+ return (EINVAL);
+ }
+}
+
+static int
+arm_setreg(void *arg, int vcpu, int reg, uint64_t val)
+{
+ void *regp;
+ struct hyp *hyp = arg;
+ int running, hostcpu;
+
+ running = vcpu_is_running(hyp->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("hyp_setreg: %s%d is running", vm_name(hyp->vm), vcpu);
+
+ if ((regp = hypctx_regptr(&hyp->ctx[vcpu], reg)) != NULL) {
+ if (reg == VM_REG_GUEST_SPSR)
+ *(uint32_t *)regp = (uint32_t)val;
+ else
+ *(uint64_t *)regp = val;
+ return (0);
+ } else {
+ return (EINVAL);
+ }
+}
+
+static void
+arm_restore(void)
+{
+ ;
+}
+
+struct vmm_ops vmm_ops_arm = {
+ arm_init,
+ arm_cleanup,
+ arm_restore,
+ arm_vminit,
+ arm_vmrun,
+ arm_vmcleanup,
+ hypmap_set,
+ hypmap_get,
+ arm_getreg,
+ arm_setreg,
+ NULL, /* vmi_get_cap_t */
+ NULL /* vmi_set_cap_t */
+};
Index: sys/arm64/vmm/hyp.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_H_
+#define _VMM_HYP_H_
+
+/*
+ * The translation tables for the hypervisor mode will hold mappings for kernel
+ * virtual addresses and an identity mapping (VA == PA) necessary when
+ * enabling/disabling the MMU.
+ *
+ * When in EL2 exception level the translation table base register is TTBR0_EL2
+ * and the virtual addresses generated by the CPU must be at the bottom of the
+ * memory, with the first 16 bits all set to zero:
+ *
+ * 0x0000ffffffffffff End hyp address space
+ * 0x0000000000000000 Start of hyp address space
+ *
+ * To run code in hyp mode we need to convert kernel virtual addresses to
+ * addresses that fit into this address space.
+ *
+ * The kernel virtual address range is:
+ *
+ * 0xffff007fffffffff End of KVA
+ * 0xffff000000000000 Kernel base address & start of KVA
+ *
+ * (see /sys/arm64/include/vmparam.h).
+ *
+ * We could convert the kernel virtual addresses to valid EL2 addresses by
+ * setting the first 16 bits to zero and thus mapping the kernel addresses in
+ * the bottom half of the EL2 address space, but then they might clash with the
+ * identity mapping addresses. Instead we map the kernel addresses in the upper
+ * half of the EL2 address space.
+ *
+ * The hypervisor address space will look like this:
+ *
+ * 0x0000807fffffffff End of KVA mapping
+ * 0x0000800000000000 Start of KVA mapping
+ *
+ * 0x00007fffffffffff End of identity mapping
+ * 0x0000000000000000 Start of identity mapping
+ *
+ * With this scheme we have 47 bits at our disposal for the identity map and
+ * another 47 bits for the kernel virtual addresses. For a maximum physical
+ * memory size of 128TB we are guaranteed to not have any clashes between
+ * addresses.
+ */
+#define HYP_VM_MIN_ADDRESS 0x0000000000000000
+#define HYP_VM_MAX_ADDRESS 0x0000ffffffffffff
+
+#define HYP_KVA_OFFSET 0x0000800000000000
+#define HYP_KVA_MASK 0x0000ffffffffffff
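+
+/*
+ * Conversion sketch (the ktohyp() macro used by the rest of the patch is
+ * expected to perform the equivalent): a kernel virtual address is turned
+ * into an EL2 address by clearing the upper 16 bits and adding the offset,
+ * e.g.
+ *
+ *	hyp_va = (kva & HYP_KVA_MASK) | HYP_KVA_OFFSET;
+ *
+ * 0xffff000000001000 -> 0x0000800000001000.
+ */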
+
+/*
+ * When taking asynchronous exceptions, or interrupts, with the exception of the
+ * SError interrupt, the exception syndrome register is not updated with the
+ * exception code. We need to differentiate between the different exception
+ * types taken to EL2.
+ */
+#define EXCP_TYPE_EL1_SYNC 0
+#define EXCP_TYPE_EL1_IRQ 1
+#define EXCP_TYPE_EL1_FIQ 2
+#define EXCP_TYPE_EL1_ERROR 3
+
+#define EXCP_TYPE_EL2_SYNC 4
+#define EXCP_TYPE_EL2_IRQ 5
+#define EXCP_TYPE_EL2_FIQ 6
+#define EXCP_TYPE_EL2_ERROR 7
+
+#define EXCP_TYPE_MAINT_IRQ 8
+
+#define HYP_GET_VECTOR_TABLE -1
+
+#endif /* !_VMM_HYP_H_ */
Index: sys/arm64/vmm/hyp.S
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp.S
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <sys/syscall.h>
+#include <machine/armreg.h>
+#include <machine/asm.h>
+#include <machine/hypervisor.h>
+#include <arm/arm/gic_common.h>
+
+#include "hyp_macros.h"
+#include "hyp.h"
+#include "hyp_assym.h"
+
+ .text
+
+ .globl hyp_code_start
+ .globl hyp_code_end
+
+ .align 12
+hyp_code_start:
+
+
+ENTRY(vmm_call_hyp)
+ hvc #0
+ ret
+END(vmm_call_hyp)
+
+
+.macro vempty
+ .align 7
+ 1: b 1b
+.endm
+
+.macro vector name
+ .align 7
+ b handle_\name
+.endm
+
+ .align 11
+ .globl hyp_init_vectors
+hyp_init_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vempty /* Synchronous EL2h */
+ vempty /* IRQ EL2h */
+ vempty /* FIQ EL2h */
+ vempty /* Error EL2h */
+
+ vector hyp_init /* Synchronous 64-bit EL1 */
+ vempty /* IRQ 64-bit EL1 */
+ vempty /* FIQ 64-bit EL1 */
+ vempty /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+
+/*
+ * Initialize the hypervisor mode with a new exception vector table, translation
+ * table and stack.
+ *
+ * Expecting:
+ * x0 - the hypervisor exception vectors
+ * x1 - translation tables physical address
+ * x2 - stack top virtual address
+ * x3 - TCR_EL2 value
+ * x4 - SCTLR_EL2 value
+ * x5 - VTCR_EL2 value
+ */
+ENTRY(handle_hyp_init)
+ /* Install the new exception vectors */
+ msr vbar_el2, x0
+ /* Set the stack top address */
+ mov sp, x2
+ /* Use the host VTTBR_EL2 to tell the host and the guests apart */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+ /* Load the base address for the translation tables */
+ msr ttbr0_el2, x1
+ /* Invalidate the TLB */
+ tlbi alle2
+ /* Use the same memory attributes as EL1 */
+ mrs x9, mair_el1
+ msr mair_el2, x9
+ /* Configure address translation */
+ msr tcr_el2, x3
+ isb
+ /* Set the system control register for EL2 */
+ msr sctlr_el2, x4
+ /* Set the Stage 2 translation control register */
+ msr vtcr_el2, x5
+ /* Return success */
+ mov x0, #0
+ /* MMU is up and running */
+ eret
+END(handle_hyp_init)
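+
+/*
+ * Illustrative sketch (assumption: the actual caller lives in the C part of
+ * the arm64 vmm code and is not shown in this hunk). The host reaches
+ * handle_hyp_init by issuing an HVC through vmm_call_hyp() while the
+ * hyp_init_vectors table is installed, with the arguments laid out as
+ * documented above, roughly:
+ *
+ *	error = vmm_call_hyp(el2_vectors, el2_ttbr0_pa, stack_top,
+ *	    tcr_el2, sctlr_el2, vtcr_el2);
+ *
+ * A return value of 0 in x0 indicates success.
+ */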
+
+
+ .align 11
+ .globl hyp_vectors
+hyp_vectors:
+ vempty /* Synchronous EL2t */
+ vempty /* IRQ EL2t */
+ vempty /* FIQ EL2t */
+ vempty /* Error EL2t */
+
+ vector el2_el2h_sync /* Synchronous EL2h */
+ vector el2_el2h_irq /* IRQ EL2h */
+ vector el2_el2h_fiq /* FIQ EL2h */
+ vector el2_el2h_error /* Error EL2h */
+
+ vector el2_el1_sync64 /* Synchronous 64-bit EL1 */
+ vector el2_el1_irq64 /* IRQ 64-bit EL1 */
+ vector el2_el1_fiq64 /* FIQ 64-bit EL1 */
+ vector el2_el1_error64 /* Error 64-bit EL1 */
+
+ vempty /* Synchronous 32-bit EL1 */
+ vempty /* IRQ 32-bit EL1 */
+ vempty /* FIQ 32-bit EL1 */
+ vempty /* Error 32-bit EL1 */
+
+
+.macro do_world_switch_to_host
+ .align 7
+ SAVE_GUEST_REGS()
+#ifdef VFP
+ /*
+ * Saving the guest VFP registers needs to come after saving the rest of
+ * the registers because the process dirties the regular registers.
+ */
+ SAVE_GUEST_VFP_REGS()
+ LOAD_HOST_VFP_REGS()
+#endif
+ LOAD_HOST_REGS()
+ SAVE_EXIT_INFO()
+
+ /* Restore host VTTBR */
+ mov x9, #VTTBR_HOST
+ msr vttbr_el2, x9
+.endm
+
+
+.macro handle_el2_excp type
+ .align 7
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Test if the exception happened when the host was running */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* We got the exception while the guest was running */
+ ldr x9, [sp], #16
+ do_world_switch_to_host
+ b 2f
+1:
+ /* We got the exception while the host was running */
+ ldr x9, [sp], #16
+2:
+ mov x0, \type
+ eret
+.endm
+
+
+ENTRY(handle_el2_el2h_sync)
+ handle_el2_excp #EXCP_TYPE_EL2_SYNC
+END(handle_el2_el2h_sync)
+
+ENTRY(handle_el2_el2h_irq)
+ handle_el2_excp #EXCP_TYPE_EL2_IRQ
+END(handle_el2_el2h_irq)
+
+ENTRY(handle_el2_el2h_fiq)
+ handle_el2_excp #EXCP_TYPE_EL2_FIQ
+END(handle_el2_el2h_fiq)
+
+ENTRY(handle_el2_el2h_error)
+ handle_el2_excp #EXCP_TYPE_EL2_ERROR
+END(handle_el2_el2h_error)
+
+
+ENTRY(handle_el2_el1_sync64)
+ /* Save registers before modifying so we can restore them */
+ str x9, [sp, #-16]!
+
+ /* Check for host hypervisor call */
+ mrs x9, vttbr_el2
+ cmp x9, #VTTBR_HOST
+ beq 1f
+
+ /* Restore register */
+ ldr x9, [sp], #16
+
+ /* Guest exception taken to EL2 */
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_SYNC
+ b exit
+
+1:
+ /* Restore register */
+ ldr x9, [sp], #16
+
+ cmp x0, #HYP_GET_VECTOR_TABLE
+ beq 2f
+ b call_function
+2:
+ /* Return the vector table base address */
+ mrs x0, vbar_el2
+exit:
+ eret
+END(handle_el2_el1_sync64)
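+
+/*
+ * Illustrative sketch (assumption): once the run-time vectors are installed,
+ * a host hypercall with x0 == HYP_GET_VECTOR_TABLE, e.g.
+ *
+ *	vbar = vmm_call_hyp(HYP_GET_VECTOR_TABLE);
+ *
+ * returns the current VBAR_EL2 value, while any other x0 value is treated as
+ * the EL2 address of a function to run via call_function below.
+ */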
+
+
+/*
+ * Call a function in EL2 context
+ *
+ * Expecting:
+ * x0 - function virtual address
+ * x1-x7 - function parameters
+ */
+ENTRY(call_function)
+ /* Save the function address before shuffling parameters */
+ mov x9, x0
+
+ /* Shuffle function parameters */
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+ mov x4, x5
+ mov x5, x6
+ mov x6, x7
+
+ /* Call function */
+ br x9
+END(call_function)
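+
+/*
+ * Illustrative sketch (assumption): a host-side call such as
+ *
+ *	vmm_call_hyp(el2_func_va, arg0, arg1);
+ *
+ * arrives here with the function address in x0 and its arguments in x1-x7;
+ * after the shuffle above the callee sees the arguments in x0-x6.
+ */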
+
+
+/*
+ * We only trap IRQ, FIQ and SError exceptions when a guest is running. Do a
+ * world switch to host to handle these exceptions.
+ */
+
+
+ENTRY(handle_el2_el1_irq64)
+ do_world_switch_to_host
+ str x9, [sp, #-16]!
+ mrs x9, ich_misr_el2
+ cmp x9, xzr
+ beq 1f
+ mov x0, #EXCP_TYPE_MAINT_IRQ
+ b 2f
+1:
+ mov x0, #EXCP_TYPE_EL1_IRQ
+2:
+ ldr x9, [sp], #16
+ eret
+END(handle_el2_el1_irq64)
+
+ENTRY(handle_el2_el1_fiq64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_FIQ
+ eret
+END(handle_el2_el1_fiq64)
+
+ENTRY(handle_el2_el1_error64)
+ do_world_switch_to_host
+ mov x0, #EXCP_TYPE_EL1_ERROR
+ eret
+END(handle_el2_el1_error64)
+
+
+/*
+ * Usage:
+ * void vmm_enter_guest(struct hypctx *hypctx)
+ *
+ * Expecting:
+ * x0 - hypctx address
+ */
+ENTRY(vmm_enter_guest)
+ /* Save hypctx address */
+ msr tpidr_el2, x0
+
+ SAVE_HOST_REGS()
+#ifdef VFP
+ SAVE_HOST_VFP_REGS()
+ /*
+ * Loading the guest VFP registers needs to come before loading the
+ * rest of the registers because this process dirties the regular
+ * registers.
+ */
+ LOAD_GUEST_VFP_REGS()
+#endif
+ LOAD_GUEST_REGS()
+
+ /* Enter guest */
+ eret
+END(vmm_enter_guest)
+
+
+/*
+ * Usage:
+ * void vmm_cleanup(void *hyp_stub_vectors)
+ *
+ * Expecting:
+ * x0 - physical address of hyp_stub_vectors
+ */
+ENTRY(vmm_cleanup)
+ /* Restore the stub vectors */
+ msr vbar_el2, x0
+
+ /* Disable the MMU */
+ dsb sy
+ mrs x2, sctlr_el2
+ bic x2, x2, #SCTLR_EL2_M
+ msr sctlr_el2, x2
+
+ eret
+END(vmm_cleanup)
+
+.macro read_reg name
+ mrs x0, \name
+.endm
+
+/*
+ * Return the value of the ICH_VTR_EL2 register.
+ */
+ENTRY(vmm_read_ich_vtr_el2)
+ read_reg ich_vtr_el2
+ eret
+END(vmm_read_ich_vtr_el2)
+
+/*
+ * Return the value of the CNTHCTL_EL2 register.
+ */
+ENTRY(vmm_read_cnthctl_el2)
+ read_reg cnthctl_el2
+ eret
+END(vmm_read_cnthctl_el2)
+
+/*
+ * Return the value of the TCR_EL2 register.
+ */
+ENTRY(vmm_read_tcr_el2)
+ read_reg tcr_el2
+ eret
+END(vmm_read_tcr_el2)
+
+
+
+hyp_code_end:
Index: sys/arm64/vmm/hyp_genassym.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp_genassym.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/assym.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/vmm.h>
+
+#include "arm64.h"
+
+ASSYM(HYPCTX_REGS_X0, offsetof(struct hypctx, regs) + 0 * 8);
+ASSYM(HYPCTX_REGS_X1, offsetof(struct hypctx, regs) + 1 * 8);
+ASSYM(HYPCTX_REGS_X2, offsetof(struct hypctx, regs) + 2 * 8);
+ASSYM(HYPCTX_REGS_X3, offsetof(struct hypctx, regs) + 3 * 8);
+ASSYM(HYPCTX_REGS_X4, offsetof(struct hypctx, regs) + 4 * 8);
+ASSYM(HYPCTX_REGS_X5, offsetof(struct hypctx, regs) + 5 * 8);
+ASSYM(HYPCTX_REGS_X6, offsetof(struct hypctx, regs) + 6 * 8);
+ASSYM(HYPCTX_REGS_X7, offsetof(struct hypctx, regs) + 7 * 8);
+ASSYM(HYPCTX_REGS_X8, offsetof(struct hypctx, regs) + 8 * 8);
+ASSYM(HYPCTX_REGS_X9, offsetof(struct hypctx, regs) + 9 * 8);
+ASSYM(HYPCTX_REGS_X10, offsetof(struct hypctx, regs) + 10 * 8);
+ASSYM(HYPCTX_REGS_X11, offsetof(struct hypctx, regs) + 11 * 8);
+ASSYM(HYPCTX_REGS_X12, offsetof(struct hypctx, regs) + 12 * 8);
+ASSYM(HYPCTX_REGS_X13, offsetof(struct hypctx, regs) + 13 * 8);
+ASSYM(HYPCTX_REGS_X14, offsetof(struct hypctx, regs) + 14 * 8);
+ASSYM(HYPCTX_REGS_X15, offsetof(struct hypctx, regs) + 15 * 8);
+ASSYM(HYPCTX_REGS_X16, offsetof(struct hypctx, regs) + 16 * 8);
+ASSYM(HYPCTX_REGS_X17, offsetof(struct hypctx, regs) + 17 * 8);
+ASSYM(HYPCTX_REGS_X18, offsetof(struct hypctx, regs) + 18 * 8);
+ASSYM(HYPCTX_REGS_X19, offsetof(struct hypctx, regs) + 19 * 8);
+ASSYM(HYPCTX_REGS_X20, offsetof(struct hypctx, regs) + 20 * 8);
+ASSYM(HYPCTX_REGS_X21, offsetof(struct hypctx, regs) + 21 * 8);
+ASSYM(HYPCTX_REGS_X22, offsetof(struct hypctx, regs) + 22 * 8);
+ASSYM(HYPCTX_REGS_X23, offsetof(struct hypctx, regs) + 23 * 8);
+ASSYM(HYPCTX_REGS_X24, offsetof(struct hypctx, regs) + 24 * 8);
+ASSYM(HYPCTX_REGS_X25, offsetof(struct hypctx, regs) + 25 * 8);
+ASSYM(HYPCTX_REGS_X26, offsetof(struct hypctx, regs) + 26 * 8);
+ASSYM(HYPCTX_REGS_X27, offsetof(struct hypctx, regs) + 27 * 8);
+ASSYM(HYPCTX_REGS_X28, offsetof(struct hypctx, regs) + 28 * 8);
+ASSYM(HYPCTX_REGS_X29, offsetof(struct hypctx, regs) + 29 * 8);
+ASSYM(HYPCTX_REGS_LR, offsetof(struct hypctx, regs.lr));
+ASSYM(HYPCTX_REGS_SP, offsetof(struct hypctx, regs.sp));
+ASSYM(HYPCTX_REGS_ELR, offsetof(struct hypctx, regs.elr));
+ASSYM(HYPCTX_REGS_SPSR, offsetof(struct hypctx, regs.spsr));
+
+ASSYM(HYPCTX_ACTLR_EL1, offsetof(struct hypctx, actlr_el1));
+ASSYM(HYPCTX_AMAIR_EL1, offsetof(struct hypctx, amair_el1));
+ASSYM(HYPCTX_ELR_EL1, offsetof(struct hypctx, elr_el1));
+ASSYM(HYPCTX_FAR_EL1, offsetof(struct hypctx, far_el1));
+ASSYM(HYPCTX_FP, offsetof(struct hypctx, fp));
+ASSYM(HYPCTX_MAIR_EL1, offsetof(struct hypctx, mair_el1));
+ASSYM(HYPCTX_PAR_EL1, offsetof(struct hypctx, par_el1));
+ASSYM(HYPCTX_SP_EL0, offsetof(struct hypctx, sp_el0));
+ASSYM(HYPCTX_TCR_EL1, offsetof(struct hypctx, tcr_el1));
+ASSYM(HYPCTX_TPIDR_EL0, offsetof(struct hypctx, tpidr_el0));
+ASSYM(HYPCTX_TPIDRRO_EL0, offsetof(struct hypctx, tpidrro_el0));
+ASSYM(HYPCTX_TPIDR_EL1, offsetof(struct hypctx, tpidr_el1));
+ASSYM(HYPCTX_TTBR0_EL1, offsetof(struct hypctx, ttbr0_el1));
+ASSYM(HYPCTX_TTBR1_EL1, offsetof(struct hypctx, ttbr1_el1));
+ASSYM(HYPCTX_VBAR_EL1, offsetof(struct hypctx, vbar_el1));
+ASSYM(HYPCTX_AFSR0_EL1, offsetof(struct hypctx, afsr0_el1));
+ASSYM(HYPCTX_AFSR1_EL1, offsetof(struct hypctx, afsr1_el1));
+ASSYM(HYPCTX_CONTEXTIDR_EL1, offsetof(struct hypctx, contextidr_el1));
+ASSYM(HYPCTX_CPACR_EL1, offsetof(struct hypctx, cpacr_el1));
+ASSYM(HYPCTX_ESR_EL1, offsetof(struct hypctx, esr_el1));
+ASSYM(HYPCTX_SCTLR_EL1, offsetof(struct hypctx, sctlr_el1));
+ASSYM(HYPCTX_SPSR_EL1, offsetof(struct hypctx, spsr_el1));
+
+ASSYM(HYPCTX_ELR_EL2, offsetof(struct hypctx, elr_el2));
+ASSYM(HYPCTX_HCR_EL2, offsetof(struct hypctx, hcr_el2));
+ASSYM(HYPCTX_VPIDR_EL2, offsetof(struct hypctx, vpidr_el2));
+ASSYM(HYPCTX_VMPIDR_EL2, offsetof(struct hypctx, vmpidr_el2));
+ASSYM(HYPCTX_CPTR_EL2, offsetof(struct hypctx, cptr_el2));
+ASSYM(HYPCTX_SPSR_EL2, offsetof(struct hypctx, spsr_el2));
+
+ASSYM(HYPCTX_HYP, offsetof(struct hypctx, hyp));
+
+ASSYM(HYP_VTTBR_EL2, offsetof(struct hyp, vttbr_el2));
+ASSYM(HYP_VTIMER_CNTHCTL_EL2, offsetof(struct hyp, vtimer.cnthctl_el2));
+ASSYM(HYP_VTIMER_CNTVOFF_EL2, offsetof(struct hyp, vtimer.cntvoff_el2));
+
+ASSYM(HYPCTX_EXIT_INFO_ESR_EL2, offsetof(struct hypctx, exit_info.esr_el2));
+ASSYM(HYPCTX_EXIT_INFO_FAR_EL2, offsetof(struct hypctx, exit_info.far_el2));
+ASSYM(HYPCTX_EXIT_INFO_HPFAR_EL2, offsetof(struct hypctx, exit_info.hpfar_el2));
+
+ASSYM(HYPCTX_VGIC_ICH_LR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_lr_el2));
+ASSYM(HYPCTX_VGIC_ICH_LR_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_lr_num));
+ASSYM(HYPCTX_VGIC_ICH_AP0R_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_ap0r_el2));
+ASSYM(HYPCTX_VGIC_ICH_AP0R_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_ap0r_num));
+ASSYM(HYPCTX_VGIC_ICH_AP1R_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_ap1r_el2));
+ASSYM(HYPCTX_VGIC_ICH_AP1R_NUM, offsetof(struct hypctx, vgic_cpu_if.ich_ap1r_num));
+ASSYM(HYPCTX_VGIC_ICH_EISR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_eisr_el2));
+ASSYM(HYPCTX_VGIC_ICH_ELRSR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_elrsr_el2));
+ASSYM(HYPCTX_VGIC_ICH_HCR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_hcr_el2));
+ASSYM(HYPCTX_VGIC_ICH_MISR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_misr_el2));
+ASSYM(HYPCTX_VGIC_ICH_VMCR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_vmcr_el2));
+ASSYM(HYPCTX_VGIC_ICH_LR_EL2, offsetof(struct hypctx, vgic_cpu_if.ich_lr_el2));
+
+ASSYM(HYPCTX_VTIMER_CPU_CNTKCTL_EL1, offsetof(struct hypctx, vtimer_cpu.cntkctl_el1));
+ASSYM(HYPCTX_VTIMER_CPU_CNTV_CVAL_EL0, offsetof(struct hypctx, vtimer_cpu.cntv_cval_el0));
+ASSYM(HYPCTX_VTIMER_CPU_CNTV_CTL_EL0, offsetof(struct hypctx, vtimer_cpu.cntv_ctl_el0));
+
+#ifdef VFP
+ASSYM(HYPCTX_VFPSTATE_Q0, offsetof(struct hypctx, vfpstate.vfp_regs) + 0 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q1, offsetof(struct hypctx, vfpstate.vfp_regs) + 1 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q2, offsetof(struct hypctx, vfpstate.vfp_regs) + 2 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q3, offsetof(struct hypctx, vfpstate.vfp_regs) + 3 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q4, offsetof(struct hypctx, vfpstate.vfp_regs) + 4 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q5, offsetof(struct hypctx, vfpstate.vfp_regs) + 5 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q6, offsetof(struct hypctx, vfpstate.vfp_regs) + 6 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q7, offsetof(struct hypctx, vfpstate.vfp_regs) + 7 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q8, offsetof(struct hypctx, vfpstate.vfp_regs) + 8 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q9, offsetof(struct hypctx, vfpstate.vfp_regs) + 9 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q10, offsetof(struct hypctx, vfpstate.vfp_regs) + 10 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q11, offsetof(struct hypctx, vfpstate.vfp_regs) + 11 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q12, offsetof(struct hypctx, vfpstate.vfp_regs) + 12 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q13, offsetof(struct hypctx, vfpstate.vfp_regs) + 13 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q14, offsetof(struct hypctx, vfpstate.vfp_regs) + 14 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q15, offsetof(struct hypctx, vfpstate.vfp_regs) + 15 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q16, offsetof(struct hypctx, vfpstate.vfp_regs) + 16 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q17, offsetof(struct hypctx, vfpstate.vfp_regs) + 17 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q18, offsetof(struct hypctx, vfpstate.vfp_regs) + 18 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q19, offsetof(struct hypctx, vfpstate.vfp_regs) + 19 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q20, offsetof(struct hypctx, vfpstate.vfp_regs) + 20 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q21, offsetof(struct hypctx, vfpstate.vfp_regs) + 21 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q22, offsetof(struct hypctx, vfpstate.vfp_regs) + 22 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q23, offsetof(struct hypctx, vfpstate.vfp_regs) + 23 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q24, offsetof(struct hypctx, vfpstate.vfp_regs) + 24 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q25, offsetof(struct hypctx, vfpstate.vfp_regs) + 25 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q26, offsetof(struct hypctx, vfpstate.vfp_regs) + 26 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q27, offsetof(struct hypctx, vfpstate.vfp_regs) + 27 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q28, offsetof(struct hypctx, vfpstate.vfp_regs) + 28 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q29, offsetof(struct hypctx, vfpstate.vfp_regs) + 29 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q30, offsetof(struct hypctx, vfpstate.vfp_regs) + 30 * 16);
+ASSYM(HYPCTX_VFPSTATE_Q31, offsetof(struct hypctx, vfpstate.vfp_regs) + 31 * 16);
+
+
+ASSYM(HYPCTX_VFPSTATE_FPCR, offsetof(struct hypctx, vfpstate.vfp_fpcr));
+ASSYM(HYPCTX_VFPSTATE_FPSR, offsetof(struct hypctx, vfpstate.vfp_fpsr));
+#endif
Index: sys/arm64/vmm/hyp_macros.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/hyp_macros.h
@@ -0,0 +1,687 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_HYP_MACROS_H_
+#define _VMM_HYP_MACROS_H_
+
+
+#define PUSH_SYS_REG_PAIR(reg0, reg1) \
+ mrs x1, reg0; \
+ mrs x2, reg1; \
+ stp x2, x1, [sp, #-16]!;
+
+
+#define PUSH_SYS_REG(reg) \
+ mrs x1, reg; \
+ str x1, [sp, #-16]!;
+
+
+/*
+ * Push all the host registers before entering the guest.
+ */
+#define SAVE_HOST_REGS() \
+ /* Save the regular registers */ \
+ stp x0, x1, [sp, #-16]!; \
+ stp x2, x3, [sp, #-16]!; \
+ stp x4, x5, [sp, #-16]!; \
+ stp x6, x7, [sp, #-16]!; \
+ stp x8, x9, [sp, #-16]!; \
+ stp x10, x11, [sp, #-16]!; \
+ stp x12, x13, [sp, #-16]!; \
+ stp x14, x15, [sp, #-16]!; \
+ stp x16, x17, [sp, #-16]!; \
+ stp x18, x19, [sp, #-16]!; \
+ stp x20, x21, [sp, #-16]!; \
+ stp x22, x23, [sp, #-16]!; \
+ stp x24, x25, [sp, #-16]!; \
+ stp x26, x27, [sp, #-16]!; \
+ stp x28, x29, [sp, #-16]!; \
+ stp lr, fp, [sp, #-16]!; \
+ \
+ /* Push the system registers */ \
+ PUSH_SYS_REG_PAIR(SP_EL0, SP_EL1); \
+ PUSH_SYS_REG_PAIR(ACTLR_EL1, AMAIR_EL1); \
+ PUSH_SYS_REG_PAIR(ELR_EL1, PAR_EL1); \
+ PUSH_SYS_REG_PAIR(MAIR_EL1, TCR_EL1); \
+ PUSH_SYS_REG_PAIR(TPIDR_EL0, TPIDRRO_EL0); \
+ PUSH_SYS_REG_PAIR(TPIDR_EL1, TTBR0_EL1); \
+ PUSH_SYS_REG_PAIR(TTBR1_EL1, VBAR_EL1); \
+ PUSH_SYS_REG_PAIR(AFSR0_EL1, AFSR1_EL1); \
+ PUSH_SYS_REG_PAIR(CONTEXTIDR_EL1, CPACR_EL1); \
+ PUSH_SYS_REG_PAIR(ESR_EL1, FAR_EL1); \
+ PUSH_SYS_REG_PAIR(SCTLR_EL1, SPSR_EL1); \
+ PUSH_SYS_REG_PAIR(ELR_EL2, HCR_EL2); \
+ PUSH_SYS_REG_PAIR(VPIDR_EL2, VMPIDR_EL2); \
+ PUSH_SYS_REG_PAIR(CPTR_EL2, SPSR_EL2); \
+ PUSH_SYS_REG_PAIR(ICH_HCR_EL2, ICH_VMCR_EL2); \
+ PUSH_SYS_REG_PAIR(CNTHCTL_EL2, CNTKCTL_EL1); \
+ PUSH_SYS_REG(CNTVOFF_EL2);
+
+
+#define SAVE_HOST_VFP_REGS() \
+ stp q0, q1, [sp, #-16 * 2]!; \
+ stp q2, q3, [sp, #-16 * 2]!; \
+ stp q4, q5, [sp, #-16 * 2]!; \
+ stp q6, q7, [sp, #-16 * 2]!; \
+ stp q8, q9, [sp, #-16 * 2]!; \
+ stp q10, q11, [sp, #-16 * 2]!; \
+ stp q12, q13, [sp, #-16 * 2]!; \
+ stp q14, q15, [sp, #-16 * 2]!; \
+ stp q16, q17, [sp, #-16 * 2]!; \
+ stp q18, q19, [sp, #-16 * 2]!; \
+ stp q20, q21, [sp, #-16 * 2]!; \
+ stp q22, q23, [sp, #-16 * 2]!; \
+ stp q24, q25, [sp, #-16 * 2]!; \
+ stp q26, q27, [sp, #-16 * 2]!; \
+ stp q28, q29, [sp, #-16 * 2]!; \
+ stp q30, q31, [sp, #-16 * 2]!; \
+ PUSH_SYS_REG_PAIR(FPCR, FPSR);
+
+
+#define POP_SYS_REG_PAIR(reg0, reg1) \
+ ldp x2, x1, [sp], #16; \
+ msr reg1, x2; \
+ msr reg0, x1;
+
+
+#define LOAD_HOST_VFP_REGS() \
+ POP_SYS_REG_PAIR(FPCR, FPSR); \
+ ldp q30, q31, [sp], #16 * 2; \
+ ldp q28, q29, [sp], #16 * 2; \
+ ldp q26, q27, [sp], #16 * 2; \
+ ldp q24, q25, [sp], #16 * 2; \
+ ldp q22, q23, [sp], #16 * 2; \
+ ldp q20, q21, [sp], #16 * 2; \
+ ldp q18, q19, [sp], #16 * 2; \
+ ldp q16, q17, [sp], #16 * 2; \
+ ldp q14, q15, [sp], #16 * 2; \
+ ldp q12, q13, [sp], #16 * 2; \
+ ldp q10, q11, [sp], #16 * 2; \
+ ldp q8, q9, [sp], #16 * 2; \
+ ldp q6, q7, [sp], #16 * 2; \
+ ldp q4, q5, [sp], #16 * 2; \
+ ldp q2, q3, [sp], #16 * 2; \
+ ldp q0, q1, [sp], #16 * 2; \
+
+
+#define POP_SYS_REG(reg) \
+ ldr x1, [sp], #16; \
+ msr reg, x1;
+
+
+/*
+ * Restore all the host registers before entering the host.
+ */
+#define LOAD_HOST_REGS() \
+ /* Pop the system registers first */ \
+ POP_SYS_REG(CNTVOFF_EL2); \
+ POP_SYS_REG_PAIR(CNTHCTL_EL2, CNTKCTL_EL1); \
+ POP_SYS_REG_PAIR(ICH_HCR_EL2, ICH_VMCR_EL2); \
+ POP_SYS_REG_PAIR(CPTR_EL2, SPSR_EL2); \
+ POP_SYS_REG_PAIR(VPIDR_EL2, VMPIDR_EL2); \
+ POP_SYS_REG_PAIR(ELR_EL2, HCR_EL2); \
+ POP_SYS_REG_PAIR(SCTLR_EL1, SPSR_EL1); \
+ POP_SYS_REG_PAIR(ESR_EL1, FAR_EL1); \
+ POP_SYS_REG_PAIR(CONTEXTIDR_EL1, CPACR_EL1); \
+ POP_SYS_REG_PAIR(AFSR0_EL1, AFSR1_EL1); \
+ POP_SYS_REG_PAIR(TTBR1_EL1, VBAR_EL1); \
+ POP_SYS_REG_PAIR(TPIDR_EL1, TTBR0_EL1); \
+ POP_SYS_REG_PAIR(TPIDR_EL0, TPIDRRO_EL0); \
+ POP_SYS_REG_PAIR(MAIR_EL1, TCR_EL1); \
+ POP_SYS_REG_PAIR(ELR_EL1, PAR_EL1); \
+ POP_SYS_REG_PAIR(ACTLR_EL1, AMAIR_EL1); \
+ POP_SYS_REG_PAIR(SP_EL0, SP_EL1); \
+ \
+ /* Pop the regular registers */ \
+ ldp lr, fp, [sp], #16; \
+ ldp x28, x29, [sp], #16; \
+ ldp x26, x27, [sp], #16; \
+ ldp x24, x25, [sp], #16; \
+ ldp x22, x23, [sp], #16; \
+ ldp x20, x21, [sp], #16; \
+ ldp x18, x19, [sp], #16; \
+ ldp x16, x17, [sp], #16; \
+ ldp x14, x15, [sp], #16; \
+ ldp x12, x13, [sp], #16; \
+ ldp x10, x11, [sp], #16; \
+ ldp x8, x9, [sp], #16; \
+ ldp x6, x7, [sp], #16; \
+ ldp x4, x5, [sp], #16; \
+ ldp x2, x3, [sp], #16; \
+ ldp x0, x1, [sp], #16; \
+
+
+#define SAVE_ARRAY_REG64(reg, dest, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ mrs x7, reg; \
+ str x7, [dest]; \
+ add dest, dest, #8; \
+ sub remaining, remaining, #1;
+
+
+#define SAVE_LR_REGS() \
+ /* Load the number of ICH_LR_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_LR_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the destination address */ \
+ mov x1, #HYPCTX_VGIC_ICH_LR_EL2; \
+ add x1, x0, x1; \
+ SAVE_ARRAY_REG64(ich_lr0_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr1_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr2_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr3_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr4_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr5_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr6_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr7_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr8_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr9_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr10_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr11_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr12_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr13_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr14_el2, x1, x3); \
+ SAVE_ARRAY_REG64(ich_lr15_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define SAVE_ARRAY_REG32(reg, dest, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ mrs x7, reg; \
+ str w7, [dest]; \
+ add dest, dest, #4; \
+ sub remaining, remaining, #1;
+
+
+#define SAVE_AP0R_REGS() \
+ /* Load the number of ICH_AP0R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP0R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the destination address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP0R_EL2; \
+ add x1, x0, x1; \
+ SAVE_ARRAY_REG32(ich_ap0r0_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap0r1_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap0r2_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap0r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define SAVE_AP1R_REGS() \
+ /* Load the number of ICH_AP1R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP1R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the destination address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP1R_EL2; \
+ add x1, x0, x1; \
+ SAVE_ARRAY_REG32(ich_ap1r0_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap1r1_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap1r2_el2, x1, x3); \
+ SAVE_ARRAY_REG32(ich_ap1r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+/*
+ * The STR and LDR instructions take an offset between [-256, 255], but the
+ * hypctx register offset can be larger than that. To get around this limitation
+ * we use a temporary register to hold the offset.
+ */
+#define SAVE_SYS_REG64(prefix, reg) \
+ mrs x1, reg; \
+ mov x2, prefix ##_ ##reg; \
+ str x1, [x0, x2];
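+
+/*
+ * Illustrative expansion: SAVE_SYS_REG64(HYPCTX, TCR_EL1) becomes
+ *
+ *	mrs x1, TCR_EL1
+ *	mov x2, HYPCTX_TCR_EL1
+ *	str x1, [x0, x2]
+ *
+ * with HYPCTX_TCR_EL1 generated by hyp_genassym.c; materializing the offset
+ * in x2 avoids the limited STR immediate range described above.
+ */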
+
+
+#define SAVE_SYS_REG32(prefix, reg) \
+ mrs x1, reg; \
+ mov x2, prefix ##_ ##reg; \
+ str w1, [x0, x2];
+
+
+#define SAVE_REG(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ str reg, [x0, x1];
+
+/*
+ * The STP and LDP instructions take an immediate in the range of [-512, 504]
+ * when using the post-indexed addressing mode, but the hypctx register offset
+ * can be larger than that. To get around this limitation we compute the address
+ * by adding the hypctx base address with the struct member offset.
+ *
+ * Using STP/LDP to save/load register pairs to the corresponding struct hypctx
+ * variables works because the registers are declared as an array and they are
+ * stored in contiguous memory addresses.
+ */
+
+#define SAVE_REG_PAIR(prefix, reg0, reg1) \
+ mov x1, prefix ##_ ##reg0; \
+ add x1, x0, x1; \
+ stp reg0, reg1, [x1];
+
+
+/*
+ * We use x0 to hold the hypctx address loaded from TPIDR_EL2, and x1 and x2
+ * as temporary registers to compute the hypctx member addresses. To preserve
+ * the guest values of these registers we push them on the stack first, use
+ * them as temporaries while saving the rest of the registers, and at the end
+ * pop the original values off the stack and save them as well.
+ */
+#define SAVE_GUEST_X_REGS() \
+ /* Push x0 */ \
+ str x0, [sp, #-16]!; \
+ /* Restore hypctx address */ \
+ mrs x0, tpidr_el2; \
+ /* Push x1 and x2 */ \
+ stp x1, x2, [sp, #-16]!; \
+ \
+ /* Save the other registers */ \
+ SAVE_REG_PAIR(HYPCTX_REGS, X3, X4); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X5, X6); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X7, X8); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X9, X10); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X11, X12); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X13, X14); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X15, X16); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X17, X18); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X19, X20); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X21, X22); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X23, X24); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X25, X26); \
+ SAVE_REG_PAIR(HYPCTX_REGS, X27, X28); \
+ SAVE_REG(HYPCTX_REGS, X29); \
+ SAVE_REG(HYPCTX_REGS, LR); \
+ \
+ /* Pop and save x1 and x2 */ \
+ ldp x1, x2, [sp], #16; \
+ mov x3, #HYPCTX_REGS_X1; \
+ add x3, x0, x3; \
+ stp x1, x2, [x3]; \
+ /* Pop and save x0 */ \
+ ldr x1, [sp], #16; \
+ mov x2, #HYPCTX_REGS_X0; \
+ add x2, x2, x0; \
+ str x1, [x2];
+
+
+/*
+ * Save all the guest registers. Start by saving the regular registers first
+ * because those will be used as temporary registers for accessing the hypctx
+ * member addresses.
+ *
+ * Expecting:
+ * TPIDR_EL2 - struct hypctx address
+ *
+ * After call:
+ * x0 - struct hypctx address
+ */
+#define SAVE_GUEST_REGS() \
+ SAVE_GUEST_X_REGS(); \
+ \
+ SAVE_REG(HYPCTX, FP); \
+ \
+ SAVE_SYS_REG32(HYPCTX_VTIMER_CPU, CNTKCTL_EL1); \
+ SAVE_SYS_REG64(HYPCTX_VTIMER_CPU, CNTV_CVAL_EL0); \
+	SAVE_SYS_REG32(HYPCTX_VTIMER_CPU, CNTV_CTL_EL0);	\
+ \
+ /* \
+ * ICH_EISR_EL2, ICH_ELRSR_EL2 and ICH_MISR_EL2 are read-only and are \
+ * saved because they are modified by the hardware as part of the \
+ * interrupt virtualization process and we need to inspect them in \
+ * the VGIC driver. \
+ */ \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_EISR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_ELRSR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_MISR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_HCR_EL2); \
+ SAVE_SYS_REG32(HYPCTX_VGIC, ICH_VMCR_EL2); \
+ \
+ SAVE_LR_REGS(); \
+ SAVE_AP0R_REGS(); \
+ SAVE_AP1R_REGS(); \
+ \
+ /* Save the stack pointer. */ \
+ mrs x1, sp_el1; \
+ mov x2, #HYPCTX_REGS_SP; \
+ str x1, [x0, x2]; \
+ \
+ SAVE_SYS_REG64(HYPCTX, ACTLR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, AFSR0_EL1); \
+ SAVE_SYS_REG64(HYPCTX, AFSR1_EL1); \
+ SAVE_SYS_REG64(HYPCTX, AMAIR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, CONTEXTIDR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, CPACR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, ELR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, ESR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, FAR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, MAIR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, PAR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, SCTLR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, SP_EL0); \
+ SAVE_SYS_REG64(HYPCTX, TCR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, TPIDR_EL0); \
+ SAVE_SYS_REG64(HYPCTX, TPIDRRO_EL0); \
+ SAVE_SYS_REG64(HYPCTX, TPIDR_EL1); \
+ SAVE_SYS_REG64(HYPCTX, TTBR0_EL1); \
+ SAVE_SYS_REG64(HYPCTX, TTBR1_EL1); \
+ SAVE_SYS_REG64(HYPCTX, VBAR_EL1); \
+ \
+ SAVE_SYS_REG32(HYPCTX, SPSR_EL1); \
+ \
+ SAVE_SYS_REG64(HYPCTX, CPTR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, ELR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, HCR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, VPIDR_EL2); \
+ SAVE_SYS_REG64(HYPCTX, VMPIDR_EL2); \
+ SAVE_SYS_REG32(HYPCTX, SPSR_EL2);
+
+
+#define SAVE_GUEST_VFP_REGS() \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q0, Q1); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q2, Q3); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q4, Q5); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q6, Q7); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q8, Q9); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q10, Q11); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q12, Q13); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q14, Q15); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q16, Q17); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q18, Q19); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q20, Q21); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q22, Q23); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q24, Q25); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q26, Q27); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q28, Q29); \
+ SAVE_REG_PAIR(HYPCTX_VFPSTATE, Q30, Q31); \
+ \
+ SAVE_SYS_REG32(HYPCTX_VFPSTATE, FPCR); \
+ SAVE_SYS_REG32(HYPCTX_VFPSTATE, FPSR);
+
+
+/* See SAVE_SYS_REG64 and SAVE_SYS_REG32 */
+#define LOAD_SYS_REG64(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ ldr x2, [x0, x1]; \
+ msr reg, x2;
+
+
+#define LOAD_SYS_REG32(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ ldr w2, [x0, x1]; \
+ msr reg, x2;
+
+
+/* See SAVE_REG_PAIR */
+#define LOAD_REG_PAIR(prefix, reg0, reg1) \
+ mov x1, prefix ##_ ##reg0; \
+ add x1, x0, x1; \
+ ldp reg0, reg1, [x1];
+
+
+#define LOAD_GUEST_VFP_REGS() \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q0, Q1); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q2, Q3); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q4, Q5); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q6, Q7); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q8, Q9); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q10, Q11); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q12, Q13); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q14, Q15); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q16, Q17); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q18, Q19); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q20, Q21); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q22, Q23); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q24, Q25); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q26, Q27); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q28, Q29); \
+ LOAD_REG_PAIR(HYPCTX_VFPSTATE, Q30, Q31); \
+ \
+ LOAD_SYS_REG32(HYPCTX_VFPSTATE, FPCR); \
+ LOAD_SYS_REG32(HYPCTX_VFPSTATE, FPSR);
+
+
+#define LOAD_REG(prefix, reg) \
+ mov x1, prefix ##_ ##reg; \
+ ldr reg, [x0, x1];
+
+
+/*
+ * We use x1 as a temporary register for the hypctx member offsets and x0 to
+ * hold the hypctx address. The guest x0 and x1 values are loaded into x2 and
+ * x3 and pushed on the stack; after the other registers have been loaded they
+ * are popped back into x0 and x1.
+ */
+#define LOAD_GUEST_X_REGS() \
+ mov x1, #HYPCTX_REGS_X0; \
+ /* x1 now holds the address of hypctx reg x0 */ \
+ add x1, x1, x0; \
+ /* Make x2 = guest x0 and x3 = guest x1 */ \
+ ldp x2, x3, [x1]; \
+ stp x2, x3, [sp, #-16]!; \
+ \
+ /* Load the other registers */ \
+ LOAD_REG_PAIR(HYPCTX_REGS, X2, X3); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X4, X5); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X6, X7); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X8, X9); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X10, X11); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X12, X13); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X14, X15); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X16, X17); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X18, X19); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X20, X21); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X22, X23); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X24, X25); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X26, X27); \
+ LOAD_REG_PAIR(HYPCTX_REGS, X28, X29); \
+ LOAD_REG(HYPCTX_REGS, LR); \
+ \
+ /* Pop guest x0 and x1 from the stack */ \
+ ldp x0, x1, [sp], #16; \
+
+
+#define LOAD_ARRAY_REG64(reg, src, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ ldr x2, [src]; \
+ msr reg, x2; \
+ add src, src, #8; \
+ sub remaining, remaining, #1;
+
+
+#define LOAD_LR_REGS(); \
+ /* Load the number of ICH_LR_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_LR_NUM; \
+ ldr x3, [x0, x2]; \
+ mov x1, #HYPCTX_VGIC_ICH_LR_EL2; \
+ /* x1 holds the load address */ \
+ add x1, x0, x1; \
+ LOAD_ARRAY_REG64(ich_lr0_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr1_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr2_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr3_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr4_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr5_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr6_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr7_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr8_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr9_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr10_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr11_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr12_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr13_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr14_el2, x1, x3); \
+ LOAD_ARRAY_REG64(ich_lr15_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define LOAD_ARRAY_REG32(reg, src, remaining) \
+ cmp remaining, #0; \
+ beq 9f; \
+ ldr w2, [src]; \
+ msr reg, x2; \
+ add src, src, #4; \
+ sub remaining, remaining, #1;
+
+
+#define LOAD_AP0R_REGS(); \
+ /* Load the number of ICH_AP0R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP0R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the load address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP0R_EL2; \
+ add x1, x0, x1; \
+ LOAD_ARRAY_REG32(ich_ap0r0_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap0r1_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap0r2_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap0r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+#define LOAD_AP1R_REGS(); \
+ /* Load the number of ICH_AP1R_EL2 regs from memory */ \
+ mov x2, #HYPCTX_VGIC_ICH_AP1R_NUM; \
+ ldr x3, [x0, x2]; \
+ /* x1 holds the load address */ \
+ mov x1, #HYPCTX_VGIC_ICH_AP1R_EL2; \
+ add x1, x0, x1; \
+ LOAD_ARRAY_REG32(ich_ap1r0_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap1r1_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap1r2_el2, x1, x3); \
+ LOAD_ARRAY_REG32(ich_ap1r3_el2, x1, x3); \
+9:; \
+ ;
+
+
+
+#define KTOHYP_REG(reg) \
+ mov x7, HYP_KVA_MASK; \
+ and reg, reg, x7; \
+ mov x7, HYP_KVA_OFFSET; \
+ orr reg, reg, x7;
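+
+/*
+ * Illustrative sketch: KTOHYP_REG rewrites a kernel VA into its EL2 alias,
+ * the C equivalent being roughly
+ *
+ *	hyp_va = (kern_va & HYP_KVA_MASK) | HYP_KVA_OFFSET;
+ *
+ * assuming HYP_KVA_MASK and HYP_KVA_OFFSET are the EL2 mapping constants
+ * defined elsewhere in this series.
+ */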
+
+
+/* Load a register from struct hyp *hyp member of hypctx. */
+#define LOAD_HYP_REG(prefix, reg) \
+ /* Compute VA of hyp member in x1 */ \
+ mov x1, #HYPCTX_HYP; \
+ add x1, x1, x0; \
+ /* Get hyp address in x2 */ \
+ ldr x2, [x1]; \
+ /* Transform hyp kernel VA into an EL2 VA */ \
+ KTOHYP_REG(x2); \
+ /* Get register offset inside struct hyp */ \
+ mov x1, prefix ##_ ##reg; \
+	/* Compute register address */	\
+ add x2, x2, x1; \
+ /* Load the register */ \
+ ldr x1, [x2]; \
+ msr reg, x1; \
+
+
+/*
+ * Restore all the guest registers to their original values.
+ *
+ * Expecting:
+ * x0 - struct hypctx address
+ *
+ * After call:
+ * tpidr_el2 - struct hypctx address
+ */
+#define LOAD_GUEST_REGS() \
+ LOAD_SYS_REG64(HYPCTX, ACTLR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, AFSR0_EL1); \
+ LOAD_SYS_REG64(HYPCTX, AFSR1_EL1); \
+ LOAD_SYS_REG64(HYPCTX, AMAIR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, CONTEXTIDR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, CPACR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, ELR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, ESR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, FAR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, MAIR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, PAR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, SCTLR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, SP_EL0); \
+ LOAD_SYS_REG64(HYPCTX, TCR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, TPIDR_EL0); \
+ LOAD_SYS_REG64(HYPCTX, TPIDRRO_EL0); \
+ LOAD_SYS_REG64(HYPCTX, TPIDR_EL1); \
+ LOAD_SYS_REG64(HYPCTX, TTBR0_EL1); \
+ LOAD_SYS_REG64(HYPCTX, TTBR1_EL1); \
+ LOAD_SYS_REG64(HYPCTX, VBAR_EL1); \
+ LOAD_SYS_REG32(HYPCTX, SPSR_EL1); \
+ \
+ LOAD_SYS_REG64(HYPCTX, CPTR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, ELR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, HCR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, VPIDR_EL2); \
+ LOAD_SYS_REG64(HYPCTX, VMPIDR_EL2); \
+ LOAD_SYS_REG32(HYPCTX, SPSR_EL2); \
+ \
+ LOAD_SYS_REG32(HYPCTX_VGIC, ICH_HCR_EL2); \
+ LOAD_SYS_REG32(HYPCTX_VGIC, ICH_VMCR_EL2); \
+ \
+ LOAD_SYS_REG32(HYPCTX_VTIMER_CPU, CNTKCTL_EL1); \
+ LOAD_SYS_REG64(HYPCTX_VTIMER_CPU, CNTV_CVAL_EL0); \
+ LOAD_SYS_REG32(HYPCTX_VTIMER_CPU, CNTV_CTL_EL0); \
+ \
+ LOAD_REG(HYPCTX, FP); \
+ \
+ LOAD_HYP_REG(HYP, VTTBR_EL2); \
+ LOAD_HYP_REG(HYP_VTIMER, CNTHCTL_EL2); \
+ LOAD_HYP_REG(HYP_VTIMER, CNTVOFF_EL2); \
+ \
+ LOAD_LR_REGS(); \
+ LOAD_AP0R_REGS(); \
+ LOAD_AP1R_REGS(); \
+ \
+ /* Load the guest EL1 stack pointer */ \
+ mov x1, #HYPCTX_REGS_SP; \
+ add x1, x1, x0; \
+ ldr x2, [x1]; \
+ msr sp_el1, x2; \
+ \
+ LOAD_GUEST_X_REGS(); \
+
+
+/*
+ * Save exit information
+ *
+ * Expecting:
+ * x0 - struct hypctx address
+ */
+#define SAVE_EXIT_INFO() \
+ SAVE_SYS_REG64(HYPCTX_EXIT_INFO, ESR_EL2); \
+ SAVE_SYS_REG64(HYPCTX_EXIT_INFO, FAR_EL2); \
+ SAVE_SYS_REG64(HYPCTX_EXIT_INFO, HPFAR_EL2); \
+
+#endif /* !_VMM_HYP_MACROS_H_ */
Index: sys/arm64/vmm/io/vgic_v3.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VGIC_V3_H_
+#define _VMM_VGIC_V3_H_
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm/arm/gic_common.h>
+
+#define VGIC_SGI_NUM (GIC_LAST_SGI - GIC_FIRST_SGI + 1)
+#define VGIC_PPI_NUM (GIC_LAST_PPI - GIC_FIRST_PPI + 1)
+#define VGIC_SPI_NUM (GIC_LAST_SPI - GIC_FIRST_SPI + 1)
+#define VGIC_PRV_I_NUM (VGIC_SGI_NUM + VGIC_PPI_NUM)
+#define VGIC_SHR_I_NUM (VGIC_SPI_NUM)
+
+#define VGIC_ICH_LR_NUM_MAX 16
+#define VGIC_ICH_AP0R_NUM_MAX 4
+#define VGIC_ICH_AP1R_NUM_MAX VGIC_ICH_AP0R_NUM_MAX
+
+/* Order matters, a lower value means a higher precedence */
+enum vgic_v3_irqtype {
+ VGIC_IRQ_MAXPRIO,
+ VGIC_IRQ_CLK,
+ VGIC_IRQ_VIRTIO,
+ VGIC_IRQ_MISC,
+ VGIC_IRQ_INVALID,
+};
+
+struct vgic_mmio_region {
+ vm_offset_t start;
+ vm_offset_t end;
+ mem_region_read_t read;
+ mem_region_write_t write;
+};
+
+struct vm;
+struct vm_exit;
+struct hyp;
+
+struct vgic_v3_dist {
+ struct mtx dist_mtx;
+
+ uint64_t start;
+ size_t end;
+	uint64_t end;
+
+ uint32_t gicd_ctlr; /* Distributor Control Register */
+ uint32_t gicd_typer; /* Interrupt Controller Type Register */
+ uint32_t gicd_pidr2; /* Distributor Peripheral ID2 Register */
+ /* Interrupt Configuration Registers. */
+ uint32_t *gicd_icfgr;
+ /* Interrupt Priority Registers. */
+ uint32_t *gicd_ipriorityr;
+ /* Interrupt Routing Registers. */
+ uint64_t *gicd_irouter;
+ /* Interrupt Clear-Enable and Set-Enable Registers. */
+ uint32_t *gicd_ixenabler;
+};
+
+#define aff_routing_en(distp)	((distp)->gicd_ctlr & GICD_CTLR_ARE_NS)
+
+struct vgic_v3_redist {
+ uint64_t start;
+ uint64_t end;
+
+ uint64_t gicr_typer; /* Redistributor Type Register */
+	uint32_t gicr_ctlr;	/* Redistributor Control Register */
+ uint32_t gicr_ixenabler0;
+ /* Interrupt Priority Registers. */
+ uint32_t gicr_ipriorityr[VGIC_PRV_I_NUM / 4];
+	/* Interrupt Configuration Registers. */
+ uint32_t gicr_icfgr0, gicr_icfgr1;
+};
+
+struct vgic_v3_irq;
+struct vgic_v3_cpu_if {
+ uint32_t ich_eisr_el2; /* End of Interrupt Status Register */
+ uint32_t ich_elrsr_el2; /* Empty List register Status Register (ICH_ELRSR_EL2) */
+ uint32_t ich_hcr_el2; /* Hyp Control Register */
+ uint32_t ich_misr_el2; /* Maintenance Interrupt State Register */
+ uint32_t ich_vmcr_el2; /* Virtual Machine Control Register */
+
+ /*
+ * The List Registers are part of the VM context and are modified on a
+ * world switch. They need to be allocated statically so they are
+ * mapped in the EL2 translation tables when struct hypctx is mapped.
+ */
+ uint64_t ich_lr_el2[VGIC_ICH_LR_NUM_MAX];
+ size_t ich_lr_num;
+
+ /*
+ * We need a mutex for accessing the list registers because they are
+ * modified asynchronously by the virtual timer.
+ *
+ * Note that the mutex *MUST* be a spin mutex because an interrupt can
+ * be injected by a callout callback function, thereby modifying the
+ * list registers from a context where sleeping is forbidden.
+ */
+ struct mtx lr_mtx;
+
+ /* Active Priorities Registers for Group 0 and 1 interrupts */
+ uint32_t ich_ap0r_el2[VGIC_ICH_AP0R_NUM_MAX];
+ size_t ich_ap0r_num;
+ uint32_t ich_ap1r_el2[VGIC_ICH_AP1R_NUM_MAX];
+ size_t ich_ap1r_num;
+
+ struct vgic_v3_irq *irqbuf;
+ size_t irqbuf_size;
+ size_t irqbuf_num;
+};
+
+int vgic_v3_attach_to_vm(void *arg, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size);
+void vgic_v3_detach_from_vm(void *arg);
+void vgic_v3_init(uint64_t ich_vtr_el2);
+void vgic_v3_vminit(void *arg);
+void vgic_v3_cpuinit(void *arg, bool last_vcpu);
+void vgic_v3_sync_hwstate(void *arg);
+
+void vgic_v3_mmio_init(struct hyp *hyp);
+void vgic_v3_mmio_destroy(struct hyp *hyp);
+
+int vgic_v3_vcpu_pending_irq(void *arg);
+int vgic_v3_inject_irq(void *arg, uint32_t irq,
+ enum vgic_v3_irqtype irqtype);
+int vgic_v3_remove_irq(void *arg, uint32_t irq, bool ignore_state);
+
+void vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp);
+int vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled,
+ struct hyp *hyp, int vcpuid);
+
+DECLARE_CLASS(arm_vgic_driver);
+
+#endif /* !_VMM_VGIC_V3_H_ */
Index: sys/arm64/vmm/io/vgic_v3.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3.c
@@ -0,0 +1,983 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/bitstring.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <dev/ofw/openfirm.h>
+
+#include <machine/bus.h>
+#include <machine/bitops.h>
+#include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/param.h>
+#include <machine/pmap.h>
+#include <machine/vmparam.h>
+#include <machine/intr.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <arm/arm/gic_common.h>
+#include <arm/arm/generic_timer.h>
+#include <arm64/arm64/gic_v3_reg.h>
+#include <arm64/arm64/gic_v3_var.h>
+
+#include <arm64/vmm/hyp.h>
+#include <arm64/vmm/mmu.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vgic_v3_reg.h"
+
+#define VGIC_V3_DEVNAME "vgic"
+#define VGIC_V3_DEVSTR "ARM Virtual Generic Interrupt Controller v3"
+
+#define RES0 0UL
+
+#define IRQBUF_SIZE_MIN 32
+#define IRQBUF_SIZE_MAX (1 << 10)
+
+#define IRQ_SCHEDULED (GIC_LAST_SPI + 1)
+
+#define lr_pending(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_PENDING)
+#define lr_inactive(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_INACTIVE)
+#define lr_active(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_ACTIVE)
+#define lr_pending_active(lr) \
+ (ICH_LR_EL2_STATE(lr) == ICH_LR_EL2_STATE_PENDING_ACTIVE)
+#define lr_not_active(lr) (!lr_active(lr) && !lr_pending_active(lr))
+
+#define lr_clear_irq(lr) ((lr) &= ~ICH_LR_EL2_STATE_MASK)
+
+MALLOC_DEFINE(M_VGIC_V3, "ARM VMM VGIC V3", "ARM VMM VGIC V3");
+
+struct vgic_v3_virt_features {
+ uint8_t min_prio;
+ size_t ich_lr_num;
+ size_t ich_ap0r_num;
+ size_t ich_ap1r_num;
+};
+
+struct vgic_v3_ro_regs {
+ uint32_t gicd_icfgr0;
+ uint32_t gicd_pidr2;
+ uint32_t gicd_typer;
+};
+
+struct vgic_v3_irq {
+ uint32_t irq;
+ enum vgic_v3_irqtype irqtype;
+ uint8_t enabled;
+ uint8_t priority;
+};
+
+#define vip_to_lr(vip, lr) \
+do { \
+ lr = ICH_LR_EL2_STATE_PENDING; \
+ lr |= ICH_LR_EL2_GROUP1; \
+ lr |= (uint64_t)vip->priority << ICH_LR_EL2_PRIO_SHIFT; \
+ lr |= vip->irq; \
+} while (0)
+
+#define lr_to_vip(lr, vip) \
+do { \
+ (vip)->irq = ICH_LR_EL2_VINTID(lr); \
+ (vip)->priority = \
+ (uint8_t)(((lr) & ICH_LR_EL2_PRIO_MASK) >> ICH_LR_EL2_PRIO_SHIFT); \
+} while (0)
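+
+/*
+ * Illustrative sketch: for a buffered interrupt with priority 0x80 and
+ * INTID 27, vip_to_lr() builds a list register value along the lines of
+ *
+ *	lr = ICH_LR_EL2_STATE_PENDING | ICH_LR_EL2_GROUP1 |
+ *	    ((uint64_t)0x80 << ICH_LR_EL2_PRIO_SHIFT) | 27;
+ *
+ * and lr_to_vip() recovers the INTID and priority fields from such a value.
+ */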
+
+static struct vgic_v3_virt_features virt_features;
+static struct vgic_v3_ro_regs ro_regs;
+
+static struct gic_v3_softc *gic_sc;
+
+void
+vgic_v3_cpuinit(void *arg, bool last_vcpu)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ uint64_t aff, vmpidr_el2;
+ int i;
+
+ vmpidr_el2 = hypctx->vmpidr_el2;
+ KASSERT(vmpidr_el2 != 0,
+ ("Trying to init this CPU's vGIC before the vCPU"));
+ /*
+ * Get affinity for the current CPU. The guest CPU affinity is taken
+ * from VMPIDR_EL2. The Redistributor corresponding to this CPU is
+ * the Redistributor with the same affinity from GICR_TYPER.
+ */
+ aff = (CPU_AFF3(vmpidr_el2) << 24) | (CPU_AFF2(vmpidr_el2) << 16) |
+ (CPU_AFF1(vmpidr_el2) << 8) | CPU_AFF0(vmpidr_el2);
+
+ /* Set up GICR_TYPER. */
+ redist->gicr_typer = aff << GICR_TYPER_AFF_SHIFT;
+	/* Redistributor doesn't support virtual or physical LPIs. */
+ redist->gicr_typer &= ~GICR_TYPER_VLPIS;
+ redist->gicr_typer &= ~GICR_TYPER_PLPIS;
+
+ if (last_vcpu)
+ /* Mark the last Redistributor */
+ redist->gicr_typer |= GICR_TYPER_LAST;
+
+ /*
+ * Configure the Redistributor Control Register.
+ *
+ * ~GICR_CTLR_LPI_ENABLE: LPIs are disabled
+ */
+ redist->gicr_ctlr = 0 & ~GICR_CTLR_LPI_ENABLE;
+
+ mtx_init(&cpu_if->lr_mtx, "VGICv3 ICH_LR_EL2 lock", NULL, MTX_SPIN);
+
+ /*
+ * Configure the Interrupt Controller Hyp Control Register.
+ *
+ * ICH_HCR_EL2_En: enable virtual CPU interface.
+ *
+ * Maintenance interrupts are disabled.
+ */
+ cpu_if->ich_hcr_el2 = ICH_HCR_EL2_En;
+
+ /*
+ * Configure the Interrupt Controller Virtual Machine Control Register.
+ *
+ * ICH_VMCR_EL2_VPMR: lowest priority mask for the VCPU interface
+ * ICH_VMCR_EL2_VBPR1_NO_PREEMPTION: disable interrupt preemption for
+ * Group 1 interrupts
+ * ICH_VMCR_EL2_VBPR0_NO_PREEMPTION: disable interrupt preemption for
+ * Group 0 interrupts
+ * ~ICH_VMCR_EL2_VEOIM: writes to EOI registers perform priority drop
+ * and interrupt deactivation.
+ * ICH_VMCR_EL2_VENG0: virtual Group 0 interrupts enabled.
+ * ICH_VMCR_EL2_VENG1: virtual Group 1 interrupts enabled.
+ */
+ cpu_if->ich_vmcr_el2 = \
+ (virt_features.min_prio << ICH_VMCR_EL2_VPMR_SHIFT) | \
+ ICH_VMCR_EL2_VBPR1_NO_PREEMPTION | ICH_VMCR_EL2_VBPR0_NO_PREEMPTION;
+ cpu_if->ich_vmcr_el2 &= ~ICH_VMCR_EL2_VEOIM;
+ cpu_if->ich_vmcr_el2 |= ICH_VMCR_EL2_VENG0 | ICH_VMCR_EL2_VENG1;
+
+ cpu_if->ich_lr_num = virt_features.ich_lr_num;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ cpu_if->ich_lr_el2[i] = 0UL;
+
+ cpu_if->ich_ap0r_num = virt_features.ich_ap0r_num;
+ cpu_if->ich_ap1r_num = virt_features.ich_ap1r_num;
+
+ cpu_if->irqbuf = malloc(IRQBUF_SIZE_MIN * sizeof(*cpu_if->irqbuf),
+ M_VGIC_V3, M_WAITOK | M_ZERO);
+ cpu_if->irqbuf_size = IRQBUF_SIZE_MIN;
+ cpu_if->irqbuf_num = 0;
+}
+
+void
+vgic_v3_vminit(void *arg)
+{
+ struct hyp *hyp = arg;
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ /*
+ * Configure the Distributor control register. The register resets to an
+ * architecturally UNKNOWN value, so we reset to 0 to disable all
+ * functionality controlled by the register.
+ *
+	 * The exception is GICD_CTLR.DS, which is RAO/WI when the Distributor
+ * supports one security state (ARM GIC Architecture Specification for
+ * GICv3 and GICv4, p. 4-464)
+ */
+ dist->gicd_ctlr = GICD_CTLR_DS;
+
+ dist->gicd_typer = ro_regs.gicd_typer;
+ dist->nirqs = GICD_TYPER_I_NUM(dist->gicd_typer);
+ dist->gicd_pidr2 = ro_regs.gicd_pidr2;
+
+ mtx_init(&dist->dist_mtx, "VGICv3 Distributor lock", NULL, MTX_SPIN);
+}
+
+int
+vgic_v3_attach_to_vm(void *arg, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size)
+{
+ struct hyp *hyp = arg;
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+ struct vgic_v3_redist *redist;
+ int i;
+
+ /* Set the distributor address and size for trapping guest access. */
+ dist->start = dist_start;
+ dist->end = dist_start + dist_size;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ redist = &hyp->ctx[i].vgic_redist;
+ /* Set the redistributor address and size. */
+ redist->start = redist_start;
+ redist->end = redist_start + redist_size;
+ }
+ vgic_v3_mmio_init(hyp);
+
+ hyp->vgic_attached = true;
+
+ return (0);
+}
+
+void
+vgic_v3_detach_from_vm(void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vgic_v3_cpu_if *cpu_if;
+ int i;
+
+ hyp = arg;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+		hypctx = &hyp->ctx[i];
+ cpu_if = &hypctx->vgic_cpu_if;
+ free(cpu_if->irqbuf, M_VGIC_V3);
+ }
+
+ vgic_v3_mmio_destroy(hyp);
+}
+
+int
+vgic_v3_vcpu_pending_irq(void *arg)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+
+ return (cpu_if->irqbuf_num);
+}
+
+/* Removes ALL instances of interrupt 'irq' */
+static int
+vgic_v3_irqbuf_remove_nolock(uint32_t irq, struct vgic_v3_cpu_if *cpu_if)
+{
+ size_t dest = 0;
+ size_t from = cpu_if->irqbuf_num;
+
+ while (dest < cpu_if->irqbuf_num) {
+ if (cpu_if->irqbuf[dest].irq == irq) {
+ for (from = dest + 1; from < cpu_if->irqbuf_num; from++) {
+ if (cpu_if->irqbuf[from].irq == irq)
+ continue;
+ cpu_if->irqbuf[dest++] = cpu_if->irqbuf[from];
+ }
+ cpu_if->irqbuf_num = dest;
+ } else {
+ dest++;
+ }
+ }
+
+ return (from - dest);
+}
+
+int
+vgic_v3_remove_irq(void *arg, uint32_t irq, bool ignore_state)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ size_t i;
+
+ if (irq >= dist->nirqs) {
+ eprintf("Malformed IRQ %u.\n", irq);
+ return (1);
+ }
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ for (i = 0; i < cpu_if->ich_lr_num; i++) {
+ if (ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq &&
+ (lr_not_active(cpu_if->ich_lr_el2[i]) || ignore_state))
+ lr_clear_irq(cpu_if->ich_lr_el2[i]);
+ }
+ vgic_v3_irqbuf_remove_nolock(irq, cpu_if);
+
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+
+ return (0);
+}
+
+static struct vgic_v3_irq *
+vgic_v3_irqbuf_add_nolock(struct vgic_v3_cpu_if *cpu_if)
+{
+ struct vgic_v3_irq *new_irqbuf, *old_irqbuf;
+ size_t new_size;
+
+ if (cpu_if->irqbuf_num == cpu_if->irqbuf_size) {
+ /* Double the size of the buffered interrupts list */
+ new_size = cpu_if->irqbuf_size << 1;
+ if (new_size > IRQBUF_SIZE_MAX)
+ return (NULL);
+
+ new_irqbuf = NULL;
+ /* TODO: malloc sleeps here and causes a panic */
+ while (new_irqbuf == NULL)
+ new_irqbuf = malloc(new_size * sizeof(*cpu_if->irqbuf),
+ M_VGIC_V3, M_NOWAIT | M_ZERO);
+ memcpy(new_irqbuf, cpu_if->irqbuf,
+ cpu_if->irqbuf_size * sizeof(*cpu_if->irqbuf));
+
+ old_irqbuf = cpu_if->irqbuf;
+ cpu_if->irqbuf = new_irqbuf;
+ cpu_if->irqbuf_size = new_size;
+ free(old_irqbuf, M_VGIC_V3);
+ }
+
+ cpu_if->irqbuf_num++;
+
+ return (&cpu_if->irqbuf[cpu_if->irqbuf_num - 1]);
+}
+
+static bool
+vgic_v3_int_target(uint32_t irq, struct hypctx *hypctx)
+{
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ uint64_t irouter;
+ uint64_t aff;
+ uint32_t irq_off, irq_mask;
+ int n;
+
+ if (irq <= GIC_LAST_PPI)
+ return (true);
+
+ /* XXX Affinity routing disabled not implemented */
+ if (!aff_routing_en(dist))
+ return (true);
+
+ irq_off = irq % 32;
+ irq_mask = 1 << irq_off;
+ n = irq / 32;
+
+ irouter = dist->gicd_irouter[irq];
+ /* Check if 1-of-N routing is active */
+ if (irouter & GICD_IROUTER_IRM)
+ /* Check if the VCPU is participating */
+ return (redist->gicr_ctlr & GICR_CTLR_DPG1NS ? true : false);
+
+ aff = redist->gicr_typer >> GICR_TYPER_AFF_SHIFT;
+ /* Affinity in format for comparison with irouter */
+ aff = GICR_TYPER_AFF0(redist->gicr_typer) | \
+ (GICR_TYPER_AFF1(redist->gicr_typer) << 8) | \
+ (GICR_TYPER_AFF2(redist->gicr_typer) << 16) | \
+ (GICR_TYPER_AFF3(redist->gicr_typer) << 32);
+ if ((irouter & aff) == aff)
+ return (true);
+ else
+ return (false);
+}
+
+static uint8_t
+vgic_v3_get_priority(uint32_t irq, struct hypctx *hypctx)
+{
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ struct vgic_v3_redist *redist = &hypctx->vgic_redist;
+ size_t n;
+ uint32_t off, mask;
+ uint8_t priority;
+
+ n = irq / 4;
+	/* Each 32-bit priority register holds four 8-bit priority fields. */
+	off = (irq % 4) * 8;
+	mask = 0xff << off;
+ /*
+ * When affinity routing is enabled, the Redistributor is used for
+ * SGIs and PPIs and the Distributor for SPIs. When affinity routing
+ * is not enabled, the Distributor registers are used for all
+ * interrupts.
+ */
+ if (aff_routing_en(dist) && (n <= 7))
+ priority = (redist->gicr_ipriorityr[n] & mask) >> off;
+ else
+ priority = (dist->gicd_ipriorityr[n] & mask) >> off;
+
+ return (priority);
+}
+
+static bool
+vgic_v3_intid_enabled(uint32_t irq, struct hypctx *hypctx)
+{
+ struct vgic_v3_dist *dist;
+ struct vgic_v3_redist *redist;
+ uint32_t irq_off, irq_mask;
+ int n;
+
+ irq_off = irq % 32;
+ irq_mask = 1 << irq_off;
+ n = irq / 32;
+
+ if (irq <= GIC_LAST_PPI) {
+ redist = &hypctx->vgic_redist;
+ if (!(redist->gicr_ixenabler0 & irq_mask))
+ return (false);
+ } else {
+ dist = &hypctx->hyp->vgic_dist;
+ if (!(dist->gicd_ixenabler[n] & irq_mask))
+ return (false);
+ }
+
+ return (true);
+}
+
+static inline bool
+dist_group_enabled(struct vgic_v3_dist *dist)
+{
+ return ((dist->gicd_ctlr & GICD_CTLR_G1A) != 0);
+}
+
+int
+vgic_v3_inject_irq(void *arg, uint32_t irq, enum vgic_v3_irqtype irqtype)
+{
+ struct hypctx *hypctx = arg;
+ struct vgic_v3_dist *dist = &hypctx->hyp->vgic_dist;
+ struct vgic_v3_cpu_if *cpu_if = &hypctx->vgic_cpu_if;
+ struct vgic_v3_irq *vip;
+ int error;
+ int i;
+ uint8_t priority;
+ bool enabled;
+
+ KASSERT(irq > GIC_LAST_SGI, ("SGI interrupts not implemented"));
+
+ if (irq >= dist->nirqs || irqtype >= VGIC_IRQ_INVALID) {
+ eprintf("Malformed IRQ %u.\n", irq);
+ return (1);
+ }
+
+ error = 0;
+ mtx_lock_spin(&dist->dist_mtx);
+
+ enabled = dist_group_enabled(&hypctx->hyp->vgic_dist) &&
+ vgic_v3_intid_enabled(irq, hypctx) &&
+ vgic_v3_int_target(irq, hypctx);
+ priority = vgic_v3_get_priority(irq, hypctx);
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ /*
+	 * If the guest is falling behind on timer interrupts, don't swamp it with
+ * one interrupt after another. However, if the timer interrupt is being
+ * serviced by the guest (it is in a state other than pending, either
+ * active or pending and active), then add it to the buffer to be
+ * injected later. Otherwise, the timer would stop working because we
+ * disable the timer in the host interrupt handler.
+ */
+ if (irqtype == VGIC_IRQ_CLK) {
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq &&
+ lr_pending(cpu_if->ich_lr_el2[i]))
+ goto out;
+ for (i = 0; i < cpu_if->irqbuf_num; i++)
+ if (cpu_if->irqbuf[i].irq == irq)
+ goto out;
+ }
+
+ vip = vgic_v3_irqbuf_add_nolock(cpu_if);
+ if (!vip) {
+ eprintf("Error adding IRQ %u to the IRQ buffer.\n", irq);
+ error = 1;
+ goto out;
+ }
+ vip->irq = irq;
+ vip->irqtype = irqtype;
+ vip->enabled = enabled;
+ vip->priority = priority;
+
+out:
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+ mtx_unlock_spin(&dist->dist_mtx);
+
+ return (error);
+}
+
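+/*
+ * Propagate a change of the GICD_CTLR Group 1 enable bit to the interrupts
+ * buffered for every VCPU.
+ */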
+void
+vgic_v3_group_toggle_enabled(bool enabled, struct hyp *hyp)
+{
+ struct hypctx *hypctx;
+ struct vgic_v3_cpu_if *cpu_if;
+ struct vgic_v3_irq *vip;
+ int i, j;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ hypctx = &hyp->ctx[i];
+ cpu_if = &hypctx->vgic_cpu_if;
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ for (j = 0; j < cpu_if->irqbuf_num; j++) {
+ vip = &cpu_if->irqbuf[j];
+ if (!enabled)
+ vip->enabled = 0;
+ else if (vgic_v3_intid_enabled(vip->irq, hypctx))
+ vip->enabled = 1;
+ }
+
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+ }
+}
+
+static int
+vgic_v3_irq_toggle_enabled_vcpu(uint32_t irq, bool enabled,
+ struct vgic_v3_cpu_if *cpu_if)
+{
+ int i;
+
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ if (enabled) {
+ /*
+ * Enable IRQs that were injected when the interrupt ID was
+ * disabled
+ */
+ for (i = 0; i < cpu_if->irqbuf_num; i++)
+ if (cpu_if->irqbuf[i].irq == irq)
+ cpu_if->irqbuf[i].enabled = true;
+ } else {
+ /* Remove the disabled IRQ from the LR regs if it is pending */
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (lr_pending(cpu_if->ich_lr_el2[i]) &&
+ ICH_LR_EL2_VINTID(cpu_if->ich_lr_el2[i]) == irq)
+ lr_clear_irq(cpu_if->ich_lr_el2[i]);
+
+ /* Remove the IRQ from the interrupt buffer */
+ vgic_v3_irqbuf_remove_nolock(irq, cpu_if);
+ }
+
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+
+ return (0);
+}
+
+int
+vgic_v3_irq_toggle_enabled(uint32_t irq, bool enabled,
+ struct hyp *hyp, int vcpuid)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ int error;
+ int i;
+
+ if (irq <= GIC_LAST_PPI) {
+ cpu_if = &hyp->ctx[vcpuid].vgic_cpu_if;
+ return (vgic_v3_irq_toggle_enabled_vcpu(irq, enabled, cpu_if));
+ } else {
+ /* TODO: Update irqbuf for all VCPUs, not just VCPU 0 */
+ for (i = 0; i < 1; i++) {
+ cpu_if = &hyp->ctx[i].vgic_cpu_if;
+ error = vgic_v3_irq_toggle_enabled_vcpu(irq, enabled, cpu_if);
+ if (error)
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
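+/*
+ * Return the index of the highest priority interrupt in irqbuf[start, end)
+ * that can be signalled to the guest: not already scheduled, its group
+ * enabled, targeting this VCPU and with a priority value below the VPMR
+ * threshold. Return -1 if there is no such interrupt.
+ */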
+static int
+irqbuf_highest_priority(struct vgic_v3_cpu_if *cpu_if, int start, int end,
+ struct hypctx *hypctx)
+{
+ uint32_t irq;
+ int i, max_idx;
+ uint8_t priority, max_priority;
+ uint8_t vpmr;
+
+ vpmr = (cpu_if->ich_vmcr_el2 & ICH_VMCR_EL2_VPMR_MASK) >> \
+ ICH_VMCR_EL2_VPMR_SHIFT;
+
+ max_idx = -1;
+ max_priority = 0xff;
+ for (i = start; i < end; i++) {
+ irq = cpu_if->irqbuf[i].irq;
+ /* Check that the interrupt hasn't been already scheduled */
+ if (irq == IRQ_SCHEDULED)
+ continue;
+
+ if (!dist_group_enabled(&hypctx->hyp->vgic_dist))
+ continue;
+ if (!vgic_v3_int_target(irq, hypctx))
+ continue;
+
+ priority = cpu_if->irqbuf[i].priority;
+ if (priority >= vpmr)
+ continue;
+
+		/* A lower numeric value means a higher interrupt priority */
+		if (max_idx == -1) {
+			max_idx = i;
+			max_priority = priority;
+		} else if (priority < max_priority) {
+			max_idx = i;
+			max_priority = priority;
+		} else if (priority == max_priority &&
+		    cpu_if->irqbuf[i].irqtype < cpu_if->irqbuf[max_idx].irqtype) {
+			max_idx = i;
+			max_priority = priority;
+		}
+ }
+
+ return (max_idx);
+}
+
+static inline bool
+cpu_if_group_enabled(struct vgic_v3_cpu_if *cpu_if)
+{
+ return ((cpu_if->ich_vmcr_el2 & ICH_VMCR_EL2_VENG1) != 0);
+}
+
+static inline int
+irqbuf_next_enabled(struct vgic_v3_irq *irqbuf, int start, int end,
+ struct hypctx *hypctx, struct vgic_v3_cpu_if *cpu_if)
+{
+ int i;
+
+ if (!cpu_if_group_enabled(cpu_if))
+ return (-1);
+
+ for (i = start; i < end; i++)
+ if (irqbuf[i].enabled)
+ break;
+
+ if (i < end)
+ return (i);
+ else
+ return (-1);
+}
+
+static inline int
+vgic_v3_lr_next_empty(uint32_t ich_elrsr_el2, int start, int end)
+{
+ int i;
+
+ for (i = start; i < end; i++)
+ if (ich_elrsr_el2 & (1U << i))
+ break;
+
+ if (i < end)
+ return (i);
+ else
+ return (-1);
+}
+
+/*
+ * There are two cases in which the virtual timer interrupt is in the list
+ * registers:
+ *
+ * 1. The virtual interrupt is active. The guest is executing the interrupt
+ * handler, and the timer fired after it programmed the new alarm time but
+ * before the guest had the chance to write to the EOIR1 register.
+ *
+ * 2. The virtual interrupt is pending and active. The timer interrupt is level
+ *    sensitive. The guest wrote to the EOIR1 register, but the write hasn't yet
+ * propagated to the timer.
+ *
+ * Injecting the interrupt in these cases would mean that another timer
+ * interrupt is asserted as soon as the guest writes to the EOIR1 register (or
+ * very shortly thereafter, in the pending and active scenario). This can lead
+ * to the guest servicing timer interrupts one after the other and doing
+ * nothing else. So do not inject a timer interrupt while one is active or
+ * pending and active. The buffered timer interrupts will be injected after
+ * the next world switch in this case.
+ */
+static bool
+clk_irq_in_lr(struct vgic_v3_cpu_if *cpu_if)
+{
+ uint64_t lr;
+ int i;
+
+ for (i = 0; i < cpu_if->ich_lr_num; i++) {
+ lr = cpu_if->ich_lr_el2[i];
+ if (ICH_LR_EL2_VINTID(lr) == GT_VIRT_IRQ &&
+ (lr_active(lr) || lr_pending_active(lr)))
+ return (true);
+ }
+
+ return (false);
+}
+
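+/*
+ * Move buffered interrupts into empty List Registers, either in buffer order
+ * or by priority when not all of them fit. Timer interrupts are skipped while
+ * one is already active in the List Registers (see clk_irq_in_lr() above).
+ */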
+static void
+vgic_v3_irqbuf_to_lr(struct hypctx *hypctx, struct vgic_v3_cpu_if *cpu_if,
+ bool by_priority)
+{
+ struct vgic_v3_irq *vip;
+ int irqbuf_idx;
+ int lr_idx;
+ bool clk_present;
+
+ clk_present = clk_irq_in_lr(cpu_if);
+
+ irqbuf_idx = 0;
+ lr_idx = 0;
+ for (;;) {
+ if (by_priority)
+ irqbuf_idx = irqbuf_highest_priority(cpu_if,
+ irqbuf_idx, cpu_if->irqbuf_num, hypctx);
+ else
+ irqbuf_idx = irqbuf_next_enabled(cpu_if->irqbuf,
+ irqbuf_idx, cpu_if->irqbuf_num, hypctx, cpu_if);
+ if (irqbuf_idx == -1)
+ break;
+
+ lr_idx = vgic_v3_lr_next_empty(cpu_if->ich_elrsr_el2,
+ lr_idx, cpu_if->ich_lr_num);
+ if (lr_idx == -1)
+ break;
+
+ vip = &cpu_if->irqbuf[irqbuf_idx];
+ if (vip->irqtype == VGIC_IRQ_CLK && clk_present) {
+ /* Skip injecting timer interrupt. */
+ irqbuf_idx++;
+ continue;
+ }
+
+ vip_to_lr(vip, cpu_if->ich_lr_el2[lr_idx]);
+ vip->irq = IRQ_SCHEDULED;
+ irqbuf_idx++;
+ lr_idx++;
+ }
+
+ /* Remove all interrupts that were just scheduled. */
+ vgic_v3_irqbuf_remove_nolock(IRQ_SCHEDULED, cpu_if);
+}
+
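+/*
+ * Transfer buffered interrupts to the List Registers before the VCPU runs
+ * again. If non-timer interrupts remain buffered, enable the underflow
+ * maintenance interrupt so that they can be injected on a later world switch.
+ */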
+void
+vgic_v3_sync_hwstate(void *arg)
+{
+ struct hypctx *hypctx;
+ struct vgic_v3_cpu_if *cpu_if;
+ int lr_free;
+ int i;
+ bool by_priority;
+ bool en_underflow_intr;
+
+ hypctx = arg;
+ cpu_if = &hypctx->vgic_cpu_if;
+
+ /*
+ * All Distributor writes have been executed at this point, do not
+ * protect Distributor reads with a mutex.
+ *
+	 * This is called with all interrupts disabled, so there is no need for
+ * a List Register spinlock either.
+ */
+ mtx_lock_spin(&cpu_if->lr_mtx);
+
+ /* Exit early if there are no buffered interrupts */
+ if (cpu_if->irqbuf_num == 0) {
+ cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+ goto out;
+ }
+
+ /* Test if all buffered interrupts can fit in the LR regs */
+ lr_free = 0;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (cpu_if->ich_elrsr_el2 & (1U << i))
+ lr_free++;
+
+	by_priority = (cpu_if->irqbuf_num > lr_free);
+ vgic_v3_irqbuf_to_lr(hypctx, cpu_if, by_priority);
+
+ lr_free = 0;
+ for (i = 0; i < cpu_if->ich_lr_num; i++)
+ if (cpu_if->ich_elrsr_el2 & (1U << i))
+ lr_free++;
+
+ en_underflow_intr = false;
+ if (cpu_if->irqbuf_num > 0)
+ for (i = 0; i < cpu_if->irqbuf_num; i++)
+ if (cpu_if->irqbuf[i].irqtype != VGIC_IRQ_CLK) {
+ en_underflow_intr = true;
+ break;
+ }
+ if (en_underflow_intr) {
+ cpu_if->ich_hcr_el2 |= ICH_HCR_EL2_UIE;
+ } else {
+ cpu_if->ich_hcr_el2 &= ~ICH_HCR_EL2_UIE;
+ }
+
+out:
+ mtx_unlock_spin(&cpu_if->lr_mtx);
+}
+
+static void
+vgic_v3_get_ro_regs()
+{
+ /* GICD_ICFGR0 configures SGIs and it is read-only. */
+ ro_regs.gicd_icfgr0 = gic_d_read(gic_sc, 4, GICD_ICFGR(0));
+
+ /*
+ * Configure the GIC type register for the guest.
+ *
+ * ~GICD_TYPER_SECURITYEXTN: disable security extensions.
+ * ~GICD_TYPER_DVIS: direct injection for virtual LPIs not supported.
+ * ~GICD_TYPER_LPIS: LPIs not supported.
+ */
+ ro_regs.gicd_typer = gic_d_read(gic_sc, 4, GICD_TYPER);
+ ro_regs.gicd_typer &= ~GICD_TYPER_SECURITYEXTN;
+ ro_regs.gicd_typer &= ~GICD_TYPER_DVIS;
+ ro_regs.gicd_typer &= ~GICD_TYPER_LPIS;
+
+ /*
+ * XXX. Guest reads of GICD_PIDR2 should return the same ArchRev as
+ * specified in the guest FDT.
+ */
+ ro_regs.gicd_pidr2 = gic_d_read(gic_sc, 4, GICD_PIDR2);
+}
+
+void
+vgic_v3_init(uint64_t ich_vtr_el2)
+{
+ uint32_t pribits, prebits;
+
+ KASSERT(gic_sc != NULL, ("GIC softc is NULL"));
+
+ vgic_v3_get_ro_regs();
+
+ pribits = ICH_VTR_EL2_PRIBITS(ich_vtr_el2);
+	switch (pribits) {
+	case 5:
+		virt_features.min_prio = 0xf8;
+		break;
+	case 6:
+		virt_features.min_prio = 0xfc;
+		break;
+	case 7:
+		virt_features.min_prio = 0xfe;
+		break;
+	case 8:
+		virt_features.min_prio = 0xff;
+		break;
+	}
+
+ prebits = ICH_VTR_EL2_PREBITS(ich_vtr_el2);
+	switch (prebits) {
+	case 5:
+		virt_features.ich_ap0r_num = 1;
+		virt_features.ich_ap1r_num = 1;
+		break;
+	case 6:
+		virt_features.ich_ap0r_num = 2;
+		virt_features.ich_ap1r_num = 2;
+		break;
+	case 7:
+		virt_features.ich_ap0r_num = 4;
+		virt_features.ich_ap1r_num = 4;
+		break;
+	}
+
+ virt_features.ich_lr_num = ICH_VTR_EL2_LISTREGS(ich_vtr_el2);
+}
+
+static int
+vgic_v3_maint_intr(void *arg)
+{
+ printf("MAINTENANCE INTERRUPT\n");
+
+ return (FILTER_HANDLED);
+}
+
+/*
+ * TODO: Look at how gic_v3_fdt.c adds the gic driver.
+ *
+ * 1. In probe they set the device description.
+ * 2. In attach they create children devices for the GIC (in
+ * gic_v3_ofw_bus_attach).
+ * 3. There is no identify function being called.
+ *
+ * On the other hand, in man 9 DEVICE_IDENTIFY it is stated that a new device
+ * instance is created by the identify function.
+ */
+
+static void
+arm_vgic_identify(driver_t *driver, device_t parent)
+{
+ device_t dev;
+
+ if (strcmp(device_get_name(parent), "gic") == 0) {
+ dev = device_find_child(parent, VGIC_V3_DEVNAME, -1);
+ if (!dev)
+ dev = device_add_child(parent, VGIC_V3_DEVNAME, -1);
+ gic_sc = device_get_softc(parent);
+ }
+}
+
+static int
+arm_vgic_probe(device_t dev)
+{
+ device_t parent;
+
+ parent = device_get_parent(dev);
+ if (strcmp(device_get_name(parent), "gic") == 0) {
+ device_set_desc(dev, VGIC_V3_DEVSTR);
+ return (BUS_PROBE_DEFAULT);
+ }
+
+ return (ENXIO);
+}
+
+static int
+arm_vgic_attach(device_t dev)
+{
+ int error;
+
+ error = gic_v3_setup_maint_intr(vgic_v3_maint_intr, NULL, NULL);
+ if (error)
+ device_printf(dev, "Could not setup maintenance interrupt\n");
+
+ return (0);
+}
+
+static int
+arm_vgic_detach(device_t dev)
+{
+ int error;
+
+ error = gic_v3_teardown_maint_intr();
+ if (error)
+ device_printf(dev, "Could not teardown maintenance interrupt\n");
+
+ gic_sc = NULL;
+
+ return (0);
+}
+
+static device_method_t arm_vgic_methods[] = {
+ DEVMETHOD(device_identify, arm_vgic_identify),
+ DEVMETHOD(device_probe, arm_vgic_probe),
+ DEVMETHOD(device_attach, arm_vgic_attach),
+ DEVMETHOD(device_detach, arm_vgic_detach),
+ DEVMETHOD_END
+};
+
+DEFINE_CLASS_1(vgic, arm_vgic_driver, arm_vgic_methods, 0, gic_v3_driver);
+
+static devclass_t arm_vgic_devclass;
+DRIVER_MODULE(vgic, gic, arm_vgic_driver, arm_vgic_devclass, 0, 0);
Index: sys/arm64/vmm/io/vgic_v3_mmio.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3_mmio.c
@@ -0,0 +1,1025 @@
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+
+#define DEBUG 0
+
+#define GICR_FRAME_RD 0
+#define GICR_FRAME_SGI GICR_RD_BASE_SIZE
+
+#define RES0 (0UL)
+#define RES1 (~0UL)
+
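+/*
+ * Helpers for Redistributor registers that are simply backed by a field in
+ * struct vgic_v3_redist and have no side effects on access.
+ */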
+#define redist_simple_read(src, destp, vm, vcpuid)			\
+do {									\
+	struct hyp *hyp = vm_get_cookie(vm);				\
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;	\
+	*destp = redist->src;						\
+} while (0)
+
+#define redist_simple_write(src, dest, vm, vcpuid)			\
+do {									\
+	struct hyp *hyp = vm_get_cookie(vm);				\
+	struct vgic_v3_redist *redist = &hyp->ctx[vcpuid].vgic_redist;	\
+	redist->dest = src;						\
+} while (0)
+
+#define reg32_idx(ipa, region) (((ipa) - (region).start) / 4)
+#define reg64_idx(ipa, region) (((ipa) - (region).start) / 8)
+
+#define reg_changed(new, old, mask) (((new) & (mask)) != ((old) & (mask)))
+
+/* The names should always be in ascending order of memory address */
+enum vgic_mmio_region_name {
+ /* Distributor registers */
+ VGIC_GICD_CTLR,
+ VGIC_GICD_TYPER,
+ VGIC_GICD_IGROUPR,
+ VGIC_GICD_ISENABLER,
+ VGIC_GICD_ICENABLER,
+ VGIC_GICD_IPRIORITYR,
+ VGIC_GICD_ICFGR,
+ VGIC_GICD_IROUTER,
+ VGIC_GICD_PIDR2,
+ /* Redistributor registers */
+ VGIC_GICR_CTLR,
+ VGIC_GICR_TYPER,
+ VGIC_GICR_WAKER,
+ VGIC_GICR_PIDR2,
+ VGIC_GICR_IGROUPR0,
+ VGIC_GICR_ISENABLER0,
+ VGIC_GICR_ICENABLER0,
+ VGIC_GICR_IPRIORITYR,
+ VGIC_GICR_ICFGR0,
+ VGIC_GICR_ICFGR1,
+ VGIC_MMIO_REGIONS_NUM,
+};
+/*
+ * Necessary for calculating the number of Distributor and Redistributor
+ * regions emulated.
+ */
+#define FIRST_REDIST_MMIO_REGION VGIC_GICR_CTLR
+
+MALLOC_DEFINE(M_VGIC_V3_MMIO, "ARM VMM VGIC DIST MMIO", "ARM VMM VGIC DIST MMIO");
+
+static int
+dist_ctlr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_ctlr;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+ /* Writes are never pending */
+ *rval &= ~GICD_CTLR_RWP;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_ctlr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+ /* GICD_CTLR.DS is RAO/WI when only one security state is supported. */
+ wval |= GICD_CTLR_DS;
+
+ mtx_lock_spin(&dist->dist_mtx);
+
+ if (reg_changed(wval, dist->gicd_ctlr, GICD_CTLR_G1A)) {
+ if (!(wval & GICD_CTLR_G1A))
+ vgic_v3_group_toggle_enabled(false, hyp);
+ else
+ vgic_v3_group_toggle_enabled(true, hyp);
+ }
+ dist->gicd_ctlr = wval;
+
+ mtx_unlock_spin(&dist->dist_mtx);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_typer_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ *rval = dist->gicd_typer;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_typer_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICD_TYPER.\n");
+
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR as RAO/WI. */
+static int
+dist_igroupr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ int n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IGROUPR]);
+ /*
+ * GIC Architecture specification, p 8-477: "For SGIs and PPIs: When
+ * ARE is 1 for the Security state of an interrupt, the field for that
+ * interrupt is RES0 and an implementation is permitted to make the
+ * field RAZ/WI in this case".
+ */
+ if (n == 0 && aff_routing_en(dist)) {
+ *rval = RES0;
+ } else {
+ *rval = RES1;
+ }
+
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR as RAO/WI. */
+static int
+dist_igroupr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ *retu = false;
+ return (0);
+}
+
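+/*
+ * Walk the 32 interrupts covered by one I[SC]ENABLER register and toggle
+ * every interrupt whose enable bit has changed.
+ */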
+static void
+mmio_update_int_enabled(uint32_t new_ixenabler, uint32_t old_ixenabler,
+ uint32_t irq, struct hyp *hyp, int vcpuid)
+{
+ uint32_t irq_mask;
+ int error;
+ int i;
+ bool enabled;
+
+ irq_mask = 0x1;
+ for (i = 0; i < 32; i++) {
+ if (reg_changed(new_ixenabler, old_ixenabler, irq_mask)) {
+ enabled = ((new_ixenabler & irq_mask) != 0);
+ error = vgic_v3_irq_toggle_enabled(irq, enabled,
+ hyp, vcpuid);
+ if (error)
+ eprintf("Warning: error while toggling IRQ %u\n", irq);
+ }
+ irq++;
+ irq_mask <<= 1;
+ }
+}
+
+static int
+dist_ixenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ void *arg, enum vgic_mmio_region_name name)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[name]);
+ /*
+ * GIC Architecture specification, p 8-471: "When ARE is 1 for the
+ * Security state of an interrupt, the field for that interrupt is RES0
+	 * and an implementation is permitted to make the field RAZ/WI in this
+ * case".
+ */
+ if (n == 0 && aff_routing_en(dist)) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_ixenabler[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_ixenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ void *arg, enum vgic_mmio_region_name name)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ uint32_t old_ixenabler;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[name]);
+ /* See dist_ixenabler_read() */
+ if (n == 0 && aff_routing_en(dist))
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+
+ old_ixenabler = dist->gicd_ixenabler[n];
+ if (name == VGIC_GICD_ICENABLER)
+ dist->gicd_ixenabler[n] &= ~wval;
+ else
+ dist->gicd_ixenabler[n] |= wval;
+ mmio_update_int_enabled(dist->gicd_ixenabler[n], old_ixenabler, n * 32,
+ hyp, vcpuid);
+
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_isenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_read(vm, vcpuid, fault_ipa, rval, arg,
+ VGIC_GICD_ISENABLER));
+}
+
+static int
+dist_isenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_write(vm, vcpuid, fault_ipa, wval, arg,
+ VGIC_GICD_ISENABLER));
+}
+
+static int
+dist_icenabler_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_read(vm, vcpuid, fault_ipa, rval, arg,
+ VGIC_GICD_ICENABLER));
+}
+
+static int
+dist_icenabler_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ return (dist_ixenabler_write(vm, vcpuid, fault_ipa, wval, arg,
+ VGIC_GICD_ICENABLER));
+}
+
+/* XXX: Registers are byte accessible. */
+static int
+dist_ipriorityr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IPRIORITYR]);
+ /*
+ * GIC Architecture specification, p 8-483: when affinity
+ * routing is enabled, GICD_IPRIORITYR<n> is RAZ/WI for
+ * n = 0 to 7.
+ */
+ if (aff_routing_en(dist) && n <= 7) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_ipriorityr[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_ipriorityr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IPRIORITYR]);
+ /* See dist_ipriorityr_read() */
+ if (aff_routing_en(dist) && n <= 7)
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ dist->gicd_ipriorityr[n] = wval;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_icfgr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_ICFGR]);
+ /*
+ * ARM GIC Architecture Specification, p 8-472: "For SGIs,
+ * Int_config fields are RO, meaning that GICD_ICFGR0 is RO."
+ */
+ if (n == 0) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_icfgr[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_icfgr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+ size_t n;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_ICFGR]);
+ if (n == 0)
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ dist->gicd_icfgr[n] = wval;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_irouter_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg64_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IROUTER]);
+ /* GIC Architecture Manual, p 8-485: registers 0 to 31 are reserved */
+ if (n <= 31) {
+ eprintf("Warning: Read from register GICD_IROUTER%zu\n", n);
+ *rval = RES0;
+ goto out;
+ }
+
+ /*
+ * GIC Architecture Manual, p 8-485: when affinity routing is not
+ * enabled, the registers are RAZ/WI.
+ */
+ if (!aff_routing_en(dist)) {
+ *rval = RES0;
+ goto out;
+ }
+
+ mtx_lock_spin(&dist->dist_mtx);
+ *rval = dist->gicd_irouter[n];
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_irouter_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ size_t n;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ n = reg64_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICD_IROUTER]);
+ if (n <= 31) {
+ eprintf("Warning: Write to register GICD_IROUTER%zu\n", n);
+ goto out;
+ }
+
+ /* See dist_irouter_read() */
+ if (!aff_routing_en(dist))
+ /* Ignore writes */
+ goto out;
+
+ mtx_lock_spin(&dist->dist_mtx);
+ dist->gicd_irouter[n] = wval;
+ mtx_unlock_spin(&dist->dist_mtx);
+
+out:
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_pidr2_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ *rval = dist->gicd_pidr2;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+dist_pidr2_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICD_PIDR2.\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ctlr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_read(gicr_ctlr, rval, vm, vcpuid);
+ /* Writes are never pending */
+ *rval &= ~GICR_CTLR_RWP & ~GICR_CTLR_UWP;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ctlr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_write(wval, gicr_ctlr, vm, vcpuid);
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_typer_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_read(gicr_typer, rval, vm, vcpuid);
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_typer_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICR_TYPER.\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_waker_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+	/* The Redistributor is always awake, so PS and CA read as zero */
+	*rval = 0;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_waker_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ /* Ignore writes */
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR0 as RAO/WI. */
+static int
+redist_igroupr0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ *rval = RES1;
+ *retu = false;
+ return (0);
+}
+
+/* Only group 1 interrupts are supported. Treat IGROUPR0 as RAO/WI. */
+static int
+redist_igroupr0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ if (wval == 0UL)
+ printf("Warning: Interrupts marked as group 0, ignoring\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ixenabler_read(void *vm, int vcpuid, uint64_t *rval, void *arg,
+ enum vgic_mmio_region_name reg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ *rval = redist->gicr_ixenabler0;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ixenabler_write(void *vm, int vcpuid, uint64_t wval, void *arg,
+ enum vgic_mmio_region_name reg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ uint32_t old_ixenabler0, new_ixenabler0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ old_ixenabler0 = redist->gicr_ixenabler0;
+ if (reg == VGIC_GICR_ICENABLER0)
+ new_ixenabler0 = old_ixenabler0 & ~wval;
+ else
+ new_ixenabler0 = old_ixenabler0 | wval;
+ mmio_update_int_enabled(new_ixenabler0, old_ixenabler0, 0, hyp, vcpuid);
+ redist->gicr_ixenabler0 = new_ixenabler0;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_isenabler0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_read(vm, vcpuid, rval, arg,
+ VGIC_GICR_ISENABLER0));
+}
+
+static int
+redist_isenabler0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_write(vm, vcpuid, wval, arg,
+ VGIC_GICR_ISENABLER0));
+}
+
+static int
+redist_icenabler0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_read(vm, vcpuid, rval, arg,
+ VGIC_GICR_ICENABLER0));
+}
+
+static int
+redist_icenabler0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+ return (redist_ixenabler_write(vm, vcpuid, wval, arg,
+ VGIC_GICR_ICENABLER0));
+}
+
+static int
+redist_ipriorityr_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ size_t n;
+ bool *retu = arg;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICR_IPRIORITYR]);
+ *rval = redist->gicr_ipriorityr[n];
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_ipriorityr_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_redist *redist;
+ size_t n;
+ bool *retu = arg;
+
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ hyp = vm_get_cookie(vm);
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+
+ n = reg32_idx(fault_ipa, hyp->vgic_mmio_regions[VGIC_GICR_IPRIORITYR]);
+ redist->gicr_ipriorityr[n] = wval;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_pidr2_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ struct hyp *hyp;
+ struct vgic_v3_dist *dist;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ dist = &hyp->vgic_dist;
+
+ /* GICR_PIDR2 has the same value as GICD_PIDR2 */
+ *rval = dist->gicd_pidr2;
+#if (DEBUG > 0)
+ eprintf("\n");
+#endif
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_pidr2_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ eprintf("Warning: Attempted write to read-only register GICR_PIDR2.\n");
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr0_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_read(gicr_icfgr0, rval, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr0_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+ redist_simple_write(wval, gicr_icfgr0, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr1_read(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t *rval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+	redist_simple_read(gicr_icfgr1, rval, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
+static int
+redist_icfgr1_write(void *vm, int vcpuid, uint64_t fault_ipa, uint64_t wval,
+ int size, void *arg)
+{
+ bool *retu = arg;
+
+	redist_simple_write(wval, gicr_icfgr1, vm, vcpuid);
+
+ *retu = false;
+ return (0);
+}
+
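+/*
+ * Allocate zeroed backing storage for 'num' emulated registers and report the
+ * size of the allocation so the caller can size the MMIO region to match.
+ */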
+#define alloc_registers(regs, num, size)				\
+do {									\
+	size = (num) * sizeof(*regs);					\
+	regs = malloc(size, M_VGIC_V3_MMIO, M_WAITOK | M_ZERO);	\
+} while (0)
+
+#define div_round_up(n, div) (((n) + (div) - 1) / (div))
+
+static inline void
+init_mmio_region(struct hyp *hyp, size_t regidx, vm_offset_t start,
+ size_t size, mem_region_read_t read_fn, mem_region_write_t write_fn)
+{
+ hyp->vgic_mmio_regions[regidx] = (struct vgic_mmio_region) {
+ .start = start,
+ .end = start + size,
+ .read = read_fn,
+ .write = write_fn,
+ };
+}
+
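+/*
+ * Set up the MMIO emulation regions for the Distributor. The register banks
+ * sized from the number of supported interrupts are allocated here.
+ */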
+static void
+dist_mmio_init_regions(struct vgic_v3_dist *dist, struct hyp *hyp)
+{
+ size_t n;
+ size_t region_size;
+
+ init_mmio_region(hyp, VGIC_GICD_CTLR, dist->start + GICD_CTLR,
+ sizeof(dist->gicd_ctlr), dist_ctlr_read, dist_ctlr_write);
+ init_mmio_region(hyp, VGIC_GICD_TYPER, dist->start + GICD_TYPER,
+ sizeof(dist->gicd_typer), dist_typer_read, dist_typer_write);
+
+ n = div_round_up(dist->nirqs, 32);
+ init_mmio_region(hyp, VGIC_GICD_IGROUPR, dist->start + GICD_IGROUPR_BASE,
+ n * sizeof(uint32_t), dist_igroupr_read, dist_igroupr_write);
+
+ /* ARM GIC Architecture Specification, page 8-471. */
+ n = (dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1;
+	alloc_registers(dist->gicd_ixenabler, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_ISENABLER, dist->start + GICD_ISENABLER_BASE,
+ region_size, dist_isenabler_read, dist_isenabler_write);
+ init_mmio_region(hyp, VGIC_GICD_ICENABLER, dist->start + GICD_ICENABLER_BASE,
+ region_size, dist_icenabler_read, dist_icenabler_write);
+
+ /* ARM GIC Architecture Specification, page 8-483. */
+ n = 8 * ((dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1);
+ alloc_registers(dist->gicd_ipriorityr, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_IPRIORITYR, dist->start + GICD_IPRIORITYR_BASE,
+ region_size, dist_ipriorityr_read, dist_ipriorityr_write);
+
+ n = div_round_up(dist->nirqs, 16);
+ alloc_registers(dist->gicd_icfgr, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_ICFGR, dist->start + GICD_ICFGR_BASE,
+ region_size, dist_icfgr_read, dist_icfgr_write);
+
+ /* ARM GIC Architecture Specification, page 8-485. */
+	n = 32 * ((dist->gicd_typer & GICD_TYPER_ITLINESNUM_MASK) + 1) - 1;
+ alloc_registers(dist->gicd_irouter, n, region_size);
+ init_mmio_region(hyp, VGIC_GICD_IROUTER, dist->start + GICD_IROUTER_BASE,
+ region_size, dist_irouter_read, dist_irouter_write);
+
+ init_mmio_region(hyp, VGIC_GICD_PIDR2, dist->start + GICD_PIDR2,
+ sizeof(dist->gicd_pidr2), dist_pidr2_read, dist_pidr2_write);
+}
+
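+/*
+ * Set up the MMIO emulation regions for one VCPU's Redistributor, covering
+ * both the RD_base and SGI_base register frames.
+ */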
+static void
+redist_mmio_init_regions(struct hyp *hyp, int vcpuid)
+{
+ struct vgic_v3_redist *redist;
+ vm_offset_t start;
+
+ redist = &hyp->ctx[vcpuid].vgic_redist;
+ start = redist->start + GICR_FRAME_RD + GICR_CTLR;
+ init_mmio_region(hyp, VGIC_GICR_CTLR, start, sizeof(redist->gicr_ctlr),
+ redist_ctlr_read, redist_ctlr_write);
+
+ start = redist->start + GICR_FRAME_RD + GICR_TYPER;
+ init_mmio_region(hyp, VGIC_GICR_TYPER, start, sizeof(redist->gicr_typer),
+ redist_typer_read, redist_typer_write);
+
+ start = redist->start + GICR_FRAME_RD + GICR_WAKER;
+ init_mmio_region(hyp, VGIC_GICR_WAKER, start, 4, redist_waker_read,
+ redist_waker_write);
+
+ start = redist->start + GICR_FRAME_RD + GICR_PIDR2;
+ init_mmio_region(hyp, VGIC_GICR_PIDR2, start, 4, redist_pidr2_read,
+ redist_pidr2_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_IGROUPR0;
+ init_mmio_region(hyp, VGIC_GICR_IGROUPR0, start,
+ sizeof(uint32_t), redist_igroupr0_read, redist_igroupr0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ISENABLER0;
+ init_mmio_region(hyp, VGIC_GICR_ISENABLER0, start,
+ sizeof(redist->gicr_ixenabler0), redist_isenabler0_read,
+ redist_isenabler0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ICENABLER0;
+ init_mmio_region(hyp, VGIC_GICR_ICENABLER0, start,
+ sizeof(redist->gicr_ixenabler0), redist_icenabler0_read,
+ redist_icenabler0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_IPRIORITYR_BASE;
+ init_mmio_region(hyp, VGIC_GICR_IPRIORITYR, start,
+ sizeof(redist->gicr_ipriorityr), redist_ipriorityr_read,
+ redist_ipriorityr_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ICFGR0_BASE;
+ init_mmio_region(hyp, VGIC_GICR_ICFGR0, start,
+ sizeof(redist->gicr_icfgr0), redist_icfgr0_read, redist_icfgr0_write);
+
+ start = redist->start + GICR_FRAME_SGI + GICR_ICFGR1_BASE;
+ init_mmio_region(hyp, VGIC_GICR_ICFGR1, start,
+ sizeof(redist->gicr_icfgr1), redist_icfgr1_read, redist_icfgr1_write);
+}
+
+void
+vgic_v3_mmio_init(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+ int redist_region_num, dist_region_num, region_num;
+ int ncpus = 1;
+
+ dist_region_num = FIRST_REDIST_MMIO_REGION;
+ redist_region_num = \
+ ncpus * (VGIC_MMIO_REGIONS_NUM - FIRST_REDIST_MMIO_REGION);
+ region_num = dist_region_num + redist_region_num;
+
+ hyp->vgic_mmio_regions = \
+ malloc(region_num * sizeof(*hyp->vgic_mmio_regions),
+ M_VGIC_V3_MMIO, M_WAITOK | M_ZERO);
+ hyp->vgic_mmio_regions_num = region_num;
+
+ dist_mmio_init_regions(dist, hyp);
+
+ /* TODO: Do it for all VCPUs */
+ redist_mmio_init_regions(hyp, 0);
+}
+
+void
+vgic_v3_mmio_destroy(struct hyp *hyp)
+{
+ struct vgic_v3_dist *dist = &hyp->vgic_dist;
+
+ if (!hyp->vgic_mmio_regions)
+ return;
+ free(hyp->vgic_mmio_regions, M_VGIC_V3_MMIO);
+
+ free(dist->gicd_ixenabler, M_VGIC_V3_MMIO);
+ free(dist->gicd_ipriorityr, M_VGIC_V3_MMIO);
+ free(dist->gicd_icfgr, M_VGIC_V3_MMIO);
+ free(dist->gicd_irouter, M_VGIC_V3_MMIO);
+}
Index: sys/arm64/vmm/io/vgic_v3_reg.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vgic_v3_reg.h
@@ -0,0 +1,97 @@
+#ifndef _VGIC_V3_REG_H_
+#define _VGIC_V3_REG_H_
+
+/* Interrupt Controller End of Interrupt Status Register */
+#define ICH_EISR_EL2_STATUS_MASK 0xffff
+#define ICH_EISR_EL2_EOI_NOT_HANDLED(lr) ((1 << lr) & ICH_EISR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Empty List Register Status Register */
+#define ICH_ELRSR_EL2_STATUS_MASK 0xffff
+#define ICH_ELRSR_EL2_LR_EMPTY(x) ((1 << x) & ICH_ELRSR_EL2_STATUS_MASK)
+
+/* Interrupt Controller Hyp Control Register */
+#define ICH_HCR_EL2_EOICOUNT_SHIFT 27
+#define ICH_HCR_EL2_EOICOUNT_MASK (0x1f << ICH_HCR_EL2_EOICOUNT_SHIFT)
+#define ICH_HCR_EL2_TDIR (1 << 14) /* Trap non-secure EL1 writes to IC{C, V}_DIR_EL1 */
+#define ICH_HCR_EL2_TSEI	(1 << 13)	/* Trap System Error Interrupts (SEI) to EL2 */
+#define ICH_HCR_EL2_TALL1 (1 << 12) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 1 interrupts */
+#define ICH_HCR_EL2_TALL0 (1 << 11) /* Trap non-secure EL1 accesses to IC{C, V}_* for Group 0 interrupts */
+#define ICH_HCR_EL2_TC (1 << 10) /* Trap non-secure EL1 accesses to common IC{C, V}_* registers */
+#define ICH_HCR_EL2_VGRP1DIE (1 << 7) /* VM Group 1 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP1EIE (1 << 6) /* VM Group 1 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0DIE (1 << 5) /* VM Group 0 Disabled Interrupt Enable */
+#define ICH_HCR_EL2_VGRP0EIE (1 << 4) /* VM Group 0 Enabled Interrupt Enable */
+#define ICH_HCR_EL2_NPIE (1 << 3) /* No Pending Interrupt Enable */
+#define ICH_HCR_EL2_LRENPIE (1 << 2) /* List Register Entry Not Present Interrupt Enable */
+#define ICH_HCR_EL2_UIE (1 << 1) /* Underflow Interrupt Enable */
+#define ICH_HCR_EL2_En (1 << 0) /* Global enable for the virtual CPU interface */
+
+/* Interrupt Controller List Registers */
+#define ICH_LR_EL2_VINTID_MASK 0xffffffff
+#define ICH_LR_EL2_VINTID(x) ((x) & ICH_LR_EL2_VINTID_MASK)
+#define ICH_LR_EL2_PINTID_SHIFT 32
+#define ICH_LR_EL2_PINTID_MASK (0x3fUL << ICH_LR_EL2_PINTID_SHIFT)
+#define ICH_LR_EL2_PRIO_SHIFT 48
+#define ICH_LR_EL2_PRIO_MASK (0xffUL << ICH_LR_EL2_PRIO_SHIFT)
+#define ICH_LR_EL2_GROUP_SHIFT 60
+#define ICH_LR_EL2_GROUP1 (1UL << ICH_LR_EL2_GROUP_SHIFT)
+#define ICH_LR_EL2_HW (1UL << 61)
+#define ICH_LR_EL2_STATE_SHIFT 62
+#define ICH_LR_EL2_STATE_MASK (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE(x) ((x) & ICH_LR_EL2_STATE_MASK)
+#define ICH_LR_EL2_STATE_INACTIVE (0x0UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING (0x1UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_ACTIVE (0x2UL << ICH_LR_EL2_STATE_SHIFT)
+#define ICH_LR_EL2_STATE_PENDING_ACTIVE (0x3UL << ICH_LR_EL2_STATE_SHIFT)
+
+/* Interrupt Controller Maintenance Interrupt State Register */
+#define ICH_MISR_EL2_VGRP1D (1 << 7) /* vPE Group 1 Disabled */
+#define ICH_MISR_EL2_VGRP1E (1 << 6) /* vPE Group 1 Enabled */
+#define ICH_MISR_EL2_VGRP0D (1 << 5) /* vPE Group 0 Disabled */
+#define ICH_MISR_EL2_VGRP0E (1 << 4) /* vPE Group 0 Enabled */
+#define ICH_MISR_EL2_NP (1 << 3) /* No Pending */
+#define ICH_MISR_EL2_LRENP (1 << 2) /* List Register Entry Not Present */
+#define ICH_MISR_EL2_U (1 << 1) /* Underflow */
+#define ICH_MISR_EL2_EOI (1 << 0) /* End Of Interrupt */
+
+/* Interrupt Controller Virtual Machine Control Register */
+#define ICH_VMCR_EL2_VPMR_SHIFT 24
+#define ICH_VMCR_EL2_VPMR_MASK (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_LOWEST (0xff << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VPMR_PRIO_HIGHEST (0x00 << ICH_VMCR_EL2_VPMR_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_SHIFT 21
+#define ICH_VMCR_EL2_VBPR0_MASK (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR0_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR0_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_SHIFT 18
+#define ICH_VMCR_EL2_VBPR1_MASK (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VBPR1_NO_PREEMPTION \
+ (0x7 << ICH_VMCR_EL2_VBPR1_SHIFT)
+#define ICH_VMCR_EL2_VEOIM (1 << 9) /* Virtual EOI mode */
+#define ICH_VMCR_EL2_VCBPR (1 << 4) /* Virtual Common binary Point Register */
+#define ICH_VMCR_EL2_VFIQEN (1 << 3) /* Virtual FIQ enable */
+#define ICH_VMCR_EL2_VACKCTL (1 << 2) /* Virtual AckCtl */
+#define ICH_VMCR_EL2_VENG1 (1 << 1) /* Virtual Group 1 Interrupt Enable */
+#define ICH_VMCR_EL2_VENG0 (1 << 0) /* Virtual Group 0 Interrupt Enable */
+
+/* Interrupt Controller VGIC Type Register */
+#define ICH_VTR_EL2_PRIBITS_SHIFT 29
+#define ICH_VTR_EL2_PRIBITS_MASK (0x7 << ICH_VTR_EL2_PRIBITS_SHIFT)
+#define ICH_VTR_EL2_PRIBITS(x) \
+ ((((x) & ICH_VTR_EL2_PRIBITS_MASK) >> ICH_VTR_EL2_PRIBITS_SHIFT) + 1)
+#define ICH_VTR_EL2_PREBITS_SHIFT 26
+#define ICH_VTR_EL2_PREBITS_MASK (0x7 << ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_PREBITS(x) \
+ (((x) & ICH_VTR_EL2_PREBITS_MASK) >> ICH_VTR_EL2_PREBITS_SHIFT)
+#define ICH_VTR_EL2_SEIS (1 << 22) /* System Error Interrupt (SEI) Support */
+#define ICH_VTR_EL2_A3V (1 << 21) /* Affinity 3 Valid */
+#define ICH_VTR_EL2_NV4 (1 << 20) /* Direct injection of virtual interrupts. RES1 for GICv3 */
+#define ICH_VTR_EL2_TDS (1 << 19) /* Implementation supports ICH_HCR_EL2.TDIR */
+#define ICH_VTR_EL2_LISTREGS_MASK 0x1f
+/*
+ * ICH_VTR_EL2.ListRegs holds the number of list registers, minus one. Add one
+ * to get the actual number of list registers.
+ */
+#define ICH_VTR_EL2_LISTREGS(x) (((x) & ICH_VTR_EL2_LISTREGS_MASK) + 1)
+
+#endif /* !_VGIC_V3_REG_H_ */
Index: sys/arm64/vmm/io/vtimer.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vtimer.h
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_VTIMER_H_
+#define _VMM_VTIMER_H_
+
+#define GT_PHYS_NS_IRQ 30
+#define GT_VIRT_IRQ 27
+
+#define CNTP_CTL_EL0_OP0 0b11
+#define CNTP_CTL_EL0_OP2 0b001
+#define CNTP_CTL_EL0_OP1 0b011
+#define CNTP_CTL_EL0_CRn 0b1110
+#define CNTP_CTL_EL0_CRm 0b0010
+#define ISS_CNTP_CTL_EL0 \
+ (CNTP_CTL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \
+ CNTP_CTL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \
+ CNTP_CTL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \
+ CNTP_CTL_EL0_CRn << ISS_MSR_CRn_SHIFT | \
+ CNTP_CTL_EL0_CRm << ISS_MSR_CRm_SHIFT)
+
+#define CNTP_CVAL_EL0_OP0 0b11
+#define CNTP_CVAL_EL0_OP1 0b011
+#define CNTP_CVAL_EL0_OP2 0b010
+#define CNTP_CVAL_EL0_CRn 0b1110
+#define CNTP_CVAL_EL0_CRm 0b0010
+#define ISS_CNTP_CVAL_EL0 \
+ (CNTP_CVAL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \
+ CNTP_CVAL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \
+ CNTP_CVAL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \
+ CNTP_CVAL_EL0_CRn << ISS_MSR_CRn_SHIFT | \
+ CNTP_CVAL_EL0_CRm << ISS_MSR_CRm_SHIFT)
+
+#define CNTP_TVAL_EL0_OP0 0b11
+#define CNTP_TVAL_EL0_OP1 0b011
+#define CNTP_TVAL_EL0_OP2 0b000
+#define CNTP_TVAL_EL0_CRn 0b1110
+#define CNTP_TVAL_EL0_CRm 0b0010
+#define ISS_CNTP_TVAL_EL0 \
+ (CNTP_TVAL_EL0_OP0 << ISS_MSR_OP0_SHIFT | \
+ CNTP_TVAL_EL0_OP2 << ISS_MSR_OP2_SHIFT | \
+ CNTP_TVAL_EL0_OP1 << ISS_MSR_OP1_SHIFT | \
+ CNTP_TVAL_EL0_CRn << ISS_MSR_CRn_SHIFT | \
+ CNTP_TVAL_EL0_CRm << ISS_MSR_CRm_SHIFT)
+
+struct vtimer
+{
+ uint64_t cnthctl_el2;
+ uint64_t cntvoff_el2;
+};
+
+struct vtimer_cpu
+{
+ struct callout callout;
+ uint32_t cntkctl_el1;
+ /*
+ * Emulated registers:
+ *
+ * CNTP_CTL_EL0: Counter-timer Physical Timer Control Register
+ * CNTP_CVAL_EL0: Counter-timer Physical Timer CompareValue Register
+ */
+ uint64_t cntp_cval_el0;
+ uint32_t cntp_ctl_el0;
+ /*
+ * The virtual machine has full access to the virtual timer. The
+ * following registers are part of the VM context for the current CPU:
+ *
+	 * CNTV_CTL_EL0: Counter-timer Virtual Timer Control Register
+ * CNTV_CVAL_EL0: Counter-timer Virtual Timer CompareValue Register
+ */
+ uint64_t cntv_cval_el0;
+ uint32_t cntv_ctl_el0;
+};
+
+int vtimer_init(uint64_t cnthctl_el2);
+void vtimer_vminit(void *arg);
+void vtimer_cpuinit(void *arg);
+void vtimer_vmcleanup(void *arg);
+void vtimer_cleanup(void);
+
+int vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+int vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg);
+int vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg);
+#endif
Index: sys/arm64/vmm/io/vtimer.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/io/vtimer.c
@@ -0,0 +1,407 @@
+/*-
+ * Copyright (c) 2017 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/bus.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/timeet.h>
+#include <sys/timetc.h>
+
+#include <machine/bus.h>
+#include <machine/vmm.h>
+#include <machine/armreg.h>
+
+#include <arm/arm/generic_timer.h>
+#include <arm64/vmm/arm64.h>
+
+#include "vgic_v3.h"
+#include "vtimer.h"
+
+#define RES1 0xffffffffffffffffUL
+
+#define timer_enabled(ctl) \
+ (!((ctl) & CNTP_CTL_IMASK) && ((ctl) & CNTP_CTL_ENABLE))
+
+static uint64_t cnthctl_el2_reg;
+static uint32_t tmr_frq;
+
+#define timer_condition_met(ctl) ((ctl) & CNTP_CTL_ISTATUS)
+
+static int
+vtimer_virtual_timer_intr(void *arg)
+{
+ struct hypctx *hypctx;
+ uint32_t cntv_ctl;
+
+ /*
+	 * TODO: everything here is very strange. The relationship between the
+ * hardware value and the value in memory is not clear at all.
+ */
+
+ hypctx = arm64_get_active_vcpu();
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+
+ if (!hypctx) {
+ /* vm_destroy() was called. */
+ eprintf("No active vcpu\n");
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ goto out;
+ }
+ if (!timer_enabled(cntv_ctl)) {
+ eprintf("Timer not enabled\n");
+ goto out;
+ }
+ if (!timer_condition_met(cntv_ctl)) {
+ eprintf("Timer condition not met\n");
+ goto out;
+ }
+
+ vgic_v3_inject_irq(hypctx, GT_VIRT_IRQ, VGIC_IRQ_CLK);
+
+ hypctx->vtimer_cpu.cntv_ctl_el0 &= ~CNTP_CTL_ENABLE;
+ cntv_ctl = hypctx->vtimer_cpu.cntv_ctl_el0;
+
+out:
+ /*
+ * Disable the timer interrupt. This will prevent the interrupt from
+ * being reasserted as soon as we exit the handler and getting stuck
+ * in an infinite loop.
+ *
+	 * This is safe to do because the guest disables the timer and then
+	 * re-enables it as part of the interrupt handling routine.
+ */
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+
+ return (FILTER_HANDLED);
+}
+
+int
+vtimer_init(uint64_t cnthctl_el2)
+{
+ int error;
+
+ cnthctl_el2_reg = cnthctl_el2;
+ /*
+ * The guest *MUST* use the same timer frequency as the host. The
+ * register CNTFRQ_EL0 is accessible to the guest and a different value
+	 * in the guest dts file might have unforeseen consequences.
+ */
+ tmr_frq = READ_SPECIALREG(cntfrq_el0);
+
+ error = arm_tmr_setup_intr(GT_VIRT, vtimer_virtual_timer_intr, NULL, NULL);
+ if (error) {
+ printf("WARNING: arm_tmr_setup_intr() error: %d\n", error);
+ printf("WARNING: Expect reduced performance\n");
+ }
+
+ return (0);
+}
+
+void
+vtimer_vminit(void *arg)
+{
+ struct hyp *hyp;
+ uint64_t now;
+
+ hyp = (struct hyp *)arg;
+ /*
+ * Configure the Counter-timer Hypervisor Control Register for the VM.
+ *
+ * ~CNTHCTL_EL1PCEN: trap access to CNTP_{CTL, CVAL, TVAL}_EL0 from EL1
+ * CNTHCTL_EL1PCTEN: don't trap access to CNTPCT_EL0
+ */
+ hyp->vtimer.cnthctl_el2 = cnthctl_el2_reg & ~CNTHCTL_EL1PCEN;
+ hyp->vtimer.cnthctl_el2 |= CNTHCTL_EL1PCTEN;
+
+ now = READ_SPECIALREG(cntpct_el0);
+ hyp->vtimer.cntvoff_el2 = now;
+
+ return;
+}
+
+void
+vtimer_cpuinit(void *arg)
+{
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+
+ hypctx = (struct hypctx *)arg;
+ vtimer_cpu = &hypctx->vtimer_cpu;
+ /*
+ * Configure physical timer interrupts for the VCPU.
+ *
+ * CNTP_CTL_IMASK: mask interrupts
+ * ~CNTP_CTL_ENABLE: disable the timer
+ */
+ vtimer_cpu->cntp_ctl_el0 = CNTP_CTL_IMASK & ~CNTP_CTL_ENABLE;
+ /*
+ * Callout function is MP_SAFE because the VGIC uses a spin
+ * mutex when modifying the list registers.
+ */
+ callout_init(&vtimer_cpu->callout, 1);
+}
+
+void
+vtimer_vmcleanup(void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer *vtimer;
+ struct vtimer_cpu *vtimer_cpu;
+ uint32_t cntv_ctl;
+ int i;
+
+ hyp = arg;
+ vtimer = &hyp->vtimer;
+
+ hypctx = arm64_get_active_vcpu();
+ if (!hypctx) {
+ /* The active VM was destroyed, stop the timer. */
+ cntv_ctl = READ_SPECIALREG(cntv_ctl_el0);
+ cntv_ctl &= ~CNTP_CTL_ENABLE;
+ WRITE_SPECIALREG(cntv_ctl_el0, cntv_ctl);
+ }
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vtimer_cpu = &hyp->ctx[i].vtimer_cpu;
+ callout_drain(&vtimer_cpu->callout);
+ }
+}
+
+void
+vtimer_cleanup(void)
+{
+ int error;
+
+ error = arm_tmr_teardown_intr(GT_VIRT);
+ if (error)
+ printf("WARNING: arm_tmr_teardown_intr() error: %d\n", error);
+
+}
+
+static void
+vtimer_inject_irq_callout_func(void *context)
+{
+ struct hypctx *hypctx;
+
+ hypctx = context;
+ vgic_v3_inject_irq(hypctx, GT_PHYS_NS_IRQ, VGIC_IRQ_CLK);
+}
+
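+/*
+ * Arm the callout to fire when the guest's physical timer would: convert the
+ * remaining ticks (CNTP_CVAL_EL0 - CNTPCT_EL0) to sbintime using the counter
+ * frequency. If the compare value is already in the past, inject the
+ * interrupt immediately.
+ */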
+static void
+vtimer_schedule_irq(struct vtimer_cpu *vtimer_cpu, struct hypctx *hypctx)
+{
+ sbintime_t time;
+ uint64_t cntpct_el0;
+ uint64_t diff;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ if (vtimer_cpu->cntp_cval_el0 < cntpct_el0) {
+ /* Timer set in the past, trigger interrupt */
+ vgic_v3_inject_irq(hypctx, GT_PHYS_NS_IRQ, VGIC_IRQ_CLK);
+ } else {
+ diff = vtimer_cpu->cntp_cval_el0 - cntpct_el0;
+ time = diff * SBT_1S / tmr_frq;
+ callout_reset_sbt(&vtimer_cpu->callout, time, 0,
+ vtimer_inject_irq_callout_func, hypctx, 0);
+ }
+}
+
+static void
+vtimer_remove_irq(struct hypctx *hypctx)
+{
+ struct vtimer_cpu *vtimer_cpu;
+
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ callout_drain(&vtimer_cpu->callout);
+ /*
+ * The interrupt needs to be deactivated here regardless of the callout
+ * function having been executed. The timer interrupt can be masked with
+ * the CNTP_CTL_EL0.IMASK bit instead of reading the IAR register.
+ * Masking the interrupt doesn't remove it from the list registers.
+ */
+ vgic_v3_remove_irq(hypctx, GT_PHYS_NS_IRQ, true);
+}
+
+/*
+ * Timer emulation functions.
+ *
+ * The guest dts is configured to use the physical timer because the Generic
+ * Timer can only trap physical timer accesses. This is why we always read the
+ * physical counter value when programming the time for the timer interrupt in
+ * the guest.
+ */
+
+int
+vtimer_phys_ctl_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ if (vtimer_cpu->cntp_cval_el0 < cntpct_el0)
+ /* Timer condition met */
+ *rval = vtimer_cpu->cntp_ctl_el0 | CNTP_CTL_ISTATUS;
+ else
+ *rval = vtimer_cpu->cntp_ctl_el0 & ~CNTP_CTL_ISTATUS;
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_ctl_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t ctl_el0;
+ bool timer_toggled_on;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ timer_toggled_on = false;
+ ctl_el0 = vtimer_cpu->cntp_ctl_el0;
+
+ if (!timer_enabled(ctl_el0) && timer_enabled(wval))
+ timer_toggled_on = true;
+
+ vtimer_cpu->cntp_ctl_el0 = wval;
+
+ if (timer_toggled_on)
+ vtimer_schedule_irq(vtimer_cpu, hypctx);
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_cval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ *rval = vtimer_cpu->cntp_cval_el0;
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_cval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ vtimer_cpu->cntp_cval_el0 = wval;
+
+ if (timer_enabled(vtimer_cpu->cntp_ctl_el0)) {
+ vtimer_remove_irq(hypctx);
+ vtimer_schedule_irq(vtimer_cpu, hypctx);
+ }
+
+ *retu = false;
+ return (0);
+}
+
+int
+vtimer_phys_tval_read(void *vm, int vcpuid, uint64_t *rval, void *arg)
+{
+ struct hyp *hyp;
+ struct vtimer_cpu *vtimer_cpu;
+	uint64_t cntpct_el0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ vtimer_cpu = &hyp->ctx[vcpuid].vtimer_cpu;
+
+ if (!(vtimer_cpu->cntp_ctl_el0 & CNTP_CTL_ENABLE)) {
+ /*
+ * ARMv8 Architecture Manual, p. D7-2702: the result of reading
+ * TVAL when the timer is disabled is UNKNOWN. I have chosen to
+ * return the maximum value possible on 32 bits which means the
+ * timer will fire very far into the future.
+ */
+ *rval = (uint32_t)RES1;
+ } else {
+		cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+		*rval = (uint32_t)(vtimer_cpu->cntp_cval_el0 - cntpct_el0);
+ }
+
+ *retu = false;
+ return (0);
+}
+
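+/*
+ * Writing CNTP_TVAL_EL0 programs the timer relative to the current counter:
+ * CNTP_CVAL_EL0 = CNTPCT_EL0 + SignExtend(TVAL).
+ */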
+int
+vtimer_phys_tval_write(void *vm, int vcpuid, uint64_t wval, void *arg)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ struct vtimer_cpu *vtimer_cpu;
+ uint64_t cntpct_el0;
+ bool *retu = arg;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+ vtimer_cpu = &hypctx->vtimer_cpu;
+
+ cntpct_el0 = READ_SPECIALREG(cntpct_el0);
+ vtimer_cpu->cntp_cval_el0 = (int32_t)wval + cntpct_el0;
+
+ if (timer_enabled(vtimer_cpu->cntp_ctl_el0)) {
+ vtimer_remove_irq(hypctx);
+ vtimer_schedule_irq(vtimer_cpu, hypctx);
+ }
+
+ *retu = false;
+ return (0);
+}
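
A minimal sketch (not part of this patch) of the CVAL/TVAL/CNTPCT relationships the handlers above rely on; the helper names are hypothetical and the comparison mirrors vtimer_phys_ctl_read().

#include <stdbool.h>
#include <stdint.h>

/* Writing TVAL programs an absolute compare value relative to now. */
static uint64_t
tval_write_to_cval(uint64_t cntpct, uint32_t tval)
{
	/* TVAL is a signed 32-bit offset from the current counter value. */
	return (cntpct + (int32_t)tval);
}

/* Reading TVAL returns the remaining (signed) distance to the compare value. */
static uint32_t
cval_to_tval_read(uint64_t cntpct, uint64_t cval)
{
	return ((uint32_t)(cval - cntpct));
}

/* ISTATUS as reported by vtimer_phys_ctl_read(): the compare value has passed. */
static bool
timer_condition_met(uint64_t cntpct, uint64_t cval)
{
	return (cval < cntpct);
}
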
Index: sys/arm64/vmm/mmu.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/mmu.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MMU_H_
+#define _VMM_MMU_H_
+
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+
+#include "hyp.h"
+
+#define ktohyp(kva) (((vm_offset_t)(kva) & HYP_KVA_MASK) | \
+ HYP_KVA_OFFSET)
+#define ipatok(ipa, hypmap) (PHYS_TO_DMAP(pmap_extract(hypmap, (ipa))))
+#define gtoipa(gva) ((gva) - KERNBASE + VM_GUEST_BASE_IPA)
+
+#define page_aligned(x) (((vm_offset_t)(x) & PAGE_MASK) == 0)
+
+void hypmap_init(pmap_t map, enum pmap_stage pm_stage);
+void hypmap_map(pmap_t map, vm_offset_t va, size_t len,
+ vm_prot_t prot);
+void hypmap_map_identity(pmap_t map, vm_offset_t va, size_t len,
+ vm_prot_t prot);
+void hypmap_set(void *arg, vm_offset_t va, vm_offset_t pa,
+ size_t len, vm_prot_t prot);
+vm_paddr_t hypmap_get(void *arg, vm_offset_t va);
+void hypmap_cleanup(pmap_t map);
+
+#endif
Index: sys/arm64/vmm/mmu.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/mmu.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * This software was developed by Alexandru Elisei under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_param.h>
+#include <machine/vm.h>
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+#include <machine/pmap.h>
+
+#include "mmu.h"
+#include "arm64.h"
+
+MALLOC_DECLARE(M_HYP);
+
+void
+hypmap_init(pmap_t map, enum pmap_stage pm_stage)
+{
+ mtx_init(&map->pm_mtx, "hypmap_pm_mtx", NULL, MTX_DEF);
+ pmap_pinit_stage(map, pm_stage, 4);
+}
+
+void
+hypmap_map(pmap_t map, vm_offset_t va, size_t len, vm_prot_t prot)
+{
+ vm_offset_t va_end, hypva;
+ vm_page_t dummy_page;
+
+ dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO);
+ dummy_page->oflags = VPO_UNMANAGED;
+ dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT;
+
+ /*
+ * Add the physical pages which correspond to the specified virtual
+ * addresses. The virtual addresses span contiguous virtual pages, but
+ * they might not reside in contiguous physical pages.
+ */
+ va_end = va + len - 1;
+ va = trunc_page(va);
+ while (va < va_end) {
+ dummy_page->phys_addr = vtophys(va);
+ hypva = (va >= VM_MIN_KERNEL_ADDRESS) ? ktohyp(va) : va;
+ pmap_enter(map, hypva, dummy_page, prot, PMAP_ENTER_WIRED, 0);
+ va += PAGE_SIZE;
+ }
+
+ free(dummy_page, M_HYP);
+}
+
+void
+hypmap_map_identity(pmap_t map, vm_offset_t va, size_t len,
+ vm_prot_t prot)
+{
+ vm_offset_t va_end;
+ vm_page_t dummy_page;
+
+ dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO);
+ dummy_page->oflags = VPO_UNMANAGED;
+ dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT;
+
+ /*
+ * The virtual addresses span contiguous virtual pages, but they might
+ * not reside in contiguous physical pages. For each virtual page we
+ * get the physical page address and use that for the mapping.
+ */
+ va_end = va + len - 1;
+ va = trunc_page(va);
+ while (va < va_end) {
+ dummy_page->phys_addr = vtophys(va);
+ pmap_enter(map, dummy_page->phys_addr, dummy_page,
+ prot, PMAP_ENTER_WIRED, 0);
+ va += PAGE_SIZE;
+ }
+
+ free(dummy_page, M_HYP);
+}
+
+/*
+ * Map 'len' bytes starting at virtual address 'va' to 'len' bytes
+ * starting at physical address 'pa'
+ */
+void
+hypmap_set(void *arg, vm_offset_t va, vm_offset_t pa, size_t len,
+ vm_prot_t prot)
+{
+ vm_offset_t va_end, hypva;
+ vm_page_t dummy_page;
+ struct hyp *hyp;
+ pmap_t map;
+
+ hyp = (struct hyp *)arg;
+ map = hyp->stage2_map;
+
+ dummy_page = malloc(sizeof(*dummy_page), M_HYP, M_WAITOK | M_ZERO);
+ dummy_page->oflags = VPO_UNMANAGED;
+ dummy_page->md.pv_memattr = VM_MEMATTR_DEFAULT;
+
+ va_end = va + len - 1;
+ va = trunc_page(va);
+ dummy_page->phys_addr = trunc_page(pa);
+ while (va < va_end) {
+ hypva = (va >= VM_MIN_KERNEL_ADDRESS) ? ktohyp(va) : va;
+ pmap_enter(map, hypva, dummy_page, prot, PMAP_ENTER_WIRED, 0);
+ va += PAGE_SIZE;
+ dummy_page->phys_addr += PAGE_SIZE;
+ }
+
+ free(dummy_page, M_HYP);
+}
+
+/*
+ * Return the physical address associated with virtual address 'va'
+ */
+vm_paddr_t
+hypmap_get(void *arg, vm_offset_t va)
+{
+ struct hyp *hyp;
+ pmap_t map;
+
+ hyp = (struct hyp *)arg;
+ map = hyp->stage2_map;
+
+ return (pmap_extract(map, va));
+}
+
+/*
+ * Remove all the mappings from the hyp translation tables
+ */
+void
+hypmap_cleanup(pmap_t map)
+{
+ pmap_remove(map, HYP_VM_MIN_ADDRESS, HYP_VM_MAX_ADDRESS);
+ mtx_destroy(&map->pm_mtx);
+ pmap_release(map);
+}
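
A usage sketch (not part of this patch) of the helpers above: initialising an EL2 stage 1 pmap and mapping the hypervisor code into it at its ktohyp() address. The pmap, the symbol names and the PM_STAGE1 constant are assumptions here, not definitions taken from this patch.

#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/pmap.h>

#include "mmu.h"

/* Hypothetical EL2 stage 1 pmap and hypervisor code boundaries. */
static struct pmap el2_pmap;
extern char hyp_code_start[], hyp_code_end[];

static void
el2_map_hyp_code(void)
{
	/* Create empty EL2 stage 1 translation tables. */
	hypmap_init(&el2_pmap, PM_STAGE1);

	/* Map the hypervisor text at its ktohyp() address, read/execute. */
	hypmap_map(&el2_pmap, (vm_offset_t)hyp_code_start,
	    hyp_code_end - hyp_code_start, VM_PROT_READ | VM_PROT_EXECUTE);
}
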
Index: sys/arm64/vmm/psci.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/psci.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _PSCI_H_
+#define _PSCI_H_
+
+#include "arm64.h"
+
+int psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme,
+ bool *retu);
+
+#endif
Index: sys/arm64/vmm/psci.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/psci.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+
+#include <dev/psci/psci.h>
+
+#include "arm64.h"
+#include "psci.h"
+
+#define PSCI_VERSION_0_2 0x2
+
+static int
+psci_version(struct hypctx *hypctx, bool *retu)
+{
+
+ hypctx->regs.x[0] = PSCI_VERSION_0_2;
+
+ *retu = false;
+ return (0);
+}
+
+static int
+psci_system_off(struct vm_exit *vme, bool *retu)
+{
+ vme->u.suspended.how = VM_SUSPEND_POWEROFF;
+ vme->exitcode = VM_EXITCODE_SUSPENDED;
+
+ *retu = true;
+ return (0);
+}
+
+int
+psci_handle_call(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+ struct hyp *hyp;
+ struct hypctx *hypctx;
+ uint64_t func_id;
+ uint32_t esr_el2, esr_iss;
+ int error;
+
+ hyp = vm_get_cookie(vm);
+ hypctx = &hyp->ctx[vcpuid];
+
+ esr_el2 = hypctx->exit_info.esr_el2;
+ esr_iss = esr_el2 & ESR_ELx_ISS_MASK;
+
+ if (esr_iss != 0) {
+ eprintf("Malformed HVC instruction with immediate: 0x%x\n",
+ esr_iss);
+ error = 1;
+ goto out;
+ }
+
+ func_id = hypctx->regs.x[0];
+ switch (func_id) {
+ case PSCI_FNID_VERSION:
+ error = psci_version(hypctx, retu);
+ break;
+ case PSCI_FNID_SYSTEM_OFF:
+ error = psci_system_off(vme, retu);
+ break;
+ default:
+ eprintf("Unimplemented PSCI function: 0x%016lx\n", func_id);
+ hypctx->regs.x[0] = PSCI_RETVAL_NOT_SUPPORTED;
+ error = 1;
+ }
+
+out:
+ return (error);
+}
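
psci_handle_call() above assumes the SMCCC convention for PSCI over HVC: the function ID arrives in x0, arguments in x1-x3, the HVC immediate is 0, and the return value goes back in x0. A guest-side sketch of that conduit (not part of this patch; the helper name is hypothetical):

#include <stdint.h>

static inline uint64_t
psci_hvc_call(uint64_t func_id, uint64_t arg0, uint64_t arg1, uint64_t arg2)
{
	register uint64_t x0 __asm__("x0") = func_id;
	register uint64_t x1 __asm__("x1") = arg0;
	register uint64_t x2 __asm__("x2") = arg1;
	register uint64_t x3 __asm__("x3") = arg2;

	/* PSCI over the HVC conduit; immediate must be 0. */
	__asm__ __volatile__("hvc #0"
	    : "+r" (x0)
	    : "r" (x1), "r" (x2), "r" (x3)
	    : "memory");
	return (x0);
}

A guest power-off request made this way (function ID 0x84000008, PSCI 0.2 SYSTEM_OFF) is what psci_system_off() above turns into a VM_SUSPEND_POWEROFF exit.
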
Index: sys/arm64/vmm/reset.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/reset.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _VMM_RESET_H_
+#define _VMM_RESET_H_
+
+void reset_vm_el01_regs(void *vcpu);
+void reset_vm_el2_regs(void *vcpu);
+
+#endif
Index: sys/arm64/vmm/reset.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/reset.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/hypervisor.h>
+
+#include "arm64.h"
+#include "reset.h"
+
+/*
+ * Make the architecturally UNKNOWN value 0. As a bonus, we don't have to
+ * manually set all those RES0 fields.
+ */
+#define ARCH_UNKNOWN 0
+#define set_arch_unknown(reg) (memset(&(reg), ARCH_UNKNOWN, sizeof(reg)))
+
+void
+reset_vm_el01_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+
+ el2ctx = vcpu;
+
+ set_arch_unknown(el2ctx->regs);
+
+ set_arch_unknown(el2ctx->actlr_el1);
+ set_arch_unknown(el2ctx->afsr0_el1);
+ set_arch_unknown(el2ctx->afsr1_el1);
+ set_arch_unknown(el2ctx->amair_el1);
+ set_arch_unknown(el2ctx->contextidr_el1);
+ set_arch_unknown(el2ctx->cpacr_el1);
+ set_arch_unknown(el2ctx->elr_el1);
+ set_arch_unknown(el2ctx->esr_el1);
+ set_arch_unknown(el2ctx->far_el1);
+ set_arch_unknown(el2ctx->mair_el1);
+ set_arch_unknown(el2ctx->par_el1);
+
+ /*
+ * Guest starts with:
+ * ~SCTLR_M: MMU off
+ * ~SCTLR_C: data cache off
+ * SCTLR_CP15BEN: memory barrier instruction enable from EL0; RAO/WI
+ * ~SCTLR_I: instruction cache off
+ */
+ el2ctx->sctlr_el1 = SCTLR_RES1;
+ el2ctx->sctlr_el1 &= ~SCTLR_M & ~SCTLR_C & ~SCTLR_I;
+ el2ctx->sctlr_el1 |= SCTLR_CP15BEN;
+
+ set_arch_unknown(el2ctx->sp_el0);
+ set_arch_unknown(el2ctx->tcr_el1);
+ set_arch_unknown(el2ctx->tpidr_el0);
+ set_arch_unknown(el2ctx->tpidr_el1);
+ set_arch_unknown(el2ctx->tpidrro_el0);
+ set_arch_unknown(el2ctx->ttbr0_el1);
+ set_arch_unknown(el2ctx->ttbr1_el1);
+ set_arch_unknown(el2ctx->vbar_el1);
+ set_arch_unknown(el2ctx->spsr_el1);
+}
+
+void
+reset_vm_el2_regs(void *vcpu)
+{
+ struct hypctx *el2ctx;
+ uint64_t cpu_aff;
+
+ el2ctx = vcpu;
+
+ /*
+ * Set the Hypervisor Configuration Register:
+ *
+ * HCR_RW: use AArch64 for EL1
+ * HCR_BSU_IS: barrier instructions apply to the inner shareable
+ * domain
+ * HCR_SWIO: turn set/way invalidate into set/way clean and
+ * invalidate
+ * HCR_FB: broadcast maintenance operations
+ * HCR_AMO: route physical SError interrupts to EL2
+ * HCR_IMO: route physical IRQ interrupts to EL2
+ * HCR_FMO: route physical FIQ interrupts to EL2
+ * HCR_VM: use stage 2 translation
+ */
+ el2ctx->hcr_el2 = HCR_RW | HCR_BSU_IS | HCR_SWIO | HCR_FB |
+ HCR_VM | HCR_AMO | HCR_IMO | HCR_FMO;
+
+ el2ctx->vmpidr_el2 = VMPIDR_EL2_RES1;
+ /* The guest will detect a multi-core, single-threaded CPU */
+ el2ctx->vmpidr_el2 &= ~VMPIDR_EL2_U & ~VMPIDR_EL2_MT;
+ /* Only 24 bits of affinity, for a grand total of 16,777,216 cores. */
+ cpu_aff = el2ctx->vcpu & (CPU_AFF0_MASK | CPU_AFF1_MASK | CPU_AFF2_MASK);
+ el2ctx->vmpidr_el2 |= cpu_aff;
+
+ /* Use the same CPU identification information as the host */
+ el2ctx->vpidr_el2 = CPU_IMPL_TO_MIDR(CPU_IMPL_ARM);
+ el2ctx->vpidr_el2 |= CPU_VAR_TO_MIDR(0);
+ el2ctx->vpidr_el2 |= CPU_ARCH_TO_MIDR(0xf);
+ el2ctx->vpidr_el2 |= CPU_PART_TO_MIDR(CPU_PART_FOUNDATION);
+ el2ctx->vpidr_el2 |= CPU_REV_TO_MIDR(0);
+
+ /*
+ * Don't trap accesses to CPACR_EL1 or to trace, SVE, Advanced SIMD
+ * and floating point functionality to EL2.
+ */
+ el2ctx->cptr_el2 = CPTR_RES1;
+ /*
+ * Disable interrupts in the guest. The guest OS will re-enable
+ * them.
+ */
+ el2ctx->spsr_el2 = PSR_D | PSR_A | PSR_I | PSR_F;
+ /* Use the EL1 stack when taking exceptions to EL1 */
+ el2ctx->spsr_el2 |= PSR_M_EL1h;
+}
Index: sys/arm64/vmm/vmm.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm.c
@@ -0,0 +1,910 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/cpuset.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_param.h>
+
+#include <machine/cpu.h>
+#include <machine/vm.h>
+#include <machine/pcb.h>
+#include <machine/param.h>
+#include <machine/smp.h>
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+#include <machine/armreg.h>
+
+#include "vmm_stat.h"
+#include "vmm_mem.h"
+#include "arm64.h"
+#include "mmu.h"
+#include "psci.h"
+
+#include "io/vgic_v3.h"
+#include "io/vtimer.h"
+
+#define BSP 0 /* the bootstrap processor */
+
+struct vcpu {
+ int flags;
+ enum vcpu_state state;
+ struct mtx mtx;
+ int hostcpu; /* host cpuid this vcpu last ran on */
+ int vcpuid;
+ void *stats;
+ struct vm_exit exitinfo;
+ uint64_t nextpc; /* (x) next instruction to execute */
+};
+
+#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
+#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
+#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
+#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
+#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
+
+struct mem_seg {
+ uint64_t gpa;
+ size_t len;
+ bool wired;
+ vm_object_t object;
+};
+#define VM_MAX_MEMORY_SEGMENTS 2
+
+struct vm {
+ void *cookie;
+ struct vcpu vcpu[VM_MAXCPU];
+ int num_mem_segs;
+ struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
+ char name[VM_MAX_NAMELEN];
+ /*
+ * Set of active vcpus.
+ * An active vcpu is one that has been started implicitly (BSP) or
+ * explicitly (AP) by sending it a startup ipi.
+ */
+ cpuset_t active_cpus;
+ uint16_t maxcpus;
+};
+
+static bool vmm_initialized = false;
+
+static struct vmm_ops *ops = NULL;
+
+#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0)
+#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)
+
+#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL)
+#define VMRUN(vmi, vcpu, pc, pmap, rvc, sc) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, pc, pmap, rvc, sc) : ENXIO)
+#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
+#define VMMMAP_SET(vmi, ipa, pa, len, prot) \
+ (ops != NULL ? \
+ (*ops->vmmapset)(vmi, ipa, pa, len, prot) : ENXIO)
+#define VMMMAP_GET(vmi, gpa) \
+ (ops != NULL ? (*ops->vmmapget)(vmi, gpa) : ENXIO)
+#define VMGETREG(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETREG(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
+#define VMGETCAP(vmi, vcpu, num, retval) \
+ (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
+#define VMSETCAP(vmi, vcpu, num, val) \
+ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+
+#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
+#define fpu_stop_emulating() clts()
+
+static int vm_handle_wfi(struct vm *vm, int vcpuid,
+ struct vm_exit *vme, bool *retu);
+
+static MALLOC_DEFINE(M_VMM, "vmm", "vmm");
+
+/* statistics */
+static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
+
+SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
+
+/*
+ * Halt the guest if all vcpus are executing a WFI instruction with
+ * interrupts disabled.
+ */
+static int halt_detection_enabled = 1;
+SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
+ &halt_detection_enabled, 0,
+ "Halt VM if all vcpus execute WFI with interrupts disabled");
+
+static int vmm_ipinum;
+SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
+ "IPI vector used for vcpu notifications");
+
+static int trace_guest_exceptions;
+SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
+ &trace_guest_exceptions, 0,
+ "Trap into hypervisor on all guest exceptions and reflect them back");
+
+static void
+vcpu_cleanup(struct vm *vm, int i, bool destroy)
+{
+// struct vcpu *vcpu = &vm->vcpu[i];
+}
+
+static void
+vcpu_init(struct vm *vm, uint32_t vcpu_id, bool create)
+{
+ struct vcpu *vcpu;
+
+ KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
+ ("cpus_init: invalid vcpu %d", vcpu_id));
+
+ vcpu = &vm->vcpu[vcpu_id];
+
+ if (create) {
+ KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
+ "initialized", vcpu_id));
+ vcpu_lock_init(vcpu);
+ vcpu->hostcpu = NOCPU;
+ vcpu->vcpuid = vcpu_id;
+ }
+}
+
+struct vm_exit *
+vm_exitinfo(struct vm *vm, int cpuid)
+{
+ struct vcpu *vcpu;
+
+ if (cpuid < 0 || cpuid >= VM_MAXCPU)
+ panic("vm_exitinfo: invalid cpuid %d", cpuid);
+
+ vcpu = &vm->vcpu[cpuid];
+
+ return (&vcpu->exitinfo);
+}
+
+static int
+vmm_init(void)
+{
+ ops = &vmm_ops_arm;
+
+ return (VMM_INIT(0));
+}
+
+static int
+vmm_handler(module_t mod, int what, void *arg)
+{
+ int error;
+
+ switch (what) {
+ case MOD_LOAD:
+ vmmdev_init();
+ error = vmm_init();
+ if (error == 0)
+ vmm_initialized = true;
+ break;
+ case MOD_UNLOAD:
+ error = vmmdev_cleanup();
+ if (error == 0 && vmm_initialized) {
+ error = VMM_CLEANUP();
+ if (error)
+ vmm_initialized = false;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t vmm_kmod = {
+ "vmm",
+ vmm_handler,
+ NULL
+};
+
+/*
+ * vmm initialization has the following dependencies:
+ *
+ * - HYP initialization requires smp_rendezvous() and therefore must happen
+ * after SMP is fully functional (after SI_SUB_SMP).
+ */
+DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+MODULE_VERSION(vmm, 1);
+
+int
+vm_create(const char *name, struct vm **retvm)
+{
+ struct vm *vm;
+ int i;
+
+ /*
+ * If vmm.ko could not be successfully initialized then don't attempt
+ * to create the virtual machine.
+ */
+ if (!vmm_initialized)
+ return (ENXIO);
+
+ if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
+ return (EINVAL);
+
+ vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
+ strcpy(vm->name, name);
+ vm->maxcpus = VM_MAXCPU;
+ vm->cookie = VMINIT(vm);
+
+ for (i = 0; i < vm->maxcpus; i++)
+ vcpu_init(vm, i, true);
+
+ vm_activate_cpu(vm, BSP);
+
+ *retvm = vm;
+ return (0);
+}
+
+static void
+vm_cleanup(struct vm *vm, bool destroy)
+{
+ VMCLEANUP(vm->cookie);
+}
+
+void
+vm_destroy(struct vm *vm)
+{
+ vm_cleanup(vm, true);
+ free(vm, M_VMM);
+}
+
+const char *
+vm_name(struct vm *vm)
+{
+ return (vm->name);
+}
+
+#include <sys/queue.h>
+#include <sys/linker.h>
+
+static caddr_t
+search_by_type(const char *type, caddr_t preload_metadata)
+{
+ caddr_t curp, lname;
+ uint32_t *hdr;
+ int next;
+
+ if (preload_metadata != NULL) {
+
+ curp = preload_metadata;
+ lname = NULL;
+ for (;;) {
+ hdr = (uint32_t *)curp;
+ if (hdr[0] == 0 && hdr[1] == 0)
+ break;
+
+ /* remember the start of each record */
+ if (hdr[0] == MODINFO_NAME)
+ lname = curp;
+
+ /* Search for a MODINFO_TYPE field */
+ if ((hdr[0] == MODINFO_TYPE) &&
+ !strcmp(type, curp + sizeof(uint32_t) * 2))
+ return (lname);
+
+ /* skip to next field */
+ next = sizeof(uint32_t) * 2 + hdr[1];
+ next = roundup(next, sizeof(u_long));
+ curp += next;
+ }
+ }
+ return (NULL);
+}
+
+static int
+vm_handle_reg_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+ struct hyp *hyp;
+ struct vm_exit *vme;
+ struct vre *vre;
+ reg_read_t rread;
+ reg_write_t rwrite;
+ uint32_t iss_reg;
+ int error;
+
+ hyp = (struct hyp *)vm->cookie;
+ vme = vm_exitinfo(vm, vcpuid);
+ vre = &vme->u.reg_emul.vre;
+
+ iss_reg = vre->inst_syndrome & ISS_MSR_REG_MASK;
+ switch (iss_reg) {
+ case ISS_CNTP_CTL_EL0:
+ rread = vtimer_phys_ctl_read;
+ rwrite = vtimer_phys_ctl_write;
+ break;
+ case ISS_CNTP_CVAL_EL0:
+ rread = vtimer_phys_cval_read;
+ rwrite = vtimer_phys_cval_write;
+ break;
+ case ISS_CNTP_TVAL_EL0:
+ rread = vtimer_phys_tval_read;
+ rwrite = vtimer_phys_tval_write;
+ break;
+ default:
+ goto out_user;
+ }
+
+ error = vmm_emulate_register(vm, vcpuid, vre, rread, rwrite, retu);
+
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+static int
+vm_mmio_region_match(const void *key, const void *memb)
+{
+ const uint64_t *addr = key;
+ const struct vgic_mmio_region *vmr = memb;
+
+ if (*addr < vmr->start)
+ return (-1);
+ else if (*addr >= vmr->start && *addr < vmr->end)
+ return (0);
+ else
+ return (1);
+}
+
+static int
+vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
+{
+ struct vm_exit *vme;
+ struct vie *vie;
+ struct hyp *hyp = vm->cookie;
+ uint64_t fault_ipa;
+ struct vgic_mmio_region *vmr;
+ int error;
+
+ if (!hyp->vgic_attached)
+ goto out_user;
+
+ vme = vm_exitinfo(vm, vcpuid);
+ vie = &vme->u.inst_emul.vie;
+
+ fault_ipa = vme->u.inst_emul.gpa;
+
+ vmr = bsearch(&fault_ipa, hyp->vgic_mmio_regions,
+ hyp->vgic_mmio_regions_num, sizeof(struct vgic_mmio_region),
+ vm_mmio_region_match);
+ if (!vmr)
+ goto out_user;
+
+ error = vmm_emulate_instruction(vm, vcpuid, fault_ipa, vie,
+ vmr->read, vmr->write, retu);
+
+ return (error);
+
+out_user:
+ *retu = true;
+ return (0);
+}
+
+static int
+vm_handle_poweroff(struct vm *vm, int vcpuid)
+{
+ return (0);
+}
+
+static int
+vm_handle_psci_call(struct vm *vm, int vcpuid, bool *retu)
+{
+ struct vm_exit *vme;
+ enum vm_suspend_how how;
+ int error;
+
+ vme = vm_exitinfo(vm, vcpuid);
+
+ error = psci_handle_call(vm, vcpuid, vme, retu);
+ if (error)
+ goto out;
+
+ if (vme->exitcode == VM_EXITCODE_SUSPENDED) {
+ how = vme->u.suspended.how;
+ switch (how) {
+ case VM_SUSPEND_POWEROFF:
+ vm_handle_poweroff(vm, vcpuid);
+ break;
+ default:
+ /* Nothing to do */
+ ;
+ }
+ }
+
+out:
+ return (error);
+}
+
+int
+vm_run(struct vm *vm, struct vm_run *vmrun)
+{
+ int error, vcpuid;
+ register_t pc;
+ struct vm_exit *vme;
+ bool retu;
+ void *rvc, *sc;
+
+ vcpuid = vmrun->cpuid;
+ pc = vmrun->pc;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EINVAL);
+
+ rvc = sc = NULL;
+restart:
+ critical_enter();
+ error = VMRUN(vm->cookie, vcpuid, pc, NULL, rvc, sc);
+ critical_exit();
+
+ vme = vm_exitinfo(vm, vcpuid);
+ if (error == 0) {
+ retu = false;
+ switch (vme->exitcode) {
+ case VM_EXITCODE_INST_EMUL:
+ pc = vme->pc + vme->inst_length;
+ error = vm_handle_inst_emul(vm, vcpuid, &retu);
+ break;
+
+ case VM_EXITCODE_REG_EMUL:
+ pc = vme->pc + vme->inst_length;
+ error = vm_handle_reg_emul(vm, vcpuid, &retu);
+ break;
+
+ case VM_EXITCODE_HVC:
+ /*
+ * The HVC instruction saves the address for the
+ * next instruction as the return address.
+ */
+ pc = vme->pc;
+ /*
+ * The PSCI call can change the exit information in the
+ * case of suspend/reset/poweroff/cpu off/cpu on.
+ */
+ error = psci_handle_call(vm, vcpuid, vme, &retu);
+ break;
+
+ case VM_EXITCODE_WFI:
+ pc = vme->pc + vme->inst_length;
+ error = vm_handle_wfi(vm, vcpuid, vme, &retu);
+ break;
+
+ default:
+ /* Handle in userland */
+ retu = true;
+ break;
+ }
+ }
+
+ if (error == 0 && retu == false)
+ goto restart;
+
+ /* Copy the exit information */
+ bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
+
+ return (error);
+}
+
+int
+vm_activate_cpu(struct vm *vm, int vcpuid)
+{
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (CPU_ISSET(vcpuid, &vm->active_cpus))
+ return (EBUSY);
+
+ CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
+ return (0);
+
+}
+
+cpuset_t
+vm_active_cpus(struct vm *vm)
+{
+
+ return (vm->active_cpus);
+}
+
+void *
+vcpu_stats(struct vm *vm, int vcpuid)
+{
+
+ return (vm->vcpu[vcpuid].stats);
+}
+
+static int
+vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+
+ vcpu_assert_locked(vcpu);
+
+ /*
+ * State transitions from the vmmdev_ioctl() must always begin from
+ * the VCPU_IDLE state. This guarantees that there is only a single
+ * ioctl() operating on a vcpu at any point.
+ */
+ if (from_idle) {
+ while (vcpu->state != VCPU_IDLE)
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+ } else {
+ KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
+ "vcpu idle state"));
+ }
+
+ if (vcpu->state == VCPU_RUNNING) {
+ KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
+ "mismatch for running vcpu", curcpu, vcpu->hostcpu));
+ } else {
+ KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
+ "vcpu that is not running", vcpu->hostcpu));
+ }
+
+ /*
+ * The following state transitions are allowed:
+ * IDLE -> FROZEN -> IDLE
+ * FROZEN -> RUNNING -> FROZEN
+ * FROZEN -> SLEEPING -> FROZEN
+ */
+ switch (vcpu->state) {
+ case VCPU_IDLE:
+ case VCPU_RUNNING:
+ case VCPU_SLEEPING:
+ error = (newstate != VCPU_FROZEN);
+ break;
+ case VCPU_FROZEN:
+ error = (newstate == VCPU_FROZEN);
+ break;
+ default:
+ error = 1;
+ break;
+ }
+
+ if (error)
+ return (EBUSY);
+
+ vcpu->state = newstate;
+ if (newstate == VCPU_RUNNING)
+ vcpu->hostcpu = curcpu;
+ else
+ vcpu->hostcpu = NOCPU;
+
+ if (newstate == VCPU_IDLE)
+ wakeup(&vcpu->state);
+
+ return (0);
+}
+
+int
+vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
+ bool from_idle)
+{
+ int error;
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ vcpu_unlock(vcpu);
+
+ return (error);
+}
+
+enum vcpu_state
+vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
+{
+ struct vcpu *vcpu;
+ enum vcpu_state state;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ state = vcpu->state;
+ if (hostcpu != NULL)
+ *hostcpu = vcpu->hostcpu;
+ vcpu_unlock(vcpu);
+
+ return (state);
+}
+
+uint64_t
+vm_gpa2hpa(struct vm *vm, uint64_t gpa, size_t len)
+{
+ uint64_t nextpage;
+
+ nextpage = trunc_page(gpa + PAGE_SIZE);
+ if (len > nextpage - gpa)
+ panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%zu", gpa, len);
+
+ return (VMMMAP_GET(vm->cookie, gpa));
+}
+
+int
+vm_gpabase2memseg(struct vm *vm, uint64_t gpabase,
+ struct vm_memory_segment *seg)
+{
+ int i;
+
+ for (i = 0; i < vm->num_mem_segs; i++) {
+ if (gpabase == vm->mem_segs[i].gpa) {
+ *seg = vm->mem_segs[i];
+ return (0);
+ }
+ }
+ return (-1);
+}
+
+int
+vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
+{
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+
+ return (VMGETREG(vm->cookie, vcpu, reg, retval));
+}
+
+int
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
+{
+ struct vcpu *vcpu;
+ int error;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ if (reg >= VM_REG_LAST)
+ return (EINVAL);
+ error = VMSETREG(vm->cookie, vcpuid, reg, val);
+ if (error)
+ return (error);
+
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu->nextpc = val;
+
+ return(0);
+}
+
+void *
+vm_get_cookie(struct vm *vm)
+{
+ return (vm->cookie);
+}
+
+uint16_t
+vm_get_maxcpus(struct vm *vm)
+{
+ return (vm->maxcpus);
+}
+
+static void
+vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
+{
+ size_t len;
+ uint64_t hpa;
+
+ len = 0;
+ while (len < seg->len) {
+ hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
+ if (hpa == (uint64_t)-1) {
+ panic("vm_free_mem_segs: cannot free hpa "
+ "associated with gpa 0x%016lx", seg->gpa + len);
+ }
+
+ vmm_mem_free(hpa, PAGE_SIZE);
+
+ len += PAGE_SIZE;
+ }
+
+ bzero(seg, sizeof(struct vm_memory_segment));
+}
+
+/*
+ * Return true if 'ipa' is available for allocation, false otherwise.
+ */
+static bool
+vm_ipa_available(struct vm *vm, uint64_t ipa)
+{
+ uint64_t ipabase, ipalimit;
+ int i;
+
+ if (!page_aligned(ipa))
+ panic("vm_ipa_available: ipa (0x%016lx) not page aligned", ipa);
+
+ for (i = 0; i < vm->num_mem_segs; i++) {
+ ipabase = vm->mem_segs[i].gpa;
+ ipalimit = ipabase + vm->mem_segs[i].len;
+ if (ipa >= ipabase && ipa < ipalimit)
+ return (false);
+ }
+
+ return (true);
+}
+
+/*
+ * Allocate 'len' bytes for the virtual machine starting at address 'ipa'
+ */
+int
+vm_malloc(struct vm *vm, uint64_t ipa, size_t len)
+{
+ struct vm_memory_segment *seg;
+ int error, available, allocated;
+ uint64_t ipa2;
+ vm_paddr_t pa;
+
+ if (!page_aligned(ipa) || !page_aligned(len) || len == 0)
+ return (EINVAL);
+
+ available = allocated = 0;
+ ipa2 = ipa;
+ while (ipa2 < ipa + len) {
+ if (vm_ipa_available(vm, ipa2))
+ available++;
+ else
+ allocated++;
+ ipa2 += PAGE_SIZE;
+ }
+
+ /*
+ * If there are some allocated and some available pages in the address
+ * range then it is an error.
+ */
+ if (allocated != 0 && available != 0)
+ return (EINVAL);
+
+ /*
+ * If the entire address range being requested has already been
+ * allocated then there isn't anything more to do.
+ */
+ if (allocated != 0 && available == 0)
+ return (0);
+
+ if (vm->num_mem_segs == VM_MAX_MEMORY_SEGMENTS)
+ return (E2BIG);
+
+ seg = &vm->mem_segs[vm->num_mem_segs];
+ error = 0;
+ seg->gpa = ipa;
+ seg->len = 0;
+ while (seg->len < len) {
+ pa = vmm_mem_alloc(PAGE_SIZE);
+ if (pa == 0) {
+ error = ENOMEM;
+ break;
+ }
+ VMMMAP_SET(vm->cookie, ipa, pa, PAGE_SIZE, VM_PROT_ALL);
+
+ seg->len += PAGE_SIZE;
+ ipa += PAGE_SIZE;
+ }
+ vm->num_mem_segs++;
+
+ return (error);
+}
+
+int
+vm_attach_vgic(struct vm *vm, uint64_t dist_start, size_t dist_size,
+ uint64_t redist_start, size_t redist_size)
+{
+ int error;
+
+ error = vgic_v3_attach_to_vm(vm->cookie, dist_start, dist_size,
+ redist_start, redist_size);
+
+ return (error);
+}
+
+int
+vm_assert_irq(struct vm *vm, uint32_t irq)
+{
+ struct hyp *hyp = (struct hyp *)vm->cookie;
+ int error;
+
+ /* TODO: this is crap, send the vcpuid as an argument to vm_assert_irq */
+ error = vgic_v3_inject_irq(&hyp->ctx[0], irq, VGIC_IRQ_VIRTIO);
+
+ return (error);
+}
+
+int
+vm_deassert_irq(struct vm *vm, uint32_t irq)
+{
+ int error;
+
+ error = vgic_v3_remove_irq(vm->cookie, irq, false);
+
+ return (error);
+}
+
+static int
+vm_handle_wfi(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+ struct vcpu *vcpu;
+ struct hypctx *hypctx;
+ bool intr_disabled;
+
+ vcpu = &vm->vcpu[vcpuid];
+ hypctx = vme->u.wfi.hypctx;
+ intr_disabled = ((hypctx->regs.spsr & PSR_I) != 0);
+
+ vcpu_lock(vcpu);
+ while (1) {
+ if (!intr_disabled && vgic_v3_vcpu_pending_irq(hypctx))
+ break;
+
+ if (vcpu_should_yield(vm, vcpuid))
+ break;
+
+ vcpu_set_state_locked(vcpu, VCPU_SLEEPING, false);
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
+ vcpu_set_state_locked(vcpu, VCPU_FROZEN, false);
+ }
+ vcpu_unlock(vcpu);
+
+ *retu = false;
+ return (0);
+}
Index: sys/arm64/vmm/vmm_dev.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_dev.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/sysctl.h>
+#include <sys/libkern.h>
+#include <sys/ioccom.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <machine/vmparam.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+struct vmmdev_softc {
+ struct vm *vm; /* vm instance cookie */
+ struct cdev *cdev;
+ SLIST_ENTRY(vmmdev_softc) link;
+ int flags;
+};
+#define VSC_LINKED 0x01
+
+static SLIST_HEAD(, vmmdev_softc) head;
+
+static struct mtx vmmdev_mtx;
+
+static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
+
+SYSCTL_DECL(_hw_vmm);
+
+static struct vmmdev_softc *
+vmmdev_lookup(const char *name)
+{
+ struct vmmdev_softc *sc;
+
+#ifdef notyet /* XXX kernel is not compiled with invariants */
+ mtx_assert(&vmmdev_mtx, MA_OWNED);
+#endif
+
+ SLIST_FOREACH(sc, &head, link) {
+ if (strcmp(name, vm_name(sc->vm)) == 0)
+ break;
+ }
+
+ return (sc);
+}
+
+static struct vmmdev_softc *
+vmmdev_lookup2(struct cdev *cdev)
+{
+
+ return (cdev->si_drv1);
+}
+
+static int
+vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
+{
+ int error = 0;
+
+ return (error);
+}
+
+static int
+vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ int error, vcpu, state_changed;
+ struct vmmdev_softc *sc;
+ struct vm_run *vmrun;
+ struct vm_memory_segment *seg;
+ struct vm_register *vmreg;
+ struct vm_activate_cpu *vac;
+ struct vm_attach_vgic *vav;
+ struct vm_irq *vi;
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc == NULL)
+ return (ENXIO);
+
+ error = 0;
+ vcpu = -1;
+ state_changed = 0;
+
+ /*
+ * Some VMM ioctls can operate only on vcpus that are not running.
+ */
+ switch (cmd) {
+ case VM_RUN:
+ case VM_GET_REGISTER:
+ case VM_SET_REGISTER:
+ /*
+ * XXX fragile, handle with care
+ * Assumes that the first field of the ioctl data is the vcpu.
+ */
+ vcpu = *(int *)data;
+ if (vcpu < 0 || vcpu >= VM_MAXCPU) {
+ error = EINVAL;
+ goto done;
+ }
+
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ if (error)
+ goto done;
+
+ state_changed = 1;
+ break;
+
+ case VM_MAP_MEMORY:
+ case VM_ATTACH_VGIC:
+ /*
+ * ioctls that operate on the entire virtual machine must
+ * prevent all vcpus from running.
+ */
+ error = 0;
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ vcpu--;
+ while (vcpu >= 0) {
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ vcpu--;
+ }
+ goto done;
+ }
+
+ state_changed = 2;
+ break;
+ case VM_ASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_assert_irq(sc->vm, vi->irq);
+ break;
+ case VM_DEASSERT_IRQ:
+ vi = (struct vm_irq *)data;
+ error = vm_deassert_irq(sc->vm, vi->irq);
+ break;
+ default:
+ break;
+ }
+
+ switch (cmd) {
+ case VM_RUN:
+ vmrun = (struct vm_run *)data;
+ error = vm_run(sc->vm, vmrun);
+ break;
+ case VM_MAP_MEMORY:
+ seg = (struct vm_memory_segment *)data;
+ error = vm_malloc(sc->vm, seg->gpa, seg->len);
+ break;
+ case VM_GET_MEMORY_SEG:
+ seg = (struct vm_memory_segment *)data;
+ seg->len = 0;
+ (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
+ error = 0;
+ break;
+ case VM_GET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+ &vmreg->regval);
+ break;
+ case VM_SET_REGISTER:
+ vmreg = (struct vm_register *)data;
+ error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
+ vmreg->regval);
+ break;
+ case VM_ACTIVATE_CPU:
+ vac = (struct vm_activate_cpu *)data;
+ error = vm_activate_cpu(sc->vm, vac->vcpuid);
+ break;
+ case VM_ATTACH_VGIC:
+ vav = (struct vm_attach_vgic *)data;
+ error = vm_attach_vgic(sc->vm, vav->dist_start, vav->dist_size,
+ vav->redist_start, vav->redist_size);
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+
+ if (state_changed == 1) {
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ } else if (state_changed == 2) {
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ }
+
+done:
+ /* Make sure that no handler returns a bogus value like ERESTART */
+ KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
+ return (error);
+}
+
+static int
+vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+ int nprot, vm_memattr_t *memattr)
+{
+ int error;
+ struct vmmdev_softc *sc;
+
+ error = -1;
+ mtx_lock(&vmmdev_mtx);
+
+ sc = vmmdev_lookup2(cdev);
+ if (sc != NULL && !(nprot & PROT_EXEC)) {
+ *paddr = (vm_paddr_t)vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
+ if (*paddr != (vm_paddr_t)-1)
+ error = 0;
+ }
+
+ mtx_unlock(&vmmdev_mtx);
+
+ return (error);
+}
+
+static void
+vmmdev_destroy(void *arg)
+{
+
+ struct vmmdev_softc *sc = arg;
+
+ if (sc->cdev != NULL)
+ destroy_dev(sc->cdev);
+
+ if (sc->vm != NULL)
+ vm_destroy(sc->vm);
+
+ if ((sc->flags & VSC_LINKED) != 0) {
+ mtx_lock(&vmmdev_mtx);
+ SLIST_REMOVE(&head, sc, vmmdev_softc, link);
+ mtx_unlock(&vmmdev_mtx);
+ }
+
+ free(sc, M_VMMDEV);
+}
+
+static int
+sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ char buf[VM_MAX_NAMELEN];
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+
+ strlcpy(buf, "beavis", sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ if (sc == NULL || sc->cdev == NULL) {
+ mtx_unlock(&vmmdev_mtx);
+ return (EINVAL);
+ }
+
+ /*
+ * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
+ * goes down to 0 so we should not do it again in the callback.
+ */
+ cdev = sc->cdev;
+ sc->cdev = NULL;
+ mtx_unlock(&vmmdev_mtx);
+
+ /*
+ * Schedule the 'cdev' to be destroyed:
+ *
+ * - any new operations on this 'cdev' will return an error (ENXIO).
+ *
+ * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
+ * be destroyed and the callback will be invoked in a taskqueue
+ * context.
+ */
+ destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
+
+ return (0);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_vmm_destroy, "A", NULL);
+
+static struct cdevsw vmmdevsw = {
+ .d_name = "vmmdev",
+ .d_version = D_VERSION,
+ .d_ioctl = vmmdev_ioctl,
+ .d_mmap = vmmdev_mmap,
+ .d_read = vmmdev_rw,
+ .d_write = vmmdev_rw,
+};
+
+static int
+sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ struct vm *vm;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc, *sc2;
+ char buf[VM_MAX_NAMELEN];
+
+ strlcpy(buf, "beavis", sizeof(buf));
+ error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(buf);
+ mtx_unlock(&vmmdev_mtx);
+ if (sc != NULL)
+ return (EEXIST);
+
+ error = vm_create(buf, &vm);
+ if (error != 0)
+ return (error);
+
+ sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+ sc->vm = vm;
+
+ /*
+ * Lookup the name again just in case somebody sneaked in when we
+ * dropped the lock.
+ */
+ mtx_lock(&vmmdev_mtx);
+ sc2 = vmmdev_lookup(buf);
+ if (sc2 == NULL) {
+ SLIST_INSERT_HEAD(&head, sc, link);
+ sc->flags |= VSC_LINKED;
+ }
+ mtx_unlock(&vmmdev_mtx);
+
+ if (sc2 != NULL) {
+ vmmdev_destroy(sc);
+ return (EEXIST);
+ }
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ if (error != 0) {
+ vmmdev_destroy(sc);
+ return (error);
+ }
+
+ mtx_lock(&vmmdev_mtx);
+ sc->cdev = cdev;
+ sc->cdev->si_drv1 = sc;
+ mtx_unlock(&vmmdev_mtx);
+
+ return (0);
+}
+SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_vmm_create, "A", NULL);
+
+void
+vmmdev_init(void)
+{
+ mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
+}
+
+int
+vmmdev_cleanup(void)
+{
+ int error;
+
+ if (SLIST_EMPTY(&head))
+ error = 0;
+ else
+ error = EBUSY;
+
+ return (error);
+}
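
A userland sketch (not part of this patch) of the control path the device above exposes: create the VM through the hw.vmm.create sysctl, open /dev/vmm/<name>, back guest memory with VM_MAP_MEMORY and run a vcpu with VM_RUN. The struct field names follow the ioctl handlers above (vm_run.cpuid/pc/vm_exit, vm_memory_segment.gpa/len); the exact userland headers and the guest load address are assumptions.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct vm_memory_segment seg;
	struct vm_run vmrun;
	const char *name = "test";
	char devpath[64];
	int fd;

	/* Create the VM; this ends up in sysctl_vmm_create(). */
	if (sysctlbyname("hw.vmm.create", NULL, NULL, name,
	    strlen(name) + 1) != 0)
		err(1, "hw.vmm.create");

	snprintf(devpath, sizeof(devpath), "/dev/vmm/%s", name);
	fd = open(devpath, O_RDWR);
	if (fd < 0)
		err(1, "open %s", devpath);

	/* Back 64MB of guest physical memory; handled by vm_malloc(). */
	memset(&seg, 0, sizeof(seg));
	seg.gpa = 0x40000000UL;	/* hypothetical guest load address */
	seg.len = 64 * 1024 * 1024;
	if (ioctl(fd, VM_MAP_MEMORY, &seg) != 0)
		err(1, "VM_MAP_MEMORY");

	/* Run vcpu 0 from the guest entry point until the first userland exit. */
	memset(&vmrun, 0, sizeof(vmrun));
	vmrun.cpuid = 0;
	vmrun.pc = seg.gpa;
	if (ioctl(fd, VM_RUN, &vmrun) != 0)
		err(1, "VM_RUN");

	printf("exitcode %d\n", vmrun.vm_exit.exitcode);
	return (0);
}
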
Index: sys/arm64/vmm/vmm_instruction_emul.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_instruction_emul.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+#include <machine/vmm.h>
+
+#else
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/_iovec.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <vmmapi.h>
+#endif
+
+#include <machine/vmm_instruction_emul.h>
+
+int
+vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vie->dir == VM_DIR_READ) {
+ error = memread(vm, vcpuid, gpa, &val, vie->access_size, memarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vm, vcpuid, vie->reg, val);
+ } else {
+ error = vm_get_register(vm, vcpuid, vie->reg, &val);
+ if (error)
+ goto out;
+ error = memwrite(vm, vcpuid, gpa, val, vie->access_size, memarg);
+ }
+
+out:
+ return (error);
+}
+
+int
+vmm_emulate_register(void *vm, int vcpuid, struct vre *vre, reg_read_t regread,
+ reg_write_t regwrite, void *regarg)
+{
+ uint64_t val;
+ int error;
+
+ if (vre->dir == VM_DIR_READ) {
+ error = regread(vm, vcpuid, &val, regarg);
+ if (error)
+ goto out;
+ error = vm_set_register(vm, vcpuid, vre->reg, val);
+ } else {
+ error = vm_get_register(vm, vcpuid, vre->reg, &val);
+ if (error)
+ goto out;
+ error = regwrite(vm, vcpuid, val, regarg);
+ }
+
+out:
+ return (error);
+}
Index: sys/arm64/vmm/vmm_mem.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_mem.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_MEM_H_
+#define _VMM_MEM_H_
+
+int vmm_mem_init(void);
+vm_paddr_t vmm_mem_alloc(size_t size);
+void vmm_mem_free(vm_paddr_t start, size_t size);
+vm_paddr_t vmm_mem_maxaddr(void);
+
+#endif
Index: sys/arm64/vmm/vmm_mem.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_mem.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/linker.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#include <machine/md_var.h>
+#include <machine/metadata.h>
+#include <machine/vmparam.h>
+#include <machine/pmap.h>
+
+#include "vmm_mem.h"
+
+SYSCTL_DECL(_hw_vmm);
+
+static u_long pages_allocated;
+SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD,
+ &pages_allocated, 0, "4KB pages allocated");
+
+static void
+update_pages_allocated(int howmany)
+{
+ pages_allocated += howmany; /* XXX locking? */
+}
+
+int
+vmm_mem_init(void)
+{
+
+ return (0);
+}
+
+vm_paddr_t
+vmm_mem_alloc(size_t size)
+{
+
+ int flags;
+ vm_page_t m;
+ vm_paddr_t pa;
+
+ if (size != PAGE_SIZE)
+ panic("vmm_mem_alloc: invalid allocation size %zu", size);
+
+ flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO;
+
+ while (1) {
+ /*
+ * XXX need policy to determine when to back off the allocation
+ */
+ m = vm_page_alloc(NULL, 0, flags);
+ if (m == NULL)
+ vm_wait(NULL);
+ else
+ break;
+ }
+
+ pa = VM_PAGE_TO_PHYS(m);
+
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+
+ m->valid = VM_PAGE_BITS_ALL;
+ update_pages_allocated(1);
+
+ return (pa);
+}
+
+void
+vmm_mem_free(vm_paddr_t base, size_t length)
+{
+ vm_page_t m;
+
+ if (base & PAGE_MASK) {
+ panic("vmm_mem_free: base 0x%0lx must be aligned on a "
+ "0x%0x boundary\n", base, PAGE_SIZE);
+ }
+
+ if (length != PAGE_SIZE)
+ panic("vmm_mem_free: invalid length %zu", length);
+
+ m = PHYS_TO_VM_PAGE(base);
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+
+ update_pages_allocated(-1);
+}
+
+vm_paddr_t
+vmm_mem_maxaddr(void)
+{
+
+ return (ptoa(Maxmem));
+}
Index: sys/arm64/vmm/vmm_stat.h
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_stat.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _VMM_STAT_H_
+#define _VMM_STAT_H_
+
+struct vm;
+
+#define MAX_VMM_STAT_ELEMS 64 /* arbitrary */
+
+enum vmm_stat_scope {
+ VMM_STAT_SCOPE_ANY,
+ VMM_STAT_SCOPE_INTEL, /* Intel VMX specific statistic */
+ VMM_STAT_SCOPE_AMD, /* AMD SVM specific statistic */
+};
+
+struct vmm_stat_type;
+typedef void (*vmm_stat_func_t)(struct vm *vm, int vcpu,
+ struct vmm_stat_type *stat);
+
+struct vmm_stat_type {
+ int index; /* position in the stats buffer */
+ int nelems; /* standalone or array */
+ const char *desc; /* description of statistic */
+ vmm_stat_func_t func;
+ enum vmm_stat_scope scope;
+};
+
+void vmm_stat_register(void *arg);
+
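+/*
+ * Define a statistic and register it at module load time via SYSINIT;
+ * vmm_stat_register() later assigns 'index', the slot of the statistic
+ * in the per-vcpu stats buffer.
+ */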
+#define VMM_STAT_FDEFINE(type, nelems, desc, func, scope) \
+ struct vmm_stat_type type[1] = { \
+ { -1, nelems, desc, func, scope } \
+ }; \
+ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
+
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
+
+#define VMM_STAT_DECLARE(type) \
+ extern struct vmm_stat_type type[1]
+
+#define VMM_STAT(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_ANY)
+#define VMM_STAT_INTEL(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_INTEL)
+#define VMM_STAT_AMD(type, desc) \
+ VMM_STAT_DEFINE(type, 1, desc, VMM_STAT_SCOPE_AMD)
+
+#define VMM_STAT_FUNC(type, desc, func) \
+ VMM_STAT_FDEFINE(type, 1, desc, func, VMM_STAT_SCOPE_ANY)
+
+#define VMM_STAT_ARRAY(type, nelems, desc) \
+ VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY)
+
+void *vmm_stat_alloc(void);
+void vmm_stat_init(void *vp);
+void vmm_stat_free(void *vp);
+
+/*
+ * 'buf' should be large enough to hold 'MAX_VMM_STAT_ELEMS' entries
+ */
+int vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf);
+int vmm_stat_desc_copy(int index, char *buf, int buflen);
+
+static void __inline
+vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t x)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] += x;
+#endif
+}
+
+static void __inline
+vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
+ int statidx, uint64_t val)
+{
+#ifdef VMM_KEEP_STATS
+ uint64_t *stats;
+
+ stats = vcpu_stats(vm, vcpu);
+
+ if (vst->index >= 0 && statidx < vst->nelems)
+ stats[vst->index + statidx] = val;
+#endif
+}
+
+static void __inline
+vmm_stat_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t x)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_incr(vm, vcpu, vst, 0, x);
+#endif
+}
+
+static void __inline
+vmm_stat_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, uint64_t val)
+{
+
+#ifdef VMM_KEEP_STATS
+ vmm_stat_array_set(vm, vcpu, vst, 0, val);
+#endif
+}
+
+VMM_STAT_DECLARE(VCPU_MIGRATIONS);
+VMM_STAT_DECLARE(VMEXIT_COUNT);
+VMM_STAT_DECLARE(VMEXIT_EXTINT);
+VMM_STAT_DECLARE(VMEXIT_HLT);
+VMM_STAT_DECLARE(VMEXIT_CR_ACCESS);
+VMM_STAT_DECLARE(VMEXIT_RDMSR);
+VMM_STAT_DECLARE(VMEXIT_WRMSR);
+VMM_STAT_DECLARE(VMEXIT_MTRAP);
+VMM_STAT_DECLARE(VMEXIT_PAUSE);
+VMM_STAT_DECLARE(VMEXIT_INTR_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
+VMM_STAT_DECLARE(VMEXIT_INOUT);
+VMM_STAT_DECLARE(VMEXIT_CPUID);
+VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
+VMM_STAT_DECLARE(VMEXIT_INST_EMUL);
+VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
+VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
+VMM_STAT_DECLARE(VMEXIT_USERSPACE);
+VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
+VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+#endif
Index: sys/arm64/vmm/vmm_stat.c
===================================================================
--- /dev/null
+++ sys/arm64/vmm/vmm_stat.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/vmm.h>
+#include "vmm_stat.h"
+
+/*
+ * 'vst_num_elems' is the total number of addressable statistic elements
+ * 'vst_num_types' is the number of unique statistic types
+ *
+ * It is always true that 'vst_num_elems' is greater than or equal to
+ * 'vst_num_types'. This is because a stat type may represent more than
+ * one element (e.g. VMM_STAT_ARRAY).
+ */
+static int vst_num_elems, vst_num_types;
+static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];
+
+static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");
+
+#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t))
+
+void
+vmm_stat_register(void *arg)
+{
+ struct vmm_stat_type *vst = arg;
+
+ /* We require all stats to identify themselves with a description */
+ if (vst->desc == NULL)
+ return;
+
+ if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
+ printf("Cannot accomodate vmm stat type \"%s\"!\n", vst->desc);
+ return;
+ }
+
+ vst->index = vst_num_elems;
+ vst_num_elems += vst->nelems;
+
+ vsttab[vst_num_types++] = vst;
+}
+
+int
+vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf)
+{
+ struct vmm_stat_type *vst;
+ uint64_t *stats;
+ int i;
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ /* Let stats functions update their counters */
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (vst->func != NULL)
+ (*vst->func)(vm, vcpu, vst);
+ }
+
+ /* Copy over the stats */
+ stats = vcpu_stats(vm, vcpu);
+ for (i = 0; i < vst_num_elems; i++)
+ buf[i] = stats[i];
+ *num_stats = vst_num_elems;
+ return (0);
+}
+
+void *
+vmm_stat_alloc(void)
+{
+
+ return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
+}
+
+void
+vmm_stat_init(void *vp)
+{
+
+ bzero(vp, vst_size);
+}
+
+void
+vmm_stat_free(void *vp)
+{
+ free(vp, M_VMM_STAT);
+}
+
+int
+vmm_stat_desc_copy(int index, char *buf, int bufsize)
+{
+ int i;
+ struct vmm_stat_type *vst;
+
+ for (i = 0; i < vst_num_types; i++) {
+ vst = vsttab[i];
+ if (index >= vst->index && index < vst->index + vst->nelems) {
+ if (vst->nelems > 1) {
+ snprintf(buf, bufsize, "%s[%d]",
+ vst->desc, index - vst->index);
+ } else {
+ strlcpy(buf, vst->desc, bufsize);
+ }
+ return (0); /* found it */
+ }
+ }
+
+ return (EINVAL);
+}
+
+/* global statistics */
+VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt");
+VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted");
+VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted");
+VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted");
+VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted");
+VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits");
+VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted");
+VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening");
+VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
+VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
+VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
+VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
+VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
+VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
+VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
+VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
Index: sys/conf/files.arm64
===================================================================
--- sys/conf/files.arm64
+++ sys/conf/files.arm64
@@ -176,6 +176,7 @@
arm64/arm64/identcpu.c standard
arm64/arm64/in_cksum.c optional inet | inet6
arm64/arm64/locore.S standard no-obj
+arm64/arm64/hyp_stub.S standard
arm64/arm64/machdep.c standard
arm64/arm64/machdep_boot.c standard
arm64/arm64/mem.c standard
@@ -412,6 +413,7 @@
dev/vnic/thunder_mdio_fdt.c optional vnic fdt
dev/vnic/thunder_mdio.c optional vnic
dev/vnic/lmac_if.m optional inet | inet6 | vnic
+dev/bvm/bvm_console.c optional bvmconsole
kern/msi_if.m optional intrng
kern/pic_if.m optional intrng
kern/subr_devmap.c standard
Index: sys/dev/bvm/bvm_console.c
===================================================================
--- /dev/null
+++ sys/dev/bvm/bvm_console.c
@@ -0,0 +1,268 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/cons.h>
+#include <sys/tty.h>
+#include <sys/reboot.h>
+#include <sys/bus.h>
+
+#if defined(__aarch64__)
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#endif
+
+#include <sys/kdb.h>
+#include <ddb/ddb.h>
+
+#ifndef BVMCONS_POLL_HZ
+#define BVMCONS_POLL_HZ 4
+#endif
+#define BVMBURSTLEN 16 /* max number of bytes to write in one chunk */
+
+static tsw_open_t bvm_tty_open;
+static tsw_close_t bvm_tty_close;
+static tsw_outwakeup_t bvm_tty_outwakeup;
+
+static struct ttydevsw bvm_ttydevsw = {
+ .tsw_flags = TF_NOPREFIX,
+ .tsw_open = bvm_tty_open,
+ .tsw_close = bvm_tty_close,
+ .tsw_outwakeup = bvm_tty_outwakeup,
+};
+
+static int polltime;
+static struct callout bvm_timer;
+
+#if defined(KDB)
+static int alt_break_state;
+#endif
+
+#if defined(__i386__) || defined(__amd64__)
+#define BVM_CONS_PORT 0x220
+#elif defined(__aarch64__)
+#define BVM_CONS_PORT 0x090000
+#endif
+
+static vm_offset_t bvm_cons_port = BVM_CONS_PORT;
+
+#define BVM_CONS_SIG ('b' << 8 | 'v')
+
+static void bvm_timeout(void *);
+
+static cn_probe_t bvm_cnprobe;
+static cn_init_t bvm_cninit;
+static cn_term_t bvm_cnterm;
+static cn_getc_t bvm_cngetc;
+static cn_putc_t bvm_cnputc;
+static cn_grab_t bvm_cngrab;
+static cn_ungrab_t bvm_cnungrab;
+
+CONSOLE_DRIVER(bvm);
+
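+/*
+ * Low-level console accessors: the hypervisor console is an I/O port on
+ * x86 and a memory-mapped register on arm; a read returns -1 when no
+ * character is pending.
+ */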
+static int
+bvm_rcons(u_char *ch)
+{
+ int c;
+
+#if defined(__i386__) || defined(__amd64__)
+ c = inl(bvm_cons_port);
+#elif defined(__arm__) || defined(__aarch64__)
+ c = *(int *)bvm_cons_port;
+#endif
+
+ if (c != -1) {
+ *ch = (u_char)c;
+ return (0);
+ } else
+ return (-1);
+}
+
+static void
+bvm_wcons(u_char ch)
+{
+#if defined(__i386__) || defined(__amd64__)
+ outl(bvm_cons_port, ch);
+#elif defined(__arm__) || defined(__aarch64__)
+ *(int *)bvm_cons_port = ch;
+#endif
+}
+
+static void
+cn_drvinit(void *unused)
+{
+ struct tty *tp;
+
+ gone_in(13, "bvmconsole");
+
+ if (bvm_consdev.cn_pri != CN_DEAD) {
+ tp = tty_alloc(&bvm_ttydevsw, NULL);
+ callout_init_mtx(&bvm_timer, tty_getlock(tp), 0);
+ tty_makedev(tp, NULL, "bvmcons");
+ }
+}
+
+static int
+bvm_tty_open(struct tty *tp)
+{
+ polltime = hz / BVMCONS_POLL_HZ;
+ if (polltime < 1)
+ polltime = 1;
+ callout_reset(&bvm_timer, polltime, bvm_timeout, tp);
+
+ return (0);
+}
+
+static void
+bvm_tty_close(struct tty *tp)
+{
+
+ tty_assert_locked(tp);
+ callout_stop(&bvm_timer);
+}
+
+static void
+bvm_tty_outwakeup(struct tty *tp)
+{
+ int len, written;
+ u_char buf[BVMBURSTLEN];
+
+ for (;;) {
+ len = ttydisc_getc(tp, buf, sizeof(buf));
+ if (len == 0)
+ break;
+
+ written = 0;
+ while (written < len)
+ bvm_wcons(buf[written++]);
+ }
+}
+
+static void
+bvm_timeout(void *v)
+{
+ struct tty *tp;
+ int c;
+
+ tp = (struct tty *)v;
+
+ tty_assert_locked(tp);
+ while ((c = bvm_cngetc(NULL)) != -1)
+ ttydisc_rint(tp, c, 0);
+ ttydisc_rint_done(tp);
+
+ callout_reset(&bvm_timer, polltime, bvm_timeout, tp);
+}
+
+static void
+bvm_cnprobe(struct consdev *cp)
+{
+ int disabled;
+#if defined(__i386__) || defined(__amd64__)
+ int port;
+#endif
+
+ disabled = 0;
+ cp->cn_pri = CN_DEAD;
+ strcpy(cp->cn_name, "bvmcons");
+
+ resource_int_value("bvmconsole", 0, "disabled", &disabled);
+ if (!disabled) {
+#if defined(__i386__) || defined(__amd64__)
+ if (resource_int_value("bvmconsole", 0, "port", &port) == 0)
+ bvm_cons_port = port;
+
+ if (inw(bvm_cons_port) == BVM_CONS_SIG) {
+#elif defined(__arm__) || defined(__aarch64__)
+ bvm_cons_port = (vm_offset_t)pmap_mapdev(bvm_cons_port, 0x1000);
+ if ((*(short *)bvm_cons_port) == BVM_CONS_SIG) {
+#endif
+ cp->cn_pri = CN_REMOTE;
+ }
+ }
+}
+
+static void
+bvm_cninit(struct consdev *cp)
+{
+ int i;
+ const char *bootmsg = "Using bvm console.\n";
+
+ if (boothowto & RB_VERBOSE) {
+ for (i = 0; i < strlen(bootmsg); i++)
+ bvm_cnputc(cp, bootmsg[i]);
+ }
+}
+
+static void
+bvm_cnterm(struct consdev *cp)
+{
+
+}
+
+static int
+bvm_cngetc(struct consdev *cp)
+{
+ unsigned char ch;
+
+ if (bvm_rcons(&ch) == 0) {
+#if defined(KDB)
+ kdb_alt_break(ch, &alt_break_state);
+#endif
+ return (ch);
+ }
+
+ return (-1);
+}
+
+static void
+bvm_cnputc(struct consdev *cp, int c)
+{
+
+ bvm_wcons(c);
+}
+
+static void
+bvm_cngrab(struct consdev *cp)
+{
+}
+
+static void
+bvm_cnungrab(struct consdev *cp)
+{
+}
+
+SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL);
Index: sys/dts/Makefile
===================================================================
--- sys/dts/Makefile
+++ sys/dts/Makefile
@@ -1,5 +1,5 @@
# $FreeBSD$
-SUBDIR=arm mips powerpc
+SUBDIR=arm arm64 mips powerpc
.include <bsd.subdir.mk>
Index: sys/kern/kern_cons.c
===================================================================
--- sys/kern/kern_cons.c
+++ sys/kern/kern_cons.c
@@ -136,7 +136,6 @@
* Check if we should mute the console (for security reasons perhaps)
* It can be changes dynamically using sysctl kern.consmute
* once we are up and going.
- *
*/
cn_mute = ((boothowto & (RB_MUTE
|RB_SINGLE
@@ -174,6 +173,7 @@
cnadd(cn);
}
}
+
if (best_cn == NULL)
return;
if ((boothowto & RB_MULTIPLE) == 0) {
Index: sys/modules/Makefile
===================================================================
--- sys/modules/Makefile
+++ sys/modules/Makefile
@@ -613,6 +613,9 @@
_armv8crypto= armv8crypto
_em= em
_rockchip= rockchip
+.if ${MK_BHYVE} != "no" || defined(ALL_MODULES)
+_vmm= vmm
+.endif
.endif
.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
Index: sys/modules/vmm/Makefile
===================================================================
--- sys/modules/vmm/Makefile
+++ sys/modules/vmm/Makefile
@@ -4,10 +4,7 @@
KMOD= vmm
-SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
-SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
-DPSRCS+= vmx_assym.h svm_assym.h
-DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
CFLAGS+= -DVMM_KEEP_STATS
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
@@ -15,76 +12,6 @@
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
-# generic vmm support
-.PATH: ${SRCTOP}/sys/amd64/vmm
-SRCS+= vmm.c \
- vmm_dev.c \
- vmm_host.c \
- vmm_instruction_emul.c \
- vmm_ioport.c \
- vmm_lapic.c \
- vmm_mem.c \
- vmm_stat.c \
- vmm_util.c \
- x86.c
-
-.PATH: ${SRCTOP}/sys/amd64/vmm/io
-SRCS+= iommu.c \
- ppt.c \
- vatpic.c \
- vatpit.c \
- vhpet.c \
- vioapic.c \
- vlapic.c \
- vpmtmr.c \
- vrtc.c
-
-# intel-specific files
-.PATH: ${SRCTOP}/sys/amd64/vmm/intel
-SRCS+= ept.c \
- vmcs.c \
- vmx_msr.c \
- vmx_support.S \
- vmx.c \
- vtd.c
-
-# amd-specific files
-.PATH: ${SRCTOP}/sys/amd64/vmm/amd
-SRCS+= vmcb.c \
- svm.c \
- svm_support.S \
- npt.c \
- ivrs_drv.c \
- amdvi_hw.c \
- svm_msr.c
-
-.if ${KERN_OPTS:MBHYVE_SNAPSHOT} != ""
-SRCS+= vmm_snapshot.c
-.endif
-
-CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
-
-OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
-OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
-
-vmx_assym.h: vmx_genassym.o
- sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
-
-svm_assym.h: svm_genassym.o
- sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET}
-
-vmx_support.o:
- ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
- ${.IMPSRC} -o ${.TARGET}
-
-svm_support.o:
- ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
- ${.IMPSRC} -o ${.TARGET}
-
-vmx_genassym.o: offset.inc
- ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
-
-svm_genassym.o: offset.inc
- ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
+.include <bsd.arch.inc.mk>
.include <bsd.kmod.mk>
Index: sys/modules/vmm/Makefile.amd64
===================================================================
--- /dev/null
+++ sys/modules/vmm/Makefile.amd64
@@ -0,0 +1,77 @@
+# $FreeBSD$
+
+DPSRCS+= vmx_assym.h svm_assym.h
+DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
+
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel
+CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd
+
+# generic vmm support
+.PATH: ${SRCTOP}/sys/amd64/vmm
+SRCS+= vmm.c \
+ vmm_dev.c \
+ vmm_host.c \
+ vmm_instruction_emul.c \
+ vmm_ioport.c \
+ vmm_lapic.c \
+ vmm_mem.c \
+ vmm_stat.c \
+ vmm_util.c \
+ x86.c
+
+.PATH: ${SRCTOP}/sys/amd64/vmm/io
+SRCS+= iommu.c \
+ ppt.c \
+ vatpic.c \
+ vatpit.c \
+ vhpet.c \
+ vioapic.c \
+ vlapic.c \
+ vpmtmr.c \
+ vrtc.c
+
+# intel-specific files
+.PATH: ${SRCTOP}/sys/amd64/vmm/intel
+SRCS+= ept.c \
+ vmcs.c \
+ vmx_msr.c \
+ vmx_support.S \
+ vmx.c \
+ vtd.c
+
+# amd-specific files
+.PATH: ${SRCTOP}/sys/amd64/vmm/amd
+SRCS+= vmcb.c \
+ svm.c \
+ svm_support.S \
+ npt.c \
+ ivrs_drv.c \
+ amdvi_hw.c \
+ svm_msr.c
+
+CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
+
+OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
+OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h
+
+vmx_assym.h: vmx_genassym.o
+ sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
+
+svm_assym.h: svm_genassym.o
+ sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET}
+
+vmx_support.o:
+ ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
+ ${.IMPSRC} -o ${.TARGET}
+
+svm_support.o:
+ ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
+ ${.IMPSRC} -o ${.TARGET}
+
+vmx_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} ${.IMPSRC}
+
+svm_genassym.o: offset.inc
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} ${.IMPSRC}
Index: sys/modules/vmm/Makefile.arm64
===================================================================
--- /dev/null
+++ sys/modules/vmm/Makefile.arm64
@@ -0,0 +1,30 @@
+DPSRCS+= hyp_assym.h
+DPSRCS+= hyp_genassym.c
+
+CFLAGS+= -I${.CURDIR}/../../arm64/vmm -I${.CURDIR}/../../arm64/include
+
+# generic vmm support
+.PATH: ${.CURDIR}/../../arm64/vmm
+SRCS+= vmm.c \
+ vmm_dev.c \
+ vmm_instruction_emul.c \
+ vmm_mem.c \
+ mmu.c \
+ vmm_stat.c \
+ arm64.c \
+ psci.c \
+ reset.c \
+ hyp.S
+
+.PATH: ${.CURDIR}/../../arm64/vmm/io
+SRCS+= vgic_v3.c \
+ vgic_v3_mmio.c \
+ vtimer.c
+
+CLEANFILES= hyp_assym.h hyp_genassym.o
+
+hyp_assym.h: hyp_genassym.o
+ sh ${SYSDIR}/kern/genassym.sh hyp_genassym.o > ${.TARGET}
+
+hyp_genassym.o:
+ ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC}
Index: sys/sys/bitstring.h
===================================================================
--- sys/sys/bitstring.h
+++ sys/sys/bitstring.h
@@ -419,4 +419,35 @@
*_result = _value;
}
+/* Compute _dstbitstr as the bitwise AND of the two source bitstrings */
+static inline void
+bitstr_and(bitstr_t *_dstbitstr, bitstr_t *_src1bitstr,
+ bitstr_t *_src2bitstr, int _nbits)
+{
+ bitstr_t mask;
+ while (_nbits >= (int)_BITSTR_BITS) {
+ *_dstbitstr++ = *_src1bitstr++ & *_src2bitstr++;
+ _nbits -= _BITSTR_BITS;
+ }
+ if (_nbits > 0) {
+ mask = _bit_make_mask(0, _bit_offset(_nbits - 1));
+ *_dstbitstr = (*_src1bitstr & *_src2bitstr) & mask;
+ }
+}
+
+/* Compute _dstbitstr as the bitwise OR of the two source bitstrings */
+static inline void
+bitstr_or(bitstr_t *_dstbitstr, bitstr_t *_src1bitstr,
+ bitstr_t *_src2bitstr, int _nbits)
+{
+ bitstr_t mask;
+ while (_nbits >= (int)_BITSTR_BITS) {
+ *_dstbitstr++ = *_src1bitstr++ | *_src2bitstr++;
+ _nbits -= _BITSTR_BITS;
+ }
+ if (_nbits > 0) {
+ mask = _bit_make_mask(0, _bit_offset(_nbits - 1));
+ *_dstbitstr = (*_src1bitstr | *_src2bitstr) & mask;
+ }
+}
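+
+/*
+ * Example: with bitstr_t *a, *b, *dst each allocated via bit_alloc(n),
+ * bitstr_and(dst, a, b, n) sets dst to the bitwise AND of a and b and
+ * clears any bits beyond n in the final word, so bit_ffs()/bit_count()
+ * on dst only ever see valid bits.
+ */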
#endif /* _SYS_BITSTRING_H_ */
Index: usr.sbin/Makefile.arm64
===================================================================
--- usr.sbin/Makefile.arm64
+++ usr.sbin/Makefile.arm64
@@ -4,3 +4,9 @@
SUBDIR+= acpi
.endif
SUBDIR+= ofwdump
+
+.if ${MK_BHYVE} != "no"
+SUBDIR+= bhyve
+SUBDIR+= bhyveload
+SUBDIR+= bhyvectl
+.endif
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -3,132 +3,7 @@
#
.include <src.opts.mk>
-CFLAGS+=-I${.CURDIR}/../../contrib/lib9p
-CFLAGS+=-I${SRCTOP}/sys
-.PATH: ${SRCTOP}/sys/cam/ctl
-PROG= bhyve
-PACKAGE= bhyve
-
-MAN= bhyve.8
-
-BHYVE_SYSDIR?=${SRCTOP}
-
-SRCS= \
- atkbdc.c \
- acpi.c \
- audio.c \
- bhyvegc.c \
- bhyverun.c \
- block_if.c \
- bootrom.c \
- console.c \
- ctl_util.c \
- ctl_scsi_all.c \
- fwctl.c \
- gdb.c \
- hda_codec.c \
- inout.c \
- ioapic.c \
- kernemu_dev.c \
- mem.c \
- mevent.c \
- mptbl.c \
- net_backends.c \
- net_utils.c \
- pci_ahci.c \
- pci_e82545.c \
- pci_emul.c \
- pci_hda.c \
- pci_fbuf.c \
- pci_hostbridge.c \
- pci_irq.c \
- pci_lpc.c \
- pci_nvme.c \
- pci_passthru.c \
- pci_virtio_9p.c \
- pci_virtio_block.c \
- pci_virtio_console.c \
- pci_virtio_net.c \
- pci_virtio_rnd.c \
- pci_virtio_scsi.c \
- pci_uart.c \
- pci_xhci.c \
- pctestdev.c \
- pm.c \
- post.c \
- ps2kbd.c \
- ps2mouse.c \
- rfb.c \
- rtc.c \
- smbiostbl.c \
- sockstream.c \
- task_switch.c \
- uart_emul.c \
- usb_emul.c \
- usb_mouse.c \
- virtio.c \
- vga.c \
- vmgenc.c \
- xmsr.c \
- spinup_ap.c \
- iov.c
-
-.if ${MK_BHYVE_SNAPSHOT} != "no"
-SRCS+= snapshot.c
-.endif
-
-CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64
-
-.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
-SRCS+= vmm_instruction_emul.c
-
-LIBADD= vmmapi md pthread z util sbuf cam 9p
-
-.if ${MK_CASPER} != "no"
-LIBADD+= casper
-LIBADD+= cap_pwd
-LIBADD+= cap_grp
-# Temporary disable capsicum, until we integrate checkpoint code with it.
-#CFLAGS+=-DWITH_CASPER
-.endif
-
-.if ${MK_BHYVE_SNAPSHOT} != "no"
-LIBADD+= ucl xo
-.endif
-
-.if ${MK_INET_SUPPORT} != "no"
-CFLAGS+=-DINET
-.endif
-.if ${MK_INET6_SUPPORT} != "no"
-CFLAGS+=-DINET6
-.endif
-.if ${MK_NETGRAPH_SUPPORT} != "no"
-CFLAGS+=-DNETGRAPH
-LIBADD+= netgraph
-.endif
-.if ${MK_OPENSSL} == "no"
-CFLAGS+=-DNO_OPENSSL
-.else
-LIBADD+= crypto
-.endif
-
-CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
-CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
-CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
-.if ${MK_BHYVE_SNAPSHOT} != "no"
-CFLAGS+= -I${SRCTOP}/contrib/libucl/include
-
-# Temporary disable capsicum, until we integrate checkpoint code with it.
-CFLAGS+= -DWITHOUT_CAPSICUM
-
-CFLAGS+= -DBHYVE_SNAPSHOT
-.endif
-
-.ifdef GDB_LOG
-CFLAGS+=-DGDB_LOG
-.endif
-
-WARNS?= 2
+.include <bsd.arch.inc.mk>
.include <bsd.prog.mk>
Index: usr.sbin/bhyve/Makefile.amd64
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/Makefile.amd64
@@ -0,0 +1,124 @@
+#
+# $FreeBSD$
+#
+
+CFLAGS+=-I${.CURDIR}/../../contrib/lib9p
+CFLAGS+=-I${SRCTOP}/sys
+.PATH: ${SRCTOP}/sys/cam/ctl
+
+PROG= bhyve
+PACKAGE= bhyve
+
+MAN= bhyve.8
+
+BHYVE_SYSDIR?=${SRCTOP}
+
+SRCS= \
+ atkbdc.c \
+ acpi.c \
+ audio.c \
+ bhyvegc.c \
+ bhyverun.c \
+ block_if.c \
+ bootrom.c \
+ console.c \
+ consport.c \
+ ctl_util.c \
+ ctl_scsi_all.c \
+ dbgport.c \
+ fwctl.c \
+ gdb.c \
+ hda_codec.c \
+ inout.c \
+ ioapic.c \
+ kernemu_dev.c \
+ mem.c \
+ mevent.c \
+ mptbl.c \
+ net_backends.c \
+ net_utils.c \
+ pci_ahci.c \
+ pci_e82545.c \
+ pci_emul.c \
+ pci_hda.c \
+ pci_fbuf.c \
+ pci_hostbridge.c \
+ pci_irq.c \
+ pci_lpc.c \
+ pci_nvme.c \
+ pci_passthru.c \
+ pci_virtio_9p.c \
+ pci_virtio_block.c \
+ pci_virtio_console.c \
+ pci_virtio_net.c \
+ pci_virtio_rnd.c \
+ pci_virtio_scsi.c \
+ pci_uart.c \
+ pci_xhci.c \
+ pctestdev.c \
+ pm.c \
+ post.c \
+ ps2kbd.c \
+ ps2mouse.c \
+ rfb.c \
+ rtc.c \
+ smbiostbl.c \
+ sockstream.c \
+ task_switch.c \
+ uart_emul.c \
+ usb_emul.c \
+ usb_mouse.c \
+ virtio.c \
+ vga.c \
+ vmgenc.c \
+ xmsr.c \
+ spinup_ap.c \
+ iov.c
+
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+SRCS+= snapshot.c
+.endif
+
+CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64
+
+.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
+SRCS+= vmm_instruction_emul.c
+
+LIBADD= vmmapi md pthread z util sbuf cam 9p casper cap_pwd cap_grp
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+LIBADD+= ucl xo
+.endif
+
+.if ${MK_INET_SUPPORT} != "no"
+CFLAGS+=-DINET
+.endif
+.if ${MK_INET6_SUPPORT} != "no"
+CFLAGS+=-DINET6
+.endif
+.if ${MK_NETGRAPH_SUPPORT} != "no"
+CFLAGS+=-DNETGRAPH
+LIBADD+= netgraph
+.endif
+.if ${MK_OPENSSL} == "no"
+CFLAGS+=-DNO_OPENSSL
+.else
+LIBADD+= crypto
+.endif
+
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+CFLAGS+= -I${SRCTOP}/contrib/libucl/include
+
+# Temporary disable capsicum, until we integrate checkpoint code with it.
+CFLAGS+= -DWITHOUT_CAPSICUM
+
+CFLAGS+= -DBHYVE_SNAPSHOT
+.endif
+
+.ifdef GDB_LOG
+CFLAGS+=-DGDB_LOG
+.endif
+
+WARNS?= 2
Index: usr.sbin/bhyve/Makefile.arm64
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/Makefile.arm64
@@ -0,0 +1,55 @@
+#
+# $FreeBSD$
+#
+
+CFLAGS+=-I${SRCTOP}/sys
+.PATH: ${SRCTOP}/sys/cam/ctl
+
+PROG= bhyve
+PACKAGE= bhyve
+
+MAN= bhyve.8
+
+BHYVE_SYSDIR?=${SRCTOP}
+BHYVE_SRCTOP?=${.CURDIR}
+
+SRCS= \
+ block_if.c \
+ iov.c \
+ mevent.c \
+ net_backends.c \
+ sockstream.c
+
+CFLAGS+= -DWITHOUT_CAPSICUM
+.include "${BHYVE_SRCTOP}/arm64/Makefile.inc"
+.include "${BHYVE_SRCTOP}/mmio/Makefile.inc"
+
+LIBADD= vmmapi md pthread
+
+.if ${MK_INET_SUPPORT} != "no"
+CFLAGS+=-DINET
+.endif
+.if ${MK_INET6_SUPPORT} != "no"
+CFLAGS+=-DINET6
+.endif
+.if ${MK_NETGRAPH_SUPPORT} != "no"
+CFLAGS+=-DNETGRAPH
+LIBADD+= netgraph
+.endif
+.if ${MK_OPENSSL} == "no"
+CFLAGS+=-DNO_OPENSSL
+.endif
+
+.PATH: ${BHYVE_SYSDIR}/sys/arm64/vmm
+SRCS+= vmm_instruction_emul.c
+
+CFLAGS+= -I${BHYVE_SRCTOP}
+CFLAGS+= -I${BHYVE_SRCTOP}/arm64
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio/console
+
+.ifdef GDB_LOG
+CFLAGS+=-DGDB_LOG
+.endif
+
+WARNS?= 2
Index: usr.sbin/bhyve/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/Makefile.inc
@@ -0,0 +1,17 @@
+#
+# $FreeBSD$
+#
+.PATH: ${BHYVE_SRCTOP}/arm64/
+SRCS+= \
+ arm64/bhyverun.c \
+ arm64/mem.c \
+ arm64/consport.c \
+ arm64/reset.c
+
+.PATH: ${BHYVE_SYSDIR}/sys/${BHYVE_ARCH}/vmm
+
+MK_MAN=no
+
+BHYVE_BUS= mmio
+
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/virtio/mmio
Index: usr.sbin/bhyve/arm64/bhyverun.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/bhyverun.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/bhyverun.h 4 2017-04-18 20:28:32Z mihai.carabas $
+ */
+
+#ifndef _FBSDRUN_H_
+#define _FBSDRUN_H_
+
+#ifndef CTASSERT /* Allow lint to override */
+#define CTASSERT(x) _CTASSERT(x, __LINE__)
+#define _CTASSERT(x, y) __CTASSERT(x, y)
+#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
+#endif
+
+struct vmctx;
+extern int guest_ncpus;
+extern char *vmname;
+
+void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
+
+void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip);
+int fbsdrun_muxed(void);
+int fbsdrun_vmexit_on_hlt(void);
+int fbsdrun_vmexit_on_pause(void);
+int fbsdrun_disable_x2apic(void);
+int fbsdrun_virtio_msix(void);
+#endif
Index: usr.sbin/bhyve/arm64/bhyverun.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/bhyverun.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <err.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <sysexits.h>
+#include <vmmapi.h>
+
+#include <machine/vmm.h>
+
+#include "bhyverun.h"
+#include "../mmio/mmio_emul.h"
+#include "../mmio/mmio_irq.h"
+#include "mem.h"
+#include "mevent.h"
+
+/* Exit codes. */
+#define EXIT_REBOOT 0
+#define EXIT_POWEROFF 1
+#define EXIT_HALT 2
+#define EXIT_ERROR 4
+
+#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
+
+#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */
+#define VMEXIT_CONTINUE 1 /* continue from next instruction */
+#define VMEXIT_RESTART 2 /* restart current instruction */
+#define VMEXIT_ABORT 3 /* abort the vm run loop */
+#define VMEXIT_RESET 4 /* guest machine has reset */
+
+#define MB (1024UL * 1024)
+#define GB (1024UL * MB)
+
+typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
+
+char *vmname;
+
+int guest_ncpus;
+
+int raw_stdio = 0;
+
+static int foundcpus;
+
+static char *progname;
+static const int BSP = 0;
+/* TODO Change this to cpuset_t */
+static int cpumask;
+
+static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc);
+
+struct vm_exit vmexit[VM_MAXCPU];
+
+struct bhyvestats {
+ uint64_t vmexit_bogus;
+ uint64_t vmexit_inst_emul;
+} stats;
+
+struct mt_vmm_info {
+ pthread_t mt_thr;
+ struct vmctx *mt_ctx;
+ int mt_vcpu;
+} mt_vmm_info[VM_MAXCPU];
+
+static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };
+
+static void
+usage(int code)
+{
+
+ fprintf(stderr,
+ "Usage: %s [-bh] [-c vcpus] [-p pincpu] [-s <devemu>] "
+ "<vmname>\n"
+ " -b: use bvmconsole\n"
+ " -c: # cpus (default 1)\n"
+ " -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
+ " -s: device emulation config\n"
+ " -h: help\n",
+ progname);
+
+ exit(code);
+}
+
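+/*
+ * Parse a "vcpu:hostcpu" pair given with -p and record the host cpu in
+ * the affinity set for that vcpu.
+ */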
+static int
+pincpu_parse(const char *opt)
+{
+ int vcpu, pcpu;
+
+ if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
+ fprintf(stderr, "invalid format: %s\n", opt);
+ return (-1);
+ }
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU) {
+ fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
+ vcpu, VM_MAXCPU - 1);
+ return (-1);
+ }
+
+ if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
+ fprintf(stderr, "hostcpu '%d' outside valid range from "
+ "0 to %d\n", pcpu, CPU_SETSIZE - 1);
+ return (-1);
+ }
+
+ if (vcpumap[vcpu] == NULL) {
+ if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
+ perror("malloc");
+ return (-1);
+ }
+ CPU_ZERO(vcpumap[vcpu]);
+ }
+ CPU_SET(pcpu, vcpumap[vcpu]);
+ return (0);
+}
+
+void *
+paddr_guest2host(struct vmctx *ctx, uintptr_t iaddr, size_t len)
+{
+
+ return (vm_map_ipa(ctx, iaddr, len));
+}
+
+int
+fbsdrun_virtio_msix(void)
+{
+
+ return (0);
+}
+
+static void *
+fbsdrun_start_thread(void *param)
+{
+ char tname[MAXCOMLEN + 1];
+ struct mt_vmm_info *mtp;
+ int vcpu;
+
+ mtp = param;
+ vcpu = mtp->mt_vcpu;
+
+ snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu);
+ pthread_set_name_np(mtp->mt_thr, tname);
+
+ vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].pc);
+
+ /* not reached */
+ return (NULL);
+}
+
+void
+fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t pc)
+{
+ int error;
+
+ if (cpumask & (1 << vcpu)) {
+ fprintf(stderr, "addcpu: attempting to add existing cpu %d\n",
+ vcpu);
+ exit(4);
+ }
+
+ cpumask |= 1 << vcpu;
+ foundcpus++;
+
+ /*
+ * Set up the vmexit struct to allow execution to start
+ * at the given PC
+ */
+ vmexit[vcpu].pc = pc;
+ vmexit[vcpu].inst_length = 0;
+
+ if (vcpu == BSP) {
+ mt_vmm_info[vcpu].mt_ctx = ctx;
+ mt_vmm_info[vcpu].mt_vcpu = vcpu;
+
+ error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
+ fbsdrun_start_thread, &mt_vmm_info[vcpu]);
+ assert(error == 0);
+ }
+}
+
+static int
+fbsdrun_get_next_cpu(int curcpu)
+{
+
+ /*
+ * Get the next available CPU. Assumes they arrive
+ * in ascending order with no gaps.
+ */
+ return ((curcpu + 1) % foundcpus);
+}
+
+static int
+vmexit_hyp(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+ fprintf(stderr, "vm exit[%d]\n", *pvcpu);
+ fprintf(stderr, "\treason\t\tHYP\n");
+ fprintf(stderr, "\tpc\t\t0x%016lx\n", vmexit->pc);
+ fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
+
+ return (VMEXIT_ABORT);
+}
+
+static int
+vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+ stats.vmexit_bogus++;
+
+ return (VMEXIT_RESTART);
+}
+
+static int
+vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ int err;
+ struct vie *vie;
+
+ stats.vmexit_inst_emul++;
+
+ vie = &vmexit->u.inst_emul.vie;
+ err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie);
+
+ if (err) {
+ if (err == ESRCH) {
+ fprintf(stderr, "Unhandled memory access to 0x%lx\n",
+ vmexit->u.inst_emul.gpa);
+ }
+
+ fprintf(stderr, "Failed to emulate instruction at 0x%lx\n", vmexit->pc);
+ return (VMEXIT_ABORT);
+ }
+ return (VMEXIT_CONTINUE);
+}
+
+static int
+vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ enum vm_suspend_how how;
+
+ how = vmexit->u.suspended.how;
+
+ switch (how) {
+ case VM_SUSPEND_POWEROFF:
+ exit(EXIT_POWEROFF);
+ case VM_SUSPEND_RESET:
+ exit(EXIT_REBOOT);
+ case VM_SUSPEND_HALT:
+ exit(EXIT_HALT);
+ case VM_SUSPEND_TRIPLEFAULT:
+ /* Not implemented yet. */
+ exit(EXIT_ERROR);
+ default:
+ fprintf(stderr, "vmexit_suspend: invalid or unimplemented reason %d\n", how);
+ exit(100);
+ }
+
+}
+
+static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
+ [VM_EXITCODE_BOGUS] = vmexit_bogus,
+ [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+ [VM_EXITCODE_REG_EMUL] = vmexit_hyp,
+ [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
+ [VM_EXITCODE_HYP] = vmexit_hyp,
+};
+
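+/*
+ * Per-vcpu run loop: call vm_run() and dispatch each exit to its
+ * handler.  VMEXIT_CONTINUE resumes after the trapping instruction,
+ * VMEXIT_RESTART retries it, and anything else terminates the process.
+ */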
+static void
+vm_loop(struct vmctx *ctx, int vcpu, uint64_t pc)
+{
+ int error, rc, prevcpu;
+ enum vm_exitcode exitcode;
+
+ if (vcpumap[vcpu] != NULL) {
+ error = pthread_setaffinity_np(pthread_self(),
+ sizeof(cpuset_t), vcpumap[vcpu]);
+ assert(error == 0);
+ }
+
+ while (1) {
+
+ error = vm_run(ctx, vcpu, pc, &vmexit[vcpu]);
+
+ if (error != 0) {
+ /*
+ * It is possible that 'vmmctl' or some other process
+ * has transitioned the vcpu to CANNOT_RUN state right
+ * before we tried to transition it to RUNNING.
+ *
+ * This is expected to be temporary so just retry.
+ */
+ if (errno == EBUSY)
+ continue;
+ else
+ break;
+ }
+
+ prevcpu = vcpu;
+
+ exitcode = vmexit[vcpu].exitcode;
+ if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
+ fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
+ exitcode);
+ exit(4);
+ }
+
+ rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
+
+ switch (rc) {
+ case VMEXIT_CONTINUE:
+ pc = vmexit[vcpu].pc + vmexit[vcpu].inst_length;
+ break;
+ case VMEXIT_RESTART:
+ pc = vmexit[vcpu].pc;
+ break;
+ case VMEXIT_RESET:
+ exit(0);
+ default:
+ exit(4);
+ }
+ }
+ fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
+}
+
+static int
+num_vcpus_allowed(struct vmctx *ctx)
+{
+ return (VM_MAXCPU);
+}
+
+int
+main(int argc, char *argv[])
+{
+ int c, error;
+ bool bvmcons;
+ int max_vcpus;
+ struct vmctx *ctx;
+ uint64_t pc;
+ uint64_t memory_base_address, mem_size;
+
+ bvmcons = false;
+ memory_base_address = VM_GUEST_BASE_IPA;
+ mem_size = 128 * MB;
+ progname = basename(argv[0]);
+ guest_ncpus = 1;
+
+ while ((c = getopt(argc, argv, "bhp:c:s:e:m:")) != -1) {
+ switch (c) {
+ case 'b':
+ bvmcons = true;
+ break;
+ case 'e':
+ memory_base_address = strtoul(optarg, NULL, 0);
+ break;
+ case 'p':
+ if (pincpu_parse(optarg) != 0) {
+ errx(EX_USAGE, "invalid vcpu pinning "
+ "configuration '%s'", optarg);
+ }
+ break;
+ case 'c':
+ guest_ncpus = atoi(optarg);
+ break;
+ case 'm':
+ error = vm_parse_memsize(optarg, &mem_size);
+ if (error) {
+ fprintf(stderr, "Invalid memsize '%s'\n", optarg);
+ exit(1);
+ }
+ break;
+ case 's':
+ if (mmio_parse_opts(optarg) != 0)
+ exit(1);
+ break;
+ case 'h':
+ usage(0);
+ default:
+ usage(4);
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1)
+ usage(4);
+
+ vmname = argv[0];
+
+ /* The VM must be created by bhyveload first. */
+ ctx = vm_open(vmname);
+ if (ctx == NULL) {
+ perror("vm_open");
+ exit(1);
+ }
+
+ max_vcpus = num_vcpus_allowed(ctx);
+ if (guest_ncpus > max_vcpus) {
+ fprintf(stderr, "%d vCPUs requested but only %d available\n",
+ guest_ncpus, max_vcpus);
+ exit(1);
+ }
+
+ error = vm_setup_memory(ctx, memory_base_address, mem_size, VM_MMAP_ALL);
+ if (error != 0) {
+ fprintf(stderr, "Unable to setup memory (%d)\n", error);
+ exit(1);
+ }
+
+ init_mem();
+ mmio_irq_init(ctx);
+
+ if (init_mmio(ctx) != 0) {
+ fprintf(stderr, "Failed to initialize device emulation\n");
+ exit(1);
+ }
+
+ if (bvmcons)
+ init_bvmcons();
+
+ error = vm_get_register(ctx, BSP, VM_REG_ELR_EL2, &pc);
+ assert(error == 0);
+ /*
+ * Add CPU 0
+ */
+ fbsdrun_addcpu(ctx, BSP, pc);
+
+ /*
+ * Head off to the main event dispatch loop
+ */
+ mevent_dispatch();
+
+ exit(1);
+}
Index: usr.sbin/bhyve/arm64/consport.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/consport.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/select.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include "mem.h"
+
+#define BVM_CONS_PORT 0x090000
+#define BVM_CONS_SIG ('b' << 8 | 'v')
+
+static struct termios tio_orig, tio_new;
+
+static void
+ttyclose(void)
+{
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
+}
+
+static void
+ttyopen(void)
+{
+ tcgetattr(STDIN_FILENO, &tio_orig);
+
+ cfmakeraw(&tio_new);
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
+
+ atexit(ttyclose);
+}
+
+static bool
+tty_char_available(void)
+{
+ fd_set rfds;
+ struct timeval tv;
+
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
+ return (true);
+ } else {
+ return (false);
+ }
+}
+
+static int
+ttyread(void)
+{
+ char rb;
+
+ if (tty_char_available()) {
+ read(STDIN_FILENO, &rb, 1);
+ return (rb & 0xff);
+ } else {
+ return (-1);
+ }
+}
+
+static void
+ttywrite(unsigned char wb)
+{
+ (void) write(STDOUT_FILENO, &wb, 1);
+}
+
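+/*
+ * MMIO handler for the bvm console register: a 2-byte read returns the
+ * console signature so the guest can detect the device, a 1-byte read
+ * returns 0xff, and 4-byte accesses move one character to or from the
+ * host terminal.
+ */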
+static int
+console_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
+{
+ static int opened;
+
+ if (size == 2 && dir == MEM_F_READ) {
+ *val = BVM_CONS_SIG;
+ return (0);
+ }
+
+ /*
+ * Guests might probe this port to look for old ISA devices
+ * using single-byte reads. Return 0xff for those.
+ */
+ if (size == 1 && dir == MEM_F_READ) {
+ *val = 0xff;
+ return (0);
+ }
+
+ if (size != 4)
+ return (-1);
+
+ if (!opened) {
+ ttyopen();
+ opened = 1;
+ }
+
+ if (dir == MEM_F_READ)
+ *val = ttyread();
+ else
+ ttywrite(*val);
+ return (0);
+}
+
+struct mem_range consport = {
+ "bvmcons",
+ MEM_F_RW,
+ console_handler,
+ NULL,
+ 0,
+ BVM_CONS_PORT,
+ sizeof(int)
+};
+
+void
+init_bvmcons(void)
+{
+ register_mem(&consport);
+}
Index: usr.sbin/bhyve/arm64/mem.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/mem.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/mem.h 38 2017-06-13 13:34:14Z darius.mihai $
+ */
+
+#ifndef _MEM_H_
+#define _MEM_H_
+
+#include <sys/linker_set.h>
+
+struct vmctx;
+
+typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+ int size, uint64_t *val, void *arg1, long arg2);
+
+struct mem_range {
+ const char *name;
+ int flags;
+ mem_func_t handler;
+ void *arg1;
+ long arg2;
+ uint64_t base;
+ uint64_t size;
+};
+#define MEM_F_READ 0x1
+#define MEM_F_WRITE 0x2
+#define MEM_F_RW 0x3
+
+void init_mem(void);
+int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, void *vie);
+int register_mem(struct mem_range *memp);
+int register_mem_fallback(struct mem_range *memp);
+int unregister_mem(struct mem_range *memp);
+
+void init_bvmcons(void);
+#endif /* _MEM_H_ */
Index: usr.sbin/bhyve/arm64/mem.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/mem.c
@@ -0,0 +1,271 @@
+/*-
+ * Copyright (c) 2012 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/mem.c 4 2017-04-18 20:28:32Z mihai.carabas $
+ */
+
+/*
+ * Memory ranges are represented with an RB tree. On insertion, the range
+ * is checked for overlaps. On lookup, the key has the same base and limit
+ * so it can be searched within the range.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/usr.sbin/bhyve/arm/mem.c 4 2017-04-18 20:28:32Z mihai.carabas $");
+
+#include <sys/types.h>
+#include <sys/tree.h>
+#include <sys/errno.h>
+#include <machine/vmm.h>
+#include <machine/vmm_instruction_emul.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "mem.h"
+
+struct mmio_rb_range {
+ RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
+ struct mem_range mr_param;
+ uint64_t mr_base;
+ uint64_t mr_end;
+};
+
+struct mmio_rb_tree;
+RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;
+
+/*
+ * Per-vCPU cache. Since most accesses from a vCPU will be to
+ * consecutive addresses in a range, it makes sense to cache the
+ * result of a lookup.
+ */
+static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
+
+static pthread_rwlock_t mmio_rwlock;
+
+static int
+mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
+{
+ if (a->mr_end < b->mr_base)
+ return (-1);
+ else if (a->mr_base > b->mr_end)
+ return (1);
+ return (0);
+}
+
+static int
+mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
+ struct mmio_rb_range **entry)
+{
+ struct mmio_rb_range find, *res;
+
+ find.mr_base = find.mr_end = addr;
+
+ res = RB_FIND(mmio_rb_tree, rbt, &find);
+
+ if (res != NULL) {
+ *entry = res;
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+static int
+mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
+{
+ struct mmio_rb_range *overlap;
+
+ overlap = RB_INSERT(mmio_rb_tree, rbt, new);
+
+ if (overlap != NULL) {
+#ifdef RB_DEBUG
+ printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
+ new->mr_base, new->mr_end,
+ overlap->mr_base, overlap->mr_end);
+#endif
+
+ return (EEXIST);
+ }
+
+ return (0);
+}
+
+#if 0
+static void
+mmio_rb_dump(struct mmio_rb_tree *rbt)
+{
+ struct mmio_rb_range *np;
+
+ pthread_rwlock_rdlock(&mmio_rwlock);
+ RB_FOREACH(np, mmio_rb_tree, rbt) {
+ printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
+ np->mr_param.name);
+ }
+ pthread_rwlock_unlock(&mmio_rwlock);
+}
+#endif
+
+RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+
+static int
+mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
+{
+ int error;
+ struct mem_range *mr = arg;
+
+ error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size,
+ rval, mr->arg1, mr->arg2);
+ return (error);
+}
+
+static int
+mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
+{
+ int error;
+ struct mem_range *mr = arg;
+
+ error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size,
+ &wval, mr->arg1, mr->arg2);
+ return (error);
+}
+
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, void *vie)
+{
+ struct mmio_rb_range *entry;
+ int err;
+
+ pthread_rwlock_rdlock(&mmio_rwlock);
+ /*
+ * First check the per-vCPU cache
+ */
+ if (mmio_hint[vcpu] &&
+ paddr >= mmio_hint[vcpu]->mr_base &&
+ paddr <= mmio_hint[vcpu]->mr_end) {
+ entry = mmio_hint[vcpu];
+ } else
+ entry = NULL;
+
+ if (entry == NULL) {
+ if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
+ /* Update the per-vCPU cache */
+ mmio_hint[vcpu] = entry;
+ } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
+ pthread_rwlock_unlock(&mmio_rwlock);
+ return (ESRCH);
+ }
+ }
+
+ assert(entry != NULL);
+ err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
+ mem_read, mem_write, &entry->mr_param);
+
+ pthread_rwlock_unlock(&mmio_rwlock);
+
+ return (err);
+}
+
+static int
+register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
+{
+ struct mmio_rb_range *entry, *mrp;
+ int err;
+
+ err = 0;
+
+ mrp = malloc(sizeof(struct mmio_rb_range));
+
+ if (mrp != NULL) {
+ mrp->mr_param = *memp;
+ mrp->mr_base = memp->base;
+ mrp->mr_end = memp->base + memp->size - 1;
+ pthread_rwlock_wrlock(&mmio_rwlock);
+ if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
+ err = mmio_rb_add(rbt, mrp);
+ pthread_rwlock_unlock(&mmio_rwlock);
+ if (err)
+ free(mrp);
+ } else
+ err = ENOMEM;
+
+ return (err);
+}
+
+int
+register_mem(struct mem_range *memp)
+{
+
+ return (register_mem_int(&mmio_rb_root, memp));
+}
+
+int
+register_mem_fallback(struct mem_range *memp)
+{
+
+ return (register_mem_int(&mmio_rb_fallback, memp));
+}
+
+int
+unregister_mem(struct mem_range *memp)
+{
+ struct mem_range *mr;
+ struct mmio_rb_range *entry = NULL;
+ int err, i;
+
+ pthread_rwlock_wrlock(&mmio_rwlock);
+ err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
+ if (err == 0) {
+ mr = &entry->mr_param;
+ assert(mr->name == memp->name);
+ assert(mr->base == memp->base && mr->size == memp->size);
+ RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
+
+ /* flush Per-vCPU cache */
+ for (i = 0; i < VM_MAXCPU; i++) {
+ if (mmio_hint[i] == entry)
+ mmio_hint[i] = NULL;
+ }
+ }
+ pthread_rwlock_unlock(&mmio_rwlock);
+
+ if (entry)
+ free(entry);
+
+ return (err);
+}
+
+void
+init_mem(void)
+{
+ RB_INIT(&mmio_rb_root);
+ RB_INIT(&mmio_rb_fallback);
+ pthread_rwlock_init(&mmio_rwlock, NULL);
+}
Index: usr.sbin/bhyve/arm64/mevent_test.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/mevent_test.c
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.sbin/bhyve/arm/mevent_test.c 4 2017-04-18 20:28:32Z mihai.carabas $
+ */
+
+/*
+ * Test program for the micro event library. Set up a simple TCP echo
+ * service.
+ *
+ * cc mevent_test.c mevent.c -lpthread
+ */
+
+#include <sys/types.h>
+#include <sys/stdint.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <machine/cpufunc.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include "mevent.h"
+
+#define TEST_PORT 4321
+
+static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;
+
+static struct mevent *tevp;
+
+char *vmname = "test vm";
+
+
+#define MEVENT_ECHO
+
+/* Number of timer events to capture */
+#define TEVSZ 4096
+uint64_t tevbuf[TEVSZ];
+
+static void
+timer_print(void)
+{
+ uint64_t min, max, diff, sum, tsc_freq;
+ size_t len;
+ int j;
+
+ min = UINT64_MAX;
+ max = 0;
+ sum = 0;
+
+ len = sizeof(tsc_freq);
+ sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0);
+
+ for (j = 1; j < TEVSZ; j++) {
+ /* Convert a tsc diff into microseconds */
+ diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq;
+ sum += diff;
+ if (min > diff)
+ min = diff;
+ if (max < diff)
+ max = diff;
+ }
+
+ printf("timers done: usecs, min %ld, max %ld, mean %ld\n", min, max,
+ sum/(TEVSZ - 1));
+}
+
+static void
+timer_callback(int fd, enum ev_type type, void *param)
+{
+ static int i;
+
+ if (i >= TEVSZ)
+ abort();
+
+ tevbuf[i++] = rdtsc();
+
+ if (i == TEVSZ) {
+ mevent_delete(tevp);
+ timer_print();
+ }
+}
+
+
+#ifdef MEVENT_ECHO
+struct esync {
+ pthread_mutex_t e_mt;
+ pthread_cond_t e_cond;
+};
+
+static void
+echoer_callback(int fd, enum ev_type type, void *param)
+{
+ struct esync *sync = param;
+
+ pthread_mutex_lock(&sync->e_mt);
+ pthread_cond_signal(&sync->e_cond);
+ pthread_mutex_unlock(&sync->e_mt);
+}
+
+static void *
+echoer(void *param)
+{
+ struct esync sync;
+ struct mevent *mev;
+ char buf[128];
+ int fd = (int)(uintptr_t) param;
+ int len;
+
+ pthread_mutex_init(&sync.e_mt, NULL);
+ pthread_cond_init(&sync.e_cond, NULL);
+
+ pthread_mutex_lock(&sync.e_mt);
+
+ mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
+ if (mev == NULL) {
+ printf("Could not allocate echoer event\n");
+ exit(1);
+ }
+
+ while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
+ len = read(fd, buf, sizeof(buf));
+ if (len > 0) {
+ write(fd, buf, len);
+ write(0, buf, len);
+ } else {
+ break;
+ }
+ }
+
+ mevent_delete_close(mev);
+
+ pthread_mutex_unlock(&sync.e_mt);
+ pthread_mutex_destroy(&sync.e_mt);
+ pthread_cond_destroy(&sync.e_cond);
+
+ return (NULL);
+}
+
+#else
+
+static void *
+echoer(void *param)
+{
+ char buf[128];
+ int fd = (int)(uintptr_t) param;
+ int len;
+
+ while ((len = read(fd, buf, sizeof(buf))) > 0) {
+ write(1, buf, len);
+ }
+
+ return (NULL);
+}
+#endif /* MEVENT_ECHO */
+
+static void
+acceptor_callback(int fd, enum ev_type type, void *param)
+{
+ pthread_mutex_lock(&accept_mutex);
+ pthread_cond_signal(&accept_condvar);
+ pthread_mutex_unlock(&accept_mutex);
+}
+
+static void *
+acceptor(void *param)
+{
+ struct sockaddr_in sin;
+ pthread_t tid;
+ int news;
+ int s;
+ static int first;
+
+ if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ perror("socket");
+ exit(1);
+ }
+
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(TEST_PORT);
+
+ if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+ perror("bind");
+ exit(1);
+ }
+
+ if (listen(s, 1) < 0) {
+ perror("listen");
+ exit(1);
+ }
+
+ (void) mevent_add(s, EVF_READ, acceptor_callback, NULL);
+
+ pthread_mutex_lock(&accept_mutex);
+
+ while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
+ news = accept(s, NULL, NULL);
+ if (news < 0) {
+ perror("accept error");
+ } else {
+ static int first = 1;
+
+ if (first) {
+ /*
+ * Start a timer
+ */
+ first = 0;
+ tevp = mevent_add(1, EVF_TIMER, timer_callback,
+ NULL);
+ }
+
+ printf("incoming connection, spawning thread\n");
+ pthread_create(&tid, NULL, echoer,
+ (void *)(uintptr_t)news);
+ }
+ }
+
+ return (NULL);
+}
+
+int
+main(void)
+{
+ pthread_t tid;
+
+ pthread_create(&tid, NULL, acceptor, NULL);
+
+ mevent_dispatch();
+}
Index: usr.sbin/bhyve/arm64/reset.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/reset.h
@@ -0,0 +1,6 @@
+#ifndef _RESET_H_
+#define _RESET_H_
+
+#define RESET_MAGIC 0xDEAD9731
+
+#endif /* _RESET_H_ */
Index: usr.sbin/bhyve/arm64/reset.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/arm64/reset.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "mem.h"
+#include "reset.h"
+#include "vmmapi.h"
+
+#define RESET_PORT 0x1c090100
+
+static int
+reset_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size,
+    uint64_t *val, void *arg1, long arg2)
+{
+ vm_destroy(ctx);
+
+ return (RESET_MAGIC);
+}
+
+struct mem_range resetport = {
+ "reset",
+ 0,
+ reset_handler,
+ NULL,
+ 0,
+ RESET_PORT,
+ sizeof(int)
+};
+
+void
+init_reset(void)
+{
+ register_mem(&resetport);
+}
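
For context (not part of the diff, and only an assumption about how the return value is consumed): reset_handler() destroys the VM and returns RESET_MAGIC, so the MMIO-exit path can presumably distinguish a guest-requested reset from an ordinary emulation error, roughly along these lines:

/*
 * Sketch only; the real handling lives in bhyverun.c, which is not shown
 * in this part of the diff.
 */
err = emulate_mem(ctx, vcpu, paddr, vie);
if (err == RESET_MAGIC) {
	/* The handler already called vm_destroy(); just exit. */
	exit(0);
}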
Index: usr.sbin/bhyve/block_if.c
===================================================================
--- usr.sbin/bhyve/block_if.c
+++ usr.sbin/bhyve/block_if.c
@@ -58,7 +58,10 @@
#include <unistd.h>
#include <machine/atomic.h>
+
+#ifdef BHYVE_SNAPSHOT
#include <machine/vmm_snapshot.h>
+#endif
#include "bhyverun.h"
#include "debug.h"
Index: usr.sbin/bhyve/consport.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/consport.c
@@ -0,0 +1,178 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/select.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sysexits.h>
+
+#include "inout.h"
+#include "pci_lpc.h"
+#include "debug.h"
+
+#define BVM_CONSOLE_PORT 0x220
+#define BVM_CONS_SIG ('b' << 8 | 'v')
+
+static struct termios tio_orig, tio_new;
+
+static void
+ttyclose(void)
+{
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
+}
+
+static void
+ttyopen(void)
+{
+ tcgetattr(STDIN_FILENO, &tio_orig);
+
+ cfmakeraw(&tio_new);
+ tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
+ raw_stdio = 1;
+
+ atexit(ttyclose);
+}
+
+static bool
+tty_char_available(void)
+{
+ fd_set rfds;
+ struct timeval tv;
+
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) {
+ return (true);
+ } else {
+ return (false);
+ }
+}
+
+static int
+ttyread(void)
+{
+ char rb;
+
+ if (tty_char_available()) {
+ read(STDIN_FILENO, &rb, 1);
+ return (rb & 0xff);
+ } else {
+ return (-1);
+ }
+}
+
+static void
+ttywrite(unsigned char wb)
+{
+ (void) write(STDOUT_FILENO, &wb, 1);
+}
+
+static int
+console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+ uint32_t *eax, void *arg)
+{
+ static int opened;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+ cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ };
+#endif
+
+ if (bytes == 2 && in) {
+ *eax = BVM_CONS_SIG;
+ return (0);
+ }
+
+ /*
+ * Guests might probe this port to look for old ISA devices
+ * using single-byte reads. Return 0xff for those.
+ */
+ if (bytes == 1 && in) {
+ *eax = 0xff;
+ return (0);
+ }
+
+ if (bytes != 4)
+ return (-1);
+
+ if (!opened) {
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ,
+ CAP_WRITE);
+ if (caph_rights_limit(STDIN_FILENO, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+ if (caph_ioctls_limit(STDIN_FILENO, cmds, nitems(cmds)) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+ ttyopen();
+ opened = 1;
+ }
+
+ if (in)
+ *eax = ttyread();
+ else
+ ttywrite(*eax);
+
+ return (0);
+}
+
+SYSRES_IO(BVM_CONSOLE_PORT, 4);
+
+static struct inout_port consport = {
+ "bvmcons",
+ BVM_CONSOLE_PORT,
+ 1,
+ IOPORT_F_INOUT,
+ console_handler
+};
+
+void
+init_bvmcons(void)
+{
+
+ register_inout(&consport);
+}
Index: usr.sbin/bhyve/dbgport.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/dbgport.h
@@ -0,0 +1,36 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DBGPORT_H_
+#define _DBGPORT_H_
+
+void init_dbgport(int port);
+
+#endif
Index: usr.sbin/bhyve/dbgport.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/dbgport.c
@@ -0,0 +1,178 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/uio.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "inout.h"
+#include "dbgport.h"
+#include "pci_lpc.h"
+
+#define BVM_DBG_PORT 0x224
+#define BVM_DBG_SIG ('B' << 8 | 'V')
+
+static int listen_fd, conn_fd;
+
+static struct sockaddr_in sin;
+
+static int
+dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+ uint32_t *eax, void *arg)
+{
+ int nwritten, nread, printonce;
+ int on = 1;
+ char ch;
+
+ if (bytes == 2 && in) {
+ *eax = BVM_DBG_SIG;
+ return (0);
+ }
+
+ if (bytes != 4)
+ return (-1);
+
+again:
+ printonce = 0;
+ while (conn_fd < 0) {
+ if (!printonce) {
+ printf("Waiting for connection from gdb\r\n");
+ printonce = 1;
+ }
+ conn_fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK);
+ if (conn_fd >= 0) {
+ /* Avoid EPIPE after the client drops off. */
+ (void)setsockopt(conn_fd, SOL_SOCKET, SO_NOSIGPIPE,
+ &on, sizeof(on));
+			/* Improve latency for one byte at a time transfers. */
+ (void)setsockopt(conn_fd, IPPROTO_TCP, TCP_NODELAY,
+ &on, sizeof(on));
+ } else if (errno != EINTR) {
+ perror("accept");
+ }
+ }
+
+ if (in) {
+ nread = read(conn_fd, &ch, 1);
+ if (nread == -1 && errno == EAGAIN)
+ *eax = -1;
+ else if (nread == 1)
+ *eax = ch;
+ else {
+ close(conn_fd);
+ conn_fd = -1;
+ goto again;
+ }
+ } else {
+ ch = *eax;
+ nwritten = write(conn_fd, &ch, 1);
+ if (nwritten != 1) {
+ close(conn_fd);
+ conn_fd = -1;
+ goto again;
+ }
+ }
+ return (0);
+}
+
+static struct inout_port dbgport = {
+ "bvmdbg",
+ BVM_DBG_PORT,
+ 1,
+ IOPORT_F_INOUT,
+ dbg_handler
+};
+
+SYSRES_IO(BVM_DBG_PORT, 4);
+
+void
+init_dbgport(int sport)
+{
+ int reuse;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+#endif
+
+ conn_fd = -1;
+
+ if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+ perror("cannot create socket");
+ exit(4);
+ }
+
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(sport);
+
+ reuse = 1;
+ if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse,
+ sizeof(reuse)) < 0) {
+ perror("cannot set socket options");
+ exit(4);
+ }
+
+ if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
+ perror("cannot bind socket");
+ exit(4);
+ }
+
+ if (listen(listen_fd, 1) < 0) {
+ perror("cannot listen socket");
+ exit(4);
+ }
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_ACCEPT, CAP_READ, CAP_WRITE);
+ if (caph_rights_limit(listen_fd, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ register_inout(&dbgport);
+}
Index: usr.sbin/bhyve/mmio/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/Makefile.inc
@@ -0,0 +1,17 @@
+#
+# $FreeBSD$
+#
+
+.PATH: ${BHYVE_SRCTOP}/mmio/
+SRCS+= \
+ mmio/mmio_virtio_block.c \
+ mmio/mmio_virtio_console.c \
+ mmio/mmio_virtio_net.c \
+ mmio/mmio_virtio_rnd.c \
+ mmio/mmio_emul.c \
+ mmio/mmio_irq.c \
+ mmio/net_utils.c \
+ mmio/mmio_virtio.c
+
+
+CFLAGS+= -I${BHYVE_SRCTOP}/mmio
Index: usr.sbin/bhyve/mmio/mmio_emul.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_emul.h
@@ -0,0 +1,116 @@
+#ifndef _EMUL_H_
+#define _EMUL_H_
+
+#include <sys/types.h>
+
+#include <assert.h>
+
+struct vmctx;
+struct mmio_devinst;
+
+/* TODO: choose more suggestive names */
+struct mmio_devemu {
+ char *de_emu; /* Device emulation name */
+
+ /* Instance creation */
+ int (*de_init)(struct vmctx *ctx, struct mmio_devinst *di,
+ char *opts);
+
+ /* Read / Write callbacks */
+ void (*de_write)(struct vmctx *ctx, int vcpu,
+ struct mmio_devinst *di, int baridx,
+ uint64_t offset, int size, uint64_t val);
+
+ uint64_t (*de_read)(struct vmctx *ctx, int vcpu,
+ struct mmio_devinst *di, int baridx,
+ uint64_t offset, int size);
+};
+
+#define MMIO_EMUL_SET(x) DATA_SET(mmio_set, x);
+#define DI_NAMESZ 40
+#define MMIO_REGMAX 0xff
+#define MMIO_REGNUM (MMIO_REGMAX + 1)
+
+struct devinst_addr {
+ uint64_t baddr;
+ uint64_t size;
+};
+
+enum lintr_stat {
+ IDLE,
+ ASSERTED,
+ PENDING
+};
+
+/* TODO: choose more suggestive names */
+struct mmio_devinst {
+ struct mmio_devemu *pi_d; /* Back ref to device */
+ struct vmctx *pi_vmctx; /* Owner VM context */
+ /* unused for mmio device emulation; may be used as uniquifiers */
+ int pi_slot, di_func;
+
+ char pi_name[DI_NAMESZ]; /* Instance name */
+
+ struct {
+ enum lintr_stat state;
+ int64_t irq;
+ pthread_mutex_t lock;
+ } di_lintr;
+
+ void *pi_arg; /* Private data */
+
+	u_char pi_cfgregs[MMIO_REGNUM]; /* Config registers */
+
+ struct devinst_addr addr; /* Address info */
+};
+
+int mmio_parse_opts(const char *args);
+int mmio_alloc_mem(struct mmio_devinst *di);
+int init_mmio(struct vmctx *ctx);
+void mmio_lintr_request(struct mmio_devinst *di);
+void mmio_lintr_assert(struct mmio_devinst *di);
+void mmio_lintr_deassert(struct mmio_devinst *di);
+
+static __inline void
+mmio_set_cfgreg8(struct mmio_devinst *di, size_t offset, uint8_t val)
+{
+	assert(offset <= MMIO_REGMAX);
+	*(uint8_t *)(di->pi_cfgregs + offset) = val;
+}
+
+static __inline void
+mmio_set_cfgreg16(struct mmio_devinst *di, size_t offset, uint16_t val)
+{
+	assert(offset <= (MMIO_REGMAX - 1) && (offset & 1) == 0);
+	*(uint16_t *)(di->pi_cfgregs + offset) = val;
+}
+
+static __inline void
+mmio_set_cfgreg32(struct mmio_devinst *di, size_t offset, uint32_t val)
+{
+ assert(offset <= (MMIO_REGMAX - 3) && (offset & 3) == 0);
+ *(uint32_t *)(di->pi_cfgregs + offset) = val;
+}
+
+static __inline uint8_t
+mmio_get_cfgreg8(struct mmio_devinst *di, size_t offset)
+{
+ assert(offset <= MMIO_REGMAX);
+	return (*(uint8_t *)(di->pi_cfgregs + offset));
+}
+
+static __inline uint16_t
+mmio_get_cfgreg16(struct mmio_devinst *di, size_t offset)
+{
+ assert(offset <= (MMIO_REGMAX - 1) && (offset & 1) == 0);
+	return (*(uint16_t *)(di->pi_cfgregs + offset));
+}
+
+static __inline uint32_t
+mmio_get_cfgreg32(struct mmio_devinst *di, size_t offset)
+{
+ assert(offset <= (MMIO_REGMAX - 3) && (offset & 3) == 0);
+ return (*(uint32_t *)(di->pi_cfgregs + offset));
+}
+
+#endif /* _EMUL_H_ */
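
To illustrate how this interface is meant to be consumed, here is a minimal sketch of a device emulation registering itself through MMIO_EMUL_SET (not part of the diff; the "dummy" device name and behaviour are hypothetical, and mirror the way PCI_EMUL_SET is used by the existing pci_emul code):

static int
dummy_init(struct vmctx *ctx, struct mmio_devinst *di, char *opts)
{
	/*
	 * di->addr.baddr and di->addr.size were filled in from the
	 * command-line options; claim and register the range.
	 */
	return (mmio_alloc_mem(di));
}

static void
dummy_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
    int baridx, uint64_t offset, int size, uint64_t val)
{
	/* Writes are ignored. */
}

static uint64_t
dummy_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
    int baridx, uint64_t offset, int size)
{
	return (0);
}

static struct mmio_devemu mmio_dummy = {
	.de_emu = "dummy",
	.de_init = dummy_init,
	.de_write = dummy_write,
	.de_read = dummy_read,
};
MMIO_EMUL_SET(mmio_dummy);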
Index: usr.sbin/bhyve/mmio/mmio_emul.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_emul.c
@@ -0,0 +1,440 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/linker_set.h>
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "arm64/mem.h"
+#include "mmio_emul.h"
+#include "mmio_irq.h"
+
+#define DEVEMU_MEMLIMIT 0xFD00000000UL
+#define DEVEMU_MEMBASE 0xD000000000UL
+#define MEM_ROUNDUP (1 << 20)
+#ifndef max
+# define max(A, B) ((A) > (B) ? (A) : (B))
+#endif
+
+static uint64_t mmio_membase;
+
+SET_DECLARE(mmio_set, struct mmio_devemu);
+
+static struct mmio_devemu *mmio_finddev(char *name);
+static void mmio_lintr_route(struct mmio_devinst *di);
+static void mmio_lintr_update(struct mmio_devinst *di);
+
+static struct mmio_emul_info {
+ uint64_t size; /* address size */
+ uint64_t baddr; /* address */
+ int64_t irq; /* device interrupt number */
+ char *name; /* device name */
+ char *arg; /* device arguments */
+ struct mmio_emul_info *next; /* pointer for linked list */
+ struct mmio_devinst *di; /* pointer to device instance */
+} *mmio_emul_info_head = NULL;
+
+/*
+ * MMIO options are in the form:
+ *
+ * <size>@<base_addr>#<irq>:<emul>[,<config>]
+ *
+ * - size is the number of bytes required for the device mmio
+ * - base_addr is the base address for the MMIO mapped device;
+ * - irq specifies the device interrupt number; the value MUST be a DECIMAL
+ *   integer; if the device does not use interrupts, use -1
+ * - emul is a string describing the type of device - e.g., virtio-net;
+ * - config is an optional string, depending on the device, that is used
+ * for configuration
+ *
+ * Examples of use:
+ * 0x200@0x100000#25:virtio-net,tap0
+ * 0x100@0x200000#-1:dummy
+ */
+static void
+mmio_parse_opts_usage(const char *args)
+{
+ fprintf(stderr, "Invalid mmio arguments \"%s\"\r\n", args);
+}
+
+/*
+ * Checks whether two memory regions overlap.  Regions with a base address
+ * of zero (i.e., not yet assigned) never overlap anything.
+ */
+static int
+mmio_mem_overlap(uint64_t pa, uint64_t sa, uint64_t pb, uint64_t sb)
+{
+	if ((pa == 0) || (pb == 0))
+		return (0);
+
+	/* Half-open intervals [pa, pa + sa) and [pb, pb + sb) overlap. */
+	if (pa < pb + sb && pb < pa + sa)
+		return (1);
+
+	return (0);
+}
+
+int
+mmio_parse_opts(const char *args)
+{
+ char *emul, *config, *str;
+ uint64_t size, baddr;
+ int64_t irq;
+ int error;
+ struct mmio_emul_info *dif;
+
+ error = -1;
+ emul = config = NULL;
+ baddr = 0, size = 0;
+ str = strdup(args);
+
+ if ((emul = strchr(str, ':')) != NULL) {
+ *emul++ = '\0';
+
+ /* <size>@<base-addr>#<irq> */
+		if (sscanf(str, "%jx@%jx#%jd", &size, &baddr, &irq) != 3) {
+ mmio_parse_opts_usage(str);
+ goto parse_error;
+ }
+ } else {
+ mmio_parse_opts_usage(str);
+ goto parse_error;
+ }
+
+ if ((config = strchr(emul, ',')) != NULL)
+ *config++ = '\0';
+
+ /*
+ * check if the required address can be obtained;
+ * if an address has not been requested, ignore the checks
+ * (however, an address will have to be later identified)
+ */
+ if (baddr != 0) {
+ for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next)
+ if (mmio_mem_overlap(dif->baddr, dif->size,
+ baddr, size))
+ break;
+
+ if (dif != NULL) {
+ fprintf(stderr, "The requested address 0x%jx is "
+ "already bound or overlapping\r\n", baddr);
+ error = EINVAL;
+ goto parse_error;
+ }
+ }
+
+ dif = calloc(1, sizeof(struct mmio_emul_info));
+ if (dif == NULL) {
+ error = ENOMEM;
+ goto parse_error;
+ }
+
+ dif->next = mmio_emul_info_head;
+ mmio_emul_info_head = dif;
+
+ dif->size = size;
+ dif->baddr = baddr;
+ dif->irq = irq;
+ if ((emul != NULL) && (strlen(emul)) > 0)
+ dif->name = strdup(emul);
+ else
+ dif->name = NULL;
+ if ((config != NULL) && (strlen(config)) > 0)
+ dif->arg = strdup(config);
+ else
+ dif->arg = NULL;
+
+ error = 0;
+
+parse_error:
+ free(str);
+
+	return (error);
+}
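
A short usage sketch for the parser above (not part of the diff; the error handling and the exact call site are assumptions, but the option strings follow the format documented in the comment):

if (mmio_parse_opts("0x200@0x100000#25:virtio-net,tap0") != 0)
	errx(1, "invalid mmio device specification");
if (mmio_parse_opts("0x100@0x200000#-1:dummy") != 0)
	errx(1, "invalid mmio device specification");
/* ... after the vmctx exists ... */
if (init_mmio(ctx) != 0)
	errx(1, "could not initialize mmio device emulation");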
+
+static int
+mmio_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
+ int size, uint64_t *val, void *arg1, long arg2)
+{
+ struct mmio_devinst *di = arg1;
+ struct mmio_devemu *de = di->pi_d;
+ uint64_t offset;
+ int bidx = (int) arg2;
+
+ assert(di->addr.baddr <= addr &&
+ addr + size <= di->addr.baddr + di->addr.size);
+
+ offset = addr - di->addr.baddr;
+
+ if (dir == MEM_F_WRITE) {
+ if (size == 8) {
+ (*de->de_write)(ctx, vcpu, di, bidx, offset,
+ 4, *val & 0xffffffff);
+ (*de->de_write)(ctx, vcpu, di, bidx, offset + 4,
+ 4, *val >> 32);
+ } else {
+ (*de->de_write)(ctx, vcpu, di, bidx, offset,
+ size, *val);
+ }
+ } else {
+ if (size == 8) {
+ *val = (*de->de_read)(ctx, vcpu, di, bidx,
+ offset, 4);
+ *val |= (*de->de_read)(ctx, vcpu, di, bidx,
+ offset + 4, 4) << 32;
+ } else {
+ *val = (*de->de_read)(ctx, vcpu, di, bidx,
+ offset, size);
+ }
+ }
+
+ return (0);
+}
+
+static void
+modify_mmio_registration(struct mmio_devinst *di, int registration)
+{
+ int error;
+ struct mem_range mr;
+
+ bzero(&mr, sizeof(struct mem_range));
+ mr.name = di->pi_name;
+ mr.base = di->addr.baddr;
+ mr.size = di->addr.size;
+ if (registration) {
+ mr.flags = MEM_F_RW;
+ mr.handler = mmio_mem_handler;
+ mr.arg1 = di;
+ mr.arg2 = 0;
+ error = register_mem(&mr);
+ } else {
+ error = unregister_mem(&mr);
+ }
+
+ assert(error == 0);
+}
+
+static void
+register_mmio(struct mmio_devinst *di)
+{
+	modify_mmio_registration(di, 1);
+}
+
+static void
+unregister_mmio(struct mmio_devinst *di)
+{
+	modify_mmio_registration(di, 0);
+}
+
+/*
+ * Update the MMIO address that is decoded
+ */
+static void
+update_mem_address(struct mmio_devinst *di, uint64_t addr)
+{
+ /* TODO: check if the decoding is running */
+ unregister_mmio(di);
+
+ di->addr.baddr = addr;
+
+ register_mmio(di);
+}
+
+static int
+mmio_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
+ uint64_t *addr)
+{
+ uint64_t base;
+
+ assert((size & (size - 1)) == 0); /* must be a power of 2 */
+
+ base = roundup2(*baseptr, size);
+
+ if (base + size <= limit) {
+ *addr = base;
+ *baseptr = base + size;
+ return (0);
+ } else
+ return (-1);
+}
+
+int
+mmio_alloc_mem(struct mmio_devinst *di)
+{
+ int error;
+ uint64_t *baseptr, limit, addr, size;
+
+ baseptr = &di->addr.baddr;
+ size = di->addr.size;
+ limit = DEVEMU_MEMLIMIT;
+
+ if ((size & (size - 1)) != 0)
+ /* Round up to a power of 2 */
+ size = 1UL << flsl(size);
+
+ error = mmio_alloc_resource(baseptr, limit, size, &addr);
+ if (error != 0)
+ return (error);
+
+ di->addr.baddr = addr;
+
+ register_mmio(di);
+
+ return (0);
+}
+
+static struct mmio_devemu *
+mmio_finddev(char *name)
+{
+ struct mmio_devemu **dpp, *dp;
+
+ SET_FOREACH(dpp, mmio_set) {
+ dp = *dpp;
+ if (!strcmp(dp->de_emu, name))
+ return (dp);
+ }
+
+ return (NULL);
+}
+
+static int
+mmio_init(struct vmctx *ctx, struct mmio_devemu *de, struct mmio_emul_info *dif)
+{
+ struct mmio_devinst *di;
+ int error;
+
+ di = calloc(1, sizeof(struct mmio_devinst));
+ if (di == NULL)
+ return (ENOMEM);
+
+ di->pi_d = de;
+ di->pi_vmctx = ctx;
+ snprintf(di->pi_name, DI_NAMESZ, "%s-mmio", de->de_emu);
+ di->di_lintr.state = IDLE;
+ di->di_lintr.irq = dif->irq;
+ pthread_mutex_init(&di->di_lintr.lock, NULL);
+ di->addr.baddr = dif->baddr;
+ di->addr.size = dif->size;
+	/*
+	 * Some devices (e.g., virtio-net) use these as uniquifiers;
+	 * the irq number should be unique and sufficient.
+	 */
+ di->pi_slot = dif->irq;
+ di->di_func = dif->irq;
+
+ error = (*de->de_init)(ctx, di, dif->arg);
+
+ if (error == 0) {
+ dif->di = di;
+ } else {
+ fprintf(stderr, "Device \"%s\": initialization failed\r\n",
+ di->pi_name);
+ fprintf(stderr, "Device arguments were: %s\r\n", dif->arg);
+ free(di);
+ }
+
+ return (error);
+}
+
+static void
+init_mmio_error(const char *name)
+{
+ struct mmio_devemu **mdpp, *mdp;
+
+ fprintf(stderr, "Device \"%s\" does not exist\r\n", name);
+ fprintf(stderr, "The following devices are available:\r\n");
+
+ SET_FOREACH(mdpp, mmio_set) {
+ mdp = *mdpp;
+ fprintf(stderr, "\t%s\r\n", mdp->de_emu);
+ }
+}
+
+int
+init_mmio(struct vmctx *ctx)
+{
+ struct mmio_devemu *de;
+ struct mmio_emul_info *dif;
+ int error;
+
+ mmio_membase = DEVEMU_MEMBASE;
+
+ for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next) {
+ if (dif->name == NULL)
+ continue;
+
+ de = mmio_finddev(dif->name);
+ if (de == NULL) {
+ init_mmio_error(dif->name);
+ return (1);
+ }
+
+ error = mmio_init(ctx, de, dif);
+ if (error != 0)
+ return (error);
+
+ /*
+		 * As in the amd64 implementation, add some slop to the
+		 * memory resources decoded, in order to give the guest
+		 * some flexibility to reprogram the addresses.
+ */
+ mmio_membase += MEM_ROUNDUP;
+ mmio_membase = roundup2(mmio_membase, MEM_ROUNDUP);
+ }
+
+ /* activate the interrupts */
+ for (dif = mmio_emul_info_head; dif != NULL; dif = dif->next)
+ if (dif->di != NULL)
+ mmio_lintr_route(dif->di);
+
+ /* TODO: register fallback handlers? */
+
+ return (0);
+}
+
+void
+mmio_lintr_request(struct mmio_devinst *di)
+{
+ /* do nothing */
+}
+
+static void
+mmio_lintr_route(struct mmio_devinst *di)
+{
+ /* do nothing */
+}
+
+void
+mmio_lintr_assert(struct mmio_devinst *di)
+{
+ pthread_mutex_lock(&di->di_lintr.lock);
+ if (di->di_lintr.state == IDLE) {
+ di->di_lintr.state = ASSERTED;
+ mmio_irq_assert(di);
+ }
+ pthread_mutex_unlock(&di->di_lintr.lock);
+}
+
+void
+mmio_lintr_deassert(struct mmio_devinst *di)
+{
+ pthread_mutex_lock(&di->di_lintr.lock);
+ if (di->di_lintr.state == ASSERTED) {
+ mmio_irq_deassert(di);
+ di->di_lintr.state = IDLE;
+ } else if (di->di_lintr.state == PENDING) {
+ di->di_lintr.state = IDLE;
+ }
+ pthread_mutex_unlock(&di->di_lintr.lock);
+}
+
+/* TODO: Add dummy? */
Index: usr.sbin/bhyve/mmio/mmio_irq.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_irq.h
@@ -0,0 +1,12 @@
+#ifndef __MMIO_IRQ_H__
+#define __MMIO_IRQ_H__
+
+struct mmio_devinst;
+struct vmctx;
+
+void mmio_irq_init(struct vmctx *ctx);
+void mmio_irq_reserve(int irq);
+void mmio_irq_use(int irq);
+void mmio_irq_assert(struct mmio_devinst *di);
+void mmio_irq_deassert(struct mmio_devinst *di);
+
+#endif
Index: usr.sbin/bhyve/mmio/mmio_irq.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_irq.c
@@ -0,0 +1,113 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <machine/vmm.h>
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <vmmapi.h>
+
+#include "mmio_emul.h"
+#include "mmio_irq.h"
+#include "mmio_virtio.h"
+
+/* Value of 'active' marking an IRQ as reserved and unavailable for use */
+#define IRQ_DISABLED 0xff
+
+static struct mmio_irq {
+ uint32_t use_count; /* number of binds */
+ uint32_t active_count; /* number of asserts */
+ uint32_t active; /* irq active */
+ pthread_mutex_t lock;
+} irqs[50];
+
+void
+mmio_irq_reserve(int irq)
+{
+ assert(irq >= 0 && irq < nitems(irqs));
+ assert(irqs[irq].active == 0 || irqs[irq].active == IRQ_DISABLED);
+ irqs[irq].active = IRQ_DISABLED;
+}
+
+void
+mmio_irq_use(int irq)
+{
+ assert(irq >= 0 && irq < nitems(irqs));
+ assert(irqs[irq].active != IRQ_DISABLED);
+ irqs[irq].active++;
+}
+
+void
+mmio_irq_init(struct vmctx *ctx)
+{
+ int i;
+
+ for (i = 0; i < nitems(irqs); ++i) {
+ irqs[i].use_count = 0;
+ irqs[i].active_count = 0;
+ irqs[i].active = 0;
+ pthread_mutex_init(&irqs[i].lock, NULL);
+ }
+}
+
+void
+mmio_irq_assert(struct mmio_devinst *di)
+{
+ struct mmio_irq *irq;
+ uint32_t irq_status;
+
+	if (di->di_lintr.irq < 0)
+		return;
+	assert(di->di_lintr.irq < nitems(irqs));
+
+ irq = &irqs[di->di_lintr.irq];
+
+ pthread_mutex_lock(&irq->lock);
+ irq->active_count++;
+
+ pthread_mutex_lock(&di->di_lintr.lock);
+
+ irq_status = mmio_get_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS);
+ irq_status |= VIRTIO_MMIO_INT_VRING;
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS, irq_status);
+
+ if (irq->active_count == 1)
+ vm_assert_irq(di->pi_vmctx, di->di_lintr.irq);
+
+ pthread_mutex_unlock(&di->di_lintr.lock);
+
+ pthread_mutex_unlock(&irq->lock);
+}
+
+void
+mmio_irq_deassert(struct mmio_devinst *di)
+{
+ struct mmio_irq *irq;
+ uint32_t irq_status;
+
+	if (di->di_lintr.irq < 0)
+		return;
+	assert(di->di_lintr.irq < nitems(irqs));
+
+ irq = &irqs[di->di_lintr.irq];
+
+ pthread_mutex_lock(&irq->lock);
+ irq->active_count--;
+
+ pthread_mutex_lock(&di->di_lintr.lock);
+
+ irq_status = mmio_get_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS);
+ irq_status &= ~VIRTIO_MMIO_INT_VRING;
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_INTERRUPT_STATUS, irq_status);
+
+#if 0
+ /* MMIO devices do not require deassertions */
+ if (irq->active_count == 0)
+		vm_deassert_irq(di->pi_vmctx, di->di_lintr.irq);
+#endif
+
+ pthread_mutex_unlock(&di->di_lintr.lock);
+
+ pthread_mutex_unlock(&irq->lock);
+}
Index: usr.sbin/bhyve/mmio/mmio_virtio.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio.h
@@ -0,0 +1,484 @@
+/*-
+ * Copyright (c) 2013 Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VIRTIO_H_
+#define _VIRTIO_H_
+
+#include <machine/atomic.h>
+
+/*
+ * These are derived from several virtio specifications.
+ *
+ * Some useful links:
+ * https://github.com/rustyrussell/virtio-spec
+ * http://people.redhat.com/pbonzini/virtio-spec.pdf
+ */
+
+/*
+ * A virtual device has zero or more "virtual queues" (virtqueue).
+ * Each virtqueue uses at least two 4096-byte pages, laid out thus:
+ *
+ * +-----------------------------------------------+
+ * | "desc": <N> descriptors, 16 bytes each |
+ * | ----------------------------------------- |
+ * | "avail": 2 uint16; <N> uint16; 1 uint16 |
+ * | ----------------------------------------- |
+ * | pad to 4k boundary |
+ * +-----------------------------------------------+
+ * | "used": 2 x uint16; <N> elems; 1 uint16 |
+ * | ----------------------------------------- |
+ * | pad to 4k boundary |
+ * +-----------------------------------------------+
+ *
+ * The number <N> that appears here is always a power of two and is
+ * limited to no more than 32768 (as it must fit in a 16-bit field).
+ * If <N> is sufficiently large, the above will occupy more than
+ * two pages. In any case, all pages must be physically contiguous
+ * within the guest's physical address space.
+ *
+ * The <N> 16-byte "desc" descriptors consist of a 64-bit guest
+ * physical address <addr>, a 32-bit length <len>, a 16-bit
+ * <flags>, and a 16-bit <next> field (all in guest byte order).
+ *
+ * There are three flags that may be set :
+ * NEXT descriptor is chained, so use its "next" field
+ * WRITE descriptor is for host to write into guest RAM
+ * (else host is to read from guest RAM)
+ * INDIRECT descriptor address field is (guest physical)
+ * address of a linear array of descriptors
+ *
+ * Unless INDIRECT is set, <len> is the number of bytes that may
+ * be read/written from guest physical address <addr>. If
+ * INDIRECT is set, WRITE is ignored and <len> provides the length
+ * of the indirect descriptors (and <len> must be a multiple of
+ * 16). Note that NEXT may still be set in the main descriptor
+ * pointing to the indirect, and should be set in each indirect
+ * descriptor that uses the next descriptor (these should generally
+ * be numbered sequentially). However, INDIRECT must not be set
+ * in the indirect descriptors. Upon reaching an indirect descriptor
+ * without a NEXT bit, control returns to the direct descriptors.
+ *
+ * Except inside an indirect, each <next> value must be in the
+ * range [0 .. N) (i.e., the half-open interval). (Inside an
+ * indirect, each <next> must be in the range [0 .. <len>/16).)
+ *
+ * The "avail" data structures reside in the same pages as the
+ * "desc" structures since both together are used by the device to
+ * pass information to the hypervisor's virtual driver. These
+ * begin with a 16-bit <flags> field and 16-bit index <idx>, then
+ * have <N> 16-bit <ring> values, followed by one final 16-bit
+ * field <used_event>. The <N> <ring> entries are simply indices
+ * into the descriptor ring (and thus must meet the same
+ * constraints as each <next> value). However, <idx> is counted
+ * up from 0 (initially) and simply wraps around after 65535; it
+ * is taken mod <N> to find the next available entry.
+ *
+ * The "used" ring occupies a separate page or pages, and contains
+ * values written from the virtual driver back to the guest OS.
+ * This begins with a 16-bit <flags> and 16-bit <idx>, then there
+ * are <N> "vring_used" elements, followed by a 16-bit <avail_event>.
+ * The <N> "vring_used" elements consist of a 32-bit <id> and a
+ * 32-bit <len> (vu_tlen below). The <id> is simply the index of
+ * the head of a descriptor chain the guest made available
+ * earlier, and the <len> is the number of bytes actually written,
+ * e.g., in the case of a network driver that provided a large
+ * receive buffer but received only a small amount of data.
+ *
+ * The two event fields, <used_event> and <avail_event>, in the
+ * avail and used rings (respectively -- note the reversal!), are
+ * always provided, but are used only if the virtual device
+ * negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature
+ * negotiation. Similarly, both rings provide a flag --
+ * VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in
+ * their <flags> field, indicating that the guest does not need an
+ * interrupt, or that the hypervisor driver does not need a
+ * notify, when descriptors are added to the corresponding ring.
+ * (These are provided only for interrupt optimization and need
+ * not be implemented.)
+ */
+
+#define VIRTIO_MMIO_MAGIC_VALUE 0x000
+#define VIRTIO_MMIO_VERSION 0x004
+#define VIRTIO_MMIO_DEVICE_ID 0x008
+#define VIRTIO_MMIO_VENDOR_ID 0x00c
+#define VIRTIO_MMIO_HOST_FEATURES 0x010
+#define VIRTIO_MMIO_HOST_FEATURES_SEL 0x014
+#define VIRTIO_MMIO_GUEST_FEATURES 0x020
+#define VIRTIO_MMIO_GUEST_FEATURES_SEL 0x024
+#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028
+#define VIRTIO_MMIO_QUEUE_SEL 0x030
+#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034
+#define VIRTIO_MMIO_QUEUE_NUM 0x038
+#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c
+#define VIRTIO_MMIO_QUEUE_PFN 0x040
+#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050
+#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060
+#define VIRTIO_MMIO_INTERRUPT_ACK 0x064
+#define VIRTIO_MMIO_STATUS 0x070
+#define VIRTIO_MMIO_CONFIG 0x100
+#define VIRTIO_MMIO_INT_VRING (1 << 0)
+#define VIRTIO_MMIO_INT_CONFIG (1 << 1)
+#define VIRTIO_MMIO_VRING_ALIGN 4096
+
+#define VRING_ALIGN 4096
+
+#define VRING_DESC_F_NEXT (1 << 0)
+#define VRING_DESC_F_WRITE (1 << 1)
+#define VRING_DESC_F_INDIRECT (1 << 2)
+
+struct virtio_desc { /* AKA vring_desc */
+ uint64_t vd_addr; /* guest physical address */
+ uint32_t vd_len; /* length of scatter/gather seg */
+ uint16_t vd_flags; /* VRING_F_DESC_* */
+ uint16_t vd_next; /* next desc if F_NEXT */
+} __packed;
+
+struct virtio_used { /* AKA vring_used_elem */
+ uint32_t vu_idx; /* head of used descriptor chain */
+ uint32_t vu_tlen; /* length written-to */
+} __packed;
+
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+struct vring_avail {
+ uint16_t va_flags; /* VRING_AVAIL_F_* */
+ uint16_t va_idx; /* counts to 65535, then cycles */
+ uint16_t va_ring[]; /* size N, reported in QNUM value */
+/* uint16_t va_used_event; -- after N ring entries */
+} __packed;
+
+#define VRING_USED_F_NO_NOTIFY 1
+struct vring_used {
+ uint16_t vu_flags; /* VRING_USED_F_* */
+ uint16_t vu_idx; /* counts to 65535, then cycles */
+ struct virtio_used vu_ring[]; /* size N */
+/* uint16_t vu_avail_event; -- after N ring entries */
+} __packed;
+
+/*
+ * The address of any given virtual queue is determined by a single
+ * Page Frame Number register. The guest writes the PFN into the
+ * PCI config space. However, a device that has two or more
+ * virtqueues can have a different PFN, and size, for each queue.
+ * The number of queues is determinable via the PCI config space
+ * VTCFG_R_QSEL register. Writes to QSEL select the queue: 0 means
+ * queue #0, 1 means queue#1, etc. Once a queue is selected, the
+ * remaining PFN and QNUM registers refer to that queue.
+ *
+ * QNUM is a read-only register containing a nonzero power of two
+ * that indicates the (hypervisor's) queue size. Or, if reading it
+ * produces zero, the hypervisor does not have a corresponding
+ * queue. (The number of possible queues depends on the virtual
+ * device. The block device has just one; the network device
+ * provides either two -- 0 = receive, 1 = transmit -- or three,
+ * with 2 = control.)
+ *
+ * PFN is a read/write register giving the physical page address of
+ * the virtqueue in guest memory (the guest must allocate enough space
+ * based on the hypervisor's provided QNUM).
+ *
+ * QNOTIFY is effectively write-only: when the guest writes a queue
+ * number to the register, the hypervisor should scan the specified
+ * virtqueue. (Reading QNOTIFY currently always gets 0).
+ */
+
+/*
+ * PFN register shift amount
+ */
+#define VRING_PFN 12
+
+/*
+ * Virtio device types
+ *
+ * XXX Should really be merged with <dev/virtio/virtio.h> defines
+ */
+#define VIRTIO_TYPE_NET 1
+#define VIRTIO_TYPE_BLOCK 2
+#define VIRTIO_TYPE_CONSOLE 3
+#define VIRTIO_TYPE_ENTROPY 4
+#define VIRTIO_TYPE_BALLOON 5
+#define VIRTIO_TYPE_IOMEMORY 6
+#define VIRTIO_TYPE_RPMSG 7
+#define VIRTIO_TYPE_SCSI 8
+#define VIRTIO_TYPE_9P 9
+
+/* experimental IDs start at 65535 and work down */
+
+/*
+ * PCI vendor/device IDs
+ */
+#define VIRTIO_VENDOR 0x1AF4
+#define VIRTIO_DEV_NET 0x1000
+#define VIRTIO_DEV_BLOCK 0x1001
+#define VIRTIO_DEV_CONSOLE 0x1003
+#define VIRTIO_DEV_RANDOM 0x1005
+
+#define VIRTIO_MMIO_MAGIC_NUM 0x74726976
+#define VIRTIO_MMIO_VERSION_NUM 0x1
+
+/*
+ * Bits in VTCFG_R_STATUS. Guests need not actually set any of these,
+ * but a guest writing 0 to this register means "please reset".
+ */
+#define VTCFG_STATUS_ACK 0x01 /* guest OS has acknowledged dev */
+#define VTCFG_STATUS_DRIVER 0x02 /* guest OS driver is loaded */
+#define VTCFG_STATUS_DRIVER_OK 0x04 /* guest OS driver ready */
+#define VTCFG_STATUS_FAILED 0x80 /* guest has given up on this dev */
+
+/*
+ * Bits in VTCFG_R_ISR. These apply only if not using MSI-X.
+ *
+ * (We don't [yet?] ever use CONF_CHANGED.)
+ */
+#define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */
+#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */
+
+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
+
+/*
+ * Feature flags.
+ * Note: bits 0 through 23 are reserved to each device type.
+ */
+#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24)
+#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28)
+#define VIRTIO_RING_F_EVENT_IDX (1 << 29)
+
+/* From section 2.3, "Virtqueue Configuration", of the virtio specification */
+static inline size_t
+vring_size(u_int qsz, uint32_t align)
+{
+ size_t size;
+
+ /* constant 3 below = va_flags, va_idx, va_used_event */
+ size = sizeof(struct virtio_desc) * qsz + sizeof(uint16_t) * (3 + qsz);
+ size = roundup2(size, align);
+
+ /* constant 3 below = vu_flags, vu_idx, vu_avail_event */
+ size += sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz;
+ size = roundup2(size, align);
+
+ return (size);
+}
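
A worked example of the computation above (my arithmetic, not from the diff), for a 256-entry queue with 4096-byte alignment:

/*
 * vring_size(256, 4096):
 *   descriptors:  256 * 16          = 4096 bytes
 *   avail ring:   (3 + 256) * 2     =  518 bytes
 *   subtotal, rounded up to 4096    = 8192 bytes
 *   used ring:    3 * 2 + 256 * 8   = 2054 bytes
 *   total, rounded up to 4096       = 12288 bytes (three 4 KiB pages)
 *
 * i.e. assert(vring_size(256, 4096) == 3 * 4096) would hold.
 */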
+
+struct vmctx;
+struct mmio_devinst;
+struct vqueue_info;
+
+/*
+ * A virtual device, with some number (possibly 0) of virtual
+ * queues and some size (possibly 0) of configuration-space
+ * registers private to the device. The virtio_softc should come
+ * at the front of each "derived class", so that a pointer to the
+ * virtio_softc is also a pointer to the more specific, derived-
+ * from-virtio driver's softc.
+ *
+ * Note: inside each hypervisor virtio driver, changes to these
+ * data structures must be locked against other threads, if any.
+ * Except for PCI config space register read/write, we assume each
+ * driver does the required locking, but we need a pointer to the
+ * lock (if there is one) for PCI config space read/write ops.
+ *
+ * When the guest reads or writes the device's config space, the
+ * generic layer checks for operations on the special registers
+ * described above. If the offset of the register(s) being read
+ * or written is past the CFG area (CFG0 or CFG1), the request is
+ * passed on to the virtual device, after subtracting off the
+ * generic-layer size. (So, drivers can just use the offset as
+ * an offset into "struct config", for instance.)
+ *
+ * (The virtio layer also makes sure that the read or write is to/
+ * from a "good" config offset, hence vc_cfgsize, and on BAR #0.
+ * However, the driver must verify the read or write size and offset
+ * and that no one is writing a readonly register.)
+ *
+ * The BROKED flag ("this thing done gone and broked") is for future
+ * use.
+ */
+#define VIRTIO_USE_MSIX 0x01
+#define VIRTIO_EVENT_IDX 0x02 /* use the event-index values */
+#define VIRTIO_BROKED 0x08 /* ??? */
+
+struct virtio_softc {
+ struct virtio_consts *vs_vc; /* constants (see below) */
+ int vs_flags; /* VIRTIO_* flags from above */
+ pthread_mutex_t *vs_mtx; /* POSIX mutex, if any */
+ struct mmio_devinst *vs_di; /* device instance */
+ uint32_t vs_negotiated_caps; /* negotiated capabilities */
+ uint32_t vs_align; /* virtual queue alignment */
+ struct vqueue_info *vs_queues; /* one per vc_nvq */
+ int vs_curq; /* current queue */
+ int irq; /* interrupt */
+ uint8_t vs_status; /* value from last status write */
+ uint32_t vs_guest_page_size; /* size of guest page in bytes */
+};
+
+#define VS_LOCK(vs) \
+do { \
+ if (vs->vs_mtx) \
+ pthread_mutex_lock(vs->vs_mtx); \
+} while (0)
+
+#define VS_UNLOCK(vs) \
+do { \
+ if (vs->vs_mtx) \
+ pthread_mutex_unlock(vs->vs_mtx); \
+} while (0)
+
+struct virtio_consts {
+ const char *vc_name; /* name of driver (for diagnostics) */
+ int vc_nvq; /* number of virtual queues */
+ size_t vc_cfgsize; /* size of dev-specific config regs */
+ void (*vc_reset)(void *); /* called on virtual device reset */
+ void (*vc_qnotify)(void *, struct vqueue_info *);
+ /* called on QNOTIFY if no VQ notify */
+ int (*vc_cfgread)(void *, int, int, uint32_t *);
+ /* called to read config regs */
+ int (*vc_cfgwrite)(void *, int, int, uint32_t);
+ /* called to write config regs */
+ void (*vc_apply_features)(void *, uint64_t);
+ /* called to apply negotiated features */
+ uint64_t vc_hv_caps; /* hypervisor-provided capabilities */
+};
+
+/*
+ * Data structure allocated (statically) per virtual queue.
+ *
+ * Drivers may change vq_qsize after a reset. When the guest OS
+ * requests a device reset, the hypervisor first calls
+ * vs->vs_vc->vc_reset(); then the data structure below is
+ * reinitialized (for each virtqueue: vs->vs_vc->vc_nvq).
+ *
+ * The remaining fields should only be fussed-with by the generic
+ * code.
+ *
+ * Note: the addresses of vq_desc, vq_avail, and vq_used are all
+ * computable from each other, but it's a lot simpler if we just
+ * keep a pointer to each one. The event indices are similarly
+ * (but more easily) computable, and this time we'll compute them:
+ * they're just XX_ring[N].
+ */
+#define VQ_ALLOC 0x01 /* set once we have a pfn */
+#define VQ_BROKED 0x02 /* ??? */
+struct vqueue_info {
+ uint16_t vq_qsize; /* size of this queue (a power of 2) */
+ void (*vq_notify)(void *, struct vqueue_info *);
+ /* called instead of vc_notify, if not NULL */
+
+ struct virtio_softc *vq_vs; /* backpointer to softc */
+ uint16_t vq_num; /* we're the num'th queue in the softc */
+
+ uint16_t vq_flags; /* flags (see above) */
+ uint16_t vq_last_avail; /* a recent value of vq_avail->va_idx */
+ uint16_t vq_save_used; /* saved vq_used->vu_idx; see vq_endchains */
+
+ uint32_t vq_pfn; /* PFN of virt queue (not shifted!) */
+
+ volatile struct virtio_desc *vq_desc; /* descriptor array */
+ volatile struct vring_avail *vq_avail; /* the "avail" ring */
+ volatile struct vring_used *vq_used; /* the "used" ring */
+};
+/* as noted above, these are sort of backwards, name-wise */
+#define VQ_AVAIL_EVENT_IDX(vq) \
+ (*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize])
+#define VQ_USED_EVENT_IDX(vq) \
+ ((vq)->vq_avail->va_ring[(vq)->vq_qsize])
+
+/*
+ * Is this ring ready for I/O?
+ */
+static inline int
+vq_ring_ready(struct vqueue_info *vq)
+{
+
+ return (vq->vq_flags & VQ_ALLOC);
+}
+
+/*
+ * Are there "available" descriptors? (This does not count
+ * how many, just returns True if there are some.)
+ */
+static inline int
+vq_has_descs(struct vqueue_info *vq)
+{
+
+ return (vq_ring_ready(vq) && vq->vq_last_avail !=
+ vq->vq_avail->va_idx);
+}
+
+/*
+ * Deliver an interrupt to guest on the given virtual queue
+ * (if possible, or a generic MSI interrupt if not using MSI-X).
+ */
+static inline void
+vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
+{
+ VS_LOCK(vs);
+ mmio_lintr_assert(vs->vs_di);
+ VS_UNLOCK(vs);
+}
+
+static inline void
+vq_kick_enable(struct vqueue_info *vq)
+{
+
+ vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
+ /*
+ * Full memory barrier to make sure the store to vu_flags
+ * happens before the load from va_idx, which results from
+ * a subsequent call to vq_has_descs().
+ */
+ atomic_thread_fence_seq_cst();
+}
+
+static inline void
+vq_kick_disable(struct vqueue_info *vq)
+{
+
+ vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
+}
+
+struct iovec;
+void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+ void *dev_softc, struct mmio_devinst *di,
+ struct vqueue_info *queues);
+int vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix);
+void vi_reset_dev(struct virtio_softc *);
+void vi_set_io_res(struct virtio_softc *, int);
+void vi_vq_init(struct virtio_softc *vs, uint32_t pfn);
+
+int vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
+ struct iovec *iov, int n_iov, uint16_t *flags);
+void vq_retchains(struct vqueue_info *vq, uint16_t n_chains);
+void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
+void vq_endchains(struct vqueue_info *vq, int used_all_avail);
+
+uint64_t vi_mmio_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size);
+void vi_mmio_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size, uint64_t value);
+void vi_devemu_init(struct mmio_devinst *di, uint32_t type);
+#endif /* _VIRTIO_H_ */
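
The helpers declared above are consumed by the per-device backends in this diff (e.g. mmio_virtio_block.c). Below is a condensed sketch of the usual notify-handler shape, with hypothetical names and a fixed iovec count chosen purely for illustration:

static void
example_notify(void *vsc, struct vqueue_info *vq)
{
	struct iovec iov[8];	/* struct iovec comes from <sys/uio.h> */
	uint16_t flags[8];
	uint16_t idx;
	int n;

	while (vq_has_descs(vq)) {
		n = vq_getchain(vq, &idx, iov, 8, flags);
		if (n <= 0)
			break;
		/* ... service the request described by iov[0..n-1] ... */
		vq_relchain(vq, idx, 0);	/* 0 bytes written back */
	}
	vq_endchains(vq, 1);	/* interrupt the guest if it asked for one */
}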
Index: usr.sbin/bhyve/mmio/mmio_virtio.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio.c
@@ -0,0 +1,707 @@
+/*-
+ * Copyright (c) 2013 Chris Torek <torek @ torek net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/uio.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "bhyverun.h"
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+#include "virtio_ids.h"
+
+static int debug_virtio = 0;
+
+#define DPRINTF(fmt, ...) \
+	do { if (debug_virtio) printf(fmt, ##__VA_ARGS__); } while (0)
+#define CFG_RW_DBG(offset, value) \
+ DPRINTF("{device} | %-60s | %-35s | %-30s (%jx): value = %jx\r\n", \
+ __FILE__, __func__, #offset, (uintmax_t)offset, (uintmax_t)value);
+
+/*
+ * Functions for dealing with generalized "virtual devices" as
+ * defined by <https://www.google.com/#output=search&q=virtio+spec>
+ */
+
+/*
+ * In case we decide to relax the "virtio softc comes at the
+ * front of virtio-based device softc" constraint, let's use
+ * this to convert.
+ */
+#define DEV_SOFTC(vs) ((void *)(vs))
+
+/*
+ * Link a virtio_softc to its constants, the device softc, and
+ * the MMIO device emulation.
+ */
+void
+vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
+ void *dev_softc, struct mmio_devinst *di,
+ struct vqueue_info *queues)
+{
+ int i;
+
+ /* vs and dev_softc addresses must match */
+ assert((void *)vs == dev_softc);
+ vs->vs_vc = vc;
+ vs->vs_di = di;
+ di->pi_arg = vs;
+
+ vs->vs_queues = queues;
+ for (i = 0; i < vc->vc_nvq; i++) {
+ queues[i].vq_vs = vs;
+ queues[i].vq_num = i;
+ }
+}
+
+/*
+ * Reset device (device-wide). This erases all queues, i.e.,
+ * all the queues become invalid (though we don't wipe out the
+ * internal pointers, we just clear the VQ_ALLOC flag).
+ *
+ * It resets negotiated features to "none".
+ */
+void
+vi_reset_dev(struct virtio_softc *vs)
+{
+ struct vqueue_info *vq;
+ int i, nvq;
+
+ if (vs->vs_mtx)
+ assert(pthread_mutex_isowned_np(vs->vs_mtx));
+
+ nvq = vs->vs_vc->vc_nvq;
+ for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
+ vq->vq_flags = 0;
+ vq->vq_last_avail = 0;
+ vq->vq_save_used = 0;
+ vq->vq_pfn = 0;
+ }
+ vs->vs_negotiated_caps = 0;
+ vs->vs_curq = 0;
+ /* vs->vs_status = 0; -- redundant */
+ mmio_lintr_deassert(vs->vs_di);
+}
+
+void
+vi_set_io_res(struct virtio_softc *vs, int barnum)
+{
+ mmio_alloc_mem(vs->vs_di);
+}
+
+/*
+ * Initialize interrupts for MMIO
+ */
+int
+vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
+{
+ /* activate interrupts */
+ mmio_lintr_request(vs->vs_di);
+
+ return (0);
+}
+
+/*
+ * Initialize the currently-selected virtio queue (vs->vs_curq).
+ * The guest just gave us a page frame number, from which we can
+ * calculate the addresses of the queue.
+ */
+void
+vi_vq_init(struct virtio_softc *vs, uint32_t pfn)
+{
+ struct vqueue_info *vq;
+ uint64_t phys;
+ size_t size;
+ char *base;
+
+ vq = &vs->vs_queues[vs->vs_curq];
+ vq->vq_pfn = pfn;
+ phys = (uint64_t)pfn * vs->vs_guest_page_size;
+ size = vring_size(vq->vq_qsize, vs->vs_align);
+ base = paddr_guest2host(vs->vs_di->pi_vmctx, phys, size);
+
+ /* First page(s) are descriptors... */
+ vq->vq_desc = (struct virtio_desc *)base;
+ base += vq->vq_qsize * sizeof(struct virtio_desc);
+
+ /* ... immediately followed by "avail" ring (entirely uint16_t's) */
+ vq->vq_avail = (struct vring_avail *)base;
+ base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
+
+ /* Then it's rounded up to the next page... */
+ base = (char *)roundup2((uintptr_t)base, vs->vs_align);
+
+ /* ... and the last page(s) are the used ring. */
+ vq->vq_used = (struct vring_used *)base;
+
+ /* Mark queue as allocated, and start at 0 when we use it. */
+ vq->vq_flags = VQ_ALLOC;
+ vq->vq_last_avail = 0;
+ vq->vq_save_used = 0;
+}
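+
+/*
+ * Worked example (illustrative only, assuming the legacy 16-byte descriptor
+ * layout used above): for a 128-entry queue with vs_guest_page_size and
+ * vs_align both 4096,
+ *
+ *	descriptor table: 128 * 16          = 2048 bytes
+ *	avail ring:       (2 + 128 + 1) * 2 =  262 bytes
+ *
+ * which rounds up to 4096, so the used ring starts one page into the
+ * region and the whole layout fits within two guest pages.
+ */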
+
+/*
+ * Helper inline for vq_getchain(): record the i'th "real"
+ * descriptor.
+ */
+static inline void
+_vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx,
+ struct iovec *iov, int n_iov, uint16_t *flags) {
+
+ if (i >= n_iov)
+ return;
+ iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, vd->vd_len);
+ iov[i].iov_len = vd->vd_len;
+ if (flags != NULL)
+ flags[i] = vd->vd_flags;
+}
+#define VQ_MAX_DESCRIPTORS 512 /* see below */
+
+/*
+ * Examine the chain of descriptors starting at the "next one" to
+ * make sure that they describe a sensible request. If so, return
+ * the number of "real" descriptors that would be needed/used in
+ * acting on this request. This may be smaller than the number of
+ * available descriptors, e.g., if there are two available but
+ * they are two separate requests, this just returns 1. Or, it
+ * may be larger: if there are indirect descriptors involved,
+ * there may only be one descriptor available but it may be an
+ * indirect pointing to eight more. We return 8 in this case,
+ * i.e., we do not count the indirect descriptors, only the "real"
+ * ones.
+ *
+ * Basically, this vets the vd_flags and vd_next field of each
+ * descriptor and tells you how many are involved. Since some may
+ * be indirect, this also needs the vmctx (in the mmio_devinst
+ * at vs->vs_di) so that it can find indirect descriptors.
+ *
+ * As we process each descriptor, we copy and adjust it (guest to
+ * host address wise, also using the vmctx) into the given iov[]
+ * array (of the given size). If the array overflows, we stop
+ * placing values into the array but keep processing descriptors,
+ * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
+ * So you, the caller, must not assume that iov[] is as big as the
+ * return value (you can process the same thing twice to allocate
+ * a larger iov array if needed, or supply a zero length to find
+ * out how much space is needed).
+ *
+ * If you want to verify the WRITE flag on each descriptor, pass a
+ * non-NULL "flags" pointer to an array of "uint16_t" of the same size
+ * as n_iov and we'll copy each vd_flags field after unwinding any
+ * indirects.
+ *
+ * If some descriptor(s) are invalid, this prints a diagnostic message
+ * and returns -1. If no descriptors are ready now it simply returns 0.
+ *
+ * You are assumed to have done a vq_ring_ready() if needed (note
+ * that vq_has_descs() does one).
+ */
+int
+vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
+ struct iovec *iov, int n_iov, uint16_t *flags)
+{
+ int i;
+ u_int ndesc, n_indir;
+ u_int idx, next;
+ volatile struct virtio_desc *vdir, *vindir, *vp;
+ struct vmctx *ctx;
+ struct virtio_softc *vs;
+ const char *name;
+
+ vs = vq->vq_vs;
+ name = vs->vs_vc->vc_name;
+
+ /*
+ * Note: it's the responsibility of the guest not to
+ * update vq->vq_avail->va_idx until all of the descriptors
+ * the guest has written are valid (including all their
+ * vd_next fields and vd_flags).
+ *
+	 * Compute (va_idx - last_avail) in integers mod 2**16.  This is
+	 * the number of descriptors the guest has made available
+ * since the last time we updated vq->vq_last_avail.
+ *
+ * We just need to do the subtraction as an unsigned int,
+ * then trim off excess bits.
+ */
+ idx = vq->vq_last_avail;
+ ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx);
+ if (ndesc == 0)
+ return (0);
+ if (ndesc > vq->vq_qsize) {
+ /* XXX need better way to diagnose issues */
+ fprintf(stderr,
+ "%s: ndesc (%u) out of range, driver confused?\r\n",
+ name, (u_int)ndesc);
+ return (-1);
+ }
+
+ /*
+ * Now count/parse "involved" descriptors starting from
+ * the head of the chain.
+ *
+ * To prevent loops, we could be more complicated and
+ * check whether we're re-visiting a previously visited
+ * index, but we just abort if the count gets excessive.
+ */
+ ctx = vs->vs_di->pi_vmctx;
+ *pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)];
+ vq->vq_last_avail++;
+ for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
+ if (next >= vq->vq_qsize) {
+ fprintf(stderr,
+ "%s: descriptor index %u out of range, "
+ "driver confused?\r\n",
+ name, next);
+ return (-1);
+ }
+ vdir = &vq->vq_desc[next];
+ if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
+ _vq_record(i, vdir, ctx, iov, n_iov, flags);
+ i++;
+ } else if ((vs->vs_vc->vc_hv_caps &
+ VIRTIO_RING_F_INDIRECT_DESC) == 0) {
+ fprintf(stderr,
+ "%s: descriptor has forbidden INDIRECT flag, "
+ "driver confused?\r\n",
+ name);
+ return (-1);
+ } else {
+ n_indir = vdir->vd_len / 16;
+ if ((vdir->vd_len & 0xf) || n_indir == 0) {
+ fprintf(stderr,
+ "%s: invalid indir len 0x%x, "
+ "driver confused?\r\n",
+ name, (u_int)vdir->vd_len);
+ return (-1);
+ }
+ vindir = paddr_guest2host(ctx,
+ vdir->vd_addr, vdir->vd_len);
+ /*
+ * Indirects start at the 0th, then follow
+ * their own embedded "next"s until those run
+ * out. Each one's indirect flag must be off
+ * (we don't really have to check, could just
+ * ignore errors...).
+ */
+ next = 0;
+ for (;;) {
+ vp = &vindir[next];
+ if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
+ fprintf(stderr,
+ "%s: indirect desc has INDIR flag,"
+ " driver confused?\r\n",
+ name);
+ return (-1);
+ }
+ _vq_record(i, vp, ctx, iov, n_iov, flags);
+ if (++i > VQ_MAX_DESCRIPTORS)
+ goto loopy;
+ if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
+ break;
+ next = vp->vd_next;
+ if (next >= n_indir) {
+ fprintf(stderr,
+ "%s: invalid next %u > %u, "
+ "driver confused?\r\n",
+ name, (u_int)next, n_indir);
+ return (-1);
+ }
+ }
+ }
+ if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0)
+ return (i);
+ }
+loopy:
+ fprintf(stderr,
+ "%s: descriptor loop? count > %d - driver confused?\r\n",
+ name, i);
+ return (-1);
+}
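+
+/*
+ * Typical calling pattern for the chain API (illustrative sketch only; the
+ * real callers are the per-device notify handlers further down):
+ *
+ *	struct iovec iov[8];
+ *	uint16_t idx, flags[8];
+ *	int n;
+ *
+ *	while (vq_has_descs(vq)) {
+ *		n = vq_getchain(vq, &idx, iov, 8, flags);
+ *		if (n <= 0)
+ *			break;
+ *		... process iov[0..n-1], honouring flags[i] ...
+ *		vq_relchain(vq, idx, bytes_written_back_to_guest);
+ *	}
+ *	vq_endchains(vq, 1);
+ */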
+
+/*
+ * Return the first n_chains request chains back to the available queue.
+ *
+ * (These chains are the ones you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void
+vq_retchains(struct vqueue_info *vq, uint16_t n_chains)
+{
+
+ vq->vq_last_avail -= n_chains;
+}
+
+/*
+ * Return specified request chain to the guest, setting its I/O length
+ * to the provided value.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void
+vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
+{
+ uint16_t uidx, mask;
+ volatile struct vring_used *vuh;
+ volatile struct virtio_used *vue;
+
+ /*
+ * Notes:
+ * - mask is N-1 where N is a power of 2 so computes x % N
+ * - vuh points to the "used" data shared with guest
+ * - vue points to the "used" ring entry we want to update
+	 * - head is the same value we compute in vq_getchain().
+ *
+ * (I apologize for the two fields named vu_idx; the
+ * virtio spec calls the one that vue points to, "id"...)
+ */
+ mask = vq->vq_qsize - 1;
+ vuh = vq->vq_used;
+
+ uidx = vuh->vu_idx;
+ vue = &vuh->vu_ring[uidx++ & mask];
+ vue->vu_idx = idx;
+ vue->vu_tlen = iolen;
+ vuh->vu_idx = uidx;
+}
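+
+/*
+ * For example, the block device emulation below returns each completed
+ * request with vq_relchain(&sc->vbsc_vq, io->io_idx, 1), reporting the
+ * single status byte it wrote back to the guest.
+ */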
+
+/*
+ * Driver has finished processing "available" chains and calling
+ * vq_relchain on each one.  If the driver used all the available
+ * chains, used_all_avail should be set.
+ *
+ * If the "used" index moved we may need to inform the guest, i.e.,
+ * deliver an interrupt. Even if the used index did NOT move we
+ * may need to deliver an interrupt, if the avail ring is empty and
+ * we are supposed to interrupt on empty.
+ *
+ * Note that used_all_avail is provided by the caller because it's
+ * a snapshot of the ring state when it decided to finish interrupt
+ * processing -- it's possible that descriptors became available after
+ * that point. (It's also typically a constant 1/True as well.)
+ */
+void
+vq_endchains(struct vqueue_info *vq, int used_all_avail)
+{
+ struct virtio_softc *vs;
+ uint16_t event_idx, new_idx, old_idx;
+ int intr;
+
+ /*
+ * Interrupt generation: if we're using EVENT_IDX,
+ * interrupt if we've crossed the event threshold.
+ * Otherwise interrupt is generated if we added "used" entries,
+ * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
+ *
+ * In any case, though, if NOTIFY_ON_EMPTY is set and the
+ * entire avail was processed, we need to interrupt always.
+ */
+ vs = vq->vq_vs;
+ old_idx = vq->vq_save_used;
+ vq->vq_save_used = new_idx = vq->vq_used->vu_idx;
+ if (used_all_avail &&
+ (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY))
+ intr = 1;
+ else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
+ event_idx = VQ_USED_EVENT_IDX(vq);
+ /*
+ * This calculation is per docs and the kernel
+ * (see src/sys/dev/virtio/virtio_ring.h).
+ */
+ intr = (uint16_t)(new_idx - event_idx - 1) <
+ (uint16_t)(new_idx - old_idx);
+ } else {
+ intr = new_idx != old_idx &&
+ !(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+ if (intr)
+ vq_interrupt(vs, vq);
+}
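+
+/*
+ * Worked example of the EVENT_IDX test above (illustrative): with
+ * old_idx = 10, new_idx = 13 and a guest-supplied used_event of 11,
+ * (uint16_t)(13 - 11 - 1) = 1 is less than (uint16_t)(13 - 10) = 3, so the
+ * guest's threshold was crossed and an interrupt is raised.  With
+ * used_event = 14 the left-hand side wraps to 65534 and no interrupt is
+ * delivered.
+ */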
+
+/*
+ * Handle MMIO register reads.
+ * If it's to the interrupt system, do that.
+ * If it's part of the standard virtio registers, do that.
+ * Otherwise dispatch to the actual device emulation.
+ */
+uint64_t
+vi_mmio_read(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size)
+{
+ struct virtio_softc *vs = di->pi_arg;
+ struct virtio_consts *vc;
+ const char *name;
+ uint64_t sel;
+ uint32_t value;
+ int error;
+
+ if (vs->vs_mtx)
+ pthread_mutex_lock(vs->vs_mtx);
+
+ vc = vs->vs_vc;
+ name = vc->vc_name;
+ value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff;
+
+ if (size != 1 && size != 2 && size != 4)
+ goto bad;
+
+ if (offset >= VIRTIO_MMIO_CONFIG) {
+ error = (*vc->vc_cfgread)(DEV_SOFTC(vs),
+ offset - VIRTIO_MMIO_CONFIG,
+ size,
+ &value);
+ if (error)
+ goto bad;
+
+ CFG_RW_DBG(offset, value);
+ goto done;
+ }
+
+ switch (offset) {
+ case VIRTIO_MMIO_MAGIC_VALUE:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_MAGIC_VALUE, value);
+ break;
+ case VIRTIO_MMIO_VERSION:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_VERSION, value);
+ break;
+ case VIRTIO_MMIO_DEVICE_ID:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_DEVICE_ID, value);
+ break;
+ case VIRTIO_MMIO_VENDOR_ID:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_VENDOR_ID, value);
+ break;
+ case VIRTIO_MMIO_INTERRUPT_STATUS:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_INTERRUPT_STATUS, value);
+ break;
+ case VIRTIO_MMIO_STATUS:
+ value = mmio_get_cfgreg32(di, offset);
+ CFG_RW_DBG(VIRTIO_MMIO_STATUS, value);
+ break;
+ case VIRTIO_MMIO_HOST_FEATURES:
+ sel = mmio_get_cfgreg32(di, VIRTIO_MMIO_HOST_FEATURES_SEL);
+ value = (vc->vc_hv_caps >> (32 * sel)) & 0xffffffff;
+ CFG_RW_DBG(VIRTIO_MMIO_HOST_FEATURES, value);
+ break;
+ case VIRTIO_MMIO_QUEUE_NUM_MAX:
+ value = vs->vs_curq < vc->vc_nvq ?
+ vs->vs_queues[vs->vs_curq].vq_qsize : 0;
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NUM_MAX, value);
+ break;
+ case VIRTIO_MMIO_QUEUE_PFN:
+ value = vs->vs_curq < vc->vc_nvq ?
+ vs->vs_queues[vs->vs_curq].vq_pfn : 0;
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_PFN, value);
+ break;
+ default:
+ CFG_RW_DBG(offset, value);
+ goto bad;
+ break;
+ }
+
+ goto done;
+
+bad:
+ fprintf(stderr, "%s: read from bad offset/size: %jd/%d\r\n",
+ name, (uintmax_t)offset, size);
+
+done:
+ if (vs->vs_mtx)
+ pthread_mutex_unlock(vs->vs_mtx);
+ return (value);
+}
+
+/*
+ * Handle MMIO register writes.
+ * If it's to the interrupt system, do that.
+ * If it's part of the standard virtio registers, do that.
+ * Otherwise dispatch to the actual device emulation.
+ */
+void
+vi_mmio_write(struct vmctx *ctx, int vcpu, struct mmio_devinst *di,
+ int baridx, uint64_t offset, int size, uint64_t value)
+{
+ struct virtio_softc *vs = di->pi_arg;
+ struct vqueue_info *vq;
+ struct virtio_consts *vc;
+ const char *name;
+ int error;
+
+ if (vs->vs_mtx)
+ pthread_mutex_lock(vs->vs_mtx);
+
+ vc = vs->vs_vc;
+ name = vc->vc_name;
+
+ if (size != 1 && size != 2 && size != 4)
+ goto bad;
+
+ if (offset >= VIRTIO_MMIO_CONFIG) {
+ error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs),
+ offset - VIRTIO_MMIO_CONFIG,
+ size, value);
+ if (error)
+ goto bad;
+
+ CFG_RW_DBG(offset, value);
+ goto done;
+ }
+
+ switch (offset) {
+ case VIRTIO_MMIO_HOST_FEATURES_SEL:
+ CFG_RW_DBG(VIRTIO_MMIO_HOST_FEATURES_SEL, value);
+ mmio_set_cfgreg32(di, offset, value);
+ break;
+ case VIRTIO_MMIO_GUEST_FEATURES_SEL:
+ CFG_RW_DBG(VIRTIO_MMIO_GUEST_FEATURES_SEL, value);
+ mmio_set_cfgreg32(di, offset, value);
+ break;
+ case VIRTIO_MMIO_INTERRUPT_ACK:
+ CFG_RW_DBG(VIRTIO_MMIO_INTERRUPT_ACK, value);
+ mmio_lintr_deassert(di);
+ mmio_set_cfgreg32(di, offset, value);
+ break;
+ case VIRTIO_MMIO_STATUS:
+ CFG_RW_DBG(VIRTIO_MMIO_STATUS, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_status = value;
+ if (value == 0)
+ (*vc->vc_reset)(DEV_SOFTC(vs));
+ break;
+ case VIRTIO_MMIO_QUEUE_NUM:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NUM, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vq = &vs->vs_queues[vs->vs_curq];
+ vq->vq_qsize = value;
+ break;
+ case VIRTIO_MMIO_GUEST_FEATURES:
+ CFG_RW_DBG(VIRTIO_MMIO_GUEST_FEATURES, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_negotiated_caps = value & vc->vc_hv_caps;
+ if (vc->vc_apply_features)
+ (*vc->vc_apply_features)(DEV_SOFTC(vs),
+ vs->vs_negotiated_caps);
+ break;
+ case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_guest_page_size = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_SEL:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_SEL, value);
+ mmio_set_cfgreg32(di, offset, value);
+ /*
+ * Note that the guest is allowed to select an
+ * invalid queue; we just need to return a QNUM
+ * of 0 while the bad queue is selected.
+ */
+ vs->vs_curq = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_ALIGN:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_ALIGN, value);
+ mmio_set_cfgreg32(di, offset, value);
+ vs->vs_align = value;
+ break;
+ case VIRTIO_MMIO_QUEUE_PFN:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_PFN, value);
+ mmio_set_cfgreg32(di, offset, value);
+ if (vs->vs_curq >= vc->vc_nvq)
+ fprintf(stderr, "%s: curq %d >= max %d\r\n",
+ name, vs->vs_curq, vc->vc_nvq);
+ else
+ vi_vq_init(vs, value);
+ break;
+ case VIRTIO_MMIO_QUEUE_NOTIFY:
+ CFG_RW_DBG(VIRTIO_MMIO_QUEUE_NOTIFY, value);
+ if (value >= vc->vc_nvq) {
+ fprintf(stderr, "%s: queue %d notify out of range\r\n",
+ name, (int)value);
+ break;
+ }
+ mmio_set_cfgreg32(di, offset, value);
+ vq = &vs->vs_queues[value];
+ if (vq->vq_notify)
+ (*vq->vq_notify)(DEV_SOFTC(vs), vq);
+ else if (vc->vc_qnotify)
+ (*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
+ else
+ fprintf(stderr,
+ "%s: qnotify queue %d: missing vq/vc notify\r\n",
+ name, (int)value);
+ break;
+ default:
+ CFG_RW_DBG(offset, value);
+ goto bad;
+ break;
+ }
+
+ goto done;
+
+bad:
+ fprintf(stderr, "%s: write to bad offset/size %jd/%d\r\n",
+ name, (uintmax_t)offset, size);
+done:
+ if (vs->vs_mtx)
+ pthread_mutex_unlock(vs->vs_mtx);
+}
+
+void
+vi_devemu_init(struct mmio_devinst *di, uint32_t type)
+{
+ uint32_t id;
+
+ switch (type) {
+ case VIRTIO_TYPE_NET:
+ id = VIRTIO_ID_NETWORK;
+ break;
+ case VIRTIO_TYPE_BLOCK:
+ id = VIRTIO_ID_BLOCK;
+ break;
+ case VIRTIO_TYPE_CONSOLE:
+ id = VIRTIO_ID_CONSOLE;
+ break;
+ case VIRTIO_TYPE_ENTROPY:
+ id = VIRTIO_ID_ENTROPY;
+ break;
+ default:
+ return;
+ }
+
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_MAGIC_VALUE, VIRTIO_MMIO_MAGIC_NUM);
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_VERSION, VIRTIO_MMIO_VERSION_NUM);
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_DEVICE_ID, id);
+ mmio_set_cfgreg32(di, VIRTIO_MMIO_VENDOR_ID, VIRTIO_VENDOR);
+}
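+
+/*
+ * Illustrative view (per the legacy virtio-mmio register layout, not
+ * normative for this file): after vi_devemu_init() for a block device the
+ * guest's first reads of the window return
+ *
+ *	VIRTIO_MMIO_MAGIC_VALUE  -> 0x74726976 ("virt")
+ *	VIRTIO_MMIO_VERSION      -> 1 (legacy)
+ *	VIRTIO_MMIO_DEVICE_ID    -> VIRTIO_ID_BLOCK
+ *	VIRTIO_MMIO_VENDOR_ID    -> VIRTIO_VENDOR
+ */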
Index: usr.sbin/bhyve/mmio/mmio_virtio_block.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_block.c
@@ -0,0 +1,424 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <sys/disk.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <md5.h>
+#include <dev/pci/pcireg.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#include "block_if.h"
+
+#define VTBLK_RINGSZ 128
+
+_Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request");
+
+#define VTBLK_S_OK 0
+#define VTBLK_S_IOERR 1
+#define VTBLK_S_UNSUPP 2
+
+#define VTBLK_BLK_ID_BYTES 20 + 1
+
+/* Capability bits */
+#define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */
+#define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */
+#define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */
+#define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */
+
+/*
+ * Host capabilities
+ */
+#define VTBLK_S_HOSTCAPS \
+ ( VTBLK_F_SEG_MAX | \
+ VTBLK_F_BLK_SIZE | \
+ VTBLK_F_FLUSH | \
+ VTBLK_F_TOPOLOGY | \
+ VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */
+
+/*
+ * Config space "registers"
+ */
+struct vtblk_config {
+ uint64_t vbc_capacity;
+ uint32_t vbc_size_max;
+ uint32_t vbc_seg_max;
+ struct {
+ uint16_t cylinders;
+ uint8_t heads;
+ uint8_t sectors;
+ } vbc_geometry;
+ uint32_t vbc_blk_size;
+ struct {
+ uint8_t physical_block_exp;
+ uint8_t alignment_offset;
+ uint16_t min_io_size;
+ uint32_t opt_io_size;
+ } vbc_topology;
+ uint8_t vbc_writeback;
+} __packed;
+
+/*
+ * Fixed-size block header
+ */
+struct virtio_blk_hdr {
+#define VBH_OP_READ 0
+#define VBH_OP_WRITE 1
+#define VBH_OP_FLUSH 4
+#define VBH_OP_FLUSH_OUT 5
+#define VBH_OP_IDENT 8
+#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */
+ uint32_t vbh_type;
+ uint32_t vbh_ioprio;
+ uint64_t vbh_sector;
+} __packed;
+
+/*
+ * Debug printf
+ */
+static int pci_vtblk_debug;
+#define DPRINTF(params) if (pci_vtblk_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+struct pci_vtblk_ioreq {
+ struct blockif_req io_req;
+ struct pci_vtblk_softc *io_sc;
+ uint8_t *io_status;
+ uint16_t io_idx;
+};
+
+/*
+ * Per-device softc
+ */
+struct pci_vtblk_softc {
+ struct virtio_softc vbsc_vs;
+ pthread_mutex_t vsc_mtx;
+ struct vqueue_info vbsc_vq;
+ struct vtblk_config vbsc_cfg;
+ struct blockif_ctxt *bc;
+ char vbsc_ident[VTBLK_BLK_ID_BYTES];
+ struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
+};
+
+static void pci_vtblk_reset(void *);
+static void pci_vtblk_notify(void *, struct vqueue_info *);
+static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
+static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+
+static struct virtio_consts vtblk_vi_consts = {
+ "vtblk", /* our name */
+ 1, /* we support 1 virtqueue */
+ sizeof(struct vtblk_config), /* config reg size */
+ pci_vtblk_reset, /* reset */
+ pci_vtblk_notify, /* device-wide qnotify */
+	pci_vtblk_cfgread,		/* read virtio config */
+	pci_vtblk_cfgwrite,		/* write virtio config */
+ NULL, /* apply negotiated features */
+ VTBLK_S_HOSTCAPS, /* our capabilities */
+};
+
+static void
+pci_vtblk_reset(void *vsc)
+{
+ struct pci_vtblk_softc *sc = vsc;
+
+ DPRINTF(("vtblk: device reset requested !"));
+ vi_reset_dev(&sc->vbsc_vs);
+}
+
+static void
+pci_vtblk_done(struct blockif_req *br, int err)
+{
+ struct pci_vtblk_ioreq *io = br->br_param;
+ struct pci_vtblk_softc *sc = io->io_sc;
+
+ /* convert errno into a virtio block error return */
+ if (err == EOPNOTSUPP || err == ENOSYS)
+ *io->io_status = VTBLK_S_UNSUPP;
+ else if (err != 0)
+ *io->io_status = VTBLK_S_IOERR;
+ else
+ *io->io_status = VTBLK_S_OK;
+
+ /*
+ * Return the descriptor back to the host.
+ * We wrote 1 byte (our status) to host.
+ */
+ pthread_mutex_lock(&sc->vsc_mtx);
+ vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
+ vq_endchains(&sc->vbsc_vq, 0);
+ pthread_mutex_unlock(&sc->vsc_mtx);
+}
+
+static void
+pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
+{
+ struct virtio_blk_hdr *vbh;
+ struct pci_vtblk_ioreq *io;
+ int i, n;
+ int err;
+ ssize_t iolen;
+ int writeop, type;
+ struct iovec iov[BLOCKIF_IOV_MAX + 2];
+ uint16_t idx, flags[BLOCKIF_IOV_MAX + 2];
+
+ n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags);
+
+ /*
+ * The first descriptor will be the read-only fixed header,
+ * and the last is for status (hence +2 above and below).
+ * The remaining iov's are the actual data I/O vectors.
+ *
+ * XXX - note - this fails on crash dump, which does a
+ * VIRTIO_BLK_T_FLUSH with a zero transfer length
+ */
+ assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
+
+ io = &sc->vbsc_ios[idx];
+ assert((flags[0] & VRING_DESC_F_WRITE) == 0);
+ assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
+ vbh = iov[0].iov_base;
+ memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
+ io->io_req.br_iovcnt = n - 2;
+ io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE;
+ io->io_status = iov[--n].iov_base;
+ assert(iov[n].iov_len == 1);
+ assert(flags[n] & VRING_DESC_F_WRITE);
+
+ /*
+ * XXX
+ * The guest should not be setting the BARRIER flag because
+ * we don't advertise the capability.
+ */
+ type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
+ writeop = (type == VBH_OP_WRITE);
+
+ iolen = 0;
+ for (i = 1; i < n; i++) {
+ /*
+ * - write op implies read-only descriptor,
+ * - read/ident op implies write-only descriptor,
+ * therefore test the inverse of the descriptor bit
+ * to the op.
+ */
+ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
+ iolen += iov[i].iov_len;
+ }
+ io->io_req.br_resid = iolen;
+
+ DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %lld",
+ writeop ? "write" : "read/ident", iolen, i - 1,
+ (long long) io->io_req.br_offset));
+
+ switch (type) {
+ case VBH_OP_READ:
+ err = blockif_read(sc->bc, &io->io_req);
+ break;
+ case VBH_OP_WRITE:
+ err = blockif_write(sc->bc, &io->io_req);
+ break;
+ case VBH_OP_FLUSH:
+ case VBH_OP_FLUSH_OUT:
+ err = blockif_flush(sc->bc, &io->io_req);
+ break;
+ case VBH_OP_IDENT:
+		/*
+		 * Assume a single buffer.  A serial number that exactly
+		 * fills the buffer is not zero-terminated.
+		 */
+ memset(iov[1].iov_base, 0, iov[1].iov_len);
+ strncpy(iov[1].iov_base, sc->vbsc_ident,
+ MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
+ pci_vtblk_done(&io->io_req, 0);
+ return;
+ default:
+ pci_vtblk_done(&io->io_req, EOPNOTSUPP);
+ return;
+ }
+ assert(err == 0);
+}
+
+static void
+pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtblk_softc *sc = vsc;
+
+ while (vq_has_descs(vq))
+ pci_vtblk_proc(sc, vq);
+}
+
+static int
+pci_vtblk_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ char bident[sizeof("XX:X:X")];
+ struct blockif_ctxt *bctxt;
+ MD5_CTX mdctx;
+ u_char digest[16];
+ struct pci_vtblk_softc *sc;
+ off_t size;
+ int i, sectsz, sts, sto;
+
+ if (opts == NULL) {
+ WPRINTF(("virtio-block: backing device required"));
+ return (1);
+ }
+
+ /*
+ * The supplied backing file has to exist
+ */
+ snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->di_func);
+ bctxt = blockif_open(opts, bident);
+ if (bctxt == NULL) {
+ perror("Could not open backing file");
+ return (1);
+ }
+
+ size = blockif_size(bctxt);
+ sectsz = blockif_sectsz(bctxt);
+ blockif_psectsz(bctxt, &sts, &sto);
+
+ sc = calloc(1, sizeof(struct pci_vtblk_softc));
+ sc->bc = bctxt;
+ for (i = 0; i < VTBLK_RINGSZ; i++) {
+ struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
+ io->io_req.br_callback = pci_vtblk_done;
+ io->io_req.br_param = io;
+ io->io_sc = sc;
+ io->io_idx = i;
+ }
+
+ pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+ /* init virtio softc and virtqueues */
+ vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
+ sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
+
+ sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
+ /* sc->vbsc_vq.vq_notify = we have no per-queue notify */
+
+ /*
+ * Create an identifier for the backing file. Use parts of the
+	 * MD5 sum of the filename.
+ */
+ MD5Init(&mdctx);
+ MD5Update(&mdctx, opts, strlen(opts));
+ MD5Final(digest, &mdctx);
+ snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
+ "BHYVE-%02X%02X-%02X%02X-%02X%02X",
+ digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
+
+ /* setup virtio block config space */
+ sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */
+ sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
+
+ /*
+ * If Linux is presented with a seg_max greater than the virtio queue
+ * size, it can stumble into situations where it violates its own
+ * invariants and panics. For safety, we keep seg_max clamped, paying
+ * heed to the two extra descriptors needed for the header and status
+ * of a request.
+ */
+ sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
+ sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */
+ sc->vbsc_cfg.vbc_geometry.heads = 0;
+ sc->vbsc_cfg.vbc_geometry.sectors = 0;
+ sc->vbsc_cfg.vbc_blk_size = sectsz;
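+	/*
+	 * Illustrative example: a backing store reporting 4096-byte physical
+	 * sectors (sts) behind 512-byte logical sectors (sectsz) gives
+	 * physical_block_exp = ffsll(4096 / 512) - 1 = 3, i.e. eight logical
+	 * blocks per physical block.
+	 */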
+ sc->vbsc_cfg.vbc_topology.physical_block_exp =
+ (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
+ sc->vbsc_cfg.vbc_topology.alignment_offset =
+ (sto != 0) ? ((sts - sto) / sectsz) : 0;
+ sc->vbsc_cfg.vbc_topology.min_io_size = 0;
+ sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
+ sc->vbsc_cfg.vbc_writeback = 0;
+
+ /*
+ * Should we move some of this into virtio.c? Could
+ * have the device, class, and subdev_0 as fields in
+ * the virtio constants structure.
+ */
+ vi_devemu_init(pi, VIRTIO_TYPE_BLOCK);
+
+ if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
+ blockif_close(sc->bc);
+ free(sc);
+ return (1);
+ }
+ vi_set_io_res(&sc->vbsc_vs, 0);
+ return (0);
+}
+
+static int
+pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+
+ DPRINTF(("vtblk: write to readonly reg %d", offset));
+ return (1);
+}
+
+static int
+pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtblk_softc *sc = vsc;
+ void *ptr;
+
+ /* our caller has already verified offset and size */
+ ptr = (uint8_t *)&sc->vbsc_cfg + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+struct mmio_devemu pci_de_vblk = {
+ .de_emu = "virtio-blk",
+ .de_init = pci_vtblk_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vblk);
Index: usr.sbin/bhyve/mmio/mmio_virtio_console.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_console.c
@@ -0,0 +1,680 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 iXsystems Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Jakub Klama <jceel@FreeBSD.org>
+ * under sponsorship from iXsystems Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <dev/pci/pcireg.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <sysexits.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#include "mevent.h"
+#include "sockstream.h"
+
+#define VTCON_RINGSZ 64
+#define VTCON_MAXPORTS 16
+#define VTCON_MAXQ (VTCON_MAXPORTS * 2 + 2)
+
+#define VTCON_DEVICE_READY 0
+#define VTCON_DEVICE_ADD 1
+#define VTCON_DEVICE_REMOVE 2
+#define VTCON_PORT_READY 3
+#define VTCON_CONSOLE_PORT 4
+#define VTCON_CONSOLE_RESIZE 5
+#define VTCON_PORT_OPEN 6
+#define VTCON_PORT_NAME 7
+
+#define VTCON_F_SIZE 0
+#define VTCON_F_MULTIPORT 1
+#define VTCON_F_EMERG_WRITE 2
+#define VTCON_S_HOSTCAPS \
+ (VTCON_F_SIZE | VTCON_F_MULTIPORT | VTCON_F_EMERG_WRITE)
+
+static int pci_vtcon_debug;
+#define DPRINTF(params) if (pci_vtcon_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+struct pci_vtcon_softc;
+struct pci_vtcon_port;
+struct pci_vtcon_config;
+typedef void (pci_vtcon_cb_t)(struct pci_vtcon_port *, void *, struct iovec *,
+ int);
+
+struct pci_vtcon_port {
+ struct pci_vtcon_softc * vsp_sc;
+ int vsp_id;
+ const char * vsp_name;
+ bool vsp_enabled;
+ bool vsp_console;
+ bool vsp_rx_ready;
+ bool vsp_open;
+ int vsp_rxq;
+ int vsp_txq;
+ void * vsp_arg;
+ pci_vtcon_cb_t * vsp_cb;
+};
+
+struct pci_vtcon_sock
+{
+ struct pci_vtcon_port * vss_port;
+ const char * vss_path;
+ struct mevent * vss_server_evp;
+ struct mevent * vss_conn_evp;
+ int vss_server_fd;
+ int vss_conn_fd;
+ bool vss_open;
+};
+
+struct pci_vtcon_softc {
+ struct virtio_softc vsc_vs;
+ struct vqueue_info vsc_queues[VTCON_MAXQ];
+ pthread_mutex_t vsc_mtx;
+ uint64_t vsc_cfg;
+ uint64_t vsc_features;
+ char * vsc_rootdir;
+ int vsc_kq;
+ int vsc_nports;
+ bool vsc_ready;
+ struct pci_vtcon_port vsc_control_port;
+ struct pci_vtcon_port vsc_ports[VTCON_MAXPORTS];
+ struct pci_vtcon_config *vsc_config;
+};
+
+struct pci_vtcon_config {
+ uint16_t cols;
+ uint16_t rows;
+ uint32_t max_nr_ports;
+ uint32_t emerg_wr;
+} __attribute__((packed));
+
+struct pci_vtcon_control {
+ uint32_t id;
+ uint16_t event;
+ uint16_t value;
+} __attribute__((packed));
+
+struct pci_vtcon_console_resize {
+ uint16_t cols;
+ uint16_t rows;
+} __attribute__((packed));
+
+static void pci_vtcon_reset(void *);
+static void pci_vtcon_notify_rx(void *, struct vqueue_info *);
+static void pci_vtcon_notify_tx(void *, struct vqueue_info *);
+static int pci_vtcon_cfgread(void *, int, int, uint32_t *);
+static int pci_vtcon_cfgwrite(void *, int, int, uint32_t);
+static void pci_vtcon_neg_features(void *, uint64_t);
+static void pci_vtcon_sock_accept(int, enum ev_type, void *);
+static void pci_vtcon_sock_rx(int, enum ev_type, void *);
+static void pci_vtcon_sock_tx(struct pci_vtcon_port *, void *, struct iovec *,
+ int);
+static void pci_vtcon_control_send(struct pci_vtcon_softc *,
+ struct pci_vtcon_control *, const void *, size_t);
+static void pci_vtcon_announce_port(struct pci_vtcon_port *);
+static void pci_vtcon_open_port(struct pci_vtcon_port *, bool);
+
+static struct virtio_consts vtcon_vi_consts = {
+ "vtcon", /* our name */
+ VTCON_MAXQ, /* we support VTCON_MAXQ virtqueues */
+ sizeof(struct pci_vtcon_config), /* config reg size */
+ pci_vtcon_reset, /* reset */
+ NULL, /* device-wide qnotify */
+ pci_vtcon_cfgread, /* read virtio config */
+ pci_vtcon_cfgwrite, /* write virtio config */
+ pci_vtcon_neg_features, /* apply negotiated features */
+ VTCON_S_HOSTCAPS, /* our capabilities */
+};
+
+
+static void
+pci_vtcon_reset(void *vsc)
+{
+ struct pci_vtcon_softc *sc;
+
+ sc = vsc;
+
+ DPRINTF(("vtcon: device reset requested!"));
+ vi_reset_dev(&sc->vsc_vs);
+}
+
+static void
+pci_vtcon_neg_features(void *vsc, uint64_t negotiated_features)
+{
+ struct pci_vtcon_softc *sc = vsc;
+
+ sc->vsc_features = negotiated_features;
+}
+
+static int
+pci_vtcon_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtcon_softc *sc = vsc;
+ void *ptr;
+
+ ptr = (uint8_t *)sc->vsc_config + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+static int
+pci_vtcon_cfgwrite(void *vsc, int offset, int size, uint32_t val)
+{
+
+ return (0);
+}
+
+static inline struct pci_vtcon_port *
+pci_vtcon_vq_to_port(struct pci_vtcon_softc *sc, struct vqueue_info *vq)
+{
+ uint16_t num = vq->vq_num;
+
+ if (num == 0 || num == 1)
+ return (&sc->vsc_ports[0]);
+
+ if (num == 2 || num == 3)
+ return (&sc->vsc_control_port);
+
+ return (&sc->vsc_ports[(num / 2) - 1]);
+}
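+
+/*
+ * For example (with the numbering used above), queues 0/1 belong to port 0,
+ * queues 2/3 to the control port, and queues 4/5 to vsc_ports[1].
+ */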
+
+static inline struct vqueue_info *
+pci_vtcon_port_to_vq(struct pci_vtcon_port *port, bool tx_queue)
+{
+ int qnum;
+
+ qnum = tx_queue ? port->vsp_txq : port->vsp_rxq;
+ return (&port->vsp_sc->vsc_queues[qnum]);
+}
+
+static struct pci_vtcon_port *
+pci_vtcon_port_add(struct pci_vtcon_softc *sc, const char *name,
+ pci_vtcon_cb_t *cb, void *arg)
+{
+ struct pci_vtcon_port *port;
+
+ if (sc->vsc_nports == VTCON_MAXPORTS) {
+ errno = EBUSY;
+ return (NULL);
+ }
+
+ port = &sc->vsc_ports[sc->vsc_nports++];
+ port->vsp_id = sc->vsc_nports - 1;
+ port->vsp_sc = sc;
+ port->vsp_name = name;
+ port->vsp_cb = cb;
+ port->vsp_arg = arg;
+
+ if (port->vsp_id == 0) {
+ /* port0 */
+ port->vsp_txq = 0;
+ port->vsp_rxq = 1;
+ } else {
+ port->vsp_txq = sc->vsc_nports * 2;
+ port->vsp_rxq = port->vsp_txq + 1;
+ }
+
+ port->vsp_enabled = true;
+ return (port);
+}
+
+static int
+pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name,
+ const char *path)
+{
+ struct pci_vtcon_sock *sock;
+ struct sockaddr_un sun;
+ char *pathcopy;
+ int s = -1, fd = -1, error = 0;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+#endif
+
+ sock = calloc(1, sizeof(struct pci_vtcon_sock));
+ if (sock == NULL) {
+ error = -1;
+ goto out;
+ }
+
+ s = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (s < 0) {
+ error = -1;
+ goto out;
+ }
+
+ pathcopy = strdup(path);
+ if (pathcopy == NULL) {
+ error = -1;
+ goto out;
+ }
+
+ fd = open(dirname(pathcopy), O_RDONLY | O_DIRECTORY);
+ if (fd < 0) {
+ free(pathcopy);
+ error = -1;
+ goto out;
+ }
+
+ sun.sun_family = AF_UNIX;
+ sun.sun_len = sizeof(struct sockaddr_un);
+ strcpy(pathcopy, path);
+ strlcpy(sun.sun_path, basename(pathcopy), sizeof(sun.sun_path));
+ free(pathcopy);
+
+ if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0) {
+ error = -1;
+ goto out;
+ }
+
+ if (fcntl(s, F_SETFL, O_NONBLOCK) < 0) {
+ error = -1;
+ goto out;
+ }
+
+ if (listen(s, 1) < 0) {
+ error = -1;
+ goto out;
+ }
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE);
+ if (caph_rights_limit(s, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ sock->vss_port = pci_vtcon_port_add(sc, name, pci_vtcon_sock_tx, sock);
+ if (sock->vss_port == NULL) {
+ error = -1;
+ goto out;
+ }
+
+ sock->vss_open = false;
+ sock->vss_conn_fd = -1;
+ sock->vss_server_fd = s;
+ sock->vss_server_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_accept,
+ sock);
+
+ if (sock->vss_server_evp == NULL) {
+ error = -1;
+ goto out;
+ }
+
+out:
+ if (fd != -1)
+ close(fd);
+
+ if (error != 0) {
+ if (s != -1)
+ close(s);
+ free(sock);
+ }
+
+ return (error);
+}
+
+static void
+pci_vtcon_sock_accept(int fd __unused, enum ev_type t __unused, void *arg)
+{
+ struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg;
+ int s;
+
+ s = accept(sock->vss_server_fd, NULL, NULL);
+ if (s < 0)
+ return;
+
+ if (sock->vss_open) {
+ close(s);
+ return;
+ }
+
+ sock->vss_open = true;
+ sock->vss_conn_fd = s;
+ sock->vss_conn_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_rx, sock);
+
+ pci_vtcon_open_port(sock->vss_port, true);
+}
+
+static void
+pci_vtcon_sock_rx(int fd __unused, enum ev_type t __unused, void *arg)
+{
+ struct pci_vtcon_port *port;
+ struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg;
+ struct vqueue_info *vq;
+ struct iovec iov;
+ static char dummybuf[2048];
+ int len, n;
+ uint16_t idx;
+
+ port = sock->vss_port;
+ vq = pci_vtcon_port_to_vq(port, true);
+
+ if (!sock->vss_open || !port->vsp_rx_ready) {
+ len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf));
+ if (len == 0)
+ goto close;
+
+ return;
+ }
+
+ if (!vq_has_descs(vq)) {
+ len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf));
+ vq_endchains(vq, 1);
+ if (len == 0)
+ goto close;
+
+ return;
+ }
+
+ do {
+ n = vq_getchain(vq, &idx, &iov, 1, NULL);
+ len = readv(sock->vss_conn_fd, &iov, n);
+
+ if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) {
+ vq_retchains(vq, 1);
+ vq_endchains(vq, 0);
+ if (len == 0)
+ goto close;
+
+ return;
+ }
+
+ vq_relchain(vq, idx, len);
+ } while (vq_has_descs(vq));
+
+ vq_endchains(vq, 1);
+
+close:
+ mevent_delete_close(sock->vss_conn_evp);
+ sock->vss_conn_fd = -1;
+ sock->vss_open = false;
+}
+
+static void
+pci_vtcon_sock_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov,
+ int niov)
+{
+ struct pci_vtcon_sock *sock;
+ int i, ret;
+
+ sock = (struct pci_vtcon_sock *)arg;
+
+ if (sock->vss_conn_fd == -1)
+ return;
+
+ for (i = 0; i < niov; i++) {
+ ret = stream_write(sock->vss_conn_fd, iov[i].iov_base,
+ iov[i].iov_len);
+ if (ret <= 0)
+ break;
+ }
+
+ if (ret <= 0) {
+ mevent_delete_close(sock->vss_conn_evp);
+ sock->vss_conn_fd = -1;
+ sock->vss_open = false;
+ }
+}
+
+static void
+pci_vtcon_control_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov,
+ int niov)
+{
+ struct pci_vtcon_softc *sc;
+ struct pci_vtcon_port *tmp;
+ struct pci_vtcon_control resp, *ctrl;
+ int i;
+
+ assert(niov == 1);
+
+ sc = port->vsp_sc;
+ ctrl = (struct pci_vtcon_control *)iov->iov_base;
+
+ switch (ctrl->event) {
+ case VTCON_DEVICE_READY:
+ sc->vsc_ready = true;
+ /* set port ready events for registered ports */
+ for (i = 0; i < VTCON_MAXPORTS; i++) {
+ tmp = &sc->vsc_ports[i];
+ if (tmp->vsp_enabled)
+ pci_vtcon_announce_port(tmp);
+
+ if (tmp->vsp_open)
+ pci_vtcon_open_port(tmp, true);
+ }
+ break;
+
+ case VTCON_PORT_READY:
+ if (ctrl->id >= sc->vsc_nports) {
+ WPRINTF(("VTCON_PORT_READY event for unknown port %d",
+ ctrl->id));
+ return;
+ }
+
+ tmp = &sc->vsc_ports[ctrl->id];
+ if (tmp->vsp_console) {
+ resp.event = VTCON_CONSOLE_PORT;
+ resp.id = ctrl->id;
+ resp.value = 1;
+ pci_vtcon_control_send(sc, &resp, NULL, 0);
+ }
+ break;
+ }
+}
+
+static void
+pci_vtcon_announce_port(struct pci_vtcon_port *port)
+{
+ struct pci_vtcon_control event;
+
+ event.id = port->vsp_id;
+ event.event = VTCON_DEVICE_ADD;
+ event.value = 1;
+ pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0);
+
+ event.event = VTCON_PORT_NAME;
+ pci_vtcon_control_send(port->vsp_sc, &event, port->vsp_name,
+ strlen(port->vsp_name));
+}
+
+static void
+pci_vtcon_open_port(struct pci_vtcon_port *port, bool open)
+{
+ struct pci_vtcon_control event;
+
+ if (!port->vsp_sc->vsc_ready) {
+ port->vsp_open = true;
+ return;
+ }
+
+ event.id = port->vsp_id;
+ event.event = VTCON_PORT_OPEN;
+ event.value = (int)open;
+ pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0);
+}
+
+static void
+pci_vtcon_control_send(struct pci_vtcon_softc *sc,
+ struct pci_vtcon_control *ctrl, const void *payload, size_t len)
+{
+ struct vqueue_info *vq;
+ struct iovec iov;
+ uint16_t idx;
+ int n;
+
+ vq = pci_vtcon_port_to_vq(&sc->vsc_control_port, true);
+
+ if (!vq_has_descs(vq))
+ return;
+
+ n = vq_getchain(vq, &idx, &iov, 1, NULL);
+
+ assert(n == 1);
+
+ memcpy(iov.iov_base, ctrl, sizeof(struct pci_vtcon_control));
+ if (payload != NULL && len > 0)
+ memcpy(iov.iov_base + sizeof(struct pci_vtcon_control),
+ payload, len);
+
+ vq_relchain(vq, idx, sizeof(struct pci_vtcon_control) + len);
+ vq_endchains(vq, 1);
+}
+
+static void
+pci_vtcon_notify_tx(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtcon_softc *sc;
+ struct pci_vtcon_port *port;
+ struct iovec iov[1];
+ uint16_t idx, n;
+ uint16_t flags[8];
+
+ sc = vsc;
+ port = pci_vtcon_vq_to_port(sc, vq);
+
+ while (vq_has_descs(vq)) {
+ n = vq_getchain(vq, &idx, iov, 1, flags);
+ assert(n >= 1);
+ if (port != NULL)
+ port->vsp_cb(port, port->vsp_arg, iov, 1);
+
+ /*
+ * Release this chain and handle more
+ */
+ vq_relchain(vq, idx, 0);
+ }
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
+}
+
+static void
+pci_vtcon_notify_rx(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtcon_softc *sc;
+ struct pci_vtcon_port *port;
+
+ sc = vsc;
+ port = pci_vtcon_vq_to_port(sc, vq);
+
+ if (!port->vsp_rx_ready) {
+ port->vsp_rx_ready = 1;
+ vq_kick_disable(vq);
+ }
+}
+
+static int
+pci_vtcon_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtcon_softc *sc;
+ char *portname = NULL;
+ char *portpath = NULL;
+ char *opt;
+ int i;
+
+ sc = calloc(1, sizeof(struct pci_vtcon_softc));
+ sc->vsc_config = calloc(1, sizeof(struct pci_vtcon_config));
+ sc->vsc_config->max_nr_ports = VTCON_MAXPORTS;
+ sc->vsc_config->cols = 80;
+ sc->vsc_config->rows = 25;
+
+ vi_softc_linkup(&sc->vsc_vs, &vtcon_vi_consts, sc, pi, sc->vsc_queues);
+ sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+
+ for (i = 0; i < VTCON_MAXQ; i++) {
+ sc->vsc_queues[i].vq_qsize = VTCON_RINGSZ;
+ sc->vsc_queues[i].vq_notify = i % 2 == 0
+ ? pci_vtcon_notify_rx
+ : pci_vtcon_notify_tx;
+ }
+
+ /* initialize config space */
+ vi_devemu_init(pi, VIRTIO_TYPE_CONSOLE);
+
+ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
+ return (1);
+ vi_set_io_res(&sc->vsc_vs, 0);
+
+ /* create control port */
+ sc->vsc_control_port.vsp_sc = sc;
+ sc->vsc_control_port.vsp_txq = 2;
+ sc->vsc_control_port.vsp_rxq = 3;
+ sc->vsc_control_port.vsp_cb = pci_vtcon_control_tx;
+ sc->vsc_control_port.vsp_enabled = true;
+
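+	/*
+	 * Parse the option string as comma-separated name=path pairs, e.g.
+	 * (hypothetical) "port1=/tmp/port1.sock,port2=/tmp/port2.sock";
+	 * each entry becomes a socket-backed console port.
+	 */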
+ while ((opt = strsep(&opts, ",")) != NULL) {
+ portname = strsep(&opt, "=");
+ portpath = opt;
+
+ /* create port */
+ if (pci_vtcon_sock_add(sc, portname, portpath) < 0) {
+ EPRINTLN("cannot create port %s: %s",
+ portname, strerror(errno));
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
+struct mmio_devemu pci_de_vcon = {
+ .de_emu = "virtio-console",
+ .de_init = pci_vtcon_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vcon);
Index: usr.sbin/bhyve/mmio/mmio_virtio_net.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_net.c
@@ -0,0 +1,697 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/select.h>
+#include <sys/uio.h>
+#include <sys/ioctl.h>
+#include <net/ethernet.h>
+#include <net/if.h> /* IFNAMSIZ */
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <dev/pci/pcireg.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#include "mevent.h"
+#include "net_utils.h"
+#include "net_backends.h"
+#include "iov.h"
+
+#define VTNET_RINGSZ 1024
+
+#define VTNET_MAXSEGS 256
+
+#define VTNET_MAX_PKT_LEN (65536 + 64)
+
+#define VTNET_S_HOSTCAPS \
+ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
+ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
+
+/*
+ * Config space "registers"
+ */
+struct virtio_net_config {
+ uint8_t mac[6];
+ uint16_t status;
+} __packed;
+
+/*
+ * Queue definitions.
+ */
+#define VTNET_RXQ 0
+#define VTNET_TXQ 1
+#define VTNET_CTLQ 2 /* NB: not yet supported */
+
+#define VTNET_MAXQ 3
+
+/*
+ * Debug printf
+ */
+static int pci_vtnet_debug;
+#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtnet_softc {
+ struct virtio_softc vsc_vs;
+ struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
+ pthread_mutex_t vsc_mtx;
+
+ net_backend_t *vsc_be;
+
+ int resetting; /* protected by tx_mtx */
+
+ uint64_t vsc_features; /* negotiated features */
+
+ pthread_mutex_t rx_mtx;
+ int rx_merge; /* merged rx bufs in use */
+
+ pthread_t tx_tid;
+ pthread_mutex_t tx_mtx;
+ pthread_cond_t tx_cond;
+ int tx_in_progress;
+
+ size_t vhdrlen;
+ size_t be_vhdrlen;
+
+ struct virtio_net_config vsc_config;
+ struct virtio_consts vsc_consts;
+};
+
+static void pci_vtnet_reset(void *);
+/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
+static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
+static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
+static void pci_vtnet_neg_features(void *, uint64_t);
+
+static struct virtio_consts vtnet_vi_consts = {
+ "vtnet", /* our name */
+ VTNET_MAXQ - 1, /* we currently support 2 virtqueues */
+ sizeof(struct virtio_net_config), /* config reg size */
+ pci_vtnet_reset, /* reset */
+ NULL, /* device-wide qnotify -- not used */
+	pci_vtnet_cfgread,		/* read virtio config */
+	pci_vtnet_cfgwrite,		/* write virtio config */
+ pci_vtnet_neg_features, /* apply negotiated features */
+ VTNET_S_HOSTCAPS, /* our capabilities */
+};
+
+static void
+pci_vtnet_reset(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device reset requested !"));
+
+ /* Acquire the RX lock to block RX processing. */
+ pthread_mutex_lock(&sc->rx_mtx);
+
+ /*
+ * Make sure receive operation is disabled at least until we
+ * re-negotiate the features, since receive operation depends
+ * on the value of sc->rx_merge and the header length, which
+ * are both set in pci_vtnet_neg_features().
+ * Receive operation will be enabled again once the guest adds
+ * the first receive buffers and kicks us.
+ */
+ netbe_rx_disable(sc->vsc_be);
+
+ /* Set sc->resetting and give a chance to the TX thread to stop. */
+ pthread_mutex_lock(&sc->tx_mtx);
+ sc->resetting = 1;
+ while (sc->tx_in_progress) {
+ pthread_mutex_unlock(&sc->tx_mtx);
+ usleep(10000);
+ pthread_mutex_lock(&sc->tx_mtx);
+ }
+
+ /*
+ * Now reset rings, MSI-X vectors, and negotiated capabilities.
+ * Do that with the TX lock held, since we need to reset
+ * sc->resetting.
+ */
+ vi_reset_dev(&sc->vsc_vs);
+
+ sc->resetting = 0;
+ pthread_mutex_unlock(&sc->tx_mtx);
+ pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+static __inline struct iovec *
+iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
+{
+ struct iovec *riov;
+
+ if (iov[0].iov_len < hlen) {
+ /*
+ * Not enough header space in the first fragment.
+ * That's not ok for us.
+ */
+ return NULL;
+ }
+
+ iov[0].iov_len -= hlen;
+ if (iov[0].iov_len == 0) {
+ *iovcnt -= 1;
+ if (*iovcnt == 0) {
+ /*
+ * Only space for the header. That's not
+ * enough for us.
+ */
+ return NULL;
+ }
+ riov = &iov[1];
+ } else {
+ iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
+ riov = &iov[0];
+ }
+
+ return (riov);
+}
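+
+/*
+ * For example (illustrative): with a 10-byte virtio-net header negotiated by
+ * the guest and a backend that consumes no header, iov_trim_hdr() advances
+ * iov[0] past the header (or drops iov[0] entirely if it held only the
+ * header), so that only the frame data is handed to the backend.
+ */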
+
+struct virtio_mrg_rxbuf_info {
+ uint16_t idx;
+ uint16_t pad;
+ uint32_t len;
+};
+
+static void
+pci_vtnet_rx(struct pci_vtnet_softc *sc)
+{
+ int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
+ struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
+ struct iovec iov[VTNET_MAXSEGS + 1];
+ struct vqueue_info *vq;
+
+ vq = &sc->vsc_queues[VTNET_RXQ];
+ for (;;) {
+ struct virtio_net_rxhdr *hdr;
+ uint32_t riov_bytes;
+ struct iovec *riov;
+ uint32_t ulen;
+ int riov_len;
+ int n_chains;
+ ssize_t rlen;
+ ssize_t plen;
+
+ plen = netbe_peek_recvlen(sc->vsc_be);
+ if (plen <= 0) {
+ /*
+ * No more packets (plen == 0), or backend errored
+ * (plen < 0). Interrupt if needed and stop.
+ */
+ vq_endchains(vq, /*used_all_avail=*/0);
+ return;
+ }
+ plen += prepend_hdr_len;
+
+ /*
+ * Get a descriptor chain to store the next ingress
+ * packet. In case of mergeable rx buffers, get as
+ * many chains as necessary in order to make room
+ * for a maximum sized LRO packet.
+ */
+ riov_bytes = 0;
+ riov_len = 0;
+ riov = iov;
+ n_chains = 0;
+ do {
+ int n = vq_getchain(vq, &info[n_chains].idx, riov,
+ VTNET_MAXSEGS - riov_len, NULL);
+
+ if (n == 0) {
+ /*
+ * No rx buffers. Enable RX kicks and double
+ * check.
+ */
+ vq_kick_enable(vq);
+ if (!vq_has_descs(vq)) {
+ /*
+ * Still no buffers. Return the unused
+ * chains (if any), interrupt if needed
+ * (including for NOTIFY_ON_EMPTY), and
+ * disable the backend until the next
+ * kick.
+ */
+ vq_retchains(vq, n_chains);
+ vq_endchains(vq, /*used_all_avail=*/1);
+ netbe_rx_disable(sc->vsc_be);
+ return;
+ }
+
+ /* More rx buffers found, so keep going. */
+ vq_kick_disable(vq);
+ continue;
+ }
+ assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
+ riov_len += n;
+ if (!sc->rx_merge) {
+ n_chains = 1;
+ break;
+ }
+ info[n_chains].len = (uint32_t)count_iov(riov, n);
+ riov_bytes += info[n_chains].len;
+ riov += n;
+ n_chains++;
+ } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS);
+
+ riov = iov;
+ hdr = riov[0].iov_base;
+ if (prepend_hdr_len > 0) {
+ /*
+ * The frontend uses a virtio-net header, but the
+ * backend does not. We need to prepend a zeroed
+ * header.
+ */
+ riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
+ if (riov == NULL) {
+ /*
+ * The first collected chain is nonsensical,
+ * as it is not even enough to store the
+ * virtio-net header. Just drop it.
+ */
+ vq_relchain(vq, info[0].idx, 0);
+ vq_retchains(vq, n_chains - 1);
+ continue;
+ }
+ memset(hdr, 0, prepend_hdr_len);
+ }
+
+ rlen = netbe_recv(sc->vsc_be, riov, riov_len);
+
+ if (rlen != plen - prepend_hdr_len) {
+ /*
+ * No more packets (len == 0), or backend errored
+ * (err < 0). Return unused available buffers
+ * and stop.
+ */
+ vq_retchains(vq, n_chains);
+ /* Interrupt if needed/appropriate and stop. */
+ vq_endchains(vq, /*used_all_avail=*/0);
+ return;
+ }
+
+ ulen = (uint32_t)plen; /* avoid too many casts below */
+
+ /* Publish the used buffers to the guest. */
+ if (!sc->rx_merge) {
+ vq_relchain(vq, info[0].idx, ulen);
+ } else {
+ uint32_t iolen;
+ int i = 0;
+
+ do {
+ iolen = info[i].len;
+ if (iolen > ulen) {
+ iolen = ulen;
+ }
+ vq_relchain(vq, info[i].idx, iolen);
+ ulen -= iolen;
+ i++;
+ } while (ulen > 0);
+
+ hdr->vrh_bufs = i;
+ // TODO add publish for arm64
+ //vq_relchain_publish(vq);
+ vq_retchains(vq, n_chains - i);
+ }
+	}
+}
+/*
+ * Called when there is read activity on the backend file descriptor.
+ * Each buffer posted by the guest is assumed to be able to contain
+ * an entire ethernet frame + rx header.
+ */
+static void
+pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
+{
+ struct pci_vtnet_softc *sc = param;
+
+ pthread_mutex_lock(&sc->rx_mtx);
+ pci_vtnet_rx(sc);
+	pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+/* Called on RX kick. */
+static void
+pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ /*
+ * A qnotify means that the rx process can now begin.
+ */
+ pthread_mutex_lock(&sc->rx_mtx);
+ vq_kick_disable(vq);
+ netbe_rx_enable(sc->vsc_be);
+ pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+/* TX virtqueue processing, called by the TX thread. */
+static void
+pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
+{
+ struct iovec iov[VTNET_MAXSEGS + 1];
+ struct iovec *siov = iov;
+ uint16_t idx;
+ ssize_t len;
+ int n;
+
+ /*
+ * Obtain chain of descriptors. The first descriptor also
+ * contains the virtio-net header.
+ */
+ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+ assert(n >= 1 && n <= VTNET_MAXSEGS);
+
+ if (sc->vhdrlen != sc->be_vhdrlen) {
+ /*
+ * The frontend uses a virtio-net header, but the backend
+ * does not. We simply strip the header and ignore it, as
+ * it should be zero-filled.
+ */
+ siov = iov_trim_hdr(siov, &n, sc->vhdrlen);
+ }
+
+ if (siov == NULL) {
+ /* The chain is nonsensical. Just drop it. */
+ len = 0;
+ } else {
+ len = netbe_send(sc->vsc_be, siov, n);
+ if (len < 0) {
+ /*
+ * If send failed, report that 0 bytes
+ * were read.
+ */
+ len = 0;
+ }
+ }
+
+ /*
+ * Return the processed chain to the guest, reporting
+ * the number of bytes that we read.
+ */
+ vq_relchain(vq, idx, len > 0 ? len : 0);
+}
+
+/* Called on TX kick. */
+static void
+pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ /*
+ * Any ring entries to process?
+ */
+ if (!vq_has_descs(vq))
+ return;
+
+ /* Signal the tx thread for processing */
+ pthread_mutex_lock(&sc->tx_mtx);
+ vq_kick_disable(vq);
+ if (sc->tx_in_progress == 0)
+ pthread_cond_signal(&sc->tx_cond);
+ pthread_mutex_unlock(&sc->tx_mtx);
+}
+
+/*
+ * Thread which will handle processing of TX desc
+ */
+static void *
+pci_vtnet_tx_thread(void *param)
+{
+ struct pci_vtnet_softc *sc = param;
+ struct vqueue_info *vq;
+ int error;
+
+ vq = &sc->vsc_queues[VTNET_TXQ];
+
+ /*
+	 * Wait until the TX queue pointers are initialised and the
+	 * first TX is signaled.
+ */
+ pthread_mutex_lock(&sc->tx_mtx);
+ error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+ assert(error == 0);
+
+ for (;;) {
+ /* note - tx mutex is locked here */
+ while (sc->resetting || !vq_has_descs(vq)) {
+ vq_kick_enable(vq);
+ if (!sc->resetting && vq_has_descs(vq))
+ break;
+
+ sc->tx_in_progress = 0;
+ error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
+ assert(error == 0);
+ }
+ vq_kick_disable(vq);
+ sc->tx_in_progress = 1;
+ pthread_mutex_unlock(&sc->tx_mtx);
+
+ do {
+ /*
+ * Run through entries, placing them into
+ * iovecs and sending when an end-of-packet
+ * is found
+ */
+ pci_vtnet_proctx(sc, vq);
+ } while (vq_has_descs(vq));
+
+ /*
+ * Generate an interrupt if needed.
+ */
+ vq_endchains(vq, /*used_all_avail=*/1);
+
+ pthread_mutex_lock(&sc->tx_mtx);
+ }
+}
+
+#ifdef notyet
+static void
+pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
+{
+
+ DPRINTF(("vtnet: control qnotify!"));
+}
+#endif
+
+static int
+pci_vtnet_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtnet_softc *sc;
+ char tname[MAXCOMLEN + 1];
+ int mac_provided;
+
+ /*
+ * Allocate data structures for further virtio initializations.
+ * sc also contains a copy of vtnet_vi_consts, since capabilities
+ * change depending on the backend.
+ */
+ sc = calloc(1, sizeof(struct pci_vtnet_softc));
+
+ sc->vsc_consts = vtnet_vi_consts;
+ pthread_mutex_init(&sc->vsc_mtx, NULL);
+
+ sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
+ sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
+#ifdef notyet
+ sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
+ sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
+#endif
+
+ /*
+ * Attempt to open the backend device and read the MAC address
+ * if specified.
+ */
+ mac_provided = 0;
+ if (opts != NULL) {
+ char *devname;
+ char *vtopts;
+ int err;
+
+ devname = vtopts = strdup(opts);
+ (void) strsep(&vtopts, ",");
+
+ if (vtopts != NULL) {
+ err = net_parsemac(vtopts, sc->vsc_config.mac);
+ if (err != 0) {
+ free(devname);
+ free(sc);
+ return (err);
+ }
+ mac_provided = 1;
+ }
+
+ err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback,
+ sc);
+ free(devname);
+ if (err) {
+ free(sc);
+ return (err);
+ }
+ sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be);
+ }
+
+ if (!mac_provided) {
+ net_genmac(pi, sc->vsc_config.mac);
+ }
+
+ /* initialize config space */
+ vi_devemu_init(pi, VIRTIO_TYPE_NET);
+
+ /* Link is up if we managed to open backend device. */
+ sc->vsc_config.status = (opts == NULL || sc->vsc_be);
+
+ vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
+ sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
+
+ /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
+ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) {
+ free(sc);
+ return (1);
+ }
+
+ /* use BAR 0 to map config regs in IO space */
+ vi_set_io_res(&sc->vsc_vs, 0);
+
+ sc->resetting = 0;
+
+ sc->rx_merge = 0;
+ pthread_mutex_init(&sc->rx_mtx, NULL);
+
+ /*
+ * Initialize tx semaphore & spawn TX processing thread.
+ * As of now, only one thread for TX desc processing is
+ * spawned.
+ */
+ sc->tx_in_progress = 0;
+ pthread_mutex_init(&sc->tx_mtx, NULL);
+ pthread_cond_init(&sc->tx_cond, NULL);
+ pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
+ snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
+ pi->di_func);
+ pthread_set_name_np(sc->tx_tid, tname);
+
+ return (0);
+}
+
+static int
+pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
+{
+ struct pci_vtnet_softc *sc = vsc;
+ void *ptr;
+
+ if (offset < (int)sizeof(sc->vsc_config.mac)) {
+ assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
+ /*
+ * The driver is allowed to change the MAC address
+ */
+ ptr = &sc->vsc_config.mac[offset];
+ memcpy(ptr, &value, size);
+ } else {
+ /* silently ignore other writes */
+ DPRINTF(("vtnet: write to readonly reg %d", offset));
+ }
+
+ return (0);
+}
+
+static int
+pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtnet_softc *sc = vsc;
+ void *ptr;
+
+ ptr = (uint8_t *)&sc->vsc_config + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+static void
+pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ sc->vsc_features = negotiated_features;
+
+ if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
+ sc->vhdrlen = sizeof(struct virtio_net_rxhdr);
+ sc->rx_merge = 1;
+ } else {
+ /*
+ * Without mergeable rx buffers, virtio-net header is 2
+ * bytes shorter than sizeof(struct virtio_net_rxhdr).
+ */
+ sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
+ sc->rx_merge = 0;
+ }
+
+ /* Tell the backend to enable some capabilities it has advertised. */
+ netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen);
+ sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
+}
+
+static struct mmio_devemu pci_de_vnet = {
+ .de_emu = "virtio-net",
+ .de_init = pci_vtnet_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vnet);
Index: usr.sbin/bhyve/mmio/mmio_virtio_rnd.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_rnd.c
@@ -0,0 +1,208 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Nahanni Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * virtio entropy device emulation.
+ * Randomness is sourced from /dev/random which does not block
+ * once it has been seeded at bootup.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/linker_set.h>
+#include <sys/uio.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <dev/pci/pcireg.h>
+#include <sysexits.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#define VTRND_RINGSZ 64
+
+
+static int pci_vtrnd_debug;
+#define DPRINTF(params) if (pci_vtrnd_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+/*
+ * Per-device softc
+ */
+struct pci_vtrnd_softc {
+ struct virtio_softc vrsc_vs;
+ struct vqueue_info vrsc_vq;
+ pthread_mutex_t vrsc_mtx;
+ uint64_t vrsc_cfg;
+ int vrsc_fd;
+};
+
+static void pci_vtrnd_reset(void *);
+static void pci_vtrnd_notify(void *, struct vqueue_info *);
+
+static struct virtio_consts vtrnd_vi_consts = {
+ "vtrnd", /* our name */
+ 1, /* we support 1 virtqueue */
+ 0, /* config reg size */
+ pci_vtrnd_reset, /* reset */
+ pci_vtrnd_notify, /* device-wide qnotify */
+ NULL, /* read virtio config */
+ NULL, /* write virtio config */
+ NULL, /* apply negotiated features */
+ 0, /* our capabilities */
+};
+
+
+static void
+pci_vtrnd_reset(void *vsc)
+{
+ struct pci_vtrnd_softc *sc;
+
+ sc = vsc;
+
+ DPRINTF(("vtrnd: device reset requested !"));
+ vi_reset_dev(&sc->vrsc_vs);
+}
+
+
+static void
+pci_vtrnd_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct iovec iov;
+ struct pci_vtrnd_softc *sc;
+ int len;
+ uint16_t idx;
+
+ sc = vsc;
+
+ if (sc->vrsc_fd < 0) {
+ vq_endchains(vq, 0);
+ return;
+ }
+
+ while (vq_has_descs(vq)) {
+ vq_getchain(vq, &idx, &iov, 1, NULL);
+
+ len = read(sc->vrsc_fd, iov.iov_base, iov.iov_len);
+
+ DPRINTF(("vtrnd: vtrnd_notify(): %d", len));
+
+ /* Catastrophe if unable to read from /dev/random */
+ assert(len > 0);
+
+ /*
+ * Release this chain and handle more
+ */
+ vq_relchain(vq, idx, len);
+ }
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
+}
+
+
+static int
+pci_vtrnd_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtrnd_softc *sc;
+ int fd;
+ int len;
+ uint8_t v;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+#endif
+
+ /*
+ * Should always be able to open /dev/random.
+ */
+ fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
+
+ assert(fd >= 0);
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_READ);
+ if (caph_rights_limit(fd, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ /*
+ * Check that device is seeded and non-blocking.
+ */
+ len = read(fd, &v, sizeof(v));
+ if (len <= 0) {
+ WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len));
+ close(fd);
+ return (1);
+ }
+
+ sc = calloc(1, sizeof(struct pci_vtrnd_softc));
+
+ vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq);
+ sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx;
+
+ sc->vrsc_vq.vq_qsize = VTRND_RINGSZ;
+
+ /* keep /dev/random opened while emulating */
+ sc->vrsc_fd = fd;
+
+ /* initialize config space */
+ vi_devemu_init(pi, VIRTIO_TYPE_ENTROPY);
+
+ if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix()))
+ return (1);
+ vi_set_io_res(&sc->vrsc_vs, 0);
+
+ return (0);
+}
+
+
+struct mmio_devemu pci_de_vrnd = {
+ .de_emu = "virtio-rnd",
+ .de_init = pci_vtrnd_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vrnd);
Index: usr.sbin/bhyve/mmio/mmio_virtio_scsi.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/mmio_virtio_scsi.c
@@ -0,0 +1,741 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>.
+ * Copyright (c) 2018 Marcelo Araujo <araujo@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/linker_set.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/time.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_message.h>
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl_backend.h>
+#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_util.h>
+#include <cam/ctl/ctl_scsi_all.h>
+#include <camlib.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+#include "iov.h"
+
+#include "mmio_emul.h"
+#include "mmio_virtio.h"
+
+#define VTSCSI_RINGSZ 64
+#define VTSCSI_REQUESTQ 1
+#define VTSCSI_THR_PER_Q 16
+#define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2)
+#define VTSCSI_MAXSEG 64
+
+#define VTSCSI_IN_HEADER_LEN(_sc) \
+ (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size)
+
+#define VTSCSI_OUT_HEADER_LEN(_sc) \
+ (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size)
+
+#define VIRTIO_SCSI_MAX_CHANNEL 0
+#define VIRTIO_SCSI_MAX_TARGET 0
+#define VIRTIO_SCSI_MAX_LUN 16383
+
+#define VIRTIO_SCSI_F_INOUT (1 << 0)
+#define VIRTIO_SCSI_F_HOTPLUG (1 << 1)
+#define VIRTIO_SCSI_F_CHANGE (1 << 2)
+
+static int pci_vtscsi_debug = 0;
+#define DPRINTF(params) if (pci_vtscsi_debug) PRINTLN params
+#define WPRINTF(params) PRINTLN params
+
+struct pci_vtscsi_config {
+ uint32_t num_queues;
+ uint32_t seg_max;
+ uint32_t max_sectors;
+ uint32_t cmd_per_lun;
+ uint32_t event_info_size;
+ uint32_t sense_size;
+ uint32_t cdb_size;
+ uint16_t max_channel;
+ uint16_t max_target;
+ uint32_t max_lun;
+} __attribute__((packed));
+
+struct pci_vtscsi_queue {
+ struct pci_vtscsi_softc * vsq_sc;
+ struct vqueue_info * vsq_vq;
+ pthread_mutex_t vsq_mtx;
+ pthread_mutex_t vsq_qmtx;
+ pthread_cond_t vsq_cv;
+ STAILQ_HEAD(, pci_vtscsi_request) vsq_requests;
+ LIST_HEAD(, pci_vtscsi_worker) vsq_workers;
+};
+
+struct pci_vtscsi_worker {
+ struct pci_vtscsi_queue * vsw_queue;
+ pthread_t vsw_thread;
+ bool vsw_exiting;
+ LIST_ENTRY(pci_vtscsi_worker) vsw_link;
+};
+
+struct pci_vtscsi_request {
+ struct pci_vtscsi_queue * vsr_queue;
+ struct iovec vsr_iov_in[VTSCSI_MAXSEG];
+ int vsr_niov_in;
+ struct iovec vsr_iov_out[VTSCSI_MAXSEG];
+ int vsr_niov_out;
+ uint32_t vsr_idx;
+ STAILQ_ENTRY(pci_vtscsi_request) vsr_link;
+};
+
+/*
+ * Per-device softc
+ */
+struct pci_vtscsi_softc {
+ struct virtio_softc vss_vs;
+ struct vqueue_info vss_vq[VTSCSI_MAXQ];
+ struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ];
+ pthread_mutex_t vss_mtx;
+ int vss_iid;
+ int vss_ctl_fd;
+ uint32_t vss_features;
+ struct pci_vtscsi_config vss_config;
+};
+
+#define VIRTIO_SCSI_T_TMF 0
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
+
+/* command-specific response values */
+#define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0
+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
+
+struct pci_vtscsi_ctrl_tmf {
+ uint32_t type;
+ uint32_t subtype;
+ uint8_t lun[8];
+ uint64_t id;
+ uint8_t response;
+} __attribute__((packed));
+
+#define VIRTIO_SCSI_T_AN_QUERY 1
+#define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2
+#define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4
+#define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8
+#define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16
+#define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32
+#define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64
+
+struct pci_vtscsi_ctrl_an {
+ uint32_t type;
+ uint8_t lun[8];
+ uint32_t event_requested;
+ uint32_t event_actual;
+ uint8_t response;
+} __attribute__((packed));
+
+/* command-specific response values */
+#define VIRTIO_SCSI_S_OK 0
+#define VIRTIO_SCSI_S_OVERRUN 1
+#define VIRTIO_SCSI_S_ABORTED 2
+#define VIRTIO_SCSI_S_BAD_TARGET 3
+#define VIRTIO_SCSI_S_RESET 4
+#define VIRTIO_SCSI_S_BUSY 5
+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
+#define VIRTIO_SCSI_S_TARGET_FAILURE 7
+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
+#define VIRTIO_SCSI_S_FAILURE 9
+#define VIRTIO_SCSI_S_INCORRECT_LUN 12
+
+/* task_attr */
+#define VIRTIO_SCSI_S_SIMPLE 0
+#define VIRTIO_SCSI_S_ORDERED 1
+#define VIRTIO_SCSI_S_HEAD 2
+#define VIRTIO_SCSI_S_ACA 3
+
+struct pci_vtscsi_event {
+ uint32_t event;
+ uint8_t lun[8];
+ uint32_t reason;
+} __attribute__((packed));
+
+struct pci_vtscsi_req_cmd_rd {
+ uint8_t lun[8];
+ uint64_t id;
+ uint8_t task_attr;
+ uint8_t prio;
+ uint8_t crn;
+ uint8_t cdb[];
+} __attribute__((packed));
+
+struct pci_vtscsi_req_cmd_wr {
+ uint32_t sense_len;
+ uint32_t residual;
+ uint16_t status_qualifier;
+ uint8_t status;
+ uint8_t response;
+ uint8_t sense[];
+} __attribute__((packed));
+
+static void *pci_vtscsi_proc(void *);
+static void pci_vtscsi_reset(void *);
+static void pci_vtscsi_neg_features(void *, uint64_t);
+static int pci_vtscsi_cfgread(void *, int, int, uint32_t *);
+static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t);
+static inline int pci_vtscsi_get_lun(uint8_t *);
+static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t);
+static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_ctrl_tmf *);
+static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_ctrl_an *);
+static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *,
+ int, struct iovec *, int);
+static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *);
+static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *);
+static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *);
+static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *,
+ struct pci_vtscsi_queue *, int);
+static int pci_vtscsi_init(struct vmctx *, struct mmio_devinst *, char *);
+
+static struct virtio_consts vtscsi_vi_consts = {
+ "vtscsi", /* our name */
+ VTSCSI_MAXQ, /* we support 2+n virtqueues */
+ sizeof(struct pci_vtscsi_config), /* config reg size */
+ pci_vtscsi_reset, /* reset */
+ NULL, /* device-wide qnotify */
+ pci_vtscsi_cfgread, /* read virtio config */
+ pci_vtscsi_cfgwrite, /* write virtio config */
+ pci_vtscsi_neg_features, /* apply negotiated features */
+ 0, /* our capabilities */
+};
+
+static void *
+pci_vtscsi_proc(void *arg)
+{
+ struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg;
+ struct pci_vtscsi_queue *q = worker->vsw_queue;
+ struct pci_vtscsi_request *req;
+ int iolen;
+
+ for (;;) {
+ pthread_mutex_lock(&q->vsq_mtx);
+
+ while (STAILQ_EMPTY(&q->vsq_requests)
+ && !worker->vsw_exiting)
+ pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx);
+
+ if (worker->vsw_exiting)
+ break;
+
+ req = STAILQ_FIRST(&q->vsq_requests);
+ STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link);
+
+ pthread_mutex_unlock(&q->vsq_mtx);
+ iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in,
+ req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out);
+
+ pthread_mutex_lock(&q->vsq_qmtx);
+ vq_relchain(q->vsq_vq, req->vsr_idx, iolen);
+ vq_endchains(q->vsq_vq, 0);
+ pthread_mutex_unlock(&q->vsq_qmtx);
+
+ DPRINTF(("virtio-scsi: request <idx=%d> completed",
+ req->vsr_idx));
+ free(req);
+ }
+
+ pthread_mutex_unlock(&q->vsq_mtx);
+ return (NULL);
+}
+
+static void
+pci_vtscsi_reset(void *vsc)
+{
+ struct pci_vtscsi_softc *sc;
+
+ sc = vsc;
+
+ DPRINTF(("vtscsi: device reset requested"));
+ vi_reset_dev(&sc->vss_vs);
+
+ /* initialize config structure */
+ sc->vss_config = (struct pci_vtscsi_config){
+ .num_queues = VTSCSI_REQUESTQ,
+ /* Leave room for the request and the response. */
+ .seg_max = VTSCSI_MAXSEG - 2,
+ .max_sectors = 2,
+ .cmd_per_lun = 1,
+ .event_info_size = sizeof(struct pci_vtscsi_event),
+ .sense_size = 96,
+ .cdb_size = 32,
+ .max_channel = VIRTIO_SCSI_MAX_CHANNEL,
+ .max_target = VIRTIO_SCSI_MAX_TARGET,
+ .max_lun = VIRTIO_SCSI_MAX_LUN
+ };
+}
+
+static void
+pci_vtscsi_neg_features(void *vsc, uint64_t negotiated_features)
+{
+ struct pci_vtscsi_softc *sc = vsc;
+
+ sc->vss_features = negotiated_features;
+}
+
+static int
+pci_vtscsi_cfgread(void *vsc, int offset, int size, uint32_t *retval)
+{
+ struct pci_vtscsi_softc *sc = vsc;
+ void *ptr;
+
+ ptr = (uint8_t *)&sc->vss_config + offset;
+ memcpy(retval, ptr, size);
+ return (0);
+}
+
+static int
+pci_vtscsi_cfgwrite(void *vsc, int offset, int size, uint32_t val)
+{
+
+ return (0);
+}
+
+static inline int
+pci_vtscsi_get_lun(uint8_t *lun)
+{
+
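+ /*
+ * Assuming the flat (single level) LUN format: bytes 2-3 hold
+ * 0x4000 | LUN, so mask off the addressing-method bits.
+ */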
+ return (((lun[2] << 8) | lun[3]) & 0x3fff);
+}
+
+static int
+pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf,
+ size_t bufsize)
+{
+ struct pci_vtscsi_ctrl_tmf *tmf;
+ struct pci_vtscsi_ctrl_an *an;
+ uint32_t type;
+
+ type = *(uint32_t *)buf;
+
+ if (type == VIRTIO_SCSI_T_TMF) {
+ tmf = (struct pci_vtscsi_ctrl_tmf *)buf;
+ return (pci_vtscsi_tmf_handle(sc, tmf));
+ }
+
+ if (type == VIRTIO_SCSI_T_AN_QUERY) {
+ an = (struct pci_vtscsi_ctrl_an *)buf;
+ return (pci_vtscsi_an_handle(sc, an));
+ }
+
+ return (0);
+}
+
+static int
+pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc,
+ struct pci_vtscsi_ctrl_tmf *tmf)
+{
+ union ctl_io *io;
+ int err;
+
+ io = ctl_scsi_alloc_io(sc->vss_iid);
+ ctl_scsi_zero_io(io);
+
+ io->io_hdr.io_type = CTL_IO_TASK;
+ io->io_hdr.nexus.initid = sc->vss_iid;
+ io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun);
+ io->taskio.tag_type = CTL_TAG_SIMPLE;
+ io->taskio.tag_num = (uint32_t)tmf->id;
+
+ switch (tmf->subtype) {
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+ io->taskio.task_action = CTL_TASK_ABORT_TASK;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+ io->taskio.task_action = CTL_TASK_ABORT_TASK_SET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
+ io->taskio.task_action = CTL_TASK_CLEAR_ACA;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+ io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+ io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+ io->taskio.task_action = CTL_TASK_LUN_RESET;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+ io->taskio.task_action = CTL_TASK_QUERY_TASK;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
+ io->taskio.task_action = CTL_TASK_QUERY_TASK_SET;
+ break;
+ }
+
+ if (pci_vtscsi_debug) {
+ struct sbuf *sb = sbuf_new_auto();
+ ctl_io_sbuf(io, sb);
+ sbuf_finish(sb);
+ DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb)));
+ sbuf_delete(sb);
+ }
+
+ err = ioctl(sc->vss_ctl_fd, CTL_IO, io);
+ if (err != 0)
+ WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno)));
+
+ tmf->response = io->taskio.task_status;
+ ctl_scsi_free_io(io);
+ return (1);
+}
+
+static int
+pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc,
+ struct pci_vtscsi_ctrl_an *an)
+{
+
+ return (0);
+}
+
+static int
+pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in,
+ int niov_in, struct iovec *iov_out, int niov_out)
+{
+ struct pci_vtscsi_softc *sc = q->vsq_sc;
+ struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL;
+ struct pci_vtscsi_req_cmd_wr *cmd_wr;
+ struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG];
+ union ctl_io *io;
+ int data_niov_in, data_niov_out;
+ void *ext_data_ptr = NULL;
+ uint32_t ext_data_len = 0, ext_sg_entries = 0;
+ int err, nxferred;
+
+ seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in,
+ VTSCSI_IN_HEADER_LEN(sc));
+ seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out,
+ VTSCSI_OUT_HEADER_LEN(sc));
+
+ truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc));
+ truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc));
+ iov_to_buf(iov_in, niov_in, (void **)&cmd_rd);
+
+ cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc));
+ io = ctl_scsi_alloc_io(sc->vss_iid);
+ ctl_scsi_zero_io(io);
+
+ io->io_hdr.nexus.initid = sc->vss_iid;
+ io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun);
+
+ io->io_hdr.io_type = CTL_IO_SCSI;
+
+ if (data_niov_in > 0) {
+ ext_data_ptr = (void *)data_iov_in;
+ ext_sg_entries = data_niov_in;
+ ext_data_len = count_iov(data_iov_in, data_niov_in);
+ io->io_hdr.flags |= CTL_FLAG_DATA_OUT;
+ } else if (data_niov_out > 0) {
+ ext_data_ptr = (void *)data_iov_out;
+ ext_sg_entries = data_niov_out;
+ ext_data_len = count_iov(data_iov_out, data_niov_out);
+ io->io_hdr.flags |= CTL_FLAG_DATA_IN;
+ }
+
+ io->scsiio.sense_len = sc->vss_config.sense_size;
+ io->scsiio.tag_num = (uint32_t)cmd_rd->id;
+ switch (cmd_rd->task_attr) {
+ case VIRTIO_SCSI_S_ORDERED:
+ io->scsiio.tag_type = CTL_TAG_ORDERED;
+ break;
+ case VIRTIO_SCSI_S_HEAD:
+ io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE;
+ break;
+ case VIRTIO_SCSI_S_ACA:
+ io->scsiio.tag_type = CTL_TAG_ACA;
+ break;
+ case VIRTIO_SCSI_S_SIMPLE:
+ default:
+ io->scsiio.tag_type = CTL_TAG_SIMPLE;
+ break;
+ }
+ io->scsiio.ext_sg_entries = ext_sg_entries;
+ io->scsiio.ext_data_ptr = ext_data_ptr;
+ io->scsiio.ext_data_len = ext_data_len;
+ io->scsiio.ext_data_filled = 0;
+ io->scsiio.cdb_len = sc->vss_config.cdb_size;
+ memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size);
+
+ if (pci_vtscsi_debug) {
+ struct sbuf *sb = sbuf_new_auto();
+ ctl_io_sbuf(io, sb);
+ sbuf_finish(sb);
+ DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb)));
+ sbuf_delete(sb);
+ }
+
+ err = ioctl(sc->vss_ctl_fd, CTL_IO, io);
+ if (err != 0) {
+ WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno)));
+ cmd_wr->response = VIRTIO_SCSI_S_FAILURE;
+ } else {
+ cmd_wr->sense_len = MIN(io->scsiio.sense_len,
+ sc->vss_config.sense_size);
+ cmd_wr->residual = io->scsiio.residual;
+ cmd_wr->status = io->scsiio.scsi_status;
+ cmd_wr->response = VIRTIO_SCSI_S_OK;
+ memcpy(&cmd_wr->sense, &io->scsiio.sense_data,
+ cmd_wr->sense_len);
+ }
+
+ buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0);
+ nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled;
+ free(cmd_rd);
+ free(cmd_wr);
+ ctl_scsi_free_io(io);
+ return (nxferred);
+}
+
+static void
+pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtscsi_softc *sc;
+ struct iovec iov[VTSCSI_MAXSEG];
+ uint16_t idx, n;
+ void *buf = NULL;
+ size_t bufsize;
+ int iolen;
+
+ sc = vsc;
+
+ while (vq_has_descs(vq)) {
+ n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL);
+ bufsize = iov_to_buf(iov, n, &buf);
+ iolen = pci_vtscsi_control_handle(sc, buf, bufsize);
+ buf_to_iov(buf + bufsize - iolen, iolen, iov, n,
+ bufsize - iolen);
+
+ /*
+ * Release this chain and handle more
+ */
+ vq_relchain(vq, idx, iolen);
+ }
+ vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
+ free(buf);
+}
+
+static void
+pci_vtscsi_eventq_notify(void *vsc, struct vqueue_info *vq)
+{
+
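+ /*
+ * No events are reported by this emulation; leave the posted
+ * buffers pending and suppress further kicks.
+ */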
+ vq_kick_disable(vq);
+}
+
+static void
+pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq)
+{
+ struct pci_vtscsi_softc *sc;
+ struct pci_vtscsi_queue *q;
+ struct pci_vtscsi_request *req;
+ struct iovec iov[VTSCSI_MAXSEG];
+ uint16_t flags[VTSCSI_MAXSEG];
+ uint16_t idx, n, i;
+ int readable;
+
+ sc = vsc;
+ q = &sc->vss_queues[vq->vq_num - 2];
+
+ while (vq_has_descs(vq)) {
+ readable = 0;
+ n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, flags);
+
+ /* Count readable descriptors */
+ for (i = 0; i < n; i++) {
+ if (flags[i] & VRING_DESC_F_WRITE)
+ break;
+
+ readable++;
+ }
+
+ req = calloc(1, sizeof(struct pci_vtscsi_request));
+ req->vsr_idx = idx;
+ req->vsr_queue = q;
+ req->vsr_niov_in = readable;
+ req->vsr_niov_out = n - readable;
+ memcpy(req->vsr_iov_in, iov,
+ req->vsr_niov_in * sizeof(struct iovec));
+ memcpy(req->vsr_iov_out, iov + readable,
+ req->vsr_niov_out * sizeof(struct iovec));
+
+ pthread_mutex_lock(&q->vsq_mtx);
+ STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);
+ pthread_cond_signal(&q->vsq_cv);
+ pthread_mutex_unlock(&q->vsq_mtx);
+
+ DPRINTF(("virtio-scsi: request <idx=%d> enqueued", idx));
+ }
+}
+
+static int
+pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc,
+ struct pci_vtscsi_queue *queue, int num)
+{
+ struct pci_vtscsi_worker *worker;
+ char tname[MAXCOMLEN + 1];
+ int i;
+
+ queue->vsq_sc = sc;
+ queue->vsq_vq = &sc->vss_vq[num + 2];
+
+ pthread_mutex_init(&queue->vsq_mtx, NULL);
+ pthread_mutex_init(&queue->vsq_qmtx, NULL);
+ pthread_cond_init(&queue->vsq_cv, NULL);
+ STAILQ_INIT(&queue->vsq_requests);
+ LIST_INIT(&queue->vsq_workers);
+
+ for (i = 0; i < VTSCSI_THR_PER_Q; i++) {
+ worker = calloc(1, sizeof(struct pci_vtscsi_worker));
+ worker->vsw_queue = queue;
+
+ pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc,
+ (void *)worker);
+
+ snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i);
+ pthread_set_name_np(worker->vsw_thread, tname);
+ LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link);
+ }
+
+ return (0);
+}
+
+static int
+pci_vtscsi_init(struct vmctx *ctx, struct mmio_devinst *pi, char *opts)
+{
+ struct pci_vtscsi_softc *sc;
+ char *opt, *optname;
+ const char *devname;
+ int i, optidx = 0;
+
+ sc = calloc(1, sizeof(struct pci_vtscsi_softc));
+ devname = "/dev/cam/ctl";
+ while ((opt = strsep(&opts, ",")) != NULL) {
+ optname = strsep(&opt, "=");
+ if (opt == NULL && optidx == 0) {
+ if (optname[0] != 0)
+ devname = optname;
+ } else if (strcmp(optname, "dev") == 0 && opt != NULL) {
+ devname = opt;
+ } else if (strcmp(optname, "iid") == 0 && opt != NULL) {
+ sc->vss_iid = strtoul(opt, NULL, 10);
+ } else {
+ EPRINTLN("Invalid option %s", optname);
+ free(sc);
+ return (1);
+ }
+ optidx++;
+ }
+
+ sc->vss_ctl_fd = open(devname, O_RDWR);
+ if (sc->vss_ctl_fd < 0) {
+ WPRINTF(("cannot open %s: %s", devname, strerror(errno)));
+ free(sc);
+ return (1);
+ }
+
+ vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq);
+ sc->vss_vs.vs_mtx = &sc->vss_mtx;
+
+ /* controlq */
+ sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ;
+ sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify;
+
+ /* eventq */
+ sc->vss_vq[1].vq_qsize = VTSCSI_RINGSZ;
+ sc->vss_vq[1].vq_notify = pci_vtscsi_eventq_notify;
+
+ /* request queues */
+ for (i = 2; i < VTSCSI_MAXQ; i++) {
+ sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ;
+ sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify;
+ pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2);
+ }
+
+ /* initialize config space */
+ mmio_set_cfgreg16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI);
+ mmio_set_cfgreg16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
+ mmio_set_cfgreg8(pi, PCIR_CLASS, PCIC_STORAGE);
+ mmio_set_cfgreg16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_SCSI);
+ mmio_set_cfgreg16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
+
+ if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix()))
+ return (1);
+ vi_set_io_res(&sc->vss_vs, 0);
+
+ return (0);
+}
+
+
+struct mmio_devemu pci_de_vscsi = {
+ .de_emu = "virtio-scsi",
+ .de_init = pci_vtscsi_init,
+ .de_write = vi_mmio_write,
+ .de_read = vi_mmio_read
+};
+MMIO_EMUL_SET(pci_de_vscsi);
Index: usr.sbin/bhyve/mmio/net_utils.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/net_utils.h
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Vincenzo Maffione <v.maffione@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_UTILS_H_
+#define _NET_UTILS_H_
+
+#include <stdint.h>
+#include "mmio_emul.h"
+
+void net_genmac(struct mmio_devinst *pi, uint8_t *macaddr);
+int net_parsemac(char *mac_str, uint8_t *mac_addr);
+
+#endif /* _NET_UTILS_H_ */
Index: usr.sbin/bhyve/mmio/net_utils.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/mmio/net_utils.c
@@ -0,0 +1,90 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <net/ethernet.h>
+
+#include <errno.h>
+#include <md5.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "bhyverun.h"
+#include "debug.h"
+#include "net_utils.h"
+
+int
+net_parsemac(char *mac_str, uint8_t *mac_addr)
+{
+ struct ether_addr *ea;
+ char *tmpstr;
+ char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
+
+ tmpstr = strsep(&mac_str,"=");
+
+ if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
+ ea = ether_aton(mac_str);
+
+ if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+ memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+ EPRINTLN("Invalid MAC %s", mac_str);
+ return (EINVAL);
+ } else
+ memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
+ }
+
+ return (0);
+}
+
+void
+net_genmac(struct mmio_devinst *pi, uint8_t *macaddr)
+{
+ /*
+ * The default MAC address is the standard NetApp OUI of 00-a0-98,
+ * followed by an MD5 of the PCI slot/func number and dev name
+ */
+ MD5_CTX mdctx;
+ unsigned char digest[16];
+ char nstr[80];
+
+ snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
+ pi->di_func, vmname);
+
+ MD5Init(&mdctx);
+ MD5Update(&mdctx, nstr, (unsigned int)strlen(nstr));
+ MD5Final(digest, &mdctx);
+
+ macaddr[0] = 0x00;
+ macaddr[1] = 0xa0;
+ macaddr[2] = 0x98;
+ macaddr[3] = digest[0];
+ macaddr[4] = digest[1];
+ macaddr[5] = digest[2];
+}
Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -116,7 +116,7 @@
int resetting; /* protected by tx_mtx */
uint64_t vsc_features; /* negotiated features */
-
+
pthread_mutex_t rx_mtx;
int rx_merge; /* merged rx bufs in use */
Index: usr.sbin/bhyvectl/Makefile
===================================================================
--- usr.sbin/bhyvectl/Makefile
+++ usr.sbin/bhyvectl/Makefile
@@ -5,19 +5,17 @@
.include <src.opts.mk>
PROG= bhyvectl
-SRCS= bhyvectl.c
PACKAGE= bhyve
-MAN= bhyvectl.8
-
LIBADD= vmmapi util
WARNS?= 3
-CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
+CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm
.if ${MK_BHYVE_SNAPSHOT} != "no"
CFLAGS+= -DBHYVE_SNAPSHOT
.endif
+.include "${.CURDIR}/${MACHINE}/Makefile.inc"
.include <bsd.prog.mk>
Index: usr.sbin/bhyvectl/amd64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/amd64/Makefile.inc
@@ -0,0 +1,7 @@
+#
+# $FreeBSD$
+#
+.PATH: ${.CURDIR}/amd64
+
+SRCS= bhyvectl.c
+MAN= bhyvectl.8
Index: usr.sbin/bhyvectl/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/arm64/Makefile.inc
@@ -0,0 +1,7 @@
+#
+# $FreeBSD$
+#
+.PATH: ${.CURDIR}/arm64
+
+SRCS= bhyvectl.c
+MAN= bhyvectl.8
Index: usr.sbin/bhyvectl/arm64/bhyvectl.8
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/arm64/bhyvectl.8
@@ -0,0 +1,97 @@
+.\" Copyright (c) 2015 Christian Brueffer
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 13, 2016
+.Dt BHYVECTL 8
+.Os
+.Sh NAME
+.Nm bhyvectl
+.Nd "control utility for bhyve instances"
+.Sh SYNOPSIS
+.Nm
+.Fl -vm= Ns Ar <vmname>
+.Op Fl -create
+.Op Fl -destroy
+.Op Fl -get-stats
+.Op Fl -inject-nmi
+.Op Fl -force-reset
+.Op Fl -force-poweroff
+.Sh DESCRIPTION
+The
+.Nm
+command is a control utility for active
+.Xr bhyve 8
+virtual machine instances.
+.Pp
+.Em Note :
+Most
+.Nm
+flags are intended for querying and setting the state of an active instance.
+These commands are intended for development purposes, and are not documented here.
+A complete list can be obtained by executing
+.Nm
+without any arguments.
+.Pp
+The user-facing options are as follows:
+.Bl -tag -width ".Fl d Ar argument"
+.It Fl -vm= Ns Ar <vmname>
+Operate on the virtual machine
+.Ar <vmname> .
+.It Fl -create
+Create the specified VM.
+.It Fl -destroy
+Destroy the specified VM.
+.It Fl -get-stats
+Retrieve statistics for the specified VM.
+.It Fl -inject-nmi
+Inject a non-maskable interrupt (NMI) into the VM.
+.It Fl -force-reset
+Force the VM to reset.
+.It Fl -force-poweroff
+Force the VM to power off.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+Destroy the VM called fbsd10:
+.Pp
+.Dl "bhyvectl --vm=fbsd10 --destroy"
+.Sh SEE ALSO
+.Xr bhyve 8 ,
+.Xr bhyveload 8
+.Sh HISTORY
+The
+.Nm
+command first appeared in
+.Fx 10.1 .
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+utility was written by
+.An Peter Grehan
+and
+.An Neel Natu .
Index: usr.sbin/bhyvectl/arm64/bhyvectl.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyvectl/arm64/bhyvectl.c
@@ -0,0 +1,140 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/errno.h>
+#include <sys/mman.h>
+#include <sys/cpuset.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <libutil.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <time.h>
+#include <assert.h>
+#include <libutil.h>
+
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
+
+#include <vmmapi.h>
+
+#define MB (1UL << 20)
+#define GB (1UL << 30)
+
+#define REQ_ARG required_argument
+#define NO_ARG no_argument
+#define OPT_ARG optional_argument
+
+#define eprintf(fmt, ...) printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+static const char *progname;
+
+static void
+usage()
+{
+
+ (void)fprintf(stderr,
+ "Usage: %s --vm=<vmname>\n"
+ " %*s [--destroy]\n",
+ progname, (int)strlen(progname), "");
+ exit(1);
+}
+
+static int create;
+static int destroy;
+
+enum {
+ VMNAME = 1000, /* avoid collision with return values from getopt */
+};
+
+const struct option opts[] = {
+ { "vm", REQ_ARG, NULL, VMNAME },
+ { "destroy", NO_ARG, &destroy, 1 },
+ { NULL, 0, NULL, 1 },
+};
+
+int
+main(int argc, char *argv[])
+{
+ char *vmname;
+ int error, ch;
+ struct vmctx *ctx;
+
+ vmname = NULL;
+ progname = basename(argv[0]);
+
+ while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) {
+ switch (ch) {
+ case 0:
+ break;
+ case VMNAME:
+ vmname = optarg;
+ break;
+ default:
+ usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (vmname == NULL)
+ usage();
+
+ error = 0;
+ if (!error && create)
+ error = vm_create(vmname);
+ if (!error) {
+ ctx = vm_open(vmname);
+ if (ctx == NULL) {
+ printf("VM:%s is not created.\n", vmname);
+ exit(1);
+ }
+ }
+
+
+ if (error)
+ printf("errno = %d\n", errno);
+
+ if (!error && destroy)
+ vm_destroy(ctx);
+
+ exit(error);
+}
Index: usr.sbin/bhyveload/Makefile
===================================================================
--- usr.sbin/bhyveload/Makefile
+++ usr.sbin/bhyveload/Makefile
@@ -1,14 +1,17 @@
# $FreeBSD$
PROG= bhyveload
-SRCS= bhyveload.c
-MAN= bhyveload.8
PACKAGE= bhyve
+BHYVELOAD_SYSDIR?=${SRCTOP}
+BHYVELOAD_SRCTOP?=${.CURDIR}
+
LIBADD= vmmapi
WARNS?= 3
CFLAGS+=-I${SRCTOP}/stand/userboot
+.include "${BHYVELOAD_SRCTOP}/${MACHINE}/Makefile.inc"
+
.include <bsd.prog.mk>
Index: usr.sbin/bhyveload/amd64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/amd64/Makefile.inc
@@ -0,0 +1,7 @@
+# $FreeBSD$
+.PATH: ${BHYVELOAD_SRCTOP}/amd64/
+
+SRCS= bhyveload.c
+MAN= bhyveload.8
+
+CFLAGS+=-I${SRCTOP}/sys/boot/userboot
Index: usr.sbin/bhyveload/arm64/Makefile.inc
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/Makefile.inc
@@ -0,0 +1,13 @@
+# $FreeBSD$
+LIBADD+= util
+
+.PATH: ${BHYVELOAD_SRCTOP}/arm64/
+
+SRCS= bhyveload.c \
+ boot.c
+
+.PATH: ${.CURDIR}/../../sys/arm64/vmm
+
+CFLAGS += -I${.CURDIR}/../../stand/common
+
+MK_MAN=no
Index: usr.sbin/bhyveload/arm64/bhyveload.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/bhyveload.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/disk.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <err.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <termios.h>
+#include <unistd.h>
+#include <vmmapi.h>
+
+#include <libutil.h>
+
+#include "boot.h"
+
+#define gvatovm(addr) ((uint64_t)(addr) - KERNBASE + \
+ kernel_load_address - memory_base_address)
+#define overlap(x_start, x_end, y_start, y_end) \
+ ((x_start) >= (y_start) && (x_start) < (y_end) || \
+ (x_end) >= (y_start) && (x_end) < (y_end))
+
+#define MB (1024 * 1024UL)
+#define BSP 0
+#define KERNEL_IMAGE_NAME_LEN 32
+
+#define GIC_V3_DIST_START 0x2f000000UL
+#define GIC_V3_DIST_SIZE 0x10000UL
+#define GIC_V3_REDIST_START 0x2f100000UL
+#define GIC_V3_REDIST_SIZE 0x200000UL
+
+struct env {
+ const char *str;
+ SLIST_ENTRY(env) next;
+};
+static SLIST_HEAD(envhead, env) envhead;
+
+static uint64_t memory_base_address, kernel_load_address;
+
+static char *vmname, *progname;
+static struct vmctx *ctx;
+
+static int
+env_add(const char *str)
+{
+ struct env *env;
+
+ env = malloc(sizeof(*env));
+ if (env == NULL)
+ return (ENOMEM);
+ env->str = str;
+ SLIST_INSERT_HEAD(&envhead, env, next);
+
+ return (0);
+}
+
+static int
+env_tostr(char **envstrp, int *envlen)
+{
+ struct env *env;
+ int i;
+
+ *envlen = 0;
+ SLIST_FOREACH(env, &envhead, next)
+ *envlen = *envlen + strlen(env->str) + 1;
+ /* Make room for the two terminating zeroes */
+ if (*envlen == 0)
+ *envlen = 2;
+ else
+ (*envlen)++;
+
+ *envstrp = malloc(*envlen * sizeof(char));
+ if (*envstrp == NULL)
+ return (ENOMEM);
+
+ i = 0;
+ SLIST_FOREACH(env, &envhead, next) {
+ strncpy(*envstrp + i, env->str, strlen(env->str));
+ i += strlen(env->str);
+ (*envstrp)[i++] = 0;
+ }
+ (*envstrp)[i] = 0;
+
+ /*
+ * At this point we have envstr[0] == 0 if the environment is empty.
+ * Add the second 0 to properly terminate the environment string.
+ */
+ if (SLIST_EMPTY(&envhead))
+ (*envstrp)[1] = 0;
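+
+ /*
+ * For example, "-e foo=1 -e bar=2" yields "bar=2\0foo=1\0\0"
+ * (reverse insertion order due to SLIST_INSERT_HEAD), 13 bytes
+ * including the extra terminating NUL.
+ */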
+
+ /*
+ for (i = 0; i < *envlen; i++)
+ printf("%d ", (int)(*envstrp)[i]);
+ printf("\n");
+ */
+
+ return (0);
+}
+
+/*
+ * Guest virtual machine
+ */
+static int
+guest_copyin(const void *from, uint64_t to, size_t size)
+{
+ char *ptr;
+ ptr = vm_map_ipa(ctx, to, size);
+ if (ptr == NULL)
+ return (EFAULT);
+
+ memcpy(ptr, from, size);
+ return (0);
+}
+
+static int
+guest_copyout(uint64_t from, void *to, size_t size)
+{
+ char *ptr;
+
+ ptr = vm_map_ipa(ctx, from, size);
+ if (ptr == NULL)
+ return (EFAULT);
+
+ memcpy(to, ptr, size);
+ return (0);
+}
+
+static void
+guest_setreg(enum vm_reg_name vmreg, uint64_t v)
+{
+ int error;
+
+ error = vm_set_register(ctx, BSP, vmreg, v);
+ if (error)
+ perror("vm_set_register");
+}
+
+#if 0
+static int
+parse_memsize(const char *optarg, size_t *ret_memsize)
+{
+ char *endptr;
+ size_t optval;
+ int error;
+
+ optval = strtoul(optarg, &endptr, 0);
+ if (*optarg != '\0' && *endptr == '\0') {
+ /* Memory size must be at least one megabyte. */
+ if (optval < MB)
+ optval = optval * MB;
+ *ret_memsize = optval;
+ error = 0;
+ } else {
+ error = expand_number(optarg, ret_memsize);
+ }
+
+ return (error);
+}
+#endif
+
+static void
+usage(int code)
+{
+ fprintf(stderr,
+ "Usage: %s [-h] [-k <kernel-image>] [-e <name=value>] [-b base-address]\n"
+ " %*s [-m mem-size] [-l load-address] <vmname>\n"
+ " -k: path to guest kernel image\n"
+ " -e: guest boot environment\n"
+ " -b: memory base address\n"
+ " -m: memory size\n"
+ " -l: kernel load address in the guest physical memory\n"
+ " -h: help\n",
+ progname, (int)strlen(progname), "");
+ exit(code);
+}
+
+int
+main(int argc, char** argv)
+{
+ struct vm_bootparams bootparams;
+ uint64_t mem_size;
+ int opt, error;
+ int kernel_image_fd;
+ uint64_t periphbase;
+ char kernel_image_name[KERNEL_IMAGE_NAME_LEN];
+ struct stat st;
+ void *addr;
+ char *envstr;
+ int envlen;
+
+ progname = basename(argv[0]);
+
+ mem_size = 128 * MB;
+ memory_base_address = VM_GUEST_BASE_IPA;
+ kernel_load_address = memory_base_address;
+ periphbase = 0x2c000000UL;
+ strncpy(kernel_image_name, "kernel.bin", KERNEL_IMAGE_NAME_LEN);
+ memset(&bootparams, 0, sizeof(struct vm_bootparams));
+
+ while ((opt = getopt(argc, argv, "hk:l:b:m:e:")) != -1) {
+ switch (opt) {
+ case 'k':
+ strncpy(kernel_image_name, optarg, KERNEL_IMAGE_NAME_LEN);
+ break;
+ case 'l':
+ kernel_load_address = strtoul(optarg, NULL, 0);
+ break;
+ case 'b':
+ memory_base_address = strtoul(optarg, NULL, 0);
+ break;
+ case 'm':
+ error = vm_parse_memsize(optarg, &mem_size);
+ if (error) {
+ fprintf(stderr, "Invalid memsize '%s'\n", optarg);
+ exit(1);
+ }
+ break;
+ case 'e':
+ error = env_add(optarg);
+ if (error) {
+ perror("env_add");
+ exit(1);
+ }
+ break;
+ case 'h':
+ usage(0);
+ default:
+ fprintf(stderr, "Unknown argument '%c'\n", opt);
+ usage(1);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1) {
+ fprintf(stderr, "Missing or unknown arguments\n");
+ usage(1);
+ }
+
+ if (kernel_load_address < memory_base_address) {
+ fprintf(stderr, "Kernel load address is below memory base address\n");
+ exit(1);
+ }
+
+ vmname = argv[0];
+
+ kernel_image_fd = open(kernel_image_name, O_RDONLY);
+ if (kernel_image_fd == -1) {
+ perror("open kernel_image_name");
+ exit(1);
+ }
+
+ error = vm_create(vmname);
+ if (error) {
+ perror("vm_create");
+ exit(1);
+ }
+
+ ctx = vm_open(vmname);
+ if (ctx == NULL) {
+ perror("vm_open");
+ exit(1);
+ }
+
+ error = vm_setup_memory(ctx, memory_base_address, mem_size, VM_MMAP_ALL);
+ if (error) {
+ perror("vm_setup_memory");
+ exit(1);
+ }
+
+ error = fstat(kernel_image_fd, &st);
+ if (error) {
+ perror("fstat");
+ exit(1);
+ }
+
+ if ((uint64_t)st.st_size > mem_size) {
+ fprintf(stderr, "Kernel image larger than memory size\n");
+ exit(1);
+ }
+ if (kernel_load_address + st.st_size >= memory_base_address + mem_size) {
+ fprintf(stderr, "Kernel image out of bounds of guest memory\n");
+ exit(1);
+ }
+
+ addr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, kernel_image_fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap kernel_image_fd");
+ exit(1);
+ }
+
+ if (guest_copyin(addr, kernel_load_address - memory_base_address, st.st_size) != 0) {
+ perror("guest_copyin");
+ exit(1);
+ }
+
+ error = env_tostr(&envstr, &envlen);
+ if (error) {
+ perror("parse boot environment\n");
+ exit(1);
+ }
+
+ bootparams.envstr = envstr;
+ bootparams.envlen = envlen;
+ error = parse_kernel(addr, st.st_size, ctx, &bootparams);
+ if (error) {
+ fprintf(stderr, "Error parsing image\n");
+ exit(1);
+ }
+
+ /*
+ fprintf(stderr, "bootparams.envp_gva = 0x%016lx\n", bootparams.envp_gva);
+ fprintf(stderr, "gvatom(bootparams.envp_gva) = 0x%016lx\n", gvatovm(bootparams.envp_gva));
+ fprintf(stderr, "vm_map_ipa() = 0x%016lx\n", (uint64_t)vm_map_ipa(ctx, gvatovm(bootparams.envp_gva), PAGE_SIZE));
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, "bootparams.mudulep_gva = 0x%016lx\n", bootparams.modulep_gva);
+ fprintf(stderr, "gvatom(bootparams.modulep_gva) = 0x%016lx\n", gvatovm(bootparams.modulep_gva));
+ fprintf(stderr, "vm_map_ipa() = 0x%016lx\n", (uint64_t)vm_map_ipa(ctx, gvatovm(bootparams.modulep_gva), PAGE_SIZE));
+ fprintf(stderr, "\n");
+ */
+
+ /* Copy the environment string in the guest memory */
+ if (guest_copyin((void *)envstr, gvatovm(bootparams.envp_gva), envlen) != 0) {
+ perror("guest_copyin");
+ exit(1);
+ }
+
+ /* Copy the module data in the guest memory */
+ if (guest_copyin(bootparams.modulep, gvatovm(bootparams.modulep_gva), bootparams.module_len) != 0) {
+ perror("guest_copyin");
+ exit(1);
+ }
+
+ uint64_t mem_end = memory_base_address + mem_size;
+ uint64_t dist_end = GIC_V3_DIST_START + GIC_V3_DIST_SIZE;
+ uint64_t redist_end = GIC_V3_REDIST_START + GIC_V3_REDIST_SIZE;
+
+ if (overlap(GIC_V3_DIST_START, dist_end, memory_base_address, mem_end)) {
+ fprintf(stderr, "Guest memory overlaps with VGIC Distributor\n");
+ exit(1);
+ }
+
+ if (overlap(GIC_V3_REDIST_START, redist_end, memory_base_address, mem_end)) {
+ fprintf(stderr, "Guest memory overlaps with VGIC Redistributor\n");
+ exit(1);
+ }
+
+ error = vm_attach_vgic(ctx, GIC_V3_DIST_START, GIC_V3_DIST_SIZE,
+ GIC_V3_REDIST_START, GIC_V3_REDIST_SIZE);
+ if (error) {
+ fprintf(stderr, "Error attaching VGIC to the virtual machine\n");
+ exit(1);
+ }
+
+ munmap(addr, st.st_size);
+
+ guest_setreg(VM_REG_ELR_EL2, kernel_load_address + bootparams.entry_off);
+ guest_setreg(VM_REG_GUEST_X0, bootparams.modulep_gva);
+
+ return 0;
+}
Index: usr.sbin/bhyveload/arm64/boot.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/boot.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _BOOT_H_
+#define _BOOT_H_
+
+
+
+struct vm_bootparams {
+ uint64_t entry_off;
+ uint64_t modulep_gva; /* Guest virtual address of modulep data */
+ uint64_t envp_gva; /* Guest virtual address for env */
+ char *envstr;
+ int envlen;
+ int module_len;
+ void *modulep; /* Bhyveload address of modulep data */
+};
+
+int parse_kernel(void *addr, size_t img_size, struct vmctx *ctx,
+ struct vm_bootparams *bootparams);
+
+#endif
Index: usr.sbin/bhyveload/arm64/boot.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyveload/arm64/boot.c
@@ -0,0 +1,618 @@
+/*-
+ * Copyright (c) 2017 Alexandru Elisei <alexandru.elisei@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/elf.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/linker.h>
+#include <sys/elf_generic.h>
+#include <sys/module.h>
+#include <sys/errno.h>
+
+#include <machine/vmm.h>
+#include <machine/vmparam.h>
+#include <bootstrap.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <vmmapi.h>
+
+#include "boot.h"
+
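+/*
+ * Translate a guest kernel virtual address into the corresponding address
+ * inside the mmap'ed kernel image in the bhyveload process.
+ */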
+#define gvatou(gva, addr) ((vm_offset_t)(gva) - KERNBASE + (vm_offset_t)(addr))
+
+struct elf_file {
+ Elf_Phdr *ph;
+ Elf_Ehdr *ehdr;
+ Elf_Sym *symtab;
+ Elf_Hashelt *hashtab;
+ Elf_Hashelt nbuckets;
+ Elf_Hashelt nchains;
+ Elf_Hashelt *buckets;
+ Elf_Hashelt *chains;
+ Elf_Rel *rel;
+ size_t relsz;
+ Elf_Rela *rela;
+ size_t relasz;
+ char *strtab;
+ size_t strsz;
+ caddr_t firstpage_u; /* Userspace address of mmap'ed guest kernel */
+};
+
+static uint64_t parse_image(struct preloaded_file *img, struct elf_file *ef);
+static void image_addmetadata(struct preloaded_file *img, int type,
+ size_t size, void *addr);
+static int image_addmodule(struct preloaded_file *img, char *modname, int version);
+static void parse_metadata(struct preloaded_file *img, struct elf_file *ef,
+ Elf_Addr p_startu, Elf_Addr p_endu);
+static int lookup_symbol(struct elf_file *ef, const char *name, Elf_Sym *symp);
+static struct kernel_module *image_findmodule(struct preloaded_file *img, char *modname,
+ struct mod_depend *verinfo);
+static uint64_t moddata_len(struct preloaded_file *img);
+static void moddata_copy(vm_offset_t dest, struct preloaded_file *img);
+
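+/*
+ * Basic sanity checks on the ELF identification of the mmap'ed kernel image.
+ */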
+static int
+load_elf_header(struct elf_file *ef)
+{
+ Elf_Ehdr *ehdr;
+
+ ehdr = ef->ehdr = (Elf_Ehdr *)ef->firstpage_u;
+ /* Is it ELF? */
+ if (!IS_ELF(*ehdr))
+ return (EFTYPE);
+
+ if (ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||/* Layout ? */
+ ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
+ ehdr->e_ident[EI_VERSION] != EV_CURRENT || /* Version ? */
+ ehdr->e_version != EV_CURRENT ||
+ ehdr->e_machine != ELF_TARG_MACH) /* Machine ? */
+ return (EFTYPE);
+
+ return (0);
+}
+
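+/*
+ * Walk a chain of preloaded module metadata records looking for a
+ * MODINFO_TYPE field matching 'type' and return the MODINFO_NAME record
+ * that starts the containing entry, or NULL if none is found.
+ */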
+static caddr_t
+preload_search_by_type(const char *type, caddr_t preload_metadata)
+{
+ caddr_t curp, lname;
+ uint32_t *hdr;
+ int next;
+
+ if (preload_metadata != NULL) {
+
+ curp = preload_metadata;
+ lname = NULL;
+ for (;;) {
+ hdr = (uint32_t *)curp;
+ if (hdr[0] == 0 && hdr[1] == 0)
+ break;
+
+ /* remember the start of each record */
+ if (hdr[0] == MODINFO_NAME)
+ lname = curp;
+
+ /* Search for a MODINFO_TYPE field */
+ if ((hdr[0] == MODINFO_TYPE) &&
+ !strcmp(type, curp + sizeof(uint32_t) * 2))
+ return(lname);
+
+ /* skip to next field */
+ next = sizeof(uint32_t) * 2 + hdr[1];
+ next = roundup(next, sizeof(u_long));
+ curp += next;
+ }
+ }
+ return(NULL);
+}
+
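+/*
+ * Parse the mmap'ed kernel image: validate the ELF header, gather the
+ * metadata a FreeBSD kernel expects from loader(8) (ELF and section headers,
+ * symbol ranges, environment and module pointers) and build the module data
+ * blob that the caller copies into guest memory.
+ */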
+int
+parse_kernel(void *addr, size_t img_size, struct vmctx *ctx,
+ struct vm_bootparams *bootparams)
+{
+ struct elf_file ef;
+ struct preloaded_file img;
+ Elf_Ehdr *ehdr_u;
+ int err;
+ vm_offset_t lastaddr_gva;
+ uint64_t kernend;
+ uint64_t size;
+ uint64_t modlen;
+ int boothowto;
+
+ memset(&ef, 0, sizeof(struct elf_file));
+ memset(&img, 0, sizeof(struct preloaded_file));
+
+ ef.firstpage_u = (caddr_t)addr;
+ err = load_elf_header(&ef);
+ if (err != 0)
+ return (err);
+
+ ehdr_u = ef.ehdr;
+ if (ehdr_u->e_type != ET_EXEC) {
+ fprintf(stderr, "Image not a kernel\n");
+ return (EPERM);
+ }
+ img.f_name = "elf kernel";
+ img.f_type = "elf kernel";
+ img.f_size = img_size;
+
+ size = parse_image(&img, &ef);
+ if (size == 0)
+ return (ENOEXEC);
+ bootparams->entry_off = ehdr_u->e_entry - KERNBASE;
+
+ image_addmetadata(&img, MODINFOMD_ELFHDR, sizeof(*ehdr_u), ehdr_u);
+
+ /* XXX: Add boothowto options? */
+ boothowto = 0;
+ image_addmetadata(&img, MODINFOMD_HOWTO, sizeof(boothowto), &boothowto);
+
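+	/*
+	 * Lay out the boot data after the kernel image: a fixed gap follows
+	 * the loaded image, then the environment strings and the module data,
+	 * each starting on a page boundary.
+	 */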
+ lastaddr_gva = roundup(img.f_addr + img.f_size + 0x3fd000, PAGE_SIZE);
+ image_addmetadata(&img, MODINFOMD_ENVP, sizeof(lastaddr_gva), &lastaddr_gva);
+ bootparams->envp_gva = lastaddr_gva;
+
+ lastaddr_gva = roundup(lastaddr_gva + bootparams->envlen, PAGE_SIZE);
+ /* Module data start in the guest kernel virtual address space */
+ bootparams->modulep_gva = lastaddr_gva;
+
+ modlen = moddata_len(&img);
+ kernend = roundup(bootparams->modulep_gva + modlen, PAGE_SIZE);
+ image_addmetadata(&img, MODINFOMD_KERNEND, sizeof(kernend), &kernend);
+
+ bootparams->module_len = roundup(modlen, PAGE_SIZE);
+ bootparams->modulep = calloc(1, bootparams->module_len);
+ if (bootparams->modulep == NULL) {
+ perror("calloc");
+ return (ENOMEM);
+ }
+
+ moddata_copy((vm_offset_t)bootparams->modulep, &img);
+
+ return (0);
+}
+
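+/*
+ * Walk the program and section headers of the kernel image, record the
+ * metadata loader(8) would normally pass in (.ctors, section headers, symbol
+ * table bounds, the DYNAMIC section) and scan the module metadata linker set.
+ * Returns the size of the loaded image; 0 is treated as failure by the caller.
+ */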
+static uint64_t
+parse_image(struct preloaded_file *img, struct elf_file *ef)
+{
+ Elf_Ehdr *ehdr;
+ Elf_Phdr *phdr;
+ Elf_Phdr *php;
+ Elf_Shdr *shdr;
+ Elf_Dyn *dp;
+ Elf_Addr adp;
+ Elf_Addr ctors;
+ Elf_Addr ssym, esym;
+ Elf_Addr p_start, p_end;
+ Elf_Size size;
+ Elf_Sym sym;
+ vm_offset_t firstaddr, lastaddr;
+ vm_offset_t shstr_addr;
+ char *shstr;
+ int symstrindex;
+ int symtabindex;
+ size_t chunk_len;
+ uint64_t ret;
+ int ndp;
+ int i;
+ unsigned int j;
+
+ dp = NULL;
+ shdr = NULL;
+ ret = 0;
+
+ ehdr = ef->ehdr;
+ phdr = (Elf_Phdr *)(ef->firstpage_u + ehdr->e_phoff);
+
+ firstaddr = lastaddr = 0;
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ if (phdr[i].p_type != PT_LOAD)
+ continue;
+ if (firstaddr == 0 || firstaddr > phdr[i].p_vaddr)
+ firstaddr = phdr[i].p_vaddr;
+ /* We mmap'ed the kernel, so p_memsz == p_filesz. */
+ if (lastaddr == 0 || lastaddr < (phdr[i].p_vaddr + phdr[i].p_filesz))
+ lastaddr = phdr[i].p_vaddr + phdr[i].p_filesz;
+ }
+ lastaddr = roundup(lastaddr, sizeof(long));
+
+ /*
+ * Get the section headers. We need this for finding the .ctors
+ * section as well as for loading any symbols. Both may be hard
+ * to do if reading from a .gz file as it involves seeking. I
+ * think the rule is going to have to be that you must strip a
+ * file to remove symbols before gzipping it.
+ */
+ chunk_len = ehdr->e_shnum * ehdr->e_shentsize;
+ if (chunk_len == 0 || ehdr->e_shoff == 0)
+ goto nosyms;
+ shdr = (Elf_Shdr *)(ef->firstpage_u + ehdr->e_shoff);
+ image_addmetadata(img, MODINFOMD_SHDR, chunk_len, shdr);
+
+ /*
+ * Read the section string table and look for the .ctors section.
+ * We need to tell the kernel where it is so that it can call the
+ * ctors.
+ */
+ chunk_len = shdr[ehdr->e_shstrndx].sh_size;
+ if (chunk_len > 0) {
+		shstr_addr = (vm_offset_t)(ef->firstpage_u +
+		    shdr[ehdr->e_shstrndx].sh_offset);
+ shstr = malloc(chunk_len);
+ memcpy(shstr, (void *)shstr_addr, chunk_len);
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (strcmp(shstr + shdr[i].sh_name, ".ctors") != 0)
+ continue;
+ ctors = shdr[i].sh_addr;
+ image_addmetadata(img, MODINFOMD_CTORS_ADDR,
+ sizeof(ctors), &ctors);
+ size = shdr[i].sh_size;
+ image_addmetadata(img, MODINFOMD_CTORS_SIZE,
+ sizeof(size), &size);
+ break;
+ }
+ free(shstr);
+ }
+
+ /*
+ * Now load any symbols.
+ */
+ symtabindex = -1;
+ symstrindex = -1;
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (shdr[i].sh_type != SHT_SYMTAB)
+ continue;
+ for (j = 0; j < ehdr->e_phnum; j++) {
+ if (phdr[j].p_type != PT_LOAD)
+ continue;
+ if (shdr[i].sh_offset >= phdr[j].p_offset &&
+ (shdr[i].sh_offset + shdr[i].sh_size <=
+ phdr[j].p_offset + phdr[j].p_filesz)) {
+ shdr[i].sh_offset = 0;
+ shdr[i].sh_size = 0;
+ break;
+ }
+ }
+ if (shdr[i].sh_offset == 0 || shdr[i].sh_size == 0)
+			continue; /* already loaded in a PT_LOAD above */
+ /* Save it for loading below */
+ symtabindex = i;
+ symstrindex = shdr[i].sh_link;
+ }
+ if (symtabindex < 0 || symstrindex < 0)
+ goto nosyms;
+
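+	/*
+	 * Compute where a size-prefixed copy of the symbol table and its
+	 * string table would sit after the loaded segments; ssym and esym
+	 * bracket that region and are handed to the kernel.
+	 */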
+ ssym = lastaddr;
+ i = symtabindex;
+ for (;;) {
+ size = shdr[i].sh_size;
+ lastaddr += sizeof(size);
+ lastaddr += shdr[i].sh_size;
+ lastaddr = roundup(lastaddr, sizeof(size));
+
+ if (i == symtabindex)
+ i = symstrindex;
+ else if (i == symstrindex)
+ break;
+ }
+ esym = lastaddr;
+
+ image_addmetadata(img, MODINFOMD_SSYM, sizeof(ssym), &ssym);
+ image_addmetadata(img, MODINFOMD_ESYM, sizeof(esym), &esym);
+
+nosyms:
+ ret = lastaddr - firstaddr;
+ img->f_addr = firstaddr;
+
+ php = NULL;
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ if (phdr[i].p_type == PT_DYNAMIC) {
+ php = &phdr[i];
+ adp = php->p_vaddr;
+ image_addmetadata(img, MODINFOMD_DYNAMIC,
+ sizeof(adp), &adp);
+ break;
+ }
+ }
+ if (php == NULL)
+ goto out;
+ ndp = php->p_filesz / sizeof(Elf_Dyn);
+ if (ndp == 0)
+ goto out;
+
+ ef->strsz = 0;
+ dp = (Elf_Dyn *)(ef->firstpage_u + php->p_offset);
+ for (i = 0; i < ndp; i++) {
+ if (dp[i].d_tag == 0)
+ break;
+ switch(dp[i].d_tag) {
+ case DT_HASH:
+ ef->hashtab = (Elf_Hashelt *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_STRTAB:
+			ef->strtab = (char *)(uintptr_t)dp[i].d_un.d_ptr;
+			break;
+ case DT_STRSZ:
+ ef->strsz = dp[i].d_un.d_val;
+ break;
+ case DT_SYMTAB:
+ ef->symtab = (Elf_Sym *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_REL:
+ ef->rel = (Elf_Rel *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_RELSZ:
+ ef->relsz = dp[i].d_un.d_val;
+ break;
+ case DT_RELA:
+ ef->rela = (Elf_Rela *)(uintptr_t)dp[i].d_un.d_ptr;
+ break;
+ case DT_RELASZ:
+ ef->relasz = dp[i].d_un.d_val;
+ break;
+ }
+ }
+ if (ef->hashtab == NULL || ef->symtab == NULL ||
+ ef->strtab == NULL || ef->strsz == 0)
+ goto out;
+
+ memcpy(&ef->nbuckets, (void *)gvatou(ef->hashtab, ef->firstpage_u), sizeof(ef->nbuckets));
+ memcpy(&ef->nchains, (void *)gvatou(ef->hashtab + 1, ef->firstpage_u), sizeof(ef->nchains));
+ ef->buckets = (Elf_Hashelt *)gvatou(ef->hashtab + 2, ef->firstpage_u);
+ ef->chains = ef->buckets + ef->nbuckets;
+
+ if (lookup_symbol(ef, "__start_set_modmetadata_set", &sym) != 0) {
+ ret = 0;
+ goto out;
+ }
+ p_start = gvatou(sym.st_value, ef->firstpage_u);
+ if (lookup_symbol(ef, "__stop_set_modmetadata_set", &sym) != 0) {
+ ret = ENOENT;
+ goto out;
+ }
+ p_end = gvatou(sym.st_value, ef->firstpage_u);
+ parse_metadata(img, ef, p_start, p_end);
+
+out:
+ return ret;
+}
+
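+/*
+ * Size of the serialized module data: each record is two 32-bit words (type
+ * and length) followed by the payload rounded up to an 8-byte boundary.
+ */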
+static uint64_t
+moddata_len(struct preloaded_file *img)
+{
+ struct file_metadata *md;
+ uint64_t len;
+
+ /* Count the kernel image name */
+ len = 8 + roundup(strlen(img->f_name) + 1, sizeof(uint64_t));
+ /* Count the kernel's type */
+ len += 8 + roundup(strlen(img->f_type) + 1, sizeof(uint64_t));
+ /* Count the kernel's virtual address */
+ len += 8 + roundup(sizeof(img->f_addr), sizeof(uint64_t));
+ /* Count the kernel's size */
+ len += 8 + roundup(sizeof(img->f_size), sizeof(uint64_t));
+ /* Count the metadata size */
+ for (md = img->f_metadata; md != NULL; md = md->md_next)
+ len += 8 + roundup(md->md_size, sizeof(uint64_t));
+
+ return len;
+}
+
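+/*
+ * Helpers that emit module metadata records in the layout the kernel's
+ * preload_search_*() routines expect: a 32-bit type, a 32-bit length and the
+ * payload padded to an 8-byte boundary.
+ */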
+#define COPY32(dest, what) \
+ do { \
+ uint32_t w = (what); \
+ memcpy((void *)dest, &w, sizeof(w)); \
+ dest += sizeof(w); \
+ } while (0)
+
+#define COPY_MODINFO(modinfo, dest, val, len) \
+ do { \
+ COPY32(dest, modinfo); \
+ COPY32(dest, len); \
+ memcpy((void *)dest, val, len); \
+ dest += roundup(len, sizeof(uint64_t)); \
+ } while (0)
+
+#define COPY_MODEND(dest) \
+ do { \
+ COPY32(dest, MODINFO_END); \
+ COPY32(dest, 0); \
+	} while (0)
+
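+/*
+ * Serialize the kernel image's name, type, load address, size and metadata
+ * records into the buffer that becomes the guest's module data.
+ */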
+static void
+moddata_copy(vm_offset_t dest, struct preloaded_file *img)
+{
+ struct file_metadata *md;
+
+ COPY_MODINFO(MODINFO_NAME, dest, img->f_name, strlen(img->f_name) + 1);
+ COPY_MODINFO(MODINFO_TYPE, dest, img->f_type, strlen(img->f_type) + 1);
+ COPY_MODINFO(MODINFO_ADDR, dest, &img->f_addr, sizeof(img->f_addr));
+ COPY_MODINFO(MODINFO_SIZE, dest, &img->f_size, sizeof(img->f_size));
+
+ for (md = img->f_metadata; md != NULL; md = md->md_next)
+ COPY_MODINFO(MODINFO_METADATA | md->md_type, dest,
+ md->md_data, md->md_size);
+
+ COPY_MODEND(dest);
+}
+
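+/* Prepend a metadata record of the given type to the image's metadata list. */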
+static void
+image_addmetadata(struct preloaded_file *img, int type,
+ size_t size, void *addr)
+{
+ struct file_metadata *md;
+
+ md = malloc(sizeof(struct file_metadata) - sizeof(md->md_data) + size);
+ md->md_size = size;
+ md->md_type = type;
+ memcpy(md->md_data, addr, size);
+ md->md_next = img->f_metadata;
+ img->f_metadata = md;
+}
+
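+/* The standard SysV ELF hash function, as used for DT_HASH lookups. */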
+static uint64_t
+elf_hash(const char *name)
+{
+ const unsigned char *p = (const unsigned char *)name;
+ uint64_t h;
+ uint64_t g;
+
+ h = 0;
+ while (*p != '\0') {
+ h = (h << 4) + *p++;
+ if ((g = h & 0xf0000000) != 0)
+ h ^= g >> 24;
+ h &= ~g;
+ }
+
+ return h;
+}
+
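+/*
+ * Look up a symbol by name in the kernel's DT_HASH table.  The pointers
+ * recorded in the dynamic section are guest kernel virtual addresses, so
+ * every access goes through gvatou().
+ */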
+static int
+lookup_symbol(struct elf_file *ef, const char *name, Elf_Sym *symp)
+{
+ Elf_Hashelt symnum;
+ Elf_Sym sym;
+ char *strp;
+ uint64_t hash;
+
+ hash = elf_hash(name);
+ memcpy(&symnum, &ef->buckets[hash % ef->nbuckets], sizeof(symnum));
+
+ while (symnum != STN_UNDEF) {
+ if (symnum >= ef->nchains) {
+ fprintf(stderr, "lookup_symbol: corrupt symbol table\n");
+ return ENOENT;
+ }
+
+ memcpy(&sym, (void *)gvatou(ef->symtab + symnum, ef->firstpage_u), sizeof(sym));
+ if (sym.st_name == 0) {
+ fprintf(stderr, "lookup_symbol: corrupt symbol table\n");
+ return ENOENT;
+ }
+
+ strp = strdup((char *)gvatou(ef->strtab + sym.st_name, ef->firstpage_u));
+ if (strcmp(name, strp) == 0) {
+ free(strp);
+ if (sym.st_shndx != SHN_UNDEF ||
+ (sym.st_value != 0 &&
+ ELF_ST_TYPE(sym.st_info) == STT_FUNC)) {
+ *symp = sym;
+ return 0;
+ }
+ return ENOENT;
+ }
+ free(strp);
+ memcpy(&symnum, &ef->chains[symnum], sizeof(symnum));
+ }
+
+ return ENOENT;
+}
+
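+/*
+ * Scan the kernel's set_modmetadata_set linker set and register every
+ * MDT_VERSION record as a module provided by the image.
+ */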
+static void
+parse_metadata(struct preloaded_file *img, struct elf_file *ef,
+ Elf_Addr p_startu, Elf_Addr p_endu)
+{
+ struct mod_metadata md;
+ struct mod_version mver;
+ char *s;
+ int modcnt;
+ Elf_Addr v, p;
+
+ modcnt = 0;
+ for (p = p_startu; p < p_endu; p += sizeof(Elf_Addr)) {
+ memcpy(&v, (void *)p, sizeof(v));
+ memcpy(&md, (void *)gvatou(v, ef->firstpage_u), sizeof(md));
+ if (md.md_type == MDT_VERSION) {
+ s = strdup((char *)gvatou(md.md_cval, ef->firstpage_u));
+ memcpy(&mver,
+ (void *)gvatou(md.md_data, ef->firstpage_u),
+ sizeof(mver));
+ image_addmodule(img, s, mver.mv_version);
+ free(s);
+ modcnt++;
+ }
+ }
+
+	if (modcnt == 0)
+		image_addmodule(img, "kernel", 1);
+}
+
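+/*
+ * Register a module provided by the image unless one with the same name and
+ * version has already been recorded.
+ */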
+static int
+image_addmodule(struct preloaded_file *img, char *modname, int version)
+{
+ struct kernel_module *mp;
+ struct mod_depend mdepend;
+
+ bzero(&mdepend, sizeof(mdepend));
+ mdepend.md_ver_preferred = version;
+
+ mp = image_findmodule(img, modname, &mdepend);
+ if (mp)
+ return (EEXIST);
+ mp = malloc(sizeof(struct kernel_module));
+ if (mp == NULL)
+ return (ENOMEM);
+
+ bzero(mp, sizeof(struct kernel_module));
+ mp->m_name = strdup(modname);
+ mp->m_version = version;
+ mp->m_fp = img;
+ mp->m_next = img->f_modules;
+ img->f_modules = mp;
+
+ return (0);
+}
+
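+/*
+ * Find a module by name, preferring an exact version match and otherwise the
+ * highest version within the range described by 'verinfo'.
+ */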
+static struct kernel_module *
+image_findmodule(struct preloaded_file *img, char *modname,
+ struct mod_depend *verinfo)
+{
+ struct kernel_module *mp, *best;
+ int bestver, mver;
+
+ best = NULL;
+ bestver = 0;
+ for (mp = img->f_modules; mp != NULL; mp = mp->m_next) {
+ if (strcmp(modname, mp->m_name) == 0) {
+ if (verinfo == NULL)
+ return (mp);
+ mver = mp->m_version;
+ if (mver == verinfo->md_ver_preferred)
+ return (mp);
+ if (mver >= verinfo->md_ver_minimum &&
+ mver <= verinfo->md_ver_maximum &&
+ mver > bestver) {
+ best = mp;
+ bestver = mver;
+ }
+ }
+ }
+
+ return (best);
+}