D14289.id39118.diff

Index: share/man/man4/vmci.4
===================================================================
--- /dev/null
+++ share/man/man4/vmci.4
@@ -0,0 +1,49 @@
+.Dd February 7, 2018
+.Dt VMCI 4
+.Os
+.Sh NAME
+.Nm vmci
+.Nd VMware Virtual Machine Communication Interface
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device vmci"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+vmci_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for the VMware Virtual Machine Communication Interface
+(VMCI) in virtual machines running on VMware hypervisors.
+.Pp
+VMCI allows virtual machines to communicate with host kernel modules and with
+the VMware hypervisor.
+User-level applications in a virtual machine can use VMCI through vSockets
+(also known as VMCI Sockets, not included in this module), a socket address
+family designed to be compatible with UDP and TCP at the interface level.
+Today, VMCI and vSockets are used by various VMware Tools components inside
+the guest for zero-configuration, network-less access to VMware host services.
+In addition, vSockets are used for applications where network access of the
+virtual machine is restricted or non-existent, for example virtual machines
+communicating with device proxies for proprietary hardware running as host
+applications, or automated testing of applications inside virtual machines.
+.Pp
+In a virtual machine, VMCI is exposed as a regular PCI device.
+The primary communication mechanisms supported are a point-to-point
+bidirectional transport based on a pair of memory-mapped queues, and
+asynchronous notifications in the form of datagrams and doorbells.
+These features are available to kernel-level components such as vSockets
+through the VMCI kernel API, which also delivers events related to the state
+of the VMCI communication channels and of the virtual machine itself.
+.Pp
+For additional information about the use of VMCI, and vSockets in particular,
+refer to the vSocket Programming Guide available at
+https://www.vmware.com/support/developer/vmci-sdk/.
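
A minimal sketch of how a guest kernel component might use the datagram API
added by this patch to send a message to a hypervisor service; it is not part
of the patch itself. The destination context and resource IDs are placeholder
values, VMCI_MAKE_HANDLE() and VMCI_INVALID_ID come from vmci_defs.h, and
error handling is reduced to the essentials.

static int
example_recv_cb(void *client_data, struct vmci_datagram *msg)
{

	/* Called when a datagram arrives for our handle; payload follows msg. */
	return (0);
}

static void
example_send(void)
{
	struct {
		struct vmci_datagram	hdr;
		char			payload[32];
	} dg;
	struct vmci_handle src_handle;

	/* Register a local endpoint; the device picks the resource ID. */
	if (vmci_datagram_create_handle(VMCI_INVALID_ID, VMCI_FLAG_DG_NONE,
	    example_recv_cb, NULL, &src_handle) != VMCI_SUCCESS)
		return;

	/* Placeholder destination handle (context ID 2, resource ID 1). */
	dg.hdr.dst = VMCI_MAKE_HANDLE(2, 1);
	dg.hdr.src = src_handle;
	dg.hdr.payload_size = sizeof(dg.payload);
	memset(dg.payload, 0, sizeof(dg.payload));
	memcpy(dg.payload, "hello", sizeof("hello"));

	/* Returns the number of bytes sent, or a negative VMCI error. */
	(void)vmci_datagram_send(&dg.hdr);
	(void)vmci_datagram_destroy_handle(src_handle);
}
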
Index: sys/conf/files.amd64
===================================================================
--- sys/conf/files.amd64
+++ sys/conf/files.amd64
@@ -472,6 +472,16 @@
dev/uart/uart_cpu_x86.c optional uart
dev/viawd/viawd.c optional viawd
dev/vmware/vmxnet3/if_vmx.c optional vmx
+dev/vmware/vmci/vmci.c optional vmci
+dev/vmware/vmci/vmci_datagram.c optional vmci
+dev/vmware/vmci/vmci_doorbell.c optional vmci
+dev/vmware/vmci/vmci_driver.c optional vmci
+dev/vmware/vmci/vmci_event.c optional vmci
+dev/vmware/vmci/vmci_hashtable.c optional vmci
+dev/vmware/vmci/vmci_kernel_if.c optional vmci
+dev/vmware/vmci/vmci_qpair.c optional vmci
+dev/vmware/vmci/vmci_queue_pair.c optional vmci
+dev/vmware/vmci/vmci_resource.c optional vmci
dev/wbwd/wbwd.c optional wbwd
dev/xen/pci/xen_acpi_pci.c optional xenhvm
dev/xen/pci/xen_pci.c optional xenhvm
Index: sys/conf/files.i386
===================================================================
--- sys/conf/files.i386
+++ sys/conf/files.i386
@@ -323,6 +323,16 @@
dev/uart/uart_cpu_x86.c optional uart
dev/viawd/viawd.c optional viawd
dev/vmware/vmxnet3/if_vmx.c optional vmx
+dev/vmware/vmci/vmci.c optional vmci
+dev/vmware/vmci/vmci_datagram.c optional vmci
+dev/vmware/vmci/vmci_doorbell.c optional vmci
+dev/vmware/vmci/vmci_driver.c optional vmci
+dev/vmware/vmci/vmci_event.c optional vmci
+dev/vmware/vmci/vmci_hashtable.c optional vmci
+dev/vmware/vmci/vmci_kernel_if.c optional vmci
+dev/vmware/vmci/vmci_qpair.c optional vmci
+dev/vmware/vmci/vmci_queue_pair.c optional vmci
+dev/vmware/vmci/vmci_resource.c optional vmci
dev/acpica/acpi_if.m standard
dev/acpica/acpi_hpet.c optional acpi
dev/acpica/acpi_timer.c optional acpi
Index: sys/dev/vmware/vmci/vmci.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci.h
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
+
+#ifndef _VMCI_H_
+#define _VMCI_H_
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+
+#include "vmci_datagram.h"
+#include "vmci_kernel_if.h"
+
+/* VMCI device vendor and device ID */
+#define VMCI_VMWARE_VENDOR_ID 0x15AD
+#define VMCI_VMWARE_DEVICE_ID 0x0740
+
+#define VMCI_VERSION 1
+
+struct vmci_dma_alloc {
+ bus_dma_tag_t dma_tag;
+ caddr_t dma_vaddr;
+ bus_addr_t dma_paddr;
+ bus_dmamap_t dma_map;
+ bus_size_t dma_size;
+};
+
+struct vmci_interrupt {
+ struct resource *vmci_irq;
+ int vmci_rid;
+ void *vmci_handler;
+};
+
+struct vmci_softc {
+ device_t vmci_dev;
+
+ struct mtx vmci_spinlock;
+
+ struct resource *vmci_res0;
+ bus_space_tag_t vmci_iot0;
+ bus_space_handle_t vmci_ioh0;
+ unsigned int vmci_ioaddr;
+ struct resource *vmci_res1;
+ bus_space_tag_t vmci_iot1;
+ bus_space_handle_t vmci_ioh1;
+
+ struct vmci_dma_alloc vmci_notifications_bitmap;
+
+ int vmci_num_intr;
+ vmci_intr_type vmci_intr_type;
+ struct vmci_interrupt vmci_intrs[VMCI_MAX_INTRS];
+ struct task vmci_interrupt_dq_task;
+ struct task vmci_interrupt_bm_task;
+
+ struct task vmci_delayed_work_task;
+ struct mtx vmci_delayed_work_lock;
+ vmci_list(vmci_delayed_work_info) vmci_delayed_work_infos;
+
+ unsigned int capabilities;
+};
+
+int vmci_dma_malloc(bus_size_t size, bus_size_t align,
+ struct vmci_dma_alloc *dma);
+void vmci_dma_free(struct vmci_dma_alloc *);
+int vmci_send_datagram(struct vmci_datagram *dg);
+int vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data);
+
+#endif /* !_VMCI_H_ */
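
For reference, a short sketch of how the DMA wrappers declared above are meant
to be used inside the driver, mirroring what vmci_config_capabilities() does
for the notification bitmap; error handling is trimmed for brevity.

static int
example_alloc_bitmap(void)
{
	struct vmci_dma_alloc bitmap_dma;
	int error;

	/* One page, byte alignment, exactly as the notification bitmap. */
	error = vmci_dma_malloc(PAGE_SIZE, 1, &bitmap_dma);
	if (error != 0)
		return (error);

	/* dma_vaddr is the kernel mapping, dma_paddr the bus address. */
	memset(bitmap_dma.dma_vaddr, 0, PAGE_SIZE);
	/* ... hand bitmap_dma.dma_paddr >> PAGE_SHIFT to the device ... */

	vmci_dma_free(&bitmap_dma);
	return (0);
}
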
Index: sys/dev/vmware/vmci/vmci.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci.c
@@ -0,0 +1,1174 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
+
+#include <sys/types.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/systm.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <machine/bus.h>
+
+#include "vmci.h"
+#include "vmci_doorbell.h"
+#include "vmci_driver.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_queue_pair.h"
+
+static int vmci_probe(device_t);
+static int vmci_attach(device_t);
+static int vmci_detach(device_t);
+static int vmci_shutdown(device_t);
+
+static int vmci_map_bars(struct vmci_softc *);
+static void vmci_unmap_bars(struct vmci_softc *);
+
+static int vmci_config_capabilities(struct vmci_softc *);
+
+static int vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
+ bus_size_t, struct vmci_dma_alloc *);
+static void vmci_dma_free_int(struct vmci_softc *,
+ struct vmci_dma_alloc *);
+
+static int vmci_config_interrupts(struct vmci_softc *);
+static int vmci_config_interrupt(struct vmci_softc *);
+static int vmci_check_intr_cnt(struct vmci_softc *);
+static int vmci_allocate_interrupt_resources(struct vmci_softc *);
+static int vmci_setup_interrupts(struct vmci_softc *);
+static void vmci_dismantle_interrupts(struct vmci_softc *);
+static void vmci_interrupt(void *);
+static void vmci_interrupt_bm(void *);
+static void dispatch_datagrams(void *, int);
+static void process_bitmap(void *, int);
+
+static void vmci_delayed_work_fn_cb(void *context, int data);
+
+static device_method_t vmci_methods[] = {
+ /* Device interface. */
+ DEVMETHOD(device_probe, vmci_probe),
+ DEVMETHOD(device_attach, vmci_attach),
+ DEVMETHOD(device_detach, vmci_detach),
+ DEVMETHOD(device_shutdown, vmci_shutdown),
+
+ DEVMETHOD_END
+};
+
+static driver_t vmci_driver = {
+ "vmci", vmci_methods, sizeof(struct vmci_softc)
+};
+
+static devclass_t vmci_devclass;
+DRIVER_MODULE(vmci, pci, vmci_driver, vmci_devclass, 0, 0);
+MODULE_VERSION(vmci, VMCI_VERSION);
+
+MODULE_DEPEND(vmci, pci, 1, 1, 1);
+
+static struct vmci_softc *vmci_sc;
+
+#define LGPFX "vmci: "
+/*
+ * Allocate a buffer for incoming datagrams globally to avoid repeated
+ * allocation in the interrupt handler's atomic context.
+ */
+static uint8_t *data_buffer = NULL;
+static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;
+
+struct vmci_delayed_work_info {
+ vmci_work_fn *work_fn;
+ void *data;
+ vmci_list_item(vmci_delayed_work_info) entry;
+};
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_probe --
+ *
+ * Probe to see if the VMCI device is present.
+ *
+ * Results:
+ * BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_probe(device_t dev)
+{
+
+ if (pci_get_vendor(dev) == VMCI_VMWARE_VENDOR_ID &&
+ pci_get_device(dev) == VMCI_VMWARE_DEVICE_ID) {
+ device_set_desc(dev,
+ "VMware Virtual Machine Communication Interface");
+
+ return (BUS_PROBE_DEFAULT);
+ }
+
+ return (ENXIO);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_attach --
+ *
+ * Attach VMCI device to the system after vmci_probe() has been called and
+ * the device has been detected.
+ *
+ * Results:
+ * 0 if success, ENXIO otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_attach(device_t dev)
+{
+ struct vmci_softc *sc;
+ int error, i;
+
+ sc = device_get_softc(dev);
+ sc->vmci_dev = dev;
+ vmci_sc = sc;
+
+ data_buffer = NULL;
+ sc->vmci_num_intr = 0;
+ for (i = 0; i < VMCI_MAX_INTRS; i++) {
+ sc->vmci_intrs[i].vmci_irq = NULL;
+ sc->vmci_intrs[i].vmci_handler = NULL;
+ }
+
+ TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
+ TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);
+
+ TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);
+
+ pci_enable_busmaster(dev);
+
+ mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
+ mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
+ NULL, MTX_DEF);
+
+ error = vmci_map_bars(sc);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
+ goto fail;
+ }
+
+ error = vmci_config_capabilities(sc);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
+ goto fail;
+ }
+
+ vmci_list_init(&sc->vmci_delayed_work_infos);
+
+ vmci_components_init();
+ vmci_util_init();
+ error = vmci_qp_guest_endpoints_init();
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
+ goto fail;
+ }
+
+ error = vmci_config_interrupts(sc);
+ if (error)
+ VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");
+
+fail:
+ if (error) {
+ vmci_detach(dev);
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_detach --
+ *
+ * Detach the VMCI device.
+ *
+ * Results:
+ * 0
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_detach(device_t dev)
+{
+ struct vmci_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ vmci_qp_guest_endpoints_exit();
+ vmci_util_exit();
+
+ vmci_dismantle_interrupts(sc);
+
+ vmci_components_cleanup();
+
+ taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
+ mtx_destroy(&sc->vmci_delayed_work_lock);
+
+ if (sc->vmci_res0 != NULL)
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);
+
+ if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
+ vmci_dma_free(&sc->vmci_notifications_bitmap);
+
+ vmci_unmap_bars(sc);
+
+ mtx_destroy(&sc->vmci_spinlock);
+
+ pci_disable_busmaster(dev);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_shutdown --
+ *
+ * This function is called during system shutdown. We don't do anything.
+ *
+ * Results:
+ * 0
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_shutdown(device_t dev)
+{
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_map_bars --
+ *
+ * Maps the PCI I/O and MMIO BARs.
+ *
+ * Results:
+ * 0 on success, ENXIO otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_map_bars(struct vmci_softc *sc)
+{
+ int rid;
+
+ /* Map the PCI I/O BAR: BAR0 */
+ rid = PCIR_BAR(0);
+ sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
+ &rid, RF_ACTIVE);
+ if (sc->vmci_res0 == NULL) {
+ VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
+ return (ENXIO);
+ }
+
+ sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
+ sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
+ sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);
+
+ /* Map the PCI MMIO BAR: BAR1 */
+ rid = PCIR_BAR(1);
+ sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
+ &rid, RF_ACTIVE);
+ if (sc->vmci_res1 == NULL) {
+ VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
+ return (ENXIO);
+ }
+
+ sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
+ sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_unmap_bars --
+ *
+ * Unmaps the VMCI PCI I/O and MMIO BARs.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_unmap_bars(struct vmci_softc *sc)
+{
+ int rid;
+
+ if (sc->vmci_res0 != NULL) {
+ rid = PCIR_BAR(0);
+ bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
+ sc->vmci_res0);
+ sc->vmci_res0 = NULL;
+ }
+
+ if (sc->vmci_res1 != NULL) {
+ rid = PCIR_BAR(1);
+ bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
+ sc->vmci_res1);
+ sc->vmci_res1 = NULL;
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_config_capabilities --
+ *
+ * Check the VMCI device capabilities and configure the device accordingly.
+ *
+ * Results:
+ * 0 if success, ENODEV otherwise.
+ *
+ * Side effects:
+ * Device capabilities are enabled.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_config_capabilities(struct vmci_softc *sc)
+{
+ unsigned long bitmap_PPN;
+ int error;
+
+ /*
+ * Verify that the VMCI device supports the capabilities that we
+ * need. Datagrams are necessary and notifications will be used
+ * if the device supports it.
+ */
+ sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CAPS_ADDR);
+
+ if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
+ VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
+ "datagrams.\n");
+ return (ENODEV);
+ }
+
+ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
+ sc->capabilities = VMCI_CAPS_DATAGRAM;
+ error = vmci_dma_malloc(PAGE_SIZE, 1,
+ &sc->vmci_notifications_bitmap);
+ if (error)
+ VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
+ "notification bitmap.\n");
+ else {
+ memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
+ PAGE_SIZE);
+ sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
+ }
+ } else
+ sc->capabilities = VMCI_CAPS_DATAGRAM;
+
+ /* Let the host know which capabilities we intend to use. */
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CAPS_ADDR, sc->capabilities);
+
+ /*
+ * Register notification bitmap with device if that capability is
+ * used.
+ */
+ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
+ bitmap_PPN =
+ sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
+ vmci_register_notification_bitmap(bitmap_PPN);
+ }
+
+ /* Check host capabilities. */
+ if (!vmci_check_host_capabilities())
+ return (ENODEV);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dmamap_cb --
+ *
+ * Callback to receive mapping information resulting from the load of a
+ * bus_dmamap_t via bus_dmamap_load()
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+ bus_addr_t *baddr = arg;
+
+ if (error == 0)
+ *baddr = segs->ds_addr;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_malloc_int --
+ *
+ * Internal function that allocates DMA memory.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * EINPROGRESS if mapping is deferred.
+ * EINVAL if the request was invalid.
+ *
+ * Side effects:
+ * DMA memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
+ struct vmci_dma_alloc *dma)
+{
+ int error;
+
+ bzero(dma, sizeof(struct vmci_dma_alloc));
+
+ error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
+ align, 0, /* alignment, bounds */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ size, /* maxsize */
+ 1, /* nsegments */
+ size, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, /* lockfunc */
+ NULL, /* lockfuncarg */
+ &dma->dma_tag);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
+ goto fail;
+ }
+
+ error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
+ BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
+ goto fail;
+ }
+
+ error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
+ size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
+ goto fail;
+ }
+
+ dma->dma_size = size;
+
+fail:
+ if (error)
+ vmci_dma_free(dma);
+
+ return (error);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_malloc --
+ *
+ * This function is a wrapper around vmci_dma_malloc_int for callers
+ * outside of this module. Since we only support a single VMCI device, this
+ * wrapper provides access to the device softc structure.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * EINPROGRESS if mapping is deferred.
+ * EINVAL if the request was invalid.
+ *
+ * Side effects:
+ * DMA memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
+{
+
+ return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_free_int --
+ *
+ * Internal function that frees DMA memory.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Frees DMA memory.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
+{
+
+ if (dma->dma_tag != NULL) {
+ if (dma->dma_paddr != 0) {
+ bus_dmamap_sync(dma->dma_tag, dma->dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(dma->dma_tag, dma->dma_map);
+ }
+
+ if (dma->dma_vaddr != NULL)
+ bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
+ dma->dma_map);
+
+ bus_dma_tag_destroy(dma->dma_tag);
+ }
+ bzero(dma, sizeof(struct vmci_dma_alloc));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_free --
+ *
+ * This function is a wrapper around vmci_dma_free_int for callers outside
+ * of this module. Since we only support a single VMCI device, this wrapper
+ * provides access to the device softc structure.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Frees DMA memory.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_dma_free(struct vmci_dma_alloc *dma)
+{
+
+ vmci_dma_free_int(vmci_sc, dma);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_config_interrupts --
+ *
+ * Configures and enables interrupts. Try to configure MSI-X. If this fails,
+ * try to configure MSI. If even this fails, try legacy interrupts.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * ENODEV if the device doesn't support interrupts.
+ * ENXIO if the device configuration failed.
+ *
+ * Side effects:
+ * Interrupts get enabled if successful.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_config_interrupts(struct vmci_softc *sc)
+{
+ int error;
+
+ data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
+ if (data_buffer == NULL)
+ return (ENOMEM);
+
+ sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
+ error = vmci_config_interrupt(sc);
+ if (error) {
+ sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
+ error = vmci_config_interrupt(sc);
+ }
+ if (error) {
+ sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
+ error = vmci_config_interrupt(sc);
+ }
+ if (error)
+ return (error);
+
+ /* Enable specific interrupt bits. */
+ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
+ else
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);
+
+ /* Enable interrupts. */
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_config_interrupt --
+ *
+ * Check the number of interrupts supported, allocate resources and setup
+ * interrupts.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * ENODEV if the device doesn't support interrupts.
+ * ENXIO if the device configuration failed.
+ *
+ * Side effects:
+ * Resources get allocated and interrupts get setup (but not enabled) if
+ * successful.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_config_interrupt(struct vmci_softc *sc)
+{
+ int error;
+
+ error = vmci_check_intr_cnt(sc);
+ if (error)
+ return (error);
+
+ error = vmci_allocate_interrupt_resources(sc);
+ if (error)
+ return (error);
+
+ error = vmci_setup_interrupts(sc);
+ if (error)
+ return (error);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_check_intr_cnt --
+ *
+ * Check the number of interrupts supported by the device and ask PCI bus
+ * to allocate appropriate number of interrupts.
+ *
+ * Results:
+ * 0 if success.
+ * ENODEV if the device doesn't support any interrupts.
+ * ENXIO if the device configuration failed.
+ *
+ * Side effects:
+ * Resources get allocated on success.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_check_intr_cnt(struct vmci_softc *sc)
+{
+
+ if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
+ sc->vmci_num_intr = 1;
+ return (0);
+ }
+
+ /*
+ * Make sure that the device supports the required number of MSI/MSI-X
+ * messages. We try for 2 MSI-X messages but 1 is good too. We need at
+ * least 1 MSI message.
+ */
+ sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
+ pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);
+
+ if (!sc->vmci_num_intr) {
+ VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
+ " messages");
+ return (ENODEV);
+ }
+
+ sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
+ VMCI_MAX_INTRS : 1;
+ if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
+ if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
+ return (ENXIO);
+ } else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
+ if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_allocate_interrupt_resources --
+ *
+ * Allocate resources necessary for interrupts.
+ *
+ * Results:
+ * 0 if success, ENXIO otherwise.
+ *
+ * Side effects:
+ * Resources get allocated on success.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_allocate_interrupt_resources(struct vmci_softc *sc)
+{
+ struct resource *irq;
+ int flags, i, rid;
+
+ flags = RF_ACTIVE;
+ flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
+ rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;
+
+ for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
+ irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
+ flags);
+ if (irq == NULL)
+ return (ENXIO);
+ sc->vmci_intrs[i].vmci_irq = irq;
+ sc->vmci_intrs[i].vmci_rid = rid;
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_setup_interrupts --
+ *
+ * Sets up the interrupts.
+ *
+ * Results:
+ * 0 if success, appropriate error code from bus_setup_intr otherwise.
+ *
+ * Side effects:
+ * Interrupt handler gets attached.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_setup_interrupts(struct vmci_softc *sc)
+{
+ struct vmci_interrupt *intr;
+ int error, flags;
+
+ flags = INTR_TYPE_NET | INTR_MPSAFE;
+ if (sc->vmci_num_intr > 1)
+ flags |= INTR_EXCL;
+
+ intr = &sc->vmci_intrs[0];
+ error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
+ vmci_interrupt, NULL, &intr->vmci_handler);
+ if (error)
+ return (error);
+ bus_describe_intr(sc->vmci_dev, intr->vmci_irq, intr->vmci_handler,
+ "vmci_interrupt");
+
+ if (sc->vmci_num_intr == 2) {
+ intr = &sc->vmci_intrs[1];
+ error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
+ NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
+ if (error)
+ return (error);
+ bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
+ intr->vmci_handler, "vmci_interrupt_bm");
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_interrupt --
+ *
+ * Interrupt handler for a legacy or MSI interrupt, or for the first MSI-X
+ * interrupt (vector VMCI_INTR_DATAGRAM).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_interrupt(void *arg)
+{
+
+ if (vmci_sc->vmci_num_intr == 2)
+ taskqueue_enqueue(taskqueue_swi,
+ &vmci_sc->vmci_interrupt_dq_task);
+ else {
+ unsigned int icr;
+
+ icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
+ if (icr == 0 || icr == 0xffffffff)
+ return;
+ if (icr & VMCI_ICR_DATAGRAM) {
+ taskqueue_enqueue(taskqueue_swi,
+ &vmci_sc->vmci_interrupt_dq_task);
+ icr &= ~VMCI_ICR_DATAGRAM;
+ }
+ if (icr & VMCI_ICR_NOTIFICATION) {
+ taskqueue_enqueue(taskqueue_swi,
+ &vmci_sc->vmci_interrupt_bm_task);
+ icr &= ~VMCI_ICR_NOTIFICATION;
+ }
+ if (icr != 0)
+ VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
+ "cause");
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_interrupt_bm --
+ *
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
+ * which is for the notification bitmap. Will only get called if we are
+ * using MSI-X with exclusive vectors.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_interrupt_bm(void *arg)
+{
+
+ ASSERT(vmci_sc->vmci_num_intr == 2);
+ taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * dispatch_datagrams --
+ *
+ * Reads and dispatches incoming datagrams.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Reads data from the device.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+dispatch_datagrams(void *context, int data)
+{
+
+ if (data_buffer == NULL)
+ VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
+ "present");
+
+ vmci_read_datagrams_from_port((vmci_io_handle) 0,
+ vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
+ data_buffer, data_buffer_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * process_bitmap --
+ *
+ * Scans the notification bitmap for raised flags, clears them and handles
+ * the notifications.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+process_bitmap(void *context, int data)
+{
+
+ if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL)
+ VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present");
+
+ vmci_scan_notification_bitmap(
+ vmci_sc->vmci_notifications_bitmap.dma_vaddr);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dismantle_interrupts --
+ *
+ * Releases resources, detaches the interrupt handler and drains the task
+ * queue.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * No more interrupts.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_dismantle_interrupts(struct vmci_softc *sc)
+{
+ struct vmci_interrupt *intr;
+ int i;
+
+ for (i = 0; i < sc->vmci_num_intr; i++) {
+ intr = &sc->vmci_intrs[i];
+ if (intr->vmci_handler != NULL) {
+ bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
+ intr->vmci_handler);
+ intr->vmci_handler = NULL;
+ }
+ if (intr->vmci_irq != NULL) {
+ bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
+ intr->vmci_rid, intr->vmci_irq);
+ intr->vmci_irq = NULL;
+ intr->vmci_rid = -1;
+ }
+ }
+
+ if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
+ (sc->vmci_num_intr))
+ pci_release_msi(sc->vmci_dev);
+
+ taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
+ taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);
+
+ if (data_buffer != NULL)
+ free(data_buffer, M_DEVBUF);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_delayed_work_fn_cb --
+ *
+ * Callback function that executes the queued up delayed work functions.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_delayed_work_fn_cb(void *context, int data)
+{
+ vmci_list(vmci_delayed_work_info) temp_list;
+
+ vmci_list_init(&temp_list);
+
+ /*
+ * Swap vmci_delayed_work_infos list with the empty temp_list while
+ * holding a lock. vmci_delayed_work_infos would then be an empty list
+ * and temp_list would contain the elements from the original
+ * vmci_delayed_work_infos. Finally, iterate through temp_list
+ * executing the delayed callbacks.
+ */
+
+ mtx_lock(&vmci_sc->vmci_delayed_work_lock);
+ vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
+ vmci_delayed_work_info, entry);
+ mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
+
+ while (!vmci_list_empty(&temp_list)) {
+ struct vmci_delayed_work_info *delayed_work_info =
+ vmci_list_first(&temp_list);
+
+ delayed_work_info->work_fn(delayed_work_info->data);
+
+ vmci_list_remove(delayed_work_info, entry);
+ vmci_free_kernel_mem(delayed_work_info,
+ sizeof(*delayed_work_info));
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_schedule_delayed_work_fn --
+ *
+ * Schedule the specified callback.
+ *
+ * Results:
+ * 0 if success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
+{
+ struct vmci_delayed_work_info *delayed_work_info;
+
+ delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
+ VMCI_MEMORY_ATOMIC);
+
+ if (!delayed_work_info)
+ return (VMCI_ERROR_NO_MEM);
+
+ delayed_work_info->work_fn = work_fn;
+ delayed_work_info->data = data;
+ mtx_lock(&vmci_sc->vmci_delayed_work_lock);
+ vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
+ delayed_work_info, entry);
+ mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
+
+ taskqueue_enqueue(taskqueue_thread,
+ &vmci_sc->vmci_delayed_work_task);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_send_datagram --
+ *
+ * VM to hypervisor call mechanism.
+ *
+ * Results:
+ * The result of the hypercall.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_send_datagram(struct vmci_datagram *dg)
+{
+ int result;
+
+ if (dg == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ /*
+ * We need to acquire the device spinlock because the datagram data may
+ * be spread over multiple pages and the monitor may interleave device
+ * user RPC calls from multiple VCPUs; holding the spinlock precludes
+ * that. The spinlock also disables interrupts, which prevents an
+ * incoming datagram from arriving during the "rep outsb" and possibly
+ * re-entering this function.
+ */
+ mtx_lock_spin(&vmci_sc->vmci_spinlock);
+
+ /*
+ * Send the datagram and retrieve the return value from the result
+ * register.
+ */
+ __asm__ __volatile__(
+ "cld\n\t"
+ "rep outsb\n\t"
+ : /* No output. */
+ : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
+ "c"(VMCI_DG_SIZE(dg)), "S"(dg)
+ );
+
+ /*
+ * XXX: Should read result high port as well when updating handlers to
+ * return 64bit.
+ */
+
+ result = bus_space_read_4(vmci_sc->vmci_iot0,
+ vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
+ mtx_unlock_spin(&vmci_sc->vmci_spinlock);
+
+ return (result);
+}
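
A small sketch of how code elsewhere in the driver can defer work out of
interrupt context through vmci_schedule_delayed_work_fn() defined above; the
vmci_work_fn signature is assumed to be "void fn(void *data)", matching how
vmci_delayed_work_fn_cb() invokes the queued callbacks.

static void
example_deferred_fn(void *data)
{

	/* Runs later from taskqueue_thread, outside interrupt context. */
}

static void
example_defer(struct vmci_softc *sc)
{

	if (vmci_schedule_delayed_work_fn(example_deferred_fn, sc) !=
	    VMCI_SUCCESS)
		VMCI_LOG_WARNING(LGPFX"Failed to schedule delayed work.\n");
}
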
Index: sys/dev/vmware/vmci/vmci_call_defs.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_call_defs.h
@@ -0,0 +1,242 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+#ifndef _VMCI_CALL_DEFS_H_
+#define _VMCI_CALL_DEFS_H_
+
+#include "vmci_defs.h"
+
+/*
+ * All structs here have a size that is a multiple of the size of their
+ * largest member, i.e. a struct with at least one 8-byte member has a size
+ * that is a multiple of 8, and a struct whose largest member is 4 bytes has
+ * a size that is a multiple of 4.
+ */
+
+/*
+ * Base struct for vmci datagrams.
+ */
+struct vmci_datagram {
+ struct vmci_handle dst;
+ struct vmci_handle src;
+ uint64_t payload_size;
+};
+
+/*
+ * Second flag is for creating a well-known handle instead of a per context
+ * handle. Next flag is for deferring datagram delivery, so that the
+ * datagram callback is invoked in a delayed context (not interrupt context).
+ */
+#define VMCI_FLAG_DG_NONE 0
+#define VMCI_FLAG_WELLKNOWN_DG_HND 0x1
+#define VMCI_FLAG_ANYCID_DG_HND 0x2
+#define VMCI_FLAG_DG_DELAYED_CB 0x4
+
+/* Event callback should fire in a delayed context (not interrupt context). */
+#define VMCI_FLAG_EVENT_NONE 0
+#define VMCI_FLAG_EVENT_DELAYED_CB 0x1
+
+/*
+ * Maximum supported size of a VMCI datagram for routable datagrams.
+ * Datagrams going to the hypervisor are allowed to be larger.
+ */
+#define VMCI_MAX_DG_SIZE \
+ (17 * 4096)
+#define VMCI_MAX_DG_PAYLOAD_SIZE \
+ (VMCI_MAX_DG_SIZE - sizeof(struct vmci_datagram))
+#define VMCI_DG_PAYLOAD(_dg) \
+ (void *)((char *)(_dg) + sizeof(struct vmci_datagram))
+#define VMCI_DG_HEADERSIZE \
+ sizeof(struct vmci_datagram)
+#define VMCI_DG_SIZE(_dg) \
+ (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payload_size)
+#define VMCI_DG_SIZE_ALIGNED(_dg) \
+ ((VMCI_DG_SIZE(_dg) + 7) & (size_t)~7)
+
+/*
+ * Struct used for querying, via VMCI_RESOURCES_QUERY, the availability of
+ * hypervisor resources.
+ * Struct size is 16 bytes. All fields in struct are aligned to their natural
+ * alignment.
+ */
+struct vmci_resources_query_hdr {
+ struct vmci_datagram hdr;
+ uint32_t num_resources;
+ uint32_t _padding;
+};
+
+/*
+ * Convenience struct for negotiating vectors. Must match the layout of
+ * vmci_resources_query_hdr minus the struct vmci_datagram header.
+ */
+struct vmci_resources_query_msg {
+ uint32_t num_resources;
+ uint32_t _padding;
+ vmci_resource resources[1];
+};
+
+/*
+ * Struct used for setting the notification bitmap. All fields in struct are
+ * aligned to their natural alignment.
+ */
+struct vmci_notify_bitmap_set_msg {
+ struct vmci_datagram hdr;
+ PPN bitmap_ppn;
+ uint32_t _pad;
+};
+
+/*
+ * Struct used for linking a doorbell handle with an index in the notify
+ * bitmap. All fields in struct are aligned to their natural alignment.
+ */
+struct vmci_doorbell_link_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+ uint64_t notify_idx;
+};
+
+/*
+ * Struct used for unlinking a doorbell handle from an index in the notify
+ * bitmap. All fields in struct are aligned to their natural alignment.
+ */
+struct vmci_doorbell_unlink_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+};
+
+/*
+ * Struct used for generating a notification on a doorbell handle. All fields
+ * in struct are aligned to their natural alignment.
+ */
+struct vmci_doorbell_notify_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+};
+
+/*
+ * This struct is used to contain data for events. Size of this struct is a
+ * multiple of 8 bytes, and all fields are aligned to their natural alignment.
+ */
+struct vmci_event_data {
+ vmci_event_type event; /* 4 bytes. */
+ uint32_t _pad;
+ /*
+ * Event payload is put here.
+ */
+};
+
+/* Callback type used to deliver an incoming datagram to its handler. */
+
+typedef int
+(*vmci_datagram_recv_cb)(void *client_data, struct vmci_datagram *msg);
+
+/*
+ * We use the following inline function to access the payload data associated
+ * with an event data.
+ */
+
+static inline void *
+vmci_event_data_payload(struct vmci_event_data *ev_data)
+{
+
+ return ((void *)((char *)ev_data + sizeof(*ev_data)));
+}
+
+/*
+ * Define the different VMCI_EVENT payload data types here. All structs must
+ * be a multiple of 8 bytes, and fields must be aligned to their natural
+ * alignment.
+ */
+struct vmci_event_payload_context {
+ vmci_id context_id; /* 4 bytes. */
+ uint32_t _pad;
+};
+
+struct vmci_event_payload_qp {
+ /* QueuePair handle. */
+ struct vmci_handle handle;
+ /* Context id of attaching/detaching VM. */
+ vmci_id peer_id;
+ uint32_t _pad;
+};
+
+/*
+ * We define the following struct to get the size of the maximum event data
+ * the hypervisor may send to the guest. If adding a new event payload type
+ * above, add it to the following struct too (inside the union).
+ */
+struct vmci_event_data_max {
+ struct vmci_event_data event_data;
+ union {
+ struct vmci_event_payload_context context_payload;
+ struct vmci_event_payload_qp qp_payload;
+ } ev_data_payload;
+};
+
+/*
+ * Struct used for VMCI_EVENT_SUBSCRIBE/UNSUBSCRIBE and VMCI_EVENT_HANDLER
+ * messages. Struct size is 32 bytes. All fields in struct are aligned to
+ * their natural alignment.
+ */
+struct vmci_event_msg {
+ struct vmci_datagram hdr;
+ struct vmci_event_data event_data; /* Has event type & payload. */
+ /*
+ * Payload gets put here.
+ */
+};
+
+/*
+ * We use the following inline function to access the payload data associated
+ * with an event message.
+ */
+
+static inline void *
+vmci_event_msg_payload(struct vmci_event_msg *e_msg)
+{
+
+ return (vmci_event_data_payload(&e_msg->event_data));
+}
+
+/* Flags for VMCI QueuePair API. */
+#define VMCI_QPFLAG_ATTACH_ONLY \
+ 0x1 /* Fail alloc if QP not created by peer. */
+#define VMCI_QPFLAG_LOCAL \
+ 0x2 /* Only allow attaches from local context. */
+#define VMCI_QPFLAG_NONBLOCK \
+ 0x4 /* Host won't block when guest is quiesced. */
+
+/* For asymmetric queuepairs, update as new flags are added. */
+#define VMCI_QP_ASYMM \
+ VMCI_QPFLAG_NONBLOCK
+#define VMCI_QP_ASYMM_PEER \
+ (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QP_ASYMM)
+
+/* Update the following (bitwise OR flags) while adding new flags. */
+#define VMCI_QP_ALL_FLAGS \
+ (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QPFLAG_LOCAL | VMCI_QPFLAG_NONBLOCK)
+
+/*
+ * Structs used for QueuePair alloc and detach messages. We align fields of
+ * these structs to 64 bit boundaries.
+ */
+struct vmci_queue_pair_alloc_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+ vmci_id peer; /* 32bit field. */
+ uint32_t flags;
+ uint64_t produce_size;
+ uint64_t consume_size;
+ uint64_t num_ppns;
+ /* List of PPNs placed here. */
+};
+
+struct vmci_queue_pair_detach_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+};
+
+#endif /* !_VMCI_CALL_DEFS_H_ */
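
A brief illustration of how the datagram size and payload macros above
compose; vmci_alloc_kernel_mem()/vmci_free_kernel_mem() are the allocator
wrappers from vmci_kernel_if.h used elsewhere in this patch.

static void
example_build_datagram(void)
{
	struct vmci_datagram *dg;
	size_t payload_len = 16;

	dg = vmci_alloc_kernel_mem(VMCI_DG_HEADERSIZE + payload_len,
	    VMCI_MEMORY_NORMAL);
	if (dg == NULL)
		return;

	dg->payload_size = payload_len;
	/* The payload starts immediately after the header. */
	memset(VMCI_DG_PAYLOAD(dg), 0, payload_len);
	/* VMCI_DG_SIZE(dg) == VMCI_DG_HEADERSIZE + payload_len, and */
	/* VMCI_DG_SIZE_ALIGNED(dg) rounds that up to a multiple of 8. */

	vmci_free_kernel_mem(dg, VMCI_DG_HEADERSIZE + payload_len);
}
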
Index: sys/dev/vmware/vmci/vmci_datagram.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_datagram.h
@@ -0,0 +1,24 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Internal functions in the VMCI Simple Datagram API */
+
+#ifndef _VMCI_DATAGRAM_H_
+#define _VMCI_DATAGRAM_H_
+
+#include "vmci_call_defs.h"
+
+/* Datagram API for non-public use. */
+int vmci_datagram_dispatch(vmci_id context_id, struct vmci_datagram *dg);
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg);
+int vmci_datagram_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags);
+
+/* Misc. */
+void vmci_datagram_sync(void);
+bool vmci_datagram_check_host_capabilities(void);
+
+#endif /* !_VMCI_DATAGRAM_H_ */
Index: sys/dev/vmware/vmci/vmci_datagram.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_datagram.c
@@ -0,0 +1,647 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements the VMCI Simple Datagram API on the host. */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include "vmci_datagram.h"
+#include "vmci_driver.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+
+#define LGPFX "vmci_datagram: "
+
+/*
+ * datagram_entry describes the datagram entity. It is used for datagram
+ * entities created only on the host.
+ */
+struct datagram_entry {
+ struct vmci_resource resource;
+ uint32_t flags;
+ bool run_delayed;
+ vmci_datagram_recv_cb recv_cb;
+ void *client_data;
+ vmci_event destroy_event;
+ vmci_privilege_flags priv_flags;
+};
+
+struct vmci_delayed_datagram_info {
+ struct datagram_entry *entry;
+ struct vmci_datagram msg;
+};
+
+static int vmci_datagram_get_priv_flags_int(vmci_id context_id,
+ struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags);
+static void datagram_free_cb(void *resource);
+static int datagram_release_cb(void *client_data);
+
+/*------------------------------ Helper functions ----------------------------*/
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * datagram_free_cb --
+ *
+ * Callback to free datagram structure when resource is no longer used,
+ * i.e. the reference count reached 0.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+datagram_free_cb(void *client_data)
+{
+ struct datagram_entry *entry = (struct datagram_entry *)client_data;
+
+ ASSERT(entry);
+
+ vmci_signal_event(&entry->destroy_event);
+
+ /*
+ * The entry is freed in vmci_datagram_destroy_handle(), which is
+ * waiting for the above signal.
+ */
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * datagram_release_cb --
+ *
+ * Callback to release the resource reference. It is called by the
+ * vmci_wait_on_event function before it blocks.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+datagram_release_cb(void *client_data)
+{
+ struct datagram_entry *entry;
+
+ entry = (struct datagram_entry *)client_data;
+
+ ASSERT(entry);
+
+ vmci_resource_release(&entry->resource);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * datagram_create_hnd --
+ *
+ * Internal function to create a datagram entry given a handle.
+ *
+ * Results:
+ * VMCI_SUCCESS if created, negative errno value otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+datagram_create_hnd(vmci_id resource_id, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb,
+ void *client_data, struct vmci_handle *out_handle)
+{
+ struct datagram_entry *entry;
+ struct vmci_handle handle;
+ vmci_id context_id;
+ int result;
+
+ ASSERT(recv_cb != NULL);
+ ASSERT(out_handle != NULL);
+ ASSERT(!(priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS));
+
+ if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0)
+ return (VMCI_ERROR_INVALID_ARGS);
+ else {
+ if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0)
+ context_id = VMCI_INVALID_ID;
+ else {
+ context_id = vmci_get_context_id();
+ if (context_id == VMCI_INVALID_ID)
+ return (VMCI_ERROR_NO_RESOURCES);
+ }
+
+ if (resource_id == VMCI_INVALID_ID) {
+ resource_id = vmci_resource_get_id(context_id);
+ if (resource_id == VMCI_INVALID_ID)
+ return (VMCI_ERROR_NO_HANDLE);
+ }
+
+ handle = VMCI_MAKE_HANDLE(context_id, resource_id);
+ }
+
+ entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL);
+ if (entry == NULL) {
+ VMCI_LOG_WARNING(LGPFX"Failed allocating memory for datagram "
+ "entry.\n");
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ if (!vmci_can_schedule_delayed_work()) {
+ if (flags & VMCI_FLAG_DG_DELAYED_CB) {
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+ entry->run_delayed = false;
+ } else
+ entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ?
+ true : false;
+
+ entry->flags = flags;
+ entry->recv_cb = recv_cb;
+ entry->client_data = client_data;
+ vmci_create_event(&entry->destroy_event);
+ entry->priv_flags = priv_flags;
+
+ /* Make datagram resource live. */
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_DATAGRAM, handle, datagram_free_cb, entry);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to add new resource "
+ "(handle=0x%x:0x%x).\n", handle.context, handle.resource);
+ vmci_destroy_event(&entry->destroy_event);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+ return (result);
+ }
+ *out_handle = handle;
+
+ return (VMCI_SUCCESS);
+}
+
+/*------------------------------ Public API functions ------------------------*/
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_create_handle --
+ *
+ * Creates a host context datagram endpoint and returns a handle to it.
+ *
+ * Results:
+ * VMCI_SUCCESS if created, negative errno value otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_create_handle(vmci_id resource_id, uint32_t flags,
+ vmci_datagram_recv_cb recv_cb, void *client_data,
+ struct vmci_handle *out_handle)
+{
+
+ if (out_handle == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (recv_cb == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Client callback needed when creating "
+ "datagram.\n");
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ return (datagram_create_hnd(resource_id, flags,
+ VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+ recv_cb, client_data, out_handle));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_create_handle_priv --
+ *
+ * Creates a host context datagram endpoint and returns a handle to it.
+ *
+ * Results:
+ * VMCI_SUCCESS if created, negative errno value otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_create_handle_priv(vmci_id resource_id, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb,
+ void *client_data, struct vmci_handle *out_handle)
+{
+
+ if (out_handle == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (recv_cb == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Client callback needed when creating "
+ "datagram.\n");
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (datagram_create_hnd(resource_id, flags, priv_flags, recv_cb,
+ client_data, out_handle));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_destroy_handle --
+ *
+ * Destroys a handle.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_destroy_handle(struct vmci_handle handle)
+{
+ struct datagram_entry *entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(handle,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (resource == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to destroy datagram "
+ "(handle=0x%x:0x%x).\n", handle.context, handle.resource);
+ return (VMCI_ERROR_NOT_FOUND);
+ }
+ entry = RESOURCE_CONTAINER(resource, struct datagram_entry, resource);
+
+ vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DATAGRAM);
+
+ /*
+ * We now wait on the destroy_event and release the reference we got
+ * above.
+ */
+ vmci_wait_on_event(&entry->destroy_event, datagram_release_cb, entry);
+
+ /*
+ * We know that we are now the only reference to the above entry so
+ * can safely free it.
+ */
+ vmci_destroy_event(&entry->destroy_event);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_get_priv_flags_int --
+ *
+ * Internal utility function with the same purpose as
+ * vmci_datagram_get_priv_flags that also takes a context_id.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_datagram_get_priv_flags_int(vmci_id context_id, struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags)
+{
+
+ ASSERT(priv_flags);
+ ASSERT(context_id != VMCI_INVALID_ID);
+
+ if (context_id == VMCI_HOST_CONTEXT_ID) {
+ struct datagram_entry *src_entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(handle,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (resource == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+ src_entry = RESOURCE_CONTAINER(resource, struct datagram_entry,
+ resource);
+ *priv_flags = src_entry->priv_flags;
+ vmci_resource_release(resource);
+ } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID)
+ *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS;
+ else
+ *priv_flags = VMCI_NO_PRIVILEGE_FLAGS;
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_get_priv_flags --
+ *
+ * Utility function that retrieves the privilege flags associated with a
+ * given datagram handle. For hypervisor and guest endpoints, the
+ * privileges are determined by the context ID, but for host endpoints
+ * privileges are associated with the complete handle.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags)
+{
+
+ if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_datagram_get_priv_flags_int(handle.context, handle,
+ priv_flags));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_delayed_dispatch_cb --
+ *
+ * Calls the specified callback in a delayed context.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_datagram_delayed_dispatch_cb(void *data)
+{
+ struct vmci_delayed_datagram_info *dg_info;
+
+ dg_info = (struct vmci_delayed_datagram_info *)data;
+
+ ASSERT(data);
+
+ dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg);
+
+ vmci_resource_release(&dg_info->entry->resource);
+
+ vmci_free_kernel_mem(dg_info, sizeof(*dg_info) +
+ (size_t)dg_info->msg.payload_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_dispatch_as_guest --
+ *
+ * Dispatch datagram as a guest, down through the VMX and potentially to
+ * the host.
+ *
+ * Result:
+ * Number of bytes sent on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_datagram_dispatch_as_guest(struct vmci_datagram *dg)
+{
+ struct vmci_resource *resource;
+ int retval;
+
+ resource = vmci_resource_get(dg->src, VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (NULL == resource)
+ return VMCI_ERROR_NO_HANDLE;
+
+ retval = vmci_send_datagram(dg);
+ vmci_resource_release(resource);
+
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_dispatch --
+ *
+ * Dispatch datagram. This will determine the routing for the datagram and
+ * dispatch it accordingly.
+ *
+ * Result:
+ * Number of bytes sent on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_dispatch(vmci_id context_id, struct vmci_datagram *dg)
+{
+
+ ASSERT(dg);
+ ASSERT_ON_COMPILE(sizeof(struct vmci_datagram) == 24);
+
+ if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) {
+ VMCI_LOG_DEBUG(LGPFX"Payload (size=%lu bytes) too big to send."
+ "\n", dg->payload_size);
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ return (vmci_datagram_dispatch_as_guest(dg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_invoke_guest_handler --
+ *
+ * Invoke the handler for the given datagram. This is intended to be called
+ * only when acting as a guest and receiving a datagram from the virtual
+ * device.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, other error values on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg)
+{
+ struct datagram_entry *dst_entry;
+ struct vmci_resource *resource;
+ int retval;
+
+ ASSERT(dg);
+
+ if (dg->payload_size > VMCI_MAX_DG_PAYLOAD_SIZE) {
+ VMCI_LOG_DEBUG(LGPFX"Payload (size=%lu bytes) too large to "
+ "deliver.\n", dg->payload_size);
+ return (VMCI_ERROR_PAYLOAD_TOO_LARGE);
+ }
+
+ resource = vmci_resource_get(dg->dst, VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (NULL == resource) {
+ VMCI_LOG_DEBUG(LGPFX"destination (handle=0x%x:0x%x) doesn't "
+ "exist.\n", dg->dst.context, dg->dst.resource);
+ return (VMCI_ERROR_NO_HANDLE);
+ }
+
+ dst_entry = RESOURCE_CONTAINER(resource, struct datagram_entry,
+ resource);
+ if (dst_entry->run_delayed) {
+ struct vmci_delayed_datagram_info *dg_info;
+
+ dg_info = vmci_alloc_kernel_mem(sizeof(*dg_info) +
+ (size_t)dg->payload_size, VMCI_MEMORY_ATOMIC);
+ if (NULL == dg_info) {
+ vmci_resource_release(resource);
+ retval = VMCI_ERROR_NO_MEM;
+ goto exit;
+ }
+
+ dg_info->entry = dst_entry;
+ memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg));
+
+ retval = vmci_schedule_delayed_work(
+ vmci_datagram_delayed_dispatch_cb, dg_info);
+ if (retval < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to schedule delayed "
+ "work for datagram (result=%d).\n", retval);
+ vmci_free_kernel_mem(dg_info, sizeof(*dg_info) +
+ (size_t)dg->payload_size);
+ vmci_resource_release(resource);
+ dg_info = NULL;
+ goto exit;
+ }
+ } else {
+ dst_entry->recv_cb(dst_entry->client_data, dg);
+ vmci_resource_release(resource);
+ retval = VMCI_SUCCESS;
+ }
+
+exit:
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_send --
+ *
+ * Sends the payload to the destination datagram handle.
+ *
+ * Results:
+ * Returns number of bytes sent if success, or error code if failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_send(struct vmci_datagram *msg)
+{
+
+ if (msg == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_datagram_dispatch(VMCI_INVALID_ID, msg));
+}
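+
+/*
+ * Example (illustrative only, not part of this change): a minimal sketch of
+ * how a kernel client might build and send a datagram. The peer handle, the
+ * source handle and the payload buffer are hypothetical; the source must be
+ * a datagram handle this guest has already registered, otherwise
+ * vmci_datagram_dispatch_as_guest() above fails with VMCI_ERROR_NO_HANDLE.
+ * vmci_datagram_send() returns the number of bytes sent or a negative error.
+ *
+ *	struct vmci_datagram *dg;
+ *	size_t size = VMCI_DG_HEADERSIZE + payload_size;
+ *	int rv;
+ *
+ *	dg = vmci_alloc_kernel_mem(size, VMCI_MEMORY_NORMAL);
+ *	if (dg == NULL)
+ *		return (VMCI_ERROR_NO_MEM);
+ *	dg->dst = peer_handle;
+ *	dg->src = registered_src_handle;
+ *	dg->payload_size = payload_size;
+ *	memcpy(VMCI_DG_PAYLOAD(dg), payload, payload_size);
+ *	rv = vmci_datagram_send(dg);
+ *	vmci_free_kernel_mem(dg, size);
+ */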
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_datagram_sync(void)
+{
+
+ vmci_resource_sync();
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_check_host_capabilities --
+ *
+ * Verify that the host supports the resources we need. None are required
+ * for datagrams since they are implicitly supported.
+ *
+ * Results:
+ * true.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_datagram_check_host_capabilities(void)
+{
+
+ return (true);
+}
Index: sys/dev/vmware/vmci/vmci_defs.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_defs.h
@@ -0,0 +1,715 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+#ifndef _VMCI_DEFS_H_
+#define _VMCI_DEFS_H_
+
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+#include "vmci_kernel_defs.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+/* Register offsets. */
+#define VMCI_STATUS_ADDR 0x00
+#define VMCI_CONTROL_ADDR 0x04
+#define VMCI_ICR_ADDR 0x08
+#define VMCI_IMR_ADDR 0x0c
+#define VMCI_DATA_OUT_ADDR 0x10
+#define VMCI_DATA_IN_ADDR 0x14
+#define VMCI_CAPS_ADDR 0x18
+#define VMCI_RESULT_LOW_ADDR 0x1c
+#define VMCI_RESULT_HIGH_ADDR 0x20
+
+/* Status register bits. */
+#define VMCI_STATUS_INT_ON 0x1
+
+/* Control register bits. */
+#define VMCI_CONTROL_RESET 0x1
+#define VMCI_CONTROL_INT_ENABLE 0x2
+#define VMCI_CONTROL_INT_DISABLE 0x4
+
+/* Capabilities register bits. */
+#define VMCI_CAPS_HYPERCALL 0x1
+#define VMCI_CAPS_GUESTCALL 0x2
+#define VMCI_CAPS_DATAGRAM 0x4
+#define VMCI_CAPS_NOTIFICATIONS 0x8
+
+/* Interrupt Cause register bits. */
+#define VMCI_ICR_DATAGRAM 0x1
+#define VMCI_ICR_NOTIFICATION 0x2
+
+/* Interrupt Mask register bits. */
+#define VMCI_IMR_DATAGRAM 0x1
+#define VMCI_IMR_NOTIFICATION 0x2
+
+/* Interrupt type. */
+typedef enum vmci_intr_type {
+ VMCI_INTR_TYPE_INTX = 0,
+ VMCI_INTR_TYPE_MSI = 1,
+ VMCI_INTR_TYPE_MSIX = 2
+} vmci_intr_type;
+
+/*
+ * Maximum MSI/MSI-X interrupt vectors in the device.
+ */
+#define VMCI_MAX_INTRS 2
+
+/*
+ * Supported interrupt vectors. There is one for each ICR value above,
+ * but here they indicate the position in the vector array/message ID.
+ */
+#define VMCI_INTR_DATAGRAM 0
+#define VMCI_INTR_NOTIFICATION 1
+
+/*
+ * A single VMCI device has an upper limit of 128 MiB on the amount of
+ * memory that can be used for queue pairs.
+ */
+#define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024)
+
+/*
+ * We have a fixed set of resource IDs available in the VMX.
+ * This allows us to have a very simple implementation since we statically
+ * know how many callers will create datagram handles. If a new caller
+ * arrives and we have run out of slots, we can manually increment the
+ * maximum size of available resource IDs.
+ */
+
+typedef uint32_t vmci_resource;
+
+/* VMCI reserved hypervisor datagram resource IDs. */
+#define VMCI_RESOURCES_QUERY 0
+#define VMCI_GET_CONTEXT_ID 1
+#define VMCI_SET_NOTIFY_BITMAP 2
+#define VMCI_DOORBELL_LINK 3
+#define VMCI_DOORBELL_UNLINK 4
+#define VMCI_DOORBELL_NOTIFY 5
+/*
+ * VMCI_DATAGRAM_REQUEST_MAP and VMCI_DATAGRAM_REMOVE_MAP are
+ * obsoleted by the removal of VM to VM communication.
+ */
+#define VMCI_DATAGRAM_REQUEST_MAP 6
+#define VMCI_DATAGRAM_REMOVE_MAP 7
+#define VMCI_EVENT_SUBSCRIBE 8
+#define VMCI_EVENT_UNSUBSCRIBE 9
+#define VMCI_QUEUEPAIR_ALLOC 10
+#define VMCI_QUEUEPAIR_DETACH 11
+/*
+ * VMCI_VSOCK_VMX_LOOKUP was assigned to 12 for Fusion 3.0/3.1,
+ * WS 7.0/7.1 and ESX 4.1
+ */
+#define VMCI_HGFS_TRANSPORT 13
+#define VMCI_UNITY_PBRPC_REGISTER 14
+/*
+ * This resource is used for VMCI socket control packets sent to the
+ * hypervisor (CID 0) because RID 1 is already reserved.
+ */
+#define VSOCK_PACKET_HYPERVISOR_RID 15
+#define VMCI_RESOURCE_MAX 16
+/*
+ * The core VMCI device functionality only requires the resource IDs of
+ * VMCI_QUEUEPAIR_DETACH and below.
+ */
+#define VMCI_CORE_DEVICE_RESOURCE_MAX VMCI_QUEUEPAIR_DETACH
+
+/*
+ * VMCI reserved host datagram resource IDs.
+ * vsock control channel has resource id 1.
+ */
+#define VMCI_DVFILTER_DATA_PATH_DATAGRAM 2
+
+/* VMCI Ids. */
+typedef uint32_t vmci_id;
+
+struct vmci_id_range {
+ int8_t action; /* VMCI_FA_X, for use in filters. */
+ vmci_id begin; /* Beginning of range. */
+ vmci_id end; /* End of range. */
+};
+
+struct vmci_handle {
+ vmci_id context;
+ vmci_id resource;
+};
+
+static inline struct vmci_handle
+VMCI_MAKE_HANDLE(vmci_id cid, vmci_id rid)
+{
+ struct vmci_handle h;
+
+ h.context = cid;
+ h.resource = rid;
+ return (h);
+}
+
+#define VMCI_HANDLE_TO_CONTEXT_ID(_handle) \
+ ((_handle).context)
+#define VMCI_HANDLE_TO_RESOURCE_ID(_handle) \
+ ((_handle).resource)
+#define VMCI_HANDLE_EQUAL(_h1, _h2) \
+ ((_h1).context == (_h2).context && (_h1).resource == (_h2).resource)
+
+#define VMCI_INVALID_ID 0xFFFFFFFF
+static const struct vmci_handle VMCI_INVALID_HANDLE = {VMCI_INVALID_ID,
+ VMCI_INVALID_ID};
+
+#define VMCI_HANDLE_INVALID(_handle) \
+ VMCI_HANDLE_EQUAL((_handle), VMCI_INVALID_HANDLE)
+
+/*
+ * The below defines can be used to send anonymous requests.
+ * This also indicates that no response is expected.
+ */
+#define VMCI_ANON_SRC_CONTEXT_ID \
+ VMCI_INVALID_ID
+#define VMCI_ANON_SRC_RESOURCE_ID \
+ VMCI_INVALID_ID
+#define VMCI_ANON_SRC_HANDLE \
+ VMCI_MAKE_HANDLE(VMCI_ANON_SRC_CONTEXT_ID, \
+ VMCI_ANON_SRC_RESOURCE_ID)
+
+/* The lowest 16 context ids are reserved for internal use. */
+#define VMCI_RESERVED_CID_LIMIT 16
+
+/*
+ * Hypervisor context id, used for calling into hypervisor
+ * supplied services from the VM.
+ */
+#define VMCI_HYPERVISOR_CONTEXT_ID 0
+
+/*
+ * Well-known context id, a logical context that contains a set of
+ * well-known services. This context ID is now obsolete.
+ */
+#define VMCI_WELL_KNOWN_CONTEXT_ID 1
+
+/*
+ * Context ID used by host endpoints.
+ */
+#define VMCI_HOST_CONTEXT_ID 2
+#define VMCI_HOST_CONTEXT_INVALID_EVENT ((uintptr_t)~0)
+
+#define VMCI_CONTEXT_IS_VM(_cid) \
+ (VMCI_INVALID_ID != _cid && _cid > VMCI_HOST_CONTEXT_ID)
+
+/*
+ * The VMCI_CONTEXT_RESOURCE_ID is used together with VMCI_MAKE_HANDLE to make
+ * handles that refer to a specific context.
+ */
+#define VMCI_CONTEXT_RESOURCE_ID 0
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * VMCI error codes.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+#define VMCI_SUCCESS_QUEUEPAIR_ATTACH 5
+#define VMCI_SUCCESS_QUEUEPAIR_CREATE 4
+#define VMCI_SUCCESS_LAST_DETACH 3
+#define VMCI_SUCCESS_ACCESS_GRANTED 2
+#define VMCI_SUCCESS_ENTRY_DEAD 1
+#define VMCI_SUCCESS 0LL
+#define VMCI_ERROR_INVALID_RESOURCE (-1)
+#define VMCI_ERROR_INVALID_ARGS (-2)
+#define VMCI_ERROR_NO_MEM (-3)
+#define VMCI_ERROR_DATAGRAM_FAILED (-4)
+#define VMCI_ERROR_MORE_DATA (-5)
+#define VMCI_ERROR_NO_MORE_DATAGRAMS (-6)
+#define VMCI_ERROR_NO_ACCESS (-7)
+#define VMCI_ERROR_NO_HANDLE (-8)
+#define VMCI_ERROR_DUPLICATE_ENTRY (-9)
+#define VMCI_ERROR_DST_UNREACHABLE (-10)
+#define VMCI_ERROR_PAYLOAD_TOO_LARGE (-11)
+#define VMCI_ERROR_INVALID_PRIV (-12)
+#define VMCI_ERROR_GENERIC (-13)
+#define VMCI_ERROR_PAGE_ALREADY_SHARED (-14)
+#define VMCI_ERROR_CANNOT_SHARE_PAGE (-15)
+#define VMCI_ERROR_CANNOT_UNSHARE_PAGE (-16)
+#define VMCI_ERROR_NO_PROCESS (-17)
+#define VMCI_ERROR_NO_DATAGRAM (-18)
+#define VMCI_ERROR_NO_RESOURCES (-19)
+#define VMCI_ERROR_UNAVAILABLE (-20)
+#define VMCI_ERROR_NOT_FOUND (-21)
+#define VMCI_ERROR_ALREADY_EXISTS (-22)
+#define VMCI_ERROR_NOT_PAGE_ALIGNED (-23)
+#define VMCI_ERROR_INVALID_SIZE (-24)
+#define VMCI_ERROR_REGION_ALREADY_SHARED (-25)
+#define VMCI_ERROR_TIMEOUT (-26)
+#define VMCI_ERROR_DATAGRAM_INCOMPLETE (-27)
+#define VMCI_ERROR_INCORRECT_IRQL (-28)
+#define VMCI_ERROR_EVENT_UNKNOWN (-29)
+#define VMCI_ERROR_OBSOLETE (-30)
+#define VMCI_ERROR_QUEUEPAIR_MISMATCH (-31)
+#define VMCI_ERROR_QUEUEPAIR_NOTSET (-32)
+#define VMCI_ERROR_QUEUEPAIR_NOTOWNER (-33)
+#define VMCI_ERROR_QUEUEPAIR_NOTATTACHED (-34)
+#define VMCI_ERROR_QUEUEPAIR_NOSPACE (-35)
+#define VMCI_ERROR_QUEUEPAIR_NODATA (-36)
+#define VMCI_ERROR_BUSMEM_INVALIDATION (-37)
+#define VMCI_ERROR_MODULE_NOT_LOADED (-38)
+#define VMCI_ERROR_DEVICE_NOT_FOUND (-39)
+#define VMCI_ERROR_QUEUEPAIR_NOT_READY (-40)
+#define VMCI_ERROR_WOULD_BLOCK (-41)
+
+/* VMCI clients should return error codes within this range. */
+#define VMCI_ERROR_CLIENT_MIN (-500)
+#define VMCI_ERROR_CLIENT_MAX (-550)
+
+/* Internal error codes. */
+#define VMCI_SHAREDMEM_ERROR_BAD_CONTEXT (-1000)
+
+#define VMCI_PATH_MAX 256
+
+/* VMCI reserved events. */
+typedef uint32_t vmci_event_type;
+
+#define VMCI_EVENT_CTX_ID_UPDATE 0 // Only applicable to guest
+ // endpoints
+#define VMCI_EVENT_CTX_REMOVED 1 // Applicable to guest and host
+#define VMCI_EVENT_QP_RESUMED 2 // Only applicable to guest
+ // endpoints
+#define VMCI_EVENT_QP_PEER_ATTACH 3 // Applicable to guest, host
+ // and VMX
+#define VMCI_EVENT_QP_PEER_DETACH 4 // Applicable to guest, host
+ // and VMX
+#define VMCI_EVENT_MEM_ACCESS_ON 5 // Applicable to VMX and vmk. On
+ // vmk, this event has the
+ // Context payload type
+#define VMCI_EVENT_MEM_ACCESS_OFF 6 // Applicable to VMX and vmk.
+ // Same as above for the payload
+ // type
+#define VMCI_EVENT_GUEST_PAUSED 7 // Applicable to vmk. This
+ // event has the Context
+ // payload type
+#define VMCI_EVENT_GUEST_UNPAUSED 8 // Applicable to vmk. Same as
+ // above for the payload type.
+#define VMCI_EVENT_MAX 9
+
+/*
+ * Of the above events, a few are reserved for use in the VMX, and other
+ * endpoints (guest and host kernel) should not use them. For the rest of the
+ * events, we allow both host and guest endpoints to subscribe to them, to
+ * maintain the same API for host and guest endpoints.
+ */
+
+#define VMCI_EVENT_VALID_VMX(_event) \
+ (_event == VMCI_EVENT_QP_PEER_ATTACH || \
+ _event == VMCI_EVENT_QP_PEER_DETACH || \
+ _event == VMCI_EVENT_MEM_ACCESS_ON || \
+ _event == VMCI_EVENT_MEM_ACCESS_OFF)
+
+#define VMCI_EVENT_VALID(_event) \
+ (_event < VMCI_EVENT_MAX && \
+ _event != VMCI_EVENT_MEM_ACCESS_ON && \
+ _event != VMCI_EVENT_MEM_ACCESS_OFF && \
+ _event != VMCI_EVENT_GUEST_PAUSED && \
+ _event != VMCI_EVENT_GUEST_UNPAUSED)
+
+/* Reserved guest datagram resource ids. */
+#define VMCI_EVENT_HANDLER 0
+
+/*
+ * VMCI coarse-grained privileges (per context or host process/endpoint). An
+ * entity with the restricted flag is only allowed to interact with the
+ * hypervisor and trusted entities.
+ */
+typedef uint32_t vmci_privilege_flags;
+
+#define VMCI_PRIVILEGE_FLAG_RESTRICTED 0x01
+#define VMCI_PRIVILEGE_FLAG_TRUSTED 0x02
+#define VMCI_PRIVILEGE_ALL_FLAGS \
+ (VMCI_PRIVILEGE_FLAG_RESTRICTED | VMCI_PRIVILEGE_FLAG_TRUSTED)
+#define VMCI_NO_PRIVILEGE_FLAGS 0x00
+#define VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS VMCI_NO_PRIVILEGE_FLAGS
+#define VMCI_LEAST_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_RESTRICTED
+#define VMCI_MAX_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_TRUSTED
+
+/* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */
+#define VMCI_RESERVED_RESOURCE_ID_MAX 1023
+
+#define VMCI_DOMAIN_NAME_MAXLEN 32
+
+#define VMCI_LGPFX "vmci: "
+
+/*
+ * struct vmci_queue_header
+ *
+ * A Queue cannot stand by itself as designed. Each Queue's header contains a
+ * pointer into itself (the producer_tail) and into its peer (consumer_head).
+ * The reason for the separation is one of accessibility: Each end-point can
+ * modify two things: where the next location to enqueue is within its produce_q
+ * (producer_tail); and where the next dequeue location is in its consume_q
+ * (consumer_head).
+ *
+ * An end-point cannot modify the pointers of its peer (guest to guest; NOTE
+ * that in the host both queue headers are mapped r/w). But, each end-point
+ * needs read access to both Queue header structures in order to determine how
+ * much space is used (or left) in the Queue. This is because for an end-point
+ * to know how full its produce_q is, it needs to use the consumer_head that
+ * points into the produce_q but -that- consumer_head is in the Queue header
+ * for that end-point's consume_q.
+ *
+ * Thoroughly confused? Sorry.
+ *
+ * producer_tail: the point to enqueue new entrants. When you approach a line
+ * in a store, for example, you walk up to the tail.
+ *
+ * consumer_head: the point in the queue from which the next element is
+ * dequeued. In other words, whoever is at the head of the line is the next
+ * to be served.
+ *
+ * Also, producer_tail points to an empty byte in the Queue, whereas
+ * consumer_head points to a valid byte of data (unless producer_tail ==
+ * consumer_head, in which case consumer_head does not point to a valid byte of
+ * data).
+ *
+ * For a queue of buffer 'size' bytes, the tail and head pointers will be in
+ * the range [0, size-1].
+ *
+ * If produce_q_header->producer_tail == consume_q_header->consumer_head then
+ * the produce_q is empty.
+ */
+struct vmci_queue_header {
+ /* All fields are 64bit and aligned. */
+ struct vmci_handle handle; /* Identifier. */
+ volatile uint64_t producer_tail; /* Offset in this queue. */
+ volatile uint64_t consumer_head; /* Offset in peer queue. */
+};
+
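+/*
+ * Concrete example (illustrative): for an endpoint's produce_q, the offset
+ * of the next byte this endpoint will write lives in its own header
+ * (produce_q_header->producer_tail), while the offset of the next byte the
+ * peer will read out of that same produce_q lives in the header of this
+ * endpoint's consume_q (consume_q_header->consumer_head), which the peer
+ * owns. After the endpoint enqueues 5 bytes into an empty 16 byte produce_q
+ * and the peer dequeues 2 of them, producer_tail == 5, consumer_head == 2
+ * and 3 bytes remain to be consumed.
+ */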
+
+/*
+ * If one client of a QueuePair is a 32bit entity, we restrict the QueuePair
+ * size to be less than 4GB, and use 32bit atomic operations on the head and
+ * tail pointers. 64bit atomic read on a 32bit entity involves cmpxchg8b which
+ * is an atomic read-modify-write. This will cause traces to fire when a 32bit
+ * consumer tries to read the producer's tail pointer, for example, because the
+ * consumer has read-only access to the producer's tail pointer.
+ *
+ * We provide the following macros to invoke 32bit or 64bit atomic operations
+ * based on the architecture the code is being compiled on.
+ */
+
+#ifdef __x86_64__
+#define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffffffffffff)
+#define qp_atomic_read_offset(x) atomic_load_64(x)
+#define qp_atomic_write_offset(x, y) atomic_store_64(x, y)
+#else /* __x86_64__ */
+ /*
+ * Wrappers below are being used because atomic_store_<type> operates
+ * on a specific <type>. Likewise for atomic_load_<type>
+ */
+
+ static inline uint32_t
+ type_safe_atomic_read_32(void *var)
+ {
+ return (atomic_load_32((volatile uint32_t *)(var)));
+ }
+
+ static inline void
+ type_safe_atomic_write_32(void *var, uint32_t val)
+ {
+ atomic_store_32((volatile uint32_t *)(var), (uint32_t)(val));
+ }
+
+#define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffff)
+#define qp_atomic_read_offset(x) type_safe_atomic_read_32((void *)(x))
+#define qp_atomic_write_offset(x, y) \
+ type_safe_atomic_write_32((void *)(x), (uint32_t)(y))
+#endif /* __x86_64__ */
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * qp_add_pointer --
+ *
+ * Helper to add a given offset to a head or tail pointer. Wraps the value
+ * of the pointer around the max size of the queue.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+qp_add_pointer(volatile uint64_t *var, size_t add, uint64_t size)
+{
+ uint64_t new_val = qp_atomic_read_offset(var);
+
+ if (new_val >= size - add)
+ new_val -= size;
+
+ new_val += add;
+ qp_atomic_write_offset(var, new_val);
+}
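+
+/*
+ * Worked example (illustrative): with size == 4096, advancing an offset of
+ * 4000 by 200 first subtracts the queue size (since 4000 >= 4096 - 200) and
+ * then adds the increment; unsigned wrap-around makes the intermediate
+ * subtraction harmless and the stored result is (4000 + 200) % 4096 == 104.
+ */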
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_producer_tail --
+ *
+ * Helper routine to get the Producer Tail from the supplied queue.
+ *
+ * Results:
+ * The contents of the queue's producer tail.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline uint64_t
+vmci_queue_header_producer_tail(const struct vmci_queue_header *q_header)
+{
+ struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
+ return (qp_atomic_read_offset(&qh->producer_tail));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_consumer_head --
+ *
+ * Helper routine to get the Consumer Head from the supplied queue.
+ *
+ * Results:
+ * The contents of the queue's consumer head.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline uint64_t
+vmci_queue_header_consumer_head(const struct vmci_queue_header *q_header)
+{
+ struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
+ return (qp_atomic_read_offset(&qh->consumer_head));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_add_producer_tail --
+ *
+ * Helper routine to increment the Producer Tail. Fundamentally,
+ * qp_add_pointer() is used to manipulate the tail itself.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_add_producer_tail(struct vmci_queue_header *q_header,
+ size_t add, uint64_t queue_size)
+{
+
+ qp_add_pointer(&q_header->producer_tail, add, queue_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_add_consumer_head --
+ *
+ * Helper routine to increment the Consumer Head. Fundamentally,
+ * qp_add_pointer() is used to manipulate the head itself.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_add_consumer_head(struct vmci_queue_header *q_header,
+ size_t add, uint64_t queue_size)
+{
+
+ qp_add_pointer(&q_header->consumer_head, add, queue_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_get_pointers --
+ *
+ * Helper routine for getting the head and the tail pointers for a queue.
+ * Both queue headers are needed to get both pointers for one queue.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_get_pointers(const struct vmci_queue_header *produce_q_header,
+ const struct vmci_queue_header *consume_q_header, uint64_t *producer_tail,
+ uint64_t *consumer_head)
+{
+
+ if (producer_tail)
+ *producer_tail =
+ vmci_queue_header_producer_tail(produce_q_header);
+
+ if (consumer_head)
+ *consumer_head =
+ vmci_queue_header_consumer_head(consume_q_header);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_reset_pointers --
+ *
+ * Reset the tail pointer (of "this" queue) and the head pointer (of "peer"
+ * queue).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_reset_pointers(struct vmci_queue_header *q_header)
+{
+
+ qp_atomic_write_offset(&q_header->producer_tail, CONST64U(0));
+ qp_atomic_write_offset(&q_header->consumer_head, CONST64U(0));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_init --
+ *
+ * Initializes a queue's state (head & tail pointers).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_init(struct vmci_queue_header *q_header,
+ const struct vmci_handle handle)
+{
+
+ q_header->handle = handle;
+ vmci_queue_header_reset_pointers(q_header);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_free_space --
+ *
+ * Finds available free space in a produce queue to enqueue more data or
+ * reports an error if queue pair corruption is detected.
+ *
+ * Results:
+ * Free space size in bytes or an error code.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int64_t
+vmci_queue_header_free_space(const struct vmci_queue_header *produce_q_header,
+ const struct vmci_queue_header *consume_q_header,
+ const uint64_t produce_q_size)
+{
+ uint64_t free_space;
+ uint64_t head;
+ uint64_t tail;
+
+ tail = vmci_queue_header_producer_tail(produce_q_header);
+ head = vmci_queue_header_consumer_head(consume_q_header);
+
+ if (tail >= produce_q_size || head >= produce_q_size)
+ return (VMCI_ERROR_INVALID_SIZE);
+
+ /*
+ * Deduct 1 to avoid tail becoming equal to head which causes ambiguity.
+ * If head and tail are equal it means that the queue is empty.
+ */
+
+ if (tail >= head)
+ free_space = produce_q_size - (tail - head) - 1;
+ else
+ free_space = head - tail - 1;
+
+ return (free_space);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_buf_ready --
+ *
+ * vmci_queue_header_free_space() does all the heavy lifting of determining
+ * the number of free bytes in a Queue. This routine then subtracts that
+ * size from the full size of the Queue so the caller knows how many bytes
+ * are ready to be dequeued.
+ *
+ * Results:
+ * On success, available data size in bytes (up to MAX_INT64).
+ * On failure, appropriate error code.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int64_t
+vmci_queue_header_buf_ready(const struct vmci_queue_header *consume_q_header,
+ const struct vmci_queue_header *produce_q_header,
+ const uint64_t consume_q_size)
+{
+ int64_t free_space;
+
+ free_space = vmci_queue_header_free_space(consume_q_header,
+ produce_q_header, consume_q_size);
+ if (free_space < VMCI_SUCCESS)
+ return (free_space);
+ else
+ return (consume_q_size - free_space - 1);
+}
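+
+/*
+ * Worked example (illustrative): for a 4096 byte produce_q with
+ * producer_tail == 100 and the peer's consumer_head == 50,
+ * vmci_queue_header_free_space() returns 4096 - (100 - 50) - 1 == 4045 bytes
+ * of room, and the peer's view of the same queue through
+ * vmci_queue_header_buf_ready() yields 4096 - 4045 - 1 == 50 bytes ready to
+ * be dequeued. The deducted byte keeps a full queue (tail one byte behind
+ * head) distinguishable from an empty one (tail == head).
+ */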
+
+#endif /* !_VMCI_DEFS_H_ */
Index: sys/dev/vmware/vmci/vmci_doorbell.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_doorbell.h
@@ -0,0 +1,27 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Internal functions in the VMCI Doorbell API. */
+
+#ifndef _VMCI_DOORBELL_H_
+#define _VMCI_DOORBELL_H_
+
+#include "vmci_defs.h"
+
+int vmci_doorbell_init(void);
+void vmci_doorbell_exit(void);
+void vmci_doorbell_hibernate(bool enter_hibernate);
+void vmci_doorbell_sync(void);
+
+int vmci_doorbell_host_context_notify(vmci_id src_CID,
+ struct vmci_handle handle);
+int vmci_doorbell_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags);
+
+bool vmci_register_notification_bitmap(PPN bitmap_PPN);
+void vmci_scan_notification_bitmap(uint8_t *bitmap);
+
+#endif /* !_VMCI_DOORBELL_H_ */
Index: sys/dev/vmware/vmci/vmci_doorbell.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_doorbell.c
@@ -0,0 +1,906 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements the VMCI doorbell API. */
+
+#include <sys/types.h>
+
+#include "vmci_doorbell.h"
+#include "vmci_driver.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+#include "vmci_utils.h"
+
+#define LGPFX "vmci_doorbell: "
+
+#define VMCI_DOORBELL_INDEX_TABLE_SIZE 64
+#define VMCI_DOORBELL_HASH(_idx) \
+ vmci_hash_id((_idx), VMCI_DOORBELL_INDEX_TABLE_SIZE)
+
+/* Describes a doorbell notification handle allocated by the host. */
+struct vmci_doorbell_entry {
+ struct vmci_resource resource;
+ uint32_t idx;
+ vmci_list_item(vmci_doorbell_entry) idx_list_item;
+ vmci_privilege_flags priv_flags;
+ bool is_doorbell;
+ bool run_delayed;
+ vmci_callback notify_cb;
+ void *client_data;
+ vmci_event destroy_event;
+ volatile int active;
+};
+
+struct vmci_doorbell_index_table {
+ vmci_lock lock;
+ vmci_list(vmci_doorbell_entry) entries[VMCI_DOORBELL_INDEX_TABLE_SIZE];
+};
+
+/* The VMCI index table keeps track of currently registered doorbells. */
+static struct vmci_doorbell_index_table vmci_doorbell_it;
+
+/*
+ * The max_notify_idx is one larger than the currently known bitmap index in
+ * use, and is used to determine how much of the bitmap needs to be scanned.
+ */
+static uint32_t max_notify_idx;
+
+/*
+ * The notify_idx_count is used for determining whether there are free entries
+ * within the bitmap (if notify_idx_count + 1 < max_notify_idx).
+ */
+static uint32_t notify_idx_count;
+
+/*
+ * The last_notify_idx_reserved is used to track the last index handed out - in
+ * the case where multiple handles share a notification index, we hand out
+ * indexes round robin based on last_notify_idx_reserved.
+ */
+static uint32_t last_notify_idx_reserved;
+
+/* This is a one-entry cache used by the index allocation. */
+static uint32_t last_notify_idx_released = PAGE_SIZE;
+
+static void vmci_doorbell_free_cb(void *client_data);
+static int vmci_doorbell_release_cb(void *client_data);
+static void vmci_doorbell_delayed_dispatch_cb(void *data);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_init --
+ *
+ * General init code.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, lock allocation error otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_init(void)
+{
+ uint32_t bucket;
+
+ for (bucket = 0; bucket < ARRAYSIZE(vmci_doorbell_it.entries);
+ ++bucket)
+ vmci_list_init(&vmci_doorbell_it.entries[bucket]);
+
+ return (vmci_init_lock(&vmci_doorbell_it.lock,
+ "VMCI Doorbell index table lock"));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_exit --
+ *
+ * General exit code.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_doorbell_exit(void)
+{
+
+ vmci_cleanup_lock(&vmci_doorbell_it.lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_free_cb --
+ *
+ * Callback to free the doorbell entry structure when the resource is no
+ * longer used, i.e., when the reference count reaches 0. The entry is freed in
+ * vmci_doorbell_destroy(), which is waiting on the signal that gets fired
+ * here.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * Signals VMCI event.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_free_cb(void *client_data)
+{
+ struct vmci_doorbell_entry *entry;
+
+ entry = (struct vmci_doorbell_entry *)client_data;
+ ASSERT(entry);
+ vmci_signal_event(&entry->destroy_event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_release_cb --
+ *
+ * Callback to release the resource reference. It is called by the
+ * vmci_wait_on_event function before it blocks.
+ *
+ * Result:
+ * Always 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_release_cb(void *client_data)
+{
+ struct vmci_doorbell_entry *entry;
+
+ entry = (struct vmci_doorbell_entry *)client_data;
+ ASSERT(entry);
+ vmci_resource_release(&entry->resource);
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_get_priv_flags --
+ *
+ * Utility function that retrieves the privilege flags associated with a
+ * given doorbell handle. For guest endpoints, the privileges are determined
+ * by the context ID, but for host endpoints privileges are associated with
+ * the complete handle. Hypervisor endpoints are not yet supported.
+ *
+ * Result:
+ * VMCI_SUCCESS on success,
+ * VMCI_ERROR_NOT_FOUND if handle isn't found,
+ * VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags)
+{
+
+ if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (handle.context == VMCI_HOST_CONTEXT_ID) {
+ struct vmci_doorbell_entry *entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(handle,
+ VMCI_RESOURCE_TYPE_DOORBELL);
+ if (resource == NULL)
+ return (VMCI_ERROR_NOT_FOUND);
+ entry = RESOURCE_CONTAINER(
+ resource, struct vmci_doorbell_entry, resource);
+ *priv_flags = entry->priv_flags;
+ vmci_resource_release(resource);
+ } else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) {
+ /* Hypervisor endpoints for notifications are not supported. */
+ return (VMCI_ERROR_INVALID_ARGS);
+ } else
+ *priv_flags = VMCI_NO_PRIVILEGE_FLAGS;
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_index_table_find --
+ *
+ * Find doorbell entry by bitmap index.
+ *
+ * Results:
+ * Entry if found, NULL if not.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_doorbell_entry *
+vmci_doorbell_index_table_find(uint32_t idx)
+{
+ struct vmci_doorbell_entry *iter;
+ uint32_t bucket;
+
+ bucket = VMCI_DOORBELL_HASH(idx);
+
+ vmci_list_scan(iter, &vmci_doorbell_it.entries[bucket], idx_list_item) {
+ if (idx == iter->idx)
+ return (iter);
+ }
+
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_index_table_add --
+ *
+ * Add the given entry to the index table. This will hold() the entry's
+ * resource so that the entry is not deleted before it is removed from the
+ * table.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_index_table_add(struct vmci_doorbell_entry *entry)
+{
+ uint32_t bucket;
+ uint32_t new_notify_idx;
+
+ ASSERT(entry);
+
+ vmci_resource_hold(&entry->resource);
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+
+ /*
+ * Below we try to allocate an index in the notification bitmap with
+ * "not too much" sharing between resources. If we use less that the
+ * full bitmap, we either add to the end if there are no unused flags
+ * within the currently used area, or we search for unused ones. If we
+ * use the full bitmap, we allocate the index round robin.
+ */
+
+ if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) {
+ if (last_notify_idx_released < max_notify_idx &&
+ !vmci_doorbell_index_table_find(last_notify_idx_released)) {
+ new_notify_idx = last_notify_idx_released;
+ last_notify_idx_released = PAGE_SIZE;
+ } else {
+ bool reused = false;
+ new_notify_idx = last_notify_idx_reserved;
+ if (notify_idx_count + 1 < max_notify_idx) {
+ do {
+ if (!vmci_doorbell_index_table_find(
+ new_notify_idx)) {
+ reused = true;
+ break;
+ }
+ new_notify_idx = (new_notify_idx + 1) %
+ max_notify_idx;
+ } while (new_notify_idx !=
+ last_notify_idx_released);
+ }
+ if (!reused) {
+ new_notify_idx = max_notify_idx;
+ max_notify_idx++;
+ }
+ }
+ } else {
+ new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE;
+ }
+ last_notify_idx_reserved = new_notify_idx;
+ notify_idx_count++;
+
+ entry->idx = new_notify_idx;
+ bucket = VMCI_DOORBELL_HASH(entry->idx);
+ vmci_list_insert(&vmci_doorbell_it.entries[bucket], entry,
+ idx_list_item);
+
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+}
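+
+/*
+ * Example (illustrative): with max_notify_idx == 4 and only indices 0 and 3
+ * in use, a new entry reuses last_notify_idx_released if that index is still
+ * free, and otherwise probes round robin from last_notify_idx_reserved until
+ * it finds one of the free indices 1 or 2. max_notify_idx only grows once
+ * notify_idx_count + 1 >= max_notify_idx, and the allocator falls back to
+ * plain round-robin sharing of indices only once a full page's worth has
+ * been handed out.
+ */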
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_index_table_remove --
+ *
+ * Remove the given entry from the index table. This will release() the
+ * entry's resource.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_index_table_remove(struct vmci_doorbell_entry *entry)
+{
+ ASSERT(entry);
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+
+ vmci_list_remove(entry, idx_list_item);
+
+ notify_idx_count--;
+ if (entry->idx == max_notify_idx - 1) {
+ /*
+ * If we delete an entry with the maximum known notification
+ * index, we take the opportunity to prune the current max. As
+ * there might be other unused indices immediately below, we
+ * lower the maximum until we hit an index in use.
+ */
+
+ while (max_notify_idx > 0 &&
+ !vmci_doorbell_index_table_find(max_notify_idx - 1))
+ max_notify_idx--;
+ }
+ last_notify_idx_released = entry->idx;
+
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+
+ vmci_resource_release(&entry->resource);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_link --
+ *
+ * Creates a link between the given doorbell handle and the given index in
+ * the bitmap in the device backend.
+ *
+ * Results:
+ * VMCI_SUCCESS if success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * Notification state is created in hypervisor.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_link(struct vmci_handle handle, bool is_doorbell,
+ uint32_t notify_idx)
+{
+ struct vmci_doorbell_link_msg link_msg;
+ vmci_id resource_id;
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle));
+
+ if (is_doorbell)
+ resource_id = VMCI_DOORBELL_LINK;
+ else {
+ ASSERT(false);
+ return (VMCI_ERROR_UNAVAILABLE);
+ }
+
+ link_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ resource_id);
+ link_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE;
+ link_msg.handle = handle;
+ link_msg.notify_idx = notify_idx;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&link_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_unlink --
+ *
+ * Unlinks the given doorbell handle from an index in the bitmap in the
+ * device backend.
+ *
+ * Results:
+ * VMCI_SUCCESS if success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * Notification state is destroyed in hypervisor.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_unlink(struct vmci_handle handle, bool is_doorbell)
+{
+ struct vmci_doorbell_unlink_msg unlink_msg;
+ vmci_id resource_id;
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle));
+
+ if (is_doorbell)
+ resource_id = VMCI_DOORBELL_UNLINK;
+ else {
+ ASSERT(false);
+ return (VMCI_ERROR_UNAVAILABLE);
+ }
+
+ unlink_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ resource_id);
+ unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE;
+ unlink_msg.handle = handle;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&unlink_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_create --
+ *
+ * Creates a doorbell with the given callback. If the handle is
+ * VMCI_INVALID_HANDLE, a free handle will be assigned, if possible. The
+ * callback can be run immediately (potentially with locks held - the
+ * default) or delayed (in a kernel thread) by specifying the flag
+ * VMCI_FLAG_DELAYED_CB. If delayed execution is selected, a given callback
+ * may not be run if the kernel is unable to allocate memory for the delayed
+ * execution (highly unlikely).
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_create(struct vmci_handle *handle, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_callback notify_cb, void *client_data)
+{
+ struct vmci_doorbell_entry *entry;
+ struct vmci_handle new_handle;
+ int result;
+
+ if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB ||
+ priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL);
+ if (entry == NULL) {
+ VMCI_LOG_WARNING(LGPFX"Failed allocating memory for datagram "
+ "entry.\n");
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ if (!vmci_can_schedule_delayed_work() &&
+ (flags & VMCI_FLAG_DELAYED_CB)) {
+ result = VMCI_ERROR_INVALID_ARGS;
+ goto free_mem;
+ }
+
+ if (VMCI_HANDLE_INVALID(*handle)) {
+ vmci_id context_id;
+
+ context_id = vmci_get_context_id();
+ vmci_id resource_id = vmci_resource_get_id(context_id);
+ if (resource_id == VMCI_INVALID_ID) {
+ result = VMCI_ERROR_NO_HANDLE;
+ goto free_mem;
+ }
+ new_handle = VMCI_MAKE_HANDLE(context_id, resource_id);
+ } else {
+ if (VMCI_INVALID_ID == handle->resource) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid argument "
+ "(handle=0x%x:0x%x).\n", handle->context,
+ handle->resource);
+ result = VMCI_ERROR_INVALID_ARGS;
+ goto free_mem;
+ }
+ new_handle = *handle;
+ }
+
+ entry->idx = 0;
+ entry->priv_flags = priv_flags;
+ entry->is_doorbell = true;
+ entry->run_delayed = (flags & VMCI_FLAG_DELAYED_CB) ? true : false;
+ entry->notify_cb = notify_cb;
+ entry->client_data = client_data;
+ atomic_store_int(&entry->active, 0);
+ vmci_create_event(&entry->destroy_event);
+
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_DOORBELL, new_handle, vmci_doorbell_free_cb,
+ entry);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to add new resource "
+ "(handle=0x%x:0x%x).\n", new_handle.context,
+ new_handle.resource);
+ if (result == VMCI_ERROR_DUPLICATE_ENTRY)
+ result = VMCI_ERROR_ALREADY_EXISTS;
+
+ goto destroy;
+ }
+
+ vmci_doorbell_index_table_add(entry);
+ result = vmci_doorbell_link(new_handle, entry->is_doorbell, entry->idx);
+ if (VMCI_SUCCESS != result)
+ goto destroy_resource;
+ atomic_store_int(&entry->active, 1);
+
+ if (VMCI_HANDLE_INVALID(*handle))
+ *handle = new_handle;
+
+ return (result);
+
+destroy_resource:
+ vmci_doorbell_index_table_remove(entry);
+ vmci_resource_remove(new_handle, VMCI_RESOURCE_TYPE_DOORBELL);
+destroy:
+ vmci_destroy_event(&entry->destroy_event);
+free_mem:
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_destroy --
+ *
+ * Destroys a doorbell previously created with vmci_doorbell_create. This
+ * operation may block waiting for a callback to finish.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * May block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_destroy(struct vmci_handle handle)
+{
+ struct vmci_doorbell_entry *entry;
+ struct vmci_resource *resource;
+ int result;
+
+ if (VMCI_HANDLE_INVALID(handle))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ resource = vmci_resource_get(handle, VMCI_RESOURCE_TYPE_DOORBELL);
+ if (resource == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to destroy doorbell "
+ "(handle=0x%x:0x%x).\n", handle.context, handle.resource);
+ return (VMCI_ERROR_NOT_FOUND);
+ }
+ entry = RESOURCE_CONTAINER(resource, struct vmci_doorbell_entry,
+ resource);
+
+ vmci_doorbell_index_table_remove(entry);
+
+ result = vmci_doorbell_unlink(handle, entry->is_doorbell);
+ if (VMCI_SUCCESS != result) {
+
+ /*
+ * The only reason this should fail would be an inconsistency
+ * between guest and hypervisor state, where the guest believes
+ * it has an active registration whereas the hypervisor doesn't.
+ * One case where this may happen is if a doorbell is
+ * unregistered following a hibernation at a time where the
+ * doorbell state hasn't been restored on the hypervisor side
+ * yet. Since the handle has now been removed in the guest,
+ * we just print a warning and return success.
+ */
+
+ VMCI_LOG_DEBUG(LGPFX"Unlink of %s (handle=0x%x:0x%x) unknown "
+ "by hypervisor (error=%d).\n",
+ entry->is_doorbell ? "doorbell" : "queuepair",
+ handle.context, handle.resource, result);
+ }
+
+ /*
+ * Now remove the resource from the table. It might still be in use
+ * after this, in a callback or still on the delayed work queue.
+ */
+
+ vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DOORBELL);
+
+ /*
+ * We now wait on the destroy_event and release the reference we got
+ * above.
+ */
+
+ vmci_wait_on_event(&entry->destroy_event, vmci_doorbell_release_cb,
+ entry);
+
+ /*
+ * We know that we are now the only reference to the above entry so
+ * can safely free it.
+ */
+
+ vmci_destroy_event(&entry->destroy_event);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_notify_as_guest --
+ *
+ * Notify another guest or the host. We send a datagram down to the host
+ * via the hypervisor with the notification info.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * May do a hypercall.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_notify_as_guest(struct vmci_handle handle,
+ vmci_privilege_flags priv_flags)
+{
+ struct vmci_doorbell_notify_msg notify_msg;
+
+ notify_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_DOORBELL_NOTIFY);
+ notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE;
+ notify_msg.handle = handle;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&notify_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_notify --
+ *
+ * Generates a notification on the doorbell identified by the handle. For
+ * host side generation of notifications, the caller can specify what the
+ * privilege of the calling side is.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * May do a hypercall.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_notify(struct vmci_handle dst, vmci_privilege_flags priv_flags)
+{
+ struct vmci_handle src;
+
+ if (VMCI_HANDLE_INVALID(dst) ||
+ (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ src = VMCI_INVALID_HANDLE;
+
+ return (vmci_doorbell_notify_as_guest(dst, priv_flags));
+}
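+
+/*
+ * Example (illustrative only, not part of this change): a minimal sketch of
+ * the doorbell life cycle from a guest client's point of view. The callback
+ * and the client data are hypothetical. Passing VMCI_INVALID_HANDLE lets
+ * vmci_doorbell_create() pick a handle, which is then handed to the peer out
+ * of band; the peer rings it with vmci_doorbell_notify(h,
+ * VMCI_NO_PRIVILEGE_FLAGS), and with VMCI_FLAG_DELAYED_CB the callback runs
+ * in a kernel thread instead of being invoked directly.
+ * vmci_doorbell_destroy() may block waiting for a pending callback.
+ *
+ *	static void my_doorbell_cb(void *client_data);
+ *
+ *	struct vmci_handle h = VMCI_INVALID_HANDLE;
+ *	int rv;
+ *
+ *	rv = vmci_doorbell_create(&h, VMCI_FLAG_DELAYED_CB,
+ *	    VMCI_NO_PRIVILEGE_FLAGS, my_doorbell_cb, my_state);
+ *	...
+ *	rv = vmci_doorbell_destroy(h);
+ */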
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_delayed_dispatch_cb --
+ *
+ * Calls the specified callback in a delayed context.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_delayed_dispatch_cb(void *data)
+{
+ struct vmci_doorbell_entry *entry = (struct vmci_doorbell_entry *)data;
+
+ ASSERT(data);
+
+ entry->notify_cb(entry->client_data);
+
+ vmci_resource_release(&entry->resource);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_doorbell_sync(void)
+{
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+ vmci_resource_sync();
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_register_notification_bitmap --
+ *
+ * Register the notification bitmap with the host.
+ *
+ * Results:
+ * true if the bitmap is registered successfully with the device, false
+ * otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_register_notification_bitmap(PPN bitmap_ppn)
+{
+ struct vmci_notify_bitmap_set_msg bitmap_set_msg;
+ int result;
+
+ /*
+ * Do not ASSERT() on the guest device here. This function can get
+ * called during device initialization, so the ASSERT() will fail even
+ * though the device is (almost) up.
+ */
+
+ bitmap_set_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_SET_NOTIFY_BITMAP);
+ bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ bitmap_set_msg.hdr.payload_size =
+ sizeof(bitmap_set_msg) - VMCI_DG_HEADERSIZE;
+ bitmap_set_msg.bitmap_ppn = bitmap_ppn;
+
+ result = vmci_send_datagram((struct vmci_datagram *)&bitmap_set_msg);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to register (PPN=%u) as "
+ "notification bitmap (error=%d).\n",
+ bitmap_ppn, result);
+ return (false);
+ }
+ return (true);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_fire_entries --
+ *
+ * Executes or schedules the handlers for a given notify index.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * Whatever the side effects of the handlers are.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_fire_entries(uint32_t notify_idx)
+{
+ struct vmci_doorbell_entry *iter;
+ uint32_t bucket = VMCI_DOORBELL_HASH(notify_idx);
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+
+ vmci_list_scan(iter, &vmci_doorbell_it.entries[bucket], idx_list_item) {
+ if (iter->idx == notify_idx &&
+ atomic_load_int(&iter->active) == 1) {
+ ASSERT(iter->notify_cb);
+ if (iter->run_delayed) {
+ int err;
+
+ vmci_resource_hold(&iter->resource);
+ err = vmci_schedule_delayed_work(
+ vmci_doorbell_delayed_dispatch_cb, iter);
+ if (err != VMCI_SUCCESS) {
+ vmci_resource_release(&iter->resource);
+ goto out;
+ }
+ } else
+ iter->notify_cb(iter->client_data);
+ }
+ }
+
+out:
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_scan_notification_bitmap --
+ *
+ * Scans the notification bitmap, collects pending notifications, resets
+ * the bitmap and invokes appropriate callbacks.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May schedule tasks, allocate memory and run callbacks.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_scan_notification_bitmap(uint8_t *bitmap)
+{
+ uint32_t idx;
+
+ ASSERT(bitmap);
+
+ for (idx = 0; idx < max_notify_idx; idx++) {
+ if (bitmap[idx] & 0x1) {
+ bitmap[idx] &= ~1;
+ vmci_doorbell_fire_entries(idx);
+ }
+ }
+}
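+
+/*
+ * Usage note (illustrative sketch): the expected flow is that the bus
+ * attachment code registers a page-sized bitmap with the device via
+ * vmci_register_notification_bitmap() on the page's PPN, and that the
+ * interrupt handler calls vmci_scan_notification_bitmap() on that page
+ * whenever VMCI_ICR_NOTIFICATION is raised; each set slot is cleared and the
+ * doorbells hashed to that index are fired or scheduled. The interrupt
+ * plumbing itself lives outside this file.
+ */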
Index: sys/dev/vmware/vmci/vmci_driver.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_driver.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI driver interface. */
+
+#ifndef _VMCI_DRIVER_H_
+#define _VMCI_DRIVER_H_
+
+#include <sys/types.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include "vmci_call_defs.h"
+#include "vmci_kernel_if.h"
+
+#ifndef VMCI_DEBUG_LOGGING
+#define VMCI_LOG_DEBUG(_args, ...)
+#else /* VMCI_DEBUG_LOGGING */
+#define VMCI_LOG_DEBUG(_args, ...) \
+ log(LOG_DEBUG, _args, ##__VA_ARGS__)
+#endif /* !VMCI_DEBUG_LOGGING */
+#define VMCI_LOG_INFO(_args, ...) \
+ log(LOG_INFO, _args, ##__VA_ARGS__)
+#define VMCI_LOG_WARNING(_args, ...) \
+ log(LOG_WARNING, _args, ##__VA_ARGS__)
+#define VMCI_LOG_ERROR(_args, ...) \
+ log(LOG_ERR, _args, ##__VA_ARGS__)
+
+int vmci_components_init(void);
+void vmci_components_cleanup(void);
+int vmci_send_datagram(struct vmci_datagram *dg);
+
+void vmci_util_init(void);
+void vmci_util_exit(void);
+bool vmci_check_host_capabilities(void);
+void vmci_read_datagrams_from_port(vmci_io_handle io_handle,
+ vmci_io_port dg_in_port, uint8_t *dg_in_buffer,
+ size_t dg_in_buffer_size);
+
+#endif /* !_VMCI_DRIVER_H_ */
Index: sys/dev/vmware/vmci/vmci_driver.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_driver.c
@@ -0,0 +1,481 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI initialization. */
+
+#include "vmci.h"
+#include "vmci_doorbell.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+
+#define LGPFX "vmci: "
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+static vmci_id ctx_update_sub_id = VMCI_INVALID_ID;
+static volatile int vm_context_id = VMCI_INVALID_ID;
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_cid_update --
+ *
+ * Gets called with the new context id if updated or resumed.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_util_cid_update(vmci_id sub_id, struct vmci_event_data *event_data,
+ void *client_data)
+{
+ struct vmci_event_payload_context *ev_payload;
+
+ ev_payload = vmci_event_data_payload(event_data);
+
+ if (sub_id != ctx_update_sub_id) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid subscriber (ID=0x%x).\n", sub_id);
+ return;
+ }
+ if (event_data == NULL || ev_payload->context_id == VMCI_INVALID_ID) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid event data.\n");
+ return;
+ }
+ VMCI_LOG_INFO(LGPFX"Updating context from (ID=0x%x) to (ID=0x%x) on "
+ "event (type=%d).\n", atomic_load_int(&vm_context_id),
+ ev_payload->context_id, event_data->event);
+ atomic_store_int(&vm_context_id, ev_payload->context_id);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_init --
+ *
+ * Subscribe to context id update event.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_util_init(void)
+{
+
+ /*
+ * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can update
+ * the internal context id when needed.
+ */
+ if (vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
+ vmci_util_cid_update, NULL, &ctx_update_sub_id) < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to subscribe to event "
+ "(type=%d).\n", VMCI_EVENT_CTX_ID_UPDATE);
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_exit --
+ *
+ * Cleanup; unsubscribes from the context id update event.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_util_exit(void)
+{
+
+ if (vmci_event_unsubscribe(ctx_update_sub_id) < VMCI_SUCCESS)
+ VMCI_LOG_WARNING(LGPFX"Failed to unsubscribe to event "
+ "(type=%d) with subscriber (ID=0x%x).\n",
+ VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_check_host_capabilities --
+ *
+ * Verify that the host supports the hypercalls we need. If it does not, try
+ * to find fallback hypercalls and use those instead.
+ *
+ * Results:
+ * true if required hypercalls (or fallback hypercalls) are supported by the
+ * host, false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static bool
+vmci_util_check_host_capabilities(void)
+{
+ struct vmci_resources_query_msg *msg;
+ struct vmci_datagram *check_msg;
+ int result;
+ uint32_t msg_size;
+
+ msg_size = sizeof(struct vmci_resources_query_hdr) +
+ VMCI_UTIL_NUM_RESOURCES * sizeof(vmci_resource);
+ check_msg = vmci_alloc_kernel_mem(msg_size, VMCI_MEMORY_NORMAL);
+
+ if (check_msg == NULL) {
+ VMCI_LOG_WARNING(LGPFX"Check host: Insufficient memory.\n");
+ return (false);
+ }
+
+ check_msg->dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_RESOURCES_QUERY);
+ check_msg->src = VMCI_ANON_SRC_HANDLE;
+ check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
+ msg = (struct vmci_resources_query_msg *)VMCI_DG_PAYLOAD(check_msg);
+
+ msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
+ msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+ result = vmci_send_datagram(check_msg);
+ vmci_free_kernel_mem(check_msg, msg_size);
+
+ /* We need the vector. There are no fallbacks. */
+ return (result == 0x1);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_check_host_capabilities --
+ *
+ * Tell host which guestcalls we support and let each API check that the
+ * host supports the hypercalls it needs. If a hypercall is not supported,
+ * the API can check for a fallback hypercall, or fail the check.
+ *
+ * Results:
+ * true if successful, false otherwise.
+ *
+ * Side effects:
+ * Fallback mechanisms may be enabled in the API and vmmon.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_check_host_capabilities(void)
+{
+ bool result;
+
+ result = vmci_event_check_host_capabilities();
+ result &= vmci_datagram_check_host_capabilities();
+ result &= vmci_util_check_host_capabilities();
+
+ if (!result) {
+ /*
+ * If it failed, then make sure this goes to the system event
+ * log.
+ */
+ VMCI_LOG_WARNING(LGPFX"Host capability checked failed.\n");
+ } else
+ VMCI_LOG_DEBUG(LGPFX"Host capability check passed.\n");
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_read_datagrams_from_port --
+ *
+ * Reads datagrams from the data in port and dispatches them. We always
+ * start reading datagrams into only the first page of the datagram buffer.
+ * If the datagrams don't fit into one page, we use the maximum datagram
+ * buffer size for the remainder of the invocation. This is a simple
+ * heuristic for not penalizing small datagrams.
+ *
+ * This function assumes that it has exclusive access to the data in port
+ * for the duration of the call.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Datagram handlers may be invoked.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_read_datagrams_from_port(vmci_io_handle io_handle, vmci_io_port dg_in_port,
+ uint8_t *dg_in_buffer, size_t dg_in_buffer_size)
+{
+ struct vmci_datagram *dg;
+ size_t current_dg_in_buffer_size;
+ size_t remaining_bytes;
+
+ current_dg_in_buffer_size = PAGE_SIZE;
+
+ ASSERT(dg_in_buffer_size >= PAGE_SIZE);
+
+ vmci_read_port_bytes(io_handle, dg_in_port, dg_in_buffer,
+ current_dg_in_buffer_size);
+ dg = (struct vmci_datagram *)dg_in_buffer;
+ remaining_bytes = current_dg_in_buffer_size;
+
+ while (dg->dst.resource != VMCI_INVALID_ID ||
+ remaining_bytes > PAGE_SIZE) {
+ size_t dg_in_size;
+
+ /*
+ * When the input buffer spans multiple pages, a datagram can
+ * start on any page boundary in the buffer.
+ */
+
+ if (dg->dst.resource == VMCI_INVALID_ID) {
+ ASSERT(remaining_bytes > PAGE_SIZE);
+ dg = (struct vmci_datagram *)ROUNDUP((uintptr_t)dg + 1,
+ PAGE_SIZE);
+ ASSERT((uint8_t *)dg < dg_in_buffer +
+ current_dg_in_buffer_size);
+ remaining_bytes = (size_t)(dg_in_buffer +
+ current_dg_in_buffer_size - (uint8_t *)dg);
+ continue;
+ }
+
+ dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
+
+ if (dg_in_size <= dg_in_buffer_size) {
+ int result;
+
+ /*
+			 * If the remaining bytes in the datagram buffer don't
+			 * contain the complete datagram, we first make sure we
+			 * have enough room for it and then we read the remainder
+			 * of the datagram and possibly any following datagrams.
+ */
+
+ if (dg_in_size > remaining_bytes) {
+
+ if (remaining_bytes !=
+ current_dg_in_buffer_size) {
+
+ /*
+				 * We move the partial datagram to the
+				 * front and read the remainder of the
+				 * datagram and possibly any following
+				 * datagrams into the following bytes.
+ */
+
+ memmove(dg_in_buffer, dg_in_buffer +
+ current_dg_in_buffer_size -
+ remaining_bytes,
+ remaining_bytes);
+
+ dg = (struct vmci_datagram *)
+ dg_in_buffer;
+ }
+
+ if (current_dg_in_buffer_size !=
+ dg_in_buffer_size)
+ current_dg_in_buffer_size =
+ dg_in_buffer_size;
+
+ vmci_read_port_bytes(io_handle, dg_in_port,
+ dg_in_buffer + remaining_bytes,
+ current_dg_in_buffer_size -
+ remaining_bytes);
+ }
+
+ /*
+ * We special case event datagrams from the
+ * hypervisor.
+ */
+ if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+ dg->dst.resource == VMCI_EVENT_HANDLER)
+ result = vmci_event_dispatch(dg);
+ else
+ result =
+ vmci_datagram_invoke_guest_handler(dg);
+ if (result < VMCI_SUCCESS)
+ VMCI_LOG_DEBUG(LGPFX"Datagram with resource"
+ " (ID=0x%x) failed (err=%d).\n",
+ dg->dst.resource, result);
+
+ /* On to the next datagram. */
+ dg = (struct vmci_datagram *)((uint8_t *)dg +
+ dg_in_size);
+ } else {
+ size_t bytes_to_skip;
+
+ /*
+ * Datagram doesn't fit in datagram buffer of maximal
+ * size. We drop it.
+ */
+
+ VMCI_LOG_DEBUG(LGPFX"Failed to receive datagram "
+ "(size=%zu bytes).\n", dg_in_size);
+
+ bytes_to_skip = dg_in_size - remaining_bytes;
+ if (current_dg_in_buffer_size != dg_in_buffer_size)
+ current_dg_in_buffer_size = dg_in_buffer_size;
+ for (;;) {
+ vmci_read_port_bytes(io_handle, dg_in_port,
+ dg_in_buffer, current_dg_in_buffer_size);
+ if (bytes_to_skip <=
+ current_dg_in_buffer_size)
+ break;
+ bytes_to_skip -= current_dg_in_buffer_size;
+ }
+ dg = (struct vmci_datagram *)(dg_in_buffer +
+ bytes_to_skip);
+ }
+
+ remaining_bytes = (size_t) (dg_in_buffer +
+ current_dg_in_buffer_size - (uint8_t *)dg);
+
+ if (remaining_bytes < VMCI_DG_HEADERSIZE) {
+ /* Get the next batch of datagrams. */
+
+ vmci_read_port_bytes(io_handle, dg_in_port,
+ dg_in_buffer, current_dg_in_buffer_size);
+ dg = (struct vmci_datagram *)dg_in_buffer;
+ remaining_bytes = current_dg_in_buffer_size;
+ }
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_get_context_id --
+ *
+ *	Returns the context ID of the virtual machine. The ID is obtained from
+ *	the hypervisor with a VMCI_GET_CONTEXT_ID datagram on first use and is
+ *	cached for subsequent calls.
+ *
+ * Results:
+ * Context ID.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+vmci_id
+vmci_get_context_id(void)
+{
+ if (atomic_load_int(&vm_context_id) == VMCI_INVALID_ID) {
+ uint32_t result;
+ struct vmci_datagram get_cid_msg;
+ get_cid_msg.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_GET_CONTEXT_ID);
+ get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
+ get_cid_msg.payload_size = 0;
+ result = vmci_send_datagram(&get_cid_msg);
+ atomic_store_int(&vm_context_id, result);
+ }
+ return (atomic_load_int(&vm_context_id));
+}
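+
+/*
+ * Editor's illustrative sketch (not part of this change): a guest component
+ * can query its own context ID directly; the first call issues the
+ * VMCI_GET_CONTEXT_ID datagram and later calls return the cached value.
+ *
+ *	vmci_id cid;
+ *
+ *	cid = vmci_get_context_id();
+ *	VMCI_LOG_DEBUG(LGPFX"Running as context 0x%x.\n", cid);
+ */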
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_components_init --
+ *
+ * Initializes VMCI components and registers core hypercalls.
+ *
+ * Results:
+ * VMCI_SUCCESS if successful, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_components_init(void)
+{
+ int result;
+
+ result = vmci_resource_init();
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_resource "
+ "(result=%d).\n", result);
+ goto error_exit;
+ }
+
+ result = vmci_event_init();
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_event "
+ "(result=%d).\n", result);
+ goto resource_exit;
+ }
+
+ result = vmci_doorbell_init();
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_doorbell "
+ "(result=%d).\n", result);
+ goto event_exit;
+ }
+
+ VMCI_LOG_DEBUG(LGPFX"components initialized.\n");
+ return (VMCI_SUCCESS);
+
+event_exit:
+ vmci_event_exit();
+
+resource_exit:
+ vmci_resource_exit();
+
+error_exit:
+ return (result);
+}
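+
+/*
+ * Editor's illustrative sketch (not part of this change): the expected
+ * pairing with vmci_components_cleanup() from an attach/detach path; the
+ * ENXIO mapping is only an example.
+ *
+ *	if (vmci_components_init() < VMCI_SUCCESS)
+ *		return (ENXIO);
+ *	...
+ *	vmci_components_cleanup();
+ */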
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_components_cleanup --
+ *
+ * Cleans up VMCI components.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_components_cleanup(void)
+{
+
+ vmci_doorbell_exit();
+ vmci_event_exit();
+ vmci_resource_exit();
+}
Index: sys/dev/vmware/vmci/vmci_event.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_event.h
@@ -0,0 +1,21 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Event code for the vmci guest driver. */
+
+#ifndef _VMCI_EVENT_H_
+#define _VMCI_EVENT_H_
+
+#include "vmci_call_defs.h"
+#include "vmci_defs.h"
+
+int vmci_event_init(void);
+void vmci_event_exit(void);
+void vmci_event_sync(void);
+int vmci_event_dispatch(struct vmci_datagram *msg);
+bool vmci_event_check_host_capabilities(void);
+
+#endif /* !_VMCI_EVENT_H_ */
Index: sys/dev/vmware/vmci/vmci_event.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_event.c
@@ -0,0 +1,693 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements VMCI Event code. */
+
+#include "vmci.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_kernel_if.h"
+
+#define LGPFX "vmci_event: "
+#define EVENT_MAGIC 0xEABE0000
+
+struct vmci_subscription {
+ vmci_id id;
+ int ref_count;
+ bool run_delayed;
+ vmci_event destroy_event;
+ vmci_event_type event;
+ vmci_event_cb callback;
+ void *callback_data;
+ vmci_list_item(vmci_subscription) subscriber_list_item;
+};
+
+static struct vmci_subscription *vmci_event_find(vmci_id sub_id);
+static int vmci_event_deliver(struct vmci_event_msg *event_msg);
+static int vmci_event_register_subscription(struct vmci_subscription *sub,
+ vmci_event_type event, uint32_t flags,
+ vmci_event_cb callback, void *callback_data);
+static struct vmci_subscription *vmci_event_unregister_subscription(
+ vmci_id sub_id);
+
+static vmci_list(vmci_subscription) subscriber_array[VMCI_EVENT_MAX];
+static vmci_lock subscriber_lock;
+
+struct vmci_delayed_event_info {
+ struct vmci_subscription *sub;
+ uint8_t event_payload[sizeof(struct vmci_event_data_max)];
+};
+
+struct vmci_event_ref {
+ struct vmci_subscription *sub;
+ vmci_list_item(vmci_event_ref) list_item;
+};
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_init --
+ *
+ * General init code.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_init(void)
+{
+ int i;
+
+ for (i = 0; i < VMCI_EVENT_MAX; i++)
+ vmci_list_init(&subscriber_array[i]);
+
+ return (vmci_init_lock(&subscriber_lock, "VMCI Event subscriber lock"));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_exit --
+ *
+ * General exit code.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_event_exit(void)
+{
+ struct vmci_subscription *iter, *iter_2;
+ vmci_event_type e;
+
+ /* We free all memory at exit. */
+ for (e = 0; e < VMCI_EVENT_MAX; e++) {
+ vmci_list_scan_safe(iter, &subscriber_array[e],
+ subscriber_list_item, iter_2) {
+
+ /*
+ * We should never get here because all events should
+ * have been unregistered before we try to unload the
+			 * driver module. Also, delayed callbacks could still
+			 * be firing, so this cleanup would not be safe. Still,
+			 * it is better to free the memory than to leak it, so
+			 * we leave this code in just in case.
+ */
+ ASSERT(false);
+
+ vmci_free_kernel_mem(iter, sizeof(*iter));
+ }
+ }
+ vmci_cleanup_lock(&subscriber_lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_event_sync(void)
+{
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_release_lock_bh(&subscriber_lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_check_host_capabilities --
+ *
+ * Verify that the host supports the hypercalls we need. If it does not,
+ * try to find fallback hypercalls and use those instead.
+ *
+ * Results:
+ * true if required hypercalls (or fallback hypercalls) are
+ * supported by the host, false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_event_check_host_capabilities(void)
+{
+
+ /* vmci_event does not require any hypercalls. */
+ return (true);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_get --
+ *
+ * Gets a reference to the given struct vmci_subscription.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_event_get(struct vmci_subscription *entry)
+{
+
+ ASSERT(entry);
+
+ entry->ref_count++;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_release --
+ *
+ * Releases the given struct vmci_subscription.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Fires the destroy event if the reference count has gone to zero.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_event_release(struct vmci_subscription *entry)
+{
+
+ ASSERT(entry);
+ ASSERT(entry->ref_count > 0);
+
+ entry->ref_count--;
+ if (entry->ref_count == 0)
+ vmci_signal_event(&entry->destroy_event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * event_release_cb --
+ *
+ * Callback to release the event entry reference. It is called by the
+ * vmci_wait_on_event function before it blocks.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+event_release_cb(void *client_data)
+{
+ struct vmci_subscription *sub = (struct vmci_subscription *)client_data;
+
+ ASSERT(sub);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_event_release(sub);
+ vmci_release_lock_bh(&subscriber_lock);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_find --
+ *
+ * Find entry. Assumes lock is held.
+ *
+ * Results:
+ * Entry if found, NULL if not.
+ *
+ * Side effects:
+ * Increments the struct vmci_subscription refcount if an entry is found.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_subscription *
+vmci_event_find(vmci_id sub_id)
+{
+ struct vmci_subscription *iter;
+ vmci_event_type e;
+
+ for (e = 0; e < VMCI_EVENT_MAX; e++) {
+ vmci_list_scan(iter, &subscriber_array[e],
+ subscriber_list_item) {
+ if (iter->id == sub_id) {
+ vmci_event_get(iter);
+ return (iter);
+ }
+ }
+ }
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_delayed_dispatch_cb --
+ *
+ * Calls the specified callback in a delayed context.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_event_delayed_dispatch_cb(void *data)
+{
+ struct vmci_delayed_event_info *event_info;
+ struct vmci_subscription *sub;
+ struct vmci_event_data *ed;
+
+ event_info = (struct vmci_delayed_event_info *)data;
+
+ ASSERT(event_info);
+ ASSERT(event_info->sub);
+
+ sub = event_info->sub;
+ ed = (struct vmci_event_data *)event_info->event_payload;
+
+ sub->callback(sub->id, ed, sub->callback_data);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_event_release(sub);
+ vmci_release_lock_bh(&subscriber_lock);
+
+ vmci_free_kernel_mem(event_info, sizeof(*event_info));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_deliver --
+ *
+ * Actually delivers the events to the subscribers.
+ *
+ * Results:
+ *	VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * The callback function for each subscriber is invoked.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_event_deliver(struct vmci_event_msg *event_msg)
+{
+ struct vmci_subscription *iter;
+ int err = VMCI_SUCCESS;
+
+ vmci_list(vmci_event_ref) no_delay_list;
+ vmci_list_init(&no_delay_list);
+
+ ASSERT(event_msg);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_list_scan(iter, &subscriber_array[event_msg->event_data.event],
+ subscriber_list_item) {
+ if (iter->run_delayed) {
+ struct vmci_delayed_event_info *event_info;
+ if ((event_info =
+ vmci_alloc_kernel_mem(sizeof(*event_info),
+ VMCI_MEMORY_ATOMIC)) == NULL) {
+ err = VMCI_ERROR_NO_MEM;
+ goto out;
+ }
+
+ vmci_event_get(iter);
+
+ memset(event_info, 0, sizeof(*event_info));
+ memcpy(event_info->event_payload,
+ VMCI_DG_PAYLOAD(event_msg),
+ (size_t)event_msg->hdr.payload_size);
+ event_info->sub = iter;
+ err =
+ vmci_schedule_delayed_work(
+ vmci_event_delayed_dispatch_cb, event_info);
+ if (err != VMCI_SUCCESS) {
+ vmci_event_release(iter);
+ vmci_free_kernel_mem(
+ event_info, sizeof(*event_info));
+ goto out;
+ }
+
+ } else {
+ struct vmci_event_ref *event_ref;
+
+ /*
+ * We construct a local list of subscribers and release
+ * subscriber_lock before invoking the callbacks. This
+ * is similar to delayed callbacks, but callbacks are
+ * invoked right away here.
+ */
+ if ((event_ref = vmci_alloc_kernel_mem(
+ sizeof(*event_ref), VMCI_MEMORY_ATOMIC)) == NULL) {
+ err = VMCI_ERROR_NO_MEM;
+ goto out;
+ }
+
+ vmci_event_get(iter);
+ event_ref->sub = iter;
+ vmci_list_insert(&no_delay_list, event_ref, list_item);
+ }
+ }
+
+out:
+ vmci_release_lock_bh(&subscriber_lock);
+
+ if (!vmci_list_empty(&no_delay_list)) {
+ struct vmci_event_data *ed;
+ struct vmci_event_ref *iter;
+ struct vmci_event_ref *iter_2;
+
+ vmci_list_scan_safe(iter, &no_delay_list, list_item, iter_2) {
+ struct vmci_subscription *cur;
+ uint8_t event_payload[sizeof(
+ struct vmci_event_data_max)];
+
+ cur = iter->sub;
+
+ /*
+ * We set event data before each callback to ensure
+ * isolation.
+ */
+ memset(event_payload, 0, sizeof(event_payload));
+ memcpy(event_payload, VMCI_DG_PAYLOAD(event_msg),
+ (size_t)event_msg->hdr.payload_size);
+ ed = (struct vmci_event_data *)event_payload;
+ cur->callback(cur->id, ed, cur->callback_data);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_event_release(cur);
+ vmci_release_lock_bh(&subscriber_lock);
+ vmci_free_kernel_mem(iter, sizeof(*iter));
+ }
+ }
+
+ return (err);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_dispatch --
+ *
+ * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all
+ * subscribers for given event.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_dispatch(struct vmci_datagram *msg)
+{
+ struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg;
+
+ ASSERT(msg &&
+ msg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+ msg->dst.resource == VMCI_EVENT_HANDLER);
+
+ if (msg->payload_size < sizeof(vmci_event_type) ||
+ msg->payload_size > sizeof(struct vmci_event_data_max))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (!VMCI_EVENT_VALID(event_msg->event_data.event))
+ return (VMCI_ERROR_EVENT_UNKNOWN);
+
+ vmci_event_deliver(event_msg);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_register_subscription --
+ *
+ * Initialize and add subscription to subscriber list.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_event_register_subscription(struct vmci_subscription *sub,
+ vmci_event_type event, uint32_t flags, vmci_event_cb callback,
+ void *callback_data)
+{
+#define VMCI_EVENT_MAX_ATTEMPTS 10
+ static vmci_id subscription_id = 0;
+ int result;
+ uint32_t attempts = 0;
+ bool success;
+
+ ASSERT(sub);
+
+ if (!VMCI_EVENT_VALID(event) || callback == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to subscribe to event"
+ " (type=%d) (callback=%p) (data=%p).\n",
+ event, callback, callback_data);
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ if (!vmci_can_schedule_delayed_work()) {
+ /*
+ * If the platform doesn't support delayed work callbacks then
+ * don't allow registration for them.
+ */
+ if (flags & VMCI_FLAG_EVENT_DELAYED_CB)
+ return (VMCI_ERROR_INVALID_ARGS);
+ sub->run_delayed = false;
+ } else {
+ /*
+ * The platform supports delayed work callbacks. Honor the
+		 * requested flags.
+ */
+ sub->run_delayed = (flags & VMCI_FLAG_EVENT_DELAYED_CB) ?
+ true : false;
+ }
+
+ sub->ref_count = 1;
+ sub->event = event;
+ sub->callback = callback;
+ sub->callback_data = callback_data;
+
+ vmci_grab_lock_bh(&subscriber_lock);
+
+ for (success = false, attempts = 0;
+ success == false && attempts < VMCI_EVENT_MAX_ATTEMPTS;
+ attempts++) {
+ struct vmci_subscription *existing_sub = NULL;
+
+ /*
+		 * We try to get an id a couple of times before claiming we are
+ * out of resources.
+ */
+ sub->id = ++subscription_id;
+
+ /* Test for duplicate id. */
+ existing_sub = vmci_event_find(sub->id);
+ if (existing_sub == NULL) {
+ /* We succeeded if we didn't find a duplicate. */
+ success = true;
+ } else
+ vmci_event_release(existing_sub);
+ }
+
+ if (success) {
+ vmci_create_event(&sub->destroy_event);
+ vmci_list_insert(&subscriber_array[event], sub,
+ subscriber_list_item);
+ result = VMCI_SUCCESS;
+ } else
+ result = VMCI_ERROR_NO_RESOURCES;
+
+ vmci_release_lock_bh(&subscriber_lock);
+ return (result);
+#undef VMCI_EVENT_MAX_ATTEMPTS
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_unregister_subscription --
+ *
+ * Remove subscription from subscriber list.
+ *
+ * Results:
+ * struct vmci_subscription when found, NULL otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_subscription *
+vmci_event_unregister_subscription(vmci_id sub_id)
+{
+ struct vmci_subscription *s;
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ s = vmci_event_find(sub_id);
+ if (s != NULL) {
+ vmci_event_release(s);
+ vmci_list_remove(s, subscriber_list_item);
+ }
+ vmci_release_lock_bh(&subscriber_lock);
+
+ if (s != NULL) {
+ vmci_wait_on_event(&s->destroy_event, event_release_cb, s);
+ vmci_destroy_event(&s->destroy_event);
+ }
+
+ return (s);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_subscribe --
+ *
+ * Subscribe to given event. The callback specified can be fired in
+ * different contexts depending on what flag is specified while registering.
+ * If flags contains VMCI_FLAG_EVENT_NONE then the callback is fired with
+ *	the subscriber lock held (and BH context on the guest). If flags contains
+ * VMCI_FLAG_EVENT_DELAYED_CB then the callback is fired with no locks held
+ * in thread context. This is useful because other vmci_event functions can
+ * be called, but it also increases the chances that an event will be
+ * dropped.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_subscribe(vmci_event_type event, vmci_event_cb callback,
+ void *callback_data, vmci_id *subscription_id)
+{
+ int retval;
+ uint32_t flags = VMCI_FLAG_EVENT_NONE;
+ struct vmci_subscription *s = NULL;
+
+ if (subscription_id == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid subscription (NULL).\n");
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ s = vmci_alloc_kernel_mem(sizeof(*s), VMCI_MEMORY_NORMAL);
+ if (s == NULL)
+ return (VMCI_ERROR_NO_MEM);
+
+ retval = vmci_event_register_subscription(s, event, flags,
+ callback, callback_data);
+ if (retval < VMCI_SUCCESS) {
+ vmci_free_kernel_mem(s, sizeof(*s));
+ return (retval);
+ }
+
+ *subscription_id = s->id;
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_unsubscribe --
+ *
+ *	Unsubscribes from the given event. Removes the subscription from the
+ *	list and frees it.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_unsubscribe(vmci_id sub_id)
+{
+ struct vmci_subscription *s;
+
+ /*
+	 * Return subscription. At this point we know no one else is accessing
+ * the subscription so we can free it.
+ */
+ s = vmci_event_unregister_subscription(sub_id);
+ if (s == NULL)
+ return (VMCI_ERROR_NOT_FOUND);
+ vmci_free_kernel_mem(s, sizeof(*s));
+
+ return (VMCI_SUCCESS);
+}
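+
+/*
+ * Editor's illustrative sketch (not part of this change): a typical client
+ * subscribes a callback for an event type and unsubscribes when done. The
+ * callback matches the vmci_event_cb typedef from vmci_kernel_api_1.h;
+ * "my_ctx" and the event type used here are placeholders.
+ *
+ *	static void
+ *	my_event_cb(vmci_id sub_id, struct vmci_event_data *ed, void *data)
+ *	{
+ *		VMCI_LOG_DEBUG(LGPFX"Got event %d.\n", ed->event);
+ *	}
+ *
+ *	vmci_id sub_id;
+ *
+ *	if (vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, my_event_cb,
+ *	    my_ctx, &sub_id) == VMCI_SUCCESS) {
+ *		...
+ *		vmci_event_unsubscribe(sub_id);
+ *	}
+ */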
Index: sys/dev/vmware/vmci/vmci_hashtable.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_hashtable.h
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Hash table for use in the APIs. */
+
+#ifndef _VMCI_HASHTABLE_H_
+#define _VMCI_HASHTABLE_H_
+
+#include "vmci_defs.h"
+#include "vmci_kernel_if.h"
+
+struct vmci_hash_entry {
+ struct vmci_handle handle;
+ int ref_count;
+ struct vmci_hash_entry *next;
+};
+
+struct vmci_hashtable {
+ struct vmci_hash_entry **entries;
+ /* Number of buckets in above array. */
+ int size;
+ vmci_lock lock;
+};
+
+struct vmci_hashtable *vmci_hashtable_create(int size);
+void vmci_hashtable_destroy(struct vmci_hashtable *table);
+void vmci_hashtable_init_entry(struct vmci_hash_entry *entry,
+ struct vmci_handle handle);
+int vmci_hashtable_add_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+int vmci_hashtable_remove_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+struct vmci_hash_entry *vmci_hashtable_get_entry(struct vmci_hashtable *table,
+ struct vmci_handle handle);
+void vmci_hashtable_hold_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+int vmci_hashtable_release_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+bool vmci_hashtable_entry_exists(struct vmci_hashtable *table,
+ struct vmci_handle handle);
+void vmci_hashtable_sync(struct vmci_hashtable *table);
+
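+/*
+ * Editor's illustrative sketch (not part of this change): a resource
+ * typically embeds a struct vmci_hash_entry, initializes it with its
+ * handle and adds it to a table; lookups return the entry with an extra
+ * reference held. "res", "handle" and the bucket count are placeholders.
+ *
+ *	struct vmci_hashtable *table;
+ *	struct vmci_hash_entry *entry;
+ *
+ *	table = vmci_hashtable_create(128);
+ *	vmci_hashtable_init_entry(&res->entry, handle);
+ *	(void)vmci_hashtable_add_entry(table, &res->entry);
+ *
+ *	entry = vmci_hashtable_get_entry(table, handle);
+ *	if (entry != NULL)
+ *		(void)vmci_hashtable_release_entry(table, entry);
+ */
+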
+#endif /* !_VMCI_HASHTABLE_H_ */
Index: sys/dev/vmware/vmci/vmci_hashtable.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_hashtable.c
@@ -0,0 +1,565 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Implementation of the VMCI Hashtable. */
+
+#include "vmci.h"
+#include "vmci_driver.h"
+#include "vmci_hashtable.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_utils.h"
+
+#define LGPFX "vmci_hashtable: "
+
+#define VMCI_HASHTABLE_HASH(_h, _sz) \
+ vmci_hash_id(VMCI_HANDLE_TO_RESOURCE_ID(_h), (_sz))
+
+static int hashtable_unlink_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+static bool vmci_hashtable_entry_exists_locked(struct vmci_hashtable *table,
+ struct vmci_handle handle);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_create --
+ *
+ * Creates a hashtable.
+ *
+ * Result:
+ *	Pointer to the created hashtable on success, NULL otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_hashtable *
+vmci_hashtable_create(int size)
+{
+ struct vmci_hashtable *table;
+
+ table = vmci_alloc_kernel_mem(sizeof(*table),
+ VMCI_MEMORY_NORMAL);
+ if (table == NULL)
+ return (NULL);
+ memset(table, 0, sizeof(*table));
+
+ table->entries = vmci_alloc_kernel_mem(sizeof(*table->entries) * size,
+ VMCI_MEMORY_NORMAL);
+ if (table->entries == NULL) {
+ vmci_free_kernel_mem(table, sizeof(*table));
+ return (NULL);
+ }
+ memset(table->entries, 0, sizeof(*table->entries) * size);
+ table->size = size;
+ if (vmci_init_lock(&table->lock, "VMCI Hashtable lock") <
+ VMCI_SUCCESS) {
+ vmci_free_kernel_mem(table->entries, sizeof(*table->entries) * size);
+ vmci_free_kernel_mem(table, sizeof(*table));
+ return (NULL);
+ }
+
+ return (table);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_destroy --
+ *
+ * This function should be called at module exit time. We rely on the
+ *	module ref count to ensure that no one is accessing any hash table
+ * entries at this point in time. Hence we should be able to just remove
+ * all entries from the hash table.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_hashtable_destroy(struct vmci_hashtable *table)
+{
+
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+ vmci_free_kernel_mem(table->entries, sizeof(*table->entries) *
+ table->size);
+ table->entries = NULL;
+ vmci_release_lock_bh(&table->lock);
+ vmci_cleanup_lock(&table->lock);
+ vmci_free_kernel_mem(table, sizeof(*table));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_init_entry --
+ *
+ * Initializes a hash entry.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+void
+vmci_hashtable_init_entry(struct vmci_hash_entry *entry,
+ struct vmci_handle handle)
+{
+
+ ASSERT(entry);
+ entry->handle = handle;
+ entry->ref_count = 0;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_add_entry --
+ *
+ * Adds an entry to the hashtable.
+ *
+ * Result:
+ *	VMCI_SUCCESS if the entry was added, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_hashtable_add_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int idx;
+
+ ASSERT(entry);
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+
+ if (vmci_hashtable_entry_exists_locked(table, entry->handle)) {
+ VMCI_LOG_DEBUG(LGPFX"Entry (handle=0x%x:0x%x) already "
+ "exists.\n", entry->handle.context,
+ entry->handle.resource);
+ vmci_release_lock_bh(&table->lock);
+ return (VMCI_ERROR_DUPLICATE_ENTRY);
+ }
+
+ idx = VMCI_HASHTABLE_HASH(entry->handle, table->size);
+ ASSERT(idx < table->size);
+
+ /* New entry is added to top/front of hash bucket. */
+ entry->ref_count++;
+ entry->next = table->entries[idx];
+ table->entries[idx] = entry;
+ vmci_release_lock_bh(&table->lock);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_remove_entry --
+ *
+ * Removes an entry from the hashtable.
+ *
+ * Result:
+ *	VMCI_SUCCESS_ENTRY_DEAD if the last reference was released,
+ *	VMCI_SUCCESS or an error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_hashtable_remove_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result;
+
+ ASSERT(table);
+ ASSERT(entry);
+
+ vmci_grab_lock_bh(&table->lock);
+
+ /* First unlink the entry. */
+ result = hashtable_unlink_entry(table, entry);
+ if (result != VMCI_SUCCESS) {
+ /* We failed to find the entry. */
+ goto done;
+ }
+
+ /* Decrement refcount and check if this is last reference. */
+ entry->ref_count--;
+ if (entry->ref_count == 0) {
+ result = VMCI_SUCCESS_ENTRY_DEAD;
+ goto done;
+ }
+
+done:
+ vmci_release_lock_bh(&table->lock);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_get_entry_locked --
+ *
+ *	Looks up an entry in the hash table. Assumes the table lock is held.
+ *
+ * Result:
+ * If the element is found, a pointer to the element is returned.
+ * Otherwise NULL is returned.
+ *
+ * Side effects:
+ * The reference count of the returned element is increased.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_hash_entry *
+vmci_hashtable_get_entry_locked(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ struct vmci_hash_entry *cur = NULL;
+ int idx;
+
+ ASSERT(!VMCI_HANDLE_EQUAL(handle, VMCI_INVALID_HANDLE));
+ ASSERT(table);
+
+ idx = VMCI_HASHTABLE_HASH(handle, table->size);
+
+ cur = table->entries[idx];
+ while (true) {
+ if (cur == NULL)
+ break;
+
+ if (VMCI_HANDLE_TO_RESOURCE_ID(cur->handle) ==
+ VMCI_HANDLE_TO_RESOURCE_ID(handle)) {
+ if ((VMCI_HANDLE_TO_CONTEXT_ID(cur->handle) ==
+ VMCI_HANDLE_TO_CONTEXT_ID(handle)) ||
+ (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(cur->handle))) {
+ cur->ref_count++;
+ break;
+ }
+ }
+ cur = cur->next;
+ }
+
+ return (cur);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_get_entry --
+ *
+ * Gets an entry from the hashtable.
+ *
+ * Result:
+ *	Pointer to the entry if found, NULL otherwise.
+ *
+ * Side effects:
+ *	The reference count of the returned entry is increased.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_hash_entry *
+vmci_hashtable_get_entry(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ struct vmci_hash_entry *entry;
+
+ if (VMCI_HANDLE_EQUAL(handle, VMCI_INVALID_HANDLE))
+ return (NULL);
+
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+ entry = vmci_hashtable_get_entry_locked(table, handle);
+ vmci_release_lock_bh(&table->lock);
+
+ return (entry);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_hold_entry --
+ *
+ * Hold the given entry. This will increment the entry's reference count.
+ *	This is like vmci_hashtable_get_entry() but without having to look up
+ *	the entry by handle.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_hashtable_hold_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+
+ ASSERT(table);
+ ASSERT(entry);
+
+ vmci_grab_lock_bh(&table->lock);
+ entry->ref_count++;
+ vmci_release_lock_bh(&table->lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_release_entry_locked --
+ *
+ * Releases an element previously obtained with
+ * vmci_hashtable_get_entry_locked.
+ *
+ * Result:
+ * If the entry is removed from the hash table, VMCI_SUCCESS_ENTRY_DEAD
+ * is returned. Otherwise, VMCI_SUCCESS is returned.
+ *
+ * Side effects:
+ * The reference count of the entry is decreased and the entry is removed
+ *	from the hash table when the count reaches 0.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_hashtable_release_entry_locked(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result = VMCI_SUCCESS;
+
+ ASSERT(table);
+ ASSERT(entry);
+
+ entry->ref_count--;
+ /* Check if this is last reference and report if so. */
+ if (entry->ref_count == 0) {
+
+ /*
+ * Remove entry from hash table if not already removed. This
+		 * could have happened already because
+		 * vmci_hashtable_remove_entry was called to unlink it. We
+		 * ignore it if the entry is not found. Datagram handles will
+		 * often have remove_entry called, whereas shared memory
+		 * regions rely on release_entry to unlink the entry, since the
+		 * creator does not call remove_entry when it detaches.
+ */
+
+ hashtable_unlink_entry(table, entry);
+ result = VMCI_SUCCESS_ENTRY_DEAD;
+ }
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_release_entry --
+ *
+ * Releases an entry from the hashtable.
+ *
+ * Result:
+ *	VMCI_SUCCESS_ENTRY_DEAD if the entry was removed from the table,
+ *	VMCI_SUCCESS otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_hashtable_release_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result;
+
+ ASSERT(table);
+ vmci_grab_lock_bh(&table->lock);
+ result = vmci_hashtable_release_entry_locked(table, entry);
+ vmci_release_lock_bh(&table->lock);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_entry_exists --
+ *
+ *	Returns whether an entry exists in the hashtable.
+ *
+ * Result:
+ * true if handle already in hashtable. false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_hashtable_entry_exists(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ bool exists;
+
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+ exists = vmci_hashtable_entry_exists_locked(table, handle);
+ vmci_release_lock_bh(&table->lock);
+
+ return (exists);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_entry_exists_locked --
+ *
+ *	Like vmci_hashtable_entry_exists(), but assumes that the caller already
+ *	holds the table lock.
+ *
+ * Result:
+ * true if handle already in hashtable. false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static bool
+vmci_hashtable_entry_exists_locked(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ struct vmci_hash_entry *entry;
+ int idx;
+
+ ASSERT(table);
+
+ idx = VMCI_HASHTABLE_HASH(handle, table->size);
+
+ entry = table->entries[idx];
+ while (entry) {
+ if (VMCI_HANDLE_TO_RESOURCE_ID(entry->handle) ==
+ VMCI_HANDLE_TO_RESOURCE_ID(handle))
+ if ((VMCI_HANDLE_TO_CONTEXT_ID(entry->handle) ==
+ VMCI_HANDLE_TO_CONTEXT_ID(handle)) ||
+ (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(handle)) ||
+ (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(entry->handle)))
+ return (true);
+ entry = entry->next;
+ }
+
+ return (false);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * hashtable_unlink_entry --
+ *
+ *	Unlinks the given entry from its hash chain. Assumes the caller holds
+ *	the table lock.
+ *
+ * Result:
+ *	VMCI_SUCCESS if the entry was unlinked, VMCI_ERROR_NOT_FOUND otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+hashtable_unlink_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result;
+ struct vmci_hash_entry *prev, *cur;
+ int idx;
+
+ idx = VMCI_HASHTABLE_HASH(entry->handle, table->size);
+
+ prev = NULL;
+ cur = table->entries[idx];
+ while (true) {
+ if (cur == NULL) {
+ result = VMCI_ERROR_NOT_FOUND;
+ break;
+ }
+ if (VMCI_HANDLE_EQUAL(cur->handle, entry->handle)) {
+ ASSERT(cur == entry);
+
+ /* Remove entry and break. */
+ if (prev)
+ prev->next = cur->next;
+ else
+ table->entries[idx] = cur->next;
+ cur->next = NULL;
+ result = VMCI_SUCCESS;
+ break;
+ }
+ prev = cur;
+ cur = cur->next;
+ }
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_hashtable_sync(struct vmci_hashtable *table)
+{
+
+ ASSERT(table);
+ vmci_grab_lock_bh(&table->lock);
+ vmci_release_lock_bh(&table->lock);
+}
Index: sys/dev/vmware/vmci/vmci_kernel_api.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_api.h
@@ -0,0 +1,16 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Kernel API (current) exported from the VMCI guest driver. */
+
+#ifndef _VMCI_KERNEL_API_H_
+#define _VMCI_KERNEL_API_H_
+
+/* With this file you always get the latest version. */
+#include "vmci_kernel_api_1.h"
+#include "vmci_kernel_api_2.h"
+
+#endif /* !_VMCI_KERNEL_API_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_api_1.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_api_1.h
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Kernel API (v1) exported from the VMCI guest driver. */
+
+#ifndef _VMCI_KERNEL_API_1_H_
+#define _VMCI_KERNEL_API_1_H_
+
+#include "vmci_call_defs.h"
+#include "vmci_defs.h"
+
+/* Define version 1. */
+#undef VMCI_KERNEL_API_VERSION
+#define VMCI_KERNEL_API_VERSION_1 1
+#define VMCI_KERNEL_API_VERSION VMCI_KERNEL_API_VERSION_1
+
+/* VMCI Datagram API. */
+int vmci_datagram_create_handle(uint32_t resource_id, uint32_t flags,
+ vmci_datagram_recv_cb recv_cb, void *client_data,
+ struct vmci_handle *out_handle);
+int vmci_datagram_create_handle_priv(uint32_t resource_id, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb,
+ void *client_data, struct vmci_handle *out_handle);
+int vmci_datagram_destroy_handle(struct vmci_handle handle);
+int vmci_datagram_send(struct vmci_datagram *msg);
+
+/* VMCI Utility API. */
+vmci_id vmci_get_context_id(void);
+
+/* VMCI Event API. */
+typedef void (*vmci_event_cb)(vmci_id sub_id, struct vmci_event_data *ed,
+ void *client_data);
+
+int vmci_event_subscribe(vmci_event_type event, vmci_event_cb callback,
+ void *callback_data, vmci_id *sub_id);
+int vmci_event_unsubscribe(vmci_id sub_id);
+
+/* VMCI Queue Pair API. */
+struct vmci_qpair;
+
+int vmci_qpair_alloc(struct vmci_qpair **qpair, struct vmci_handle *handle,
+ uint64_t produce_q_size, uint64_t consume_q_size, vmci_id peer,
+ uint32_t flags, vmci_privilege_flags priv_flags);
+int vmci_qpair_detach(struct vmci_qpair **qpair);
+int vmci_qpair_get_produce_indexes(const struct vmci_qpair *qpair,
+ uint64_t *producer_tail, uint64_t *consumer_head);
+int vmci_qpair_get_consume_indexes(const struct vmci_qpair *qpair,
+ uint64_t *consumer_tail, uint64_t *producer_head);
+int64_t vmci_qpair_produce_free_space(const struct vmci_qpair *qpair);
+int64_t vmci_qpair_produce_buf_ready(const struct vmci_qpair *qpair);
+int64_t vmci_qpair_consume_free_space(const struct vmci_qpair *qpair);
+int64_t vmci_qpair_consume_buf_ready(const struct vmci_qpair *qpair);
+ssize_t vmci_qpair_enqueue(struct vmci_qpair *qpair, const void *buf,
+ size_t buf_size, int mode);
+ssize_t vmci_qpair_dequeue(struct vmci_qpair *qpair, void *buf,
+ size_t buf_size, int mode);
+ssize_t vmci_qpair_peek(struct vmci_qpair *qpair, void *buf,
+ size_t buf_size, int mode);
+ssize_t vmci_qpair_enquev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int mode);
+ssize_t vmci_qpair_dequev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int mode);
+ssize_t vmci_qpair_peekv(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int mode);
+
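+/*
+ * Editor's illustrative sketch (not part of this change): a kernel client
+ * attaches a queue pair to a peer context and moves data with the
+ * enqueue/dequeue calls. The sizes, "peer_id", "buf" and the
+ * VMCI_NO_PRIVILEGE_FLAGS constant (from vmci_defs.h) are assumptions of
+ * this example.
+ *
+ *	struct vmci_qpair *qpair;
+ *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *	ssize_t nbytes;
+ *
+ *	if (vmci_qpair_alloc(&qpair, &handle, 65536, 65536, peer_id, 0,
+ *	    VMCI_NO_PRIVILEGE_FLAGS) == VMCI_SUCCESS) {
+ *		nbytes = vmci_qpair_enqueue(qpair, buf, sizeof(buf), 0);
+ *		...
+ *		(void)vmci_qpair_detach(&qpair);
+ *	}
+ */
+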
+#endif /* !_VMCI_KERNEL_API_1_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_api_2.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_api_2.h
@@ -0,0 +1,32 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Kernel API (v2) exported from the VMCI guest driver. */
+
+#ifndef _VMCI_KERNEL_API_2_H_
+#define _VMCI_KERNEL_API_2_H_
+
+#include "vmci_kernel_api_1.h"
+
+/* Define version 2. */
+
+#undef VMCI_KERNEL_API_VERSION
+#define VMCI_KERNEL_API_VERSION_2 2
+#define VMCI_KERNEL_API_VERSION VMCI_KERNEL_API_VERSION_2
+
+/* VMCI Doorbell API. */
+#define VMCI_FLAG_DELAYED_CB 0x01
+
+typedef void (*vmci_callback)(void *client_data);
+
+int vmci_doorbell_create(struct vmci_handle *handle, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_callback notify_cb,
+ void *client_data);
+int vmci_doorbell_destroy(struct vmci_handle handle);
+int vmci_doorbell_notify(struct vmci_handle handle,
+ vmci_privilege_flags priv_flags);
+
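+/*
+ * Editor's illustrative sketch (not part of this change): a client creates
+ * a doorbell with a notification callback and rings a peer's doorbell by
+ * handle. "db_cb", "client", "peer_handle" and VMCI_NO_PRIVILEGE_FLAGS
+ * (from vmci_defs.h) are assumptions of this example.
+ *
+ *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *
+ *	if (vmci_doorbell_create(&handle, VMCI_FLAG_DELAYED_CB,
+ *	    VMCI_NO_PRIVILEGE_FLAGS, db_cb, client) == VMCI_SUCCESS) {
+ *		...
+ *		(void)vmci_doorbell_destroy(handle);
+ *	}
+ *
+ *	(void)vmci_doorbell_notify(peer_handle, VMCI_NO_PRIVILEGE_FLAGS);
+ */
+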
+#endif /* !_VMCI_KERNEL_API_2_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_defs.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_defs.h
@@ -0,0 +1,30 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Some common utilities used by the VMCI kernel module. */
+
+#ifndef _VMCI_KERNEL_DEFS_H_
+#define _VMCI_KERNEL_DEFS_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+typedef uint32_t PPN;
+
+#define ASSERT(cond) KASSERT(cond, (""))
+#define ASSERT_ON_COMPILE(e) _Static_assert(e, #e);
+
+#define LIKELY(_exp) __builtin_expect(!!(_exp), 1)
+#define UNLIKELY(_exp) __builtin_expect((_exp), 0)
+
+#define CONST64U(c) c##uL
+
+#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
+#define ROUNDUP(x, y) (((x) + (y) - 1) / (y) * (y))
+#define CEILING(x, y) (((x) + (y) - 1) / (y))
+
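+/*
+ * Editor's note (not part of this change): ROUNDUP() rounds x up to the
+ * next multiple of y and CEILING() is the corresponding quotient, e.g.
+ * with a 4 KiB PAGE_SIZE:
+ *
+ *	ROUNDUP(5000, PAGE_SIZE) == 8192
+ *	CEILING(5000, PAGE_SIZE) == 2
+ */
+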
+#endif /* !_VMCI_KERNEL_DEFS_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_if.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_if.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file defines helper functions */
+
+#ifndef _VMCI_KERNEL_IF_H_
+#define _VMCI_KERNEL_IF_H_
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sema.h>
+
+#include "vmci_defs.h"
+
+#define VMCI_MEMORY_NORMAL 0x0
+#define VMCI_MEMORY_ATOMIC 0x1
+
+#define vmci_list(_l) LIST_HEAD(, _l)
+#define vmci_list_item(_l) LIST_ENTRY(_l)
+#define vmci_list_init(_l) LIST_INIT(_l)
+#define vmci_list_empty(_l) LIST_EMPTY(_l)
+#define vmci_list_first(_l) LIST_FIRST(_l)
+#define vmci_list_next(e, f) LIST_NEXT(e, f)
+#define vmci_list_insert(_l, _e, n) LIST_INSERT_HEAD(_l, _e, n)
+#define vmci_list_remove(_e, n) LIST_REMOVE(_e, n)
+#define vmci_list_scan(v, _l, n) LIST_FOREACH(v, _l, n)
+#define vmci_list_scan_safe(_e, _l, n, t) \
+ LIST_FOREACH_SAFE(_e, _l, n, t)
+#define vmci_list_swap(_l1, _l2, t, f) LIST_SWAP(_l1, _l2, t, f)
+
+typedef unsigned short int vmci_io_port;
+typedef int vmci_io_handle;
+
+void vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port,
+ uint8_t *buffer, size_t buffer_length);
+
+typedef struct mtx vmci_lock;
+int vmci_init_lock(vmci_lock *lock, char *name);
+void vmci_cleanup_lock(vmci_lock *lock);
+void vmci_grab_lock(vmci_lock *lock);
+void vmci_release_lock(vmci_lock *lock);
+void vmci_grab_lock_bh(vmci_lock *lock);
+void vmci_release_lock_bh(vmci_lock *lock);
+
+void *vmci_alloc_kernel_mem(size_t size, int flags);
+void vmci_free_kernel_mem(void *ptr, size_t size);
+
+typedef struct sema vmci_event;
+typedef int (*vmci_event_release_cb)(void *client_data);
+void vmci_create_event(vmci_event *event);
+void vmci_destroy_event(vmci_event *event);
+void vmci_signal_event(vmci_event *event);
+void vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb,
+ void *client_data);
+bool vmci_wait_on_event_interruptible(vmci_event *event,
+ vmci_event_release_cb release_cb, void *client_data);
+
+typedef void (vmci_work_fn)(void *data);
+bool vmci_can_schedule_delayed_work(void);
+int vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data);
+void vmci_delayed_work_cb(void *context, int data);
+
+typedef struct mtx vmci_mutex;
+int vmci_mutex_init(vmci_mutex *mutex, char *name);
+void vmci_mutex_destroy(vmci_mutex *mutex);
+void vmci_mutex_acquire(vmci_mutex *mutex);
+void vmci_mutex_release(vmci_mutex *mutex);
+
+void *vmci_alloc_queue(uint64_t size, uint32_t flags);
+void vmci_free_queue(void *q, uint64_t size);
+
+typedef PPN *vmci_ppn_list;
+struct ppn_set {
+ uint64_t num_produce_pages;
+ uint64_t num_consume_pages;
+ vmci_ppn_list produce_ppns;
+ vmci_ppn_list consume_ppns;
+ bool initialized;
+};
+
+int vmci_alloc_ppn_set(void *produce_q, uint64_t num_produce_pages,
+ void *consume_q, uint64_t num_consume_pages,
+ struct ppn_set *ppn_set);
+void vmci_free_ppn_set(struct ppn_set *ppn_set);
+int vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppnset);
+
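+/*
+ * Editor's illustrative sketch (not part of this change): the vmci_list
+ * macros are thin wrappers around the queue(3) LIST macros. A hypothetical
+ * element type declares membership with vmci_list_item() and a list head
+ * with vmci_list():
+ *
+ *	struct my_elem {
+ *		int value;
+ *		vmci_list_item(my_elem) list_item;
+ *	};
+ *
+ *	vmci_list(my_elem) head;
+ *	struct my_elem *iter;
+ *
+ *	vmci_list_init(&head);
+ *	vmci_list_insert(&head, elem, list_item);
+ *	vmci_list_scan(iter, &head, list_item)
+ *		iter->value++;
+ */
+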
+#endif /* !_VMCI_KERNEL_IF_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_if.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_if.c
@@ -0,0 +1,1066 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements defines and helper functions. */
+
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+
+#include <machine/bus.h>
+
+#include "vmci.h"
+#include "vmci_defs.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_kernel_if.h"
+#include "vmci_queue.h"
+
+struct vmci_queue_kernel_if {
+ size_t num_pages; /* Num pages incl. header. */
+ struct vmci_dma_alloc *dmas; /* For dma alloc. */
+};
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_init_lock
+ *
+ * Initializes the lock. Must be called before use.
+ *
+ * Results:
+ * Always VMCI_SUCCESS.
+ *
+ * Side effects:
+ * Thread can block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_init_lock(vmci_lock *lock, char *name)
+{
+
+ mtx_init(lock, name, NULL, MTX_DEF | MTX_NOWITNESS);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_cleanup_lock
+ *
+ *	Cleans up the lock. Must be called before deallocating the lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * Deletes kernel lock state
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_cleanup_lock(vmci_lock *lock)
+{
+
+ mtx_destroy(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_grab_lock
+ *
+ * Grabs the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * Thread can block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_grab_lock(vmci_lock *lock)
+{
+
+ mtx_lock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_release_lock
+ *
+ * Releases the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * A thread blocked on this lock may wake up.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_release_lock(vmci_lock *lock)
+{
+
+ mtx_unlock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_grab_lock_bh
+ *
+ * Grabs the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_grab_lock_bh(vmci_lock *lock)
+{
+
+ mtx_lock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_release_lock_bh
+ *
+ * Releases the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_release_lock_bh(vmci_lock *lock)
+{
+
+ mtx_unlock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_alloc_kernel_mem
+ *
+ * Allocate physically contiguous memory for the VMCI driver.
+ *
+ * Results:
+ * The address allocated or NULL on error.
+ *
+ * Side effects:
+ * Memory may be allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void *
+vmci_alloc_kernel_mem(size_t size, int flags)
+{
+ void *ptr;
+
+ if ((flags & VMCI_MEMORY_ATOMIC) != 0)
+ ptr = contigmalloc(size, M_DEVBUF, M_NOWAIT, 0, 0xFFFFFFFF,
+ 8, 1024 * 1024);
+ else
+ ptr = contigmalloc(size, M_DEVBUF, M_WAITOK, 0, 0xFFFFFFFF,
+ 8, 1024 * 1024);
+
+ return (ptr);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_free_kernel_mem
+ *
+ * Free kernel memory allocated for the VMCI driver.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Memory is freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_free_kernel_mem(void *ptr, size_t size)
+{
+
+ contigfree(ptr, size, M_DEVBUF);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_can_schedule_delayed_work --
+ *
+ * Checks to see if the given platform supports delayed work callbacks.
+ *
+ * Results:
+ * true if it does. false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_can_schedule_delayed_work(void)
+{
+
+ return (true);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_schedule_delayed_work --
+ *
+ * Schedule the specified callback.
+ *
+ * Results:
+ * Zero on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data)
+{
+
+ return (vmci_schedule_delayed_work_fn(work_fn, data));
+}
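+
+/*
+ * Editor's illustrative sketch (not part of this change): a delayed
+ * callback matches the vmci_work_fn typedef and receives the opaque data
+ * pointer it was scheduled with; "my_work" and "arg" are placeholders.
+ *
+ *	static void
+ *	my_work(void *data)
+ *	{
+ *		...
+ *	}
+ *
+ *	if (vmci_can_schedule_delayed_work())
+ *		(void)vmci_schedule_delayed_work(my_work, arg);
+ */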
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_create_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_create_event(vmci_event *event)
+{
+
+ sema_init(event, 0, "vmci_event");
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_destroy_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_destroy_event(vmci_event *event)
+{
+
+ if (mtx_owned(&event->sema_mtx))
+ sema_destroy(event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_signal_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_signal_event(vmci_event *event)
+{
+
+ sema_post(event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_wait_on_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb,
+ void *client_data)
+{
+
+ release_cb(client_data);
+ sema_wait(event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_init --
+ *
+ * Initializes the mutex. Must be called before use.
+ *
+ * Results:
+ *	Always VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_mutex_init(vmci_mutex *mutex, char *name)
+{
+
+ mtx_init(mutex, name, NULL, MTX_DEF | MTX_NOWITNESS);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_destroy --
+ *
+ * Destroys the mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_mutex_destroy(vmci_mutex *mutex)
+{
+
+ mtx_destroy(mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_acquire --
+ *
+ * Acquires the mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Thread may block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_mutex_acquire(vmci_mutex *mutex)
+{
+
+ mtx_lock(mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_release --
+ *
+ * Releases the mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May wake up the thread blocking on this mutex.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_mutex_release(vmci_mutex *mutex)
+{
+
+ mtx_unlock(mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_alloc_queue --
+ *
+ * Allocates kernel queue pages of specified size with IOMMU mappings, plus
+ * space for the queue structure/kernel interface and the queue header.
+ *
+ * Results:
+ * Pointer to the queue on success, NULL otherwise.
+ *
+ * Side effects:
+ * Memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void *
+vmci_alloc_queue(uint64_t size, uint32_t flags)
+{
+ struct vmci_queue *queue;
+ size_t i;
+ const size_t num_pages = CEILING(size, PAGE_SIZE) + 1;
+ const size_t dmas_size = num_pages * sizeof(struct vmci_dma_alloc);
+ const size_t queue_size =
+ sizeof(*queue) + sizeof(*(queue->kernel_if)) + dmas_size;
+
+ /* Size should be enforced by vmci_qpair_alloc(), double-check here. */
+ if (size > VMCI_MAX_GUEST_QP_MEMORY) {
+ ASSERT(false);
+ return (NULL);
+ }
+
+ queue = malloc(queue_size, M_DEVBUF, M_NOWAIT);
+ if (!queue)
+ return (NULL);
+
+ queue->q_header = NULL;
+ queue->saved_header = NULL;
+ queue->kernel_if = (struct vmci_queue_kernel_if *)(queue + 1);
+ queue->kernel_if->num_pages = num_pages;
+ queue->kernel_if->dmas = (struct vmci_dma_alloc *)(queue->kernel_if +
+ 1);
+ for (i = 0; i < num_pages; i++) {
+ vmci_dma_malloc(PAGE_SIZE, 1, &queue->kernel_if->dmas[i]);
+ if (!queue->kernel_if->dmas[i].dma_vaddr) {
+ /* Size excl. the header. */
+ vmci_free_queue(queue, i * PAGE_SIZE);
+ return (NULL);
+ }
+ }
+
+ /* Queue header is the first page. */
+ queue->q_header = (void *)queue->kernel_if->dmas[0].dma_vaddr;
+
+ return ((void *)queue);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_free_queue --
+ *
+ * Frees kernel VA space for a given queue and its queue header, and frees
+ * physical data pages.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Memory is freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_free_queue(void *q, uint64_t size)
+{
+ struct vmci_queue *queue = q;
+
+ if (queue) {
+ const size_t num_pages = CEILING(size, PAGE_SIZE) + 1;
+ uint64_t i;
+
+ /* Given size doesn't include header, so add in a page here. */
+ for (i = 0; i < num_pages; i++)
+ vmci_dma_free(&queue->kernel_if->dmas[i]);
+ free(queue, M_DEVBUF);
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_alloc_ppn_set --
+ *
+ *     Allocates two lists of PPNs --- one for the pages in the produce queue,
+ *     and the other for the pages in the consume queue. Initializes the lists
+ *     with the page frame numbers of the pages backing the two queues (and
+ *     the queue headers).
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory may be allocated.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+vmci_alloc_ppn_set(void *prod_q, uint64_t num_produce_pages, void *cons_q,
+ uint64_t num_consume_pages, struct ppn_set *ppn_set)
+{
+ struct vmci_queue *consume_q = cons_q;
+ struct vmci_queue *produce_q = prod_q;
+ vmci_ppn_list consume_ppns;
+ vmci_ppn_list produce_ppns;
+ uint64_t i;
+
+ if (!produce_q || !num_produce_pages || !consume_q ||
+ !num_consume_pages || !ppn_set)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (ppn_set->initialized)
+ return (VMCI_ERROR_ALREADY_EXISTS);
+
+ produce_ppns =
+ vmci_alloc_kernel_mem(num_produce_pages * sizeof(*produce_ppns),
+ VMCI_MEMORY_NORMAL);
+ if (!produce_ppns)
+ return (VMCI_ERROR_NO_MEM);
+
+ consume_ppns =
+ vmci_alloc_kernel_mem(num_consume_pages * sizeof(*consume_ppns),
+ VMCI_MEMORY_NORMAL);
+ if (!consume_ppns) {
+ vmci_free_kernel_mem(produce_ppns,
+ num_produce_pages * sizeof(*produce_ppns));
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ for (i = 0; i < num_produce_pages; i++) {
+ unsigned long pfn;
+
+ produce_ppns[i] =
+ pfn = produce_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT;
+
+ /*
+ * Fail allocation if PFN isn't supported by hypervisor.
+ */
+
+ if (sizeof(pfn) >
+ sizeof(*produce_ppns) && pfn != produce_ppns[i])
+ goto ppn_error;
+ }
+ for (i = 0; i < num_consume_pages; i++) {
+ unsigned long pfn;
+
+ consume_ppns[i] =
+ pfn = consume_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT;
+
+ /*
+ * Fail allocation if PFN isn't supported by hypervisor.
+ */
+
+ if (sizeof(pfn) >
+ sizeof(*consume_ppns) && pfn != consume_ppns[i])
+ goto ppn_error;
+
+ }
+
+ ppn_set->num_produce_pages = num_produce_pages;
+ ppn_set->num_consume_pages = num_consume_pages;
+ ppn_set->produce_ppns = produce_ppns;
+ ppn_set->consume_ppns = consume_ppns;
+ ppn_set->initialized = true;
+ return (VMCI_SUCCESS);
+
+ppn_error:
+ vmci_free_kernel_mem(produce_ppns, num_produce_pages *
+ sizeof(*produce_ppns));
+ vmci_free_kernel_mem(consume_ppns, num_consume_pages *
+ sizeof(*consume_ppns));
+ return (VMCI_ERROR_INVALID_ARGS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_free_ppn_set --
+ *
+ * Frees the two list of PPNs for a queue pair.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_free_ppn_set(struct ppn_set *ppn_set)
+{
+
+ ASSERT(ppn_set);
+ if (ppn_set->initialized) {
+ /* Do not call these functions on NULL inputs. */
+ ASSERT(ppn_set->produce_ppns && ppn_set->consume_ppns);
+ vmci_free_kernel_mem(ppn_set->produce_ppns,
+ ppn_set->num_produce_pages *
+ sizeof(*ppn_set->produce_ppns));
+ vmci_free_kernel_mem(ppn_set->consume_ppns,
+ ppn_set->num_consume_pages *
+ sizeof(*ppn_set->consume_ppns));
+ }
+ memset(ppn_set, 0, sizeof(*ppn_set));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_populate_ppn_list --
+ *
+ *     Populates the list of PPNs in the hypercall structure with the PPNs
+ *     of the produce queue and the consume queue.
+ *
+ * Results:
+ * VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppn_set)
+{
+
+ ASSERT(call_buf && ppn_set && ppn_set->initialized);
+ memcpy(call_buf, ppn_set->produce_ppns,
+ ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
+ memcpy(call_buf + ppn_set->num_produce_pages *
+ sizeof(*ppn_set->produce_ppns), ppn_set->consume_ppns,
+ ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_{to,from}iovec --
+ *
+ *     These helper routines copy the specified number of bytes to/from memory
+ *     described by a struct iovec. The routines cannot verify the correctness
+ *     of the iovec's contents.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_memcpy_toiovec(struct iovec *iov, uint8_t *src, size_t len)
+{
+
+ while (len > 0) {
+ if (iov->iov_len) {
+ size_t to_copy = MIN(iov->iov_len, len);
+ memcpy(iov->iov_base, src, to_copy);
+ src += to_copy;
+ len -= to_copy;
+ iov->iov_base = (void *)((uintptr_t) iov->iov_base +
+ to_copy);
+ iov->iov_len -= to_copy;
+ }
+ iov++;
+ }
+}
+
+static inline void
+vmci_memcpy_fromiovec(uint8_t *dst, struct iovec *iov, size_t len)
+{
+
+ while (len > 0) {
+ if (iov->iov_len) {
+ size_t to_copy = MIN(iov->iov_len, len);
+ memcpy(dst, iov->iov_base, to_copy);
+ dst += to_copy;
+ len -= to_copy;
+ iov->iov_base = (void *)((uintptr_t) iov->iov_base +
+ to_copy);
+ iov->iov_len -= to_copy;
+ }
+ iov++;
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * __vmci_memcpy_to_queue --
+ *
+ * Copies from a given buffer or iovector to a VMCI Queue. Assumes that
+ * offset + size does not wrap around in the queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+static int
+__vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t size, bool is_iovec)
+{
+ struct vmci_queue_kernel_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ const uint64_t page_index =
+ (queue_offset + bytes_copied) / PAGE_SIZE;
+ const size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ /* Skip header. */
+ va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr;
+
+ ASSERT(va);
+ /*
+ * Fill up the page if we have enough payload, or else
+ * copy the remaining bytes.
+ */
+ to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied);
+
+ if (is_iovec) {
+ struct iovec *iov = (struct iovec *)src;
+
+ /* The iovec will track bytes_copied internally. */
+ vmci_memcpy_fromiovec((uint8_t *)va + page_offset,
+ iov, to_copy);
+ } else
+ memcpy((uint8_t *)va + page_offset,
+ (uint8_t *)src + bytes_copied, to_copy);
+ bytes_copied += to_copy;
+ }
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * __vmci_memcpy_from_queue --
+ *
+ * Copies to a given buffer or iovector from a VMCI Queue. Assumes that
+ * offset + size does not wrap around in the queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+__vmci_memcpy_from_queue(void *dest, const struct vmci_queue *queue,
+ uint64_t queue_offset, size_t size, bool is_iovec)
+{
+ struct vmci_queue_kernel_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ const uint64_t page_index =
+ (queue_offset + bytes_copied) / PAGE_SIZE;
+ const size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ /* Skip header. */
+ va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr;
+
+ ASSERT(va);
+ /*
+ * Fill up the page if we have enough payload, or else
+ * copy the remaining bytes.
+ */
+ to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied);
+
+ if (is_iovec) {
+ struct iovec *iov = (struct iovec *)dest;
+
+			/* The iovec will track bytes_copied internally. */
+ vmci_memcpy_toiovec(iov, (uint8_t *)va +
+ page_offset, to_copy);
+ } else
+ memcpy((uint8_t *)dest + bytes_copied,
+ (uint8_t *)va + page_offset, to_copy);
+
+ bytes_copied += to_copy;
+ }
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_to_queue --
+ *
+ * Copies from a given buffer to a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_to_queue(queue, queue_offset,
+ (uint8_t *)src + src_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_from_queue --
+ *
+ * Copies to a given buffer from a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_from_queue(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset,
+ queue, queue_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_to_queue_local --
+ *
+ * Copies from a given buffer to a local VMCI queue. This is the
+ * same as a regular copy.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_to_queue_local(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_to_queue(queue, queue_offset,
+ (uint8_t *)src + src_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_from_queue_local --
+ *
+ *     Copies to a given buffer from a local VMCI queue. This is the same as a
+ *     regular copy.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_from_queue_local(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset,
+ queue, queue_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_to_queue_v --
+ *
+ *     Copies from a given iovec to a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_to_queue_v(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ ASSERT(can_block);
+
+ /*
+ * We ignore src_offset because src is really a struct iovec * and will
+ * maintain offset internally.
+ */
+ return (__vmci_memcpy_to_queue(queue, queue_offset, src, size,
+ true));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_from_queue_v --
+ *
+ * Copies to a given iovec from a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_from_queue_v(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ ASSERT(can_block);
+
+ /*
+ * We ignore dest_offset because dest is really a struct iovec * and
+ * will maintain offset internally.
+ */
+ return (__vmci_memcpy_from_queue(dest, queue, queue_offset, size,
+ true));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_read_port_bytes --
+ *
+ * Copy memory from an I/O port to kernel memory.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port, uint8_t *buffer,
+ size_t buffer_length)
+{
+
+ insb(port, buffer, buffer_length);
+}
Index: sys/dev/vmware/vmci/vmci_qpair.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_qpair.c
@@ -0,0 +1,834 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements Queue accessor methods. */
+
+/*
+ * vmci_qpair is an interface that hides the queue pair internals. Rather than
+ * access each queue in a pair directly, operations are performed on the queue
+ * as a whole. This is simpler and less error-prone, and allows for future
+ * queue pair features to be added under the hood with no change to the client
+ * code.
+ */
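+
+/*
+ * Illustrative client-side usage (a sketch only, not part of this driver):
+ * a guest component allocates a queue pair, moves data with the enqueue and
+ * dequeue calls below, and finally detaches. Here "peer_id" and the 64KB
+ * sizes are placeholder example values, and VMCI_INVALID_HANDLE is assumed
+ * to be the invalid-handle constant from the VMCI headers.
+ *
+ *	struct vmci_qpair *qpair;
+ *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *	char out[] = "ping", in[16];
+ *
+ *	if (vmci_qpair_alloc(&qpair, &handle, 65536, 65536, peer_id, 0,
+ *	    VMCI_NO_PRIVILEGE_FLAGS) < VMCI_SUCCESS)
+ *		return;
+ *	(void)vmci_qpair_enqueue(qpair, out, sizeof(out), 0);
+ *	(void)vmci_qpair_dequeue(qpair, in, sizeof(in), 0);
+ *	(void)vmci_qpair_detach(&qpair);
+ */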
+
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_kernel_if.h"
+#include "vmci_queue.h"
+#include "vmci_queue_pair.h"
+
+/* This structure is opaque to the clients. */
+struct vmci_qpair {
+ struct vmci_handle handle;
+ struct vmci_queue *produce_q;
+ struct vmci_queue *consume_q;
+ uint64_t produce_q_size;
+ uint64_t consume_q_size;
+ vmci_id peer;
+ uint32_t flags;
+ vmci_privilege_flags priv_flags;
+ uint32_t blocked;
+ vmci_event event;
+};
+
+static void vmci_qpair_get_queue_headers(const struct vmci_qpair *qpair,
+ struct vmci_queue_header **produce_q_header,
+ struct vmci_queue_header **consume_q_header);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_add_producer_tail --
+ *
+ * Helper routine to increment the Producer Tail.
+ *
+ * Results:
+ *     VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+vmci_queue_add_producer_tail(struct vmci_queue *queue,
+ size_t add, uint64_t queue_size)
+{
+
+ vmci_queue_header_add_producer_tail(queue->q_header, add, queue_size);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_add_consumer_head --
+ *
+ * Helper routine to increment the Consumer Head.
+ *
+ * Results:
+ *     VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+vmci_queue_add_consumer_head(struct vmci_queue *queue,
+ size_t add, uint64_t queue_size)
+{
+
+ vmci_queue_header_add_consumer_head(queue->q_header, add, queue_size);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_get_queue_headers --
+ *
+ * Helper routine that will retrieve the produce and consume headers of a
+ * given queue pair.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_qpair_get_queue_headers(const struct vmci_qpair *qpair,
+ struct vmci_queue_header **produce_q_header,
+ struct vmci_queue_header **consume_q_header)
+{
+
+ ASSERT((qpair->produce_q != NULL) && (qpair->consume_q != NULL));
+ *produce_q_header = qpair->produce_q->q_header;
+ *consume_q_header = qpair->consume_q->q_header;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_alloc --
+ *
+ * This is the client interface for allocating the memory for a vmci_qpair
+ * structure and then attaching to the underlying queue. If an error occurs
+ * allocating the memory for the vmci_qpair structure, no attempt is made to
+ *     attach. If an error occurs while attaching, the vmci_qpair structure is
+ *     freed.
+ *
+ * Results:
+ *     An error, if < 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_alloc(struct vmci_qpair **qpair, struct vmci_handle *handle,
+ uint64_t produce_q_size, uint64_t consume_q_size, vmci_id peer,
+ uint32_t flags, vmci_privilege_flags priv_flags)
+{
+ struct vmci_qpair *my_qpair;
+ vmci_event_release_cb wakeup_cb;
+ void *client_data;
+ int retval;
+
+ /*
+ * Restrict the size of a queuepair. Though the device enforces a limit
+ * on the total amount of memory that can be allocated to queuepairs for
+ * a guest, we avoid unnecessarily allocating a lot of memory. Also, we
+ * try to allocate this memory before we make the queuepair allocation
+ * hypercall.
+ *
+ * (Note that this doesn't prevent all cases; a user with only this much
+ * physical memory could still get into trouble.) The error used by the
+ * device is NO_RESOURCES, so use that here too.
+ */
+
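+	/*
+	 * Note (added comment): the first comparison below rejects unsigned
+	 * overflow of the sum of the two queue sizes.
+	 */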
+ if (produce_q_size + consume_q_size <
+ MAX(produce_q_size, consume_q_size) ||
+ produce_q_size + consume_q_size > VMCI_MAX_GUEST_QP_MEMORY)
+ return (VMCI_ERROR_NO_RESOURCES);
+
+ if (flags & VMCI_QPFLAG_NONBLOCK)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ my_qpair = vmci_alloc_kernel_mem(sizeof(*my_qpair), VMCI_MEMORY_NORMAL);
+ if (!my_qpair)
+ return (VMCI_ERROR_NO_MEM);
+
+ my_qpair->produce_q_size = produce_q_size;
+ my_qpair->consume_q_size = consume_q_size;
+ my_qpair->peer = peer;
+ my_qpair->flags = flags;
+ my_qpair->priv_flags = priv_flags;
+
+ client_data = NULL;
+ wakeup_cb = NULL;
+
+ retval = vmci_queue_pair_alloc(handle, &my_qpair->produce_q,
+ my_qpair->produce_q_size, &my_qpair->consume_q,
+ my_qpair->consume_q_size, my_qpair->peer, my_qpair->flags,
+ my_qpair->priv_flags);
+
+ if (retval < VMCI_SUCCESS) {
+ vmci_free_kernel_mem(my_qpair, sizeof(*my_qpair));
+ return (retval);
+ }
+
+ *qpair = my_qpair;
+ my_qpair->handle = *handle;
+
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_detach --
+ *
+ * This is the client interface for detaching from a vmci_qpair. Note that
+ * this routine will free the memory allocated for the vmci_qpair structure,
+ * too.
+ *
+ * Results:
+ * An error, if < 0.
+ *
+ * Side effects:
+ * Will clear the caller's pointer to the vmci_qpair structure.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_detach(struct vmci_qpair **qpair)
+{
+ struct vmci_qpair *old_qpair;
+ int result;
+
+ if (!qpair || !(*qpair))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ old_qpair = *qpair;
+ result = vmci_queue_pair_detach(old_qpair->handle);
+
+ /*
+ * The guest can fail to detach for a number of reasons, and if it does
+	 * so, it will clean up the entry (if there is one). We need to release
+ * the qpair struct here; there isn't much the caller can do, and we
+ * don't want to leak.
+ */
+
+ if (old_qpair->flags & VMCI_QPFLAG_LOCAL)
+ vmci_destroy_event(&old_qpair->event);
+
+ vmci_free_kernel_mem(old_qpair, sizeof(*old_qpair));
+ *qpair = NULL;
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_get_produce_indexes --
+ *
+ * This is the client interface for getting the current indexes of the
+ *     qpair from the point of view of the caller as the producer.
+ *
+ * Results:
+ * err, if < 0
+ * Success otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_get_produce_indexes(const struct vmci_qpair *qpair,
+ uint64_t *producer_tail, uint64_t *consumer_head)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ vmci_queue_header_get_pointers(produce_q_header, consume_q_header,
+ producer_tail, consumer_head);
+
+ if ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
+ (consumer_head && *consumer_head >= qpair->produce_q_size))
+ return (VMCI_ERROR_INVALID_SIZE);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_get_consume_indexes --
+ *
+ * This is the client interface for getting the current indexes of the
+ *     QPair from the point of view of the caller as the consumer.
+ *
+ * Results:
+ * err, if < 0
+ * Success otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_get_consume_indexes(const struct vmci_qpair *qpair,
+ uint64_t *consumer_tail, uint64_t *producer_head)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ vmci_queue_header_get_pointers(consume_q_header, produce_q_header,
+ consumer_tail, producer_head);
+
+ if ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
+ (producer_head && *producer_head >= qpair->consume_q_size))
+ return (VMCI_ERROR_INVALID_SIZE);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_produce_free_space --
+ *
+ * This is the client interface for getting the amount of free space in the
+ *     QPair from the point of view of the caller as the producer, which is
+ *     the common case.
+ *
+ * Results:
+ * Err, if < 0.
+ * Full queue if = 0.
+ * Number of available bytes into which data can be enqueued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_produce_free_space(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_free_space(produce_q_header, consume_q_header,
+ qpair->produce_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_consume_free_space --
+ *
+ * This is the client interface for getting the amount of free space in the
+ *     QPair from the point of view of the caller as the consumer, which is
+ *     not the common case (see vmci_qpair_produce_free_space(), above).
+ *
+ * Results:
+ * Err, if < 0.
+ * Full queue if = 0.
+ * Number of available bytes into which data can be enqueued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_consume_free_space(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_free_space(consume_q_header, produce_q_header,
+ qpair->consume_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_produce_buf_ready --
+ *
+ * This is the client interface for getting the amount of enqueued data in
+ *     the QPair from the point of view of the caller as the producer, which
+ *     is not the common case (see vmci_qpair_consume_buf_ready(), below).
+ *
+ * Results:
+ * Err, if < 0.
+ * Empty queue if = 0.
+ * Number of bytes ready to be dequeued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_produce_buf_ready(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_buf_ready(produce_q_header, consume_q_header,
+ qpair->produce_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_consume_buf_ready --
+ *
+ * This is the client interface for getting the amount of enqueued data in
+ *     the QPair from the point of view of the caller as the consumer, which
+ *     is the normal case.
+ *
+ * Results:
+ * Err, if < 0.
+ * Empty queue if = 0.
+ * Number of bytes ready to be dequeued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_consume_buf_ready(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_buf_ready(consume_q_header, produce_q_header,
+ qpair->consume_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * enqueue --
+ *
+ * Enqueues a given buffer to the produce queue using the provided function.
+ * As many bytes as possible (space available in the queue) are enqueued.
+ *
+ * Results:
+ * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue data.
+ * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
+ * (as defined by the queue size).
+ *     VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
+ * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
+ * available.
+ * Otherwise, the number of bytes written to the queue is returned.
+ *
+ * Side effects:
+ * Updates the tail pointer of the produce queue.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static ssize_t
+enqueue(struct vmci_queue *produce_q, struct vmci_queue *consume_q,
+ const uint64_t produce_q_size, const void *buf, size_t buf_size,
+ int buf_type, vmci_memcpy_to_queue_func memcpy_to_queue, bool can_block)
+{
+ ssize_t result;
+ size_t written;
+ int64_t free_space;
+ uint64_t tail;
+
+ ASSERT((produce_q != NULL) && (consume_q != NULL));
+
+ free_space = vmci_queue_header_free_space(produce_q->q_header,
+ consume_q->q_header,
+ produce_q_size);
+ if (free_space == 0)
+ return (VMCI_ERROR_QUEUEPAIR_NOSPACE);
+
+ if (free_space < VMCI_SUCCESS)
+ return ((ssize_t)free_space);
+
+ written = (size_t)(free_space > buf_size ? buf_size : free_space);
+ tail = vmci_queue_header_producer_tail(produce_q->q_header);
+ if (LIKELY(tail + written < produce_q_size))
+ result = memcpy_to_queue(produce_q, tail, buf, 0, written,
+ buf_type, can_block);
+ else {
+ /* Tail pointer wraps around. */
+
+ const size_t tmp = (size_t)(produce_q_size - tail);
+
+ result = memcpy_to_queue(produce_q, tail, buf, 0, tmp, buf_type,
+ can_block);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_to_queue(produce_q, 0, buf, tmp,
+ written - tmp, buf_type, can_block);
+ }
+
+ if (result < VMCI_SUCCESS)
+ return (result);
+
+ result = vmci_queue_add_producer_tail(produce_q, written,
+ produce_q_size);
+ if (result < VMCI_SUCCESS)
+ return (result);
+ return (written);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * dequeue --
+ *
+ * Dequeues data (if available) from the given consume queue. Writes data
+ * to the user provided buffer using the provided function.
+ *
+ * Results:
+ * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
+ * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
+ * (as defined by the queue size).
+ *     VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
+ * VMCI_ERROR_NOT_FOUND, if the vmm_world registered with the queue pair
+ * cannot be found.
+ * Otherwise the number of bytes dequeued is returned.
+ *
+ *     Frees the two lists of PPNs for a queue pair.
+ * Updates the head pointer of the consume queue.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static ssize_t
+dequeue(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q, const uint64_t consume_q_size, void *buf,
+ size_t buf_size, int buf_type,
+ vmci_memcpy_from_queue_func memcpy_from_queue, bool update_consumer,
+ bool can_block)
+{
+ ssize_t result;
+ size_t read;
+ int64_t buf_ready;
+ uint64_t head;
+
+ ASSERT((produce_q != NULL) && (consume_q != NULL));
+
+ buf_ready = vmci_queue_header_buf_ready(consume_q->q_header,
+ produce_q->q_header, consume_q_size);
+ if (buf_ready == 0)
+ return (VMCI_ERROR_QUEUEPAIR_NODATA);
+ if (buf_ready < VMCI_SUCCESS)
+ return ((ssize_t)buf_ready);
+
+ read = (size_t)(buf_ready > buf_size ? buf_size : buf_ready);
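+	/*
+	 * Note (added comment): this endpoint's consumer head is stored in its
+	 * own (produce) queue header, which is why produce_q->q_header is
+	 * consulted here.
+	 */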
+ head = vmci_queue_header_consumer_head(produce_q->q_header);
+ if (LIKELY(head + read < consume_q_size))
+ result = memcpy_from_queue(buf, 0, consume_q, head, read,
+ buf_type, can_block);
+ else {
+ /* Head pointer wraps around. */
+
+ const size_t tmp = (size_t)(consume_q_size - head);
+
+ result = memcpy_from_queue(buf, 0, consume_q, head, tmp,
+ buf_type, can_block);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_from_queue(buf, tmp, consume_q, 0,
+ read - tmp, buf_type, can_block);
+ }
+
+ if (result < VMCI_SUCCESS)
+ return (result);
+
+ if (update_consumer) {
+ result = vmci_queue_add_consumer_head(produce_q, read,
+ consume_q_size);
+ if (result < VMCI_SUCCESS)
+ return (result);
+ }
+
+ return (read);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_enqueue --
+ *
+ * This is the client interface for enqueueing data into the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes enqueued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_enqueue(struct vmci_qpair *qpair, const void *buf, size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = enqueue(qpair->produce_q, qpair->consume_q,
+ qpair->produce_q_size, buf, buf_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_to_queue_local : vmci_memcpy_to_queue,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_dequeue --
+ *
+ * This is the client interface for dequeueing data from the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes dequeued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_dequeue(struct vmci_qpair *qpair, void *buf, size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, buf, buf_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_local : vmci_memcpy_from_queue, true,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_peek --
+ *
+ * This is the client interface for peeking into a queue. (I.e., copy
+ * data from the queue without updating the head pointer.)
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes peeked, if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_peek(struct vmci_qpair *qpair, void *buf, size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, buf, buf_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_local : vmci_memcpy_from_queue, false,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_enquev --
+ *
+ * This is the client interface for enqueueing data into the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes enqueued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_enquev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !iov)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = enqueue(qpair->produce_q, qpair->consume_q,
+ qpair->produce_q_size, iov, iov_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_to_queue_v_local : vmci_memcpy_to_queue_v,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_dequev --
+ *
+ * This is the client interface for dequeueing data from the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes dequeued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_dequev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !iov)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, iov, iov_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_v_local : vmci_memcpy_from_queue_v, true,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_peekv --
+ *
+ * This is the client interface for peeking into a queue. (I.e., copy
+ * data from the queue without updating the head pointer.)
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes peeked, if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_peekv(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !iov)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, iov, iov_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_v_local : vmci_memcpy_from_queue_v, false,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
Index: sys/dev/vmware/vmci/vmci_queue.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_queue.h
@@ -0,0 +1,115 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Defines the queue structure and helper functions to enqueue/dequeue items. */
+
+#ifndef _VMCI_QUEUE_H_
+#define _VMCI_QUEUE_H_
+
+/*
+ * vmci_queue
+ *
+ * This data type contains the information about a queue.
+ *
+ * There are two queues (hence, a queue pair) per connection between a pair of
+ * end points, A & B. One queue is used by end point A to transmit commands
+ * and responses to B. The other queue is used by B to transmit commands and
+ * responses to A.
+ *
+ * vmci_queue_kernel_if is a per-OS defined queue structure. It contains
+ * either a direct pointer to the linear address of the buffer contents or a
+ * pointer to structures which help the OS locate those data pages.
+ * See vmci_kernel_if.c for its definition.
+ */
+
+struct vmci_queue_kernel_if;
+
+struct vmci_queue {
+ struct vmci_queue_header *q_header;
+ struct vmci_queue_header *saved_header;
+ struct vmci_queue_kernel_if *kernel_if;
+};
+
+#define BUF_TYPE int
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy{to,from}_queue_func() prototypes. Functions of these types are
+ * passed around to enqueue and dequeue routines. Note that often the functions
+ * passed are simply wrappers around memcpy itself.
+ *
+ * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
+ * there's an unused last parameter for the hosted side. In ESX, that parameter
+ * holds a buffer type.
+ *
+ *------------------------------------------------------------------------------
+ */
+typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
+ uint64_t queue_offset, const void *src, size_t src_offset,
+ size_t size, BUF_TYPE buf_type, bool can_block);
+typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy{to,from}_queue_[v]_[local]() prototypes
+ *
+ * Note that these routines are NOT SAFE to call on a host end-point until the
+ * guest end of the queue pair has attached and issued SetPageStore(). The VMX
+ * crosstalk device will issue the SetPageStore() on behalf of the guest when
+ * the guest creates a QueuePair or attaches to one created by the host. So, if
+ * the guest notifies the host that it has attached, the queue is safe to use.
+ * Also, if the host registers for notification of the guest's connection, it
+ * will only receive that notification after the guest has issued the
+ * SetPageStore() call, not before (when the guest has merely attached).
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, BUF_TYPE buf_type,
+ bool can_block);
+int vmci_memcpy_from_queue(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+int vmci_memcpy_to_queue_local(struct vmci_queue *queue,
+ uint64_t queue_offset, const void *src, size_t src_offset,
+ size_t size, BUF_TYPE buf_type, bool can_block);
+int vmci_memcpy_from_queue_local(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+
+int vmci_memcpy_to_queue_v(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, BUF_TYPE buf_type,
+ bool can_block);
+int vmci_memcpy_from_queue_v(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+
+static inline int
+vmci_memcpy_to_queue_v_local(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ return (vmci_memcpy_to_queue_v(queue, queue_offset, src, src_offset,
+ size, buf_type, can_block));
+}
+
+static inline int
+vmci_memcpy_from_queue_v_local(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ return (vmci_memcpy_from_queue_v(dest, dest_offset, queue, queue_offset,
+ size, buf_type, can_block));
+}
+
+#endif /* !_VMCI_QUEUE_H_ */
Index: sys/dev/vmware/vmci/vmci_queue_pair.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_queue_pair.h
@@ -0,0 +1,26 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI QueuePair API definition. */
+
+#ifndef _VMCI_QUEUE_PAIR_H_
+#define _VMCI_QUEUE_PAIR_H_
+
+#include "vmci_kernel_if.h"
+#include "vmci_queue.h"
+
+int vmci_qp_guest_endpoints_init(void);
+void vmci_qp_guest_endpoints_exit(void);
+void vmci_qp_guest_endpoints_sync(void);
+void vmci_qp_guest_endpoints_convert(bool to_local, bool device_reset);
+
+int vmci_queue_pair_alloc(struct vmci_handle *handle,
+ struct vmci_queue **produce_q, uint64_t produce_size,
+ struct vmci_queue **consume_q, uint64_t consume_size,
+ vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags);
+int vmci_queue_pair_detach(struct vmci_handle handle);
+
+#endif /* !_VMCI_QUEUE_PAIR_H_ */
Index: sys/dev/vmware/vmci/vmci_queue_pair.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_queue_pair.c
@@ -0,0 +1,937 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI QueuePair API implementation. */
+
+#include "vmci.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_queue_pair.h"
+
+#define LGPFX "vmci_queue_pair: "
+
+struct queue_pair_entry {
+ vmci_list_item(queue_pair_entry) list_item;
+ struct vmci_handle handle;
+ vmci_id peer;
+ uint32_t flags;
+ uint64_t produce_size;
+ uint64_t consume_size;
+ uint32_t ref_count;
+};
+
+struct qp_guest_endpoint {
+ struct queue_pair_entry qp;
+ uint64_t num_ppns;
+ void *produce_q;
+ void *consume_q;
+ bool hibernate_failure;
+ struct ppn_set ppn_set;
+};
+
+struct queue_pair_list {
+ vmci_list(queue_pair_entry) head;
+ volatile int hibernate;
+ vmci_mutex mutex;
+};
+
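+/*
+ * Pages needed by a queue pair entry: its produce and consume queues plus one
+ * header page for each of the two queues.
+ */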
+#define QPE_NUM_PAGES(_QPE) \
+ ((uint32_t)(CEILING(_QPE.produce_size, PAGE_SIZE) + \
+ CEILING(_QPE.consume_size, PAGE_SIZE) + 2))
+
+static struct queue_pair_list qp_guest_endpoints;
+
+static struct queue_pair_entry *queue_pair_list_find_entry(
+ struct queue_pair_list *qp_list, struct vmci_handle handle);
+static void queue_pair_list_add_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry);
+static void queue_pair_list_remove_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry);
+static struct queue_pair_entry *queue_pair_list_get_head(
+ struct queue_pair_list *qp_list);
+static int queue_pair_notify_peer_local(bool attach,
+ struct vmci_handle handle);
+static struct qp_guest_endpoint *qp_guest_endpoint_create(
+ struct vmci_handle handle, vmci_id peer, uint32_t flags,
+ uint64_t produce_size, uint64_t consume_size,
+ void *produce_q, void *consume_q);
+static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry);
+static int vmci_queue_pair_alloc_hypercall(
+ const struct qp_guest_endpoint *entry);
+static int vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle,
+ struct vmci_queue **produce_q, uint64_t produce_size,
+ struct vmci_queue **consume_q, uint64_t consume_size,
+ vmci_id peer, uint32_t flags,
+ vmci_privilege_flags priv_flags);
+static int vmci_queue_pair_detach_guest_work(struct vmci_handle handle);
+static int vmci_queue_pair_detach_hypercall(struct vmci_handle handle);
+
+extern int vmci_send_datagram(struct vmci_datagram *);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_alloc --
+ *
+ * Allocates a VMCI QueuePair. Only checks validity of input arguments. The
+ * real work is done in the host or guest specific function.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_queue_pair_alloc(struct vmci_handle *handle, struct vmci_queue **produce_q,
+ uint64_t produce_size, struct vmci_queue **consume_q, uint64_t consume_size,
+ vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags)
+{
+
+ if (!handle || !produce_q || !consume_q ||
+ (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_queue_pair_alloc_guest_work(handle, produce_q,
+ produce_size, consume_q, consume_size, peer, flags, priv_flags));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_detach --
+ *
+ * Detaches from a VMCI QueuePair. Only checks validity of input argument.
+ * Real work is done in the host or guest specific function.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory is freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_queue_pair_detach(struct vmci_handle handle)
+{
+
+ if (VMCI_HANDLE_INVALID(handle))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_queue_pair_detach_guest_work(handle));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_init --
+ *
+ * Initializes the list of QueuePairs.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+queue_pair_list_init(struct queue_pair_list *qp_list)
+{
+ int ret;
+
+ vmci_list_init(&qp_list->head);
+ atomic_store_int(&qp_list->hibernate, 0);
+ ret = vmci_mutex_init(&qp_list->mutex, "VMCI QP List lock");
+ return (ret);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_destroy --
+ *
+ * Destroy the list's mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+queue_pair_list_destroy(struct queue_pair_list *qp_list)
+{
+
+ vmci_mutex_destroy(&qp_list->mutex);
+ vmci_list_init(&qp_list->head);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_find_entry --
+ *
+ * Finds the entry in the list corresponding to a given handle. Assumes that
+ * the list is locked.
+ *
+ * Results:
+ * Pointer to entry.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct queue_pair_entry *
+queue_pair_list_find_entry(struct queue_pair_list *qp_list,
+ struct vmci_handle handle)
+{
+ struct queue_pair_entry *next;
+
+ if (VMCI_HANDLE_INVALID(handle))
+ return (NULL);
+
+ vmci_list_scan(next, &qp_list->head, list_item) {
+ if (VMCI_HANDLE_EQUAL(next->handle, handle))
+ return (next);
+ }
+
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_add_entry --
+ *
+ * Adds the given entry to the list. Assumes that the list is locked.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+queue_pair_list_add_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry)
+{
+
+ if (entry)
+ vmci_list_insert(&qp_list->head, entry, list_item);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_remove_entry --
+ *
+ * Removes the given entry from the list. Assumes that the list is locked.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+queue_pair_list_remove_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry)
+{
+
+ if (entry)
+ vmci_list_remove(entry, list_item);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_get_head --
+ *
+ * Returns the entry from the head of the list. Assumes that the list is
+ * locked.
+ *
+ * Results:
+ * Pointer to entry.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct queue_pair_entry *
+queue_pair_list_get_head(struct queue_pair_list *qp_list)
+{
+
+ return (vmci_list_first(&qp_list->head));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qp_guest_endpoints_init --
+ *
+ *     Initializes the data structures that keep track of queue pair guest
+ *     endpoints.
+ *
+ * Results:
+ * VMCI_SUCCESS on success and appropriate failure code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qp_guest_endpoints_init(void)
+{
+
+ return (queue_pair_list_init(&qp_guest_endpoints));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qp_guest_endpoints_exit --
+ *
+ * Destroys all guest queue pair endpoints. If active guest queue pairs
+ * still exist, hypercalls to attempt detach from these queue pairs will be
+ * made. Any failure to detach is silently ignored.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_qp_guest_endpoints_exit(void)
+{
+ struct qp_guest_endpoint *entry;
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+
+ while ((entry =
+ (struct qp_guest_endpoint *)queue_pair_list_get_head(
+ &qp_guest_endpoints)) != NULL) {
+ /*
+ * Don't make a hypercall for local QueuePairs.
+ */
+ if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL))
+ vmci_queue_pair_detach_hypercall(entry->qp.handle);
+ /*
+ * We cannot fail the exit, so let's reset ref_count.
+ */
+ entry->qp.ref_count = 0;
+ queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp);
+ qp_guest_endpoint_destroy(entry);
+ }
+
+ atomic_store_int(&qp_guest_endpoints.hibernate, 0);
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ queue_pair_list_destroy(&qp_guest_endpoints);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qp_guest_endpoints_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_qp_guest_endpoints_sync(void)
+{
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * qp_guest_endpoint_create --
+ *
+ * Allocates and initializes a qp_guest_endpoint structure. Allocates a
+ * QueuePair rid (and handle) iff the given entry has an invalid handle.
+ * 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved handles. Assumes
+ * that the QP list mutex is held by the caller.
+ *
+ * Results:
+ *     Pointer to the initialized structure, or NULL on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct qp_guest_endpoint *
+qp_guest_endpoint_create(struct vmci_handle handle, vmci_id peer,
+ uint32_t flags, uint64_t produce_size, uint64_t consume_size,
+ void *produce_q, void *consume_q)
+{
+ struct qp_guest_endpoint *entry;
+ static vmci_id queue_pair_rid;
+ const uint64_t num_ppns = CEILING(produce_size, PAGE_SIZE) +
+ CEILING(consume_size, PAGE_SIZE) +
+ 2; /* One page each for the queue headers. */
+
+ queue_pair_rid = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+
+ ASSERT((produce_size || consume_size) && produce_q && consume_q);
+
+ if (VMCI_HANDLE_INVALID(handle)) {
+ vmci_id context_id = vmci_get_context_id();
+ vmci_id old_rid = queue_pair_rid;
+
+ /*
+ * Generate a unique QueuePair rid. Keep on trying until we
+ * wrap around in the RID space.
+ */
+ ASSERT(old_rid > VMCI_RESERVED_RESOURCE_ID_MAX);
+ do {
+ handle = VMCI_MAKE_HANDLE(context_id, queue_pair_rid);
+ entry =
+ (struct qp_guest_endpoint *)
+ queue_pair_list_find_entry(&qp_guest_endpoints,
+ handle);
+ queue_pair_rid++;
+ if (UNLIKELY(!queue_pair_rid)) {
+ /*
+ * Skip the reserved rids.
+ */
+ queue_pair_rid =
+ VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+ }
+ } while (entry && queue_pair_rid != old_rid);
+
+ if (UNLIKELY(entry != NULL)) {
+ ASSERT(queue_pair_rid == old_rid);
+ /*
+ * We wrapped around --- no rids were free.
+ */
+ return (NULL);
+ }
+ }
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle) &&
+ queue_pair_list_find_entry(&qp_guest_endpoints, handle) == NULL);
+ entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL);
+ if (entry) {
+ entry->qp.handle = handle;
+ entry->qp.peer = peer;
+ entry->qp.flags = flags;
+ entry->qp.produce_size = produce_size;
+ entry->qp.consume_size = consume_size;
+ entry->qp.ref_count = 0;
+ entry->num_ppns = num_ppns;
+ memset(&entry->ppn_set, 0, sizeof(entry->ppn_set));
+ entry->produce_q = produce_q;
+ entry->consume_q = consume_q;
+ }
+ return (entry);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * qp_guest_endpoint_destroy --
+ *
+ * Frees a qp_guest_endpoint structure.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
+{
+
+ ASSERT(entry);
+ ASSERT(entry->qp.ref_count == 0);
+
+ vmci_free_ppn_set(&entry->ppn_set);
+ vmci_free_queue(entry->produce_q, entry->qp.produce_size);
+ vmci_free_queue(entry->consume_q, entry->qp.consume_size);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_alloc_hypercall --
+ *
+ * Helper to make a QueuePairAlloc hypercall when the driver is
+ * supporting a guest device.
+ *
+ * Results:
+ * Result of the hypercall.
+ *
+ * Side effects:
+ * Memory is allocated & freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+static int
+vmci_queue_pair_alloc_hypercall(const struct qp_guest_endpoint *entry)
+{
+ struct vmci_queue_pair_alloc_msg *alloc_msg;
+ size_t msg_size;
+ int result;
+
+ if (!entry || entry->num_ppns <= 2)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ ASSERT(!(entry->qp.flags & VMCI_QPFLAG_LOCAL));
+
+ msg_size = sizeof(*alloc_msg) + (size_t)entry->num_ppns * sizeof(PPN);
+ alloc_msg = vmci_alloc_kernel_mem(msg_size, VMCI_MEMORY_NORMAL);
+ if (!alloc_msg)
+ return (VMCI_ERROR_NO_MEM);
+
+ alloc_msg->hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_QUEUEPAIR_ALLOC);
+ alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
+ alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
+ alloc_msg->handle = entry->qp.handle;
+ alloc_msg->peer = entry->qp.peer;
+ alloc_msg->flags = entry->qp.flags;
+ alloc_msg->produce_size = entry->qp.produce_size;
+ alloc_msg->consume_size = entry->qp.consume_size;
+ alloc_msg->num_ppns = entry->num_ppns;
+ result = vmci_populate_ppn_list((uint8_t *)alloc_msg +
+ sizeof(*alloc_msg), &entry->ppn_set);
+ if (result == VMCI_SUCCESS)
+ result = vmci_send_datagram((struct vmci_datagram *)alloc_msg);
+ vmci_free_kernel_mem(alloc_msg, msg_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_alloc_guest_work --
+ *
+ * This function handles the actual allocation of a VMCI queue pair guest
+ * endpoint. Allocates physical pages for the queue pair. It makes
+ * OS-dependent calls through generic wrappers.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle,
+ struct vmci_queue **produce_q, uint64_t produce_size,
+ struct vmci_queue **consume_q, uint64_t consume_size, vmci_id peer,
+ uint32_t flags, vmci_privilege_flags priv_flags)
+{
+ struct qp_guest_endpoint *queue_pair_entry = NULL;
+ void *my_consume_q = NULL;
+ void *my_produce_q = NULL;
+ const uint64_t num_consume_pages = CEILING(consume_size, PAGE_SIZE) + 1;
+ const uint64_t num_produce_pages = CEILING(produce_size, PAGE_SIZE) + 1;
+ int result;
+
+ ASSERT(handle && produce_q && consume_q &&
+ (produce_size || consume_size));
+
+ if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
+ return (VMCI_ERROR_NO_ACCESS);
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+
+ if ((atomic_load_int(&qp_guest_endpoints.hibernate) == 1) &&
+ !(flags & VMCI_QPFLAG_LOCAL)) {
+ /*
+ * While guest OS is in hibernate state, creating non-local
+ * queue pairs is not allowed after the point where the VMCI
+ * guest driver converted the existing queue pairs to local
+ * ones.
+ */
+
+ result = VMCI_ERROR_UNAVAILABLE;
+ goto error;
+ }
+
+ if ((queue_pair_entry =
+ (struct qp_guest_endpoint *)queue_pair_list_find_entry(
+ &qp_guest_endpoints, *handle)) != NULL) {
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ /* Local attach case. */
+ if (queue_pair_entry->qp.ref_count > 1) {
+ VMCI_LOG_DEBUG(LGPFX"Error attempting to "
+ "attach more than once.\n");
+ result = VMCI_ERROR_UNAVAILABLE;
+ goto error_keep_entry;
+ }
+
+ if (queue_pair_entry->qp.produce_size != consume_size ||
+ queue_pair_entry->qp.consume_size != produce_size ||
+ queue_pair_entry->qp.flags !=
+ (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
+ VMCI_LOG_DEBUG(LGPFX"Error mismatched "
+ "queue pair in local attach.\n");
+ result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
+ goto error_keep_entry;
+ }
+
+ /*
+ * Do a local attach. We swap the consume and produce
+ * queues for the attacher and deliver an attach event.
+ */
+ result = queue_pair_notify_peer_local(true, *handle);
+ if (result < VMCI_SUCCESS)
+ goto error_keep_entry;
+ my_produce_q = queue_pair_entry->consume_q;
+ my_consume_q = queue_pair_entry->produce_q;
+ goto out;
+ }
+ result = VMCI_ERROR_ALREADY_EXISTS;
+ goto error_keep_entry;
+ }
+
+ my_produce_q = vmci_alloc_queue(produce_size, flags);
+ if (!my_produce_q) {
+ VMCI_LOG_WARNING(LGPFX"Error allocating pages for produce "
+ "queue.\n");
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ my_consume_q = vmci_alloc_queue(consume_size, flags);
+ if (!my_consume_q) {
+ VMCI_LOG_WARNING(LGPFX"Error allocating pages for consume "
+ "queue.\n");
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
+ produce_size, consume_size, my_produce_q, my_consume_q);
+ if (!queue_pair_entry) {
+ VMCI_LOG_WARNING(LGPFX"Error allocating memory in %s.\n",
+ __FUNCTION__);
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ result = vmci_alloc_ppn_set(my_produce_q, num_produce_pages,
+ my_consume_q, num_consume_pages, &queue_pair_entry->ppn_set);
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"vmci_alloc_ppn_set failed.\n");
+ goto error;
+ }
+
+ /*
+ * It's only necessary to notify the host if this queue pair will be
+ * attached to from another context.
+ */
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ /* Local create case. */
+ vmci_id context_id = vmci_get_context_id();
+
+ /*
+ * Enforce similar checks on local queue pairs as we do for
+ * regular ones. The handle's context must match the creator
+ * or attacher context id (here they are both the current
+ * context id) and the attach-only flag cannot exist during
+ * create. We also ensure specified peer is this context or
+ * an invalid one.
+ */
+ if (queue_pair_entry->qp.handle.context != context_id ||
+ (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
+ queue_pair_entry->qp.peer != context_id)) {
+ result = VMCI_ERROR_NO_ACCESS;
+ goto error;
+ }
+
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
+ result = VMCI_ERROR_NOT_FOUND;
+ goto error;
+ }
+ } else {
+ result = vmci_queue_pair_alloc_hypercall(queue_pair_entry);
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(
+ LGPFX"vmci_queue_pair_alloc_hypercall result = "
+ "%d.\n", result);
+ goto error;
+ }
+ }
+
+ queue_pair_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
+
+out:
+ queue_pair_entry->qp.ref_count++;
+ *handle = queue_pair_entry->qp.handle;
+ *produce_q = (struct vmci_queue *)my_produce_q;
+ *consume_q = (struct vmci_queue *)my_consume_q;
+
+ /*
+ * We should initialize the queue pair header pages on a local queue
+ * pair create. For non-local queue pairs, the hypervisor initializes
+ * the header pages in the create step.
+ */
+ if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
+ queue_pair_entry->qp.ref_count == 1) {
+ vmci_queue_header_init((*produce_q)->q_header, *handle);
+ vmci_queue_header_init((*consume_q)->q_header, *handle);
+ }
+
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+
+ return (VMCI_SUCCESS);
+
+error:
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ if (queue_pair_entry) {
+ /* The queues will be freed inside the destroy routine. */
+ qp_guest_endpoint_destroy(queue_pair_entry);
+ } else {
+ if (my_produce_q)
+ vmci_free_queue(my_produce_q, produce_size);
+ if (my_consume_q)
+ vmci_free_queue(my_consume_q, consume_size);
+ }
+ return (result);
+
+error_keep_entry:
+ /* This path should only be used when an existing entry was found. */
+ ASSERT(queue_pair_entry->qp.ref_count > 0);
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_detach_hypercall --
+ *
+ * Helper to make a QueuePairDetach hypercall when the driver is supporting
+ * a guest device.
+ *
+ * Results:
+ * Result of the hypercall.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_queue_pair_detach_hypercall(struct vmci_handle handle)
+{
+ struct vmci_queue_pair_detach_msg detach_msg;
+
+ detach_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_QUEUEPAIR_DETACH);
+ detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ detach_msg.hdr.payload_size = sizeof(handle);
+ detach_msg.handle = handle;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&detach_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_detach_guest_work --
+ *
+ * Helper for VMCI QueuePair detach interface. Frees the physical pages for
+ * the queue pair.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory may be freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_queue_pair_detach_guest_work(struct vmci_handle handle)
+{
+ struct qp_guest_endpoint *entry;
+ int result;
+ uint32_t ref_count;
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle));
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+
+ entry = (struct qp_guest_endpoint *)queue_pair_list_find_entry(
+ &qp_guest_endpoints, handle);
+ if (!entry) {
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ return (VMCI_ERROR_NOT_FOUND);
+ }
+
+ ASSERT(entry->qp.ref_count >= 1);
+
+ if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ result = VMCI_SUCCESS;
+
+ if (entry->qp.ref_count > 1) {
+ result = queue_pair_notify_peer_local(false, handle);
+
+ /*
+ * We can fail to notify a local queuepair because we
+ * can't allocate. We still want to release the entry
+ * if that happens, so don't bail out yet.
+ */
+ }
+ } else {
+ result = vmci_queue_pair_detach_hypercall(handle);
+ if (entry->hibernate_failure) {
+ if (result == VMCI_ERROR_NOT_FOUND) {
+
+ /*
+ * If a queue pair detach failed when entering
+ * hibernation, the guest driver and the device
+ * may disagree on its existence when coming
+ * out of hibernation. The guest driver will
+ * regard it as a non-local queue pair, but
+ * the device state is gone, since the device
+ * has been powered off. In this case, we
+ * treat the queue pair as a local queue pair
+ * with no peer.
+ */
+
+ ASSERT(entry->qp.ref_count == 1);
+ result = VMCI_SUCCESS;
+ }
+ }
+ if (result < VMCI_SUCCESS) {
+
+ /*
+ * We failed to notify a non-local queuepair. That other
+ * queuepair might still be accessing the shared
+ * memory, so don't release the entry yet. It will get
+ * cleaned up by vmci_queue_pair_exit() if necessary
+ * (assuming we are going away, otherwise why did this
+ * fail?).
+ */
+
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ return (result);
+ }
+ }
+
+ /*
+ * If we get here then we either failed to notify a local queuepair, or
+ * we succeeded in all cases. Release the entry if required.
+ */
+
+ entry->qp.ref_count--;
+ if (entry->qp.ref_count == 0)
+ queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp);
+
+	/*
+	 * If we didn't remove the entry, this could change once we unlock.
+	 * The 0xffffffff value does not matter; it only silences the compiler.
+	 */
+	ref_count = entry ? entry->qp.ref_count : 0xffffffff;
+
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+
+ if (ref_count == 0)
+ qp_guest_endpoint_destroy(entry);
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_notify_peer_local --
+ *
+ * Dispatches a queue pair event message directly into the local event
+ * queue.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+queue_pair_notify_peer_local(bool attach, struct vmci_handle handle)
+{
+ struct vmci_event_msg *e_msg;
+ struct vmci_event_payload_qp *e_payload;
+	/* buf is only 48 bytes. */
+	char buf[sizeof(*e_msg) + sizeof(*e_payload)];
+	vmci_id context_id;
+
+	context_id = vmci_get_context_id();
+
+ e_msg = (struct vmci_event_msg *)buf;
+ e_payload = vmci_event_msg_payload(e_msg);
+
+ e_msg->hdr.dst = VMCI_MAKE_HANDLE(context_id, VMCI_EVENT_HANDLER);
+ e_msg->hdr.src = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+ e_msg->hdr.payload_size = sizeof(*e_msg) + sizeof(*e_payload) -
+ sizeof(e_msg->hdr);
+ e_msg->event_data.event = attach ? VMCI_EVENT_QP_PEER_ATTACH :
+ VMCI_EVENT_QP_PEER_DETACH;
+ e_payload->peer_id = context_id;
+ e_payload->handle = handle;
+
+ return (vmci_event_dispatch((struct vmci_datagram *)e_msg));
+}
Index: sys/dev/vmware/vmci/vmci_resource.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_resource.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI Resource Access Control API. */
+
+#ifndef _VMCI_RESOURCE_H_
+#define _VMCI_RESOURCE_H_
+
+#include "vmci_defs.h"
+#include "vmci_hashtable.h"
+#include "vmci_kernel_if.h"
+
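+/* Recovers the containing object from a pointer to one of its members. */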
+#define RESOURCE_CONTAINER(ptr, type, member) \
+ ((type *)((char *)(ptr) - offsetof(type, member)))
+
+typedef void (*vmci_resource_free_cb)(void *resource);
+
+typedef enum {
+ VMCI_RESOURCE_TYPE_ANY,
+ VMCI_RESOURCE_TYPE_API,
+ VMCI_RESOURCE_TYPE_GROUP,
+ VMCI_RESOURCE_TYPE_DATAGRAM,
+ VMCI_RESOURCE_TYPE_DOORBELL,
+} vmci_resource_type;
+
+struct vmci_resource {
+ struct vmci_hash_entry hash_entry;
+ vmci_resource_type type;
+	/* Callback to free the container object when its ref count drops to 0. */
+ vmci_resource_free_cb container_free_cb;
+ /* Container object reference. */
+ void *container_object;
+};
+
+int vmci_resource_init(void);
+void vmci_resource_exit(void);
+void vmci_resource_sync(void);
+
+vmci_id vmci_resource_get_id(vmci_id context_id);
+
+int vmci_resource_add(struct vmci_resource *resource,
+ vmci_resource_type resource_type,
+ struct vmci_handle resource_handle,
+ vmci_resource_free_cb container_free_cb, void *container_object);
+void vmci_resource_remove(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type);
+struct vmci_resource *vmci_resource_get(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type);
+void vmci_resource_hold(struct vmci_resource *resource);
+int vmci_resource_release(struct vmci_resource *resource);
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource);
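+
+/*
+ * Typical usage (illustrative sketch only; my_object and my_object_free are
+ * hypothetical names, not part of this change):
+ *
+ *	struct my_object {
+ *		struct vmci_resource resource;
+ *	};
+ *
+ *	static void my_object_free(void *obj);
+ *
+ *	error = vmci_resource_add(&obj->resource, VMCI_RESOURCE_TYPE_DOORBELL,
+ *	    handle, my_object_free, obj);
+ *	...
+ *	vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DOORBELL);
+ */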
+
+#endif /* !_VMCI_RESOURCE_H_ */
Index: sys/dev/vmware/vmci/vmci_resource.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_resource.c
@@ -0,0 +1,395 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Implementation of the VMCI Resource Access Control API. */
+
+#include "vmci_driver.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+
+#define LGPFX "vmci_resource: "
+
+/* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */
+static uint32_t resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+static vmci_lock resource_id_lock;
+
+static void vmci_resource_do_remove(struct vmci_resource *resource);
+
+static struct vmci_hashtable *resource_table = NULL;
+
+/* Public Resource Access Control API. */
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_init --
+ *
+ * Initializes the VMCI Resource Access Control API. Creates a lock and a
+ * hashtable to hold all resources.
+ *
+ * Results:
+ * VMCI_SUCCESS if successful, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_resource_init(void)
+{
+ int err;
+
+ err = vmci_init_lock(&resource_id_lock, "VMCI RID lock");
+ if (err < VMCI_SUCCESS)
+ return (err);
+
+ resource_table = vmci_hashtable_create(128);
+ if (resource_table == NULL) {
+		VMCI_LOG_WARNING(LGPFX"Failed creating a resource hash table "
+		    "for VMCI.\n");
+ vmci_cleanup_lock(&resource_id_lock);
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_exit --
+ *
+ * Cleans up resources.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_exit(void)
+{
+
+	/* Cleanup resources. */
+ vmci_cleanup_lock(&resource_id_lock);
+
+ if (resource_table)
+ vmci_hashtable_destroy(resource_table);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_get_id --
+ *
+ * Return a resource ID. Resource IDs 0 through VMCI_RESERVED_RESOURCE_ID_MAX
+ * are reserved, so we start from VMCI_RESERVED_RESOURCE_ID_MAX + 1.
+ *
+ * Result:
+ * VMCI resource id on success, VMCI_INVALID_ID on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+vmci_id
+vmci_resource_get_id(vmci_id context_id)
+{
+ vmci_id current_rid;
+ vmci_id old_rid;
+ bool found_rid;
+
+ old_rid = resource_id;
+ found_rid = false;
+
+ /*
+ * Generate a unique resource ID. Keep on trying until we wrap around
+ * in the RID space.
+ */
+ ASSERT(old_rid > VMCI_RESERVED_RESOURCE_ID_MAX);
+
+ do {
+ struct vmci_handle handle;
+
+ vmci_grab_lock(&resource_id_lock);
+ current_rid = resource_id;
+ handle = VMCI_MAKE_HANDLE(context_id, current_rid);
+ resource_id++;
+ if (UNLIKELY(resource_id == VMCI_INVALID_ID)) {
+ /* Skip the reserved rids. */
+ resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+ }
+ vmci_release_lock(&resource_id_lock);
+ found_rid = !vmci_hashtable_entry_exists(resource_table,
+ handle);
+ } while (!found_rid && resource_id != old_rid);
+
+ if (UNLIKELY(!found_rid))
+ return (VMCI_INVALID_ID);
+ else
+ return (current_rid);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_add --
+ *
+ * Add resource to hashtable.
+ *
+ * Results:
+ * VMCI_SUCCESS if successful, error code if not.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_resource_add(struct vmci_resource *resource,
+ vmci_resource_type resource_type, struct vmci_handle resource_handle,
+ vmci_resource_free_cb container_free_cb, void *container_object)
+{
+ int result;
+
+ ASSERT(resource);
+
+ if (VMCI_HANDLE_EQUAL(resource_handle, VMCI_INVALID_HANDLE)) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid argument resource "
+ "(handle=0x%x:0x%x).\n", resource_handle.context,
+ resource_handle.resource);
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ vmci_hashtable_init_entry(&resource->hash_entry, resource_handle);
+ resource->type = resource_type;
+ resource->container_free_cb = container_free_cb;
+ resource->container_object = container_object;
+
+ /* Add resource to hashtable. */
+ result = vmci_hashtable_add_entry(resource_table,
+ &resource->hash_entry);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to add entry to hash table "
+ "(result=%d).\n", result);
+ return (result);
+ }
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_remove --
+ *
+ * Remove resource from hashtable.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_remove(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type)
+{
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(resource_handle, resource_type);
+ if (resource == NULL)
+ return;
+
+ /* Remove resource from hashtable. */
+ vmci_hashtable_remove_entry(resource_table, &resource->hash_entry);
+
+ vmci_resource_release(resource);
+ /* resource could be freed by now. */
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_get --
+ *
+ * Get resource from hashtable.
+ *
+ * Results:
+ * Resource if successful. Otherwise NULL.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_resource *
+vmci_resource_get(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type)
+{
+ struct vmci_hash_entry *entry;
+ struct vmci_resource *resource;
+
+ entry = vmci_hashtable_get_entry(resource_table, resource_handle);
+ if (entry == NULL)
+ return (NULL);
+ resource = RESOURCE_CONTAINER(entry, struct vmci_resource, hash_entry);
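+	/*
+	 * If the resource is not of the requested type, drop the reference
+	 * obtained from vmci_hashtable_get_entry() before returning NULL.
+	 */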
+ if (resource_type == VMCI_RESOURCE_TYPE_ANY ||
+ resource->type == resource_type) {
+ return (resource);
+ }
+ vmci_hashtable_release_entry(resource_table, entry);
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_hold --
+ *
+ * Hold the given resource. This will hold the hashtable entry. This is like
+ * doing a vmci_resource_get() but without having to look up the resource by
+ * handle.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_hold(struct vmci_resource *resource)
+{
+
+ ASSERT(resource);
+ vmci_hashtable_hold_entry(resource_table, &resource->hash_entry);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_do_remove --
+ *
+ * Deallocates data structures associated with the given resource and
+ * invokes any callback registered for the resource.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May deallocate memory and invoke a callback for the removed resource.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_resource_do_remove(struct vmci_resource *resource)
+{
+
+ ASSERT(resource);
+
+ if (resource->container_free_cb) {
+ resource->container_free_cb(resource->container_object);
+		/* Resource has been freed; don't dereference it. */
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_release --
+ *
+ * Releases a previously obtained reference to the given resource.
+ *
+ * Results:
+ * Result of releasing the hashtable entry; VMCI_SUCCESS_ENTRY_DEAD if
+ * this was the last reference and the resource was removed.
+ *
+ * Side effects:
+ * The resource's container_free_cb will get called if this was the last
+ * reference.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_resource_release(struct vmci_resource *resource)
+{
+ int result;
+
+ ASSERT(resource);
+
+ result = vmci_hashtable_release_entry(resource_table,
+ &resource->hash_entry);
+ if (result == VMCI_SUCCESS_ENTRY_DEAD)
+ vmci_resource_do_remove(resource);
+
+ /*
+ * We propagate the information back to caller in case it wants to know
+ * whether entry was freed.
+ */
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_handle --
+ *
+ * Get the handle for the given resource.
+ *
+ * Results:
+ * The resource's associated handle.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_handle
+vmci_resource_handle(struct vmci_resource *resource)
+{
+
+ ASSERT(resource);
+ return (resource->hash_entry.handle);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_sync(void)
+{
+
+ vmci_hashtable_sync(resource_table);
+}
Index: sys/dev/vmware/vmci/vmci_utils.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_utils.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Some common utilities used by the VMCI kernel module. */
+
+#ifndef _VMCI_UTILS_H_
+#define _VMCI_UTILS_H_
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hash_id --
+ *
+ * Hash function used by the Simple Datagram API. Hashes only a VMCI ID (not
+ * the full VMCI handle). Based on the djb2 hash function by Dan Bernstein.
+ *
+ * Result:
+ * The hash table index for the given id, in the range [0, size);
+ * size is expected to be a power of two.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+vmci_hash_id(vmci_id id, unsigned size)
+{
+ unsigned i;
+ int hash = 5381;
+
+ for (i = 0; i < sizeof(id); i++)
+ hash = ((hash << 5) + hash) + (uint8_t)(id >> (i * 8));
+
+ return (hash & (size - 1));
+}
+
+#endif /* !_VMCI_UTILS_H_ */
Index: sys/modules/vmware/Makefile
===================================================================
--- sys/modules/vmware/Makefile
+++ sys/modules/vmware/Makefile
@@ -23,6 +23,6 @@
# SUCH DAMAGE.
#
-SUBDIR= vmxnet3
+SUBDIR= vmci vmxnet3
.include <bsd.subdir.mk>
Index: sys/modules/vmware/vmci/Makefile
===================================================================
--- /dev/null
+++ sys/modules/vmware/vmci/Makefile
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+#
+# SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+#
+
+.PATH: ${.CURDIR}/../../../dev/vmware/vmci
+
+KMOD= vmci
+SRCS= vmci.c vmci_datagram.c vmci_doorbell.c vmci_driver.c vmci_event.c
+SRCS+= vmci_hashtable.c vmci_kernel_if.c vmci_qpair.c vmci_queue_pair.c
+SRCS+= vmci_resource.c
+SRCS+= device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>
