D14289.id39118.diff

Index: share/man/man4/vmci.4
===================================================================
--- /dev/null
+++ share/man/man4/vmci.4
@@ -0,0 +1,49 @@
+.Dd February 7, 2018
+.Dt VMCI 4
+.Os
+.Sh NAME
+.Nm vmci
+.Nd VMware Virtual Machine Communication Interface
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device vmci"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+vmci_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for the VMware Virtual Machine Communication Interface
+(VMCI) in virtual machines running on VMware hypervisors.
+.Pp
+VMCI allows virtual machines to communicate with host kernel modules and with
+the VMware hypervisor.
+User-level applications in a virtual machine can use VMCI through vSockets
+(also known as VMCI Sockets, not included in this module), a socket address
+family designed to be compatible with UDP and TCP at the interface level.
+Today, VMCI and vSockets are used by various VMware Tools components inside
+the guest for zero-configuration, network-less access to VMware host services.
+In addition, vSockets are used for applications where network access of the
+virtual machine is restricted or non-existent, for example virtual machines
+communicating with device proxies for proprietary hardware running as host
+applications, or automated testing of applications inside virtual machines.
+.Pp
+In a virtual machine, VMCI is exposed as a regular PCI device.
+The primary communication mechanisms supported are a point-to-point
+bidirectional transport based on a pair of memory-mapped queues, and
+asynchronous notifications in the form of datagrams and doorbells.
+These features are available to kernel-level components such as vSockets
+through the VMCI kernel API, which also delivers events related to the state
+of the VMCI communication channels and of the virtual machine itself.
+.Pp
+For additional information about the use of VMCI, and vSockets in particular,
+refer to the vSocket Programming Guide available at
+https://www.vmware.com/support/developer/vmci-sdk/.
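
A minimal sketch of how a guest kernel component might use the datagram API
added by this patch to send a message to a hypervisor service; it is not part
of the patch itself. The destination context and resource IDs are placeholder
values, VMCI_MAKE_HANDLE() and VMCI_INVALID_ID come from vmci_defs.h, and
error handling is reduced to the essentials.

static int
example_recv_cb(void *client_data, struct vmci_datagram *msg)
{

	/* Called when a datagram arrives for our handle; payload follows msg. */
	return (0);
}

static void
example_send(void)
{
	struct {
		struct vmci_datagram	hdr;
		char			payload[32];
	} dg;
	struct vmci_handle src_handle;

	/* Register a local endpoint; the device picks the resource ID. */
	if (vmci_datagram_create_handle(VMCI_INVALID_ID, VMCI_FLAG_DG_NONE,
	    example_recv_cb, NULL, &src_handle) != VMCI_SUCCESS)
		return;

	/* Placeholder destination handle (context ID 2, resource ID 1). */
	dg.hdr.dst = VMCI_MAKE_HANDLE(2, 1);
	dg.hdr.src = src_handle;
	dg.hdr.payload_size = sizeof(dg.payload);
	memset(dg.payload, 0, sizeof(dg.payload));
	memcpy(dg.payload, "hello", sizeof("hello"));

	/* Returns the number of bytes sent, or a negative VMCI error. */
	(void)vmci_datagram_send(&dg.hdr);
	(void)vmci_datagram_destroy_handle(src_handle);
}
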
Index: sys/conf/files.amd64
===================================================================
--- sys/conf/files.amd64
+++ sys/conf/files.amd64
@@ -472,6 +472,16 @@
dev/uart/uart_cpu_x86.c optional uart
dev/viawd/viawd.c optional viawd
dev/vmware/vmxnet3/if_vmx.c optional vmx
+dev/vmware/vmci/vmci.c optional vmci
+dev/vmware/vmci/vmci_datagram.c optional vmci
+dev/vmware/vmci/vmci_doorbell.c optional vmci
+dev/vmware/vmci/vmci_driver.c optional vmci
+dev/vmware/vmci/vmci_event.c optional vmci
+dev/vmware/vmci/vmci_hashtable.c optional vmci
+dev/vmware/vmci/vmci_kernel_if.c optional vmci
+dev/vmware/vmci/vmci_qpair.c optional vmci
+dev/vmware/vmci/vmci_queue_pair.c optional vmci
+dev/vmware/vmci/vmci_resource.c optional vmci
dev/wbwd/wbwd.c optional wbwd
dev/xen/pci/xen_acpi_pci.c optional xenhvm
dev/xen/pci/xen_pci.c optional xenhvm
Index: sys/conf/files.i386
===================================================================
--- sys/conf/files.i386
+++ sys/conf/files.i386
@@ -323,6 +323,16 @@
dev/uart/uart_cpu_x86.c optional uart
dev/viawd/viawd.c optional viawd
dev/vmware/vmxnet3/if_vmx.c optional vmx
+dev/vmware/vmci/vmci.c optional vmci
+dev/vmware/vmci/vmci_datagram.c optional vmci
+dev/vmware/vmci/vmci_doorbell.c optional vmci
+dev/vmware/vmci/vmci_driver.c optional vmci
+dev/vmware/vmci/vmci_event.c optional vmci
+dev/vmware/vmci/vmci_hashtable.c optional vmci
+dev/vmware/vmci/vmci_kernel_if.c optional vmci
+dev/vmware/vmci/vmci_qpair.c optional vmci
+dev/vmware/vmci/vmci_queue_pair.c optional vmci
+dev/vmware/vmci/vmci_resource.c optional vmci
dev/acpica/acpi_if.m standard
dev/acpica/acpi_hpet.c optional acpi
dev/acpica/acpi_timer.c optional acpi
Index: sys/dev/vmware/vmci/vmci.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci.h
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
+
+#ifndef _VMCI_H_
+#define _VMCI_H_
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+
+#include <machine/bus.h>
+
+#include "vmci_datagram.h"
+#include "vmci_kernel_if.h"
+
+/* VMCI device vendor and device ID */
+#define VMCI_VMWARE_VENDOR_ID 0x15AD
+#define VMCI_VMWARE_DEVICE_ID 0x0740
+
+#define VMCI_VERSION 1
+
+struct vmci_dma_alloc {
+ bus_dma_tag_t dma_tag;
+ caddr_t dma_vaddr;
+ bus_addr_t dma_paddr;
+ bus_dmamap_t dma_map;
+ bus_size_t dma_size;
+};
+
+struct vmci_interrupt {
+ struct resource *vmci_irq;
+ int vmci_rid;
+ void *vmci_handler;
+};
+
+struct vmci_softc {
+ device_t vmci_dev;
+
+ struct mtx vmci_spinlock;
+
+ struct resource *vmci_res0;
+ bus_space_tag_t vmci_iot0;
+ bus_space_handle_t vmci_ioh0;
+ unsigned int vmci_ioaddr;
+ struct resource *vmci_res1;
+ bus_space_tag_t vmci_iot1;
+ bus_space_handle_t vmci_ioh1;
+
+ struct vmci_dma_alloc vmci_notifications_bitmap;
+
+ int vmci_num_intr;
+ vmci_intr_type vmci_intr_type;
+ struct vmci_interrupt vmci_intrs[VMCI_MAX_INTRS];
+ struct task vmci_interrupt_dq_task;
+ struct task vmci_interrupt_bm_task;
+
+ struct task vmci_delayed_work_task;
+ struct mtx vmci_delayed_work_lock;
+ vmci_list(vmci_delayed_work_info) vmci_delayed_work_infos;
+
+ unsigned int capabilities;
+};
+
+int vmci_dma_malloc(bus_size_t size, bus_size_t align,
+ struct vmci_dma_alloc *dma);
+void vmci_dma_free(struct vmci_dma_alloc *);
+int vmci_send_datagram(struct vmci_datagram *dg);
+int vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data);
+
+#endif /* !_VMCI_H_ */
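
For reference, a short sketch of how the DMA wrappers declared above are meant
to be used inside the driver, mirroring what vmci_config_capabilities() does
for the notification bitmap; error handling is trimmed for brevity.

static int
example_alloc_bitmap(void)
{
	struct vmci_dma_alloc bitmap_dma;
	int error;

	/* One page, byte alignment, exactly as the notification bitmap. */
	error = vmci_dma_malloc(PAGE_SIZE, 1, &bitmap_dma);
	if (error != 0)
		return (error);

	/* dma_vaddr is the kernel mapping, dma_paddr the bus address. */
	memset(bitmap_dma.dma_vaddr, 0, PAGE_SIZE);
	/* ... hand bitmap_dma.dma_paddr >> PAGE_SHIFT to the device ... */

	vmci_dma_free(&bitmap_dma);
	return (0);
}
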
Index: sys/dev/vmware/vmci/vmci.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci.c
@@ -0,0 +1,1174 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
+
+#include <sys/types.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/systm.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <machine/bus.h>
+
+#include "vmci.h"
+#include "vmci_doorbell.h"
+#include "vmci_driver.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_queue_pair.h"
+
+static int vmci_probe(device_t);
+static int vmci_attach(device_t);
+static int vmci_detach(device_t);
+static int vmci_shutdown(device_t);
+
+static int vmci_map_bars(struct vmci_softc *);
+static void vmci_unmap_bars(struct vmci_softc *);
+
+static int vmci_config_capabilities(struct vmci_softc *);
+
+static int vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
+ bus_size_t, struct vmci_dma_alloc *);
+static void vmci_dma_free_int(struct vmci_softc *,
+ struct vmci_dma_alloc *);
+
+static int vmci_config_interrupts(struct vmci_softc *);
+static int vmci_config_interrupt(struct vmci_softc *);
+static int vmci_check_intr_cnt(struct vmci_softc *);
+static int vmci_allocate_interrupt_resources(struct vmci_softc *);
+static int vmci_setup_interrupts(struct vmci_softc *);
+static void vmci_dismantle_interrupts(struct vmci_softc *);
+static void vmci_interrupt(void *);
+static void vmci_interrupt_bm(void *);
+static void dispatch_datagrams(void *, int);
+static void process_bitmap(void *, int);
+
+static void vmci_delayed_work_fn_cb(void *context, int data);
+
+static device_method_t vmci_methods[] = {
+ /* Device interface. */
+ DEVMETHOD(device_probe, vmci_probe),
+ DEVMETHOD(device_attach, vmci_attach),
+ DEVMETHOD(device_detach, vmci_detach),
+ DEVMETHOD(device_shutdown, vmci_shutdown),
+
+ DEVMETHOD_END
+};
+
+static driver_t vmci_driver = {
+ "vmci", vmci_methods, sizeof(struct vmci_softc)
+};
+
+static devclass_t vmci_devclass;
+DRIVER_MODULE(vmci, pci, vmci_driver, vmci_devclass, 0, 0);
+MODULE_VERSION(vmci, VMCI_VERSION);
+
+MODULE_DEPEND(vmci, pci, 1, 1, 1);
+
+static struct vmci_softc *vmci_sc;
+
+#define LGPFX "vmci: "
+/*
+ * Allocate a buffer for incoming datagrams globally to avoid repeated
+ * allocation in the interrupt handler's atomic context.
+ */
+static uint8_t *data_buffer = NULL;
+static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;
+
+struct vmci_delayed_work_info {
+ vmci_work_fn *work_fn;
+ void *data;
+ vmci_list_item(vmci_delayed_work_info) entry;
+};
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_probe --
+ *
+ * Probe to see if the VMCI device is present.
+ *
+ * Results:
+ * BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_probe(device_t dev)
+{
+
+ if (pci_get_vendor(dev) == VMCI_VMWARE_VENDOR_ID &&
+ pci_get_device(dev) == VMCI_VMWARE_DEVICE_ID) {
+ device_set_desc(dev,
+ "VMware Virtual Machine Communication Interface");
+
+ return (BUS_PROBE_DEFAULT);
+ }
+
+ return (ENXIO);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_attach --
+ *
+ * Attach VMCI device to the system after vmci_probe() has been called and
+ * the device has been detected.
+ *
+ * Results:
+ * 0 if success, ENXIO otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_attach(device_t dev)
+{
+ struct vmci_softc *sc;
+ int error, i;
+
+ sc = device_get_softc(dev);
+ sc->vmci_dev = dev;
+ vmci_sc = sc;
+
+ data_buffer = NULL;
+ sc->vmci_num_intr = 0;
+ for (i = 0; i < VMCI_MAX_INTRS; i++) {
+ sc->vmci_intrs[i].vmci_irq = NULL;
+ sc->vmci_intrs[i].vmci_handler = NULL;
+ }
+
+ TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
+ TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);
+
+ TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);
+
+ pci_enable_busmaster(dev);
+
+ mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
+ mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
+ NULL, MTX_DEF);
+
+ error = vmci_map_bars(sc);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
+ goto fail;
+ }
+
+ error = vmci_config_capabilities(sc);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
+ goto fail;
+ }
+
+ vmci_list_init(&sc->vmci_delayed_work_infos);
+
+ vmci_components_init();
+ vmci_util_init();
+ error = vmci_qp_guest_endpoints_init();
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
+ goto fail;
+ }
+
+ error = vmci_config_interrupts(sc);
+ if (error)
+ VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");
+
+fail:
+ if (error) {
+ vmci_detach(dev);
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_detach --
+ *
+ * Detach the VMCI device.
+ *
+ * Results:
+ * 0
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_detach(device_t dev)
+{
+ struct vmci_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ vmci_qp_guest_endpoints_exit();
+ vmci_util_exit();
+
+ vmci_dismantle_interrupts(sc);
+
+ vmci_components_cleanup();
+
+ taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
+ mtx_destroy(&sc->vmci_delayed_work_lock);
+
+ if (sc->vmci_res0 != NULL)
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);
+
+ if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
+ vmci_dma_free(&sc->vmci_notifications_bitmap);
+
+ vmci_unmap_bars(sc);
+
+ mtx_destroy(&sc->vmci_spinlock);
+
+ pci_disable_busmaster(dev);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_shutdown --
+ *
+ * This function is called during system shutdown. We don't do anything.
+ *
+ * Results:
+ * 0
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_shutdown(device_t dev)
+{
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_map_bars --
+ *
+ * Maps the PCI I/O and MMIO BARs.
+ *
+ * Results:
+ * 0 on success, ENXIO otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_map_bars(struct vmci_softc *sc)
+{
+ int rid;
+
+ /* Map the PCI I/O BAR: BAR0 */
+ rid = PCIR_BAR(0);
+ sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
+ &rid, RF_ACTIVE);
+ if (sc->vmci_res0 == NULL) {
+ VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
+ return (ENXIO);
+ }
+
+ sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
+ sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
+ sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);
+
+ /* Map the PCI MMIO BAR: BAR1 */
+ rid = PCIR_BAR(1);
+ sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
+ &rid, RF_ACTIVE);
+ if (sc->vmci_res1 == NULL) {
+ VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
+ return (ENXIO);
+ }
+
+ sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
+ sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_unmap_bars --
+ *
+ * Unmaps the VMCI PCI I/O and MMIO BARs.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_unmap_bars(struct vmci_softc *sc)
+{
+ int rid;
+
+ if (sc->vmci_res0 != NULL) {
+ rid = PCIR_BAR(0);
+ bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
+ sc->vmci_res0);
+ sc->vmci_res0 = NULL;
+ }
+
+ if (sc->vmci_res1 != NULL) {
+ rid = PCIR_BAR(1);
+ bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
+ sc->vmci_res1);
+ sc->vmci_res1 = NULL;
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_config_capabilities --
+ *
+ * Check the VMCI device capabilities and configure the device accordingly.
+ *
+ * Results:
+ * 0 if success, ENODEV otherwise.
+ *
+ * Side effects:
+ * Device capabilities are enabled.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_config_capabilities(struct vmci_softc *sc)
+{
+ unsigned long bitmap_PPN;
+ int error;
+
+ /*
+ * Verify that the VMCI device supports the capabilities that we
+ * need. Datagrams are necessary and notifications will be used
+ * if the device supports it.
+ */
+ sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CAPS_ADDR);
+
+ if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
+ VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
+ "datagrams.\n");
+ return (ENODEV);
+ }
+
+ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
+ sc->capabilities = VMCI_CAPS_DATAGRAM;
+ error = vmci_dma_malloc(PAGE_SIZE, 1,
+ &sc->vmci_notifications_bitmap);
+ if (error)
+ VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
+ "notification bitmap.\n");
+ else {
+ memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
+ PAGE_SIZE);
+ sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
+ }
+ } else
+ sc->capabilities = VMCI_CAPS_DATAGRAM;
+
+ /* Let the host know which capabilities we intend to use. */
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CAPS_ADDR, sc->capabilities);
+
+ /*
+ * Register notification bitmap with device if that capability is
+ * used.
+ */
+ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
+ bitmap_PPN =
+ sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
+ vmci_register_notification_bitmap(bitmap_PPN);
+ }
+
+ /* Check host capabilities. */
+ if (!vmci_check_host_capabilities())
+ return (ENODEV);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dmamap_cb --
+ *
+ * Callback to receive mapping information resulting from the load of a
+ * bus_dmamap_t via bus_dmamap_load()
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+ bus_addr_t *baddr = arg;
+
+ if (error == 0)
+ *baddr = segs->ds_addr;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_malloc_int --
+ *
+ * Internal function that allocates DMA memory.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * EINPROGRESS if mapping is deferred.
+ * EINVAL if the request was invalid.
+ *
+ * Side effects:
+ * DMA memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
+ struct vmci_dma_alloc *dma)
+{
+ int error;
+
+ bzero(dma, sizeof(struct vmci_dma_alloc));
+
+ error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
+ align, 0, /* alignment, bounds */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ size, /* maxsize */
+ 1, /* nsegments */
+ size, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, /* lockfunc */
+ NULL, /* lockfuncarg */
+ &dma->dma_tag);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
+ goto fail;
+ }
+
+ error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
+ BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
+ goto fail;
+ }
+
+ error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
+ size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
+ if (error) {
+ VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
+ goto fail;
+ }
+
+ dma->dma_size = size;
+
+fail:
+ if (error)
+ vmci_dma_free(dma);
+
+ return (error);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_malloc --
+ *
+ * This function is a wrapper around vmci_dma_malloc_int for callers
+ * outside of this module. Since we only support a single VMCI device, this
+ * wrapper provides access to the device softc structure.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * EINPROGRESS if mapping is deferred.
+ * EINVAL if the request was invalid.
+ *
+ * Side effects:
+ * DMA memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
+{
+
+ return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_free_int --
+ *
+ * Internal function that frees DMA memory.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Frees DMA memory.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
+{
+
+ if (dma->dma_tag != NULL) {
+ if (dma->dma_paddr != 0) {
+ bus_dmamap_sync(dma->dma_tag, dma->dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(dma->dma_tag, dma->dma_map);
+ }
+
+ if (dma->dma_vaddr != NULL)
+ bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
+ dma->dma_map);
+
+ bus_dma_tag_destroy(dma->dma_tag);
+ }
+ bzero(dma, sizeof(struct vmci_dma_alloc));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dma_free --
+ *
+ * This function is a wrapper around vmci_dma_free_int for callers outside
+ * of this module. Since we only support a single VMCI device, this wrapper
+ * provides access to the device softc structure.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Frees DMA memory.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_dma_free(struct vmci_dma_alloc *dma)
+{
+
+ vmci_dma_free_int(vmci_sc, dma);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_config_interrupts --
+ *
+ * Configures and enables interrupts. Try to configure MSI-X. If this fails,
+ * try to configure MSI. If even this fails, try legacy interrupts.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * ENODEV if the device doesn't support interrupts.
+ * ENXIO if the device configuration failed.
+ *
+ * Side effects:
+ * Interrupts get enabled if successful.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_config_interrupts(struct vmci_softc *sc)
+{
+ int error;
+
+ data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
+ if (data_buffer == NULL)
+ return (ENOMEM);
+
+ sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
+ error = vmci_config_interrupt(sc);
+ if (error) {
+ sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
+ error = vmci_config_interrupt(sc);
+ }
+ if (error) {
+ sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
+ error = vmci_config_interrupt(sc);
+ }
+ if (error)
+ return (error);
+
+ /* Enable specific interrupt bits. */
+ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
+ else
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);
+
+ /* Enable interrupts. */
+ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
+ VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_config_interrupt --
+ *
+ * Check the number of interrupts supported, allocate resources and setup
+ * interrupts.
+ *
+ * Results:
+ * 0 if success.
+ * ENOMEM if insufficient memory.
+ * ENODEV if the device doesn't support interrupts.
+ * ENXIO if the device configuration failed.
+ *
+ * Side effects:
+ * Resources get allocated and interrupts get setup (but not enabled) if
+ * successful.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_config_interrupt(struct vmci_softc *sc)
+{
+ int error;
+
+ error = vmci_check_intr_cnt(sc);
+ if (error)
+ return (error);
+
+ error = vmci_allocate_interrupt_resources(sc);
+ if (error)
+ return (error);
+
+ error = vmci_setup_interrupts(sc);
+ if (error)
+ return (error);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_check_intr_cnt --
+ *
+ * Check the number of interrupts supported by the device and ask PCI bus
+ * to allocate appropriate number of interrupts.
+ *
+ * Results:
+ * 0 if success.
+ * ENODEV if the device doesn't support any interrupts.
+ * ENXIO if the device configuration failed.
+ *
+ * Side effects:
+ * Resources get allocated on success.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_check_intr_cnt(struct vmci_softc *sc)
+{
+
+ if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
+ sc->vmci_num_intr = 1;
+ return (0);
+ }
+
+ /*
+ * Make sure that the device supports the required number of MSI/MSI-X
+ * messages. We try for 2 MSI-X messages but 1 is good too. We need at
+ * least 1 MSI message.
+ */
+ sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
+ pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);
+
+ if (!sc->vmci_num_intr) {
+ VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
+ " messages");
+ return (ENODEV);
+ }
+
+ sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
+ VMCI_MAX_INTRS : 1;
+ if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
+ if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
+ return (ENXIO);
+ } else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
+ if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
+ return (ENXIO);
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_allocate_interrupt_resources --
+ *
+ * Allocate resources necessary for interrupts.
+ *
+ * Results:
+ * 0 if success, ENXIO otherwise.
+ *
+ * Side effects:
+ * Resources get allocated on success.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_allocate_interrupt_resources(struct vmci_softc *sc)
+{
+ struct resource *irq;
+ int flags, i, rid;
+
+ flags = RF_ACTIVE;
+ flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
+ rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;
+
+ for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
+ irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
+ flags);
+ if (irq == NULL)
+ return (ENXIO);
+ sc->vmci_intrs[i].vmci_irq = irq;
+ sc->vmci_intrs[i].vmci_rid = rid;
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_setup_interrupts --
+ *
+ * Sets up the interrupts.
+ *
+ * Results:
+ * 0 if success, appropriate error code from bus_setup_intr otherwise.
+ *
+ * Side effects:
+ * Interrupt handler gets attached.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_setup_interrupts(struct vmci_softc *sc)
+{
+ struct vmci_interrupt *intr;
+ int error, flags;
+
+ flags = INTR_TYPE_NET | INTR_MPSAFE;
+ if (sc->vmci_num_intr > 1)
+ flags |= INTR_EXCL;
+
+ intr = &sc->vmci_intrs[0];
+ error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
+ vmci_interrupt, NULL, &intr->vmci_handler);
+ if (error)
+ return (error);
+ bus_describe_intr(sc->vmci_dev, intr->vmci_irq, intr->vmci_handler,
+ "vmci_interrupt");
+
+ if (sc->vmci_num_intr == 2) {
+ intr = &sc->vmci_intrs[1];
+ error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
+ NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
+ if (error)
+ return (error);
+ bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
+ intr->vmci_handler, "vmci_interrupt_bm");
+ }
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_interrupt --
+ *
+ * Interrupt handler for a legacy or MSI interrupt, or for the first MSI-X
+ * interrupt (vector VMCI_INTR_DATAGRAM).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_interrupt(void *arg)
+{
+
+ if (vmci_sc->vmci_num_intr == 2)
+ taskqueue_enqueue(taskqueue_swi,
+ &vmci_sc->vmci_interrupt_dq_task);
+ else {
+ unsigned int icr;
+
+ icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
+ if (icr == 0 || icr == 0xffffffff)
+ return;
+ if (icr & VMCI_ICR_DATAGRAM) {
+ taskqueue_enqueue(taskqueue_swi,
+ &vmci_sc->vmci_interrupt_dq_task);
+ icr &= ~VMCI_ICR_DATAGRAM;
+ }
+ if (icr & VMCI_ICR_NOTIFICATION) {
+ taskqueue_enqueue(taskqueue_swi,
+ &vmci_sc->vmci_interrupt_bm_task);
+ icr &= ~VMCI_ICR_NOTIFICATION;
+ }
+ if (icr != 0)
+ VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
+ "cause");
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_interrupt_bm --
+ *
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
+ * which is for the notification bitmap. Will only get called if we are
+ * using MSI-X with exclusive vectors.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_interrupt_bm(void *arg)
+{
+
+ ASSERT(vmci_sc->vmci_num_intr == 2);
+ taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * dispatch_datagrams --
+ *
+ * Reads and dispatches incoming datagrams.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Reads data from the device.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+dispatch_datagrams(void *context, int data)
+{
+
+ if (data_buffer == NULL)
+ VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
+ "present");
+
+ vmci_read_datagrams_from_port((vmci_io_handle) 0,
+ vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
+ data_buffer, data_buffer_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * process_bitmap --
+ *
+ * Scans the notification bitmap for raised flags, clears them and handles
+ * the notifications.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+process_bitmap(void *context, int data)
+{
+
+ if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL)
+ VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present");
+
+ vmci_scan_notification_bitmap(
+ vmci_sc->vmci_notifications_bitmap.dma_vaddr);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_dismantle_interrupts --
+ *
+ * Releases resources, detaches the interrupt handler and drains the task
+ * queue.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * No more interrupts.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_dismantle_interrupts(struct vmci_softc *sc)
+{
+ struct vmci_interrupt *intr;
+ int i;
+
+ for (i = 0; i < sc->vmci_num_intr; i++) {
+ intr = &sc->vmci_intrs[i];
+ if (intr->vmci_handler != NULL) {
+ bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
+ intr->vmci_handler);
+ intr->vmci_handler = NULL;
+ }
+ if (intr->vmci_irq != NULL) {
+ bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
+ intr->vmci_rid, intr->vmci_irq);
+ intr->vmci_irq = NULL;
+ intr->vmci_rid = -1;
+ }
+ }
+
+ if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
+ (sc->vmci_num_intr))
+ pci_release_msi(sc->vmci_dev);
+
+ taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
+ taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);
+
+ if (data_buffer != NULL)
+ free(data_buffer, M_DEVBUF);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_delayed_work_fn_cb --
+ *
+ * Callback function that executes the queued up delayed work functions.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_delayed_work_fn_cb(void *context, int data)
+{
+ vmci_list(vmci_delayed_work_info) temp_list;
+
+ vmci_list_init(&temp_list);
+
+ /*
+ * Swap vmci_delayed_work_infos list with the empty temp_list while
+ * holding a lock. vmci_delayed_work_infos would then be an empty list
+ * and temp_list would contain the elements from the original
+ * vmci_delayed_work_infos. Finally, iterate through temp_list
+ * executing the delayed callbacks.
+ */
+
+ mtx_lock(&vmci_sc->vmci_delayed_work_lock);
+ vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
+ vmci_delayed_work_info, entry);
+ mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
+
+ while (!vmci_list_empty(&temp_list)) {
+ struct vmci_delayed_work_info *delayed_work_info =
+ vmci_list_first(&temp_list);
+
+ delayed_work_info->work_fn(delayed_work_info->data);
+
+ vmci_list_remove(delayed_work_info, entry);
+ vmci_free_kernel_mem(delayed_work_info,
+ sizeof(*delayed_work_info));
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_schedule_delayed_work_fn --
+ *
+ * Schedule the specified callback.
+ *
+ * Results:
+ * 0 if success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
+{
+ struct vmci_delayed_work_info *delayed_work_info;
+
+ delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
+ VMCI_MEMORY_ATOMIC);
+
+ if (!delayed_work_info)
+ return (VMCI_ERROR_NO_MEM);
+
+ delayed_work_info->work_fn = work_fn;
+ delayed_work_info->data = data;
+ mtx_lock(&vmci_sc->vmci_delayed_work_lock);
+ vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
+ delayed_work_info, entry);
+ mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
+
+ taskqueue_enqueue(taskqueue_thread,
+ &vmci_sc->vmci_delayed_work_task);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_send_datagram --
+ *
+ * VM to hypervisor call mechanism.
+ *
+ * Results:
+ * The result of the hypercall.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_send_datagram(struct vmci_datagram *dg)
+{
+ int result;
+
+ if (dg == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ /*
+ * We need to acquire the device spinlock because the datagram data may
+ * be spread over multiple pages and the monitor may interleave device
+ * user RPC calls from multiple VCPUs; holding the spinlock precludes
+ * that. The spinlock also disables interrupts, which prevents an
+ * incoming datagram from arriving during the "rep outsb" and possibly
+ * re-entering this function.
+ */
+ mtx_lock_spin(&vmci_sc->vmci_spinlock);
+
+ /*
+ * Send the datagram and retrieve the return value from the result
+ * register.
+ */
+ __asm__ __volatile__(
+ "cld\n\t"
+ "rep outsb\n\t"
+ : /* No output. */
+ : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
+ "c"(VMCI_DG_SIZE(dg)), "S"(dg)
+ );
+
+ /*
+ * XXX: Should read result high port as well when updating handlers to
+ * return 64bit.
+ */
+
+ result = bus_space_read_4(vmci_sc->vmci_iot0,
+ vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
+ mtx_unlock_spin(&vmci_sc->vmci_spinlock);
+
+ return (result);
+}
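
A small sketch of how code elsewhere in the driver can defer work out of
interrupt context through vmci_schedule_delayed_work_fn() defined above; the
vmci_work_fn signature is assumed to be "void fn(void *data)", matching how
vmci_delayed_work_fn_cb() invokes the queued callbacks.

static void
example_deferred_fn(void *data)
{

	/* Runs later from taskqueue_thread, outside interrupt context. */
}

static void
example_defer(struct vmci_softc *sc)
{

	if (vmci_schedule_delayed_work_fn(example_deferred_fn, sc) !=
	    VMCI_SUCCESS)
		VMCI_LOG_WARNING(LGPFX"Failed to schedule delayed work.\n");
}
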
Index: sys/dev/vmware/vmci/vmci_call_defs.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_call_defs.h
@@ -0,0 +1,242 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+#ifndef _VMCI_CALL_DEFS_H_
+#define _VMCI_CALL_DEFS_H_
+
+#include "vmci_defs.h"
+
+/*
+ * All structs here have a size that is a multiple of the size of their
+ * largest member, i.e. a struct with at least one 8-byte member has a size
+ * that is a multiple of 8, and a struct whose largest member is 4 bytes has
+ * a size that is a multiple of 4.
+ */
+
+/*
+ * Base struct for vmci datagrams.
+ */
+struct vmci_datagram {
+ struct vmci_handle dst;
+ struct vmci_handle src;
+ uint64_t payload_size;
+};
+
+/*
+ * Second flag is for creating a well-known handle instead of a per context
+ * handle. Next flag is for deferring datagram delivery, so that the
+ * datagram callback is invoked in a delayed context (not interrupt context).
+ */
+#define VMCI_FLAG_DG_NONE 0
+#define VMCI_FLAG_WELLKNOWN_DG_HND 0x1
+#define VMCI_FLAG_ANYCID_DG_HND 0x2
+#define VMCI_FLAG_DG_DELAYED_CB 0x4
+
+/* Event callback should fire in a delayed context (not interrupt context). */
+#define VMCI_FLAG_EVENT_NONE 0
+#define VMCI_FLAG_EVENT_DELAYED_CB 0x1
+
+/*
+ * Maximum supported size of a VMCI datagram for routable datagrams.
+ * Datagrams going to the hypervisor are allowed to be larger.
+ */
+#define VMCI_MAX_DG_SIZE \
+ (17 * 4096)
+#define VMCI_MAX_DG_PAYLOAD_SIZE \
+ (VMCI_MAX_DG_SIZE - sizeof(struct vmci_datagram))
+#define VMCI_DG_PAYLOAD(_dg) \
+ (void *)((char *)(_dg) + sizeof(struct vmci_datagram))
+#define VMCI_DG_HEADERSIZE \
+ sizeof(struct vmci_datagram)
+#define VMCI_DG_SIZE(_dg) \
+ (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payload_size)
+#define VMCI_DG_SIZE_ALIGNED(_dg) \
+ ((VMCI_DG_SIZE(_dg) + 7) & (size_t)~7)
+
+/*
+ * Struct used for querying, via VMCI_RESOURCES_QUERY, the availability of
+ * hypervisor resources.
+ * Struct size is 16 bytes. All fields in struct are aligned to their natural
+ * alignment.
+ */
+struct vmci_resources_query_hdr {
+ struct vmci_datagram hdr;
+ uint32_t num_resources;
+ uint32_t _padding;
+};
+
+/*
+ * Convenience struct for negotiating vectors. Must match the layout of
+ * vmci_resources_query_hdr minus the struct vmci_datagram header.
+ */
+struct vmci_resources_query_msg {
+ uint32_t num_resources;
+ uint32_t _padding;
+ vmci_resource resources[1];
+};
+
+/*
+ * Struct used for setting the notification bitmap. All fields in struct are
+ * aligned to their natural alignment.
+ */
+struct vmci_notify_bitmap_set_msg {
+ struct vmci_datagram hdr;
+ PPN bitmap_ppn;
+ uint32_t _pad;
+};
+
+/*
+ * Struct used for linking a doorbell handle with an index in the notify
+ * bitmap. All fields in struct are aligned to their natural alignment.
+ */
+struct vmci_doorbell_link_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+ uint64_t notify_idx;
+};
+
+/*
+ * Struct used for unlinking a doorbell handle from an index in the notify
+ * bitmap. All fields in struct are aligned to their natural alignment.
+ */
+struct vmci_doorbell_unlink_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+};
+
+/*
+ * Struct used for generating a notification on a doorbell handle. All fields
+ * in struct are aligned to their natural alignment.
+ */
+struct vmci_doorbell_notify_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+};
+
+/*
+ * This struct is used to contain data for events. Size of this struct is a
+ * multiple of 8 bytes, and all fields are aligned to their natural alignment.
+ */
+struct vmci_event_data {
+ vmci_event_type event; /* 4 bytes. */
+ uint32_t _pad;
+ /*
+ * Event payload is put here.
+ */
+};
+
+/* Callback type used to deliver an incoming datagram to its handler. */
+
+typedef int
+(*vmci_datagram_recv_cb)(void *client_data, struct vmci_datagram *msg);
+
+/*
+ * We use the following inline function to access the payload data associated
+ * with an event data.
+ */
+
+static inline void *
+vmci_event_data_payload(struct vmci_event_data *ev_data)
+{
+
+ return ((void *)((char *)ev_data + sizeof(*ev_data)));
+}
+
+/*
+ * Define the different VMCI_EVENT payload data types here. All structs must
+ * be a multiple of 8 bytes, and fields must be aligned to their natural
+ * alignment.
+ */
+struct vmci_event_payload_context {
+ vmci_id context_id; /* 4 bytes. */
+ uint32_t _pad;
+};
+
+struct vmci_event_payload_qp {
+ /* QueuePair handle. */
+ struct vmci_handle handle;
+ /* Context id of attaching/detaching VM. */
+ vmci_id peer_id;
+ uint32_t _pad;
+};
+
+/*
+ * We define the following struct to get the size of the maximum event data
+ * the hypervisor may send to the guest. If adding a new event payload type
+ * above, add it to the following struct too (inside the union).
+ */
+struct vmci_event_data_max {
+ struct vmci_event_data event_data;
+ union {
+ struct vmci_event_payload_context context_payload;
+ struct vmci_event_payload_qp qp_payload;
+ } ev_data_payload;
+};
+
+/*
+ * Struct used for VMCI_EVENT_SUBSCRIBE/UNSUBSCRIBE and VMCI_EVENT_HANDLER
+ * messages. Struct size is 32 bytes. All fields in struct are aligned to
+ * their natural alignment.
+ */
+struct vmci_event_msg {
+ struct vmci_datagram hdr;
+ struct vmci_event_data event_data; /* Has event type & payload. */
+ /*
+ * Payload gets put here.
+ */
+};
+
+/*
+ * We use the following inline function to access the payload data associated
+ * with an event message.
+ */
+
+static inline void *
+vmci_event_msg_payload(struct vmci_event_msg *e_msg)
+{
+
+ return (vmci_event_data_payload(&e_msg->event_data));
+}
+
+/* Flags for VMCI QueuePair API. */
+#define VMCI_QPFLAG_ATTACH_ONLY \
+ 0x1 /* Fail alloc if QP not created by peer. */
+#define VMCI_QPFLAG_LOCAL \
+ 0x2 /* Only allow attaches from local context. */
+#define VMCI_QPFLAG_NONBLOCK \
+ 0x4 /* Host won't block when guest is quiesced. */
+
+/* For asymmetric queuepairs, update as new flags are added. */
+#define VMCI_QP_ASYMM \
+ VMCI_QPFLAG_NONBLOCK
+#define VMCI_QP_ASYMM_PEER \
+ (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QP_ASYMM)
+
+/* Update the following (bitwise OR flags) while adding new flags. */
+#define VMCI_QP_ALL_FLAGS \
+ (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QPFLAG_LOCAL | VMCI_QPFLAG_NONBLOCK)
+
+/*
+ * Structs used for QueuePair alloc and detach messages. We align fields of
+ * these structs to 64 bit boundaries.
+ */
+struct vmci_queue_pair_alloc_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+ vmci_id peer; /* 32bit field. */
+ uint32_t flags;
+ uint64_t produce_size;
+ uint64_t consume_size;
+ uint64_t num_ppns;
+ /* List of PPNs placed here. */
+};
+
+struct vmci_queue_pair_detach_msg {
+ struct vmci_datagram hdr;
+ struct vmci_handle handle;
+};
+
+#endif /* !_VMCI_CALL_DEFS_H_ */
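
A brief illustration of how the datagram size and payload macros above
compose; vmci_alloc_kernel_mem()/vmci_free_kernel_mem() are the allocator
wrappers from vmci_kernel_if.h used elsewhere in this patch.

static void
example_build_datagram(void)
{
	struct vmci_datagram *dg;
	size_t payload_len = 16;

	dg = vmci_alloc_kernel_mem(VMCI_DG_HEADERSIZE + payload_len,
	    VMCI_MEMORY_NORMAL);
	if (dg == NULL)
		return;

	dg->payload_size = payload_len;
	/* The payload starts immediately after the header. */
	memset(VMCI_DG_PAYLOAD(dg), 0, payload_len);
	/* VMCI_DG_SIZE(dg) == VMCI_DG_HEADERSIZE + payload_len, and */
	/* VMCI_DG_SIZE_ALIGNED(dg) rounds that up to a multiple of 8. */

	vmci_free_kernel_mem(dg, VMCI_DG_HEADERSIZE + payload_len);
}
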
Index: sys/dev/vmware/vmci/vmci_datagram.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_datagram.h
@@ -0,0 +1,24 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Internal functions in the VMCI Simple Datagram API */
+
+#ifndef _VMCI_DATAGRAM_H_
+#define _VMCI_DATAGRAM_H_
+
+#include "vmci_call_defs.h"
+
+/* Datagram API for non-public use. */
+int vmci_datagram_dispatch(vmci_id context_id, struct vmci_datagram *dg);
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg);
+int vmci_datagram_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags);
+
+/* Misc. */
+void vmci_datagram_sync(void);
+bool vmci_datagram_check_host_capabilities(void);
+
+#endif /* !_VMCI_DATAGRAM_H_ */
Index: sys/dev/vmware/vmci/vmci_datagram.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_datagram.c
@@ -0,0 +1,647 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements the VMCI Simple Datagram API on the host. */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include "vmci_datagram.h"
+#include "vmci_driver.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+
+#define LGPFX "vmci_datagram: "
+
+/*
+ * datagram_entry describes the datagram entity. It is used for datagram
+ * entities created only on the host.
+ */
+struct datagram_entry {
+ struct vmci_resource resource;
+ uint32_t flags;
+ bool run_delayed;
+ vmci_datagram_recv_cb recv_cb;
+ void *client_data;
+ vmci_event destroy_event;
+ vmci_privilege_flags priv_flags;
+};
+
+struct vmci_delayed_datagram_info {
+ struct datagram_entry *entry;
+ struct vmci_datagram msg;
+};
+
+static int vmci_datagram_get_priv_flags_int(vmci_id context_id,
+ struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags);
+static void datagram_free_cb(void *resource);
+static int datagram_release_cb(void *client_data);
+
+/*------------------------------ Helper functions ----------------------------*/
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * datagram_free_cb --
+ *
+ * Callback to free datagram structure when resource is no longer used,
+ * i.e. the reference count reached 0.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+datagram_free_cb(void *client_data)
+{
+ struct datagram_entry *entry = (struct datagram_entry *)client_data;
+
+ ASSERT(entry);
+
+ vmci_signal_event(&entry->destroy_event);
+
+ /*
+ * The entry is freed in vmci_datagram_destroy_handle(), which is
+ * waiting for the above signal.
+ */
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * datagram_release_cb --
+ *
+ * Callback to release the resource reference. It is called by the
+ * vmci_wait_on_event function before it blocks.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+datagram_release_cb(void *client_data)
+{
+ struct datagram_entry *entry;
+
+ entry = (struct datagram_entry *)client_data;
+
+ ASSERT(entry);
+
+ vmci_resource_release(&entry->resource);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * datagram_create_hnd --
+ *
+ * Internal function to create a datagram entry given a handle.
+ *
+ * Results:
+ * VMCI_SUCCESS if created, negative errno value otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+datagram_create_hnd(vmci_id resource_id, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb,
+ void *client_data, struct vmci_handle *out_handle)
+{
+ struct datagram_entry *entry;
+ struct vmci_handle handle;
+ vmci_id context_id;
+ int result;
+
+ ASSERT(recv_cb != NULL);
+ ASSERT(out_handle != NULL);
+ ASSERT(!(priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS));
+
+ if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0)
+ return (VMCI_ERROR_INVALID_ARGS);
+ else {
+ if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0)
+ context_id = VMCI_INVALID_ID;
+ else {
+ context_id = vmci_get_context_id();
+ if (context_id == VMCI_INVALID_ID)
+ return (VMCI_ERROR_NO_RESOURCES);
+ }
+
+ if (resource_id == VMCI_INVALID_ID) {
+ resource_id = vmci_resource_get_id(context_id);
+ if (resource_id == VMCI_INVALID_ID)
+ return (VMCI_ERROR_NO_HANDLE);
+ }
+
+ handle = VMCI_MAKE_HANDLE(context_id, resource_id);
+ }
+
+ entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL);
+ if (entry == NULL) {
+ VMCI_LOG_WARNING(LGPFX"Failed allocating memory for datagram "
+ "entry.\n");
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ if (!vmci_can_schedule_delayed_work()) {
+ if (flags & VMCI_FLAG_DG_DELAYED_CB) {
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+ entry->run_delayed = false;
+ } else
+ entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ?
+ true : false;
+
+ entry->flags = flags;
+ entry->recv_cb = recv_cb;
+ entry->client_data = client_data;
+ vmci_create_event(&entry->destroy_event);
+ entry->priv_flags = priv_flags;
+
+ /* Make datagram resource live. */
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_DATAGRAM, handle, datagram_free_cb, entry);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to add new resource "
+ "(handle=0x%x:0x%x).\n", handle.context, handle.resource);
+ vmci_destroy_event(&entry->destroy_event);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+ return (result);
+ }
+ *out_handle = handle;
+
+ return (VMCI_SUCCESS);
+}
+
+/*------------------------------ Public API functions ------------------------*/
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_create_handle --
+ *
+ * Creates a host context datagram endpoint and returns a handle to it.
+ *
+ * Results:
+ * VMCI_SUCCESS if created, negative errno value otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_create_handle(vmci_id resource_id, uint32_t flags,
+ vmci_datagram_recv_cb recv_cb, void *client_data,
+ struct vmci_handle *out_handle)
+{
+
+ if (out_handle == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (recv_cb == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Client callback needed when creating "
+ "datagram.\n");
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ return (datagram_create_hnd(resource_id, flags,
+ VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+ recv_cb, client_data, out_handle));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_create_handle_priv --
+ *
+ * Creates a host context datagram endpoint and returns a handle to it.
+ *
+ * Results:
+ * VMCI_SUCCESS if created, negative errno value otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_create_handle_priv(vmci_id resource_id, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb,
+ void *client_data, struct vmci_handle *out_handle)
+{
+
+ if (out_handle == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (recv_cb == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Client callback needed when creating "
+ "datagram.\n");
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (datagram_create_hnd(resource_id, flags, priv_flags, recv_cb,
+ client_data, out_handle));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_destroy_handle --
+ *
+ * Destroys a handle.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_destroy_handle(struct vmci_handle handle)
+{
+ struct datagram_entry *entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(handle,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (resource == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to destroy datagram "
+ "(handle=0x%x:0x%x).\n", handle.context, handle.resource);
+ return (VMCI_ERROR_NOT_FOUND);
+ }
+ entry = RESOURCE_CONTAINER(resource, struct datagram_entry, resource);
+
+ vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DATAGRAM);
+
+ /*
+ * We now wait on the destroy_event and release the reference we got
+ * above.
+ */
+ vmci_wait_on_event(&entry->destroy_event, datagram_release_cb, entry);
+
+ /*
+ * We know that we are now the only reference to the above entry so
+ * can safely free it.
+ */
+ vmci_destroy_event(&entry->destroy_event);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_get_priv_flags_int --
+ *
+ * Internal utility function with the same purpose as
+ * vmci_datagram_get_priv_flags that also takes a context_id.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_datagram_get_priv_flags_int(vmci_id context_id, struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags)
+{
+
+ ASSERT(priv_flags);
+ ASSERT(context_id != VMCI_INVALID_ID);
+
+ if (context_id == VMCI_HOST_CONTEXT_ID) {
+ struct datagram_entry *src_entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(handle,
+ VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (resource == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+ src_entry = RESOURCE_CONTAINER(resource, struct datagram_entry,
+ resource);
+ *priv_flags = src_entry->priv_flags;
+ vmci_resource_release(resource);
+ } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID)
+ *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS;
+ else
+ *priv_flags = VMCI_NO_PRIVILEGE_FLAGS;
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_get_priv_flags --
+ *
+ * Utility function that retrieves the privilege flags associated with a
+ * given datagram handle. For hypervisor and guest endpoints, the
+ * privileges are determined by the context ID, but for host endpoints
+ * privileges are associated with the complete handle.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags)
+{
+
+ if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_datagram_get_priv_flags_int(handle.context, handle,
+ priv_flags));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_delayed_dispatch_cb --
+ *
+ * Calls the specified callback in a delayed context.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_datagram_delayed_dispatch_cb(void *data)
+{
+ struct vmci_delayed_datagram_info *dg_info;
+
+ dg_info = (struct vmci_delayed_datagram_info *)data;
+
+ ASSERT(data);
+
+ dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg);
+
+ vmci_resource_release(&dg_info->entry->resource);
+
+ vmci_free_kernel_mem(dg_info, sizeof(*dg_info) +
+ (size_t)dg_info->msg.payload_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_dispatch_as_guest --
+ *
+ * Dispatch datagram as a guest, down through the VMX and potentially to
+ * the host.
+ *
+ * Result:
+ * Number of bytes sent on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_datagram_dispatch_as_guest(struct vmci_datagram *dg)
+{
+ struct vmci_resource *resource;
+ int retval;
+
+ resource = vmci_resource_get(dg->src, VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (NULL == resource)
+ return VMCI_ERROR_NO_HANDLE;
+
+ retval = vmci_send_datagram(dg);
+ vmci_resource_release(resource);
+
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_dispatch --
+ *
+ * Dispatch datagram. This will determine the routing for the datagram and
+ * dispatch it accordingly.
+ *
+ * Result:
+ * Number of bytes sent on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_dispatch(vmci_id context_id, struct vmci_datagram *dg)
+{
+
+ ASSERT(dg);
+ ASSERT_ON_COMPILE(sizeof(struct vmci_datagram) == 24);
+
+ if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) {
+ VMCI_LOG_DEBUG(LGPFX"Payload (size=%lu bytes) too big to send."
+ "\n", dg->payload_size);
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ return (vmci_datagram_dispatch_as_guest(dg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_invoke_guest_handler --
+ *
+ * Invoke the handler for the given datagram. This is intended to be called
+ * only when acting as a guest and receiving a datagram from the virtual
+ * device.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, other error values on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg)
+{
+ struct datagram_entry *dst_entry;
+ struct vmci_resource *resource;
+ int retval;
+
+ ASSERT(dg);
+
+ if (dg->payload_size > VMCI_MAX_DG_PAYLOAD_SIZE) {
+ VMCI_LOG_DEBUG(LGPFX"Payload (size=%lu bytes) too large to "
+ "deliver.\n", dg->payload_size);
+ return (VMCI_ERROR_PAYLOAD_TOO_LARGE);
+ }
+
+ resource = vmci_resource_get(dg->dst, VMCI_RESOURCE_TYPE_DATAGRAM);
+ if (NULL == resource) {
+ VMCI_LOG_DEBUG(LGPFX"destination (handle=0x%x:0x%x) doesn't "
+ "exist.\n", dg->dst.context, dg->dst.resource);
+ return (VMCI_ERROR_NO_HANDLE);
+ }
+
+ dst_entry = RESOURCE_CONTAINER(resource, struct datagram_entry,
+ resource);
+ if (dst_entry->run_delayed) {
+ struct vmci_delayed_datagram_info *dg_info;
+
+ dg_info = vmci_alloc_kernel_mem(sizeof(*dg_info) +
+ (size_t)dg->payload_size, VMCI_MEMORY_ATOMIC);
+ if (NULL == dg_info) {
+ vmci_resource_release(resource);
+ retval = VMCI_ERROR_NO_MEM;
+ goto exit;
+ }
+
+ dg_info->entry = dst_entry;
+ memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg));
+
+ retval = vmci_schedule_delayed_work(
+ vmci_datagram_delayed_dispatch_cb, dg_info);
+ if (retval < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to schedule delayed "
+ "work for datagram (result=%d).\n", retval);
+ vmci_free_kernel_mem(dg_info, sizeof(*dg_info) +
+ (size_t)dg->payload_size);
+ vmci_resource_release(resource);
+ dg_info = NULL;
+ goto exit;
+ }
+ } else {
+ dst_entry->recv_cb(dst_entry->client_data, dg);
+ vmci_resource_release(resource);
+ retval = VMCI_SUCCESS;
+ }
+
+exit:
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_send --
+ *
+ * Sends the payload to the destination datagram handle.
+ *
+ * Results:
+ * Returns number of bytes sent if success, or error code if failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_send(struct vmci_datagram *msg)
+{
+
+ if (msg == NULL)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_datagram_dispatch(VMCI_INVALID_ID, msg));
+}
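+
+/*
+ * Example (illustrative only, not part of this change): a minimal sketch of
+ * how a kernel client might build and send a datagram. The peer handle, the
+ * source handle and the payload buffer are hypothetical; the source must be
+ * a datagram handle this guest has already registered, otherwise
+ * vmci_datagram_dispatch_as_guest() above fails with VMCI_ERROR_NO_HANDLE.
+ * vmci_datagram_send() returns the number of bytes sent or a negative error.
+ *
+ *	struct vmci_datagram *dg;
+ *	size_t size = VMCI_DG_HEADERSIZE + payload_size;
+ *	int rv;
+ *
+ *	dg = vmci_alloc_kernel_mem(size, VMCI_MEMORY_NORMAL);
+ *	if (dg == NULL)
+ *		return (VMCI_ERROR_NO_MEM);
+ *	dg->dst = peer_handle;
+ *	dg->src = registered_src_handle;
+ *	dg->payload_size = payload_size;
+ *	memcpy(VMCI_DG_PAYLOAD(dg), payload, payload_size);
+ *	rv = vmci_datagram_send(dg);
+ *	vmci_free_kernel_mem(dg, size);
+ */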
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_datagram_sync(void)
+{
+
+ vmci_resource_sync();
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_check_host_capabilities --
+ *
+ * Verify that the host supports the resources we need. None are required
+ * for datagrams since they are implicitly supported.
+ *
+ * Results:
+ * true.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_datagram_check_host_capabilities(void)
+{
+
+ return (true);
+}
Index: sys/dev/vmware/vmci/vmci_defs.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_defs.h
@@ -0,0 +1,715 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+#ifndef _VMCI_DEFS_H_
+#define _VMCI_DEFS_H_
+
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+#include "vmci_kernel_defs.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+/* Register offsets. */
+#define VMCI_STATUS_ADDR 0x00
+#define VMCI_CONTROL_ADDR 0x04
+#define VMCI_ICR_ADDR 0x08
+#define VMCI_IMR_ADDR 0x0c
+#define VMCI_DATA_OUT_ADDR 0x10
+#define VMCI_DATA_IN_ADDR 0x14
+#define VMCI_CAPS_ADDR 0x18
+#define VMCI_RESULT_LOW_ADDR 0x1c
+#define VMCI_RESULT_HIGH_ADDR 0x20
+
+/* Status register bits. */
+#define VMCI_STATUS_INT_ON 0x1
+
+/* Control register bits. */
+#define VMCI_CONTROL_RESET 0x1
+#define VMCI_CONTROL_INT_ENABLE 0x2
+#define VMCI_CONTROL_INT_DISABLE 0x4
+
+/* Capabilities register bits. */
+#define VMCI_CAPS_HYPERCALL 0x1
+#define VMCI_CAPS_GUESTCALL 0x2
+#define VMCI_CAPS_DATAGRAM 0x4
+#define VMCI_CAPS_NOTIFICATIONS 0x8
+
+/* Interrupt Cause register bits. */
+#define VMCI_ICR_DATAGRAM 0x1
+#define VMCI_ICR_NOTIFICATION 0x2
+
+/* Interrupt Mask register bits. */
+#define VMCI_IMR_DATAGRAM 0x1
+#define VMCI_IMR_NOTIFICATION 0x2
+
+/* Interrupt type. */
+typedef enum vmci_intr_type {
+ VMCI_INTR_TYPE_INTX = 0,
+ VMCI_INTR_TYPE_MSI = 1,
+ VMCI_INTR_TYPE_MSIX = 2
+} vmci_intr_type;
+
+/*
+ * Maximum MSI/MSI-X interrupt vectors in the device.
+ */
+#define VMCI_MAX_INTRS 2
+
+/*
+ * Supported interrupt vectors. There is one for each ICR value above,
+ * but here they indicate the position in the vector array/message ID.
+ */
+#define VMCI_INTR_DATAGRAM 0
+#define VMCI_INTR_NOTIFICATION 1
+
+/*
+ * A single VMCI device has an upper limit of 128 MiB on the amount of
+ * memory that can be used for queue pairs.
+ */
+#define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024)
+
+/*
+ * We have a fixed set of resource IDs available in the VMX.
+ * This allows us to have a very simple implementation since we statically
+ * know how many callers will create datagram handles. If a new caller
+ * arrives and we have run out of slots, we can manually increment the
+ * maximum size of available resource IDs.
+ */
+
+typedef uint32_t vmci_resource;
+
+/* VMCI reserved hypervisor datagram resource IDs. */
+#define VMCI_RESOURCES_QUERY 0
+#define VMCI_GET_CONTEXT_ID 1
+#define VMCI_SET_NOTIFY_BITMAP 2
+#define VMCI_DOORBELL_LINK 3
+#define VMCI_DOORBELL_UNLINK 4
+#define VMCI_DOORBELL_NOTIFY 5
+/*
+ * VMCI_DATAGRAM_REQUEST_MAP and VMCI_DATAGRAM_REMOVE_MAP are
+ * obsoleted by the removal of VM to VM communication.
+ */
+#define VMCI_DATAGRAM_REQUEST_MAP 6
+#define VMCI_DATAGRAM_REMOVE_MAP 7
+#define VMCI_EVENT_SUBSCRIBE 8
+#define VMCI_EVENT_UNSUBSCRIBE 9
+#define VMCI_QUEUEPAIR_ALLOC 10
+#define VMCI_QUEUEPAIR_DETACH 11
+/*
+ * VMCI_VSOCK_VMX_LOOKUP was assigned to 12 for Fusion 3.0/3.1,
+ * WS 7.0/7.1 and ESX 4.1
+ */
+#define VMCI_HGFS_TRANSPORT 13
+#define VMCI_UNITY_PBRPC_REGISTER 14
+/*
+ * This resource is used for VMCI socket control packets sent to the
+ * hypervisor (CID 0) because RID 1 is already reserved.
+ */
+#define VSOCK_PACKET_HYPERVISOR_RID 15
+#define VMCI_RESOURCE_MAX 16
+/*
+ * The core VMCI device functionality only requires the resource IDs of
+ * VMCI_QUEUEPAIR_DETACH and below.
+ */
+#define VMCI_CORE_DEVICE_RESOURCE_MAX VMCI_QUEUEPAIR_DETACH
+
+/*
+ * VMCI reserved host datagram resource IDs.
+ * vsock control channel has resource id 1.
+ */
+#define VMCI_DVFILTER_DATA_PATH_DATAGRAM 2
+
+/* VMCI Ids. */
+typedef uint32_t vmci_id;
+
+struct vmci_id_range {
+ int8_t action; /* VMCI_FA_X, for use in filters. */
+ vmci_id begin; /* Beginning of range. */
+ vmci_id end; /* End of range. */
+};
+
+struct vmci_handle {
+ vmci_id context;
+ vmci_id resource;
+};
+
+static inline struct vmci_handle
+VMCI_MAKE_HANDLE(vmci_id cid, vmci_id rid)
+{
+ struct vmci_handle h;
+
+ h.context = cid;
+ h.resource = rid;
+ return (h);
+}
+
+#define VMCI_HANDLE_TO_CONTEXT_ID(_handle) \
+ ((_handle).context)
+#define VMCI_HANDLE_TO_RESOURCE_ID(_handle) \
+ ((_handle).resource)
+#define VMCI_HANDLE_EQUAL(_h1, _h2) \
+ ((_h1).context == (_h2).context && (_h1).resource == (_h2).resource)
+
+#define VMCI_INVALID_ID 0xFFFFFFFF
+static const struct vmci_handle VMCI_INVALID_HANDLE = {VMCI_INVALID_ID,
+ VMCI_INVALID_ID};
+
+#define VMCI_HANDLE_INVALID(_handle) \
+ VMCI_HANDLE_EQUAL((_handle), VMCI_INVALID_HANDLE)
+
+/*
+ * The below defines can be used to send anonymous requests.
+ * This also indicates that no response is expected.
+ */
+#define VMCI_ANON_SRC_CONTEXT_ID \
+ VMCI_INVALID_ID
+#define VMCI_ANON_SRC_RESOURCE_ID \
+ VMCI_INVALID_ID
+#define VMCI_ANON_SRC_HANDLE \
+ VMCI_MAKE_HANDLE(VMCI_ANON_SRC_CONTEXT_ID, \
+ VMCI_ANON_SRC_RESOURCE_ID)
+
+/* The lowest 16 context ids are reserved for internal use. */
+#define VMCI_RESERVED_CID_LIMIT 16
+
+/*
+ * Hypervisor context id, used for calling into hypervisor
+ * supplied services from the VM.
+ */
+#define VMCI_HYPERVISOR_CONTEXT_ID 0
+
+/*
+ * Well-known context id, a logical context that contains a set of
+ * well-known services. This context ID is now obsolete.
+ */
+#define VMCI_WELL_KNOWN_CONTEXT_ID 1
+
+/*
+ * Context ID used by host endpoints.
+ */
+#define VMCI_HOST_CONTEXT_ID 2
+#define VMCI_HOST_CONTEXT_INVALID_EVENT ((uintptr_t)~0)
+
+#define VMCI_CONTEXT_IS_VM(_cid) \
+ (VMCI_INVALID_ID != _cid && _cid > VMCI_HOST_CONTEXT_ID)
+
+/*
+ * The VMCI_CONTEXT_RESOURCE_ID is used together with VMCI_MAKE_HANDLE to make
+ * handles that refer to a specific context.
+ */
+#define VMCI_CONTEXT_RESOURCE_ID 0
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * VMCI error codes.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+#define VMCI_SUCCESS_QUEUEPAIR_ATTACH 5
+#define VMCI_SUCCESS_QUEUEPAIR_CREATE 4
+#define VMCI_SUCCESS_LAST_DETACH 3
+#define VMCI_SUCCESS_ACCESS_GRANTED 2
+#define VMCI_SUCCESS_ENTRY_DEAD 1
+#define VMCI_SUCCESS 0LL
+#define VMCI_ERROR_INVALID_RESOURCE (-1)
+#define VMCI_ERROR_INVALID_ARGS (-2)
+#define VMCI_ERROR_NO_MEM (-3)
+#define VMCI_ERROR_DATAGRAM_FAILED (-4)
+#define VMCI_ERROR_MORE_DATA (-5)
+#define VMCI_ERROR_NO_MORE_DATAGRAMS (-6)
+#define VMCI_ERROR_NO_ACCESS (-7)
+#define VMCI_ERROR_NO_HANDLE (-8)
+#define VMCI_ERROR_DUPLICATE_ENTRY (-9)
+#define VMCI_ERROR_DST_UNREACHABLE (-10)
+#define VMCI_ERROR_PAYLOAD_TOO_LARGE (-11)
+#define VMCI_ERROR_INVALID_PRIV (-12)
+#define VMCI_ERROR_GENERIC (-13)
+#define VMCI_ERROR_PAGE_ALREADY_SHARED (-14)
+#define VMCI_ERROR_CANNOT_SHARE_PAGE (-15)
+#define VMCI_ERROR_CANNOT_UNSHARE_PAGE (-16)
+#define VMCI_ERROR_NO_PROCESS (-17)
+#define VMCI_ERROR_NO_DATAGRAM (-18)
+#define VMCI_ERROR_NO_RESOURCES (-19)
+#define VMCI_ERROR_UNAVAILABLE (-20)
+#define VMCI_ERROR_NOT_FOUND (-21)
+#define VMCI_ERROR_ALREADY_EXISTS (-22)
+#define VMCI_ERROR_NOT_PAGE_ALIGNED (-23)
+#define VMCI_ERROR_INVALID_SIZE (-24)
+#define VMCI_ERROR_REGION_ALREADY_SHARED (-25)
+#define VMCI_ERROR_TIMEOUT (-26)
+#define VMCI_ERROR_DATAGRAM_INCOMPLETE (-27)
+#define VMCI_ERROR_INCORRECT_IRQL (-28)
+#define VMCI_ERROR_EVENT_UNKNOWN (-29)
+#define VMCI_ERROR_OBSOLETE (-30)
+#define VMCI_ERROR_QUEUEPAIR_MISMATCH (-31)
+#define VMCI_ERROR_QUEUEPAIR_NOTSET (-32)
+#define VMCI_ERROR_QUEUEPAIR_NOTOWNER (-33)
+#define VMCI_ERROR_QUEUEPAIR_NOTATTACHED (-34)
+#define VMCI_ERROR_QUEUEPAIR_NOSPACE (-35)
+#define VMCI_ERROR_QUEUEPAIR_NODATA (-36)
+#define VMCI_ERROR_BUSMEM_INVALIDATION (-37)
+#define VMCI_ERROR_MODULE_NOT_LOADED (-38)
+#define VMCI_ERROR_DEVICE_NOT_FOUND (-39)
+#define VMCI_ERROR_QUEUEPAIR_NOT_READY (-40)
+#define VMCI_ERROR_WOULD_BLOCK (-41)
+
+/* VMCI clients should return error codes within this range. */
+#define VMCI_ERROR_CLIENT_MIN (-500)
+#define VMCI_ERROR_CLIENT_MAX (-550)
+
+/* Internal error codes. */
+#define VMCI_SHAREDMEM_ERROR_BAD_CONTEXT (-1000)
+
+#define VMCI_PATH_MAX 256
+
+/* VMCI reserved events. */
+typedef uint32_t vmci_event_type;
+
+#define VMCI_EVENT_CTX_ID_UPDATE 0 // Only applicable to guest
+ // endpoints
+#define VMCI_EVENT_CTX_REMOVED 1 // Applicable to guest and host
+#define VMCI_EVENT_QP_RESUMED 2 // Only applicable to guest
+ // endpoints
+#define VMCI_EVENT_QP_PEER_ATTACH 3 // Applicable to guest, host
+ // and VMX
+#define VMCI_EVENT_QP_PEER_DETACH 4 // Applicable to guest, host
+ // and VMX
+#define VMCI_EVENT_MEM_ACCESS_ON 5 // Applicable to VMX and vmk. On
+ // vmk, this event has the
+ // Context payload type
+#define VMCI_EVENT_MEM_ACCESS_OFF 6 // Applicable to VMX and vmk.
+ // Same as above for the payload
+ // type
+#define VMCI_EVENT_GUEST_PAUSED 7 // Applicable to vmk. This
+ // event has the Context
+ // payload type
+#define VMCI_EVENT_GUEST_UNPAUSED 8 // Applicable to vmk. Same as
+ // above for the payload type.
+#define VMCI_EVENT_MAX 9
+
+/*
+ * Of the above events, a few are reserved for use in the VMX, and other
+ * endpoints (guest and host kernel) should not use them. For the rest of the
+ * events, we allow both host and guest endpoints to subscribe to them, to
+ * maintain the same API for host and guest endpoints.
+ */
+
+#define VMCI_EVENT_VALID_VMX(_event) \
+ (_event == VMCI_EVENT_QP_PEER_ATTACH || \
+ _event == VMCI_EVENT_QP_PEER_DETACH || \
+ _event == VMCI_EVENT_MEM_ACCESS_ON || \
+ _event == VMCI_EVENT_MEM_ACCESS_OFF)
+
+#define VMCI_EVENT_VALID(_event) \
+ (_event < VMCI_EVENT_MAX && \
+ _event != VMCI_EVENT_MEM_ACCESS_ON && \
+ _event != VMCI_EVENT_MEM_ACCESS_OFF && \
+ _event != VMCI_EVENT_GUEST_PAUSED && \
+ _event != VMCI_EVENT_GUEST_UNPAUSED)
+
+/* Reserved guest datagram resource ids. */
+#define VMCI_EVENT_HANDLER 0
+
+/*
+ * VMCI coarse-grained privileges (per context or host process/endpoint). An
+ * entity with the restricted flag is only allowed to interact with the
+ * hypervisor and trusted entities.
+ */
+typedef uint32_t vmci_privilege_flags;
+
+#define VMCI_PRIVILEGE_FLAG_RESTRICTED 0x01
+#define VMCI_PRIVILEGE_FLAG_TRUSTED 0x02
+#define VMCI_PRIVILEGE_ALL_FLAGS \
+ (VMCI_PRIVILEGE_FLAG_RESTRICTED | VMCI_PRIVILEGE_FLAG_TRUSTED)
+#define VMCI_NO_PRIVILEGE_FLAGS 0x00
+#define VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS VMCI_NO_PRIVILEGE_FLAGS
+#define VMCI_LEAST_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_RESTRICTED
+#define VMCI_MAX_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_TRUSTED
+
+/* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */
+#define VMCI_RESERVED_RESOURCE_ID_MAX 1023
+
+#define VMCI_DOMAIN_NAME_MAXLEN 32
+
+#define VMCI_LGPFX "vmci: "
+
+/*
+ * struct vmci_queue_header
+ *
+ * A Queue cannot stand by itself as designed. Each Queue's header contains a
+ * pointer into itself (the producer_tail) and into its peer (consumer_head).
+ * The reason for the separation is one of accessibility: Each end-point can
+ * modify two things: where the next location to enqueue is within its produce_q
+ * (producer_tail); and where the next dequeue location is in its consume_q
+ * (consumer_head).
+ *
+ * An end-point cannot modify the pointers of its peer (guest to guest; NOTE
+ * that in the host both queue headers are mapped r/w). But, each end-point
+ * needs read access to both Queue header structures in order to determine how
+ * much space is used (or left) in the Queue. This is because for an end-point
+ * to know how full its produce_q is, it needs to use the consumer_head that
+ * points into the produce_q but -that- consumer_head is in the Queue header
+ * for that end-point's consume_q.
+ *
+ * Thoroughly confused? Sorry.
+ *
+ * producer_tail: the point to enqueue new entrants. When you approach a line
+ * in a store, for example, you walk up to the tail.
+ *
+ * consumer_head: the point in the queue from which the next element is
+ * dequeued. In other words, whoever is at the head of the line is the next
+ * to be served.
+ *
+ * Also, producer_tail points to an empty byte in the Queue, whereas
+ * consumer_head points to a valid byte of data (unless producer_tail ==
+ * consumer_head, in which case consumer_head does not point to a valid byte of
+ * data).
+ *
+ * For a queue of buffer 'size' bytes, the tail and head pointers will be in
+ * the range [0, size-1].
+ *
+ * If produce_q_header->producer_tail == consume_q_header->consumer_head then
+ * the produce_q is empty.
+ */
+struct vmci_queue_header {
+ /* All fields are 64bit and aligned. */
+ struct vmci_handle handle; /* Identifier. */
+ volatile uint64_t producer_tail; /* Offset in this queue. */
+ volatile uint64_t consumer_head; /* Offset in peer queue. */
+};
+
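+/*
+ * Concrete example (illustrative): for an endpoint's produce_q, the offset
+ * of the next byte this endpoint will write lives in its own header
+ * (produce_q_header->producer_tail), while the offset of the next byte the
+ * peer will read out of that same produce_q lives in the header of this
+ * endpoint's consume_q (consume_q_header->consumer_head), which the peer
+ * owns. After the endpoint enqueues 5 bytes into an empty 16 byte produce_q
+ * and the peer dequeues 2 of them, producer_tail == 5, consumer_head == 2
+ * and 3 bytes remain to be consumed.
+ */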
+
+/*
+ * If one client of a QueuePair is a 32bit entity, we restrict the QueuePair
+ * size to be less than 4GB, and use 32bit atomic operations on the head and
+ * tail pointers. 64bit atomic read on a 32bit entity involves cmpxchg8b which
+ * is an atomic read-modify-write. This will cause traces to fire when a 32bit
+ * consumer tries to read the producer's tail pointer, for example, because the
+ * consumer has read-only access to the producer's tail pointer.
+ *
+ * We provide the following macros to invoke 32bit or 64bit atomic operations
+ * based on the architecture the code is being compiled on.
+ */
+
+#ifdef __x86_64__
+#define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffffffffffff)
+#define qp_atomic_read_offset(x) atomic_load_64(x)
+#define qp_atomic_write_offset(x, y) atomic_store_64(x, y)
+#else /* __x86_64__ */
+ /*
+ * Wrappers below are being used because atomic_store_<type> operates
+ * on a specific <type>. Likewise for atomic_load_<type>
+ */
+
+ static inline uint32_t
+ type_safe_atomic_read_32(void *var)
+ {
+ return (atomic_load_32((volatile uint32_t *)(var)));
+ }
+
+ static inline void
+ type_safe_atomic_write_32(void *var, uint32_t val)
+ {
+ atomic_store_32((volatile uint32_t *)(var), (uint32_t)(val));
+ }
+
+#define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffff)
+#define qp_atomic_read_offset(x) type_safe_atomic_read_32((void *)(x))
+#define qp_atomic_write_offset(x, y) \
+ type_safe_atomic_write_32((void *)(x), (uint32_t)(y))
+#endif /* __x86_64__ */
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * qp_add_pointer --
+ *
+ * Helper to add a given offset to a head or tail pointer. Wraps the value
+ * of the pointer around the max size of the queue.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+qp_add_pointer(volatile uint64_t *var, size_t add, uint64_t size)
+{
+ uint64_t new_val = qp_atomic_read_offset(var);
+
+ if (new_val >= size - add)
+ new_val -= size;
+
+ new_val += add;
+ qp_atomic_write_offset(var, new_val);
+}
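+
+/*
+ * Worked example (illustrative): with size == 4096, advancing an offset of
+ * 4000 by 200 first subtracts the queue size (since 4000 >= 4096 - 200) and
+ * then adds the increment; unsigned wrap-around makes the intermediate
+ * subtraction harmless and the stored result is (4000 + 200) % 4096 == 104.
+ */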
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_producer_tail --
+ *
+ * Helper routine to get the Producer Tail from the supplied queue.
+ *
+ * Results:
+ * The contents of the queue's producer tail.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline uint64_t
+vmci_queue_header_producer_tail(const struct vmci_queue_header *q_header)
+{
+ struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
+ return (qp_atomic_read_offset(&qh->producer_tail));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_consumer_head --
+ *
+ * Helper routine to get the Consumer Head from the supplied queue.
+ *
+ * Results:
+ * The contents of the queue's consumer head.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline uint64_t
+vmci_queue_header_consumer_head(const struct vmci_queue_header *q_header)
+{
+ struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
+ return (qp_atomic_read_offset(&qh->consumer_head));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_add_producer_tail --
+ *
+ * Helper routine to increment the Producer Tail. Fundamentally,
+ * qp_add_pointer() is used to manipulate the tail itself.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_add_producer_tail(struct vmci_queue_header *q_header,
+ size_t add, uint64_t queue_size)
+{
+
+ qp_add_pointer(&q_header->producer_tail, add, queue_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_add_consumer_head --
+ *
+ * Helper routine to increment the Consumer Head. Fundamentally,
+ * qp_add_pointer() is used to manipulate the head itself.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_add_consumer_head(struct vmci_queue_header *q_header,
+ size_t add, uint64_t queue_size)
+{
+
+ qp_add_pointer(&q_header->consumer_head, add, queue_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_get_pointers --
+ *
+ * Helper routine for getting the head and the tail pointers for a queue.
+ * Both queue headers are needed to get both pointers for one queue.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_get_pointers(const struct vmci_queue_header *produce_q_header,
+ const struct vmci_queue_header *consume_q_header, uint64_t *producer_tail,
+ uint64_t *consumer_head)
+{
+
+ if (producer_tail)
+ *producer_tail =
+ vmci_queue_header_producer_tail(produce_q_header);
+
+ if (consumer_head)
+ *consumer_head =
+ vmci_queue_header_consumer_head(consume_q_header);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_reset_pointers --
+ *
+ * Reset the tail pointer (of "this" queue) and the head pointer (of "peer"
+ * queue).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_reset_pointers(struct vmci_queue_header *q_header)
+{
+
+ qp_atomic_write_offset(&q_header->producer_tail, CONST64U(0));
+ qp_atomic_write_offset(&q_header->consumer_head, CONST64U(0));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_init --
+ *
+ * Initializes a queue's state (head & tail pointers).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_queue_header_init(struct vmci_queue_header *q_header,
+ const struct vmci_handle handle)
+{
+
+ q_header->handle = handle;
+ vmci_queue_header_reset_pointers(q_header);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_free_space --
+ *
+ * Finds available free space in a produce queue to enqueue more data or
+ * reports an error if queue pair corruption is detected.
+ *
+ * Results:
+ * Free space size in bytes or an error code.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int64_t
+vmci_queue_header_free_space(const struct vmci_queue_header *produce_q_header,
+ const struct vmci_queue_header *consume_q_header,
+ const uint64_t produce_q_size)
+{
+ uint64_t free_space;
+ uint64_t head;
+ uint64_t tail;
+
+ tail = vmci_queue_header_producer_tail(produce_q_header);
+ head = vmci_queue_header_consumer_head(consume_q_header);
+
+ if (tail >= produce_q_size || head >= produce_q_size)
+ return (VMCI_ERROR_INVALID_SIZE);
+
+ /*
+ * Deduct 1 to avoid tail becoming equal to head which causes ambiguity.
+ * If head and tail are equal it means that the queue is empty.
+ */
+
+ if (tail >= head)
+ free_space = produce_q_size - (tail - head) - 1;
+ else
+ free_space = head - tail - 1;
+
+ return (free_space);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_header_buf_ready --
+ *
+ * vmci_queue_header_free_space() does all the heavy lifting of determining
+ * the number of free bytes in a Queue. This routine then subtracts that
+ * size from the full size of the Queue so the caller knows how many bytes
+ * are ready to be dequeued.
+ *
+ * Results:
+ * On success, available data size in bytes (up to MAX_INT64).
+ * On failure, appropriate error code.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int64_t
+vmci_queue_header_buf_ready(const struct vmci_queue_header *consume_q_header,
+ const struct vmci_queue_header *produce_q_header,
+ const uint64_t consume_q_size)
+{
+ int64_t free_space;
+
+ free_space = vmci_queue_header_free_space(consume_q_header,
+ produce_q_header, consume_q_size);
+ if (free_space < VMCI_SUCCESS)
+ return (free_space);
+ else
+ return (consume_q_size - free_space - 1);
+}
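+
+/*
+ * Worked example (illustrative): for a 4096 byte produce_q with
+ * producer_tail == 100 and the peer's consumer_head == 50,
+ * vmci_queue_header_free_space() returns 4096 - (100 - 50) - 1 == 4045 bytes
+ * of room, and the peer's view of the same queue through
+ * vmci_queue_header_buf_ready() yields 4096 - 4045 - 1 == 50 bytes ready to
+ * be dequeued. The deducted byte keeps a full queue (tail one byte behind
+ * head) distinguishable from an empty one (tail == head).
+ */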
+
+#endif /* !_VMCI_DEFS_H_ */
Index: sys/dev/vmware/vmci/vmci_doorbell.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_doorbell.h
@@ -0,0 +1,27 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Internal functions in the VMCI Doorbell API. */
+
+#ifndef _VMCI_DOORBELL_H_
+#define _VMCI_DOORBELL_H_
+
+#include "vmci_defs.h"
+
+int vmci_doorbell_init(void);
+void vmci_doorbell_exit(void);
+void vmci_doorbell_hibernate(bool enter_hibernate);
+void vmci_doorbell_sync(void);
+
+int vmci_doorbell_host_context_notify(vmci_id src_CID,
+ struct vmci_handle handle);
+int vmci_doorbell_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags);
+
+bool vmci_register_notification_bitmap(PPN bitmap_PPN);
+void vmci_scan_notification_bitmap(uint8_t *bitmap);
+
+#endif /* !_VMCI_DOORBELL_H_ */
Index: sys/dev/vmware/vmci/vmci_doorbell.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_doorbell.c
@@ -0,0 +1,906 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements the VMCI doorbell API. */
+
+#include <sys/types.h>
+
+#include "vmci_doorbell.h"
+#include "vmci_driver.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+#include "vmci_utils.h"
+
+#define LGPFX "vmci_doorbell: "
+
+#define VMCI_DOORBELL_INDEX_TABLE_SIZE 64
+#define VMCI_DOORBELL_HASH(_idx) \
+ vmci_hash_id((_idx), VMCI_DOORBELL_INDEX_TABLE_SIZE)
+
+/* Describes a doorbell notification handle allocated by the host. */
+struct vmci_doorbell_entry {
+ struct vmci_resource resource;
+ uint32_t idx;
+ vmci_list_item(vmci_doorbell_entry) idx_list_item;
+ vmci_privilege_flags priv_flags;
+ bool is_doorbell;
+ bool run_delayed;
+ vmci_callback notify_cb;
+ void *client_data;
+ vmci_event destroy_event;
+ volatile int active;
+};
+
+struct vmci_doorbell_index_table {
+ vmci_lock lock;
+ vmci_list(vmci_doorbell_entry) entries[VMCI_DOORBELL_INDEX_TABLE_SIZE];
+};
+
+/* The VMCI index table keeps track of currently registered doorbells. */
+static struct vmci_doorbell_index_table vmci_doorbell_it;
+
+/*
+ * The max_notify_idx is one larger than the currently known bitmap index in
+ * use, and is used to determine how much of the bitmap needs to be scanned.
+ */
+static uint32_t max_notify_idx;
+
+/*
+ * The notify_idx_count is used for determining whether there are free entries
+ * within the bitmap (if notify_idx_count + 1 < max_notify_idx).
+ */
+static uint32_t notify_idx_count;
+
+/*
+ * The last_notify_idx_reserved is used to track the last index handed out - in
+ * the case where multiple handles share a notification index, we hand out
+ * indexes round robin based on last_notify_idx_reserved.
+ */
+static uint32_t last_notify_idx_reserved;
+
+/* This is a one-entry cache used by the index allocation. */
+static uint32_t last_notify_idx_released = PAGE_SIZE;
+
+static void vmci_doorbell_free_cb(void *client_data);
+static int vmci_doorbell_release_cb(void *client_data);
+static void vmci_doorbell_delayed_dispatch_cb(void *data);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_init --
+ *
+ * General init code.
+ *
+ * Result:
+ * VMCI_SUCCESS on success, lock allocation error otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_init(void)
+{
+ uint32_t bucket;
+
+ for (bucket = 0; bucket < ARRAYSIZE(vmci_doorbell_it.entries);
+ ++bucket)
+ vmci_list_init(&vmci_doorbell_it.entries[bucket]);
+
+ return (vmci_init_lock(&vmci_doorbell_it.lock,
+ "VMCI Doorbell index table lock"));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_exit --
+ *
+ * General exit code.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_doorbell_exit(void)
+{
+
+ vmci_cleanup_lock(&vmci_doorbell_it.lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_free_cb --
+ *
+ * Callback to free the doorbell entry structure when the resource is no
+ * longer used, i.e., when the reference count reaches 0. The entry is freed in
+ * vmci_doorbell_destroy(), which is waiting on the signal that gets fired
+ * here.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * Signals VMCI event.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_free_cb(void *client_data)
+{
+ struct vmci_doorbell_entry *entry;
+
+ entry = (struct vmci_doorbell_entry *)client_data;
+ ASSERT(entry);
+ vmci_signal_event(&entry->destroy_event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_release_cb --
+ *
+ * Callback to release the resource reference. It is called by the
+ * vmci_wait_on_event function before it blocks.
+ *
+ * Result:
+ * Always 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_release_cb(void *client_data)
+{
+ struct vmci_doorbell_entry *entry;
+
+ entry = (struct vmci_doorbell_entry *)client_data;
+ ASSERT(entry);
+ vmci_resource_release(&entry->resource);
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_get_priv_flags --
+ *
+ * Utility function that retrieves the privilege flags associated with a
+ * given doorbell handle. For guest endpoints, the privileges are determined
+ * by the context ID, but for host endpoints privileges are associated with
+ * the complete handle. Hypervisor endpoints are not yet supported.
+ *
+ * Result:
+ * VMCI_SUCCESS on success,
+ * VMCI_ERROR_NOT_FOUND if handle isn't found,
+ * VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_get_priv_flags(struct vmci_handle handle,
+ vmci_privilege_flags *priv_flags)
+{
+
+ if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (handle.context == VMCI_HOST_CONTEXT_ID) {
+ struct vmci_doorbell_entry *entry;
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(handle,
+ VMCI_RESOURCE_TYPE_DOORBELL);
+ if (resource == NULL)
+ return (VMCI_ERROR_NOT_FOUND);
+ entry = RESOURCE_CONTAINER(
+ resource, struct vmci_doorbell_entry, resource);
+ *priv_flags = entry->priv_flags;
+ vmci_resource_release(resource);
+ } else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) {
+ /* Hypervisor endpoints for notifications are not supported. */
+ return (VMCI_ERROR_INVALID_ARGS);
+ } else
+ *priv_flags = VMCI_NO_PRIVILEGE_FLAGS;
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_index_table_find --
+ *
+ * Find doorbell entry by bitmap index.
+ *
+ * Results:
+ * Entry if found, NULL if not.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_doorbell_entry *
+vmci_doorbell_index_table_find(uint32_t idx)
+{
+ struct vmci_doorbell_entry *iter;
+ uint32_t bucket;
+
+ bucket = VMCI_DOORBELL_HASH(idx);
+
+ vmci_list_scan(iter, &vmci_doorbell_it.entries[bucket], idx_list_item) {
+ if (idx == iter->idx)
+ return (iter);
+ }
+
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_index_table_add --
+ *
+ * Add the given entry to the index table. This will hold() the entry's
+ * resource so that the entry is not deleted before it is removed from the
+ * table.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_index_table_add(struct vmci_doorbell_entry *entry)
+{
+ uint32_t bucket;
+ uint32_t new_notify_idx;
+
+ ASSERT(entry);
+
+ vmci_resource_hold(&entry->resource);
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+
+ /*
+ * Below we try to allocate an index in the notification bitmap with
+ * "not too much" sharing between resources. If we use less that the
+ * full bitmap, we either add to the end if there are no unused flags
+ * within the currently used area, or we search for unused ones. If we
+ * use the full bitmap, we allocate the index round robin.
+ */
+
+ if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) {
+ if (last_notify_idx_released < max_notify_idx &&
+ !vmci_doorbell_index_table_find(last_notify_idx_released)) {
+ new_notify_idx = last_notify_idx_released;
+ last_notify_idx_released = PAGE_SIZE;
+ } else {
+ bool reused = false;
+ new_notify_idx = last_notify_idx_reserved;
+ if (notify_idx_count + 1 < max_notify_idx) {
+ do {
+ if (!vmci_doorbell_index_table_find(
+ new_notify_idx)) {
+ reused = true;
+ break;
+ }
+ new_notify_idx = (new_notify_idx + 1) %
+ max_notify_idx;
+ } while (new_notify_idx !=
+ last_notify_idx_released);
+ }
+ if (!reused) {
+ new_notify_idx = max_notify_idx;
+ max_notify_idx++;
+ }
+ }
+ } else {
+ new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE;
+ }
+ last_notify_idx_reserved = new_notify_idx;
+ notify_idx_count++;
+
+ entry->idx = new_notify_idx;
+ bucket = VMCI_DOORBELL_HASH(entry->idx);
+ vmci_list_insert(&vmci_doorbell_it.entries[bucket], entry,
+ idx_list_item);
+
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+}
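+
+/*
+ * Example (illustrative): with max_notify_idx == 4 and only indices 0 and 3
+ * in use, a new entry reuses last_notify_idx_released if that index is still
+ * free, and otherwise probes round robin from last_notify_idx_reserved until
+ * it finds one of the free indices 1 or 2. max_notify_idx only grows once
+ * notify_idx_count + 1 >= max_notify_idx, and the allocator falls back to
+ * plain round-robin sharing of indices only once a full page's worth has
+ * been handed out.
+ */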
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_index_table_remove --
+ *
+ * Remove the given entry from the index table. This will release() the
+ * entry's resource.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_index_table_remove(struct vmci_doorbell_entry *entry)
+{
+ ASSERT(entry);
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+
+ vmci_list_remove(entry, idx_list_item);
+
+ notify_idx_count--;
+ if (entry->idx == max_notify_idx - 1) {
+ /*
+ * If we delete an entry with the maximum known notification
+ * index, we take the opportunity to prune the current max. As
+ * there might be other unused indices immediately below, we
+ * lower the maximum until we hit an index in use.
+ */
+
+ while (max_notify_idx > 0 &&
+ !vmci_doorbell_index_table_find(max_notify_idx - 1))
+ max_notify_idx--;
+ }
+ last_notify_idx_released = entry->idx;
+
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+
+ vmci_resource_release(&entry->resource);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_link --
+ *
+ * Creates a link between the given doorbell handle and the given index in
+ * the bitmap in the device backend.
+ *
+ * Results:
+ * VMCI_SUCCESS if success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * Notification state is created in hypervisor.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_link(struct vmci_handle handle, bool is_doorbell,
+ uint32_t notify_idx)
+{
+ struct vmci_doorbell_link_msg link_msg;
+ vmci_id resource_id;
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle));
+
+ if (is_doorbell)
+ resource_id = VMCI_DOORBELL_LINK;
+ else {
+ ASSERT(false);
+ return (VMCI_ERROR_UNAVAILABLE);
+ }
+
+ link_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ resource_id);
+ link_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE;
+ link_msg.handle = handle;
+ link_msg.notify_idx = notify_idx;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&link_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_unlink --
+ *
+ * Unlinks the given doorbell handle from an index in the bitmap in the
+ * device backend.
+ *
+ * Results:
+ * VMCI_SUCCESS if success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * Notification state is destroyed in hypervisor.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_unlink(struct vmci_handle handle, bool is_doorbell)
+{
+ struct vmci_doorbell_unlink_msg unlink_msg;
+ vmci_id resource_id;
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle));
+
+ if (is_doorbell)
+ resource_id = VMCI_DOORBELL_UNLINK;
+ else {
+ ASSERT(false);
+ return (VMCI_ERROR_UNAVAILABLE);
+ }
+
+ unlink_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ resource_id);
+ unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE;
+ unlink_msg.handle = handle;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&unlink_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_create --
+ *
+ * Creates a doorbell with the given callback. If the handle is
+ * VMCI_INVALID_HANDLE, a free handle will be assigned, if possible. The
+ * callback can be run immediately (potentially with locks held - the
+ * default) or delayed (in a kernel thread) by specifying the flag
+ * VMCI_FLAG_DELAYED_CB. If delayed execution is selected, a given callback
+ * may not be run if the kernel is unable to allocate memory for the delayed
+ * execution (highly unlikely).
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_create(struct vmci_handle *handle, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_callback notify_cb, void *client_data)
+{
+ struct vmci_doorbell_entry *entry;
+ struct vmci_handle new_handle;
+ int result;
+
+ if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB ||
+ priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL);
+ if (entry == NULL) {
+ VMCI_LOG_WARNING(LGPFX"Failed allocating memory for datagram "
+ "entry.\n");
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ if (!vmci_can_schedule_delayed_work() &&
+ (flags & VMCI_FLAG_DELAYED_CB)) {
+ result = VMCI_ERROR_INVALID_ARGS;
+ goto free_mem;
+ }
+
+ if (VMCI_HANDLE_INVALID(*handle)) {
+ vmci_id context_id;
+
+ context_id = vmci_get_context_id();
+ vmci_id resource_id = vmci_resource_get_id(context_id);
+ if (resource_id == VMCI_INVALID_ID) {
+ result = VMCI_ERROR_NO_HANDLE;
+ goto free_mem;
+ }
+ new_handle = VMCI_MAKE_HANDLE(context_id, resource_id);
+ } else {
+ if (VMCI_INVALID_ID == handle->resource) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid argument "
+ "(handle=0x%x:0x%x).\n", handle->context,
+ handle->resource);
+ result = VMCI_ERROR_INVALID_ARGS;
+ goto free_mem;
+ }
+ new_handle = *handle;
+ }
+
+ entry->idx = 0;
+ entry->priv_flags = priv_flags;
+ entry->is_doorbell = true;
+ entry->run_delayed = (flags & VMCI_FLAG_DELAYED_CB) ? true : false;
+ entry->notify_cb = notify_cb;
+ entry->client_data = client_data;
+ atomic_store_int(&entry->active, 0);
+ vmci_create_event(&entry->destroy_event);
+
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_DOORBELL, new_handle, vmci_doorbell_free_cb,
+ entry);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to add new resource "
+ "(handle=0x%x:0x%x).\n", new_handle.context,
+ new_handle.resource);
+ if (result == VMCI_ERROR_DUPLICATE_ENTRY)
+ result = VMCI_ERROR_ALREADY_EXISTS;
+
+ goto destroy;
+ }
+
+ vmci_doorbell_index_table_add(entry);
+ result = vmci_doorbell_link(new_handle, entry->is_doorbell, entry->idx);
+ if (VMCI_SUCCESS != result)
+ goto destroy_resource;
+ atomic_store_int(&entry->active, 1);
+
+ if (VMCI_HANDLE_INVALID(*handle))
+ *handle = new_handle;
+
+ return (result);
+
+destroy_resource:
+ vmci_doorbell_index_table_remove(entry);
+ vmci_resource_remove(new_handle, VMCI_RESOURCE_TYPE_DOORBELL);
+destroy:
+ vmci_destroy_event(&entry->destroy_event);
+free_mem:
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_destroy --
+ *
+ * Destroys a doorbell previously created with vmci_doorbell_create. This
+ * operation may block waiting for a callback to finish.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * May block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_destroy(struct vmci_handle handle)
+{
+ struct vmci_doorbell_entry *entry;
+ struct vmci_resource *resource;
+ int result;
+
+ if (VMCI_HANDLE_INVALID(handle))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ resource = vmci_resource_get(handle, VMCI_RESOURCE_TYPE_DOORBELL);
+ if (resource == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to destroy doorbell "
+ "(handle=0x%x:0x%x).\n", handle.context, handle.resource);
+ return (VMCI_ERROR_NOT_FOUND);
+ }
+ entry = RESOURCE_CONTAINER(resource, struct vmci_doorbell_entry,
+ resource);
+
+ vmci_doorbell_index_table_remove(entry);
+
+ result = vmci_doorbell_unlink(handle, entry->is_doorbell);
+ if (VMCI_SUCCESS != result) {
+
+ /*
+ * The only reason this should fail would be an inconsistency
+ * between guest and hypervisor state, where the guest believes
+ * it has an active registration whereas the hypervisor doesn't.
+ * One case where this may happen is if a doorbell is
+ * unregistered following a hibernation at a time where the
+ * doorbell state hasn't been restored on the hypervisor side
+ * yet. Since the handle has now been removed in the guest,
+ * we just print a warning and return success.
+ */
+
+ VMCI_LOG_DEBUG(LGPFX"Unlink of %s (handle=0x%x:0x%x) unknown "
+ "by hypervisor (error=%d).\n",
+ entry->is_doorbell ? "doorbell" : "queuepair",
+ handle.context, handle.resource, result);
+ }
+
+ /*
+ * Now remove the resource from the table. It might still be in use
+ * after this, in a callback or still on the delayed work queue.
+ */
+
+ vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DOORBELL);
+
+ /*
+ * We now wait on the destroy_event and release the reference we got
+ * above.
+ */
+
+ vmci_wait_on_event(&entry->destroy_event, vmci_doorbell_release_cb,
+ entry);
+
+ /*
+ * We know that we are now the only reference to the above entry so
+ * can safely free it.
+ */
+
+ vmci_destroy_event(&entry->destroy_event);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_notify_as_guest --
+ *
+ * Notify another guest or the host. We send a datagram down to the host
+ * via the hypervisor with the notification info.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * May do a hypercall.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_doorbell_notify_as_guest(struct vmci_handle handle,
+ vmci_privilege_flags priv_flags)
+{
+ struct vmci_doorbell_notify_msg notify_msg;
+
+ notify_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_DOORBELL_NOTIFY);
+ notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE;
+ notify_msg.handle = handle;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&notify_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_notify --
+ *
+ * Generates a notification on the doorbell identified by the handle. For
+ * host side generation of notifications, the caller can specify what the
+ * privilege of the calling side is.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * May do a hypercall.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_doorbell_notify(struct vmci_handle dst, vmci_privilege_flags priv_flags)
+{
+ struct vmci_handle src;
+
+ if (VMCI_HANDLE_INVALID(dst) ||
+ (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ src = VMCI_INVALID_HANDLE;
+
+ return (vmci_doorbell_notify_as_guest(dst, priv_flags));
+}
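+
+/*
+ * Example (illustrative only, not part of this change): a minimal sketch of
+ * the doorbell life cycle from a guest client's point of view. The callback
+ * and the client data are hypothetical. Passing VMCI_INVALID_HANDLE lets
+ * vmci_doorbell_create() pick a handle, which is then handed to the peer out
+ * of band; the peer rings it with vmci_doorbell_notify(h,
+ * VMCI_NO_PRIVILEGE_FLAGS), and with VMCI_FLAG_DELAYED_CB the callback runs
+ * in a kernel thread instead of being invoked directly.
+ * vmci_doorbell_destroy() may block waiting for a pending callback.
+ *
+ *	static void my_doorbell_cb(void *client_data);
+ *
+ *	struct vmci_handle h = VMCI_INVALID_HANDLE;
+ *	int rv;
+ *
+ *	rv = vmci_doorbell_create(&h, VMCI_FLAG_DELAYED_CB,
+ *	    VMCI_NO_PRIVILEGE_FLAGS, my_doorbell_cb, my_state);
+ *	...
+ *	rv = vmci_doorbell_destroy(h);
+ */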
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_delayed_dispatch_cb --
+ *
+ * Calls the specified callback in a delayed context.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_delayed_dispatch_cb(void *data)
+{
+ struct vmci_doorbell_entry *entry = (struct vmci_doorbell_entry *)data;
+
+ ASSERT(data);
+
+ entry->notify_cb(entry->client_data);
+
+ vmci_resource_release(&entry->resource);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_doorbell_sync(void)
+{
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+ vmci_resource_sync();
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_register_notification_bitmap --
+ *
+ * Register the notification bitmap with the host.
+ *
+ * Results:
+ * true if the bitmap is registered successfully with the device, false
+ * otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_register_notification_bitmap(PPN bitmap_ppn)
+{
+ struct vmci_notify_bitmap_set_msg bitmap_set_msg;
+ int result;
+
+ /*
+ * Do not ASSERT() on the guest device here. This function can get
+ * called during device initialization, so the ASSERT() will fail even
+ * though the device is (almost) up.
+ */
+
+ bitmap_set_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_SET_NOTIFY_BITMAP);
+ bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ bitmap_set_msg.hdr.payload_size =
+ sizeof(bitmap_set_msg) - VMCI_DG_HEADERSIZE;
+ bitmap_set_msg.bitmap_ppn = bitmap_ppn;
+
+ result = vmci_send_datagram((struct vmci_datagram *)&bitmap_set_msg);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to register (PPN=%u) as "
+ "notification bitmap (error=%d).\n",
+ bitmap_ppn, result);
+ return (false);
+ }
+ return (true);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_doorbell_fire_entries --
+ *
+ * Executes or schedules the handlers for a given notify index.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * Whatever the side effects of the handlers are.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_doorbell_fire_entries(uint32_t notify_idx)
+{
+ struct vmci_doorbell_entry *iter;
+ uint32_t bucket = VMCI_DOORBELL_HASH(notify_idx);
+
+ vmci_grab_lock_bh(&vmci_doorbell_it.lock);
+
+ vmci_list_scan(iter, &vmci_doorbell_it.entries[bucket], idx_list_item) {
+ if (iter->idx == notify_idx &&
+ atomic_load_int(&iter->active) == 1) {
+ ASSERT(iter->notify_cb);
+ if (iter->run_delayed) {
+ int err;
+
+ vmci_resource_hold(&iter->resource);
+ err = vmci_schedule_delayed_work(
+ vmci_doorbell_delayed_dispatch_cb, iter);
+ if (err != VMCI_SUCCESS) {
+ vmci_resource_release(&iter->resource);
+ goto out;
+ }
+ } else
+ iter->notify_cb(iter->client_data);
+ }
+ }
+
+out:
+ vmci_release_lock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_scan_notification_bitmap --
+ *
+ * Scans the notification bitmap, collects pending notifications, resets
+ * the bitmap and invokes appropriate callbacks.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May schedule tasks, allocate memory and run callbacks.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_scan_notification_bitmap(uint8_t *bitmap)
+{
+ uint32_t idx;
+
+ ASSERT(bitmap);
+
+ for (idx = 0; idx < max_notify_idx; idx++) {
+ if (bitmap[idx] & 0x1) {
+ bitmap[idx] &= ~1;
+ vmci_doorbell_fire_entries(idx);
+ }
+ }
+}
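+
+/*
+ * Usage note (illustrative sketch): the expected flow is that the bus
+ * attachment code registers a page-sized bitmap with the device via
+ * vmci_register_notification_bitmap() on the page's PPN, and that the
+ * interrupt handler calls vmci_scan_notification_bitmap() on that page
+ * whenever VMCI_ICR_NOTIFICATION is raised; each set slot is cleared and the
+ * doorbells hashed to that index are fired or scheduled. The interrupt
+ * plumbing itself lives outside this file.
+ */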
Index: sys/dev/vmware/vmci/vmci_driver.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_driver.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI driver interface. */
+
+#ifndef _VMCI_DRIVER_H_
+#define _VMCI_DRIVER_H_
+
+#include <sys/types.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include "vmci_call_defs.h"
+#include "vmci_kernel_if.h"
+
+#ifndef VMCI_DEBUG_LOGGING
+#define VMCI_LOG_DEBUG(_args, ...)
+#else /* VMCI_DEBUG_LOGGING */
+#define VMCI_LOG_DEBUG(_args, ...) \
+ log(LOG_DEBUG, _args, ##__VA_ARGS__)
+#endif /* !VMCI_DEBUG_LOGGING */
+#define VMCI_LOG_INFO(_args, ...) \
+ log(LOG_INFO, _args, ##__VA_ARGS__)
+#define VMCI_LOG_WARNING(_args, ...) \
+ log(LOG_WARNING, _args, ##__VA_ARGS__)
+#define VMCI_LOG_ERROR(_args, ...) \
+ log(LOG_ERR, _args, ##__VA_ARGS__)
+
+int vmci_components_init(void);
+void vmci_components_cleanup(void);
+int vmci_send_datagram(struct vmci_datagram *dg);
+
+void vmci_util_init(void);
+void vmci_util_exit(void);
+bool vmci_check_host_capabilities(void);
+void vmci_read_datagrams_from_port(vmci_io_handle io_handle,
+ vmci_io_port dg_in_port, uint8_t *dg_in_buffer,
+ size_t dg_in_buffer_size);
+
+#endif /* !_VMCI_DRIVER_H_ */
Index: sys/dev/vmware/vmci/vmci_driver.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_driver.c
@@ -0,0 +1,481 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI initialization. */
+
+#include "vmci.h"
+#include "vmci_doorbell.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+
+#define LGPFX "vmci: "
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+static vmci_id ctx_update_sub_id = VMCI_INVALID_ID;
+static volatile int vm_context_id = VMCI_INVALID_ID;
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_cid_update --
+ *
+ * Gets called with the new context id if updated or resumed.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_util_cid_update(vmci_id sub_id, struct vmci_event_data *event_data,
+ void *client_data)
+{
+ struct vmci_event_payload_context *ev_payload;
+
+ ev_payload = vmci_event_data_payload(event_data);
+
+ if (sub_id != ctx_update_sub_id) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid subscriber (ID=0x%x).\n", sub_id);
+ return;
+ }
+ if (event_data == NULL || ev_payload->context_id == VMCI_INVALID_ID) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid event data.\n");
+ return;
+ }
+ VMCI_LOG_INFO(LGPFX"Updating context from (ID=0x%x) to (ID=0x%x) on "
+ "event (type=%d).\n", atomic_load_int(&vm_context_id),
+ ev_payload->context_id, event_data->event);
+ atomic_store_int(&vm_context_id, ev_payload->context_id);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_init --
+ *
+ * Subscribe to context id update event.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_util_init(void)
+{
+
+ /*
+ * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can update
+ * the internal context id when needed.
+ */
+ if (vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
+ vmci_util_cid_update, NULL, &ctx_update_sub_id) < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to subscribe to event "
+ "(type=%d).\n", VMCI_EVENT_CTX_ID_UPDATE);
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_exit --
+ *
+ * Cleanup; unsubscribes from the context id update event.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_util_exit(void)
+{
+
+ if (vmci_event_unsubscribe(ctx_update_sub_id) < VMCI_SUCCESS)
+ VMCI_LOG_WARNING(LGPFX"Failed to unsubscribe to event "
+ "(type=%d) with subscriber (ID=0x%x).\n",
+ VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_util_check_host_capabilities --
+ *
+ * Verify that the host supports the hypercalls we need. If it does not, try
+ * to find fallback hypercalls and use those instead.
+ *
+ * Results:
+ * true if required hypercalls (or fallback hypercalls) are supported by the
+ * host, false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static bool
+vmci_util_check_host_capabilities(void)
+{
+ struct vmci_resources_query_msg *msg;
+ struct vmci_datagram *check_msg;
+ int result;
+ uint32_t msg_size;
+
+ msg_size = sizeof(struct vmci_resources_query_hdr) +
+ VMCI_UTIL_NUM_RESOURCES * sizeof(vmci_resource);
+ check_msg = vmci_alloc_kernel_mem(msg_size, VMCI_MEMORY_NORMAL);
+
+ if (check_msg == NULL) {
+ VMCI_LOG_WARNING(LGPFX"Check host: Insufficient memory.\n");
+ return (false);
+ }
+
+ check_msg->dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_RESOURCES_QUERY);
+ check_msg->src = VMCI_ANON_SRC_HANDLE;
+ check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
+ msg = (struct vmci_resources_query_msg *)VMCI_DG_PAYLOAD(check_msg);
+
+ msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
+ msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+ result = vmci_send_datagram(check_msg);
+ vmci_free_kernel_mem(check_msg, msg_size);
+
+ /* We need the vector. There are no fallbacks. */
+ return (result == 0x1);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_check_host_capabilities --
+ *
+ * Tell host which guestcalls we support and let each API check that the
+ * host supports the hypercalls it needs. If a hypercall is not supported,
+ * the API can check for a fallback hypercall, or fail the check.
+ *
+ * Results:
+ * true if successful, false otherwise.
+ *
+ * Side effects:
+ * Fallback mechanisms may be enabled in the API and vmmon.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_check_host_capabilities(void)
+{
+ bool result;
+
+ result = vmci_event_check_host_capabilities();
+ result &= vmci_datagram_check_host_capabilities();
+ result &= vmci_util_check_host_capabilities();
+
+ if (!result) {
+ /*
+ * If it failed, then make sure this goes to the system event
+ * log.
+ */
+ VMCI_LOG_WARNING(LGPFX"Host capability checked failed.\n");
+ } else
+ VMCI_LOG_DEBUG(LGPFX"Host capability check passed.\n");
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_read_datagrams_from_port --
+ *
+ * Reads datagrams from the data in port and dispatches them. We always
+ * start reading datagrams into only the first page of the datagram buffer.
+ * If the datagrams don't fit into one page, we use the maximum datagram
+ * buffer size for the remainder of the invocation. This is a simple
+ * heuristic for not penalizing small datagrams.
+ *
+ * This function assumes that it has exclusive access to the data in port
+ * for the duration of the call.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Datagram handlers may be invoked.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_read_datagrams_from_port(vmci_io_handle io_handle, vmci_io_port dg_in_port,
+ uint8_t *dg_in_buffer, size_t dg_in_buffer_size)
+{
+ struct vmci_datagram *dg;
+ size_t current_dg_in_buffer_size;
+ size_t remaining_bytes;
+
+ current_dg_in_buffer_size = PAGE_SIZE;
+
+ ASSERT(dg_in_buffer_size >= PAGE_SIZE);
+
+ vmci_read_port_bytes(io_handle, dg_in_port, dg_in_buffer,
+ current_dg_in_buffer_size);
+ dg = (struct vmci_datagram *)dg_in_buffer;
+ remaining_bytes = current_dg_in_buffer_size;
+
+ while (dg->dst.resource != VMCI_INVALID_ID ||
+ remaining_bytes > PAGE_SIZE) {
+ size_t dg_in_size;
+
+ /*
+ * When the input buffer spans multiple pages, a datagram can
+ * start on any page boundary in the buffer.
+ */
+
+ if (dg->dst.resource == VMCI_INVALID_ID) {
+ ASSERT(remaining_bytes > PAGE_SIZE);
+ dg = (struct vmci_datagram *)ROUNDUP((uintptr_t)dg + 1,
+ PAGE_SIZE);
+ ASSERT((uint8_t *)dg < dg_in_buffer +
+ current_dg_in_buffer_size);
+ remaining_bytes = (size_t)(dg_in_buffer +
+ current_dg_in_buffer_size - (uint8_t *)dg);
+ continue;
+ }
+
+ dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
+
+ if (dg_in_size <= dg_in_buffer_size) {
+ int result;
+
+ /*
+			 * If the remaining bytes in the datagram buffer don't
+			 * contain the complete datagram, we first make sure we
+			 * have enough room for it and then we read the remainder
+			 * of the datagram and possibly any following datagrams.
+ */
+
+ if (dg_in_size > remaining_bytes) {
+
+ if (remaining_bytes !=
+ current_dg_in_buffer_size) {
+
+ /*
+				 * We move the partial datagram to the
+				 * front and read the remainder of the
+				 * datagram and possibly any following
+				 * datagrams into the following bytes.
+ */
+
+ memmove(dg_in_buffer, dg_in_buffer +
+ current_dg_in_buffer_size -
+ remaining_bytes,
+ remaining_bytes);
+
+ dg = (struct vmci_datagram *)
+ dg_in_buffer;
+ }
+
+ if (current_dg_in_buffer_size !=
+ dg_in_buffer_size)
+ current_dg_in_buffer_size =
+ dg_in_buffer_size;
+
+ vmci_read_port_bytes(io_handle, dg_in_port,
+ dg_in_buffer + remaining_bytes,
+ current_dg_in_buffer_size -
+ remaining_bytes);
+ }
+
+ /*
+ * We special case event datagrams from the
+ * hypervisor.
+ */
+ if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+ dg->dst.resource == VMCI_EVENT_HANDLER)
+ result = vmci_event_dispatch(dg);
+ else
+ result =
+ vmci_datagram_invoke_guest_handler(dg);
+ if (result < VMCI_SUCCESS)
+ VMCI_LOG_DEBUG(LGPFX"Datagram with resource"
+ " (ID=0x%x) failed (err=%d).\n",
+ dg->dst.resource, result);
+
+ /* On to the next datagram. */
+ dg = (struct vmci_datagram *)((uint8_t *)dg +
+ dg_in_size);
+ } else {
+ size_t bytes_to_skip;
+
+ /*
+ * Datagram doesn't fit in datagram buffer of maximal
+ * size. We drop it.
+ */
+
+ VMCI_LOG_DEBUG(LGPFX"Failed to receive datagram "
+ "(size=%zu bytes).\n", dg_in_size);
+
+ bytes_to_skip = dg_in_size - remaining_bytes;
+ if (current_dg_in_buffer_size != dg_in_buffer_size)
+ current_dg_in_buffer_size = dg_in_buffer_size;
+ for (;;) {
+ vmci_read_port_bytes(io_handle, dg_in_port,
+ dg_in_buffer, current_dg_in_buffer_size);
+ if (bytes_to_skip <=
+ current_dg_in_buffer_size)
+ break;
+ bytes_to_skip -= current_dg_in_buffer_size;
+ }
+ dg = (struct vmci_datagram *)(dg_in_buffer +
+ bytes_to_skip);
+ }
+
+ remaining_bytes = (size_t) (dg_in_buffer +
+ current_dg_in_buffer_size - (uint8_t *)dg);
+
+ if (remaining_bytes < VMCI_DG_HEADERSIZE) {
+ /* Get the next batch of datagrams. */
+
+ vmci_read_port_bytes(io_handle, dg_in_port,
+ dg_in_buffer, current_dg_in_buffer_size);
+ dg = (struct vmci_datagram *)dg_in_buffer;
+ remaining_bytes = current_dg_in_buffer_size;
+ }
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_get_context_id --
+ *
+ *	Returns the context ID of the virtual machine. The ID is obtained from
+ *	the hypervisor with a VMCI_GET_CONTEXT_ID datagram on first use and is
+ *	cached for subsequent calls.
+ *
+ * Results:
+ * Context ID.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+vmci_id
+vmci_get_context_id(void)
+{
+ if (atomic_load_int(&vm_context_id) == VMCI_INVALID_ID) {
+ uint32_t result;
+ struct vmci_datagram get_cid_msg;
+ get_cid_msg.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_GET_CONTEXT_ID);
+ get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
+ get_cid_msg.payload_size = 0;
+ result = vmci_send_datagram(&get_cid_msg);
+ atomic_store_int(&vm_context_id, result);
+ }
+ return (atomic_load_int(&vm_context_id));
+}
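+
+/*
+ * Editor's illustrative sketch (not part of this change): a guest component
+ * can query its own context ID directly; the first call issues the
+ * VMCI_GET_CONTEXT_ID datagram and later calls return the cached value.
+ *
+ *	vmci_id cid;
+ *
+ *	cid = vmci_get_context_id();
+ *	VMCI_LOG_DEBUG(LGPFX"Running as context 0x%x.\n", cid);
+ */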
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_components_init --
+ *
+ * Initializes VMCI components and registers core hypercalls.
+ *
+ * Results:
+ * VMCI_SUCCESS if successful, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_components_init(void)
+{
+ int result;
+
+ result = vmci_resource_init();
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_resource "
+ "(result=%d).\n", result);
+ goto error_exit;
+ }
+
+ result = vmci_event_init();
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_event "
+ "(result=%d).\n", result);
+ goto resource_exit;
+ }
+
+ result = vmci_doorbell_init();
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_doorbell "
+ "(result=%d).\n", result);
+ goto event_exit;
+ }
+
+ VMCI_LOG_DEBUG(LGPFX"components initialized.\n");
+ return (VMCI_SUCCESS);
+
+event_exit:
+ vmci_event_exit();
+
+resource_exit:
+ vmci_resource_exit();
+
+error_exit:
+ return (result);
+}
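+
+/*
+ * Editor's illustrative sketch (not part of this change): the expected
+ * pairing with vmci_components_cleanup() from an attach/detach path; the
+ * ENXIO mapping is only an example.
+ *
+ *	if (vmci_components_init() < VMCI_SUCCESS)
+ *		return (ENXIO);
+ *	...
+ *	vmci_components_cleanup();
+ */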
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_components_cleanup --
+ *
+ * Cleans up VMCI components.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_components_cleanup(void)
+{
+
+ vmci_doorbell_exit();
+ vmci_event_exit();
+ vmci_resource_exit();
+}
Index: sys/dev/vmware/vmci/vmci_event.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_event.h
@@ -0,0 +1,21 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Event code for the vmci guest driver. */
+
+#ifndef _VMCI_EVENT_H_
+#define _VMCI_EVENT_H_
+
+#include "vmci_call_defs.h"
+#include "vmci_defs.h"
+
+int vmci_event_init(void);
+void vmci_event_exit(void);
+void vmci_event_sync(void);
+int vmci_event_dispatch(struct vmci_datagram *msg);
+bool vmci_event_check_host_capabilities(void);
+
+#endif /* !_VMCI_EVENT_H_ */
Index: sys/dev/vmware/vmci/vmci_event.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_event.c
@@ -0,0 +1,693 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements VMCI Event code. */
+
+#include "vmci.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_kernel_if.h"
+
+#define LGPFX "vmci_event: "
+#define EVENT_MAGIC 0xEABE0000
+
+struct vmci_subscription {
+ vmci_id id;
+ int ref_count;
+ bool run_delayed;
+ vmci_event destroy_event;
+ vmci_event_type event;
+ vmci_event_cb callback;
+ void *callback_data;
+ vmci_list_item(vmci_subscription) subscriber_list_item;
+};
+
+static struct vmci_subscription *vmci_event_find(vmci_id sub_id);
+static int vmci_event_deliver(struct vmci_event_msg *event_msg);
+static int vmci_event_register_subscription(struct vmci_subscription *sub,
+ vmci_event_type event, uint32_t flags,
+ vmci_event_cb callback, void *callback_data);
+static struct vmci_subscription *vmci_event_unregister_subscription(
+ vmci_id sub_id);
+
+static vmci_list(vmci_subscription) subscriber_array[VMCI_EVENT_MAX];
+static vmci_lock subscriber_lock;
+
+struct vmci_delayed_event_info {
+ struct vmci_subscription *sub;
+ uint8_t event_payload[sizeof(struct vmci_event_data_max)];
+};
+
+struct vmci_event_ref {
+ struct vmci_subscription *sub;
+ vmci_list_item(vmci_event_ref) list_item;
+};
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_init --
+ *
+ * General init code.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_init(void)
+{
+ int i;
+
+ for (i = 0; i < VMCI_EVENT_MAX; i++)
+ vmci_list_init(&subscriber_array[i]);
+
+ return (vmci_init_lock(&subscriber_lock, "VMCI Event subscriber lock"));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_exit --
+ *
+ * General exit code.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_event_exit(void)
+{
+ struct vmci_subscription *iter, *iter_2;
+ vmci_event_type e;
+
+ /* We free all memory at exit. */
+ for (e = 0; e < VMCI_EVENT_MAX; e++) {
+ vmci_list_scan_safe(iter, &subscriber_array[e],
+ subscriber_list_item, iter_2) {
+
+ /*
+ * We should never get here because all events should
+ * have been unregistered before we try to unload the
+			 * driver module. Also, delayed callbacks could still
+			 * be firing, so this cleanup would not be safe. Still,
+			 * it is better to free the memory than to leak it, so
+			 * we leave this code in just in case.
+ */
+ ASSERT(false);
+
+ vmci_free_kernel_mem(iter, sizeof(*iter));
+ }
+ }
+ vmci_cleanup_lock(&subscriber_lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_event_sync(void)
+{
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_release_lock_bh(&subscriber_lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_check_host_capabilities --
+ *
+ * Verify that the host supports the hypercalls we need. If it does not,
+ * try to find fallback hypercalls and use those instead.
+ *
+ * Results:
+ * true if required hypercalls (or fallback hypercalls) are
+ * supported by the host, false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_event_check_host_capabilities(void)
+{
+
+ /* vmci_event does not require any hypercalls. */
+ return (true);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_get --
+ *
+ * Gets a reference to the given struct vmci_subscription.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_event_get(struct vmci_subscription *entry)
+{
+
+ ASSERT(entry);
+
+ entry->ref_count++;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_release --
+ *
+ * Releases the given struct vmci_subscription.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Fires the destroy event if the reference count has gone to zero.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_event_release(struct vmci_subscription *entry)
+{
+
+ ASSERT(entry);
+ ASSERT(entry->ref_count > 0);
+
+ entry->ref_count--;
+ if (entry->ref_count == 0)
+ vmci_signal_event(&entry->destroy_event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * event_release_cb --
+ *
+ * Callback to release the event entry reference. It is called by the
+ * vmci_wait_on_event function before it blocks.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+event_release_cb(void *client_data)
+{
+ struct vmci_subscription *sub = (struct vmci_subscription *)client_data;
+
+ ASSERT(sub);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_event_release(sub);
+ vmci_release_lock_bh(&subscriber_lock);
+
+ return (0);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_find --
+ *
+ * Find entry. Assumes lock is held.
+ *
+ * Results:
+ * Entry if found, NULL if not.
+ *
+ * Side effects:
+ * Increments the struct vmci_subscription refcount if an entry is found.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_subscription *
+vmci_event_find(vmci_id sub_id)
+{
+ struct vmci_subscription *iter;
+ vmci_event_type e;
+
+ for (e = 0; e < VMCI_EVENT_MAX; e++) {
+ vmci_list_scan(iter, &subscriber_array[e],
+ subscriber_list_item) {
+ if (iter->id == sub_id) {
+ vmci_event_get(iter);
+ return (iter);
+ }
+ }
+ }
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_delayed_dispatch_cb --
+ *
+ * Calls the specified callback in a delayed context.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_event_delayed_dispatch_cb(void *data)
+{
+ struct vmci_delayed_event_info *event_info;
+ struct vmci_subscription *sub;
+ struct vmci_event_data *ed;
+
+ event_info = (struct vmci_delayed_event_info *)data;
+
+ ASSERT(event_info);
+ ASSERT(event_info->sub);
+
+ sub = event_info->sub;
+ ed = (struct vmci_event_data *)event_info->event_payload;
+
+ sub->callback(sub->id, ed, sub->callback_data);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_event_release(sub);
+ vmci_release_lock_bh(&subscriber_lock);
+
+ vmci_free_kernel_mem(event_info, sizeof(*event_info));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_deliver --
+ *
+ * Actually delivers the events to the subscribers.
+ *
+ * Results:
+ *	VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * The callback function for each subscriber is invoked.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_event_deliver(struct vmci_event_msg *event_msg)
+{
+ struct vmci_subscription *iter;
+ int err = VMCI_SUCCESS;
+
+ vmci_list(vmci_event_ref) no_delay_list;
+ vmci_list_init(&no_delay_list);
+
+ ASSERT(event_msg);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_list_scan(iter, &subscriber_array[event_msg->event_data.event],
+ subscriber_list_item) {
+ if (iter->run_delayed) {
+ struct vmci_delayed_event_info *event_info;
+ if ((event_info =
+ vmci_alloc_kernel_mem(sizeof(*event_info),
+ VMCI_MEMORY_ATOMIC)) == NULL) {
+ err = VMCI_ERROR_NO_MEM;
+ goto out;
+ }
+
+ vmci_event_get(iter);
+
+ memset(event_info, 0, sizeof(*event_info));
+ memcpy(event_info->event_payload,
+ VMCI_DG_PAYLOAD(event_msg),
+ (size_t)event_msg->hdr.payload_size);
+ event_info->sub = iter;
+ err =
+ vmci_schedule_delayed_work(
+ vmci_event_delayed_dispatch_cb, event_info);
+ if (err != VMCI_SUCCESS) {
+ vmci_event_release(iter);
+ vmci_free_kernel_mem(
+ event_info, sizeof(*event_info));
+ goto out;
+ }
+
+ } else {
+ struct vmci_event_ref *event_ref;
+
+ /*
+ * We construct a local list of subscribers and release
+ * subscriber_lock before invoking the callbacks. This
+ * is similar to delayed callbacks, but callbacks are
+ * invoked right away here.
+ */
+ if ((event_ref = vmci_alloc_kernel_mem(
+ sizeof(*event_ref), VMCI_MEMORY_ATOMIC)) == NULL) {
+ err = VMCI_ERROR_NO_MEM;
+ goto out;
+ }
+
+ vmci_event_get(iter);
+ event_ref->sub = iter;
+ vmci_list_insert(&no_delay_list, event_ref, list_item);
+ }
+ }
+
+out:
+ vmci_release_lock_bh(&subscriber_lock);
+
+ if (!vmci_list_empty(&no_delay_list)) {
+ struct vmci_event_data *ed;
+ struct vmci_event_ref *iter;
+ struct vmci_event_ref *iter_2;
+
+ vmci_list_scan_safe(iter, &no_delay_list, list_item, iter_2) {
+ struct vmci_subscription *cur;
+ uint8_t event_payload[sizeof(
+ struct vmci_event_data_max)];
+
+ cur = iter->sub;
+
+ /*
+ * We set event data before each callback to ensure
+ * isolation.
+ */
+ memset(event_payload, 0, sizeof(event_payload));
+ memcpy(event_payload, VMCI_DG_PAYLOAD(event_msg),
+ (size_t)event_msg->hdr.payload_size);
+ ed = (struct vmci_event_data *)event_payload;
+ cur->callback(cur->id, ed, cur->callback_data);
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ vmci_event_release(cur);
+ vmci_release_lock_bh(&subscriber_lock);
+ vmci_free_kernel_mem(iter, sizeof(*iter));
+ }
+ }
+
+ return (err);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_dispatch --
+ *
+ * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all
+ * subscribers for given event.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_dispatch(struct vmci_datagram *msg)
+{
+ struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg;
+
+ ASSERT(msg &&
+ msg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+ msg->dst.resource == VMCI_EVENT_HANDLER);
+
+ if (msg->payload_size < sizeof(vmci_event_type) ||
+ msg->payload_size > sizeof(struct vmci_event_data_max))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (!VMCI_EVENT_VALID(event_msg->event_data.event))
+ return (VMCI_ERROR_EVENT_UNKNOWN);
+
+ vmci_event_deliver(event_msg);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_register_subscription --
+ *
+ * Initialize and add subscription to subscriber list.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_event_register_subscription(struct vmci_subscription *sub,
+ vmci_event_type event, uint32_t flags, vmci_event_cb callback,
+ void *callback_data)
+{
+#define VMCI_EVENT_MAX_ATTEMPTS 10
+ static vmci_id subscription_id = 0;
+ int result;
+ uint32_t attempts = 0;
+ bool success;
+
+ ASSERT(sub);
+
+ if (!VMCI_EVENT_VALID(event) || callback == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to subscribe to event"
+ " (type=%d) (callback=%p) (data=%p).\n",
+ event, callback, callback_data);
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ if (!vmci_can_schedule_delayed_work()) {
+ /*
+ * If the platform doesn't support delayed work callbacks then
+ * don't allow registration for them.
+ */
+ if (flags & VMCI_FLAG_EVENT_DELAYED_CB)
+ return (VMCI_ERROR_INVALID_ARGS);
+ sub->run_delayed = false;
+ } else {
+ /*
+ * The platform supports delayed work callbacks. Honor the
+		 * requested flags.
+ */
+ sub->run_delayed = (flags & VMCI_FLAG_EVENT_DELAYED_CB) ?
+ true : false;
+ }
+
+ sub->ref_count = 1;
+ sub->event = event;
+ sub->callback = callback;
+ sub->callback_data = callback_data;
+
+ vmci_grab_lock_bh(&subscriber_lock);
+
+ for (success = false, attempts = 0;
+ success == false && attempts < VMCI_EVENT_MAX_ATTEMPTS;
+ attempts++) {
+ struct vmci_subscription *existing_sub = NULL;
+
+ /*
+		 * We try to get an id a couple of times before claiming we are
+ * out of resources.
+ */
+ sub->id = ++subscription_id;
+
+ /* Test for duplicate id. */
+ existing_sub = vmci_event_find(sub->id);
+ if (existing_sub == NULL) {
+ /* We succeeded if we didn't find a duplicate. */
+ success = true;
+ } else
+ vmci_event_release(existing_sub);
+ }
+
+ if (success) {
+ vmci_create_event(&sub->destroy_event);
+ vmci_list_insert(&subscriber_array[event], sub,
+ subscriber_list_item);
+ result = VMCI_SUCCESS;
+ } else
+ result = VMCI_ERROR_NO_RESOURCES;
+
+ vmci_release_lock_bh(&subscriber_lock);
+ return (result);
+#undef VMCI_EVENT_MAX_ATTEMPTS
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_unregister_subscription --
+ *
+ * Remove subscription from subscriber list.
+ *
+ * Results:
+ * struct vmci_subscription when found, NULL otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_subscription *
+vmci_event_unregister_subscription(vmci_id sub_id)
+{
+ struct vmci_subscription *s;
+
+ vmci_grab_lock_bh(&subscriber_lock);
+ s = vmci_event_find(sub_id);
+ if (s != NULL) {
+ vmci_event_release(s);
+ vmci_list_remove(s, subscriber_list_item);
+ }
+ vmci_release_lock_bh(&subscriber_lock);
+
+ if (s != NULL) {
+ vmci_wait_on_event(&s->destroy_event, event_release_cb, s);
+ vmci_destroy_event(&s->destroy_event);
+ }
+
+ return (s);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_subscribe --
+ *
+ * Subscribe to given event. The callback specified can be fired in
+ * different contexts depending on what flag is specified while registering.
+ * If flags contains VMCI_FLAG_EVENT_NONE then the callback is fired with
+ *	the subscriber lock held (and BH context on the guest). If flags contains
+ * VMCI_FLAG_EVENT_DELAYED_CB then the callback is fired with no locks held
+ * in thread context. This is useful because other vmci_event functions can
+ * be called, but it also increases the chances that an event will be
+ * dropped.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_subscribe(vmci_event_type event, vmci_event_cb callback,
+ void *callback_data, vmci_id *subscription_id)
+{
+ int retval;
+ uint32_t flags = VMCI_FLAG_EVENT_NONE;
+ struct vmci_subscription *s = NULL;
+
+ if (subscription_id == NULL) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid subscription (NULL).\n");
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ s = vmci_alloc_kernel_mem(sizeof(*s), VMCI_MEMORY_NORMAL);
+ if (s == NULL)
+ return (VMCI_ERROR_NO_MEM);
+
+ retval = vmci_event_register_subscription(s, event, flags,
+ callback, callback_data);
+ if (retval < VMCI_SUCCESS) {
+ vmci_free_kernel_mem(s, sizeof(*s));
+ return (retval);
+ }
+
+ *subscription_id = s->id;
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_event_unsubscribe --
+ *
+ *	Unsubscribes from the given event. Removes the subscription from the
+ *	list and frees it.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_event_unsubscribe(vmci_id sub_id)
+{
+ struct vmci_subscription *s;
+
+ /*
+	 * Return subscription. At this point we know no one else is accessing
+ * the subscription so we can free it.
+ */
+ s = vmci_event_unregister_subscription(sub_id);
+ if (s == NULL)
+ return (VMCI_ERROR_NOT_FOUND);
+ vmci_free_kernel_mem(s, sizeof(*s));
+
+ return (VMCI_SUCCESS);
+}
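+
+/*
+ * Editor's illustrative sketch (not part of this change): a typical client
+ * subscribes a callback for an event type and unsubscribes when done. The
+ * callback matches the vmci_event_cb typedef from vmci_kernel_api_1.h;
+ * "my_ctx" and the event type used here are placeholders.
+ *
+ *	static void
+ *	my_event_cb(vmci_id sub_id, struct vmci_event_data *ed, void *data)
+ *	{
+ *		VMCI_LOG_DEBUG(LGPFX"Got event %d.\n", ed->event);
+ *	}
+ *
+ *	vmci_id sub_id;
+ *
+ *	if (vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, my_event_cb,
+ *	    my_ctx, &sub_id) == VMCI_SUCCESS) {
+ *		...
+ *		vmci_event_unsubscribe(sub_id);
+ *	}
+ */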
Index: sys/dev/vmware/vmci/vmci_hashtable.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_hashtable.h
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Hash table for use in the APIs. */
+
+#ifndef _VMCI_HASHTABLE_H_
+#define _VMCI_HASHTABLE_H_
+
+#include "vmci_defs.h"
+#include "vmci_kernel_if.h"
+
+struct vmci_hash_entry {
+ struct vmci_handle handle;
+ int ref_count;
+ struct vmci_hash_entry *next;
+};
+
+struct vmci_hashtable {
+ struct vmci_hash_entry **entries;
+ /* Number of buckets in above array. */
+ int size;
+ vmci_lock lock;
+};
+
+struct vmci_hashtable *vmci_hashtable_create(int size);
+void vmci_hashtable_destroy(struct vmci_hashtable *table);
+void vmci_hashtable_init_entry(struct vmci_hash_entry *entry,
+ struct vmci_handle handle);
+int vmci_hashtable_add_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+int vmci_hashtable_remove_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+struct vmci_hash_entry *vmci_hashtable_get_entry(struct vmci_hashtable *table,
+ struct vmci_handle handle);
+void vmci_hashtable_hold_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+int vmci_hashtable_release_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+bool vmci_hashtable_entry_exists(struct vmci_hashtable *table,
+ struct vmci_handle handle);
+void vmci_hashtable_sync(struct vmci_hashtable *table);
+
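+/*
+ * Editor's illustrative sketch (not part of this change): a resource
+ * typically embeds a struct vmci_hash_entry, initializes it with its
+ * handle and adds it to a table; lookups return the entry with an extra
+ * reference held. "res", "handle" and the bucket count are placeholders.
+ *
+ *	struct vmci_hashtable *table;
+ *	struct vmci_hash_entry *entry;
+ *
+ *	table = vmci_hashtable_create(128);
+ *	vmci_hashtable_init_entry(&res->entry, handle);
+ *	(void)vmci_hashtable_add_entry(table, &res->entry);
+ *
+ *	entry = vmci_hashtable_get_entry(table, handle);
+ *	if (entry != NULL)
+ *		(void)vmci_hashtable_release_entry(table, entry);
+ */
+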
+#endif /* !_VMCI_HASHTABLE_H_ */
Index: sys/dev/vmware/vmci/vmci_hashtable.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_hashtable.c
@@ -0,0 +1,565 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Implementation of the VMCI Hashtable. */
+
+#include "vmci.h"
+#include "vmci_driver.h"
+#include "vmci_hashtable.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_utils.h"
+
+#define LGPFX "vmci_hashtable: "
+
+#define VMCI_HASHTABLE_HASH(_h, _sz) \
+ vmci_hash_id(VMCI_HANDLE_TO_RESOURCE_ID(_h), (_sz))
+
+static int hashtable_unlink_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry);
+static bool vmci_hashtable_entry_exists_locked(struct vmci_hashtable *table,
+ struct vmci_handle handle);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_create --
+ *
+ * Creates a hashtable.
+ *
+ * Result:
+ *	Pointer to the created hashtable on success, NULL otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_hashtable *
+vmci_hashtable_create(int size)
+{
+ struct vmci_hashtable *table;
+
+ table = vmci_alloc_kernel_mem(sizeof(*table),
+ VMCI_MEMORY_NORMAL);
+ if (table == NULL)
+ return (NULL);
+ memset(table, 0, sizeof(*table));
+
+ table->entries = vmci_alloc_kernel_mem(sizeof(*table->entries) * size,
+ VMCI_MEMORY_NORMAL);
+ if (table->entries == NULL) {
+ vmci_free_kernel_mem(table, sizeof(*table));
+ return (NULL);
+ }
+ memset(table->entries, 0, sizeof(*table->entries) * size);
+ table->size = size;
+ if (vmci_init_lock(&table->lock, "VMCI Hashtable lock") <
+ VMCI_SUCCESS) {
+ vmci_free_kernel_mem(table->entries, sizeof(*table->entries) * size);
+ vmci_free_kernel_mem(table, sizeof(*table));
+ return (NULL);
+ }
+
+ return (table);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_destroy --
+ *
+ * This function should be called at module exit time. We rely on the
+ *	module ref count to ensure that no one is accessing any hash table
+ * entries at this point in time. Hence we should be able to just remove
+ * all entries from the hash table.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_hashtable_destroy(struct vmci_hashtable *table)
+{
+
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+ vmci_free_kernel_mem(table->entries, sizeof(*table->entries) *
+ table->size);
+ table->entries = NULL;
+ vmci_release_lock_bh(&table->lock);
+ vmci_cleanup_lock(&table->lock);
+ vmci_free_kernel_mem(table, sizeof(*table));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_init_entry --
+ *
+ * Initializes a hash entry.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+void
+vmci_hashtable_init_entry(struct vmci_hash_entry *entry,
+ struct vmci_handle handle)
+{
+
+ ASSERT(entry);
+ entry->handle = handle;
+ entry->ref_count = 0;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_add_entry --
+ *
+ * Adds an entry to the hashtable.
+ *
+ * Result:
+ *	VMCI_SUCCESS if the entry was added, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_hashtable_add_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int idx;
+
+ ASSERT(entry);
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+
+ if (vmci_hashtable_entry_exists_locked(table, entry->handle)) {
+ VMCI_LOG_DEBUG(LGPFX"Entry (handle=0x%x:0x%x) already "
+ "exists.\n", entry->handle.context,
+ entry->handle.resource);
+ vmci_release_lock_bh(&table->lock);
+ return (VMCI_ERROR_DUPLICATE_ENTRY);
+ }
+
+ idx = VMCI_HASHTABLE_HASH(entry->handle, table->size);
+ ASSERT(idx < table->size);
+
+ /* New entry is added to top/front of hash bucket. */
+ entry->ref_count++;
+ entry->next = table->entries[idx];
+ table->entries[idx] = entry;
+ vmci_release_lock_bh(&table->lock);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_remove_entry --
+ *
+ * Removes an entry from the hashtable.
+ *
+ * Result:
+ *	VMCI_SUCCESS_ENTRY_DEAD if the last reference was released,
+ *	VMCI_SUCCESS or an error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_hashtable_remove_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result;
+
+ ASSERT(table);
+ ASSERT(entry);
+
+ vmci_grab_lock_bh(&table->lock);
+
+ /* First unlink the entry. */
+ result = hashtable_unlink_entry(table, entry);
+ if (result != VMCI_SUCCESS) {
+ /* We failed to find the entry. */
+ goto done;
+ }
+
+ /* Decrement refcount and check if this is last reference. */
+ entry->ref_count--;
+ if (entry->ref_count == 0) {
+ result = VMCI_SUCCESS_ENTRY_DEAD;
+ goto done;
+ }
+
+done:
+ vmci_release_lock_bh(&table->lock);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_get_entry_locked --
+ *
+ *	Looks up an entry in the hash table. Assumes the table lock is held.
+ *
+ * Result:
+ * If the element is found, a pointer to the element is returned.
+ * Otherwise NULL is returned.
+ *
+ * Side effects:
+ * The reference count of the returned element is increased.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct vmci_hash_entry *
+vmci_hashtable_get_entry_locked(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ struct vmci_hash_entry *cur = NULL;
+ int idx;
+
+ ASSERT(!VMCI_HANDLE_EQUAL(handle, VMCI_INVALID_HANDLE));
+ ASSERT(table);
+
+ idx = VMCI_HASHTABLE_HASH(handle, table->size);
+
+ cur = table->entries[idx];
+ while (true) {
+ if (cur == NULL)
+ break;
+
+ if (VMCI_HANDLE_TO_RESOURCE_ID(cur->handle) ==
+ VMCI_HANDLE_TO_RESOURCE_ID(handle)) {
+ if ((VMCI_HANDLE_TO_CONTEXT_ID(cur->handle) ==
+ VMCI_HANDLE_TO_CONTEXT_ID(handle)) ||
+ (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(cur->handle))) {
+ cur->ref_count++;
+ break;
+ }
+ }
+ cur = cur->next;
+ }
+
+ return (cur);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_get_entry --
+ *
+ * Gets an entry from the hashtable.
+ *
+ * Result:
+ *	Pointer to the entry if found, NULL otherwise.
+ *
+ * Side effects:
+ *	The reference count of the returned entry is increased.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_hash_entry *
+vmci_hashtable_get_entry(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ struct vmci_hash_entry *entry;
+
+ if (VMCI_HANDLE_EQUAL(handle, VMCI_INVALID_HANDLE))
+ return (NULL);
+
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+ entry = vmci_hashtable_get_entry_locked(table, handle);
+ vmci_release_lock_bh(&table->lock);
+
+ return (entry);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_hold_entry --
+ *
+ * Hold the given entry. This will increment the entry's reference count.
+ *	This is like vmci_hashtable_get_entry() but without having to look up
+ *	the entry by handle.
+ *
+ * Result:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_hashtable_hold_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+
+ ASSERT(table);
+ ASSERT(entry);
+
+ vmci_grab_lock_bh(&table->lock);
+ entry->ref_count++;
+ vmci_release_lock_bh(&table->lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_release_entry_locked --
+ *
+ * Releases an element previously obtained with
+ * vmci_hashtable_get_entry_locked.
+ *
+ * Result:
+ * If the entry is removed from the hash table, VMCI_SUCCESS_ENTRY_DEAD
+ * is returned. Otherwise, VMCI_SUCCESS is returned.
+ *
+ * Side effects:
+ * The reference count of the entry is decreased and the entry is removed
+ *	from the hash table when the count reaches 0.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_hashtable_release_entry_locked(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result = VMCI_SUCCESS;
+
+ ASSERT(table);
+ ASSERT(entry);
+
+ entry->ref_count--;
+ /* Check if this is last reference and report if so. */
+ if (entry->ref_count == 0) {
+
+ /*
+ * Remove entry from hash table if not already removed. This
+		 * could have happened already because
+		 * vmci_hashtable_remove_entry was called to unlink it. We
+		 * ignore it if the entry is not found. Datagram handles will
+		 * often have remove_entry called, whereas shared memory
+		 * regions rely on release_entry to unlink the entry, since the
+		 * creator does not call remove_entry when it detaches.
+ */
+
+ hashtable_unlink_entry(table, entry);
+ result = VMCI_SUCCESS_ENTRY_DEAD;
+ }
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_release_entry --
+ *
+ * Releases an entry from the hashtable.
+ *
+ * Result:
+ *	VMCI_SUCCESS_ENTRY_DEAD if the entry was removed from the table,
+ *	VMCI_SUCCESS otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_hashtable_release_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result;
+
+ ASSERT(table);
+ vmci_grab_lock_bh(&table->lock);
+ result = vmci_hashtable_release_entry_locked(table, entry);
+ vmci_release_lock_bh(&table->lock);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_entry_exists --
+ *
+ *	Returns whether an entry exists in the hashtable.
+ *
+ * Result:
+ * true if handle already in hashtable. false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_hashtable_entry_exists(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ bool exists;
+
+ ASSERT(table);
+
+ vmci_grab_lock_bh(&table->lock);
+ exists = vmci_hashtable_entry_exists_locked(table, handle);
+ vmci_release_lock_bh(&table->lock);
+
+ return (exists);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_entry_exists_locked --
+ *
+ *	Like vmci_hashtable_entry_exists(), but assumes that the caller already
+ *	holds the table lock.
+ *
+ * Result:
+ * true if handle already in hashtable. false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static bool
+vmci_hashtable_entry_exists_locked(struct vmci_hashtable *table,
+ struct vmci_handle handle)
+{
+ struct vmci_hash_entry *entry;
+ int idx;
+
+ ASSERT(table);
+
+ idx = VMCI_HASHTABLE_HASH(handle, table->size);
+
+ entry = table->entries[idx];
+ while (entry) {
+ if (VMCI_HANDLE_TO_RESOURCE_ID(entry->handle) ==
+ VMCI_HANDLE_TO_RESOURCE_ID(handle))
+ if ((VMCI_HANDLE_TO_CONTEXT_ID(entry->handle) ==
+ VMCI_HANDLE_TO_CONTEXT_ID(handle)) ||
+ (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(handle)) ||
+ (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(entry->handle)))
+ return (true);
+ entry = entry->next;
+ }
+
+ return (false);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * hashtable_unlink_entry --
+ *
+ *	Unlinks the given entry from its hash chain. Assumes the caller holds
+ *	the table lock.
+ *
+ * Result:
+ *	VMCI_SUCCESS if the entry was unlinked, VMCI_ERROR_NOT_FOUND otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+hashtable_unlink_entry(struct vmci_hashtable *table,
+ struct vmci_hash_entry *entry)
+{
+ int result;
+ struct vmci_hash_entry *prev, *cur;
+ int idx;
+
+ idx = VMCI_HASHTABLE_HASH(entry->handle, table->size);
+
+ prev = NULL;
+ cur = table->entries[idx];
+ while (true) {
+ if (cur == NULL) {
+ result = VMCI_ERROR_NOT_FOUND;
+ break;
+ }
+ if (VMCI_HANDLE_EQUAL(cur->handle, entry->handle)) {
+ ASSERT(cur == entry);
+
+ /* Remove entry and break. */
+ if (prev)
+ prev->next = cur->next;
+ else
+ table->entries[idx] = cur->next;
+ cur->next = NULL;
+ result = VMCI_SUCCESS;
+ break;
+ }
+ prev = cur;
+ cur = cur->next;
+ }
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hashtable_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_hashtable_sync(struct vmci_hashtable *table)
+{
+
+ ASSERT(table);
+ vmci_grab_lock_bh(&table->lock);
+ vmci_release_lock_bh(&table->lock);
+}
Index: sys/dev/vmware/vmci/vmci_kernel_api.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_api.h
@@ -0,0 +1,16 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Kernel API (current) exported from the VMCI guest driver. */
+
+#ifndef _VMCI_KERNEL_API_H_
+#define _VMCI_KERNEL_API_H_
+
+/* With this file you always get the latest version. */
+#include "vmci_kernel_api_1.h"
+#include "vmci_kernel_api_2.h"
+
+#endif /* !_VMCI_KERNEL_API_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_api_1.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_api_1.h
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Kernel API (v1) exported from the VMCI guest driver. */
+
+#ifndef _VMCI_KERNEL_API_1_H_
+#define _VMCI_KERNEL_API_1_H_
+
+#include "vmci_call_defs.h"
+#include "vmci_defs.h"
+
+/* Define version 1. */
+#undef VMCI_KERNEL_API_VERSION
+#define VMCI_KERNEL_API_VERSION_1 1
+#define VMCI_KERNEL_API_VERSION VMCI_KERNEL_API_VERSION_1
+
+/* VMCI Datagram API. */
+int vmci_datagram_create_handle(uint32_t resource_id, uint32_t flags,
+ vmci_datagram_recv_cb recv_cb, void *client_data,
+ struct vmci_handle *out_handle);
+int vmci_datagram_create_handle_priv(uint32_t resource_id, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb,
+ void *client_data, struct vmci_handle *out_handle);
+int vmci_datagram_destroy_handle(struct vmci_handle handle);
+int vmci_datagram_send(struct vmci_datagram *msg);
+
+/* VMCI Utility API. */
+vmci_id vmci_get_context_id(void);
+
+/* VMCI Event API. */
+typedef void (*vmci_event_cb)(vmci_id sub_id, struct vmci_event_data *ed,
+ void *client_data);
+
+int vmci_event_subscribe(vmci_event_type event, vmci_event_cb callback,
+ void *callback_data, vmci_id *sub_id);
+int vmci_event_unsubscribe(vmci_id sub_id);
+
+/* VMCI Queue Pair API. */
+struct vmci_qpair;
+
+int vmci_qpair_alloc(struct vmci_qpair **qpair, struct vmci_handle *handle,
+ uint64_t produce_q_size, uint64_t consume_q_size, vmci_id peer,
+ uint32_t flags, vmci_privilege_flags priv_flags);
+int vmci_qpair_detach(struct vmci_qpair **qpair);
+int vmci_qpair_get_produce_indexes(const struct vmci_qpair *qpair,
+ uint64_t *producer_tail, uint64_t *consumer_head);
+int vmci_qpair_get_consume_indexes(const struct vmci_qpair *qpair,
+ uint64_t *consumer_tail, uint64_t *producer_head);
+int64_t vmci_qpair_produce_free_space(const struct vmci_qpair *qpair);
+int64_t vmci_qpair_produce_buf_ready(const struct vmci_qpair *qpair);
+int64_t vmci_qpair_consume_free_space(const struct vmci_qpair *qpair);
+int64_t vmci_qpair_consume_buf_ready(const struct vmci_qpair *qpair);
+ssize_t vmci_qpair_enqueue(struct vmci_qpair *qpair, const void *buf,
+ size_t buf_size, int mode);
+ssize_t vmci_qpair_dequeue(struct vmci_qpair *qpair, void *buf,
+ size_t buf_size, int mode);
+ssize_t vmci_qpair_peek(struct vmci_qpair *qpair, void *buf,
+ size_t buf_size, int mode);
+ssize_t vmci_qpair_enquev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int mode);
+ssize_t vmci_qpair_dequev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int mode);
+ssize_t vmci_qpair_peekv(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int mode);
+
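+/*
+ * Editor's illustrative sketch (not part of this change): a kernel client
+ * attaches a queue pair to a peer context and moves data with the
+ * enqueue/dequeue calls. The sizes, "peer_id", "buf" and the
+ * VMCI_NO_PRIVILEGE_FLAGS constant (from vmci_defs.h) are assumptions of
+ * this example.
+ *
+ *	struct vmci_qpair *qpair;
+ *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *	ssize_t nbytes;
+ *
+ *	if (vmci_qpair_alloc(&qpair, &handle, 65536, 65536, peer_id, 0,
+ *	    VMCI_NO_PRIVILEGE_FLAGS) == VMCI_SUCCESS) {
+ *		nbytes = vmci_qpair_enqueue(qpair, buf, sizeof(buf), 0);
+ *		...
+ *		(void)vmci_qpair_detach(&qpair);
+ *	}
+ */
+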
+#endif /* !_VMCI_KERNEL_API_1_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_api_2.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_api_2.h
@@ -0,0 +1,32 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Kernel API (v2) exported from the VMCI guest driver. */
+
+#ifndef _VMCI_KERNEL_API_2_H_
+#define _VMCI_KERNEL_API_2_H_
+
+#include "vmci_kernel_api_1.h"
+
+/* Define version 2. */
+
+#undef VMCI_KERNEL_API_VERSION
+#define VMCI_KERNEL_API_VERSION_2 2
+#define VMCI_KERNEL_API_VERSION VMCI_KERNEL_API_VERSION_2
+
+/* VMCI Doorbell API. */
+#define VMCI_FLAG_DELAYED_CB 0x01
+
+typedef void (*vmci_callback)(void *client_data);
+
+int vmci_doorbell_create(struct vmci_handle *handle, uint32_t flags,
+ vmci_privilege_flags priv_flags, vmci_callback notify_cb,
+ void *client_data);
+int vmci_doorbell_destroy(struct vmci_handle handle);
+int vmci_doorbell_notify(struct vmci_handle handle,
+ vmci_privilege_flags priv_flags);
+
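+/*
+ * Editor's illustrative sketch (not part of this change): a client creates
+ * a doorbell with a notification callback and rings a peer's doorbell by
+ * handle. "db_cb", "client", "peer_handle" and VMCI_NO_PRIVILEGE_FLAGS
+ * (from vmci_defs.h) are assumptions of this example.
+ *
+ *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *
+ *	if (vmci_doorbell_create(&handle, VMCI_FLAG_DELAYED_CB,
+ *	    VMCI_NO_PRIVILEGE_FLAGS, db_cb, client) == VMCI_SUCCESS) {
+ *		...
+ *		(void)vmci_doorbell_destroy(handle);
+ *	}
+ *
+ *	(void)vmci_doorbell_notify(peer_handle, VMCI_NO_PRIVILEGE_FLAGS);
+ */
+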
+#endif /* !_VMCI_KERNEL_API_2_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_defs.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_defs.h
@@ -0,0 +1,30 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Some common utilities used by the VMCI kernel module. */
+
+#ifndef _VMCI_KERNEL_DEFS_H_
+#define _VMCI_KERNEL_DEFS_H_
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+typedef uint32_t PPN;
+
+#define ASSERT(cond) KASSERT(cond, (""))
+#define ASSERT_ON_COMPILE(e) _Static_assert(e, #e);
+
+#define LIKELY(_exp) __builtin_expect(!!(_exp), 1)
+#define UNLIKELY(_exp) __builtin_expect((_exp), 0)
+
+#define CONST64U(c) c##uL
+
+#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
+#define ROUNDUP(x, y) (((x) + (y) - 1) / (y) * (y))
+#define CEILING(x, y) (((x) + (y) - 1) / (y))
+
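+/*
+ * Editor's note (not part of this change): ROUNDUP() rounds x up to the
+ * next multiple of y and CEILING() is the corresponding quotient, e.g.
+ * with a 4 KiB PAGE_SIZE:
+ *
+ *	ROUNDUP(5000, PAGE_SIZE) == 8192
+ *	CEILING(5000, PAGE_SIZE) == 2
+ */
+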
+#endif /* !_VMCI_KERNEL_DEFS_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_if.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_if.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file defines helper functions */
+
+#ifndef _VMCI_KERNEL_IF_H_
+#define _VMCI_KERNEL_IF_H_
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sema.h>
+
+#include "vmci_defs.h"
+
+#define VMCI_MEMORY_NORMAL 0x0
+#define VMCI_MEMORY_ATOMIC 0x1
+
+#define vmci_list(_l) LIST_HEAD(, _l)
+#define vmci_list_item(_l) LIST_ENTRY(_l)
+#define vmci_list_init(_l) LIST_INIT(_l)
+#define vmci_list_empty(_l) LIST_EMPTY(_l)
+#define vmci_list_first(_l) LIST_FIRST(_l)
+#define vmci_list_next(e, f) LIST_NEXT(e, f)
+#define vmci_list_insert(_l, _e, n) LIST_INSERT_HEAD(_l, _e, n)
+#define vmci_list_remove(_e, n) LIST_REMOVE(_e, n)
+#define vmci_list_scan(v, _l, n) LIST_FOREACH(v, _l, n)
+#define vmci_list_scan_safe(_e, _l, n, t) \
+ LIST_FOREACH_SAFE(_e, _l, n, t)
+#define vmci_list_swap(_l1, _l2, t, f) LIST_SWAP(_l1, _l2, t, f)
+
+typedef unsigned short int vmci_io_port;
+typedef int vmci_io_handle;
+
+void vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port,
+ uint8_t *buffer, size_t buffer_length);
+
+typedef struct mtx vmci_lock;
+int vmci_init_lock(vmci_lock *lock, char *name);
+void vmci_cleanup_lock(vmci_lock *lock);
+void vmci_grab_lock(vmci_lock *lock);
+void vmci_release_lock(vmci_lock *lock);
+void vmci_grab_lock_bh(vmci_lock *lock);
+void vmci_release_lock_bh(vmci_lock *lock);
+
+void *vmci_alloc_kernel_mem(size_t size, int flags);
+void vmci_free_kernel_mem(void *ptr, size_t size);
+
+typedef struct sema vmci_event;
+typedef int (*vmci_event_release_cb)(void *client_data);
+void vmci_create_event(vmci_event *event);
+void vmci_destroy_event(vmci_event *event);
+void vmci_signal_event(vmci_event *event);
+void vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb,
+ void *client_data);
+bool vmci_wait_on_event_interruptible(vmci_event *event,
+ vmci_event_release_cb release_cb, void *client_data);
+
+typedef void (vmci_work_fn)(void *data);
+bool vmci_can_schedule_delayed_work(void);
+int vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data);
+void vmci_delayed_work_cb(void *context, int data);
+
+typedef struct mtx vmci_mutex;
+int vmci_mutex_init(vmci_mutex *mutex, char *name);
+void vmci_mutex_destroy(vmci_mutex *mutex);
+void vmci_mutex_acquire(vmci_mutex *mutex);
+void vmci_mutex_release(vmci_mutex *mutex);
+
+void *vmci_alloc_queue(uint64_t size, uint32_t flags);
+void vmci_free_queue(void *q, uint64_t size);
+
+typedef PPN *vmci_ppn_list;
+struct ppn_set {
+ uint64_t num_produce_pages;
+ uint64_t num_consume_pages;
+ vmci_ppn_list produce_ppns;
+ vmci_ppn_list consume_ppns;
+ bool initialized;
+};
+
+int vmci_alloc_ppn_set(void *produce_q, uint64_t num_produce_pages,
+ void *consume_q, uint64_t num_consume_pages,
+ struct ppn_set *ppn_set);
+void vmci_free_ppn_set(struct ppn_set *ppn_set);
+int vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppnset);
+
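+/*
+ * Editor's illustrative sketch (not part of this change): the vmci_list
+ * macros are thin wrappers around the queue(3) LIST macros. A hypothetical
+ * element type declares membership with vmci_list_item() and a list head
+ * with vmci_list():
+ *
+ *	struct my_elem {
+ *		int value;
+ *		vmci_list_item(my_elem) list_item;
+ *	};
+ *
+ *	vmci_list(my_elem) head;
+ *	struct my_elem *iter;
+ *
+ *	vmci_list_init(&head);
+ *	vmci_list_insert(&head, elem, list_item);
+ *	vmci_list_scan(iter, &head, list_item)
+ *		iter->value++;
+ */
+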
+#endif /* !_VMCI_KERNEL_IF_H_ */
Index: sys/dev/vmware/vmci/vmci_kernel_if.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_kernel_if.c
@@ -0,0 +1,1066 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements defines and helper functions. */
+
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+
+#include <machine/bus.h>
+
+#include "vmci.h"
+#include "vmci_defs.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_kernel_if.h"
+#include "vmci_queue.h"
+
+struct vmci_queue_kernel_if {
+ size_t num_pages; /* Num pages incl. header. */
+ struct vmci_dma_alloc *dmas; /* For dma alloc. */
+};
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_init_lock
+ *
+ * Initializes the lock. Must be called before use.
+ *
+ * Results:
+ * Always VMCI_SUCCESS.
+ *
+ * Side effects:
+ * Thread can block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_init_lock(vmci_lock *lock, char *name)
+{
+
+ mtx_init(lock, name, NULL, MTX_DEF | MTX_NOWITNESS);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_cleanup_lock
+ *
+ *	Cleans up the lock. Must be called before deallocating the lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * Deletes kernel lock state
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_cleanup_lock(vmci_lock *lock)
+{
+
+ mtx_destroy(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_grab_lock
+ *
+ * Grabs the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * Thread can block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_grab_lock(vmci_lock *lock)
+{
+
+ mtx_lock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_release_lock
+ *
+ * Releases the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * A thread blocked on this lock may wake up.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_release_lock(vmci_lock *lock)
+{
+
+ mtx_unlock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_grab_lock_bh
+ *
+ * Grabs the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_grab_lock_bh(vmci_lock *lock)
+{
+
+ mtx_lock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_release_lock_bh
+ *
+ * Releases the given lock.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_release_lock_bh(vmci_lock *lock)
+{
+
+ mtx_unlock(lock);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_alloc_kernel_mem
+ *
+ * Allocate physically contiguous memory for the VMCI driver.
+ *
+ * Results:
+ * The address allocated or NULL on error.
+ *
+ * Side effects:
+ * Memory may be allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void *
+vmci_alloc_kernel_mem(size_t size, int flags)
+{
+ void *ptr;
+
+ if ((flags & VMCI_MEMORY_ATOMIC) != 0)
+ ptr = contigmalloc(size, M_DEVBUF, M_NOWAIT, 0, 0xFFFFFFFF,
+ 8, 1024 * 1024);
+ else
+ ptr = contigmalloc(size, M_DEVBUF, M_WAITOK, 0, 0xFFFFFFFF,
+ 8, 1024 * 1024);
+
+ return (ptr);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_free_kernel_mem
+ *
+ * Free kernel memory allocated for the VMCI driver.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Memory is freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_free_kernel_mem(void *ptr, size_t size)
+{
+
+ contigfree(ptr, size, M_DEVBUF);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_can_schedule_delayed_work --
+ *
+ * Checks to see if the given platform supports delayed work callbacks.
+ *
+ * Results:
+ * true if it does. false otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+bool
+vmci_can_schedule_delayed_work(void)
+{
+
+ return (true);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_schedule_delayed_work --
+ *
+ * Schedule the specified callback.
+ *
+ * Results:
+ * Zero on success, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data)
+{
+
+ return (vmci_schedule_delayed_work_fn(work_fn, data));
+}
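+
+/*
+ * Editor's illustrative sketch (not part of this change): a delayed
+ * callback matches the vmci_work_fn typedef and receives the opaque data
+ * pointer it was scheduled with; "my_work" and "arg" are placeholders.
+ *
+ *	static void
+ *	my_work(void *data)
+ *	{
+ *		...
+ *	}
+ *
+ *	if (vmci_can_schedule_delayed_work())
+ *		(void)vmci_schedule_delayed_work(my_work, arg);
+ */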
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_create_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_create_event(vmci_event *event)
+{
+
+ sema_init(event, 0, "vmci_event");
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_destroy_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_destroy_event(vmci_event *event)
+{
+
+ if (mtx_owned(&event->sema_mtx))
+ sema_destroy(event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_signal_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_signal_event(vmci_event *event)
+{
+
+ sema_post(event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_wait_on_event --
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb,
+ void *client_data)
+{
+
+ release_cb(client_data);
+ sema_wait(event);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_init --
+ *
+ * Initializes the mutex. Must be called before use.
+ *
+ * Results:
+ *	Always VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_mutex_init(vmci_mutex *mutex, char *name)
+{
+
+ mtx_init(mutex, name, NULL, MTX_DEF | MTX_NOWITNESS);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_destroy --
+ *
+ * Destroys the mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_mutex_destroy(vmci_mutex *mutex)
+{
+
+ mtx_destroy(mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_acquire --
+ *
+ * Acquires the mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Thread may block.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_mutex_acquire(vmci_mutex *mutex)
+{
+
+ mtx_lock(mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_mutex_release --
+ *
+ * Releases the mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May wake up the thread blocking on this mutex.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_mutex_release(vmci_mutex *mutex)
+{
+
+ mtx_unlock(mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_alloc_queue --
+ *
+ * Allocates kernel queue pages of specified size with IOMMU mappings, plus
+ * space for the queue structure/kernel interface and the queue header.
+ *
+ * Results:
+ * Pointer to the queue on success, NULL otherwise.
+ *
+ * Side effects:
+ * Memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void *
+vmci_alloc_queue(uint64_t size, uint32_t flags)
+{
+ struct vmci_queue *queue;
+ size_t i;
+ const size_t num_pages = CEILING(size, PAGE_SIZE) + 1;
+ const size_t dmas_size = num_pages * sizeof(struct vmci_dma_alloc);
+ const size_t queue_size =
+ sizeof(*queue) + sizeof(*(queue->kernel_if)) + dmas_size;
+
+ /* Size should be enforced by vmci_qpair_alloc(), double-check here. */
+ if (size > VMCI_MAX_GUEST_QP_MEMORY) {
+ ASSERT(false);
+ return (NULL);
+ }
+
+ queue = malloc(queue_size, M_DEVBUF, M_NOWAIT);
+ if (!queue)
+ return (NULL);
+
+ queue->q_header = NULL;
+ queue->saved_header = NULL;
+ queue->kernel_if = (struct vmci_queue_kernel_if *)(queue + 1);
+ queue->kernel_if->num_pages = num_pages;
+ queue->kernel_if->dmas = (struct vmci_dma_alloc *)(queue->kernel_if +
+ 1);
+ for (i = 0; i < num_pages; i++) {
+ vmci_dma_malloc(PAGE_SIZE, 1, &queue->kernel_if->dmas[i]);
+ if (!queue->kernel_if->dmas[i].dma_vaddr) {
+ /* Size excl. the header. */
+ vmci_free_queue(queue, i * PAGE_SIZE);
+ return (NULL);
+ }
+ }
+
+ /* Queue header is the first page. */
+ queue->q_header = (void *)queue->kernel_if->dmas[0].dma_vaddr;
+
+ return ((void *)queue);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_free_queue --
+ *
+ * Frees kernel VA space for a given queue and its queue header, and frees
+ * physical data pages.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Memory is freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_free_queue(void *q, uint64_t size)
+{
+ struct vmci_queue *queue = q;
+
+ if (queue) {
+ const size_t num_pages = CEILING(size, PAGE_SIZE) + 1;
+ uint64_t i;
+
+ /* Given size doesn't include header, so add in a page here. */
+ for (i = 0; i < num_pages; i++)
+ vmci_dma_free(&queue->kernel_if->dmas[i]);
+ free(queue, M_DEVBUF);
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_alloc_ppn_set --
+ *
+ *     Allocates two lists of PPNs --- one for the pages in the produce queue,
+ *     and the other for the pages in the consume queue. Initializes the lists
+ *     with the page frame numbers of the pages backing the two queues (and
+ *     the queue headers).
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory may be allocated.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+vmci_alloc_ppn_set(void *prod_q, uint64_t num_produce_pages, void *cons_q,
+ uint64_t num_consume_pages, struct ppn_set *ppn_set)
+{
+ struct vmci_queue *consume_q = cons_q;
+ struct vmci_queue *produce_q = prod_q;
+ vmci_ppn_list consume_ppns;
+ vmci_ppn_list produce_ppns;
+ uint64_t i;
+
+ if (!produce_q || !num_produce_pages || !consume_q ||
+ !num_consume_pages || !ppn_set)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ if (ppn_set->initialized)
+ return (VMCI_ERROR_ALREADY_EXISTS);
+
+ produce_ppns =
+ vmci_alloc_kernel_mem(num_produce_pages * sizeof(*produce_ppns),
+ VMCI_MEMORY_NORMAL);
+ if (!produce_ppns)
+ return (VMCI_ERROR_NO_MEM);
+
+ consume_ppns =
+ vmci_alloc_kernel_mem(num_consume_pages * sizeof(*consume_ppns),
+ VMCI_MEMORY_NORMAL);
+ if (!consume_ppns) {
+ vmci_free_kernel_mem(produce_ppns,
+ num_produce_pages * sizeof(*produce_ppns));
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ for (i = 0; i < num_produce_pages; i++) {
+ unsigned long pfn;
+
+ produce_ppns[i] =
+ pfn = produce_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT;
+
+ /*
+ * Fail allocation if PFN isn't supported by hypervisor.
+ */
+
+ if (sizeof(pfn) >
+ sizeof(*produce_ppns) && pfn != produce_ppns[i])
+ goto ppn_error;
+ }
+ for (i = 0; i < num_consume_pages; i++) {
+ unsigned long pfn;
+
+ consume_ppns[i] =
+ pfn = consume_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT;
+
+ /*
+ * Fail allocation if PFN isn't supported by hypervisor.
+ */
+
+ if (sizeof(pfn) >
+ sizeof(*consume_ppns) && pfn != consume_ppns[i])
+ goto ppn_error;
+
+ }
+
+ ppn_set->num_produce_pages = num_produce_pages;
+ ppn_set->num_consume_pages = num_consume_pages;
+ ppn_set->produce_ppns = produce_ppns;
+ ppn_set->consume_ppns = consume_ppns;
+ ppn_set->initialized = true;
+ return (VMCI_SUCCESS);
+
+ppn_error:
+ vmci_free_kernel_mem(produce_ppns, num_produce_pages *
+ sizeof(*produce_ppns));
+ vmci_free_kernel_mem(consume_ppns, num_consume_pages *
+ sizeof(*consume_ppns));
+ return (VMCI_ERROR_INVALID_ARGS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_free_ppn_set --
+ *
+ * Frees the two list of PPNs for a queue pair.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_free_ppn_set(struct ppn_set *ppn_set)
+{
+
+ ASSERT(ppn_set);
+ if (ppn_set->initialized) {
+ /* Do not call these functions on NULL inputs. */
+ ASSERT(ppn_set->produce_ppns && ppn_set->consume_ppns);
+ vmci_free_kernel_mem(ppn_set->produce_ppns,
+ ppn_set->num_produce_pages *
+ sizeof(*ppn_set->produce_ppns));
+ vmci_free_kernel_mem(ppn_set->consume_ppns,
+ ppn_set->num_consume_pages *
+ sizeof(*ppn_set->consume_ppns));
+ }
+ memset(ppn_set, 0, sizeof(*ppn_set));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_populate_ppn_list --
+ *
+ *     Populates the list of PPNs in the hypercall structure with the PPNs
+ *     of the produce queue and the consume queue.
+ *
+ * Results:
+ * VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppn_set)
+{
+
+ ASSERT(call_buf && ppn_set && ppn_set->initialized);
+ memcpy(call_buf, ppn_set->produce_ppns,
+ ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
+ memcpy(call_buf + ppn_set->num_produce_pages *
+ sizeof(*ppn_set->produce_ppns), ppn_set->consume_ppns,
+ ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_{to,from}iovec --
+ *
+ *     These helper routines copy the specified number of bytes to/from memory
+ *     described by a struct iovec. The routines cannot verify the correctness
+ *     of the iovec's contents.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_memcpy_toiovec(struct iovec *iov, uint8_t *src, size_t len)
+{
+
+ while (len > 0) {
+ if (iov->iov_len) {
+ size_t to_copy = MIN(iov->iov_len, len);
+ memcpy(iov->iov_base, src, to_copy);
+ src += to_copy;
+ len -= to_copy;
+ iov->iov_base = (void *)((uintptr_t) iov->iov_base +
+ to_copy);
+ iov->iov_len -= to_copy;
+ }
+ iov++;
+ }
+}
+
+static inline void
+vmci_memcpy_fromiovec(uint8_t *dst, struct iovec *iov, size_t len)
+{
+
+ while (len > 0) {
+ if (iov->iov_len) {
+ size_t to_copy = MIN(iov->iov_len, len);
+ memcpy(dst, iov->iov_base, to_copy);
+ dst += to_copy;
+ len -= to_copy;
+ iov->iov_base = (void *)((uintptr_t) iov->iov_base +
+ to_copy);
+ iov->iov_len -= to_copy;
+ }
+ iov++;
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * __vmci_memcpy_to_queue --
+ *
+ * Copies from a given buffer or iovector to a VMCI Queue. Assumes that
+ * offset + size does not wrap around in the queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+static int
+__vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t size, bool is_iovec)
+{
+ struct vmci_queue_kernel_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ const uint64_t page_index =
+ (queue_offset + bytes_copied) / PAGE_SIZE;
+ const size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ /* Skip header. */
+ va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr;
+
+ ASSERT(va);
+ /*
+ * Fill up the page if we have enough payload, or else
+ * copy the remaining bytes.
+ */
+ to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied);
+
+ if (is_iovec) {
+ struct iovec *iov = (struct iovec *)src;
+
+ /* The iovec will track bytes_copied internally. */
+ vmci_memcpy_fromiovec((uint8_t *)va + page_offset,
+ iov, to_copy);
+ } else
+ memcpy((uint8_t *)va + page_offset,
+ (uint8_t *)src + bytes_copied, to_copy);
+ bytes_copied += to_copy;
+ }
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * __vmci_memcpy_from_queue --
+ *
+ * Copies to a given buffer or iovector from a VMCI Queue. Assumes that
+ * offset + size does not wrap around in the queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+__vmci_memcpy_from_queue(void *dest, const struct vmci_queue *queue,
+ uint64_t queue_offset, size_t size, bool is_iovec)
+{
+ struct vmci_queue_kernel_if *kernel_if = queue->kernel_if;
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < size) {
+ const uint64_t page_index =
+ (queue_offset + bytes_copied) / PAGE_SIZE;
+ const size_t page_offset =
+ (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+ void *va;
+ size_t to_copy;
+
+ /* Skip header. */
+ va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr;
+
+ ASSERT(va);
+ /*
+ * Fill up the page if we have enough payload, or else
+ * copy the remaining bytes.
+ */
+ to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied);
+
+ if (is_iovec) {
+ struct iovec *iov = (struct iovec *)dest;
+
+			/* The iovec will track bytes_copied internally. */
+ vmci_memcpy_toiovec(iov, (uint8_t *)va +
+ page_offset, to_copy);
+ } else
+ memcpy((uint8_t *)dest + bytes_copied,
+ (uint8_t *)va + page_offset, to_copy);
+
+ bytes_copied += to_copy;
+ }
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_to_queue --
+ *
+ * Copies from a given buffer to a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_to_queue(queue, queue_offset,
+ (uint8_t *)src + src_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_from_queue --
+ *
+ * Copies to a given buffer from a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_from_queue(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset,
+ queue, queue_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_to_queue_local --
+ *
+ * Copies from a given buffer to a local VMCI queue. This is the
+ * same as a regular copy.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_to_queue_local(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_to_queue(queue, queue_offset,
+ (uint8_t *)src + src_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_from_queue_local --
+ *
+ *     Copies to a given buffer from a local VMCI queue. This is the same as a
+ *     regular copy.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_from_queue_local(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ ASSERT(can_block);
+
+ return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset,
+ queue, queue_offset, size, false));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_to_queue_v --
+ *
+ *     Copies from a given iovec to a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_to_queue_v(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ ASSERT(can_block);
+
+ /*
+ * We ignore src_offset because src is really a struct iovec * and will
+ * maintain offset internally.
+ */
+ return (__vmci_memcpy_to_queue(queue, queue_offset, src, size,
+ true));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy_from_queue_v --
+ *
+ * Copies to a given iovec from a VMCI Queue.
+ *
+ * Results:
+ * Zero on success, negative error code on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_memcpy_from_queue_v(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ ASSERT(can_block);
+
+ /*
+ * We ignore dest_offset because dest is really a struct iovec * and
+ * will maintain offset internally.
+ */
+ return (__vmci_memcpy_from_queue(dest, queue, queue_offset, size,
+ true));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_read_port_bytes --
+ *
+ * Copy memory from an I/O port to kernel memory.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port, uint8_t *buffer,
+ size_t buffer_length)
+{
+
+ insb(port, buffer, buffer_length);
+}
Index: sys/dev/vmware/vmci/vmci_qpair.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_qpair.c
@@ -0,0 +1,834 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* This file implements Queue accessor methods. */
+
+/*
+ * vmci_qpair is an interface that hides the queue pair internals. Rather than
+ * access each queue in a pair directly, operations are performed on the queue
+ * as a whole. This is simpler and less error-prone, and allows for future
+ * queue pair features to be added under the hood with no change to the client
+ * code.
+ */
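+
+/*
+ * Illustrative client-side usage (a sketch only, not part of this driver):
+ * a guest component allocates a queue pair, moves data with the enqueue and
+ * dequeue calls below, and finally detaches. Here "peer_id" and the 64KB
+ * sizes are placeholder example values, and VMCI_INVALID_HANDLE is assumed
+ * to be the invalid-handle constant from the VMCI headers.
+ *
+ *	struct vmci_qpair *qpair;
+ *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *	char out[] = "ping", in[16];
+ *
+ *	if (vmci_qpair_alloc(&qpair, &handle, 65536, 65536, peer_id, 0,
+ *	    VMCI_NO_PRIVILEGE_FLAGS) < VMCI_SUCCESS)
+ *		return;
+ *	(void)vmci_qpair_enqueue(qpair, out, sizeof(out), 0);
+ *	(void)vmci_qpair_dequeue(qpair, in, sizeof(in), 0);
+ *	(void)vmci_qpair_detach(&qpair);
+ */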
+
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_kernel_if.h"
+#include "vmci_queue.h"
+#include "vmci_queue_pair.h"
+
+/* This structure is opaque to the clients. */
+struct vmci_qpair {
+ struct vmci_handle handle;
+ struct vmci_queue *produce_q;
+ struct vmci_queue *consume_q;
+ uint64_t produce_q_size;
+ uint64_t consume_q_size;
+ vmci_id peer;
+ uint32_t flags;
+ vmci_privilege_flags priv_flags;
+ uint32_t blocked;
+ vmci_event event;
+};
+
+static void vmci_qpair_get_queue_headers(const struct vmci_qpair *qpair,
+ struct vmci_queue_header **produce_q_header,
+ struct vmci_queue_header **consume_q_header);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_add_producer_tail --
+ *
+ * Helper routine to increment the Producer Tail.
+ *
+ * Results:
+ *     VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+vmci_queue_add_producer_tail(struct vmci_queue *queue,
+ size_t add, uint64_t queue_size)
+{
+
+ vmci_queue_header_add_producer_tail(queue->q_header, add, queue_size);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_add_consumer_head --
+ *
+ * Helper routine to increment the Consumer Head.
+ *
+ * Results:
+ *     VMCI_SUCCESS.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+vmci_queue_add_consumer_head(struct vmci_queue *queue,
+ size_t add, uint64_t queue_size)
+{
+
+ vmci_queue_header_add_consumer_head(queue->q_header, add, queue_size);
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_get_queue_headers --
+ *
+ * Helper routine that will retrieve the produce and consume headers of a
+ * given queue pair.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_qpair_get_queue_headers(const struct vmci_qpair *qpair,
+ struct vmci_queue_header **produce_q_header,
+ struct vmci_queue_header **consume_q_header)
+{
+
+ ASSERT((qpair->produce_q != NULL) && (qpair->consume_q != NULL));
+ *produce_q_header = qpair->produce_q->q_header;
+ *consume_q_header = qpair->consume_q->q_header;
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_alloc --
+ *
+ * This is the client interface for allocating the memory for a vmci_qpair
+ * structure and then attaching to the underlying queue. If an error occurs
+ * allocating the memory for the vmci_qpair structure, no attempt is made to
+ *     attach. If an error occurs while attaching, the vmci_qpair structure is
+ *     freed.
+ *
+ * Results:
+ *     An error, if < 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_alloc(struct vmci_qpair **qpair, struct vmci_handle *handle,
+ uint64_t produce_q_size, uint64_t consume_q_size, vmci_id peer,
+ uint32_t flags, vmci_privilege_flags priv_flags)
+{
+ struct vmci_qpair *my_qpair;
+ vmci_event_release_cb wakeup_cb;
+ void *client_data;
+ int retval;
+
+ /*
+ * Restrict the size of a queuepair. Though the device enforces a limit
+ * on the total amount of memory that can be allocated to queuepairs for
+ * a guest, we avoid unnecessarily allocating a lot of memory. Also, we
+ * try to allocate this memory before we make the queuepair allocation
+ * hypercall.
+ *
+ * (Note that this doesn't prevent all cases; a user with only this much
+ * physical memory could still get into trouble.) The error used by the
+ * device is NO_RESOURCES, so use that here too.
+ */
+
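+	/*
+	 * Note (added comment): the first comparison below rejects unsigned
+	 * overflow of the sum of the two queue sizes.
+	 */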
+ if (produce_q_size + consume_q_size <
+ MAX(produce_q_size, consume_q_size) ||
+ produce_q_size + consume_q_size > VMCI_MAX_GUEST_QP_MEMORY)
+ return (VMCI_ERROR_NO_RESOURCES);
+
+ if (flags & VMCI_QPFLAG_NONBLOCK)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ my_qpair = vmci_alloc_kernel_mem(sizeof(*my_qpair), VMCI_MEMORY_NORMAL);
+ if (!my_qpair)
+ return (VMCI_ERROR_NO_MEM);
+
+ my_qpair->produce_q_size = produce_q_size;
+ my_qpair->consume_q_size = consume_q_size;
+ my_qpair->peer = peer;
+ my_qpair->flags = flags;
+ my_qpair->priv_flags = priv_flags;
+
+ client_data = NULL;
+ wakeup_cb = NULL;
+
+ retval = vmci_queue_pair_alloc(handle, &my_qpair->produce_q,
+ my_qpair->produce_q_size, &my_qpair->consume_q,
+ my_qpair->consume_q_size, my_qpair->peer, my_qpair->flags,
+ my_qpair->priv_flags);
+
+ if (retval < VMCI_SUCCESS) {
+ vmci_free_kernel_mem(my_qpair, sizeof(*my_qpair));
+ return (retval);
+ }
+
+ *qpair = my_qpair;
+ my_qpair->handle = *handle;
+
+ return (retval);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_detach --
+ *
+ * This is the client interface for detaching from a vmci_qpair. Note that
+ * this routine will free the memory allocated for the vmci_qpair structure,
+ * too.
+ *
+ * Results:
+ * An error, if < 0.
+ *
+ * Side effects:
+ * Will clear the caller's pointer to the vmci_qpair structure.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_detach(struct vmci_qpair **qpair)
+{
+ struct vmci_qpair *old_qpair;
+ int result;
+
+ if (!qpair || !(*qpair))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ old_qpair = *qpair;
+ result = vmci_queue_pair_detach(old_qpair->handle);
+
+ /*
+ * The guest can fail to detach for a number of reasons, and if it does
+	 * so, it will clean up the entry (if there is one). We need to release
+ * the qpair struct here; there isn't much the caller can do, and we
+ * don't want to leak.
+ */
+
+ if (old_qpair->flags & VMCI_QPFLAG_LOCAL)
+ vmci_destroy_event(&old_qpair->event);
+
+ vmci_free_kernel_mem(old_qpair, sizeof(*old_qpair));
+ *qpair = NULL;
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_get_produce_indexes --
+ *
+ * This is the client interface for getting the current indexes of the
+ *     qpair from the point of view of the caller as the producer.
+ *
+ * Results:
+ * err, if < 0
+ * Success otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_get_produce_indexes(const struct vmci_qpair *qpair,
+ uint64_t *producer_tail, uint64_t *consumer_head)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ vmci_queue_header_get_pointers(produce_q_header, consume_q_header,
+ producer_tail, consumer_head);
+
+ if ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
+ (consumer_head && *consumer_head >= qpair->produce_q_size))
+ return (VMCI_ERROR_INVALID_SIZE);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_get_consume_indexes --
+ *
+ * This is the client interface for getting the current indexes of the
+ *     QPair from the point of view of the caller as the consumer.
+ *
+ * Results:
+ * err, if < 0
+ * Success otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qpair_get_consume_indexes(const struct vmci_qpair *qpair,
+ uint64_t *consumer_tail, uint64_t *producer_head)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ vmci_queue_header_get_pointers(consume_q_header, produce_q_header,
+ consumer_tail, producer_head);
+
+ if ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
+ (producer_head && *producer_head >= qpair->consume_q_size))
+ return (VMCI_ERROR_INVALID_SIZE);
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_produce_free_space --
+ *
+ * This is the client interface for getting the amount of free space in the
+ *     QPair from the point of view of the caller as the producer, which is
+ *     the common case.
+ *
+ * Results:
+ * Err, if < 0.
+ * Full queue if = 0.
+ * Number of available bytes into which data can be enqueued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_produce_free_space(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_free_space(produce_q_header, consume_q_header,
+ qpair->produce_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_consume_free_space --
+ *
+ * This is the client interface for getting the amount of free space in the
+ *     QPair from the point of view of the caller as the consumer, which is
+ *     not the common case (see vmci_qpair_produce_free_space(), above).
+ *
+ * Results:
+ * Err, if < 0.
+ * Full queue if = 0.
+ * Number of available bytes into which data can be enqueued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_consume_free_space(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_free_space(consume_q_header, produce_q_header,
+ qpair->consume_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_produce_buf_ready --
+ *
+ * This is the client interface for getting the amount of enqueued data in
+ *     the QPair from the point of view of the caller as the producer, which
+ *     is not the common case (see vmci_qpair_consume_buf_ready(), below).
+ *
+ * Results:
+ * Err, if < 0.
+ * Empty queue if = 0.
+ * Number of bytes ready to be dequeued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_produce_buf_ready(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_buf_ready(produce_q_header, consume_q_header,
+ qpair->produce_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_consume_buf_ready --
+ *
+ * This is the client interface for getting the amount of enqueued data in
+ *     the QPair from the point of view of the caller as the consumer, which
+ *     is the normal case.
+ *
+ * Results:
+ * Err, if < 0.
+ * Empty queue if = 0.
+ * Number of bytes ready to be dequeued if > 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int64_t
+vmci_qpair_consume_buf_ready(const struct vmci_qpair *qpair)
+{
+ struct vmci_queue_header *consume_q_header;
+ struct vmci_queue_header *produce_q_header;
+ int64_t result;
+
+ if (!qpair)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ vmci_qpair_get_queue_headers(qpair, &produce_q_header,
+ &consume_q_header);
+ result = vmci_queue_header_buf_ready(consume_q_header, produce_q_header,
+ qpair->consume_q_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * enqueue --
+ *
+ * Enqueues a given buffer to the produce queue using the provided function.
+ * As many bytes as possible (space available in the queue) are enqueued.
+ *
+ * Results:
+ * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue data.
+ * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
+ * (as defined by the queue size).
+ *     VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
+ * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
+ * available.
+ * Otherwise, the number of bytes written to the queue is returned.
+ *
+ * Side effects:
+ * Updates the tail pointer of the produce queue.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static ssize_t
+enqueue(struct vmci_queue *produce_q, struct vmci_queue *consume_q,
+ const uint64_t produce_q_size, const void *buf, size_t buf_size,
+ int buf_type, vmci_memcpy_to_queue_func memcpy_to_queue, bool can_block)
+{
+ ssize_t result;
+ size_t written;
+ int64_t free_space;
+ uint64_t tail;
+
+ ASSERT((produce_q != NULL) && (consume_q != NULL));
+
+ free_space = vmci_queue_header_free_space(produce_q->q_header,
+ consume_q->q_header,
+ produce_q_size);
+ if (free_space == 0)
+ return (VMCI_ERROR_QUEUEPAIR_NOSPACE);
+
+ if (free_space < VMCI_SUCCESS)
+ return ((ssize_t)free_space);
+
+ written = (size_t)(free_space > buf_size ? buf_size : free_space);
+ tail = vmci_queue_header_producer_tail(produce_q->q_header);
+ if (LIKELY(tail + written < produce_q_size))
+ result = memcpy_to_queue(produce_q, tail, buf, 0, written,
+ buf_type, can_block);
+ else {
+ /* Tail pointer wraps around. */
+
+ const size_t tmp = (size_t)(produce_q_size - tail);
+
+ result = memcpy_to_queue(produce_q, tail, buf, 0, tmp, buf_type,
+ can_block);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_to_queue(produce_q, 0, buf, tmp,
+ written - tmp, buf_type, can_block);
+ }
+
+ if (result < VMCI_SUCCESS)
+ return (result);
+
+ result = vmci_queue_add_producer_tail(produce_q, written,
+ produce_q_size);
+ if (result < VMCI_SUCCESS)
+ return (result);
+ return (written);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * dequeue --
+ *
+ * Dequeues data (if available) from the given consume queue. Writes data
+ * to the user provided buffer using the provided function.
+ *
+ * Results:
+ * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
+ * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
+ * (as defined by the queue size).
+ *     VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
+ * VMCI_ERROR_NOT_FOUND, if the vmm_world registered with the queue pair
+ * cannot be found.
+ * Otherwise the number of bytes dequeued is returned.
+ *
+ *     Frees the two lists of PPNs for a queue pair.
+ * Updates the head pointer of the consume queue.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static ssize_t
+dequeue(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q, const uint64_t consume_q_size, void *buf,
+ size_t buf_size, int buf_type,
+ vmci_memcpy_from_queue_func memcpy_from_queue, bool update_consumer,
+ bool can_block)
+{
+ ssize_t result;
+ size_t read;
+ int64_t buf_ready;
+ uint64_t head;
+
+ ASSERT((produce_q != NULL) && (consume_q != NULL));
+
+ buf_ready = vmci_queue_header_buf_ready(consume_q->q_header,
+ produce_q->q_header, consume_q_size);
+ if (buf_ready == 0)
+ return (VMCI_ERROR_QUEUEPAIR_NODATA);
+ if (buf_ready < VMCI_SUCCESS)
+ return ((ssize_t)buf_ready);
+
+ read = (size_t)(buf_ready > buf_size ? buf_size : buf_ready);
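+	/*
+	 * Note (added comment): this endpoint's consumer head is stored in its
+	 * own (produce) queue header, which is why produce_q->q_header is
+	 * consulted here.
+	 */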
+ head = vmci_queue_header_consumer_head(produce_q->q_header);
+ if (LIKELY(head + read < consume_q_size))
+ result = memcpy_from_queue(buf, 0, consume_q, head, read,
+ buf_type, can_block);
+ else {
+ /* Head pointer wraps around. */
+
+ const size_t tmp = (size_t)(consume_q_size - head);
+
+ result = memcpy_from_queue(buf, 0, consume_q, head, tmp,
+ buf_type, can_block);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_from_queue(buf, tmp, consume_q, 0,
+ read - tmp, buf_type, can_block);
+ }
+
+ if (result < VMCI_SUCCESS)
+ return (result);
+
+ if (update_consumer) {
+ result = vmci_queue_add_consumer_head(produce_q, read,
+ consume_q_size);
+ if (result < VMCI_SUCCESS)
+ return (result);
+ }
+
+ return (read);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_enqueue --
+ *
+ * This is the client interface for enqueueing data into the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes enqueued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_enqueue(struct vmci_qpair *qpair, const void *buf, size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = enqueue(qpair->produce_q, qpair->consume_q,
+ qpair->produce_q_size, buf, buf_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_to_queue_local : vmci_memcpy_to_queue,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_dequeue --
+ *
+ * This is the client interface for dequeueing data from the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes dequeued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_dequeue(struct vmci_qpair *qpair, void *buf, size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, buf, buf_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_local : vmci_memcpy_from_queue, true,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_peek --
+ *
+ * This is the client interface for peeking into a queue. (I.e., copy
+ * data from the queue without updating the head pointer.)
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes peeked, if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_peek(struct vmci_qpair *qpair, void *buf, size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, buf, buf_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_local : vmci_memcpy_from_queue, false,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_enquev --
+ *
+ * This is the client interface for enqueueing data into the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes enqueued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_enquev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !iov)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = enqueue(qpair->produce_q, qpair->consume_q,
+ qpair->produce_q_size, iov, iov_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_to_queue_v_local : vmci_memcpy_to_queue_v,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_dequev --
+ *
+ * This is the client interface for dequeueing data from the queue.
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes dequeued if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_dequev(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !iov)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, iov, iov_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_v_local : vmci_memcpy_from_queue_v, true,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qpair_peekv --
+ *
+ * This is the client interface for peeking into a queue. (I.e., copy
+ * data from the queue without updating the head pointer.)
+ *
+ * Results:
+ * Err, if < 0.
+ * Number of bytes peeked, if >= 0.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+ssize_t
+vmci_qpair_peekv(struct vmci_qpair *qpair, void *iov, size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !iov)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ result = dequeue(qpair->produce_q, qpair->consume_q,
+ qpair->consume_q_size, iov, iov_size, buf_type,
+ qpair->flags & VMCI_QPFLAG_LOCAL?
+ vmci_memcpy_from_queue_v_local : vmci_memcpy_from_queue_v, false,
+ !(qpair->flags & VMCI_QPFLAG_NONBLOCK));
+
+ return (result);
+}
Index: sys/dev/vmware/vmci/vmci_queue.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_queue.h
@@ -0,0 +1,115 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Defines the queue structure and helper functions to enqueue/dequeue items. */
+
+#ifndef _VMCI_QUEUE_H_
+#define _VMCI_QUEUE_H_
+
+/*
+ * vmci_queue
+ *
+ * This data type contains the information about a queue.
+ *
+ * There are two queues (hence, a queue pair) per connection between a pair of
+ * end points, A & B. One queue is used by end point A to transmit commands
+ * and responses to B. The other queue is used by B to transmit commands and
+ * responses to A.
+ *
+ * vmci_queue_kernel_if is a per-OS defined queue structure. It contains
+ * either a direct pointer to the linear address of the buffer contents or a
+ * pointer to structures which help the OS locate those data pages.
+ * See vmci_kernel_if.c for its definition.
+ */
+
+struct vmci_queue_kernel_if;
+
+struct vmci_queue {
+ struct vmci_queue_header *q_header;
+ struct vmci_queue_header *saved_header;
+ struct vmci_queue_kernel_if *kernel_if;
+};
+
+#define BUF_TYPE int
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy{to,from}_queue_func() prototypes. Functions of these types are
+ * passed around to enqueue and dequeue routines. Note that often the functions
+ * passed are simply wrappers around memcpy itself.
+ *
+ * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
+ * there's an unused last parameter for the hosted side. In ESX, that parameter
+ * holds a buffer type.
+ *
+ *------------------------------------------------------------------------------
+ */
+typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
+ uint64_t queue_offset, const void *src, size_t src_offset,
+ size_t size, BUF_TYPE buf_type, bool can_block);
+typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_memcpy{to,from}_queue_[v]_[local]() prototypes
+ *
+ * Note that these routines are NOT SAFE to call on a host end-point until the
+ * guest end of the queue pair has attached and issued SetPageStore(). The VMX
+ * crosstalk device will issue the SetPageStore() on behalf of the guest when
+ * the guest creates a QueuePair or attaches to one created by the host. So, if
+ * the guest notifies the host that it has attached, the queue is safe to use.
+ * Also, if the host registers for notification of the guest's connection, it
+ * will only receive that notification after the guest has issued the
+ * SetPageStore() call, not before (when the guest has merely attached).
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, BUF_TYPE buf_type,
+ bool can_block);
+int vmci_memcpy_from_queue(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+int vmci_memcpy_to_queue_local(struct vmci_queue *queue,
+ uint64_t queue_offset, const void *src, size_t src_offset,
+ size_t size, BUF_TYPE buf_type, bool can_block);
+int vmci_memcpy_from_queue_local(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+
+int vmci_memcpy_to_queue_v(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, BUF_TYPE buf_type,
+ bool can_block);
+int vmci_memcpy_from_queue_v(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ BUF_TYPE buf_type, bool can_block);
+
+static inline int
+vmci_memcpy_to_queue_v_local(struct vmci_queue *queue, uint64_t queue_offset,
+ const void *src, size_t src_offset, size_t size, int buf_type,
+ bool can_block)
+{
+
+ return (vmci_memcpy_to_queue_v(queue, queue_offset, src, src_offset,
+ size, buf_type, can_block));
+}
+
+static inline int
+vmci_memcpy_from_queue_v_local(void *dest, size_t dest_offset,
+ const struct vmci_queue *queue, uint64_t queue_offset, size_t size,
+ int buf_type, bool can_block)
+{
+
+ return (vmci_memcpy_from_queue_v(dest, dest_offset, queue, queue_offset,
+ size, buf_type, can_block));
+}
+
+#endif /* !_VMCI_QUEUE_H_ */
Index: sys/dev/vmware/vmci/vmci_queue_pair.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_queue_pair.h
@@ -0,0 +1,26 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI QueuePair API definition. */
+
+#ifndef _VMCI_QUEUE_PAIR_H_
+#define _VMCI_QUEUE_PAIR_H_
+
+#include "vmci_kernel_if.h"
+#include "vmci_queue.h"
+
+int vmci_qp_guest_endpoints_init(void);
+void vmci_qp_guest_endpoints_exit(void);
+void vmci_qp_guest_endpoints_sync(void);
+void vmci_qp_guest_endpoints_convert(bool to_local, bool device_reset);
+
+int vmci_queue_pair_alloc(struct vmci_handle *handle,
+ struct vmci_queue **produce_q, uint64_t produce_size,
+ struct vmci_queue **consume_q, uint64_t consume_size,
+ vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags);
+int vmci_queue_pair_detach(struct vmci_handle handle);
+
+#endif /* !_VMCI_QUEUE_PAIR_H_ */
Index: sys/dev/vmware/vmci/vmci_queue_pair.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_queue_pair.c
@@ -0,0 +1,937 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI QueuePair API implementation. */
+
+#include "vmci.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_kernel_api.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_queue_pair.h"
+
+#define LGPFX "vmci_queue_pair: "
+
+struct queue_pair_entry {
+ vmci_list_item(queue_pair_entry) list_item;
+ struct vmci_handle handle;
+ vmci_id peer;
+ uint32_t flags;
+ uint64_t produce_size;
+ uint64_t consume_size;
+ uint32_t ref_count;
+};
+
+struct qp_guest_endpoint {
+ struct queue_pair_entry qp;
+ uint64_t num_ppns;
+ void *produce_q;
+ void *consume_q;
+ bool hibernate_failure;
+ struct ppn_set ppn_set;
+};
+
+struct queue_pair_list {
+ vmci_list(queue_pair_entry) head;
+ volatile int hibernate;
+ vmci_mutex mutex;
+};
+
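+/*
+ * Pages needed by a queue pair entry: its produce and consume queues plus one
+ * header page for each of the two queues.
+ */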
+#define QPE_NUM_PAGES(_QPE) \
+ ((uint32_t)(CEILING(_QPE.produce_size, PAGE_SIZE) + \
+ CEILING(_QPE.consume_size, PAGE_SIZE) + 2))
+
+static struct queue_pair_list qp_guest_endpoints;
+
+static struct queue_pair_entry *queue_pair_list_find_entry(
+ struct queue_pair_list *qp_list, struct vmci_handle handle);
+static void queue_pair_list_add_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry);
+static void queue_pair_list_remove_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry);
+static struct queue_pair_entry *queue_pair_list_get_head(
+ struct queue_pair_list *qp_list);
+static int queue_pair_notify_peer_local(bool attach,
+ struct vmci_handle handle);
+static struct qp_guest_endpoint *qp_guest_endpoint_create(
+ struct vmci_handle handle, vmci_id peer, uint32_t flags,
+ uint64_t produce_size, uint64_t consume_size,
+ void *produce_q, void *consume_q);
+static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry);
+static int vmci_queue_pair_alloc_hypercall(
+ const struct qp_guest_endpoint *entry);
+static int vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle,
+ struct vmci_queue **produce_q, uint64_t produce_size,
+ struct vmci_queue **consume_q, uint64_t consume_size,
+ vmci_id peer, uint32_t flags,
+ vmci_privilege_flags priv_flags);
+static int vmci_queue_pair_detach_guest_work(struct vmci_handle handle);
+static int vmci_queue_pair_detach_hypercall(struct vmci_handle handle);
+
+extern int vmci_send_datagram(struct vmci_datagram *);
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_alloc --
+ *
+ * Allocates a VMCI QueuePair. Only checks validity of input arguments. The
+ * real work is done in the host or guest specific function.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_queue_pair_alloc(struct vmci_handle *handle, struct vmci_queue **produce_q,
+ uint64_t produce_size, struct vmci_queue **consume_q, uint64_t consume_size,
+ vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags)
+{
+
+ if (!handle || !produce_q || !consume_q ||
+ (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_queue_pair_alloc_guest_work(handle, produce_q,
+ produce_size, consume_q, consume_size, peer, flags, priv_flags));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_detach --
+ *
+ * Detaches from a VMCI QueuePair. Only checks validity of input argument.
+ * Real work is done in the host or guest specific function.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory is freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_queue_pair_detach(struct vmci_handle handle)
+{
+
+ if (VMCI_HANDLE_INVALID(handle))
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ return (vmci_queue_pair_detach_guest_work(handle));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_init --
+ *
+ * Initializes the list of QueuePairs.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+queue_pair_list_init(struct queue_pair_list *qp_list)
+{
+ int ret;
+
+ vmci_list_init(&qp_list->head);
+ atomic_store_int(&qp_list->hibernate, 0);
+ ret = vmci_mutex_init(&qp_list->mutex, "VMCI QP List lock");
+ return (ret);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_destroy --
+ *
+ * Destroy the list's mutex.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+queue_pair_list_destroy(struct queue_pair_list *qp_list)
+{
+
+ vmci_mutex_destroy(&qp_list->mutex);
+ vmci_list_init(&qp_list->head);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_find_entry --
+ *
+ * Finds the entry in the list corresponding to a given handle. Assumes that
+ * the list is locked.
+ *
+ * Results:
+ * Pointer to entry.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct queue_pair_entry *
+queue_pair_list_find_entry(struct queue_pair_list *qp_list,
+ struct vmci_handle handle)
+{
+ struct queue_pair_entry *next;
+
+ if (VMCI_HANDLE_INVALID(handle))
+ return (NULL);
+
+ vmci_list_scan(next, &qp_list->head, list_item) {
+ if (VMCI_HANDLE_EQUAL(next->handle, handle))
+ return (next);
+ }
+
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_add_entry --
+ *
+ * Adds the given entry to the list. Assumes that the list is locked.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+queue_pair_list_add_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry)
+{
+
+ if (entry)
+ vmci_list_insert(&qp_list->head, entry, list_item);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_remove_entry --
+ *
+ * Removes the given entry from the list. Assumes that the list is locked.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+queue_pair_list_remove_entry(struct queue_pair_list *qp_list,
+ struct queue_pair_entry *entry)
+{
+
+ if (entry)
+ vmci_list_remove(entry, list_item);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_list_get_head --
+ *
+ * Returns the entry from the head of the list. Assumes that the list is
+ * locked.
+ *
+ * Results:
+ * Pointer to entry.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static struct queue_pair_entry *
+queue_pair_list_get_head(struct queue_pair_list *qp_list)
+{
+
+ return (vmci_list_first(&qp_list->head));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qp_guest_endpoints_init --
+ *
+ *     Initializes the data structures that keep track of queue pair guest
+ *     endpoints.
+ *
+ * Results:
+ * VMCI_SUCCESS on success and appropriate failure code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_qp_guest_endpoints_init(void)
+{
+
+ return (queue_pair_list_init(&qp_guest_endpoints));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qp_guest_endpoints_exit --
+ *
+ * Destroys all guest queue pair endpoints. If active guest queue pairs
+ * still exist, hypercalls to attempt detach from these queue pairs will be
+ * made. Any failure to detach is silently ignored.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_qp_guest_endpoints_exit(void)
+{
+ struct qp_guest_endpoint *entry;
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+
+ while ((entry =
+ (struct qp_guest_endpoint *)queue_pair_list_get_head(
+ &qp_guest_endpoints)) != NULL) {
+ /*
+ * Don't make a hypercall for local QueuePairs.
+ */
+ if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL))
+ vmci_queue_pair_detach_hypercall(entry->qp.handle);
+ /*
+ * We cannot fail the exit, so let's reset ref_count.
+ */
+ entry->qp.ref_count = 0;
+ queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp);
+ qp_guest_endpoint_destroy(entry);
+ }
+
+ atomic_store_int(&qp_guest_endpoints.hibernate, 0);
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ queue_pair_list_destroy(&qp_guest_endpoints);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_qp_guest_endpoints_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_qp_guest_endpoints_sync(void)
+{
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * qp_guest_endpoint_create --
+ *
+ * Allocates and initializes a qp_guest_endpoint structure. Allocates a
+ * QueuePair rid (and handle) iff the given entry has an invalid handle.
+ * 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved handles. Assumes
+ * that the QP list mutex is held by the caller.
+ *
+ * Results:
+ *     Pointer to the initialized structure, or NULL on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct qp_guest_endpoint *
+qp_guest_endpoint_create(struct vmci_handle handle, vmci_id peer,
+ uint32_t flags, uint64_t produce_size, uint64_t consume_size,
+ void *produce_q, void *consume_q)
+{
+ struct qp_guest_endpoint *entry;
+ static vmci_id queue_pair_rid;
+ const uint64_t num_ppns = CEILING(produce_size, PAGE_SIZE) +
+ CEILING(consume_size, PAGE_SIZE) +
+ 2; /* One page each for the queue headers. */
+
+ queue_pair_rid = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+
+ ASSERT((produce_size || consume_size) && produce_q && consume_q);
+
+ if (VMCI_HANDLE_INVALID(handle)) {
+ vmci_id context_id = vmci_get_context_id();
+ vmci_id old_rid = queue_pair_rid;
+
+ /*
+ * Generate a unique QueuePair rid. Keep on trying until we
+ * wrap around in the RID space.
+ */
+ ASSERT(old_rid > VMCI_RESERVED_RESOURCE_ID_MAX);
+ do {
+ handle = VMCI_MAKE_HANDLE(context_id, queue_pair_rid);
+ entry =
+ (struct qp_guest_endpoint *)
+ queue_pair_list_find_entry(&qp_guest_endpoints,
+ handle);
+ queue_pair_rid++;
+ if (UNLIKELY(!queue_pair_rid)) {
+ /*
+ * Skip the reserved rids.
+ */
+ queue_pair_rid =
+ VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+ }
+ } while (entry && queue_pair_rid != old_rid);
+
+ if (UNLIKELY(entry != NULL)) {
+ ASSERT(queue_pair_rid == old_rid);
+ /*
+ * We wrapped around --- no rids were free.
+ */
+ return (NULL);
+ }
+ }
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle) &&
+ queue_pair_list_find_entry(&qp_guest_endpoints, handle) == NULL);
+ entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL);
+ if (entry) {
+ entry->qp.handle = handle;
+ entry->qp.peer = peer;
+ entry->qp.flags = flags;
+ entry->qp.produce_size = produce_size;
+ entry->qp.consume_size = consume_size;
+ entry->qp.ref_count = 0;
+ entry->num_ppns = num_ppns;
+ memset(&entry->ppn_set, 0, sizeof(entry->ppn_set));
+ entry->produce_q = produce_q;
+ entry->consume_q = consume_q;
+ }
+ return (entry);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * qp_guest_endpoint_destroy --
+ *
+ * Frees a qp_guest_endpoint structure.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
+{
+
+ ASSERT(entry);
+ ASSERT(entry->qp.ref_count == 0);
+
+ vmci_free_ppn_set(&entry->ppn_set);
+ vmci_free_queue(entry->produce_q, entry->qp.produce_size);
+ vmci_free_queue(entry->consume_q, entry->qp.consume_size);
+ vmci_free_kernel_mem(entry, sizeof(*entry));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_alloc_hypercall --
+ *
+ * Helper to make a QueuePairAlloc hypercall when the driver is
+ * supporting a guest device.
+ *
+ * Results:
+ * Result of the hypercall.
+ *
+ * Side effects:
+ * Memory is allocated & freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+static int
+vmci_queue_pair_alloc_hypercall(const struct qp_guest_endpoint *entry)
+{
+ struct vmci_queue_pair_alloc_msg *alloc_msg;
+ size_t msg_size;
+ int result;
+
+ if (!entry || entry->num_ppns <= 2)
+ return (VMCI_ERROR_INVALID_ARGS);
+
+ ASSERT(!(entry->qp.flags & VMCI_QPFLAG_LOCAL));
+
+ msg_size = sizeof(*alloc_msg) + (size_t)entry->num_ppns * sizeof(PPN);
+ alloc_msg = vmci_alloc_kernel_mem(msg_size, VMCI_MEMORY_NORMAL);
+ if (!alloc_msg)
+ return (VMCI_ERROR_NO_MEM);
+
+ alloc_msg->hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_QUEUEPAIR_ALLOC);
+ alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
+ alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
+ alloc_msg->handle = entry->qp.handle;
+ alloc_msg->peer = entry->qp.peer;
+ alloc_msg->flags = entry->qp.flags;
+ alloc_msg->produce_size = entry->qp.produce_size;
+ alloc_msg->consume_size = entry->qp.consume_size;
+ alloc_msg->num_ppns = entry->num_ppns;
+ result = vmci_populate_ppn_list((uint8_t *)alloc_msg +
+ sizeof(*alloc_msg), &entry->ppn_set);
+ if (result == VMCI_SUCCESS)
+ result = vmci_send_datagram((struct vmci_datagram *)alloc_msg);
+ vmci_free_kernel_mem(alloc_msg, msg_size);
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_alloc_guest_work --
+ *
+ * This function handles the actual allocation of a VMCI queue pair guest
+ * endpoint. Allocates physical pages for the queue pair. It makes
+ * OS-dependent calls through generic wrappers.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory is allocated.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle,
+ struct vmci_queue **produce_q, uint64_t produce_size,
+ struct vmci_queue **consume_q, uint64_t consume_size, vmci_id peer,
+ uint32_t flags, vmci_privilege_flags priv_flags)
+{
+ struct qp_guest_endpoint *queue_pair_entry = NULL;
+ void *my_consume_q = NULL;
+ void *my_produce_q = NULL;
+ const uint64_t num_consume_pages = CEILING(consume_size, PAGE_SIZE) + 1;
+ const uint64_t num_produce_pages = CEILING(produce_size, PAGE_SIZE) + 1;
+ int result;
+
+ ASSERT(handle && produce_q && consume_q &&
+ (produce_size || consume_size));
+
+ if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
+ return (VMCI_ERROR_NO_ACCESS);
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+
+ if ((atomic_load_int(&qp_guest_endpoints.hibernate) == 1) &&
+ !(flags & VMCI_QPFLAG_LOCAL)) {
+ /*
+ * While guest OS is in hibernate state, creating non-local
+ * queue pairs is not allowed after the point where the VMCI
+ * guest driver converted the existing queue pairs to local
+ * ones.
+ */
+
+ result = VMCI_ERROR_UNAVAILABLE;
+ goto error;
+ }
+
+ if ((queue_pair_entry =
+ (struct qp_guest_endpoint *)queue_pair_list_find_entry(
+ &qp_guest_endpoints, *handle)) != NULL) {
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ /* Local attach case. */
+ if (queue_pair_entry->qp.ref_count > 1) {
+ VMCI_LOG_DEBUG(LGPFX"Error attempting to "
+ "attach more than once.\n");
+ result = VMCI_ERROR_UNAVAILABLE;
+ goto error_keep_entry;
+ }
+
+ if (queue_pair_entry->qp.produce_size != consume_size ||
+ queue_pair_entry->qp.consume_size != produce_size ||
+ queue_pair_entry->qp.flags !=
+ (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
+ VMCI_LOG_DEBUG(LGPFX"Error mismatched "
+ "queue pair in local attach.\n");
+ result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
+ goto error_keep_entry;
+ }
+
+ /*
+ * Do a local attach. We swap the consume and produce
+ * queues for the attacher and deliver an attach event.
+ */
+ result = queue_pair_notify_peer_local(true, *handle);
+ if (result < VMCI_SUCCESS)
+ goto error_keep_entry;
+ my_produce_q = queue_pair_entry->consume_q;
+ my_consume_q = queue_pair_entry->produce_q;
+ goto out;
+ }
+ result = VMCI_ERROR_ALREADY_EXISTS;
+ goto error_keep_entry;
+ }
+
+ my_produce_q = vmci_alloc_queue(produce_size, flags);
+ if (!my_produce_q) {
+ VMCI_LOG_WARNING(LGPFX"Error allocating pages for produce "
+ "queue.\n");
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ my_consume_q = vmci_alloc_queue(consume_size, flags);
+ if (!my_consume_q) {
+ VMCI_LOG_WARNING(LGPFX"Error allocating pages for consume "
+ "queue.\n");
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
+ produce_size, consume_size, my_produce_q, my_consume_q);
+ if (!queue_pair_entry) {
+ VMCI_LOG_WARNING(LGPFX"Error allocating memory in %s.\n",
+ __FUNCTION__);
+ result = VMCI_ERROR_NO_MEM;
+ goto error;
+ }
+
+ result = vmci_alloc_ppn_set(my_produce_q, num_produce_pages,
+ my_consume_q, num_consume_pages, &queue_pair_entry->ppn_set);
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(LGPFX"vmci_alloc_ppn_set failed.\n");
+ goto error;
+ }
+
+ /*
+ * It's only necessary to notify the host if this queue pair will be
+ * attached to from another context.
+ */
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ /* Local create case. */
+ vmci_id context_id = vmci_get_context_id();
+
+ /*
+ * Enforce similar checks on local queue pairs as we do for
+ * regular ones. The handle's context must match the creator
+ * or attacher context id (here they are both the current
+ * context id) and the attach-only flag cannot exist during
+ * create. We also ensure specified peer is this context or
+ * an invalid one.
+ */
+ if (queue_pair_entry->qp.handle.context != context_id ||
+ (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
+ queue_pair_entry->qp.peer != context_id)) {
+ result = VMCI_ERROR_NO_ACCESS;
+ goto error;
+ }
+
+ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
+ result = VMCI_ERROR_NOT_FOUND;
+ goto error;
+ }
+ } else {
+ result = vmci_queue_pair_alloc_hypercall(queue_pair_entry);
+ if (result < VMCI_SUCCESS) {
+ VMCI_LOG_WARNING(
+ LGPFX"vmci_queue_pair_alloc_hypercall result = "
+ "%d.\n", result);
+ goto error;
+ }
+ }
+
+ queue_pair_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
+
+out:
+ queue_pair_entry->qp.ref_count++;
+ *handle = queue_pair_entry->qp.handle;
+ *produce_q = (struct vmci_queue *)my_produce_q;
+ *consume_q = (struct vmci_queue *)my_consume_q;
+
+ /*
+ * We should initialize the queue pair header pages on a local queue
+ * pair create. For non-local queue pairs, the hypervisor initializes
+ * the header pages in the create step.
+ */
+ if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
+ queue_pair_entry->qp.ref_count == 1) {
+ vmci_queue_header_init((*produce_q)->q_header, *handle);
+ vmci_queue_header_init((*consume_q)->q_header, *handle);
+ }
+
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+
+ return (VMCI_SUCCESS);
+
+error:
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ if (queue_pair_entry) {
+ /* The queues will be freed inside the destroy routine. */
+ qp_guest_endpoint_destroy(queue_pair_entry);
+ } else {
+ if (my_produce_q)
+ vmci_free_queue(my_produce_q, produce_size);
+ if (my_consume_q)
+ vmci_free_queue(my_consume_q, consume_size);
+ }
+ return (result);
+
+error_keep_entry:
+ /* This path should only be used when an existing entry was found. */
+ ASSERT(queue_pair_entry->qp.ref_count > 0);
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_detach_hypercall --
+ *
+ * Helper to make a QueuePairDetach hypercall when the driver is supporting
+ * a guest device.
+ *
+ * Results:
+ * Result of the hypercall.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_queue_pair_detach_hypercall(struct vmci_handle handle)
+{
+ struct vmci_queue_pair_detach_msg detach_msg;
+
+ detach_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_QUEUEPAIR_DETACH);
+ detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+ detach_msg.hdr.payload_size = sizeof(handle);
+ detach_msg.handle = handle;
+
+ return (vmci_send_datagram((struct vmci_datagram *)&detach_msg));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_queue_pair_detach_guest_work --
+ *
+ * Helper for VMCI QueuePair detach interface. Frees the physical pages for
+ * the queue pair.
+ *
+ * Results:
+ * Success or failure.
+ *
+ * Side effects:
+ * Memory may be freed.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_queue_pair_detach_guest_work(struct vmci_handle handle)
+{
+ struct qp_guest_endpoint *entry;
+ int result;
+ uint32_t ref_count;
+
+ ASSERT(!VMCI_HANDLE_INVALID(handle));
+
+ vmci_mutex_acquire(&qp_guest_endpoints.mutex);
+
+ entry = (struct qp_guest_endpoint *)queue_pair_list_find_entry(
+ &qp_guest_endpoints, handle);
+ if (!entry) {
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ return (VMCI_ERROR_NOT_FOUND);
+ }
+
+ ASSERT(entry->qp.ref_count >= 1);
+
+ if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+ result = VMCI_SUCCESS;
+
+ if (entry->qp.ref_count > 1) {
+ result = queue_pair_notify_peer_local(false, handle);
+
+ /*
+ * We can fail to notify a local queuepair because we
+ * can't allocate. We still want to release the entry
+ * if that happens, so don't bail out yet.
+ */
+ }
+ } else {
+ result = vmci_queue_pair_detach_hypercall(handle);
+ if (entry->hibernate_failure) {
+ if (result == VMCI_ERROR_NOT_FOUND) {
+
+ /*
+ * If a queue pair detach failed when entering
+ * hibernation, the guest driver and the device
+ * may disagree on its existence when coming
+ * out of hibernation. The guest driver will
+ * regard it as a non-local queue pair, but
+ * the device state is gone, since the device
+ * has been powered off. In this case, we
+ * treat the queue pair as a local queue pair
+ * with no peer.
+ */
+
+ ASSERT(entry->qp.ref_count == 1);
+ result = VMCI_SUCCESS;
+ }
+ }
+ if (result < VMCI_SUCCESS) {
+
+ /*
+ * We failed to notify a non-local queuepair. That other
+ * queuepair might still be accessing the shared
+ * memory, so don't release the entry yet. It will get
+ * cleaned up by vmci_queue_pair_exit() if necessary
+ * (assuming we are going away, otherwise why did this
+ * fail?).
+ */
+
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+ return (result);
+ }
+ }
+
+ /*
+ * If we get here then we either failed to notify a local queuepair, or
+ * we succeeded in all cases. Release the entry if required.
+ */
+
+ entry->qp.ref_count--;
+ if (entry->qp.ref_count == 0)
+ queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp);
+
+	/*
+	 * If we didn't remove the entry, this could change once we unlock.
+	 * The 0xffffffff value does not matter; it only silences the compiler.
+	 */
+	ref_count = entry ? entry->qp.ref_count : 0xffffffff;
+
+ vmci_mutex_release(&qp_guest_endpoints.mutex);
+
+ if (ref_count == 0)
+ qp_guest_endpoint_destroy(entry);
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_notify_peer_local --
+ *
+ * Dispatches a queue pair event message directly into the local event
+ * queue.
+ *
+ * Results:
+ * VMCI_SUCCESS on success, error code otherwise
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+queue_pair_notify_peer_local(bool attach, struct vmci_handle handle)
+{
+ struct vmci_event_msg *e_msg;
+ struct vmci_event_payload_qp *e_payload;
+	/* buf is only 48 bytes. */
+	char buf[sizeof(*e_msg) + sizeof(*e_payload)];
+	vmci_id context_id;
+
+	context_id = vmci_get_context_id();
+
+ e_msg = (struct vmci_event_msg *)buf;
+ e_payload = vmci_event_msg_payload(e_msg);
+
+ e_msg->hdr.dst = VMCI_MAKE_HANDLE(context_id, VMCI_EVENT_HANDLER);
+ e_msg->hdr.src = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+ e_msg->hdr.payload_size = sizeof(*e_msg) + sizeof(*e_payload) -
+ sizeof(e_msg->hdr);
+ e_msg->event_data.event = attach ? VMCI_EVENT_QP_PEER_ATTACH :
+ VMCI_EVENT_QP_PEER_DETACH;
+ e_payload->peer_id = context_id;
+ e_payload->handle = handle;
+
+ return (vmci_event_dispatch((struct vmci_datagram *)e_msg));
+}
Index: sys/dev/vmware/vmci/vmci_resource.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_resource.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI Resource Access Control API. */
+
+#ifndef _VMCI_RESOURCE_H_
+#define _VMCI_RESOURCE_H_
+
+#include "vmci_defs.h"
+#include "vmci_hashtable.h"
+#include "vmci_kernel_if.h"
+
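+/* Recovers the containing object from a pointer to one of its members. */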
+#define RESOURCE_CONTAINER(ptr, type, member) \
+ ((type *)((char *)(ptr) - offsetof(type, member)))
+
+typedef void (*vmci_resource_free_cb)(void *resource);
+
+typedef enum {
+ VMCI_RESOURCE_TYPE_ANY,
+ VMCI_RESOURCE_TYPE_API,
+ VMCI_RESOURCE_TYPE_GROUP,
+ VMCI_RESOURCE_TYPE_DATAGRAM,
+ VMCI_RESOURCE_TYPE_DOORBELL,
+} vmci_resource_type;
+
+struct vmci_resource {
+ struct vmci_hash_entry hash_entry;
+ vmci_resource_type type;
+	/* Callback to free the container object when its ref count drops to 0. */
+ vmci_resource_free_cb container_free_cb;
+ /* Container object reference. */
+ void *container_object;
+};
+
+int vmci_resource_init(void);
+void vmci_resource_exit(void);
+void vmci_resource_sync(void);
+
+vmci_id vmci_resource_get_id(vmci_id context_id);
+
+int vmci_resource_add(struct vmci_resource *resource,
+ vmci_resource_type resource_type,
+ struct vmci_handle resource_handle,
+ vmci_resource_free_cb container_free_cb, void *container_object);
+void vmci_resource_remove(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type);
+struct vmci_resource *vmci_resource_get(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type);
+void vmci_resource_hold(struct vmci_resource *resource);
+int vmci_resource_release(struct vmci_resource *resource);
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource);
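+
+/*
+ * Typical usage (illustrative sketch only; my_object and my_object_free are
+ * hypothetical names, not part of this change):
+ *
+ *	struct my_object {
+ *		struct vmci_resource resource;
+ *	};
+ *
+ *	static void my_object_free(void *obj);
+ *
+ *	error = vmci_resource_add(&obj->resource, VMCI_RESOURCE_TYPE_DOORBELL,
+ *	    handle, my_object_free, obj);
+ *	...
+ *	vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DOORBELL);
+ */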
+
+#endif /* !_VMCI_RESOURCE_H_ */
Index: sys/dev/vmware/vmci/vmci_resource.c
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_resource.c
@@ -0,0 +1,395 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Implementation of the VMCI Resource Access Control API. */
+
+#include "vmci_driver.h"
+#include "vmci_kernel_defs.h"
+#include "vmci_resource.h"
+
+#define LGPFX "vmci_resource: "
+
+/* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */
+static uint32_t resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+static vmci_lock resource_id_lock;
+
+static void vmci_resource_do_remove(struct vmci_resource *resource);
+
+static struct vmci_hashtable *resource_table = NULL;
+
+/* Public Resource Access Control API. */
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_init --
+ *
+ * Initializes the VMCI Resource Access Control API. Creates a lock and a
+ * hashtable to hold all resources.
+ *
+ * Results:
+ * VMCI_SUCCESS if successful, error code otherwise.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_resource_init(void)
+{
+ int err;
+
+ err = vmci_init_lock(&resource_id_lock, "VMCI RID lock");
+ if (err < VMCI_SUCCESS)
+ return (err);
+
+ resource_table = vmci_hashtable_create(128);
+ if (resource_table == NULL) {
+		VMCI_LOG_WARNING(LGPFX"Failed creating a resource hash table "
+		    "for VMCI.\n");
+ vmci_cleanup_lock(&resource_id_lock);
+ return (VMCI_ERROR_NO_MEM);
+ }
+
+ return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_exit --
+ *
+ * Cleans up resources.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_exit(void)
+{
+
+	/* Cleanup resources. */
+ vmci_cleanup_lock(&resource_id_lock);
+
+ if (resource_table)
+ vmci_hashtable_destroy(resource_table);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_get_id --
+ *
+ * Return a resource ID. Resource IDs 0 through VMCI_RESERVED_RESOURCE_ID_MAX
+ * are reserved, so we start from VMCI_RESERVED_RESOURCE_ID_MAX + 1.
+ *
+ * Result:
+ * VMCI resource id on success, VMCI_INVALID_ID on failure.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+vmci_id
+vmci_resource_get_id(vmci_id context_id)
+{
+ vmci_id current_rid;
+ vmci_id old_rid;
+ bool found_rid;
+
+ old_rid = resource_id;
+ found_rid = false;
+
+ /*
+ * Generate a unique resource ID. Keep on trying until we wrap around
+ * in the RID space.
+ */
+ ASSERT(old_rid > VMCI_RESERVED_RESOURCE_ID_MAX);
+
+ do {
+ struct vmci_handle handle;
+
+ vmci_grab_lock(&resource_id_lock);
+ current_rid = resource_id;
+ handle = VMCI_MAKE_HANDLE(context_id, current_rid);
+ resource_id++;
+ if (UNLIKELY(resource_id == VMCI_INVALID_ID)) {
+ /* Skip the reserved rids. */
+ resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+ }
+ vmci_release_lock(&resource_id_lock);
+ found_rid = !vmci_hashtable_entry_exists(resource_table,
+ handle);
+ } while (!found_rid && resource_id != old_rid);
+
+ if (UNLIKELY(!found_rid))
+ return (VMCI_INVALID_ID);
+ else
+ return (current_rid);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_add --
+ *
+ * Add resource to hashtable.
+ *
+ * Results:
+ * VMCI_SUCCESS if successful, error code if not.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_resource_add(struct vmci_resource *resource,
+ vmci_resource_type resource_type, struct vmci_handle resource_handle,
+ vmci_resource_free_cb container_free_cb, void *container_object)
+{
+ int result;
+
+ ASSERT(resource);
+
+ if (VMCI_HANDLE_EQUAL(resource_handle, VMCI_INVALID_HANDLE)) {
+ VMCI_LOG_DEBUG(LGPFX"Invalid argument resource "
+ "(handle=0x%x:0x%x).\n", resource_handle.context,
+ resource_handle.resource);
+ return (VMCI_ERROR_INVALID_ARGS);
+ }
+
+ vmci_hashtable_init_entry(&resource->hash_entry, resource_handle);
+ resource->type = resource_type;
+ resource->container_free_cb = container_free_cb;
+ resource->container_object = container_object;
+
+ /* Add resource to hashtable. */
+ result = vmci_hashtable_add_entry(resource_table,
+ &resource->hash_entry);
+ if (result != VMCI_SUCCESS) {
+ VMCI_LOG_DEBUG(LGPFX"Failed to add entry to hash table "
+ "(result=%d).\n", result);
+ return (result);
+ }
+
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_remove --
+ *
+ * Remove resource from hashtable.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_remove(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type)
+{
+ struct vmci_resource *resource;
+
+ resource = vmci_resource_get(resource_handle, resource_type);
+ if (resource == NULL)
+ return;
+
+ /* Remove resource from hashtable. */
+ vmci_hashtable_remove_entry(resource_table, &resource->hash_entry);
+
+ vmci_resource_release(resource);
+ /* resource could be freed by now. */
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_get --
+ *
+ * Get resource from hashtable.
+ *
+ * Results:
+ * Resource if successful. Otherwise NULL.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_resource *
+vmci_resource_get(struct vmci_handle resource_handle,
+ vmci_resource_type resource_type)
+{
+ struct vmci_hash_entry *entry;
+ struct vmci_resource *resource;
+
+ entry = vmci_hashtable_get_entry(resource_table, resource_handle);
+ if (entry == NULL)
+ return (NULL);
+ resource = RESOURCE_CONTAINER(entry, struct vmci_resource, hash_entry);
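+	/*
+	 * If the resource is not of the requested type, drop the reference
+	 * obtained from vmci_hashtable_get_entry() before returning NULL.
+	 */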
+ if (resource_type == VMCI_RESOURCE_TYPE_ANY ||
+ resource->type == resource_type) {
+ return (resource);
+ }
+ vmci_hashtable_release_entry(resource_table, entry);
+ return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_hold --
+ *
+ * Hold the given resource. This will hold the hashtable entry. This is like
+ * doing a vmci_resource_get() but without having to look up the resource by
+ * handle.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_hold(struct vmci_resource *resource)
+{
+
+ ASSERT(resource);
+ vmci_hashtable_hold_entry(resource_table, &resource->hash_entry);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_do_remove --
+ *
+ * Deallocates data structures associated with the given resource and
+ * invokes any callback registered for the resource.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May deallocate memory and invoke a callback for the removed resource.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_resource_do_remove(struct vmci_resource *resource)
+{
+
+ ASSERT(resource);
+
+ if (resource->container_free_cb) {
+ resource->container_free_cb(resource->container_object);
+		/* Resource has been freed; don't dereference it. */
+ }
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_release --
+ *
+ * Releases a previously obtained reference to the given resource.
+ *
+ * Results:
+ * Result of releasing the hashtable entry; VMCI_SUCCESS_ENTRY_DEAD if
+ * this was the last reference and the resource was removed.
+ *
+ * Side effects:
+ * The resource's container_free_cb will get called if this was the last
+ * reference.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_resource_release(struct vmci_resource *resource)
+{
+ int result;
+
+ ASSERT(resource);
+
+ result = vmci_hashtable_release_entry(resource_table,
+ &resource->hash_entry);
+ if (result == VMCI_SUCCESS_ENTRY_DEAD)
+ vmci_resource_do_remove(resource);
+
+ /*
+ * We propagate the information back to caller in case it wants to know
+ * whether entry was freed.
+ */
+ return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_handle --
+ *
+ * Get the handle for the given resource.
+ *
+ * Results:
+ * The resource's associated handle.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_handle
+vmci_resource_handle(struct vmci_resource *resource)
+{
+
+ ASSERT(resource);
+ return (resource->hash_entry.handle);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_sync --
+ *
+ * Use this as a synchronization point when setting globals, for example,
+ * during device shutdown.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_sync(void)
+{
+
+ vmci_hashtable_sync(resource_table);
+}
Index: sys/dev/vmware/vmci/vmci_utils.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_utils.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Some common utilities used by the VMCI kernel module. */
+
+#ifndef _VMCI_UTILS_H_
+#define _VMCI_UTILS_H_
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hash_id --
+ *
+ * Hash function used by the Simple Datagram API. Hashes only a VMCI ID (not
+ * the full VMCI handle). Based on the djb2 hash function by Dan Bernstein.
+ *
+ * Result:
+ * The hash table index for the given id, in the range [0, size);
+ * size is expected to be a power of two.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+vmci_hash_id(vmci_id id, unsigned size)
+{
+ unsigned i;
+ int hash = 5381;
+
+ for (i = 0; i < sizeof(id); i++)
+ hash = ((hash << 5) + hash) + (uint8_t)(id >> (i * 8));
+
+ return (hash & (size - 1));
+}
+
+#endif /* !_VMCI_UTILS_H_ */
Index: sys/modules/vmware/Makefile
===================================================================
--- sys/modules/vmware/Makefile
+++ sys/modules/vmware/Makefile
@@ -23,6 +23,6 @@
# SUCH DAMAGE.
#
-SUBDIR= vmxnet3
+SUBDIR= vmci vmxnet3
.include <bsd.subdir.mk>
Index: sys/modules/vmware/vmci/Makefile
===================================================================
--- /dev/null
+++ sys/modules/vmware/vmci/Makefile
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+#
+# SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+#
+
+.PATH: ${.CURDIR}/../../../dev/vmware/vmci
+
+KMOD= vmci
+SRCS= vmci.c vmci_datagram.c vmci_doorbell.c vmci_driver.c vmci_event.c
+SRCS+= vmci_hashtable.c vmci_kernel_if.c vmci_qpair.c vmci_queue_pair.c
+SRCS+= vmci_resource.c
+SRCS+= device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>
