Index: share/man/man4/vmci.4
===================================================================
--- /dev/null
+++ share/man/man4/vmci.4
@@ -0,0 +1,67 @@
+.Dd February 10, 2018
+.Dt VMCI 4
+.Os
+.Sh NAME
+.Nm vmci
+.Nd VMware Virtual Machine Communication Interface
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device vmci"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+vmci_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides support for the VMware Virtual Machine Communication Interface
+(VMCI) in virtual machines running on VMware hypervisors.
+.Pp
+VMCI allows virtual machines to communicate with host kernel modules and the
+VMware hypervisor.
+User level applications in a virtual machine can use VMCI through vSockets
+(also known as VMCI Sockets and not included in this kernel module), a socket
+address family designed to be compatible with UDP and TCP at the interface
+level.
+Today, VMCI and vSockets are used by various VMware Tools components inside
+the guest for zero-config, network-less access to VMware host services.
+In addition, vSockets are used for various
+applications where network access of the virtual machine is restricted
+or non-existent.
+Examples of this are VMs communicating with device proxies for proprietary
+hardware running as host applications, and automated testing of applications
+running within virtual machines.
+.Pp
+In a virtual machine, VMCI is exposed as a regular PCI device.
+The primary communication mechanisms supported are a point-to-point
+bidirectional transport based on a pair of memory-mapped queues, and
+asynchronous notifications in the form of datagrams and doorbells.
+These features are available to kernel level components such as vSockets
+through the VMCI kernel API.
+In addition to this, the VMCI kernel API provides support for receiving
+events related to the state of the VMCI communication channels, and the
+virtual machine itself.
+.Sh SEE ALSO
+.Xr socket 2 ,
+.Xr pci 9
+.Rs
+.%T "VMware vSockets Documentation"
+.%U https://www.vmware.com/support/developer/vmci-sdk/
+.Re
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Fx 12.0 .
+.Sh AUTHORS
+The
+.Nm
+driver and manual page were written by
+.An Vishnu Dasa Aq Mt vdasahar@gmail.com .
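Reviewer note, not part of the diff: a quick way to verify the driver attaches after the patch is applied is to load it at runtime and look for the device on the PCI bus. A minimal sketch, assuming the module has been built and installed as vmci.ko:

    # kldload vmci
    # dmesg | grep -i vmci
    # pciconf -lv | grep -A 4 ^vmci

The device being probed for is PCI vendor ID 0x15AD, device ID 0x0740, as defined in vmci.h below.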
Index: sys/conf/files.amd64 =================================================================== --- sys/conf/files.amd64 +++ sys/conf/files.amd64 @@ -472,6 +472,16 @@ dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx +dev/vmware/vmci/vmci.c optional vmci +dev/vmware/vmci/vmci_datagram.c optional vmci +dev/vmware/vmci/vmci_doorbell.c optional vmci +dev/vmware/vmci/vmci_driver.c optional vmci +dev/vmware/vmci/vmci_event.c optional vmci +dev/vmware/vmci/vmci_hashtable.c optional vmci +dev/vmware/vmci/vmci_kernel_if.c optional vmci +dev/vmware/vmci/vmci_qpair.c optional vmci +dev/vmware/vmci/vmci_queue_pair.c optional vmci +dev/vmware/vmci/vmci_resource.c optional vmci dev/wbwd/wbwd.c optional wbwd dev/xen/pci/xen_acpi_pci.c optional xenhvm dev/xen/pci/xen_pci.c optional xenhvm Index: sys/conf/files.i386 =================================================================== --- sys/conf/files.i386 +++ sys/conf/files.i386 @@ -323,6 +323,16 @@ dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx +dev/vmware/vmci/vmci.c optional vmci +dev/vmware/vmci/vmci_datagram.c optional vmci +dev/vmware/vmci/vmci_doorbell.c optional vmci +dev/vmware/vmci/vmci_driver.c optional vmci +dev/vmware/vmci/vmci_event.c optional vmci +dev/vmware/vmci/vmci_hashtable.c optional vmci +dev/vmware/vmci/vmci_kernel_if.c optional vmci +dev/vmware/vmci/vmci_qpair.c optional vmci +dev/vmware/vmci/vmci_queue_pair.c optional vmci +dev/vmware/vmci/vmci_resource.c optional vmci dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_timer.c optional acpi Index: sys/dev/vmware/vmci/vmci.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci.h @@ -0,0 +1,77 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. 
*/ + +#ifndef _VMCI_H_ +#define _VMCI_H_ + +#include +#include +#include +#include +#include + +#include + +#include "vmci_datagram.h" +#include "vmci_kernel_if.h" + +/* VMCI device vendor and device ID */ +#define VMCI_VMWARE_VENDOR_ID 0x15AD +#define VMCI_VMWARE_DEVICE_ID 0x0740 + +#define VMCI_VERSION 1 + +struct vmci_dma_alloc { + bus_dma_tag_t dma_tag; + caddr_t dma_vaddr; + bus_addr_t dma_paddr; + bus_dmamap_t dma_map; + bus_size_t dma_size; +}; + +struct vmci_interrupt { + struct resource *vmci_irq; + int vmci_rid; + void *vmci_handler; +}; + +struct vmci_softc { + device_t vmci_dev; + + struct mtx vmci_spinlock; + + struct resource *vmci_res0; + bus_space_tag_t vmci_iot0; + bus_space_handle_t vmci_ioh0; + unsigned int vmci_ioaddr; + struct resource *vmci_res1; + bus_space_tag_t vmci_iot1; + bus_space_handle_t vmci_ioh1; + + struct vmci_dma_alloc vmci_notifications_bitmap; + + int vmci_num_intr; + vmci_intr_type vmci_intr_type; + struct vmci_interrupt vmci_intrs[VMCI_MAX_INTRS]; + struct task vmci_interrupt_dq_task; + struct task vmci_interrupt_bm_task; + + struct task vmci_delayed_work_task; + struct mtx vmci_delayed_work_lock; + vmci_list(vmci_delayed_work_info) vmci_delayed_work_infos; + + unsigned int capabilities; +}; + +int vmci_dma_malloc(bus_size_t size, bus_size_t align, + struct vmci_dma_alloc *dma); +void vmci_dma_free(struct vmci_dma_alloc *); +int vmci_send_datagram(struct vmci_datagram *dg); +int vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data); + +#endif /* !_VMCI_H_ */ Index: sys/dev/vmware/vmci/vmci.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci.c @@ -0,0 +1,1174 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "vmci.h" +#include "vmci_doorbell.h" +#include "vmci_driver.h" +#include "vmci_kernel_defs.h" +#include "vmci_queue_pair.h" + +static int vmci_probe(device_t); +static int vmci_attach(device_t); +static int vmci_detach(device_t); +static int vmci_shutdown(device_t); + +static int vmci_map_bars(struct vmci_softc *); +static void vmci_unmap_bars(struct vmci_softc *); + +static int vmci_config_capabilities(struct vmci_softc *); + +static int vmci_dma_malloc_int(struct vmci_softc *, bus_size_t, + bus_size_t, struct vmci_dma_alloc *); +static void vmci_dma_free_int(struct vmci_softc *, + struct vmci_dma_alloc *); + +static int vmci_config_interrupts(struct vmci_softc *); +static int vmci_config_interrupt(struct vmci_softc *); +static int vmci_check_intr_cnt(struct vmci_softc *); +static int vmci_allocate_interrupt_resources(struct vmci_softc *); +static int vmci_setup_interrupts(struct vmci_softc *); +static void vmci_dismantle_interrupts(struct vmci_softc *); +static void vmci_interrupt(void *); +static void vmci_interrupt_bm(void *); +static void dispatch_datagrams(void *, int); +static void process_bitmap(void *, int); + +static void vmci_delayed_work_fn_cb(void *context, int data); + +static device_method_t vmci_methods[] = { + /* Device interface. 
*/ + DEVMETHOD(device_probe, vmci_probe), + DEVMETHOD(device_attach, vmci_attach), + DEVMETHOD(device_detach, vmci_detach), + DEVMETHOD(device_shutdown, vmci_shutdown), + + DEVMETHOD_END +}; + +static driver_t vmci_driver = { + "vmci", vmci_methods, sizeof(struct vmci_softc) +}; + +static devclass_t vmci_devclass; +DRIVER_MODULE(vmci, pci, vmci_driver, vmci_devclass, 0, 0); +MODULE_VERSION(vmci, VMCI_VERSION); + +MODULE_DEPEND(vmci, pci, 1, 1, 1); + +static struct vmci_softc *vmci_sc; + +#define LGPFX "vmci: " +/* + * Allocate a buffer for incoming datagrams globally to avoid repeated + * allocation in the interrupt handler's atomic context. + */ +static uint8_t *data_buffer = NULL; +static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE; + +struct vmci_delayed_work_info { + vmci_work_fn *work_fn; + void *data; + vmci_list_item(vmci_delayed_work_info) entry; +}; + +/* + *------------------------------------------------------------------------------ + * + * vmci_probe -- + * + * Probe to see if the VMCI device is present. + * + * Results: + * BUS_PROBE_DEFAULT if device exists, ENXIO otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_probe(device_t dev) +{ + + if (pci_get_vendor(dev) == VMCI_VMWARE_VENDOR_ID && + pci_get_device(dev) == VMCI_VMWARE_DEVICE_ID) { + device_set_desc(dev, + "VMware Virtual Machine Communication Interface"); + + return (BUS_PROBE_DEFAULT); + } + + return (ENXIO); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_attach -- + * + * Attach VMCI device to the system after vmci_probe() has been called and + * the device has been detected. + * + * Results: + * 0 if success, ENXIO otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_attach(device_t dev) +{ + struct vmci_softc *sc; + int error, i; + + sc = device_get_softc(dev); + sc->vmci_dev = dev; + vmci_sc = sc; + + data_buffer = NULL; + sc->vmci_num_intr = 0; + for (i = 0; i < VMCI_MAX_INTRS; i++) { + sc->vmci_intrs[i].vmci_irq = NULL; + sc->vmci_intrs[i].vmci_handler = NULL; + } + + TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc); + TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc); + + TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc); + + pci_enable_busmaster(dev); + + mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN); + mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock", + NULL, MTX_DEF); + + error = vmci_map_bars(sc); + if (error) { + VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n"); + goto fail; + } + + error = vmci_config_capabilities(sc); + if (error) { + VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n"); + goto fail; + } + + vmci_list_init(&sc->vmci_delayed_work_infos); + + vmci_components_init(); + vmci_util_init(); + error = vmci_qp_guest_endpoints_init(); + if (error) { + VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n"); + goto fail; + } + + error = vmci_config_interrupts(sc); + if (error) + VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n"); + +fail: + if (error) { + vmci_detach(dev); + return (ENXIO); + } + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_detach -- + * + * Detach the VMCI device. + * + * Results: + * 0 + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static int +vmci_detach(device_t dev) +{ + struct vmci_softc *sc; + + sc = device_get_softc(dev); + + vmci_qp_guest_endpoints_exit(); + vmci_util_exit(); + + vmci_dismantle_interrupts(sc); + + vmci_components_cleanup(); + + taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task); + mtx_destroy(&sc->vmci_delayed_work_lock); + + if (sc->vmci_res0 != NULL) + bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, + VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET); + + if (sc->vmci_notifications_bitmap.dma_vaddr != NULL) + vmci_dma_free(&sc->vmci_notifications_bitmap); + + vmci_unmap_bars(sc); + + mtx_destroy(&sc->vmci_spinlock); + + pci_disable_busmaster(dev); + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_shutdown -- + * + * This function is called during system shutdown. We don't do anything. + * + * Results: + * 0 + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_shutdown(device_t dev) +{ + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_map_bars -- + * + * Maps the PCI I/O and MMIO BARs. + * + * Results: + * 0 on success, ENXIO otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_map_bars(struct vmci_softc *sc) +{ + int rid; + + /* Map the PCI I/O BAR: BAR0 */ + rid = PCIR_BAR(0); + sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT, + &rid, RF_ACTIVE); + if (sc->vmci_res0 == NULL) { + VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n"); + return (ENXIO); + } + + sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0); + sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0); + sc->vmci_ioaddr = rman_get_start(sc->vmci_res0); + + /* Map the PCI MMIO BAR: BAR1 */ + rid = PCIR_BAR(1); + sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + if (sc->vmci_res1 == NULL) { + VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n"); + return (ENXIO); + } + + sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1); + sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1); + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_unmap_bars -- + * + * Unmaps the VMCI PCI I/O and MMIO BARs. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_unmap_bars(struct vmci_softc *sc) +{ + int rid; + + if (sc->vmci_res0 != NULL) { + rid = PCIR_BAR(0); + bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid, + sc->vmci_res0); + sc->vmci_res0 = NULL; + } + + if (sc->vmci_res1 != NULL) { + rid = PCIR_BAR(1); + bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid, + sc->vmci_res1); + sc->vmci_res1 = NULL; + } +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_config_capabilities -- + * + * Check the VMCI device capabilities and configure the device accordingly. + * + * Results: + * 0 if success, ENODEV otherwise. + * + * Side effects: + * Device capabilities are enabled. 
+ * + *------------------------------------------------------------------------------ + */ + +static int +vmci_config_capabilities(struct vmci_softc *sc) +{ + unsigned long bitmap_PPN; + int error; + + /* + * Verify that the VMCI device supports the capabilities that we + * need. Datagrams are necessary and notifications will be used + * if the device supports it. + */ + sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0, + VMCI_CAPS_ADDR); + + if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) { + VMCI_LOG_ERROR(LGPFX"VMCI device does not support " + "datagrams.\n"); + return (ENODEV); + } + + if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) { + sc->capabilities = VMCI_CAPS_DATAGRAM; + error = vmci_dma_malloc(PAGE_SIZE, 1, + &sc->vmci_notifications_bitmap); + if (error) + VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for " + "notification bitmap.\n"); + else { + memset(sc->vmci_notifications_bitmap.dma_vaddr, 0, + PAGE_SIZE); + sc->capabilities |= VMCI_CAPS_NOTIFICATIONS; + } + } else + sc->capabilities = VMCI_CAPS_DATAGRAM; + + /* Let the host know which capabilities we intend to use. */ + bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, + VMCI_CAPS_ADDR, sc->capabilities); + + /* + * Register notification bitmap with device if that capability is + * used. + */ + if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) { + bitmap_PPN = + sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT; + vmci_register_notification_bitmap(bitmap_PPN); + } + + /* Check host capabilities. */ + if (!vmci_check_host_capabilities()) + return (ENODEV); + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_dmamap_cb -- + * + * Callback to receive mapping information resulting from the load of a + * bus_dmamap_t via bus_dmamap_load() + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) +{ + bus_addr_t *baddr = arg; + + if (error == 0) + *baddr = segs->ds_addr; +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_dma_malloc_int -- + * + * Internal function that allocates DMA memory. + * + * Results: + * 0 if success. + * ENOMEM if insufficient memory. + * EINPROGRESS if mapping is deferred. + * EINVAL if the request was invalid. + * + * Side effects: + * DMA memory is allocated. 
+ * + *------------------------------------------------------------------------------ + */ + +static int +vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align, + struct vmci_dma_alloc *dma) +{ + int error; + + bzero(dma, sizeof(struct vmci_dma_alloc)); + + error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev), + align, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + size, /* maxsize */ + 1, /* nsegments */ + size, /* maxsegsize */ + BUS_DMA_ALLOCNOW, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &dma->dma_tag); + if (error) { + VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error); + goto fail; + } + + error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, + BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map); + if (error) { + VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error); + goto fail; + } + + error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, + size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT); + if (error) { + VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error); + goto fail; + } + + dma->dma_size = size; + +fail: + if (error) + vmci_dma_free(dma); + + return (error); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_dma_malloc -- + * + * This function is a wrapper around vmci_dma_malloc_int for callers + * outside of this module. Since we only support a single VMCI device, this + * wrapper provides access to the device softc structure. + * + * Results: + * 0 if success. + * ENOMEM if insufficient memory. + * EINPROGRESS if mapping is deferred. + * EINVAL if the request was invalid. + * + * Side effects: + * DMA memory is allocated. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma) +{ + + return (vmci_dma_malloc_int(vmci_sc, size, align, dma)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_dma_free_int -- + * + * Internal function that frees DMA memory. + * + * Results: + * None. + * + * Side effects: + * Frees DMA memory. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma) +{ + + if (dma->dma_tag != NULL) { + if (dma->dma_paddr != 0) { + bus_dmamap_sync(dma->dma_tag, dma->dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(dma->dma_tag, dma->dma_map); + } + + if (dma->dma_vaddr != NULL) + bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, + dma->dma_map); + + bus_dma_tag_destroy(dma->dma_tag); + } + bzero(dma, sizeof(struct vmci_dma_alloc)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_dma_free -- + * + * This function is a wrapper around vmci_dma_free_int for callers outside + * of this module. Since we only support a single VMCI device, this wrapper + * provides access to the device softc structure. + * + * Results: + * None. + * + * Side effects: + * Frees DMA memory. 
+ * + *------------------------------------------------------------------------------ + */ + +void +vmci_dma_free(struct vmci_dma_alloc *dma) +{ + + vmci_dma_free_int(vmci_sc, dma); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_config_interrupts -- + * + * Configures and enables interrupts. Try to configure MSI-X. If this fails, + * try to configure MSI. If even this fails, try legacy interrupts. + * + * Results: + * 0 if success. + * ENOMEM if insufficient memory. + * ENODEV if the device doesn't support interrupts. + * ENXIO if the device configuration failed. + * + * Side effects: + * Interrupts get enabled if successful. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_config_interrupts(struct vmci_softc *sc) +{ + int error; + + data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT); + if (data_buffer == NULL) + return (ENOMEM); + + sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX; + error = vmci_config_interrupt(sc); + if (error) { + sc->vmci_intr_type = VMCI_INTR_TYPE_MSI; + error = vmci_config_interrupt(sc); + } + if (error) { + sc->vmci_intr_type = VMCI_INTR_TYPE_INTX; + error = vmci_config_interrupt(sc); + } + if (error) + return (error); + + /* Enable specific interrupt bits. */ + if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) + bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, + VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION); + else + bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, + VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM); + + /* Enable interrupts. */ + bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, + VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE); + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_config_interrupt -- + * + * Check the number of interrupts supported, allocate resources and setup + * interrupts. + * + * Results: + * 0 if success. + * ENOMEM if insufficient memory. + * ENODEV if the device doesn't support interrupts. + * ENXIO if the device configuration failed. + * + * Side effects: + * Resources get allocated and interrupts get setup (but not enabled) if + * successful. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_config_interrupt(struct vmci_softc *sc) +{ + int error; + + error = vmci_check_intr_cnt(sc); + if (error) + return (error); + + error = vmci_allocate_interrupt_resources(sc); + if (error) + return (error); + + error = vmci_setup_interrupts(sc); + if (error) + return (error); + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_check_intr_cnt -- + * + * Check the number of interrupts supported by the device and ask PCI bus + * to allocate appropriate number of interrupts. + * + * Results: + * 0 if success. + * ENODEV if the device doesn't support any interrupts. + * ENXIO if the device configuration failed. + * + * Side effects: + * Resources get allocated on success. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_check_intr_cnt(struct vmci_softc *sc) +{ + + if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) { + sc->vmci_num_intr = 1; + return (0); + } + + /* + * Make sure that the device supports the required number of MSI/MSI-X + * messages. We try for 2 MSI-X messages but 1 is good too. We need at + * least 1 MSI message. 
+ */ + sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ? + pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev); + + if (!sc->vmci_num_intr) { + VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt" + " messages"); + return (ENODEV); + } + + sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ? + VMCI_MAX_INTRS : 1; + if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) { + if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr)) + return (ENXIO); + } else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) { + if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr)) + return (ENXIO); + } + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_allocate_interrupt_resources -- + * + * Allocate resources necessary for interrupts. + * + * Results: + * 0 if success, ENXIO otherwise. + * + * Side effects: + * Resources get allocated on success. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_allocate_interrupt_resources(struct vmci_softc *sc) +{ + struct resource *irq; + int flags, i, rid; + + flags = RF_ACTIVE; + flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0; + rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1; + + for (i = 0; i < sc->vmci_num_intr; i++, rid++) { + irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid, + flags); + if (irq == NULL) + return (ENXIO); + sc->vmci_intrs[i].vmci_irq = irq; + sc->vmci_intrs[i].vmci_rid = rid; + } + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_setup_interrupts -- + * + * Sets up the interrupts. + * + * Results: + * 0 if success, appropriate error code from bus_setup_intr otherwise. + * + * Side effects: + * Interrupt handler gets attached. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_setup_interrupts(struct vmci_softc *sc) +{ + struct vmci_interrupt *intr; + int error, flags; + + flags = INTR_TYPE_NET | INTR_MPSAFE; + if (sc->vmci_num_intr > 1) + flags |= INTR_EXCL; + + intr = &sc->vmci_intrs[0]; + error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL, + vmci_interrupt, NULL, &intr->vmci_handler); + if (error) + return (error); + bus_describe_intr(sc->vmci_dev, intr->vmci_irq, intr->vmci_handler, + "vmci_interrupt"); + + if (sc->vmci_num_intr == 2) { + intr = &sc->vmci_intrs[1]; + error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, + NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler); + if (error) + return (error); + bus_describe_intr(sc->vmci_dev, intr->vmci_irq, + intr->vmci_handler, "vmci_interrupt_bm"); + } + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_interrupt -- + * + * Interrupt handler for legacy or MSI interrupt, or for first MSI-X + * interrupt (vector VMCI_INTR_DATAGRAM). + * + * Results: + * None. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static void +vmci_interrupt(void *arg) +{ + + if (vmci_sc->vmci_num_intr == 2) + taskqueue_enqueue(taskqueue_swi, + &vmci_sc->vmci_interrupt_dq_task); + else { + unsigned int icr; + + icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR); + if (icr == 0 || icr == 0xffffffff) + return; + if (icr & VMCI_ICR_DATAGRAM) { + taskqueue_enqueue(taskqueue_swi, + &vmci_sc->vmci_interrupt_dq_task); + icr &= ~VMCI_ICR_DATAGRAM; + } + if (icr & VMCI_ICR_NOTIFICATION) { + taskqueue_enqueue(taskqueue_swi, + &vmci_sc->vmci_interrupt_bm_task); + icr &= ~VMCI_ICR_NOTIFICATION; + } + if (icr != 0) + VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt " + "cause"); + } +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_interrupt_bm -- + * + * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION, + * which is for the notification bitmap. Will only get called if we are + * using MSI-X with exclusive vectors. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_interrupt_bm(void *arg) +{ + + ASSERT(vmci_sc->vmci_num_intr == 2); + taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task); +} + +/* + *------------------------------------------------------------------------------ + * + * dispatch_datagrams -- + * + * Reads and dispatches incoming datagrams. + * + * Results: + * None. + * + * Side effects: + * Reads data from the device. + * + *------------------------------------------------------------------------------ + */ + +static void +dispatch_datagrams(void *context, int data) +{ + + if (data_buffer == NULL) + VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer " + "present"); + + vmci_read_datagrams_from_port((vmci_io_handle) 0, + vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR, + data_buffer, data_buffer_size); +} + +/* + *------------------------------------------------------------------------------ + * + * process_bitmap -- + * + * Scans the notification bitmap for raised flags, clears them and handles + * the notifications. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +process_bitmap(void *context, int data) +{ + + if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL) + VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present"); + + vmci_scan_notification_bitmap( + vmci_sc->vmci_notifications_bitmap.dma_vaddr); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_dismantle_interrupts -- + * + * Releases resources, detaches the interrupt handler and drains the task + * queue. + * + * Results: + * None. + * + * Side effects: + * No more interrupts. 
+ * + *------------------------------------------------------------------------------ + */ + +static void +vmci_dismantle_interrupts(struct vmci_softc *sc) +{ + struct vmci_interrupt *intr; + int i; + + for (i = 0; i < sc->vmci_num_intr; i++) { + intr = &sc->vmci_intrs[i]; + if (intr->vmci_handler != NULL) { + bus_teardown_intr(sc->vmci_dev, intr->vmci_irq, + intr->vmci_handler); + intr->vmci_handler = NULL; + } + if (intr->vmci_irq != NULL) { + bus_release_resource(sc->vmci_dev, SYS_RES_IRQ, + intr->vmci_rid, intr->vmci_irq); + intr->vmci_irq = NULL; + intr->vmci_rid = -1; + } + } + + if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) && + (sc->vmci_num_intr)) + pci_release_msi(sc->vmci_dev); + + taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task); + taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task); + + if (data_buffer != NULL) + free(data_buffer, M_DEVBUF); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_delayed_work_fn_cb -- + * + * Callback function that executes the queued up delayed work functions. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_delayed_work_fn_cb(void *context, int data) +{ + vmci_list(vmci_delayed_work_info) temp_list; + + vmci_list_init(&temp_list); + + /* + * Swap vmci_delayed_work_infos list with the empty temp_list while + * holding a lock. vmci_delayed_work_infos would then be an empty list + * and temp_list would contain the elements from the original + * vmci_delayed_work_infos. Finally, iterate through temp_list + * executing the delayed callbacks. + */ + + mtx_lock(&vmci_sc->vmci_delayed_work_lock); + vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos, + vmci_delayed_work_info, entry); + mtx_unlock(&vmci_sc->vmci_delayed_work_lock); + + while (!vmci_list_empty(&temp_list)) { + struct vmci_delayed_work_info *delayed_work_info = + vmci_list_first(&temp_list); + + delayed_work_info->work_fn(delayed_work_info->data); + + vmci_list_remove(delayed_work_info, entry); + vmci_free_kernel_mem(delayed_work_info, + sizeof(*delayed_work_info)); + } +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_schedule_delayed_work_fn -- + * + * Schedule the specified callback. + * + * Results: + * 0 if success, error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data) +{ + struct vmci_delayed_work_info *delayed_work_info; + + delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info), + VMCI_MEMORY_ATOMIC); + + if (!delayed_work_info) + return (VMCI_ERROR_NO_MEM); + + delayed_work_info->work_fn = work_fn; + delayed_work_info->data = data; + mtx_lock(&vmci_sc->vmci_delayed_work_lock); + vmci_list_insert(&vmci_sc->vmci_delayed_work_infos, + delayed_work_info, entry); + mtx_unlock(&vmci_sc->vmci_delayed_work_lock); + + taskqueue_enqueue(taskqueue_thread, + &vmci_sc->vmci_delayed_work_task); + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_send_datagram -- + * + * VM to hypervisor call mechanism. + * + * Results: + * The result of the hypercall. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_send_datagram(struct vmci_datagram *dg) +{ + int result; + + if (dg == NULL) + return (VMCI_ERROR_INVALID_ARGS); + + /* + * Need to acquire spinlock on the device because + * the datagram data may be spread over multiple pages and the monitor + * may interleave device user rpc calls from multiple VCPUs. Acquiring + * the spinlock precludes that possibility. Disabling interrupts to + * avoid incoming datagrams during a "rep out" and possibly landing up + * in this function. + */ + mtx_lock_spin(&vmci_sc->vmci_spinlock); + + /* + * Send the datagram and retrieve the return value from the result + * register. + */ + __asm__ __volatile__( + "cld\n\t" + "rep outsb\n\t" + : /* No output. */ + : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR), + "c"(VMCI_DG_SIZE(dg)), "S"(dg) + ); + + /* + * XXX: Should read result high port as well when updating handlers to + * return 64bit. + */ + + result = bus_space_read_4(vmci_sc->vmci_iot0, + vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR); + mtx_unlock_spin(&vmci_sc->vmci_spinlock); + + return (result); +} Index: sys/dev/vmware/vmci/vmci_call_defs.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_call_defs.h @@ -0,0 +1,242 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +#ifndef _VMCI_CALL_DEFS_H_ +#define _VMCI_CALL_DEFS_H_ + +#include "vmci_defs.h" + +/* + * All structs here are an integral size of their largest member, ie. a struct + * with at least one 8-byte member will have a size that is an integral of 8. + * A struct which has a largest member of size 4 will have a size that is an + * integral of 4. + */ + +/* + * Base struct for vmci datagrams. + */ +struct vmci_datagram { + struct vmci_handle dst; + struct vmci_handle src; + uint64_t payload_size; +}; + +/* + * Second flag is for creating a well-known handle instead of a per context + * handle. Next flag is for deferring datagram delivery, so that the + * datagram callback is invoked in a delayed context (not interrupt context). + */ +#define VMCI_FLAG_DG_NONE 0 +#define VMCI_FLAG_WELLKNOWN_DG_HND 0x1 +#define VMCI_FLAG_ANYCID_DG_HND 0x2 +#define VMCI_FLAG_DG_DELAYED_CB 0x4 + +/* Event callback should fire in a delayed context (not interrupt context.) */ +#define VMCI_FLAG_EVENT_NONE 0 +#define VMCI_FLAG_EVENT_DELAYED_CB 0x1 + +/* + * Maximum supported size of a VMCI datagram for routable datagrams. + * Datagrams going to the hypervisor are allowed to be larger. + */ +#define VMCI_MAX_DG_SIZE \ + (17 * 4096) +#define VMCI_MAX_DG_PAYLOAD_SIZE \ + (VMCI_MAX_DG_SIZE - sizeof(struct vmci_datagram)) +#define VMCI_DG_PAYLOAD(_dg) \ + (void *)((char *)(_dg) + sizeof(struct vmci_datagram)) +#define VMCI_DG_HEADERSIZE \ + sizeof(struct vmci_datagram) +#define VMCI_DG_SIZE(_dg) \ + (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payload_size) +#define VMCI_DG_SIZE_ALIGNED(_dg) \ + ((VMCI_DG_SIZE(_dg) + 7) & (size_t)~7) + +/* + * Struct used for querying, via VMCI_RESOURCES_QUERY, the availability of + * hypervisor resources. + * Struct size is 16 bytes. All fields in struct are aligned to their natural + * alignment. + */ +struct vmci_resources_query_hdr { + struct vmci_datagram hdr; + uint32_t num_resources; + uint32_t _padding; +}; + +/* + * Convenience struct for negotiating vectors. Must match layout of + * vmci_resource_query_hdr minus the struct vmci_datagram header. 
+ */ +struct vmci_resources_query_msg { + uint32_t num_resources; + uint32_t _padding; + vmci_resource resources[1]; +}; + +/* + * Struct used for setting the notification bitmap. All fields in struct are + * aligned to their natural alignment. + */ +struct vmci_notify_bitmap_set_msg { + struct vmci_datagram hdr; + PPN bitmap_ppn; + uint32_t _pad; +}; + +/* + * Struct used for linking a doorbell handle with an index in the notify + * bitmap. All fields in struct are aligned to their natural alignment. + */ +struct vmci_doorbell_link_msg { + struct vmci_datagram hdr; + struct vmci_handle handle; + uint64_t notify_idx; +}; + +/* + * Struct used for unlinking a doorbell handle from an index in the notify + * bitmap. All fields in struct are aligned to their natural alignment. + */ +struct vmci_doorbell_unlink_msg { + struct vmci_datagram hdr; + struct vmci_handle handle; +}; + +/* + * Struct used for generating a notification on a doorbell handle. All fields + * in struct are aligned to their natural alignment. + */ +struct vmci_doorbell_notify_msg { + struct vmci_datagram hdr; + struct vmci_handle handle; +}; + +/* + * This struct is used to contain data for events. Size of this struct is a + * multiple of 8 bytes, and all fields are aligned to their natural alignment. + */ +struct vmci_event_data { + vmci_event_type event; /* 4 bytes. */ + uint32_t _pad; + /* + * Event payload is put here. + */ +}; + +/* Callback needed for correctly waiting on events. */ + +typedef int +(*vmci_datagram_recv_cb)(void *client_data, struct vmci_datagram *msg); + +/* + * We use the following inline function to access the payload data associated + * with an event data. + */ + +static inline void * +vmci_event_data_payload(struct vmci_event_data *ev_data) +{ + + return ((void *)((char *)ev_data + sizeof(*ev_data))); +} + +/* + * Define the different VMCI_EVENT payload data types here. All structs must + * be a multiple of 8 bytes, and fields must be aligned to their natural + * alignment. + */ +struct vmci_event_payload_context { + vmci_id context_id; /* 4 bytes. */ + uint32_t _pad; +}; + +struct vmci_event_payload_qp { + /* QueuePair handle. */ + struct vmci_handle handle; + /* Context id of attaching/detaching VM. */ + vmci_id peer_id; + uint32_t _pad; +}; + +/* + * We define the following struct to get the size of the maximum event data + * the hypervisor may send to the guest. If adding a new event payload type + * above, add it to the following struct too (inside the union). + */ +struct vmci_event_data_max { + struct vmci_event_data event_data; + union { + struct vmci_event_payload_context context_payload; + struct vmci_event_payload_qp qp_payload; + } ev_data_payload; +}; + +/* + * Struct used for VMCI_EVENT_SUBSCRIBE/UNSUBSCRIBE and VMCI_EVENT_HANDLER + * messages. Struct size is 32 bytes. All fields in struct are aligned to + * their natural alignment. + */ +struct vmci_event_msg { + struct vmci_datagram hdr; + struct vmci_event_data event_data; /* Has event type & payload. */ + /* + * Payload gets put here. + */ +}; + +/* + * We use the following inline function to access the payload data associated + * with an event message. + */ + +static inline void * +vmci_event_msg_payload(struct vmci_event_msg *e_msg) +{ + + return (vmci_event_data_payload(&e_msg->event_data)); +} + +/* Flags for VMCI QueuePair API. */ +#define VMCI_QPFLAG_ATTACH_ONLY \ + 0x1 /* Fail alloc if QP not created by peer. */ +#define VMCI_QPFLAG_LOCAL \ + 0x2 /* Only allow attaches from local context. 
*/ +#define VMCI_QPFLAG_NONBLOCK \ + 0x4 /* Host won't block when guest is quiesced. */ + +/* For asymmetric queuepairs, update as new flags are added. */ +#define VMCI_QP_ASYMM \ + VMCI_QPFLAG_NONBLOCK +#define VMCI_QP_ASYMM_PEER \ + (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QP_ASYMM) + +/* Update the following (bitwise OR flags) while adding new flags. */ +#define VMCI_QP_ALL_FLAGS \ + (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QPFLAG_LOCAL | VMCI_QPFLAG_NONBLOCK) + +/* + * Structs used for QueuePair alloc and detach messages. We align fields of + * these structs to 64 bit boundaries. + */ +struct vmci_queue_pair_alloc_msg { + struct vmci_datagram hdr; + struct vmci_handle handle; + vmci_id peer; /* 32bit field. */ + uint32_t flags; + uint64_t produce_size; + uint64_t consume_size; + uint64_t num_ppns; + /* List of PPNs placed here. */ +}; + +struct vmci_queue_pair_detach_msg { + struct vmci_datagram hdr; + struct vmci_handle handle; +}; + +#endif /* !_VMCI_CALL_DEFS_H_ */ Index: sys/dev/vmware/vmci/vmci_datagram.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_datagram.h @@ -0,0 +1,24 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Internal functions in the VMCI Simple Datagram API */ + +#ifndef _VMCI_DATAGRAM_H_ +#define _VMCI_DATAGRAM_H_ + +#include "vmci_call_defs.h" + +/* Datagram API for non-public use. */ +int vmci_datagram_dispatch(vmci_id context_id, struct vmci_datagram *dg); +int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg); +int vmci_datagram_get_priv_flags(struct vmci_handle handle, + vmci_privilege_flags *priv_flags); + +/* Misc. */ +void vmci_datagram_sync(void); +bool vmci_datagram_check_host_capabilities(void); + +#endif /* !_VMCI_DATAGRAM_H_ */ Index: sys/dev/vmware/vmci/vmci_datagram.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_datagram.c @@ -0,0 +1,647 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* This file implements the VMCI Simple Datagram API on the host. */ + +#include +#include + +#include "vmci_datagram.h" +#include "vmci_driver.h" +#include "vmci_kernel_api.h" +#include "vmci_kernel_defs.h" +#include "vmci_resource.h" + +#define LGPFX "vmci_datagram: " + +/* + * datagram_entry describes the datagram entity. It is used for datagram + * entities created only on the host. + */ +struct datagram_entry { + struct vmci_resource resource; + uint32_t flags; + bool run_delayed; + vmci_datagram_recv_cb recv_cb; + void *client_data; + vmci_event destroy_event; + vmci_privilege_flags priv_flags; +}; + +struct vmci_delayed_datagram_info { + struct datagram_entry *entry; + struct vmci_datagram msg; +}; + +static int vmci_datagram_get_priv_flags_int(vmci_id contextID, + struct vmci_handle handle, + vmci_privilege_flags *priv_flags); +static void datagram_free_cb(void *resource); +static int datagram_release_cb(void *client_data); + +/*------------------------------ Helper functions ----------------------------*/ + +/* + *------------------------------------------------------------------------------ + * + * datagram_free_cb -- + * + * Callback to free datagram structure when resource is no longer used, + * ie. the reference count reached 0. + * + * Result: + * None. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static void +datagram_free_cb(void *client_data) +{ + struct datagram_entry *entry = (struct datagram_entry *)client_data; + + ASSERT(entry); + + vmci_signal_event(&entry->destroy_event); + + /* + * The entry is freed in vmci_datagram_destroy_hnd, who is waiting for + * the above signal. + */ +} + +/* + *------------------------------------------------------------------------------ + * + * datagram_release_cb -- + * + * Callback to release the resource reference. It is called by the + * vmci_wait_on_event function before it blocks. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +datagram_release_cb(void *client_data) +{ + struct datagram_entry *entry; + + entry = (struct datagram_entry *)client_data; + + ASSERT(entry); + + vmci_resource_release(&entry->resource); + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * datagram_create_hnd -- + * + * Internal function to create a datagram entry given a handle. + * + * Results: + * VMCI_SUCCESS if created, negative errno value otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +datagram_create_hnd(vmci_id resource_id, uint32_t flags, + vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb, + void *client_data, struct vmci_handle *out_handle) +{ + struct datagram_entry *entry; + struct vmci_handle handle; + vmci_id context_id; + int result; + + ASSERT(recv_cb != NULL); + ASSERT(out_handle != NULL); + ASSERT(!(priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)); + + if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0) + return (VMCI_ERROR_INVALID_ARGS); + else { + if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) + context_id = VMCI_INVALID_ID; + else { + context_id = vmci_get_context_id(); + if (context_id == VMCI_INVALID_ID) + return (VMCI_ERROR_NO_RESOURCES); + } + + if (resource_id == VMCI_INVALID_ID) { + resource_id = vmci_resource_get_id(context_id); + if (resource_id == VMCI_INVALID_ID) + return (VMCI_ERROR_NO_HANDLE); + } + + handle = VMCI_MAKE_HANDLE(context_id, resource_id); + } + + entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL); + if (entry == NULL) { + VMCI_LOG_WARNING(LGPFX"Failed allocating memory for datagram " + "entry.\n"); + return (VMCI_ERROR_NO_MEM); + } + + if (!vmci_can_schedule_delayed_work()) { + if (flags & VMCI_FLAG_DG_DELAYED_CB) { + vmci_free_kernel_mem(entry, sizeof(*entry)); + return (VMCI_ERROR_INVALID_ARGS); + } + entry->run_delayed = false; + } else + entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? + true : false; + + entry->flags = flags; + entry->recv_cb = recv_cb; + entry->client_data = client_data; + vmci_create_event(&entry->destroy_event); + entry->priv_flags = priv_flags; + + /* Make datagram resource live. 
*/ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_DATAGRAM, handle, datagram_free_cb, entry); + if (result != VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"Failed to add new resource " + "(handle=0x%x:0x%x).\n", handle.context, handle.resource); + vmci_destroy_event(&entry->destroy_event); + vmci_free_kernel_mem(entry, sizeof(*entry)); + return (result); + } + *out_handle = handle; + + return (VMCI_SUCCESS); +} + +/*------------------------------ Public API functions ------------------------*/ + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_create_handle -- + * + * Creates a host context datagram endpoint and returns a handle to it. + * + * Results: + * VMCI_SUCCESS if created, negative errno value otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_datagram_create_handle(vmci_id resource_id, uint32_t flags, + vmci_datagram_recv_cb recv_cb, void *client_data, + struct vmci_handle *out_handle) +{ + + if (out_handle == NULL) + return (VMCI_ERROR_INVALID_ARGS); + + if (recv_cb == NULL) { + VMCI_LOG_DEBUG(LGPFX"Client callback needed when creating " + "datagram.\n"); + return (VMCI_ERROR_INVALID_ARGS); + } + + return (datagram_create_hnd(resource_id, flags, + VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, + recv_cb, client_data, out_handle)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_create_handle_priv -- + * + * Creates a host context datagram endpoint and returns a handle to it. + * + * Results: + * VMCI_SUCCESS if created, negative errno value otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_datagram_create_handle_priv(vmci_id resource_id, uint32_t flags, + vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb, + void *client_data, struct vmci_handle *out_handle) +{ + + if (out_handle == NULL) + return (VMCI_ERROR_INVALID_ARGS); + + if (recv_cb == NULL) { + VMCI_LOG_DEBUG(LGPFX"Client callback needed when creating " + "datagram.\n"); + return (VMCI_ERROR_INVALID_ARGS); + } + + if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) + return (VMCI_ERROR_INVALID_ARGS); + + return (datagram_create_hnd(resource_id, flags, priv_flags, recv_cb, + client_data, out_handle)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_destroy_handle -- + * + * Destroys a handle. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_datagram_destroy_handle(struct vmci_handle handle) +{ + struct datagram_entry *entry; + struct vmci_resource *resource; + + resource = vmci_resource_get(handle, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (resource == NULL) { + VMCI_LOG_DEBUG(LGPFX"Failed to destroy datagram " + "(handle=0x%x:0x%x).\n", handle.context, handle.resource); + return (VMCI_ERROR_NOT_FOUND); + } + entry = RESOURCE_CONTAINER(resource, struct datagram_entry, resource); + + vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DATAGRAM); + + /* + * We now wait on the destroyEvent and release the reference we got + * above. + */ + vmci_wait_on_event(&entry->destroy_event, datagram_release_cb, entry); + + /* + * We know that we are now the only reference to the above entry so + * can safely free it. 
 */
+	vmci_destroy_event(&entry->destroy_event);
+	vmci_free_kernel_mem(entry, sizeof(*entry));
+
+	return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_get_priv_flags_int --
+ *
+ *     Internal utility function with the same purpose as
+ *     vmci_datagram_get_priv_flags that also takes a context_id.
+ *
+ * Result:
+ *     VMCI_SUCCESS on success, VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ *     None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+vmci_datagram_get_priv_flags_int(vmci_id context_id, struct vmci_handle handle,
+    vmci_privilege_flags *priv_flags)
+{
+
+	ASSERT(priv_flags);
+	ASSERT(context_id != VMCI_INVALID_ID);
+
+	if (context_id == VMCI_HOST_CONTEXT_ID) {
+		struct datagram_entry *src_entry;
+		struct vmci_resource *resource;
+
+		resource = vmci_resource_get(handle,
+		    VMCI_RESOURCE_TYPE_DATAGRAM);
+		if (resource == NULL)
+			return (VMCI_ERROR_INVALID_ARGS);
+		src_entry = RESOURCE_CONTAINER(resource, struct datagram_entry,
+		    resource);
+		*priv_flags = src_entry->priv_flags;
+		vmci_resource_release(resource);
+	} else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID)
+		*priv_flags = VMCI_MAX_PRIVILEGE_FLAGS;
+	else
+		*priv_flags = VMCI_NO_PRIVILEGE_FLAGS;
+
+	return (VMCI_SUCCESS);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_get_priv_flags --
+ *
+ *     Utility function that retrieves the privilege flags associated with a
+ *     given datagram handle. For hypervisor and guest endpoints, the
+ *     privileges are determined by the context ID, but for host endpoints
+ *     privileges are associated with the complete handle.
+ *
+ * Result:
+ *     VMCI_SUCCESS on success, VMCI_ERROR_INVALID_ARGS if handle is invalid.
+ *
+ * Side effects:
+ *     None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_datagram_get_priv_flags(struct vmci_handle handle,
+    vmci_privilege_flags *priv_flags)
+{
+
+	if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+		return (VMCI_ERROR_INVALID_ARGS);
+
+	return (vmci_datagram_get_priv_flags_int(handle.context, handle,
+	    priv_flags));
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_delayed_dispatch_cb --
+ *
+ *     Calls the specified callback in a delayed context.
+ *
+ * Results:
+ *     None.
+ *
+ * Side effects:
+ *     None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static void
+vmci_datagram_delayed_dispatch_cb(void *data)
+{
+	struct vmci_delayed_datagram_info *dg_info;
+
+	dg_info = (struct vmci_delayed_datagram_info *)data;
+
+	ASSERT(data);
+
+	dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg);
+
+	vmci_resource_release(&dg_info->entry->resource);
+
+	vmci_free_kernel_mem(dg_info, sizeof(*dg_info) +
+	    (size_t)dg_info->msg.payload_size);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_datagram_dispatch_as_guest --
+ *
+ *     Dispatch datagram as a guest, down through the VMX and potentially to
+ *     the host.
+ *
+ * Result:
+ *     Number of bytes sent on success, appropriate error code otherwise.
+ *
+ * Side effects:
+ *     None.
+ * + *------------------------------------------------------------------------------ + */ + +static int +vmci_datagram_dispatch_as_guest(struct vmci_datagram *dg) +{ + struct vmci_resource *resource; + int retval; + + resource = vmci_resource_get(dg->src, VMCI_RESOURCE_TYPE_DATAGRAM); + if (NULL == resource) + return VMCI_ERROR_NO_HANDLE; + + retval = vmci_send_datagram(dg); + vmci_resource_release(resource); + + return (retval); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_dispatch -- + * + * Dispatch datagram. This will determine the routing for the datagram and + * dispatch it accordingly. + * + * Result: + * Number of bytes sent on success, appropriate error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_datagram_dispatch(vmci_id context_id, struct vmci_datagram *dg) +{ + + ASSERT(dg); + ASSERT_ON_COMPILE(sizeof(struct vmci_datagram) == 24); + + if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) { + VMCI_LOG_DEBUG(LGPFX"Payload (size=%lu bytes) too big to send." + "\n", dg->payload_size); + return (VMCI_ERROR_INVALID_ARGS); + } + + return (vmci_datagram_dispatch_as_guest(dg)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_invoke_guest_handler -- + * + * Invoke the handler for the given datagram. This is intended to be called + * only when acting as a guest and receiving a datagram from the virtual + * device. + * + * Result: + * VMCI_SUCCESS on success, other error values on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) +{ + struct datagram_entry *dst_entry; + struct vmci_resource *resource; + int retval; + + ASSERT(dg); + + if (dg->payload_size > VMCI_MAX_DG_PAYLOAD_SIZE) { + VMCI_LOG_DEBUG(LGPFX"Payload (size=%lu bytes) too large to " + "deliver.\n", dg->payload_size); + return (VMCI_ERROR_PAYLOAD_TOO_LARGE); + } + + resource = vmci_resource_get(dg->dst, VMCI_RESOURCE_TYPE_DATAGRAM); + if (NULL == resource) { + VMCI_LOG_DEBUG(LGPFX"destination (handle=0x%x:0x%x) doesn't " + "exist.\n", dg->dst.context, dg->dst.resource); + return (VMCI_ERROR_NO_HANDLE); + } + + dst_entry = RESOURCE_CONTAINER(resource, struct datagram_entry, + resource); + if (dst_entry->run_delayed) { + struct vmci_delayed_datagram_info *dg_info; + + dg_info = vmci_alloc_kernel_mem(sizeof(*dg_info) + + (size_t)dg->payload_size, VMCI_MEMORY_ATOMIC); + if (NULL == dg_info) { + vmci_resource_release(resource); + retval = VMCI_ERROR_NO_MEM; + goto exit; + } + + dg_info->entry = dst_entry; + memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg)); + + retval = vmci_schedule_delayed_work( + vmci_datagram_delayed_dispatch_cb, dg_info); + if (retval < VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"Failed to schedule delayed " + "work for datagram (result=%d).\n", retval); + vmci_free_kernel_mem(dg_info, sizeof(*dg_info) + + (size_t)dg->payload_size); + vmci_resource_release(resource); + dg_info = NULL; + goto exit; + } + } else { + dst_entry->recv_cb(dst_entry->client_data, dg); + vmci_resource_release(resource); + retval = VMCI_SUCCESS; + } + +exit: + return (retval); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_send -- + * + * Sends the payload to the destination datagram handle. 
+ * + * Results: + * Returns number of bytes sent if success, or error code if failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_datagram_send(struct vmci_datagram *msg) +{ + + if (msg == NULL) + return (VMCI_ERROR_INVALID_ARGS); + + return (vmci_datagram_dispatch(VMCI_INVALID_ID, msg)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_sync -- + * + * Use this as a synchronization point when setting globals, for example, + * during device shutdown. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_datagram_sync(void) +{ + + vmci_resource_sync(); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_datagram_check_host_capabilities -- + * + * Verify that the host supports the resources we need. None are required + * for datagrams since they are implicitly supported. + * + * Results: + * true. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +bool +vmci_datagram_check_host_capabilities(void) +{ + + return (true); +} Index: sys/dev/vmware/vmci/vmci_defs.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_defs.h @@ -0,0 +1,715 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +#ifndef _VMCI_DEFS_H_ +#define _VMCI_DEFS_H_ + +#include +#include + +#include "vmci_kernel_defs.h" + +#pragma GCC diagnostic ignored "-Wcast-qual" + +/* Register offsets. */ +#define VMCI_STATUS_ADDR 0x00 +#define VMCI_CONTROL_ADDR 0x04 +#define VMCI_ICR_ADDR 0x08 +#define VMCI_IMR_ADDR 0x0c +#define VMCI_DATA_OUT_ADDR 0x10 +#define VMCI_DATA_IN_ADDR 0x14 +#define VMCI_CAPS_ADDR 0x18 +#define VMCI_RESULT_LOW_ADDR 0x1c +#define VMCI_RESULT_HIGH_ADDR 0x20 + +/* Status register bits. */ +#define VMCI_STATUS_INT_ON 0x1 + +/* Control register bits. */ +#define VMCI_CONTROL_RESET 0x1 +#define VMCI_CONTROL_INT_ENABLE 0x2 +#define VMCI_CONTROL_INT_DISABLE 0x4 + +/* Capabilities register bits. */ +#define VMCI_CAPS_HYPERCALL 0x1 +#define VMCI_CAPS_GUESTCALL 0x2 +#define VMCI_CAPS_DATAGRAM 0x4 +#define VMCI_CAPS_NOTIFICATIONS 0x8 + +/* Interrupt Cause register bits. */ +#define VMCI_ICR_DATAGRAM 0x1 +#define VMCI_ICR_NOTIFICATION 0x2 + +/* Interrupt Mask register bits. */ +#define VMCI_IMR_DATAGRAM 0x1 +#define VMCI_IMR_NOTIFICATION 0x2 + +/* Interrupt type. */ +typedef enum vmci_intr_type { + VMCI_INTR_TYPE_INTX = 0, + VMCI_INTR_TYPE_MSI = 1, + VMCI_INTR_TYPE_MSIX = 2 +} vmci_intr_type; + +/* + * Maximum MSI/MSI-X interrupt vectors in the device. + */ +#define VMCI_MAX_INTRS 2 + +/* + * Supported interrupt vectors. There is one for each ICR value above, + * but here they indicate the position in the vector array/message ID. + */ +#define VMCI_INTR_DATAGRAM 0 +#define VMCI_INTR_NOTIFICATION 1 + +/* + * A single VMCI device has an upper limit of 128 MiB on the amount of + * memory that can be used for queue pairs. + */ +#define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024) + +/* + * We have a fixed set of resource IDs available in the VMX. + * This allows us to have a very simple implementation since we statically + * know how many will create datagram handles. 
If a new caller arrives and + * we have run out of slots we can manually increment the maximum size of + * available resource IDs. + */ + +typedef uint32_t vmci_resource; + +/* VMCI reserved hypervisor datagram resource IDs. */ +#define VMCI_RESOURCES_QUERY 0 +#define VMCI_GET_CONTEXT_ID 1 +#define VMCI_SET_NOTIFY_BITMAP 2 +#define VMCI_DOORBELL_LINK 3 +#define VMCI_DOORBELL_UNLINK 4 +#define VMCI_DOORBELL_NOTIFY 5 +/* + * VMCI_DATAGRAM_REQUEST_MAP and VMCI_DATAGRAM_REMOVE_MAP are + * obsoleted by the removal of VM to VM communication. + */ +#define VMCI_DATAGRAM_REQUEST_MAP 6 +#define VMCI_DATAGRAM_REMOVE_MAP 7 +#define VMCI_EVENT_SUBSCRIBE 8 +#define VMCI_EVENT_UNSUBSCRIBE 9 +#define VMCI_QUEUEPAIR_ALLOC 10 +#define VMCI_QUEUEPAIR_DETACH 11 +/* + * VMCI_VSOCK_VMX_LOOKUP was assigned to 12 for Fusion 3.0/3.1, + * WS 7.0/7.1 and ESX 4.1 + */ +#define VMCI_HGFS_TRANSPORT 13 +#define VMCI_UNITY_PBRPC_REGISTER 14 +/* + * This resource is used for VMCI socket control packets sent to the + * hypervisor (CID 0) because RID 1 is already reserved. + */ +#define VSOCK_PACKET_HYPERVISOR_RID 15 +#define VMCI_RESOURCE_MAX 16 +/* + * The core VMCI device functionality only requires the resource IDs of + * VMCI_QUEUEPAIR_DETACH and below. + */ +#define VMCI_CORE_DEVICE_RESOURCE_MAX VMCI_QUEUEPAIR_DETACH + +/* + * VMCI reserved host datagram resource IDs. + * vsock control channel has resource id 1. + */ +#define VMCI_DVFILTER_DATA_PATH_DATAGRAM 2 + +/* VMCI Ids. */ +typedef uint32_t vmci_id; + +struct vmci_id_range { + int8_t action; /* VMCI_FA_X, for use in filters. */ + vmci_id begin; /* Beginning of range. */ + vmci_id end; /* End of range. */ +}; + +struct vmci_handle { + vmci_id context; + vmci_id resource; +}; + +static inline struct vmci_handle +VMCI_MAKE_HANDLE(vmci_id cid, vmci_id rid) +{ + struct vmci_handle h; + + h.context = cid; + h.resource = rid; + return (h); +} + +#define VMCI_HANDLE_TO_CONTEXT_ID(_handle) \ + ((_handle).context) +#define VMCI_HANDLE_TO_RESOURCE_ID(_handle) \ + ((_handle).resource) +#define VMCI_HANDLE_EQUAL(_h1, _h2) \ + ((_h1).context == (_h2).context && (_h1).resource == (_h2).resource) + +#define VMCI_INVALID_ID 0xFFFFFFFF +static const struct vmci_handle VMCI_INVALID_HANDLE = {VMCI_INVALID_ID, + VMCI_INVALID_ID}; + +#define VMCI_HANDLE_INVALID(_handle) \ + VMCI_HANDLE_EQUAL((_handle), VMCI_INVALID_HANDLE) + +/* + * The below defines can be used to send anonymous requests. + * This also indicates that no response is expected. + */ +#define VMCI_ANON_SRC_CONTEXT_ID \ + VMCI_INVALID_ID +#define VMCI_ANON_SRC_RESOURCE_ID \ + VMCI_INVALID_ID +#define VMCI_ANON_SRC_HANDLE \ + VMCI_MAKE_HANDLE(VMCI_ANON_SRC_CONTEXT_ID, \ + VMCI_ANON_SRC_RESOURCE_ID) + +/* The lowest 16 context ids are reserved for internal use. */ +#define VMCI_RESERVED_CID_LIMIT 16 + +/* + * Hypervisor context id, used for calling into hypervisor + * supplied services from the VM. + */ +#define VMCI_HYPERVISOR_CONTEXT_ID 0 + +/* + * Well-known context id, a logical context that contains a set of + * well-known services. This context ID is now obsolete. + */ +#define VMCI_WELL_KNOWN_CONTEXT_ID 1 + +/* + * Context ID used by host endpoints. + */ +#define VMCI_HOST_CONTEXT_ID 2 +#define VMCI_HOST_CONTEXT_INVALID_EVENT ((uintptr_t)~0) + +#define VMCI_CONTEXT_IS_VM(_cid) \ + (VMCI_INVALID_ID != _cid && _cid > VMCI_HOST_CONTEXT_ID) + +/* + * The VMCI_CONTEXT_RESOURCE_ID is used together with VMCI_MAKE_HANDLE to make + * handles that refer to a specific context. 
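+ * For example, VMCI_MAKE_HANDLE(cid, VMCI_CONTEXT_RESOURCE_ID) yields the
+ * handle that identifies the context cid itself.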
+ */ +#define VMCI_CONTEXT_RESOURCE_ID 0 + +/* + *------------------------------------------------------------------------------ + * + * VMCI error codes. + * + *------------------------------------------------------------------------------ + */ + +#define VMCI_SUCCESS_QUEUEPAIR_ATTACH 5 +#define VMCI_SUCCESS_QUEUEPAIR_CREATE 4 +#define VMCI_SUCCESS_LAST_DETACH 3 +#define VMCI_SUCCESS_ACCESS_GRANTED 2 +#define VMCI_SUCCESS_ENTRY_DEAD 1 +#define VMCI_SUCCESS 0LL +#define VMCI_ERROR_INVALID_RESOURCE (-1) +#define VMCI_ERROR_INVALID_ARGS (-2) +#define VMCI_ERROR_NO_MEM (-3) +#define VMCI_ERROR_DATAGRAM_FAILED (-4) +#define VMCI_ERROR_MORE_DATA (-5) +#define VMCI_ERROR_NO_MORE_DATAGRAMS (-6) +#define VMCI_ERROR_NO_ACCESS (-7) +#define VMCI_ERROR_NO_HANDLE (-8) +#define VMCI_ERROR_DUPLICATE_ENTRY (-9) +#define VMCI_ERROR_DST_UNREACHABLE (-10) +#define VMCI_ERROR_PAYLOAD_TOO_LARGE (-11) +#define VMCI_ERROR_INVALID_PRIV (-12) +#define VMCI_ERROR_GENERIC (-13) +#define VMCI_ERROR_PAGE_ALREADY_SHARED (-14) +#define VMCI_ERROR_CANNOT_SHARE_PAGE (-15) +#define VMCI_ERROR_CANNOT_UNSHARE_PAGE (-16) +#define VMCI_ERROR_NO_PROCESS (-17) +#define VMCI_ERROR_NO_DATAGRAM (-18) +#define VMCI_ERROR_NO_RESOURCES (-19) +#define VMCI_ERROR_UNAVAILABLE (-20) +#define VMCI_ERROR_NOT_FOUND (-21) +#define VMCI_ERROR_ALREADY_EXISTS (-22) +#define VMCI_ERROR_NOT_PAGE_ALIGNED (-23) +#define VMCI_ERROR_INVALID_SIZE (-24) +#define VMCI_ERROR_REGION_ALREADY_SHARED (-25) +#define VMCI_ERROR_TIMEOUT (-26) +#define VMCI_ERROR_DATAGRAM_INCOMPLETE (-27) +#define VMCI_ERROR_INCORRECT_IRQL (-28) +#define VMCI_ERROR_EVENT_UNKNOWN (-29) +#define VMCI_ERROR_OBSOLETE (-30) +#define VMCI_ERROR_QUEUEPAIR_MISMATCH (-31) +#define VMCI_ERROR_QUEUEPAIR_NOTSET (-32) +#define VMCI_ERROR_QUEUEPAIR_NOTOWNER (-33) +#define VMCI_ERROR_QUEUEPAIR_NOTATTACHED (-34) +#define VMCI_ERROR_QUEUEPAIR_NOSPACE (-35) +#define VMCI_ERROR_QUEUEPAIR_NODATA (-36) +#define VMCI_ERROR_BUSMEM_INVALIDATION (-37) +#define VMCI_ERROR_MODULE_NOT_LOADED (-38) +#define VMCI_ERROR_DEVICE_NOT_FOUND (-39) +#define VMCI_ERROR_QUEUEPAIR_NOT_READY (-40) +#define VMCI_ERROR_WOULD_BLOCK (-41) + +/* VMCI clients should return error code withing this range */ +#define VMCI_ERROR_CLIENT_MIN (-500) +#define VMCI_ERROR_CLIENT_MAX (-550) + +/* Internal error codes. */ +#define VMCI_SHAREDMEM_ERROR_BAD_CONTEXT (-1000) + +#define VMCI_PATH_MAX 256 + +/* VMCI reserved events. */ +typedef uint32_t vmci_event_type; + +#define VMCI_EVENT_CTX_ID_UPDATE 0 // Only applicable to guest + // endpoints +#define VMCI_EVENT_CTX_REMOVED 1 // Applicable to guest and host +#define VMCI_EVENT_QP_RESUMED 2 // Only applicable to guest + // endpoints +#define VMCI_EVENT_QP_PEER_ATTACH 3 // Applicable to guest, host + // and VMX +#define VMCI_EVENT_QP_PEER_DETACH 4 // Applicable to guest, host + // and VMX +#define VMCI_EVENT_MEM_ACCESS_ON 5 // Applicable to VMX and vmk. On + // vmk, this event has the + // Context payload type +#define VMCI_EVENT_MEM_ACCESS_OFF 6 // Applicable to VMX and vmk. + // Same as above for the payload + // type +#define VMCI_EVENT_GUEST_PAUSED 7 // Applicable to vmk. This + // event has the Context + // payload type +#define VMCI_EVENT_GUEST_UNPAUSED 8 // Applicable to vmk. Same as + // above for the payload type. +#define VMCI_EVENT_MAX 9 + +/* + * Of the above events, a few are reserved for use in the VMX, and other + * endpoints (guest and host kernel) should not use them. 
For the rest of the + * events, we allow both host and guest endpoints to subscribe to them, to + * maintain the same API for host and guest endpoints. + */ + +#define VMCI_EVENT_VALID_VMX(_event) \ + (_event == VMCI_EVENT_QP_PEER_ATTACH || \ + _event == VMCI_EVENT_QP_PEER_DETACH || \ + _event == VMCI_EVENT_MEM_ACCESS_ON || \ + _event == VMCI_EVENT_MEM_ACCESS_OFF) + +#define VMCI_EVENT_VALID(_event) \ + (_event < VMCI_EVENT_MAX && \ + _event != VMCI_EVENT_MEM_ACCESS_ON && \ + _event != VMCI_EVENT_MEM_ACCESS_OFF && \ + _event != VMCI_EVENT_GUEST_PAUSED && \ + _event != VMCI_EVENT_GUEST_UNPAUSED) + +/* Reserved guest datagram resource ids. */ +#define VMCI_EVENT_HANDLER 0 + +/* + * VMCI coarse-grained privileges (per context or host process/endpoint. An + * entity with the restricted flag is only allowed to interact with the + * hypervisor and trusted entities. + */ +typedef uint32_t vmci_privilege_flags; + +#define VMCI_PRIVILEGE_FLAG_RESTRICTED 0x01 +#define VMCI_PRIVILEGE_FLAG_TRUSTED 0x02 +#define VMCI_PRIVILEGE_ALL_FLAGS \ + (VMCI_PRIVILEGE_FLAG_RESTRICTED | VMCI_PRIVILEGE_FLAG_TRUSTED) +#define VMCI_NO_PRIVILEGE_FLAGS 0x00 +#define VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS VMCI_NO_PRIVILEGE_FLAGS +#define VMCI_LEAST_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_RESTRICTED +#define VMCI_MAX_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_TRUSTED + +/* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */ +#define VMCI_RESERVED_RESOURCE_ID_MAX 1023 + +#define VMCI_DOMAIN_NAME_MAXLEN 32 + +#define VMCI_LGPFX "vmci: " + +/* + * struct vmci_queue_header + * + * A Queue cannot stand by itself as designed. Each Queue's header contains a + * pointer into itself (the producer_tail) and into its peer (consumer_head). + * The reason for the separation is one of accessibility: Each end-point can + * modify two things: where the next location to enqueue is within its produce_q + * (producer_tail); and where the next dequeue location is in its consume_q + * (consumer_head). + * + * An end-point cannot modify the pointers of its peer (guest to guest; NOTE + * that in the host both queue headers are mapped r/w). But, each end-point + * needs read access to both Queue header structures in order to determine how + * much space is used (or left) in the Queue. This is because for an end-point + * to know how full its produce_q is, it needs to use the consumer_head that + * points into the produce_q but -that- consumer_head is in the Queue header + * for that end-points consume_q. + * + * Thoroughly confused? Sorry. + * + * producer_tail: the point to enqueue new entrants. When you approach a line + * in a store, for example, you walk up to the tail. + * + * consumer_head: the point in the queue from which the next element is + * dequeued. In other words, who is next in line is he who is at the head of + * the line. + * + * Also, producer_tail points to an empty byte in the Queue, whereas + * consumer_head points to a valid byte of data (unless producer_tail == + * consumer_head in which case consumerHead does not point to a valid byte of + * data). + * + * For a queue of buffer 'size' bytes, the tail and head pointers will be in + * the range [0, size-1]. + * + * If produce_q_header->producer_tail == consume_q_header->consumer_head then + * the produce_q is empty. + */ +struct vmci_queue_header { + /* All fields are 64bit and aligned. */ + struct vmci_handle handle; /* Identifier. */ + volatile uint64_t producer_tail; /* Offset in this queue. */ + volatile uint64_t consumer_head; /* Offset in peer queue. 
*/ +}; + + +/* + * If one client of a QueuePair is a 32bit entity, we restrict the QueuePair + * size to be less than 4GB, and use 32bit atomic operations on the head and + * tail pointers. 64bit atomic read on a 32bit entity involves cmpxchg8b which + * is an atomic read-modify-write. This will cause traces to fire when a 32bit + * consumer tries to read the producer's tail pointer, for example, because the + * consumer has read-only access to the producer's tail pointer. + * + * We provide the following macros to invoke 32bit or 64bit atomic operations + * based on the architecture the code is being compiled on. + */ + +#ifdef __x86_64__ +#define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffffffffffff) +#define qp_atomic_read_offset(x) atomic_load_64(x) +#define qp_atomic_write_offset(x, y) atomic_store_64(x, y) +#else /* __x86_64__ */ + /* + * Wrappers below are being used because atomic_store_ operates + * on a specific . Likewise for atomic_load_ + */ + + static inline uint32_t + type_safe_atomic_read_32(void *var) + { + return (atomic_load_32((volatile uint32_t *)(var))); + } + + static inline void + type_safe_atomic_write_32(void *var, uint32_t val) + { + atomic_store_32((volatile uint32_t *)(var), (uint32_t)(val)); + } + +#define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffff) +#define qp_atomic_read_offset(x) type_safe_atomic_read_32((void *)(x)) +#define qp_atomic_write_offset(x, y) \ + type_safe_atomic_write_32((void *)(x), (uint32_t)(y)) +#endif /* __x86_64__ */ + +/* + *------------------------------------------------------------------------------ + * + * qp_add_pointer -- + * + * Helper to add a given offset to a head or tail pointer. Wraps the value + * of the pointer around the max size of the queue. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline void +qp_add_pointer(volatile uint64_t *var, size_t add, uint64_t size) +{ + uint64_t new_val = qp_atomic_read_offset(var); + + if (new_val >= size - add) + new_val -= size; + + new_val += add; + qp_atomic_write_offset(var, new_val); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_producer_tail -- + * + * Helper routine to get the Producer Tail from the supplied queue. + * + * Results: + * The contents of the queue's producer tail. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline uint64_t +vmci_queue_header_producer_tail(const struct vmci_queue_header *q_header) +{ + struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header; + return (qp_atomic_read_offset(&qh->producer_tail)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_consumer_head -- + * + * Helper routine to get the Consumer Head from the supplied queue. + * + * Results: + * The contents of the queue's consumer tail. + * + * Side effects: + * None. 
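+ *
+ * Note:
+ *      Together with vmci_queue_header_producer_tail() above, this is what
+ *      the free space and buffer ready helpers further down build on.  As
+ *      an illustration, for a produce queue of size S, tail == head means
+ *      the queue is empty, and at most S - 1 bytes can ever be enqueued,
+ *      since one byte is kept free to distinguish full from empty.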
+ * + *------------------------------------------------------------------------------ + */ + +static inline uint64_t +vmci_queue_header_consumer_head(const struct vmci_queue_header *q_header) +{ + struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header; + return (qp_atomic_read_offset(&qh->consumer_head)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_add_producer_tail -- + * + * Helper routine to increment the Producer Tail. Fundamentally, + * qp_add_pointer() is used to manipulate the tail itself. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline void +vmci_queue_header_add_producer_tail(struct vmci_queue_header *q_header, + size_t add, uint64_t queue_size) +{ + + qp_add_pointer(&q_header->producer_tail, add, queue_size); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_add_consumer_head -- + * + * Helper routine to increment the Consumer Head. Fundamentally, + * qp_add_pointer() is used to manipulate the head itself. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline void +vmci_queue_header_add_consumer_head(struct vmci_queue_header *q_header, + size_t add, uint64_t queue_size) +{ + + qp_add_pointer(&q_header->consumer_head, add, queue_size); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_get_pointers -- + * + * Helper routine for getting the head and the tail pointer for a queue. + * Both the VMCIQueues are needed to get both the pointers for one queue. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline void +vmci_queue_header_get_pointers(const struct vmci_queue_header *produce_q_header, + const struct vmci_queue_header *consume_q_header, uint64_t *producer_tail, + uint64_t *consumer_head) +{ + + if (producer_tail) + *producer_tail = + vmci_queue_header_producer_tail(produce_q_header); + + if (consumer_head) + *consumer_head = + vmci_queue_header_consumer_head(consume_q_header); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_reset_pointers -- + * + * Reset the tail pointer (of "this" queue) and the head pointer (of "peer" + * queue). + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline void +vmci_queue_header_reset_pointers(struct vmci_queue_header *q_header) +{ + + qp_atomic_write_offset(&q_header->producer_tail, CONST64U(0)); + qp_atomic_write_offset(&q_header->consumer_head, CONST64U(0)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_init -- + * + * Initializes a queue's state (head & tail pointers). + * + * Results: + * None. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static inline void +vmci_queue_header_init(struct vmci_queue_header *q_header, + const struct vmci_handle handle) +{ + + q_header->handle = handle; + vmci_queue_header_reset_pointers(q_header); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_free_space -- + * + * Finds available free space in a produce queue to enqueue more data or + * reports an error if queue pair corruption is detected. + * + * Results: + * Free space size in bytes or an error code. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline int64_t +vmci_queue_header_free_space(const struct vmci_queue_header *produce_q_header, + const struct vmci_queue_header *consume_q_header, + const uint64_t produce_q_size) +{ + uint64_t free_space; + uint64_t head; + uint64_t tail; + + tail = vmci_queue_header_producer_tail(produce_q_header); + head = vmci_queue_header_consumer_head(consume_q_header); + + if (tail >= produce_q_size || head >= produce_q_size) + return (VMCI_ERROR_INVALID_SIZE); + + /* + * Deduct 1 to avoid tail becoming equal to head which causes ambiguity. + * If head and tail are equal it means that the queue is empty. + */ + + if (tail >= head) + free_space = produce_q_size - (tail - head) - 1; + else + free_space = head - tail - 1; + + return (free_space); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_header_buf_ready -- + * + * vmci_queue_header_free_space() does all the heavy lifting of determing + * the number of free bytes in a Queue. This routine, then subtracts that + * size from the full size of the Queue so the caller knows how many bytes + * are ready to be dequeued. + * + * Results: + * On success, available data size in bytes (up to MAX_INT64). + * On failure, appropriate error code. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline int64_t +vmci_queue_header_buf_ready(const struct vmci_queue_header *consume_q_header, + const struct vmci_queue_header *produce_q_header, + const uint64_t consume_q_size) +{ + int64_t free_space; + + free_space = vmci_queue_header_free_space(consume_q_header, + produce_q_header, consume_q_size); + if (free_space < VMCI_SUCCESS) + return (free_space); + else + return (consume_q_size - free_space - 1); +} + +#endif /* !_VMCI_DEFS_H_ */ Index: sys/dev/vmware/vmci/vmci_doorbell.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_doorbell.h @@ -0,0 +1,27 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Internal functions in the VMCI Doorbell API. 
*/ + +#ifndef _VMCI_DOORBELL_H_ +#define _VMCI_DOORBELL_H_ + +#include "vmci_defs.h" + +int vmci_doorbell_init(void); +void vmci_doorbell_exit(void); +void vmci_doorbell_hibernate(bool enter_hibernate); +void vmci_doorbell_sync(void); + +int vmci_doorbell_host_context_notify(vmci_id src_CID, + struct vmci_handle handle); +int vmci_doorbell_get_priv_flags(struct vmci_handle handle, + vmci_privilege_flags *priv_flags); + +bool vmci_register_notification_bitmap(PPN bitmap_PPN); +void vmci_scan_notification_bitmap(uint8_t *bitmap); + +#endif /* !_VMCI_DOORBELL_H_ */ Index: sys/dev/vmware/vmci/vmci_doorbell.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_doorbell.c @@ -0,0 +1,906 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* This file implements the VMCI doorbell API. */ + +#include + +#include "vmci_doorbell.h" +#include "vmci_driver.h" +#include "vmci_kernel_api.h" +#include "vmci_kernel_defs.h" +#include "vmci_resource.h" +#include "vmci_utils.h" + +#define LGPFX "vmci_doorbell: " + +#define VMCI_DOORBELL_INDEX_TABLE_SIZE 64 +#define VMCI_DOORBELL_HASH(_idx) \ + vmci_hash_id((_idx), VMCI_DOORBELL_INDEX_TABLE_SIZE) + +/* Describes a doorbell notification handle allocated by the host. */ +struct vmci_doorbell_entry { + struct vmci_resource resource; + uint32_t idx; + vmci_list_item(vmci_doorbell_entry) idx_list_item; + vmci_privilege_flags priv_flags; + bool is_doorbell; + bool run_delayed; + vmci_callback notify_cb; + void *client_data; + vmci_event destroy_event; + volatile int active; +}; + +struct vmci_doorbell_index_table { + vmci_lock lock; + vmci_list(vmci_doorbell_entry) entries[VMCI_DOORBELL_INDEX_TABLE_SIZE]; +}; + +/* The VMCI index table keeps track of currently registered doorbells. */ +static struct vmci_doorbell_index_table vmci_doorbell_it; + +/* + * The max_notify_idx is one larger than the currently known bitmap index in + * use, and is used to determine how much of the bitmap needs to be scanned. + */ +static uint32_t max_notify_idx; + +/* + * The notify_idx_count is used for determining whether there are free entries + * within the bitmap (if notify_idx_count + 1 < max_notify_idx). + */ +static uint32_t notify_idx_count; + +/* + * The last_notify_idx_reserved is used to track the last index handed out - in + * the case where multiple handles share a notification index, we hand out + * indexes round robin based on last_notify_idx_reserved. + */ +static uint32_t last_notify_idx_reserved; + +/* This is a one entry cache used to by the index allocation. */ +static uint32_t last_notify_idx_released = PAGE_SIZE; + +static void vmci_doorbell_free_cb(void *client_data); +static int vmci_doorbell_release_cb(void *client_data); +static void vmci_doorbell_delayed_dispatch_cb(void *data); + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_init -- + * + * General init code. + * + * Result: + * VMCI_SUCCESS on success, lock allocation error otherwise. + * + * Side effects: + * None. 
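+ *
+ *      As an illustration, vmci_components_init() in vmci_driver.c treats
+ *      any return value below VMCI_SUCCESS as a failure:
+ *
+ *          result = vmci_doorbell_init();
+ *          if (result < VMCI_SUCCESS)
+ *                  goto event_exit;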
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_doorbell_init(void) +{ + uint32_t bucket; + + for (bucket = 0; bucket < ARRAYSIZE(vmci_doorbell_it.entries); + ++bucket) + vmci_list_init(&vmci_doorbell_it.entries[bucket]); + + return (vmci_init_lock(&vmci_doorbell_it.lock, + "VMCI Doorbell index table lock")); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_exit -- + * + * General exit code. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_doorbell_exit(void) +{ + + vmci_cleanup_lock(&vmci_doorbell_it.lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_free_cb -- + * + * Callback to free doorbell entry structure when resource is no longer used, + * i.e. the reference count reached 0. The entry is freed in + * vmci_doorbell_destroy(), which is waiting on the signal that gets fired + * here. + * + * Result: + * None. + * + * Side effects: + * Signals VMCI event. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_doorbell_free_cb(void *client_data) +{ + struct vmci_doorbell_entry *entry; + + entry = (struct vmci_doorbell_entry *)client_data; + ASSERT(entry); + vmci_signal_event(&entry->destroy_event); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_release_cb -- + * + * Callback to release the resource reference. It is called by the + * vmci_wait_on_event function before it blocks. + * + * Result: + * Always 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_doorbell_release_cb(void *client_data) +{ + struct vmci_doorbell_entry *entry; + + entry = (struct vmci_doorbell_entry *)client_data; + ASSERT(entry); + vmci_resource_release(&entry->resource); + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_get_priv_flags -- + * + * Utility function that retrieves the privilege flags associated with a + * given doorbell handle. For guest endpoints, the privileges are determined + * by the context ID, but for host endpoints privileges are associated with + * the complete handle. Hypervisor endpoints are not yet supported. + * + * Result: + * VMCI_SUCCESS on success, + * VMCI_ERROR_NOT_FOUND if handle isn't found, + * VMCI_ERROR_INVALID_ARGS if handle is invalid. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_doorbell_get_priv_flags(struct vmci_handle handle, + vmci_privilege_flags *priv_flags) +{ + + if (priv_flags == NULL || handle.context == VMCI_INVALID_ID) + return (VMCI_ERROR_INVALID_ARGS); + + if (handle.context == VMCI_HOST_CONTEXT_ID) { + struct vmci_doorbell_entry *entry; + struct vmci_resource *resource; + + resource = vmci_resource_get(handle, + VMCI_RESOURCE_TYPE_DOORBELL); + if (resource == NULL) + return (VMCI_ERROR_NOT_FOUND); + entry = RESOURCE_CONTAINER( + resource, struct vmci_doorbell_entry, resource); + *priv_flags = entry->priv_flags; + vmci_resource_release(resource); + } else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) { + /* Hypervisor endpoints for notifications are not supported. 
*/ + return (VMCI_ERROR_INVALID_ARGS); + } else + *priv_flags = VMCI_NO_PRIVILEGE_FLAGS; + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_index_table_find -- + * + * Find doorbell entry by bitmap index. + * + * Results: + * Entry if found, NULL if not. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static struct vmci_doorbell_entry * +vmci_doorbell_index_table_find(uint32_t idx) +{ + struct vmci_doorbell_entry *iter; + uint32_t bucket; + + bucket = VMCI_DOORBELL_HASH(idx); + + vmci_list_scan(iter, &vmci_doorbell_it.entries[bucket], idx_list_item) { + if (idx == iter->idx) + return (iter); + } + + return (NULL); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_index_table_add -- + * + * Add the given entry to the index table. This will hold() the entry's + * resource so that the entry is not deleted before it is removed from the + * table. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_doorbell_index_table_add(struct vmci_doorbell_entry *entry) +{ + uint32_t bucket; + uint32_t new_notify_idx; + + ASSERT(entry); + + vmci_resource_hold(&entry->resource); + + vmci_grab_lock_bh(&vmci_doorbell_it.lock); + + /* + * Below we try to allocate an index in the notification bitmap with + * "not too much" sharing between resources. If we use less that the + * full bitmap, we either add to the end if there are no unused flags + * within the currently used area, or we search for unused ones. If we + * use the full bitmap, we allocate the index round robin. + */ + + if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) { + if (last_notify_idx_released < max_notify_idx && + !vmci_doorbell_index_table_find(last_notify_idx_released)) { + new_notify_idx = last_notify_idx_released; + last_notify_idx_released = PAGE_SIZE; + } else { + bool reused = false; + new_notify_idx = last_notify_idx_reserved; + if (notify_idx_count + 1 < max_notify_idx) { + do { + if (!vmci_doorbell_index_table_find( + new_notify_idx)) { + reused = true; + break; + } + new_notify_idx = (new_notify_idx + 1) % + max_notify_idx; + } while (new_notify_idx != + last_notify_idx_released); + } + if (!reused) { + new_notify_idx = max_notify_idx; + max_notify_idx++; + } + } + } else { + new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE; + } + last_notify_idx_reserved = new_notify_idx; + notify_idx_count++; + + entry->idx = new_notify_idx; + bucket = VMCI_DOORBELL_HASH(entry->idx); + vmci_list_insert(&vmci_doorbell_it.entries[bucket], entry, + idx_list_item); + + vmci_release_lock_bh(&vmci_doorbell_it.lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_index_table_remove -- + * + * Remove the given entry from the index table. This will release() the + * entry's resource. + * + * Results: + * None. + * + * Side effects: + * None. 
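+ *
+ *      For example, if indices 3, 5 and 7 are registered and the entry at
+ *      index 7 is removed, max_notify_idx is pruned from 8 down to 6 and
+ *      index 7 becomes the cached last_notify_idx_released.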
+ * + *------------------------------------------------------------------------------ + */ + +static void +vmci_doorbell_index_table_remove(struct vmci_doorbell_entry *entry) +{ + ASSERT(entry); + + vmci_grab_lock_bh(&vmci_doorbell_it.lock); + + vmci_list_remove(entry, idx_list_item); + + notify_idx_count--; + if (entry->idx == max_notify_idx - 1) { + /* + * If we delete an entry with the maximum known notification + * index, we take the opportunity to prune the current max. As + * there might be other unused indices immediately below, we + * lower the maximum until we hit an index in use + */ + + while (max_notify_idx > 0 && + !vmci_doorbell_index_table_find(max_notify_idx - 1)) + max_notify_idx--; + } + last_notify_idx_released = entry->idx; + + vmci_release_lock_bh(&vmci_doorbell_it.lock); + + vmci_resource_release(&entry->resource); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_link -- + * + * Creates a link between the given doorbell handle and the given index in + * the bitmap in the device backend. + * + * Results: + * VMCI_SUCCESS if success, appropriate error code otherwise. + * + * Side effects: + * Notification state is created in hypervisor. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_doorbell_link(struct vmci_handle handle, bool is_doorbell, + uint32_t notify_idx) +{ + struct vmci_doorbell_link_msg link_msg; + vmci_id resource_id; + + ASSERT(!VMCI_HANDLE_INVALID(handle)); + + if (is_doorbell) + resource_id = VMCI_DOORBELL_LINK; + else { + ASSERT(false); + return (VMCI_ERROR_UNAVAILABLE); + } + + link_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + resource_id); + link_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE; + link_msg.handle = handle; + link_msg.notify_idx = notify_idx; + + return (vmci_send_datagram((struct vmci_datagram *)&link_msg)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_unlink -- + * + * Unlinks the given doorbell handle from an index in the bitmap in the + * device backend. + * + * Results: + * VMCI_SUCCESS if success, appropriate error code otherwise. + * + * Side effects: + * Notification state is destroyed in hypervisor. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_doorbell_unlink(struct vmci_handle handle, bool is_doorbell) +{ + struct vmci_doorbell_unlink_msg unlink_msg; + vmci_id resource_id; + + ASSERT(!VMCI_HANDLE_INVALID(handle)); + + if (is_doorbell) + resource_id = VMCI_DOORBELL_UNLINK; + else { + ASSERT(false); + return (VMCI_ERROR_UNAVAILABLE); + } + + unlink_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + resource_id); + unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE; + unlink_msg.handle = handle; + + return (vmci_send_datagram((struct vmci_datagram *)&unlink_msg)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_create -- + * + * Creates a doorbell with the given callback. If the handle is + * VMCI_INVALID_HANDLE, a free handle will be assigned, if possible. The + * callback can be run immediately (potentially with locks held - the + * default) or delayed (in a kernel thread) by specifying the flag + * VMCI_FLAG_DELAYED_CB. 
If delayed execution is selected, a given callback + * may not be run if the kernel is unable to allocate memory for the delayed + * execution (highly unlikely). + * + * Results: + * VMCI_SUCCESS on success, appropriate error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_doorbell_create(struct vmci_handle *handle, uint32_t flags, + vmci_privilege_flags priv_flags, vmci_callback notify_cb, void *client_data) +{ + struct vmci_doorbell_entry *entry; + struct vmci_handle new_handle; + int result; + + if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB || + priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) + return (VMCI_ERROR_INVALID_ARGS); + + entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL); + if (entry == NULL) { + VMCI_LOG_WARNING(LGPFX"Failed allocating memory for datagram " + "entry.\n"); + return (VMCI_ERROR_NO_MEM); + } + + if (!vmci_can_schedule_delayed_work() && + (flags & VMCI_FLAG_DELAYED_CB)) { + result = VMCI_ERROR_INVALID_ARGS; + goto free_mem; + } + + if (VMCI_HANDLE_INVALID(*handle)) { + vmci_id context_id; + + context_id = vmci_get_context_id(); + vmci_id resource_id = vmci_resource_get_id(context_id); + if (resource_id == VMCI_INVALID_ID) { + result = VMCI_ERROR_NO_HANDLE; + goto free_mem; + } + new_handle = VMCI_MAKE_HANDLE(context_id, resource_id); + } else { + if (VMCI_INVALID_ID == handle->resource) { + VMCI_LOG_DEBUG(LGPFX"Invalid argument " + "(handle=0x%x:0x%x).\n", handle->context, + handle->resource); + result = VMCI_ERROR_INVALID_ARGS; + goto free_mem; + } + new_handle = *handle; + } + + entry->idx = 0; + entry->priv_flags = priv_flags; + entry->is_doorbell = true; + entry->run_delayed = (flags & VMCI_FLAG_DELAYED_CB) ? true : false; + entry->notify_cb = notify_cb; + entry->client_data = client_data; + atomic_store_int(&entry->active, 0); + vmci_create_event(&entry->destroy_event); + + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_DOORBELL, new_handle, vmci_doorbell_free_cb, + entry); + if (result != VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"Failed to add new resource " + "(handle=0x%x:0x%x).\n", new_handle.context, + new_handle.resource); + if (result == VMCI_ERROR_DUPLICATE_ENTRY) + result = VMCI_ERROR_ALREADY_EXISTS; + + goto destroy; + } + + vmci_doorbell_index_table_add(entry); + result = vmci_doorbell_link(new_handle, entry->is_doorbell, entry->idx); + if (VMCI_SUCCESS != result) + goto destroy_resource; + atomic_store_int(&entry->active, 1); + + if (VMCI_HANDLE_INVALID(*handle)) + *handle = new_handle; + + return (result); + +destroy_resource: + vmci_doorbell_index_table_remove(entry); + vmci_resource_remove(new_handle, VMCI_RESOURCE_TYPE_DOORBELL); +destroy: + vmci_destroy_event(&entry->destroy_event); +free_mem: + vmci_free_kernel_mem(entry, sizeof(*entry)); + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_destroy -- + * + * Destroys a doorbell previously created with vmci_doorbell_create. This + * operation may block waiting for a callback to finish. + * + * Results: + * VMCI_SUCCESS on success, appropriate error code otherwise. + * + * Side effects: + * May block. 
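+ *
+ *      A typical lifecycle pairs this with vmci_doorbell_create(), roughly
+ *      as follows (my_notify_cb and my_data are illustrative names):
+ *
+ *          struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *
+ *          result = vmci_doorbell_create(&handle, VMCI_FLAG_DELAYED_CB,
+ *              VMCI_NO_PRIVILEGE_FLAGS, my_notify_cb, my_data);
+ *          ...
+ *          result = vmci_doorbell_destroy(handle);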
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_doorbell_destroy(struct vmci_handle handle) +{ + struct vmci_doorbell_entry *entry; + struct vmci_resource *resource; + int result; + + if (VMCI_HANDLE_INVALID(handle)) + return (VMCI_ERROR_INVALID_ARGS); + + resource = vmci_resource_get(handle, VMCI_RESOURCE_TYPE_DOORBELL); + if (resource == NULL) { + VMCI_LOG_DEBUG(LGPFX"Failed to destroy doorbell " + "(handle=0x%x:0x%x).\n", handle.context, handle.resource); + return (VMCI_ERROR_NOT_FOUND); + } + entry = RESOURCE_CONTAINER(resource, struct vmci_doorbell_entry, + resource); + + vmci_doorbell_index_table_remove(entry); + + result = vmci_doorbell_unlink(handle, entry->is_doorbell); + if (VMCI_SUCCESS != result) { + + /* + * The only reason this should fail would be an inconsistency + * between guest and hypervisor state, where the guest believes + * it has an active registration whereas the hypervisor doesn't. + * One case where this may happen is if a doorbell is + * unregistered following a hibernation at a time where the + * doorbell state hasn't been restored on the hypervisor side + * yet. Since the handle has now been removed in the guest, + * we just print a warning and return success. + */ + + VMCI_LOG_DEBUG(LGPFX"Unlink of %s (handle=0x%x:0x%x) unknown " + "by hypervisor (error=%d).\n", + entry->is_doorbell ? "doorbell" : "queuepair", + handle.context, handle.resource, result); + } + + /* + * Now remove the resource from the table. It might still be in use + * after this, in a callback or still on the delayed work queue. + */ + + vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DOORBELL); + + /* + * We now wait on the destroyEvent and release the reference we got + * above. + */ + + vmci_wait_on_event(&entry->destroy_event, vmci_doorbell_release_cb, + entry); + + /* + * We know that we are now the only reference to the above entry so + * can safely free it. + */ + + vmci_destroy_event(&entry->destroy_event); + vmci_free_kernel_mem(entry, sizeof(*entry)); + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_notify_as_guest -- + * + * Notify another guest or the host. We send a datagram down to the host + * via the hypervisor with the notification info. + * + * Results: + * VMCI_SUCCESS on success, appropriate error code otherwise. + * + * Side effects: + * May do a hypercall. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_doorbell_notify_as_guest(struct vmci_handle handle, + vmci_privilege_flags priv_flags) +{ + struct vmci_doorbell_notify_msg notify_msg; + + notify_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_DOORBELL_NOTIFY); + notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE; + notify_msg.handle = handle; + + return (vmci_send_datagram((struct vmci_datagram *)¬ify_msg)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_notify -- + * + * Generates a notification on the doorbell identified by the handle. For + * host side generation of notifications, the caller can specify what the + * privilege of the calling side is. + * + * Results: + * VMCI_SUCCESS on success, appropriate error code otherwise. + * + * Side effects: + * May do a hypercall. 
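+ *
+ *      For example, ringing a peer's doorbell from the guest (peer_cid and
+ *      doorbell_rid are illustrative):
+ *
+ *          retval = vmci_doorbell_notify(
+ *              VMCI_MAKE_HANDLE(peer_cid, doorbell_rid),
+ *              VMCI_NO_PRIVILEGE_FLAGS);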
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_doorbell_notify(struct vmci_handle dst, vmci_privilege_flags priv_flags) +{ + struct vmci_handle src; + + if (VMCI_HANDLE_INVALID(dst) || + (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)) + return (VMCI_ERROR_INVALID_ARGS); + + src = VMCI_INVALID_HANDLE; + + return (vmci_doorbell_notify_as_guest(dst, priv_flags)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_delayed_dispatch_cb -- + * + * Calls the specified callback in a delayed context. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_doorbell_delayed_dispatch_cb(void *data) +{ + struct vmci_doorbell_entry *entry = (struct vmci_doorbell_entry *)data; + + ASSERT(data); + + entry->notify_cb(entry->client_data); + + vmci_resource_release(&entry->resource); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_sync -- + * + * Use this as a synchronization point when setting globals, for example, + * during device shutdown. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_doorbell_sync(void) +{ + + vmci_grab_lock_bh(&vmci_doorbell_it.lock); + vmci_release_lock_bh(&vmci_doorbell_it.lock); + vmci_resource_sync(); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_register_notification_bitmap -- + * + * Register the notification bitmap with the host. + * + * Results: + * true if the bitmap is registered successfully with the device, false + * otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +bool +vmci_register_notification_bitmap(PPN bitmap_ppn) +{ + struct vmci_notify_bitmap_set_msg bitmap_set_msg; + int result; + + /* + * Do not ASSERT() on the guest device here. This function can get + * called during device initialization, so the ASSERT() will fail even + * though the device is (almost) up. + */ + + bitmap_set_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_SET_NOTIFY_BITMAP); + bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + bitmap_set_msg.hdr.payload_size = + sizeof(bitmap_set_msg) - VMCI_DG_HEADERSIZE; + bitmap_set_msg.bitmap_ppn = bitmap_ppn; + + result = vmci_send_datagram((struct vmci_datagram *)&bitmap_set_msg); + if (result != VMCI_SUCCESS) { + VMCI_LOG_DEBUG(LGPFX"Failed to register (PPN=%u) as " + "notification bitmap (error=%d).\n", + bitmap_ppn, result); + return (false); + } + return (true); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_doorbell_fire_entries -- + * + * Executes or schedules the handlers for a given notify index. + * + * Result: + * Notification hash entry if found. NULL otherwise. + * + * Side effects: + * Whatever the side effects of the handlers are. 
+ * + *------------------------------------------------------------------------------ + */ + +static void +vmci_doorbell_fire_entries(uint32_t notify_idx) +{ + struct vmci_doorbell_entry *iter; + uint32_t bucket = VMCI_DOORBELL_HASH(notify_idx); + + vmci_grab_lock_bh(&vmci_doorbell_it.lock); + + vmci_list_scan(iter, &vmci_doorbell_it.entries[bucket], idx_list_item) { + if (iter->idx == notify_idx && + atomic_load_int(&iter->active) == 1) { + ASSERT(iter->notify_cb); + if (iter->run_delayed) { + int err; + + vmci_resource_hold(&iter->resource); + err = vmci_schedule_delayed_work( + vmci_doorbell_delayed_dispatch_cb, iter); + if (err != VMCI_SUCCESS) { + vmci_resource_release(&iter->resource); + goto out; + } + } else + iter->notify_cb(iter->client_data); + } + } + +out: + vmci_release_lock_bh(&vmci_doorbell_it.lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_scan_notification_bitmap -- + * + * Scans the notification bitmap, collects pending notifications, resets + * the bitmap and invokes appropriate callbacks. + * + * Results: + * None. + * + * Side effects: + * May schedule tasks, allocate memory and run callbacks. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_scan_notification_bitmap(uint8_t *bitmap) +{ + uint32_t idx; + + ASSERT(bitmap); + + for (idx = 0; idx < max_notify_idx; idx++) { + if (bitmap[idx] & 0x1) { + bitmap[idx] &= ~1; + vmci_doorbell_fire_entries(idx); + } + } +} Index: sys/dev/vmware/vmci/vmci_driver.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_driver.h @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* VMCI driver interface. */ + +#ifndef _VMCI_DRIVER_H_ +#define _VMCI_DRIVER_H_ + +#include +#include +#include + +#include "vmci_call_defs.h" +#include "vmci_kernel_if.h" + +#ifndef VMCI_DEBUG_LOGGING +#define VMCI_LOG_DEBUG(_args, ...) +#else /* VMCI_DEBUG_LOGGING */ +#define VMCI_LOG_DEBUG(_args, ...) \ + log(LOG_DEBUG, _args, ##__VA_ARGS__) +#endif /* !VMCI_DEBUG_LOGGING */ +#define VMCI_LOG_INFO(_args, ...) \ + log(LOG_INFO, _args, ##__VA_ARGS__) +#define VMCI_LOG_WARNING(_args, ...) \ + log(LOG_WARNING, _args, ##__VA_ARGS__) +#define VMCI_LOG_ERROR(_args, ...) \ + log(LOG_ERR, _args, ##__VA_ARGS__) + +int vmci_components_init(void); +void vmci_components_cleanup(void); +int vmci_send_datagram(struct vmci_datagram *dg); + +void vmci_util_init(void); +void vmci_util_exit(void); +bool vmci_check_host_capabilities(void); +void vmci_read_datagrams_from_port(vmci_io_handle io_handle, + vmci_io_port dg_in_port, uint8_t *dg_in_buffer, + size_t dg_in_buffer_size); + +#endif /* !_VMCI_DRIVER_H_ */ Index: sys/dev/vmware/vmci/vmci_driver.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_driver.c @@ -0,0 +1,481 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* VMCI initialization. 
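+ *
+ * The pieces here are brought up roughly in the following order by the rest
+ * of the driver: vmci_components_init() sets up the resource, event and
+ * doorbell subsystems, vmci_util_init() subscribes to context id update
+ * events, and vmci_check_host_capabilities() verifies that the hypercalls
+ * we depend on are available.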
*/ + +#include "vmci.h" +#include "vmci_doorbell.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_kernel_api.h" +#include "vmci_kernel_defs.h" +#include "vmci_resource.h" + +#define LGPFX "vmci: " +#define VMCI_UTIL_NUM_RESOURCES 1 + +static vmci_id ctx_update_sub_id = VMCI_INVALID_ID; +static volatile int vm_context_id = VMCI_INVALID_ID; + +/* + *------------------------------------------------------------------------------ + * + * vmci_util_cid_update -- + * + * Gets called with the new context id if updated or resumed. + * + * Results: + * Context id. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_util_cid_update(vmci_id sub_id, struct vmci_event_data *event_data, + void *client_data) +{ + struct vmci_event_payload_context *ev_payload; + + ev_payload = vmci_event_data_payload(event_data); + + if (sub_id != ctx_update_sub_id) { + VMCI_LOG_DEBUG(LGPFX"Invalid subscriber (ID=0x%x).\n", sub_id); + return; + } + if (event_data == NULL || ev_payload->context_id == VMCI_INVALID_ID) { + VMCI_LOG_DEBUG(LGPFX"Invalid event data.\n"); + return; + } + VMCI_LOG_INFO(LGPFX"Updating context from (ID=0x%x) to (ID=0x%x) on " + "event (type=%d).\n", atomic_load_int(&vm_context_id), + ev_payload->context_id, event_data->event); + atomic_store_int(&vm_context_id, ev_payload->context_id); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_util_init -- + * + * Subscribe to context id update event. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_util_init(void) +{ + + /* + * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can update + * the internal context id when needed. + */ + if (vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, + vmci_util_cid_update, NULL, &ctx_update_sub_id) < VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"Failed to subscribe to event " + "(type=%d).\n", VMCI_EVENT_CTX_ID_UPDATE); + } +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_util_exit -- + * + * Cleanup + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_util_exit(void) +{ + + if (vmci_event_unsubscribe(ctx_update_sub_id) < VMCI_SUCCESS) + VMCI_LOG_WARNING(LGPFX"Failed to unsubscribe to event " + "(type=%d) with subscriber (ID=0x%x).\n", + VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_util_check_host_capabilities -- + * + * Verify that the host supports the hypercalls we need. If it does not, try + * to find fallback hypercalls and use those instead. + * + * Results: + * true if required hypercalls (or fallback hypercalls) are supported by the + * host, false otherwise. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static bool +vmci_util_check_host_capabilities(void) +{ + struct vmci_resources_query_msg *msg; + struct vmci_datagram *check_msg; + int result; + uint32_t msg_size; + + msg_size = sizeof(struct vmci_resources_query_hdr) + + VMCI_UTIL_NUM_RESOURCES * sizeof(vmci_resource); + check_msg = vmci_alloc_kernel_mem(msg_size, VMCI_MEMORY_NORMAL); + + if (check_msg == NULL) { + VMCI_LOG_WARNING(LGPFX"Check host: Insufficient memory.\n"); + return (false); + } + + check_msg->dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_RESOURCES_QUERY); + check_msg->src = VMCI_ANON_SRC_HANDLE; + check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE; + msg = (struct vmci_resources_query_msg *)VMCI_DG_PAYLOAD(check_msg); + + msg->num_resources = VMCI_UTIL_NUM_RESOURCES; + msg->resources[0] = VMCI_GET_CONTEXT_ID; + + result = vmci_send_datagram(check_msg); + vmci_free_kernel_mem(check_msg, msg_size); + + /* We need the vector. There are no fallbacks. */ + return (result == 0x1); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_check_host_capabilities -- + * + * Tell host which guestcalls we support and let each API check that the + * host supports the hypercalls it needs. If a hypercall is not supported, + * the API can check for a fallback hypercall, or fail the check. + * + * Results: + * true if successful, false otherwise. + * + * Side effects: + * Fallback mechanisms may be enabled in the API and vmmon. + * + *------------------------------------------------------------------------------ + */ + +bool +vmci_check_host_capabilities(void) +{ + bool result; + + result = vmci_event_check_host_capabilities(); + result &= vmci_datagram_check_host_capabilities(); + result &= vmci_util_check_host_capabilities(); + + if (!result) { + /* + * If it failed, then make sure this goes to the system event + * log. + */ + VMCI_LOG_WARNING(LGPFX"Host capability checked failed.\n"); + } else + VMCI_LOG_DEBUG(LGPFX"Host capability check passed.\n"); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_read_datagrams_from_port -- + * + * Reads datagrams from the data in port and dispatches them. We always + * start reading datagrams into only the first page of the datagram buffer. + * If the datagrams don't fit into one page, we use the maximum datagram + * buffer size for the remainder of the invocation. This is a simple + * heuristic for not penalizing small datagrams. + * + * This function assumes that it has exclusive access to the data in port + * for the duration of the call. + * + * Results: + * No result. + * + * Side effects: + * Datagram handlers may be invoked. 
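+ *
+ *      As an illustration: with, say, a 64 KiB datagram buffer, the first
+ *      read pulls in a single page worth of data; only once a datagram
+ *      spills past what has been read so far does the routine switch to
+ *      reading full dg_in_buffer_size sized chunks for the remainder of
+ *      the call.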
+ * + *------------------------------------------------------------------------------ + */ + +void +vmci_read_datagrams_from_port(vmci_io_handle io_handle, vmci_io_port dg_in_port, + uint8_t *dg_in_buffer, size_t dg_in_buffer_size) +{ + struct vmci_datagram *dg; + size_t current_dg_in_buffer_size; + size_t remaining_bytes; + + current_dg_in_buffer_size = PAGE_SIZE; + + ASSERT(dg_in_buffer_size >= PAGE_SIZE); + + vmci_read_port_bytes(io_handle, dg_in_port, dg_in_buffer, + current_dg_in_buffer_size); + dg = (struct vmci_datagram *)dg_in_buffer; + remaining_bytes = current_dg_in_buffer_size; + + while (dg->dst.resource != VMCI_INVALID_ID || + remaining_bytes > PAGE_SIZE) { + size_t dg_in_size; + + /* + * When the input buffer spans multiple pages, a datagram can + * start on any page boundary in the buffer. + */ + + if (dg->dst.resource == VMCI_INVALID_ID) { + ASSERT(remaining_bytes > PAGE_SIZE); + dg = (struct vmci_datagram *)ROUNDUP((uintptr_t)dg + 1, + PAGE_SIZE); + ASSERT((uint8_t *)dg < dg_in_buffer + + current_dg_in_buffer_size); + remaining_bytes = (size_t)(dg_in_buffer + + current_dg_in_buffer_size - (uint8_t *)dg); + continue; + } + + dg_in_size = VMCI_DG_SIZE_ALIGNED(dg); + + if (dg_in_size <= dg_in_buffer_size) { + int result; + + /* + * If the remaining bytes in the datagram buffer doesn't + * contain the complete datagram, we first make sure we + * have enough room for it and then we read the reminder + * of the datagram and possibly any following datagrams. + */ + + if (dg_in_size > remaining_bytes) { + + if (remaining_bytes != + current_dg_in_buffer_size) { + + /* + * We move the partial datagram to the + * front and read the reminder of the + * datagram and possibly following calls + * into the following bytes. + */ + + memmove(dg_in_buffer, dg_in_buffer + + current_dg_in_buffer_size - + remaining_bytes, + remaining_bytes); + + dg = (struct vmci_datagram *) + dg_in_buffer; + } + + if (current_dg_in_buffer_size != + dg_in_buffer_size) + current_dg_in_buffer_size = + dg_in_buffer_size; + + vmci_read_port_bytes(io_handle, dg_in_port, + dg_in_buffer + remaining_bytes, + current_dg_in_buffer_size - + remaining_bytes); + } + + /* + * We special case event datagrams from the + * hypervisor. + */ + if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && + dg->dst.resource == VMCI_EVENT_HANDLER) + result = vmci_event_dispatch(dg); + else + result = + vmci_datagram_invoke_guest_handler(dg); + if (result < VMCI_SUCCESS) + VMCI_LOG_DEBUG(LGPFX"Datagram with resource" + " (ID=0x%x) failed (err=%d).\n", + dg->dst.resource, result); + + /* On to the next datagram. */ + dg = (struct vmci_datagram *)((uint8_t *)dg + + dg_in_size); + } else { + size_t bytes_to_skip; + + /* + * Datagram doesn't fit in datagram buffer of maximal + * size. We drop it. + */ + + VMCI_LOG_DEBUG(LGPFX"Failed to receive datagram " + "(size=%zu bytes).\n", dg_in_size); + + bytes_to_skip = dg_in_size - remaining_bytes; + if (current_dg_in_buffer_size != dg_in_buffer_size) + current_dg_in_buffer_size = dg_in_buffer_size; + for (;;) { + vmci_read_port_bytes(io_handle, dg_in_port, + dg_in_buffer, current_dg_in_buffer_size); + if (bytes_to_skip <= + current_dg_in_buffer_size) + break; + bytes_to_skip -= current_dg_in_buffer_size; + } + dg = (struct vmci_datagram *)(dg_in_buffer + + bytes_to_skip); + } + + remaining_bytes = (size_t) (dg_in_buffer + + current_dg_in_buffer_size - (uint8_t *)dg); + + if (remaining_bytes < VMCI_DG_HEADERSIZE) { + /* Get the next batch of datagrams. 
*/ + + vmci_read_port_bytes(io_handle, dg_in_port, + dg_in_buffer, current_dg_in_buffer_size); + dg = (struct vmci_datagram *)dg_in_buffer; + remaining_bytes = current_dg_in_buffer_size; + } + } +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_get_context_id -- + * + * Returns the current context ID. Note that since this is accessed only + * from code running in the host, this always returns the host context ID. + * + * Results: + * Context ID. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +vmci_id +vmci_get_context_id(void) +{ + if (atomic_load_int(&vm_context_id) == VMCI_INVALID_ID) { + uint32_t result; + struct vmci_datagram get_cid_msg; + get_cid_msg.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_GET_CONTEXT_ID); + get_cid_msg.src = VMCI_ANON_SRC_HANDLE; + get_cid_msg.payload_size = 0; + result = vmci_send_datagram(&get_cid_msg); + atomic_store_int(&vm_context_id, result); + } + return (atomic_load_int(&vm_context_id)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_components_init -- + * + * Initializes VMCI components and registers core hypercalls. + * + * Results: + * VMCI_SUCCESS if successful, appropriate error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_components_init(void) +{ + int result; + + result = vmci_resource_init(); + if (result < VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_resource " + "(result=%d).\n", result); + goto error_exit; + } + + result = vmci_event_init(); + if (result < VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_event " + "(result=%d).\n", result); + goto resource_exit; + } + + result = vmci_doorbell_init(); + if (result < VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"Failed to initialize vmci_doorbell " + "(result=%d).\n", result); + goto event_exit; + } + + VMCI_LOG_DEBUG(LGPFX"components initialized.\n"); + return (VMCI_SUCCESS); + +event_exit: + vmci_event_exit(); + +resource_exit: + vmci_resource_exit(); + +error_exit: + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_components_cleanup -- + * + * Cleans up VMCI components. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_components_cleanup(void) +{ + + vmci_doorbell_exit(); + vmci_event_exit(); + vmci_resource_exit(); +} Index: sys/dev/vmware/vmci/vmci_event.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_event.h @@ -0,0 +1,21 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Event code for the vmci guest driver. 
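+ *
+ * The public subscription interface (vmci_event_subscribe(),
+ * vmci_event_unsubscribe() and the vmci_event_cb callback type) is declared
+ * in vmci_kernel_api_1.h; the functions below are the driver-internal entry
+ * points used by the rest of the driver, e.g. from
+ * vmci_components_init()/vmci_components_cleanup() and from the datagram
+ * dispatch path.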
*/ + +#ifndef _VMCI_EVENT_H_ +#define _VMCI_EVENT_H_ + +#include "vmci_call_defs.h" +#include "vmci_defs.h" + +int vmci_event_init(void); +void vmci_event_exit(void); +void vmci_event_sync(void); +int vmci_event_dispatch(struct vmci_datagram *msg); +bool vmci_event_check_host_capabilities(void); + +#endif /* !_VMCI_EVENT_H_ */ Index: sys/dev/vmware/vmci/vmci_event.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_event.c @@ -0,0 +1,693 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* This file implements VMCI Event code. */ + +#include "vmci.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_kernel_api.h" +#include "vmci_kernel_defs.h" +#include "vmci_kernel_if.h" + +#define LGPFX "vmci_event: " +#define EVENT_MAGIC 0xEABE0000 + +struct vmci_subscription { + vmci_id id; + int ref_count; + bool run_delayed; + vmci_event destroy_event; + vmci_event_type event; + vmci_event_cb callback; + void *callback_data; + vmci_list_item(vmci_subscription) subscriber_list_item; +}; + +static struct vmci_subscription *vmci_event_find(vmci_id sub_id); +static int vmci_event_deliver(struct vmci_event_msg *event_msg); +static int vmci_event_register_subscription(struct vmci_subscription *sub, + vmci_event_type event, uint32_t flags, + vmci_event_cb callback, void *callback_data); +static struct vmci_subscription *vmci_event_unregister_subscription( + vmci_id sub_id); + +static vmci_list(vmci_subscription) subscriber_array[VMCI_EVENT_MAX]; +static vmci_lock subscriber_lock; + +struct vmci_delayed_event_info { + struct vmci_subscription *sub; + uint8_t event_payload[sizeof(struct vmci_event_data_max)]; +}; + +struct vmci_event_ref { + struct vmci_subscription *sub; + vmci_list_item(vmci_event_ref) list_item; +}; + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_init -- + * + * General init code. + * + * Results: + * VMCI_SUCCESS on success, appropriate error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_event_init(void) +{ + int i; + + for (i = 0; i < VMCI_EVENT_MAX; i++) + vmci_list_init(&subscriber_array[i]); + + return (vmci_init_lock(&subscriber_lock, "VMCI Event subscriber lock")); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_exit -- + * + * General exit code. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_event_exit(void) +{ + struct vmci_subscription *iter, *iter_2; + vmci_event_type e; + + /* We free all memory at exit. */ + for (e = 0; e < VMCI_EVENT_MAX; e++) { + vmci_list_scan_safe(iter, &subscriber_array[e], + subscriber_list_item, iter_2) { + + /* + * We should never get here because all events should + * have been unregistered before we try to unload the + * driver module. Also, delayed callbacks could still + * be firing so this cleanup would not be safe. Still + * it is better to free the memory than not ... so we + * leave this code in just in case.... 
+ */ + ASSERT(false); + + vmci_free_kernel_mem(iter, sizeof(*iter)); + } + } + vmci_cleanup_lock(&subscriber_lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_sync -- + * + * Use this as a synchronization point when setting globals, for example, + * during device shutdown. + * + * Results: + * true. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_event_sync(void) +{ + + vmci_grab_lock_bh(&subscriber_lock); + vmci_release_lock_bh(&subscriber_lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_check_host_capabilities -- + * + * Verify that the host supports the hypercalls we need. If it does not, + * try to find fallback hypercalls and use those instead. + * + * Results: + * true if required hypercalls (or fallback hypercalls) are + * supported by the host, false otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +bool +vmci_event_check_host_capabilities(void) +{ + + /* vmci_event does not require any hypercalls. */ + return (true); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_get -- + * + * Gets a reference to the given struct vmci_subscription. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_event_get(struct vmci_subscription *entry) +{ + + ASSERT(entry); + + entry->ref_count++; +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_release -- + * + * Releases the given struct vmci_subscription. + * + * Results: + * None. + * + * Side effects: + * Fires the destroy event if the reference count has gone to zero. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_event_release(struct vmci_subscription *entry) +{ + + ASSERT(entry); + ASSERT(entry->ref_count > 0); + + entry->ref_count--; + if (entry->ref_count == 0) + vmci_signal_event(&entry->destroy_event); +} + + /* + *------------------------------------------------------------------------------ + * + * event_release_cb -- + * + * Callback to release the event entry reference. It is called by the + * vmci_wait_on_event function before it blocks. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +event_release_cb(void *client_data) +{ + struct vmci_subscription *sub = (struct vmci_subscription *)client_data; + + ASSERT(sub); + + vmci_grab_lock_bh(&subscriber_lock); + vmci_event_release(sub); + vmci_release_lock_bh(&subscriber_lock); + + return (0); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_find -- + * + * Find entry. Assumes lock is held. + * + * Results: + * Entry if found, NULL if not. + * + * Side effects: + * Increments the struct vmci_subscription refcount if an entry is found. 
+ * + *------------------------------------------------------------------------------ + */ + +static struct vmci_subscription * +vmci_event_find(vmci_id sub_id) +{ + struct vmci_subscription *iter; + vmci_event_type e; + + for (e = 0; e < VMCI_EVENT_MAX; e++) { + vmci_list_scan(iter, &subscriber_array[e], + subscriber_list_item) { + if (iter->id == sub_id) { + vmci_event_get(iter); + return (iter); + } + } + } + return (NULL); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_delayed_dispatch_cb -- + * + * Calls the specified callback in a delayed context. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_event_delayed_dispatch_cb(void *data) +{ + struct vmci_delayed_event_info *event_info; + struct vmci_subscription *sub; + struct vmci_event_data *ed; + + event_info = (struct vmci_delayed_event_info *)data; + + ASSERT(event_info); + ASSERT(event_info->sub); + + sub = event_info->sub; + ed = (struct vmci_event_data *)event_info->event_payload; + + sub->callback(sub->id, ed, sub->callback_data); + + vmci_grab_lock_bh(&subscriber_lock); + vmci_event_release(sub); + vmci_release_lock_bh(&subscriber_lock); + + vmci_free_kernel_mem(event_info, sizeof(*event_info)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_deliver -- + * + * Actually delivers the events to the subscribers. + * + * Results: + * None. + * + * Side effects: + * The callback function for each subscriber is invoked. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_event_deliver(struct vmci_event_msg *event_msg) +{ + struct vmci_subscription *iter; + int err = VMCI_SUCCESS; + + vmci_list(vmci_event_ref) no_delay_list; + vmci_list_init(&no_delay_list); + + ASSERT(event_msg); + + vmci_grab_lock_bh(&subscriber_lock); + vmci_list_scan(iter, &subscriber_array[event_msg->event_data.event], + subscriber_list_item) { + if (iter->run_delayed) { + struct vmci_delayed_event_info *event_info; + if ((event_info = + vmci_alloc_kernel_mem(sizeof(*event_info), + VMCI_MEMORY_ATOMIC)) == NULL) { + err = VMCI_ERROR_NO_MEM; + goto out; + } + + vmci_event_get(iter); + + memset(event_info, 0, sizeof(*event_info)); + memcpy(event_info->event_payload, + VMCI_DG_PAYLOAD(event_msg), + (size_t)event_msg->hdr.payload_size); + event_info->sub = iter; + err = + vmci_schedule_delayed_work( + vmci_event_delayed_dispatch_cb, event_info); + if (err != VMCI_SUCCESS) { + vmci_event_release(iter); + vmci_free_kernel_mem( + event_info, sizeof(*event_info)); + goto out; + } + + } else { + struct vmci_event_ref *event_ref; + + /* + * We construct a local list of subscribers and release + * subscriber_lock before invoking the callbacks. This + * is similar to delayed callbacks, but callbacks are + * invoked right away here. 
+ */ + if ((event_ref = vmci_alloc_kernel_mem( + sizeof(*event_ref), VMCI_MEMORY_ATOMIC)) == NULL) { + err = VMCI_ERROR_NO_MEM; + goto out; + } + + vmci_event_get(iter); + event_ref->sub = iter; + vmci_list_insert(&no_delay_list, event_ref, list_item); + } + } + +out: + vmci_release_lock_bh(&subscriber_lock); + + if (!vmci_list_empty(&no_delay_list)) { + struct vmci_event_data *ed; + struct vmci_event_ref *iter; + struct vmci_event_ref *iter_2; + + vmci_list_scan_safe(iter, &no_delay_list, list_item, iter_2) { + struct vmci_subscription *cur; + uint8_t event_payload[sizeof( + struct vmci_event_data_max)]; + + cur = iter->sub; + + /* + * We set event data before each callback to ensure + * isolation. + */ + memset(event_payload, 0, sizeof(event_payload)); + memcpy(event_payload, VMCI_DG_PAYLOAD(event_msg), + (size_t)event_msg->hdr.payload_size); + ed = (struct vmci_event_data *)event_payload; + cur->callback(cur->id, ed, cur->callback_data); + + vmci_grab_lock_bh(&subscriber_lock); + vmci_event_release(cur); + vmci_release_lock_bh(&subscriber_lock); + vmci_free_kernel_mem(iter, sizeof(*iter)); + } + } + + return (err); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_dispatch -- + * + * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all + * subscribers for given event. + * + * Results: + * VMCI_SUCCESS on success, error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_event_dispatch(struct vmci_datagram *msg) +{ + struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg; + + ASSERT(msg && + msg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && + msg->dst.resource == VMCI_EVENT_HANDLER); + + if (msg->payload_size < sizeof(vmci_event_type) || + msg->payload_size > sizeof(struct vmci_event_data_max)) + return (VMCI_ERROR_INVALID_ARGS); + + if (!VMCI_EVENT_VALID(event_msg->event_data.event)) + return (VMCI_ERROR_EVENT_UNKNOWN); + + vmci_event_deliver(event_msg); + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_register_subscription -- + * + * Initialize and add subscription to subscriber list. + * + * Results: + * VMCI_SUCCESS on success, error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_event_register_subscription(struct vmci_subscription *sub, + vmci_event_type event, uint32_t flags, vmci_event_cb callback, + void *callback_data) +{ +#define VMCI_EVENT_MAX_ATTEMPTS 10 + static vmci_id subscription_id = 0; + int result; + uint32_t attempts = 0; + bool success; + + ASSERT(sub); + + if (!VMCI_EVENT_VALID(event) || callback == NULL) { + VMCI_LOG_DEBUG(LGPFX"Failed to subscribe to event" + " (type=%d) (callback=%p) (data=%p).\n", + event, callback, callback_data); + return (VMCI_ERROR_INVALID_ARGS); + } + + if (!vmci_can_schedule_delayed_work()) { + /* + * If the platform doesn't support delayed work callbacks then + * don't allow registration for them. + */ + if (flags & VMCI_FLAG_EVENT_DELAYED_CB) + return (VMCI_ERROR_INVALID_ARGS); + sub->run_delayed = false; + } else { + /* + * The platform supports delayed work callbacks. Honor the + * requested flags + */ + sub->run_delayed = (flags & VMCI_FLAG_EVENT_DELAYED_CB) ? 
+ true : false; + } + + sub->ref_count = 1; + sub->event = event; + sub->callback = callback; + sub->callback_data = callback_data; + + vmci_grab_lock_bh(&subscriber_lock); + + for (success = false, attempts = 0; + success == false && attempts < VMCI_EVENT_MAX_ATTEMPTS; + attempts++) { + struct vmci_subscription *existing_sub = NULL; + + /* + * We try to get an id a couple of time before claiming we are + * out of resources. + */ + sub->id = ++subscription_id; + + /* Test for duplicate id. */ + existing_sub = vmci_event_find(sub->id); + if (existing_sub == NULL) { + /* We succeeded if we didn't find a duplicate. */ + success = true; + } else + vmci_event_release(existing_sub); + } + + if (success) { + vmci_create_event(&sub->destroy_event); + vmci_list_insert(&subscriber_array[event], sub, + subscriber_list_item); + result = VMCI_SUCCESS; + } else + result = VMCI_ERROR_NO_RESOURCES; + + vmci_release_lock_bh(&subscriber_lock); + return (result); +#undef VMCI_EVENT_MAX_ATTEMPTS +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_unregister_subscription -- + * + * Remove subscription from subscriber list. + * + * Results: + * struct vmci_subscription when found, NULL otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static struct vmci_subscription * +vmci_event_unregister_subscription(vmci_id sub_id) +{ + struct vmci_subscription *s; + + vmci_grab_lock_bh(&subscriber_lock); + s = vmci_event_find(sub_id); + if (s != NULL) { + vmci_event_release(s); + vmci_list_remove(s, subscriber_list_item); + } + vmci_release_lock_bh(&subscriber_lock); + + if (s != NULL) { + vmci_wait_on_event(&s->destroy_event, event_release_cb, s); + vmci_destroy_event(&s->destroy_event); + } + + return (s); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_subscribe -- + * + * Subscribe to given event. The callback specified can be fired in + * different contexts depending on what flag is specified while registering. + * If flags contains VMCI_FLAG_EVENT_NONE then the callback is fired with + * the subscriber lock held (and BH context on the guest). If flags contain + * VMCI_FLAG_EVENT_DELAYED_CB then the callback is fired with no locks held + * in thread context. This is useful because other vmci_event functions can + * be called, but it also increases the chances that an event will be + * dropped. + * + * Results: + * VMCI_SUCCESS on success, error code otherwise. + * + * Side effects: + * None. 
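+ *
+ * In this implementation the wrapper always registers with
+ * VMCI_FLAG_EVENT_NONE (see below), and vmci_event_deliver() collects such
+ * subscribers on a local list and invokes their callbacks only after the
+ * subscriber lock has been dropped.
+ *
+ * A minimal caller sketch (the callback and variable names are hypothetical,
+ * and VMCI_EVENT_CTX_ID_UPDATE is assumed to be one of the vmci_event_type
+ * values from vmci_defs.h):
+ *
+ *	static void
+ *	example_event_cb(vmci_id sub_id, struct vmci_event_data *ed,
+ *	    void *client_data)
+ *	{
+ *		... react to the event described by ed ...
+ *	}
+ *
+ *	vmci_id example_sub_id;
+ *
+ *	if (vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, example_event_cb,
+ *	    NULL, &example_sub_id) == VMCI_SUCCESS) {
+ *		... later: vmci_event_unsubscribe(example_sub_id); ...
+ *	}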
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_event_subscribe(vmci_event_type event, vmci_event_cb callback, + void *callback_data, vmci_id *subscription_id) +{ + int retval; + uint32_t flags = VMCI_FLAG_EVENT_NONE; + struct vmci_subscription *s = NULL; + + if (subscription_id == NULL) { + VMCI_LOG_DEBUG(LGPFX"Invalid subscription (NULL).\n"); + return (VMCI_ERROR_INVALID_ARGS); + } + + s = vmci_alloc_kernel_mem(sizeof(*s), VMCI_MEMORY_NORMAL); + if (s == NULL) + return (VMCI_ERROR_NO_MEM); + + retval = vmci_event_register_subscription(s, event, flags, + callback, callback_data); + if (retval < VMCI_SUCCESS) { + vmci_free_kernel_mem(s, sizeof(*s)); + return (retval); + } + + *subscription_id = s->id; + return (retval); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_event_unsubscribe -- + * + * Unsubscribe to given event. Removes it from list and frees it. + * Will return callback_data if requested by caller. + * + * Results: + * VMCI_SUCCESS on success, error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_event_unsubscribe(vmci_id sub_id) +{ + struct vmci_subscription *s; + + /* + * Return subscription. At this point we know noone else is accessing + * the subscription so we can free it. + */ + s = vmci_event_unregister_subscription(sub_id); + if (s == NULL) + return (VMCI_ERROR_NOT_FOUND); + vmci_free_kernel_mem(s, sizeof(*s)); + + return (VMCI_SUCCESS); +} Index: sys/dev/vmware/vmci/vmci_hashtable.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_hashtable.h @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Hash table for use in the APIs. */ + +#ifndef _VMCI_HASHTABLE_H_ +#define _VMCI_HASHTABLE_H_ + +#include "vmci_defs.h" +#include "vmci_kernel_if.h" + +struct vmci_hash_entry { + struct vmci_handle handle; + int ref_count; + struct vmci_hash_entry *next; +}; + +struct vmci_hashtable { + struct vmci_hash_entry **entries; + /* Number of buckets in above array. */ + int size; + vmci_lock lock; +}; + +struct vmci_hashtable *vmci_hashtable_create(int size); +void vmci_hashtable_destroy(struct vmci_hashtable *table); +void vmci_hashtable_init_entry(struct vmci_hash_entry *entry, + struct vmci_handle handle); +int vmci_hashtable_add_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry); +int vmci_hashtable_remove_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry); +struct vmci_hash_entry *vmci_hashtable_get_entry(struct vmci_hashtable *table, + struct vmci_handle handle); +void vmci_hashtable_hold_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry); +int vmci_hashtable_release_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry); +bool vmci_hashtable_entry_exists(struct vmci_hashtable *table, + struct vmci_handle handle); +void vmci_hashtable_sync(struct vmci_hashtable *table); + +#endif /* !_VMCI_HASHTABLE_H_ */ Index: sys/dev/vmware/vmci/vmci_hashtable.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_hashtable.c @@ -0,0 +1,565 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Implementation of the VMCI Hashtable. */ + +#include "vmci.h" +#include "vmci_driver.h" +#include "vmci_hashtable.h" +#include "vmci_kernel_defs.h" +#include "vmci_utils.h" + +#define LGPFX "vmci_hashtable: " + +#define VMCI_HASHTABLE_HASH(_h, _sz) \ + vmci_hash_id(VMCI_HANDLE_TO_RESOURCE_ID(_h), (_sz)) + +static int hashtable_unlink_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry); +static bool vmci_hashtable_entry_exists_locked(struct vmci_hashtable *table, + struct vmci_handle handle); + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_create -- + * + * Creates a hashtable. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +struct vmci_hashtable * +vmci_hashtable_create(int size) +{ + struct vmci_hashtable *table; + + table = vmci_alloc_kernel_mem(sizeof(*table), + VMCI_MEMORY_NORMAL); + if (table == NULL) + return (NULL); + memset(table, 0, sizeof(*table)); + + table->entries = vmci_alloc_kernel_mem(sizeof(*table->entries) * size, + VMCI_MEMORY_NORMAL); + if (table->entries == NULL) { + vmci_free_kernel_mem(table, sizeof(*table)); + return (NULL); + } + memset(table->entries, 0, sizeof(*table->entries) * size); + table->size = size; + if (vmci_init_lock(&table->lock, "VMCI Hashtable lock") < + VMCI_SUCCESS) { + vmci_free_kernel_mem(table->entries, sizeof(*table->entries) * size); + vmci_free_kernel_mem(table, sizeof(*table)); + return (NULL); + } + + return (table); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_destroy -- + * + * This function should be called at module exit time. We rely on the + * module ref count to insure that no one is accessing any hash table + * entries at this point in time. Hence we should be able to just remove + * all entries from the hash table. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_hashtable_destroy(struct vmci_hashtable *table) +{ + + ASSERT(table); + + vmci_grab_lock_bh(&table->lock); + vmci_free_kernel_mem(table->entries, sizeof(*table->entries) * + table->size); + table->entries = NULL; + vmci_release_lock_bh(&table->lock); + vmci_cleanup_lock(&table->lock); + vmci_free_kernel_mem(table, sizeof(*table)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_init_entry -- + * + * Initializes a hash entry. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ +void +vmci_hashtable_init_entry(struct vmci_hash_entry *entry, + struct vmci_handle handle) +{ + + ASSERT(entry); + entry->handle = handle; + entry->ref_count = 0; +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_add_entry -- + * + * Adds an entry to the hashtable. + * + * Result: + * None. + * + * Side effects: + * None. 
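+ *
+ * A minimal usage sketch (variable names are hypothetical, 128 is an
+ * arbitrary bucket count and "h" stands for a struct vmci_handle the caller
+ * already owns; error handling is omitted):
+ *
+ *	struct vmci_hashtable *ht;
+ *	struct vmci_hash_entry entry;
+ *
+ *	ht = vmci_hashtable_create(128);
+ *	vmci_hashtable_init_entry(&entry, h);
+ *	if (vmci_hashtable_add_entry(ht, &entry) == VMCI_SUCCESS) {
+ *		... the entry now carries a reference; later callers use
+ *		    vmci_hashtable_get_entry(ht, h) to look it up,
+ *		    vmci_hashtable_release_entry() to drop references and
+ *		    vmci_hashtable_remove_entry() to unlink it ...
+ *	}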
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_hashtable_add_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry) +{ + int idx; + + ASSERT(entry); + ASSERT(table); + + vmci_grab_lock_bh(&table->lock); + + if (vmci_hashtable_entry_exists_locked(table, entry->handle)) { + VMCI_LOG_DEBUG(LGPFX"Entry (handle=0x%x:0x%x) already " + "exists.\n", entry->handle.context, + entry->handle.resource); + vmci_release_lock_bh(&table->lock); + return (VMCI_ERROR_DUPLICATE_ENTRY); + } + + idx = VMCI_HASHTABLE_HASH(entry->handle, table->size); + ASSERT(idx < table->size); + + /* New entry is added to top/front of hash bucket. */ + entry->ref_count++; + entry->next = table->entries[idx]; + table->entries[idx] = entry; + vmci_release_lock_bh(&table->lock); + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_remove_entry -- + * + * Removes an entry from the hashtable. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_hashtable_remove_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry) +{ + int result; + + ASSERT(table); + ASSERT(entry); + + vmci_grab_lock_bh(&table->lock); + + /* First unlink the entry. */ + result = hashtable_unlink_entry(table, entry); + if (result != VMCI_SUCCESS) { + /* We failed to find the entry. */ + goto done; + } + + /* Decrement refcount and check if this is last reference. */ + entry->ref_count--; + if (entry->ref_count == 0) { + result = VMCI_SUCCESS_ENTRY_DEAD; + goto done; + } + +done: + vmci_release_lock_bh(&table->lock); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_get_entry_locked -- + * + * Looks up an entry in the hash table, that is already locked. + * + * Result: + * If the element is found, a pointer to the element is returned. + * Otherwise NULL is returned. + * + * Side effects: + * The reference count of the returned element is increased. + * + *------------------------------------------------------------------------------ + */ + +static struct vmci_hash_entry * +vmci_hashtable_get_entry_locked(struct vmci_hashtable *table, + struct vmci_handle handle) +{ + struct vmci_hash_entry *cur = NULL; + int idx; + + ASSERT(!VMCI_HANDLE_EQUAL(handle, VMCI_INVALID_HANDLE)); + ASSERT(table); + + idx = VMCI_HASHTABLE_HASH(handle, table->size); + + cur = table->entries[idx]; + while (true) { + if (cur == NULL) + break; + + if (VMCI_HANDLE_TO_RESOURCE_ID(cur->handle) == + VMCI_HANDLE_TO_RESOURCE_ID(handle)) { + if ((VMCI_HANDLE_TO_CONTEXT_ID(cur->handle) == + VMCI_HANDLE_TO_CONTEXT_ID(handle)) || + (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(cur->handle))) { + cur->ref_count++; + break; + } + } + cur = cur->next; + } + + return (cur); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_get_entry -- + * + * Gets an entry from the hashtable. + * + * Result: + * None. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +struct vmci_hash_entry * +vmci_hashtable_get_entry(struct vmci_hashtable *table, + struct vmci_handle handle) +{ + struct vmci_hash_entry *entry; + + if (VMCI_HANDLE_EQUAL(handle, VMCI_INVALID_HANDLE)) + return (NULL); + + ASSERT(table); + + vmci_grab_lock_bh(&table->lock); + entry = vmci_hashtable_get_entry_locked(table, handle); + vmci_release_lock_bh(&table->lock); + + return (entry); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_hold_entry -- + * + * Hold the given entry. This will increment the entry's reference count. + * This is like a GetEntry() but without having to lookup the entry by + * handle. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_hashtable_hold_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry) +{ + + ASSERT(table); + ASSERT(entry); + + vmci_grab_lock_bh(&table->lock); + entry->ref_count++; + vmci_release_lock_bh(&table->lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_release_entry_locked -- + * + * Releases an element previously obtained with + * vmci_hashtable_get_entry_locked. + * + * Result: + * If the entry is removed from the hash table, VMCI_SUCCESS_ENTRY_DEAD + * is returned. Otherwise, VMCI_SUCCESS is returned. + * + * Side effects: + * The reference count of the entry is decreased and the entry is removed + * from the hash table on 0. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_hashtable_release_entry_locked(struct vmci_hashtable *table, + struct vmci_hash_entry *entry) +{ + int result = VMCI_SUCCESS; + + ASSERT(table); + ASSERT(entry); + + entry->ref_count--; + /* Check if this is last reference and report if so. */ + if (entry->ref_count == 0) { + + /* + * Remove entry from hash table if not already removed. This + * could have happened already because VMCIHashTable_RemoveEntry + * was called to unlink it. We ignore if it is not found. + * Datagram handles will often have RemoveEntry called, whereas + * SharedMemory regions rely on ReleaseEntry to unlink the entry + * , since the creator does not call RemoveEntry when it + * detaches. + */ + + hashtable_unlink_entry(table, entry); + result = VMCI_SUCCESS_ENTRY_DEAD; + } + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_release_entry -- + * + * Releases an entry from the hashtable. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_hashtable_release_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry) +{ + int result; + + ASSERT(table); + vmci_grab_lock_bh(&table->lock); + result = vmci_hashtable_release_entry_locked(table, entry); + vmci_release_lock_bh(&table->lock); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_entry_exists -- + * + * Returns whether an entry exists in the hashtable + * + * Result: + * true if handle already in hashtable. false otherwise. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +bool +vmci_hashtable_entry_exists(struct vmci_hashtable *table, + struct vmci_handle handle) +{ + bool exists; + + ASSERT(table); + + vmci_grab_lock_bh(&table->lock); + exists = vmci_hashtable_entry_exists_locked(table, handle); + vmci_release_lock_bh(&table->lock); + + return (exists); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_entry_exists_locked -- + * + * Unlocked version of vmci_hashtable_entry_exists. + * + * Result: + * true if handle already in hashtable. false otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static bool +vmci_hashtable_entry_exists_locked(struct vmci_hashtable *table, + struct vmci_handle handle) + +{ + struct vmci_hash_entry *entry; + int idx; + + ASSERT(table); + + idx = VMCI_HASHTABLE_HASH(handle, table->size); + + entry = table->entries[idx]; + while (entry) { + if (VMCI_HANDLE_TO_RESOURCE_ID(entry->handle) == + VMCI_HANDLE_TO_RESOURCE_ID(handle)) + if ((VMCI_HANDLE_TO_CONTEXT_ID(entry->handle) == + VMCI_HANDLE_TO_CONTEXT_ID(handle)) || + (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(handle)) || + (VMCI_INVALID_ID == VMCI_HANDLE_TO_CONTEXT_ID(entry->handle))) + return (true); + entry = entry->next; + } + + return (false); +} + +/* + *------------------------------------------------------------------------------ + * + * hashtable_unlink_entry -- + * + * Assumes caller holds table lock. + * + * Result: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +hashtable_unlink_entry(struct vmci_hashtable *table, + struct vmci_hash_entry *entry) +{ + int result; + struct vmci_hash_entry *prev, *cur; + int idx; + + idx = VMCI_HASHTABLE_HASH(entry->handle, table->size); + + prev = NULL; + cur = table->entries[idx]; + while (true) { + if (cur == NULL) { + result = VMCI_ERROR_NOT_FOUND; + break; + } + if (VMCI_HANDLE_EQUAL(cur->handle, entry->handle)) { + ASSERT(cur == entry); + + /* Remove entry and break. */ + if (prev) + prev->next = cur->next; + else + table->entries[idx] = cur->next; + cur->next = NULL; + result = VMCI_SUCCESS; + break; + } + prev = cur; + cur = cur->next; + } + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_hashtable_sync -- + * + * Use this as a synchronization point when setting globals, for example, + * during device shutdown. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_hashtable_sync(struct vmci_hashtable *table) +{ + + ASSERT(table); + vmci_grab_lock_bh(&table->lock); + vmci_release_lock_bh(&table->lock); +} Index: sys/dev/vmware/vmci/vmci_kernel_api.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_kernel_api.h @@ -0,0 +1,16 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Kernel API (current) exported from the VMCI guest driver. */ + +#ifndef _VMCI_KERNEL_API_H_ +#define _VMCI_KERNEL_API_H_ + +/* With this file you always get the latest version. 
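+ *
+ * As a quick orientation, a minimal datagram sketch using the v1 API
+ * included below (the handle constants are the ones the driver itself uses
+ * for its context-id query in vmci.c; treating vmci_datagram_send() as the
+ * caller-facing way to send such a datagram is an assumption of this
+ * sketch):
+ *
+ *	struct vmci_datagram dg;
+ *	int result;
+ *
+ *	dg.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+ *	    VMCI_GET_CONTEXT_ID);
+ *	dg.src = VMCI_ANON_SRC_HANDLE;
+ *	dg.payload_size = 0;
+ *	result = vmci_datagram_send(&dg);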
*/ +#include "vmci_kernel_api_1.h" +#include "vmci_kernel_api_2.h" + +#endif /* !_VMCI_KERNEL_API_H_ */ Index: sys/dev/vmware/vmci/vmci_kernel_api_1.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_kernel_api_1.h @@ -0,0 +1,69 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Kernel API (v1) exported from the VMCI guest driver. */ + +#ifndef _VMCI_KERNEL_API_1_H_ +#define _VMCI_KERNEL_API_1_H_ + +#include "vmci_call_defs.h" +#include "vmci_defs.h" + +/* Define version 1. */ +#undef VMCI_KERNEL_API_VERSION +#define VMCI_KERNEL_API_VERSION_1 1 +#define VMCI_KERNEL_API_VERSION VMCI_KERNEL_API_VERSION_1 + +/* VMCI Datagram API. */ +int vmci_datagram_create_handle(uint32_t resource_id, uint32_t flags, + vmci_datagram_recv_cb recv_cb, void *client_data, + struct vmci_handle *out_handle); +int vmci_datagram_create_handle_priv(uint32_t resource_id, uint32_t flags, + vmci_privilege_flags priv_flags, vmci_datagram_recv_cb recv_cb, + void *client_data, struct vmci_handle *out_handle); +int vmci_datagram_destroy_handle(struct vmci_handle handle); +int vmci_datagram_send(struct vmci_datagram *msg); + +/* VMCI Utility API. */ +vmci_id vmci_get_context_id(void); + +/* VMCI Event API. */ +typedef void (*vmci_event_cb)(vmci_id sub_id, struct vmci_event_data *ed, + void *client_data); + +int vmci_event_subscribe(vmci_event_type event, vmci_event_cb callback, + void *callback_data, vmci_id *sub_id); +int vmci_event_unsubscribe(vmci_id sub_id); + +/* VMCI Queue Pair API. */ +struct vmci_qpair; + +int vmci_qpair_alloc(struct vmci_qpair **qpair, struct vmci_handle *handle, + uint64_t produce_q_size, uint64_t consume_q_size, vmci_id peer, + uint32_t flags, vmci_privilege_flags priv_flags); +int vmci_qpair_detach(struct vmci_qpair **qpair); +int vmci_qpair_get_produce_indexes(const struct vmci_qpair *qpair, + uint64_t *producer_tail, uint64_t *consumer_head); +int vmci_qpair_get_consume_indexes(const struct vmci_qpair *qpair, + uint64_t *consumer_tail, uint64_t *producer_head); +int64_t vmci_qpair_produce_free_space(const struct vmci_qpair *qpair); +int64_t vmci_qpair_produce_buf_ready(const struct vmci_qpair *qpair); +int64_t vmci_qpair_consume_free_space(const struct vmci_qpair *qpair); +int64_t vmci_qpair_consume_buf_ready(const struct vmci_qpair *qpair); +ssize_t vmci_qpair_enqueue(struct vmci_qpair *qpair, const void *buf, + size_t buf_size, int mode); +ssize_t vmci_qpair_dequeue(struct vmci_qpair *qpair, void *buf, + size_t buf_size, int mode); +ssize_t vmci_qpair_peek(struct vmci_qpair *qpair, void *buf, + size_t buf_size, int mode); +ssize_t vmci_qpair_enquev(struct vmci_qpair *qpair, void *iov, size_t iov_size, + int mode); +ssize_t vmci_qpair_dequev(struct vmci_qpair *qpair, void *iov, size_t iov_size, + int mode); +ssize_t vmci_qpair_peekv(struct vmci_qpair *qpair, void *iov, size_t iov_size, + int mode); + +#endif /* !_VMCI_KERNEL_API_1_H_ */ Index: sys/dev/vmware/vmci/vmci_kernel_api_2.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_kernel_api_2.h @@ -0,0 +1,32 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Kernel API (v2) exported from the VMCI guest driver. */ + +#ifndef _VMCI_KERNEL_API_2_H_ +#define _VMCI_KERNEL_API_2_H_ + +#include "vmci_kernel_api_1.h" + +/* Define version 2. 
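+ *
+ * A minimal doorbell sketch (callback and variable names are hypothetical;
+ * "priv_flags" stands for whatever vmci_privilege_flags value the caller
+ * already uses):
+ *
+ *	static void
+ *	example_doorbell_cb(void *client_data)
+ *	{
+ *		... runs when the doorbell is notified ...
+ *	}
+ *
+ *	struct vmci_handle db_handle;
+ *
+ *	if (vmci_doorbell_create(&db_handle, VMCI_FLAG_DELAYED_CB,
+ *	    priv_flags, example_doorbell_cb, NULL) == VMCI_SUCCESS) {
+ *		... hand db_handle to the peer; tear down later with
+ *		    vmci_doorbell_destroy(db_handle) ...
+ *	}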
*/ + +#undef VMCI_KERNEL_API_VERSION +#define VMCI_KERNEL_API_VERSION_2 2 +#define VMCI_KERNEL_API_VERSION VMCI_KERNEL_API_VERSION_2 + +/* VMCI Doorbell API. */ +#define VMCI_FLAG_DELAYED_CB 0x01 + +typedef void (*vmci_callback)(void *client_data); + +int vmci_doorbell_create(struct vmci_handle *handle, uint32_t flags, + vmci_privilege_flags priv_flags, vmci_callback notify_cb, + void *client_data); +int vmci_doorbell_destroy(struct vmci_handle handle); +int vmci_doorbell_notify(struct vmci_handle handle, + vmci_privilege_flags priv_flags); + +#endif /* !_VMCI_KERNEL_API_2_H_ */ Index: sys/dev/vmware/vmci/vmci_kernel_defs.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_kernel_defs.h @@ -0,0 +1,30 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Some common utilities used by the VMCI kernel module. */ + +#ifndef _VMCI_KERNEL_DEFS_H_ +#define _VMCI_KERNEL_DEFS_H_ + +#include +#include + +typedef uint32_t PPN; + +#define ASSERT(cond) KASSERT(cond, ("")) +#define ASSERT_ON_COMPILE(e) _Static_assert(e, #e); + +#define LIKELY(_exp) __builtin_expect(!!(_exp), 1) +#define UNLIKELY(_exp) __builtin_expect((_exp), 0) + +#define CONST64U(c) c##uL + +#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) + +#define ROUNDUP(x, y) (((x) + (y) - 1) / (y) * (y)) +#define CEILING(x, y) (((x) + (y) - 1) / (y)) + +#endif /* !_VMCI_KERNEL_DEFS_H_ */ Index: sys/dev/vmware/vmci/vmci_kernel_if.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_kernel_if.h @@ -0,0 +1,92 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* This file defines helper functions */ + +#ifndef _VMCI_KERNEL_IF_H_ +#define _VMCI_KERNEL_IF_H_ + +#include +#include +#include +#include +#include + +#include "vmci_defs.h" + +#define VMCI_MEMORY_NORMAL 0x0 +#define VMCI_MEMORY_ATOMIC 0x1 + +#define vmci_list(_l) LIST_HEAD(, _l) +#define vmci_list_item(_l) LIST_ENTRY(_l) +#define vmci_list_init(_l) LIST_INIT(_l) +#define vmci_list_empty(_l) LIST_EMPTY(_l) +#define vmci_list_first(_l) LIST_FIRST(_l) +#define vmci_list_next(e, f) LIST_NEXT(e, f) +#define vmci_list_insert(_l, _e, n) LIST_INSERT_HEAD(_l, _e, n) +#define vmci_list_remove(_e, n) LIST_REMOVE(_e, n) +#define vmci_list_scan(v, _l, n) LIST_FOREACH(v, _l, n) +#define vmci_list_scan_safe(_e, _l, n, t) \ + LIST_FOREACH_SAFE(_e, _l, n, t) +#define vmci_list_swap(_l1, _l2, t, f) LIST_SWAP(_l1, _l2, t, f) + +typedef unsigned short int vmci_io_port; +typedef int vmci_io_handle; + +void vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port, + uint8_t *buffer, size_t buffer_length); + +typedef struct mtx vmci_lock; +int vmci_init_lock(vmci_lock *lock, char *name); +void vmci_cleanup_lock(vmci_lock *lock); +void vmci_grab_lock(vmci_lock *lock); +void vmci_release_lock(vmci_lock *lock); +void vmci_grab_lock_bh(vmci_lock *lock); +void vmci_release_lock_bh(vmci_lock *lock); + +void *vmci_alloc_kernel_mem(size_t size, int flags); +void vmci_free_kernel_mem(void *ptr, size_t size); + +typedef struct sema vmci_event; +typedef int (*vmci_event_release_cb)(void *client_data); +void vmci_create_event(vmci_event *event); +void vmci_destroy_event(vmci_event *event); +void vmci_signal_event(vmci_event *event); +void vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb, + void *client_data); +bool 
vmci_wait_on_event_interruptible(vmci_event *event, + vmci_event_release_cb release_cb, void *client_data); + +typedef void (vmci_work_fn)(void *data); +bool vmci_can_schedule_delayed_work(void); +int vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data); +void vmci_delayed_work_cb(void *context, int data); + +typedef struct mtx vmci_mutex; +int vmci_mutex_init(vmci_mutex *mutex, char *name); +void vmci_mutex_destroy(vmci_mutex *mutex); +void vmci_mutex_acquire(vmci_mutex *mutex); +void vmci_mutex_release(vmci_mutex *mutex); + +void *vmci_alloc_queue(uint64_t size, uint32_t flags); +void vmci_free_queue(void *q, uint64_t size); + +typedef PPN *vmci_ppn_list; +struct ppn_set { + uint64_t num_produce_pages; + uint64_t num_consume_pages; + vmci_ppn_list produce_ppns; + vmci_ppn_list consume_ppns; + bool initialized; +}; + +int vmci_alloc_ppn_set(void *produce_q, uint64_t num_produce_pages, + void *consume_q, uint64_t num_consume_pages, + struct ppn_set *ppn_set); +void vmci_free_ppn_set(struct ppn_set *ppn_set); +int vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppnset); + +#endif /* !_VMCI_KERNEL_IF_H_ */ Index: sys/dev/vmware/vmci/vmci_kernel_if.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_kernel_if.c @@ -0,0 +1,1066 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* This file implements defines and helper functions. */ + +#include +#include +#include + +#include + +#include "vmci.h" +#include "vmci_defs.h" +#include "vmci_kernel_defs.h" +#include "vmci_kernel_if.h" +#include "vmci_queue.h" + +struct vmci_queue_kernel_if { + size_t num_pages; /* Num pages incl. header. */ + struct vmci_dma_alloc *dmas; /* For dma alloc. */ +}; + +/* + *------------------------------------------------------------------------------ + * + * vmci_init_lock + * + * Initializes the lock. Must be called before use. + * + * Results: + * Always VMCI_SUCCESS. + * + * Side effects: + * Thread can block. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_init_lock(vmci_lock *lock, char *name) +{ + + mtx_init(lock, name, NULL, MTX_DEF | MTX_NOWITNESS); + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_cleanup_lock + * + * Cleanup the lock. Must be called before deallocating lock. + * + * Results: + * None + * + * Side effects: + * Deletes kernel lock state + * + *------------------------------------------------------------------------------ + */ + +void +vmci_cleanup_lock(vmci_lock *lock) +{ + + mtx_destroy(lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_grab_lock + * + * Grabs the given lock. + * + * Results: + * None + * + * Side effects: + * Thread can block. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_grab_lock(vmci_lock *lock) +{ + + mtx_lock(lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_release_lock + * + * Releases the given lock. + * + * Results: + * None + * + * Side effects: + * A thread blocked on this lock may wake up. 
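+ *
+ * A minimal usage sketch (the lock name is arbitrary; note that on FreeBSD
+ * the _bh variants below are implemented identically to these wrappers):
+ *
+ *	static vmci_lock example_lock;
+ *
+ *	vmci_init_lock(&example_lock, "example lock");
+ *	vmci_grab_lock(&example_lock);
+ *	... critical section ...
+ *	vmci_release_lock(&example_lock);
+ *	vmci_cleanup_lock(&example_lock);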
+ * + *------------------------------------------------------------------------------ + */ + +void +vmci_release_lock(vmci_lock *lock) +{ + + mtx_unlock(lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_grab_lock_bh + * + * Grabs the given lock. + * + * Results: + * None + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_grab_lock_bh(vmci_lock *lock) +{ + + mtx_lock(lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_release_lock_bh + * + * Releases the given lock. + * + * Results: + * None + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_release_lock_bh(vmci_lock *lock) +{ + + mtx_unlock(lock); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_alloc_kernel_mem + * + * Allocate physically contiguous memory for the VMCI driver. + * + * Results: + * The address allocated or NULL on error. + * + * + * Side effects: + * Memory may be allocated. + * + *------------------------------------------------------------------------------ + */ + +void * +vmci_alloc_kernel_mem(size_t size, int flags) +{ + void *ptr; + + if ((flags & VMCI_MEMORY_ATOMIC) != 0) + ptr = contigmalloc(size, M_DEVBUF, M_NOWAIT, 0, 0xFFFFFFFF, + 8, 1024 * 1024); + else + ptr = contigmalloc(size, M_DEVBUF, M_WAITOK, 0, 0xFFFFFFFF, + 8, 1024 * 1024); + + return (ptr); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_free_kernel_mem + * + * Free kernel memory allocated for the VMCI driver. + * + * Results: + * None. + * + * Side effects: + * Memory is freed. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_free_kernel_mem(void *ptr, size_t size) +{ + + contigfree(ptr, size, M_DEVBUF); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_can_schedule_delayed_work -- + * + * Checks to see if the given platform supports delayed work callbacks. + * + * Results: + * true if it does. false otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +bool +vmci_can_schedule_delayed_work(void) +{ + + return (true); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_schedule_delayed_work -- + * + * Schedule the specified callback. + * + * Results: + * Zero on success, error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data) +{ + + return (vmci_schedule_delayed_work_fn(work_fn, data)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_create_event -- + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_create_event(vmci_event *event) +{ + + sema_init(event, 0, "vmci_event"); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_destroy_event -- + * + * Results: + * None. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +void +vmci_destroy_event(vmci_event *event) +{ + + if (mtx_owned(&event->sema_mtx)) + sema_destroy(event); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_signal_event -- + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_signal_event(vmci_event *event) +{ + + sema_post(event); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_wait_on_event -- + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb, + void *client_data) +{ + + release_cb(client_data); + sema_wait(event); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_mutex_init -- + * + * Initializes the mutex. Must be called before use. + * + * Results: + * Success. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_mutex_init(vmci_mutex *mutex, char *name) +{ + + mtx_init(mutex, name, NULL, MTX_DEF | MTX_NOWITNESS); + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_mutex_destroy -- + * + * Destroys the mutex. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_mutex_destroy(vmci_mutex *mutex) +{ + + mtx_destroy(mutex); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_mutex_acquire -- + * + * Acquires the mutex. + * + * Results: + * None. + * + * Side effects: + * Thread may block. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_mutex_acquire(vmci_mutex *mutex) +{ + + mtx_lock(mutex); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_mutex_release -- + * + * Releases the mutex. + * + * Results: + * None. + * + * Side effects: + * May wake up the thread blocking on this mutex. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_mutex_release(vmci_mutex *mutex) +{ + + mtx_unlock(mutex); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_alloc_queue -- + * + * Allocates kernel queue pages of specified size with IOMMU mappings, plus + * space for the queue structure/kernel interface and the queue header. + * + * Results: + * Pointer to the queue on success, NULL otherwise. + * + * Side effects: + * Memory is allocated. + * + *------------------------------------------------------------------------------ + */ + +void * +vmci_alloc_queue(uint64_t size, uint32_t flags) +{ + struct vmci_queue *queue; + size_t i; + const size_t num_pages = CEILING(size, PAGE_SIZE) + 1; + const size_t dmas_size = num_pages * sizeof(struct vmci_dma_alloc); + const size_t queue_size = + sizeof(*queue) + sizeof(*(queue->kernel_if)) + dmas_size; + + /* Size should be enforced by vmci_qpair_alloc(), double-check here. 
*/ + if (size > VMCI_MAX_GUEST_QP_MEMORY) { + ASSERT(false); + return (NULL); + } + + queue = malloc(queue_size, M_DEVBUF, M_NOWAIT); + if (!queue) + return (NULL); + + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kernel_if *)(queue + 1); + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->dmas = (struct vmci_dma_alloc *)(queue->kernel_if + + 1); + for (i = 0; i < num_pages; i++) { + vmci_dma_malloc(PAGE_SIZE, 1, &queue->kernel_if->dmas[i]); + if (!queue->kernel_if->dmas[i].dma_vaddr) { + /* Size excl. the header. */ + vmci_free_queue(queue, i * PAGE_SIZE); + return (NULL); + } + } + + /* Queue header is the first page. */ + queue->q_header = (void *)queue->kernel_if->dmas[0].dma_vaddr; + + return ((void *)queue); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_free_queue -- + * + * Frees kernel VA space for a given queue and its queue header, and frees + * physical data pages. + * + * Results: + * None. + * + * Side effects: + * Memory is freed. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_free_queue(void *q, uint64_t size) +{ + struct vmci_queue *queue = q; + + if (queue) { + const size_t num_pages = CEILING(size, PAGE_SIZE) + 1; + uint64_t i; + + /* Given size doesn't include header, so add in a page here. */ + for (i = 0; i < num_pages; i++) + vmci_dma_free(&queue->kernel_if->dmas[i]); + free(queue, M_DEVBUF); + } +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_alloc_ppn_set -- + * + * Allocates two list of PPNs --- one for the pages in the produce queue, + * and the other for the pages in the consume queue. Intializes the list of + * PPNs with the page frame numbers of the KVA for the two queues (and the + * queue headers). + * + * Results: + * Success or failure. + * + * Side effects: + * Memory may be allocated. + * + *----------------------------------------------------------------------------- + */ + +int +vmci_alloc_ppn_set(void *prod_q, uint64_t num_produce_pages, void *cons_q, + uint64_t num_consume_pages, struct ppn_set *ppn_set) +{ + struct vmci_queue *consume_q = cons_q; + struct vmci_queue *produce_q = prod_q; + vmci_ppn_list consume_ppns; + vmci_ppn_list produce_ppns; + uint64_t i; + + if (!produce_q || !num_produce_pages || !consume_q || + !num_consume_pages || !ppn_set) + return (VMCI_ERROR_INVALID_ARGS); + + if (ppn_set->initialized) + return (VMCI_ERROR_ALREADY_EXISTS); + + produce_ppns = + vmci_alloc_kernel_mem(num_produce_pages * sizeof(*produce_ppns), + VMCI_MEMORY_NORMAL); + if (!produce_ppns) + return (VMCI_ERROR_NO_MEM); + + consume_ppns = + vmci_alloc_kernel_mem(num_consume_pages * sizeof(*consume_ppns), + VMCI_MEMORY_NORMAL); + if (!consume_ppns) { + vmci_free_kernel_mem(produce_ppns, + num_produce_pages * sizeof(*produce_ppns)); + return (VMCI_ERROR_NO_MEM); + } + + for (i = 0; i < num_produce_pages; i++) { + unsigned long pfn; + + produce_ppns[i] = + pfn = produce_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT; + + /* + * Fail allocation if PFN isn't supported by hypervisor. + */ + + if (sizeof(pfn) > + sizeof(*produce_ppns) && pfn != produce_ppns[i]) + goto ppn_error; + } + for (i = 0; i < num_consume_pages; i++) { + unsigned long pfn; + + consume_ppns[i] = + pfn = consume_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT; + + /* + * Fail allocation if PFN isn't supported by hypervisor. 
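+		 *
+		 * With PPN defined as uint32_t and 4 KiB pages this means
+		 * only frames below 2^44 bytes (16 TiB) of physical address
+		 * space can be represented; a larger 64-bit pfn would not
+		 * round-trip through the 32-bit PPN entry, so the allocation
+		 * fails rather than handing the hypervisor a truncated
+		 * frame number.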
+ */ + + if (sizeof(pfn) > + sizeof(*consume_ppns) && pfn != consume_ppns[i]) + goto ppn_error; + + } + + ppn_set->num_produce_pages = num_produce_pages; + ppn_set->num_consume_pages = num_consume_pages; + ppn_set->produce_ppns = produce_ppns; + ppn_set->consume_ppns = consume_ppns; + ppn_set->initialized = true; + return (VMCI_SUCCESS); + +ppn_error: + vmci_free_kernel_mem(produce_ppns, num_produce_pages * + sizeof(*produce_ppns)); + vmci_free_kernel_mem(consume_ppns, num_consume_pages * + sizeof(*consume_ppns)); + return (VMCI_ERROR_INVALID_ARGS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_free_ppn_set -- + * + * Frees the two list of PPNs for a queue pair. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_free_ppn_set(struct ppn_set *ppn_set) +{ + + ASSERT(ppn_set); + if (ppn_set->initialized) { + /* Do not call these functions on NULL inputs. */ + ASSERT(ppn_set->produce_ppns && ppn_set->consume_ppns); + vmci_free_kernel_mem(ppn_set->produce_ppns, + ppn_set->num_produce_pages * + sizeof(*ppn_set->produce_ppns)); + vmci_free_kernel_mem(ppn_set->consume_ppns, + ppn_set->num_consume_pages * + sizeof(*ppn_set->consume_ppns)); + } + memset(ppn_set, 0, sizeof(*ppn_set)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_populate_ppn_list -- + * + * Populates the list of PPNs in the hypercall structure with the PPNS + * of the produce queue and the consume queue. + * + * Results: + * VMCI_SUCCESS. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppn_set) +{ + + ASSERT(call_buf && ppn_set && ppn_set->initialized); + memcpy(call_buf, ppn_set->produce_ppns, + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns)); + memcpy(call_buf + ppn_set->num_produce_pages * + sizeof(*ppn_set->produce_ppns), ppn_set->consume_ppns, + ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns)); + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy_{to,from}iovec -- + * + * These helper routines will copy the specified bytes to/from memory that's + * specified as a struct iovec. The routines can not verify the correctness + * of the struct iovec's contents. + * + * Results: + * None. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static inline void +vmci_memcpy_toiovec(struct iovec *iov, uint8_t *src, size_t len) +{ + + while (len > 0) { + if (iov->iov_len) { + size_t to_copy = MIN(iov->iov_len, len); + memcpy(iov->iov_base, src, to_copy); + src += to_copy; + len -= to_copy; + iov->iov_base = (void *)((uintptr_t) iov->iov_base + + to_copy); + iov->iov_len -= to_copy; + } + iov++; + } +} + +static inline void +vmci_memcpy_fromiovec(uint8_t *dst, struct iovec *iov, size_t len) +{ + + while (len > 0) { + if (iov->iov_len) { + size_t to_copy = MIN(iov->iov_len, len); + memcpy(dst, iov->iov_base, to_copy); + dst += to_copy; + len -= to_copy; + iov->iov_base = (void *)((uintptr_t) iov->iov_base + + to_copy); + iov->iov_len -= to_copy; + } + iov++; + } +} + +/* + *------------------------------------------------------------------------------ + * + * __vmci_memcpy_to_queue -- + * + * Copies from a given buffer or iovector to a VMCI Queue. Assumes that + * offset + size does not wrap around in the queue. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +#pragma GCC diagnostic ignored "-Wcast-qual" +static int +__vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset, + const void *src, size_t size, bool is_iovec) +{ + struct vmci_queue_kernel_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const uint64_t page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + /* Skip header. */ + va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr; + + ASSERT(va); + /* + * Fill up the page if we have enough payload, or else + * copy the remaining bytes. + */ + to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied); + + if (is_iovec) { + struct iovec *iov = (struct iovec *)src; + + /* The iovec will track bytes_copied internally. */ + vmci_memcpy_fromiovec((uint8_t *)va + page_offset, + iov, to_copy); + } else + memcpy((uint8_t *)va + page_offset, + (uint8_t *)src + bytes_copied, to_copy); + bytes_copied += to_copy; + } + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * __vmci_memcpy_from_queue -- + * + * Copies to a given buffer or iovector from a VMCI Queue. Assumes that + * offset + size does not wrap around in the queue. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static int +__vmci_memcpy_from_queue(void *dest, const struct vmci_queue *queue, + uint64_t queue_offset, size_t size, bool is_iovec) +{ + struct vmci_queue_kernel_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const uint64_t page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + /* Skip header. */ + va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr; + + ASSERT(va); + /* + * Fill up the page if we have enough payload, or else + * copy the remaining bytes. 
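+ * (Worked example with PAGE_SIZE 4096, purely illustrative: a
+ * 200-byte read at queue_offset 4000 copies the 96 bytes at
+ * offsets 4000..4095 out of dmas[1], then the remaining 104
+ * bytes from the start of dmas[2]; dmas[0] is skipped because it
+ * holds the queue header.)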
+ */ + to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied); + + if (is_iovec) { + struct iovec *iov = (struct iovec *)dest; + + /* The iovec will track bytesCopied internally. */ + vmci_memcpy_toiovec(iov, (uint8_t *)va + + page_offset, to_copy); + } else + memcpy((uint8_t *)dest + bytes_copied, + (uint8_t *)va + page_offset, to_copy); + + bytes_copied += to_copy; + } + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy_to_queue -- + * + * Copies from a given buffer to a VMCI Queue. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset, + const void *src, size_t src_offset, size_t size, int buf_type, + bool can_block) +{ + + ASSERT(can_block); + + return (__vmci_memcpy_to_queue(queue, queue_offset, + (uint8_t *)src + src_offset, size, false)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy_from_queue -- + * + * Copies to a given buffer from a VMCI Queue. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_memcpy_from_queue(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + int buf_type, bool can_block) +{ + + ASSERT(can_block); + + return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset, + queue, queue_offset, size, false)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy_to_queue_local -- + * + * Copies from a given buffer to a local VMCI queue. This is the + * same as a regular copy. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_memcpy_to_queue_local(struct vmci_queue *queue, uint64_t queue_offset, + const void *src, size_t src_offset, size_t size, int buf_type, + bool can_block) +{ + + ASSERT(can_block); + + return (__vmci_memcpy_to_queue(queue, queue_offset, + (uint8_t *)src + src_offset, size, false)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy_from_queue_local -- + * + * Copies to a given buffer from a VMCI Queue. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_memcpy_from_queue_local(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + int buf_type, bool can_block) +{ + + ASSERT(can_block); + + return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset, + queue, queue_offset, size, false)); +} + +/*------------------------------------------------------------------------------ + * + * vmci_memcpy_to_queue_v -- + * + * Copies from a given iovec from a VMCI Queue. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_memcpy_to_queue_v(struct vmci_queue *queue, uint64_t queue_offset, + const void *src, size_t src_offset, size_t size, int buf_type, + bool can_block) +{ + + ASSERT(can_block); + + /* + * We ignore src_offset because src is really a struct iovec * and will + * maintain offset internally. + */ + return (__vmci_memcpy_to_queue(queue, queue_offset, src, size, + true)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy_from_queue_v -- + * + * Copies to a given iovec from a VMCI Queue. + * + * Results: + * Zero on success, negative error code on failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_memcpy_from_queue_v(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + int buf_type, bool can_block) +{ + + ASSERT(can_block); + + /* + * We ignore dest_offset because dest is really a struct iovec * and + * will maintain offset internally. + */ + return (__vmci_memcpy_from_queue(dest, queue, queue_offset, size, + true)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_read_port_bytes -- + * + * Copy memory from an I/O port to kernel memory. + * + * Results: + * No results. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port, uint8_t *buffer, + size_t buffer_length) +{ + + insb(port, buffer, buffer_length); +} Index: sys/dev/vmware/vmci/vmci_qpair.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_qpair.c @@ -0,0 +1,834 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* This file implements Queue accessor methods. */ + +/* + * vmci_qpair is an interface that hides the queue pair internals. Rather than + * access each queue in a pair directly, operations are performed on the queue + * as a whole. This is simpler and less error-prone, and allows for future + * queue pair features to be added under the hood with no change to the client + * code. + */ + +#include "vmci_kernel_api.h" +#include "vmci_kernel_defs.h" +#include "vmci_kernel_if.h" +#include "vmci_queue.h" +#include "vmci_queue_pair.h" + +/* This structure is opaque to the clients. */ +struct vmci_qpair { + struct vmci_handle handle; + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + uint64_t produce_q_size; + uint64_t consume_q_size; + vmci_id peer; + uint32_t flags; + vmci_privilege_flags priv_flags; + uint32_t blocked; + vmci_event event; +}; + +static void vmci_qpair_get_queue_headers(const struct vmci_qpair *qpair, + struct vmci_queue_header **produce_q_header, + struct vmci_queue_header **consume_q_header); + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_add_producer_tail -- + * + * Helper routine to increment the Producer Tail. + * + * Results: + * VMCI_ERROR_NOT_FOUND if the vmm_world registered with the queue cannot + * be found. Otherwise VMCI_SUCCESS. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static inline int +vmci_queue_add_producer_tail(struct vmci_queue *queue, + size_t add, uint64_t queue_size) +{ + + vmci_queue_header_add_producer_tail(queue->q_header, add, queue_size); + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_add_consumer_head -- + * + * Helper routine to increment the Consumer Head. + * + * Results: + * VMCI_ERROR_NOT_FOUND if the vmm_world registered with the queue cannot + * be found. Otherwise VMCI_SUCCESS. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline int +vmci_queue_add_consumer_head(struct vmci_queue *queue, + size_t add, uint64_t queue_size) +{ + + vmci_queue_header_add_consumer_head(queue->q_header, add, queue_size); + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_get_queue_headers -- + * + * Helper routine that will retrieve the produce and consume headers of a + * given queue pair. + * + * Results: + * VMCI_SUCCESS if either current or saved queue headers are found. + * Appropriate error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +vmci_qpair_get_queue_headers(const struct vmci_qpair *qpair, + struct vmci_queue_header **produce_q_header, + struct vmci_queue_header **consume_q_header) +{ + + ASSERT((qpair->produce_q != NULL) && (qpair->consume_q != NULL)); + *produce_q_header = qpair->produce_q->q_header; + *consume_q_header = qpair->consume_q->q_header; +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_alloc -- + * + * This is the client interface for allocating the memory for a vmci_qpair + * structure and then attaching to the underlying queue. If an error occurs + * allocating the memory for the vmci_qpair structure, no attempt is made to + * attach. If an error occurs attaching, then there's the vmci_qpair + * structure is freed. + * + * Results: + * An err, if < 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_qpair_alloc(struct vmci_qpair **qpair, struct vmci_handle *handle, + uint64_t produce_q_size, uint64_t consume_q_size, vmci_id peer, + uint32_t flags, vmci_privilege_flags priv_flags) +{ + struct vmci_qpair *my_qpair; + vmci_event_release_cb wakeup_cb; + void *client_data; + int retval; + + /* + * Restrict the size of a queuepair. Though the device enforces a limit + * on the total amount of memory that can be allocated to queuepairs for + * a guest, we avoid unnecessarily allocating a lot of memory. Also, we + * try to allocate this memory before we make the queuepair allocation + * hypercall. + * + * (Note that this doesn't prevent all cases; a user with only this much + * physical memory could still get into trouble.) The error used by the + * device is NO_RESOURCES, so use that here too. 
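+ *
+ * (The first clause of the check below also guards against
+ * unsigned overflow of the sum: if produce_q_size + consume_q_size
+ * wraps around, the result is smaller than the larger of the two
+ * operands, so the request is rejected rather than slipping under
+ * VMCI_MAX_GUEST_QP_MEMORY.)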
+ */ + + if (produce_q_size + consume_q_size < + MAX(produce_q_size, consume_q_size) || + produce_q_size + consume_q_size > VMCI_MAX_GUEST_QP_MEMORY) + return (VMCI_ERROR_NO_RESOURCES); + + if (flags & VMCI_QPFLAG_NONBLOCK) + return (VMCI_ERROR_INVALID_ARGS); + + my_qpair = vmci_alloc_kernel_mem(sizeof(*my_qpair), VMCI_MEMORY_NORMAL); + if (!my_qpair) + return (VMCI_ERROR_NO_MEM); + + my_qpair->produce_q_size = produce_q_size; + my_qpair->consume_q_size = consume_q_size; + my_qpair->peer = peer; + my_qpair->flags = flags; + my_qpair->priv_flags = priv_flags; + + client_data = NULL; + wakeup_cb = NULL; + + retval = vmci_queue_pair_alloc(handle, &my_qpair->produce_q, + my_qpair->produce_q_size, &my_qpair->consume_q, + my_qpair->consume_q_size, my_qpair->peer, my_qpair->flags, + my_qpair->priv_flags); + + if (retval < VMCI_SUCCESS) { + vmci_free_kernel_mem(my_qpair, sizeof(*my_qpair)); + return (retval); + } + + *qpair = my_qpair; + my_qpair->handle = *handle; + + return (retval); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_detach -- + * + * This is the client interface for detaching from a vmci_qpair. Note that + * this routine will free the memory allocated for the vmci_qpair structure, + * too. + * + * Results: + * An error, if < 0. + * + * Side effects: + * Will clear the caller's pointer to the vmci_qpair structure. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_qpair_detach(struct vmci_qpair **qpair) +{ + struct vmci_qpair *old_qpair; + int result; + + if (!qpair || !(*qpair)) + return (VMCI_ERROR_INVALID_ARGS); + + old_qpair = *qpair; + result = vmci_queue_pair_detach(old_qpair->handle); + + /* + * The guest can fail to detach for a number of reasons, and if it does + * so, it will cleanup the entry (if there is one). We need to release + * the qpair struct here; there isn't much the caller can do, and we + * don't want to leak. + */ + + if (old_qpair->flags & VMCI_QPFLAG_LOCAL) + vmci_destroy_event(&old_qpair->event); + + vmci_free_kernel_mem(old_qpair, sizeof(*old_qpair)); + *qpair = NULL; + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_get_produce_indexes -- + * + * This is the client interface for getting the current indexes of the + * qpair from the point of the view of the caller as the producer. + * + * Results: + * err, if < 0 + * Success otherwise. + * + * Side effects: + * None. 
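+ *
+ * A minimal caller sketch (assumed usage, not part of this
+ * change; "queued" and "produce_size" are the caller's own
+ * bookkeeping of bytes in flight and of the produce queue size
+ * requested at allocation time):
+ *
+ *	uint64_t tail, head;
+ *
+ *	if (vmci_qpair_get_produce_indexes(qpair, &tail, &head) ==
+ *	    VMCI_SUCCESS)
+ *		queued = tail >= head ? tail - head :
+ *		    produce_size - (head - tail);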
+ * + *------------------------------------------------------------------------------ + */ + +int +vmci_qpair_get_produce_indexes(const struct vmci_qpair *qpair, + uint64_t *producer_tail, uint64_t *consumer_head) +{ + struct vmci_queue_header *consume_q_header; + struct vmci_queue_header *produce_q_header; + + if (!qpair) + return (VMCI_ERROR_INVALID_ARGS); + + vmci_qpair_get_queue_headers(qpair, &produce_q_header, + &consume_q_header); + vmci_queue_header_get_pointers(produce_q_header, consume_q_header, + producer_tail, consumer_head); + + if ((producer_tail && *producer_tail >= qpair->produce_q_size) || + (consumer_head && *consumer_head >= qpair->produce_q_size)) + return (VMCI_ERROR_INVALID_SIZE); + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_get_consume_indexes -- + * + * This is the client interface for getting the current indexes of the + * QPair from the point of the view of the caller as the consumer. + * + * Results: + * err, if < 0 + * Success otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_qpair_get_consume_indexes(const struct vmci_qpair *qpair, + uint64_t *consumer_tail, uint64_t *producer_head) +{ + struct vmci_queue_header *consume_q_header; + struct vmci_queue_header *produce_q_header; + + if (!qpair) + return (VMCI_ERROR_INVALID_ARGS); + + vmci_qpair_get_queue_headers(qpair, &produce_q_header, + &consume_q_header); + vmci_queue_header_get_pointers(consume_q_header, produce_q_header, + consumer_tail, producer_head); + + if ((consumer_tail && *consumer_tail >= qpair->consume_q_size) || + (producer_head && *producer_head >= qpair->consume_q_size)) + return (VMCI_ERROR_INVALID_SIZE); + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_produce_free_space -- + * + * This is the client interface for getting the amount of free space in the + * QPair from the point of the view of the caller as the producer which is + * the common case. + * + * Results: + * Err, if < 0. + * Full queue if = 0. + * Number of available bytes into which data can be enqueued if > 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int64_t +vmci_qpair_produce_free_space(const struct vmci_qpair *qpair) +{ + struct vmci_queue_header *consume_q_header; + struct vmci_queue_header *produce_q_header; + int64_t result; + + if (!qpair) + return (VMCI_ERROR_INVALID_ARGS); + + vmci_qpair_get_queue_headers(qpair, &produce_q_header, + &consume_q_header); + result = vmci_queue_header_free_space(produce_q_header, consume_q_header, + qpair->produce_q_size); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_consume_free_space -- + * + * This is the client interface for getting the amount of free space in the + * QPair from the point of the view of the caller as the consumer which is + * not the common case (see vmci_qpair_Produce_free_space(), above). + * + * Results: + * Err, if < 0. + * Full queue if = 0. + * Number of available bytes into which data can be enqueued if > 0. + * + * Side effects: + * None. 
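+ *
+ * The produce-side variant above is the one most writers poll
+ * before enqueueing; a typical pattern is (sketch, assumed
+ * usage, caller-provided variables):
+ *
+ *	int64_t avail;
+ *
+ *	avail = vmci_qpair_produce_free_space(qpair);
+ *	if (avail <= 0)
+ *		return (avail);
+ *	sent = vmci_qpair_enqueue(qpair, buf, MIN(len, avail), 0);
+ *
+ * where zero means the queue is full and a negative value is an
+ * error code.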
+ * + *------------------------------------------------------------------------------ + */ + +int64_t +vmci_qpair_consume_free_space(const struct vmci_qpair *qpair) +{ + struct vmci_queue_header *consume_q_header; + struct vmci_queue_header *produce_q_header; + int64_t result; + + if (!qpair) + return (VMCI_ERROR_INVALID_ARGS); + + vmci_qpair_get_queue_headers(qpair, &produce_q_header, + &consume_q_header); + result = vmci_queue_header_free_space(consume_q_header, produce_q_header, + qpair->consume_q_size); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_produce_buf_ready -- + * + * This is the client interface for getting the amount of enqueued data in + * the QPair from the point of the view of the caller as the producer which + * is not the common case (see vmci_qpair_Consume_buf_ready(), above). + * + * Results: + * Err, if < 0. + * Empty queue if = 0. + * Number of bytes ready to be dequeued if > 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int64_t +vmci_qpair_produce_buf_ready(const struct vmci_qpair *qpair) +{ + struct vmci_queue_header *consume_q_header; + struct vmci_queue_header *produce_q_header; + int64_t result; + + if (!qpair) + return (VMCI_ERROR_INVALID_ARGS); + + vmci_qpair_get_queue_headers(qpair, &produce_q_header, + &consume_q_header); + result = vmci_queue_header_buf_ready(produce_q_header, consume_q_header, + qpair->produce_q_size); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_consume_buf_ready -- + * + * This is the client interface for getting the amount of enqueued data in + * the QPair from the point of the view of the caller as the consumer which + * is the normal case. + * + * Results: + * Err, if < 0. + * Empty queue if = 0. + * Number of bytes ready to be dequeued if > 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int64_t +vmci_qpair_consume_buf_ready(const struct vmci_qpair *qpair) +{ + struct vmci_queue_header *consume_q_header; + struct vmci_queue_header *produce_q_header; + int64_t result; + + if (!qpair) + return (VMCI_ERROR_INVALID_ARGS); + + vmci_qpair_get_queue_headers(qpair, &produce_q_header, + &consume_q_header); + result = vmci_queue_header_buf_ready(consume_q_header, produce_q_header, + qpair->consume_q_size); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * enqueue -- + * + * Enqueues a given buffer to the produce queue using the provided function. + * As many bytes as possible (space available in the queue) are enqueued. + * + * Results: + * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue data. + * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue + * (as defined by the queue size). + * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer. + * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't + * available. + * Otherwise, the number of bytes written to the queue is returned. + * + * Side effects: + * Updates the tail pointer of the produce queue. 
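+ *
+ * Wrap-around example (illustrative numbers only): with
+ * produce_q_size 4096, tail 4000 and 200 bytes to write, the copy
+ * is split into offsets 4000..4095 (96 bytes) and 0..103 (104
+ * bytes), after which the producer tail advances to 104.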
+ * + *------------------------------------------------------------------------------ + */ + +static ssize_t +enqueue(struct vmci_queue *produce_q, struct vmci_queue *consume_q, + const uint64_t produce_q_size, const void *buf, size_t buf_size, + int buf_type, vmci_memcpy_to_queue_func memcpy_to_queue, bool can_block) +{ + ssize_t result; + size_t written; + int64_t free_space; + uint64_t tail; + + ASSERT((produce_q != NULL) && (consume_q != NULL)); + + free_space = vmci_queue_header_free_space(produce_q->q_header, + consume_q->q_header, + produce_q_size); + if (free_space == 0) + return (VMCI_ERROR_QUEUEPAIR_NOSPACE); + + if (free_space < VMCI_SUCCESS) + return ((ssize_t)free_space); + + written = (size_t)(free_space > buf_size ? buf_size : free_space); + tail = vmci_queue_header_producer_tail(produce_q->q_header); + if (LIKELY(tail + written < produce_q_size)) + result = memcpy_to_queue(produce_q, tail, buf, 0, written, + buf_type, can_block); + else { + /* Tail pointer wraps around. */ + + const size_t tmp = (size_t)(produce_q_size - tail); + + result = memcpy_to_queue(produce_q, tail, buf, 0, tmp, buf_type, + can_block); + if (result >= VMCI_SUCCESS) + result = memcpy_to_queue(produce_q, 0, buf, tmp, + written - tmp, buf_type, can_block); + } + + if (result < VMCI_SUCCESS) + return (result); + + result = vmci_queue_add_producer_tail(produce_q, written, + produce_q_size); + if (result < VMCI_SUCCESS) + return (result); + return (written); +} + +/* + *------------------------------------------------------------------------------ + * + * dequeue -- + * + * Dequeues data (if available) from the given consume queue. Writes data + * to the user provided buffer using the provided function. + * + * Results: + * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue. + * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue + * (as defined by the queue size). + * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer. + * VMCI_ERROR_NOT_FOUND, if the vmm_world registered with the queue pair + * cannot be found. + * Otherwise the number of bytes dequeued is returned. + * + * Side effects: + * Updates the head pointer of the consume queue. + * + *------------------------------------------------------------------------------ + */ + +static ssize_t +dequeue(struct vmci_queue *produce_q, + struct vmci_queue *consume_q, const uint64_t consume_q_size, void *buf, + size_t buf_size, int buf_type, + vmci_memcpy_from_queue_func memcpy_from_queue, bool update_consumer, + bool can_block) +{ + ssize_t result; + size_t read; + int64_t buf_ready; + uint64_t head; + + ASSERT((produce_q != NULL) && (consume_q != NULL)); + + buf_ready = vmci_queue_header_buf_ready(consume_q->q_header, + produce_q->q_header, consume_q_size); + if (buf_ready == 0) + return (VMCI_ERROR_QUEUEPAIR_NODATA); + if (buf_ready < VMCI_SUCCESS) + return ((ssize_t)buf_ready); + + read = (size_t)(buf_ready > buf_size ? buf_size : buf_ready); + head = vmci_queue_header_consumer_head(produce_q->q_header); + if (LIKELY(head + read < consume_q_size)) + result = memcpy_from_queue(buf, 0, consume_q, head, read, + buf_type, can_block); + else { + /* Head pointer wraps around. 
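+ * The split mirrors the enqueue case above; e.g. with
+ * consume_q_size 4096, head 4000 and 200 bytes ready, the reads
+ * cover offsets 4000..4095 and then 0..103 (illustrative numbers
+ * only).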
*/ + + const size_t tmp = (size_t)(consume_q_size - head); + + result = memcpy_from_queue(buf, 0, consume_q, head, tmp, + buf_type, can_block); + if (result >= VMCI_SUCCESS) + result = memcpy_from_queue(buf, tmp, consume_q, 0, + read - tmp, buf_type, can_block); + } + + if (result < VMCI_SUCCESS) + return (result); + + if (update_consumer) { + result = vmci_queue_add_consumer_head(produce_q, read, + consume_q_size); + if (result < VMCI_SUCCESS) + return (result); + } + + return (read); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_enqueue -- + * + * This is the client interface for enqueueing data into the queue. + * + * Results: + * Err, if < 0. + * Number of bytes enqueued if >= 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +ssize_t +vmci_qpair_enqueue(struct vmci_qpair *qpair, const void *buf, size_t buf_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !buf) + return (VMCI_ERROR_INVALID_ARGS); + + result = enqueue(qpair->produce_q, qpair->consume_q, + qpair->produce_q_size, buf, buf_size, buf_type, + qpair->flags & VMCI_QPFLAG_LOCAL? + vmci_memcpy_to_queue_local : vmci_memcpy_to_queue, + !(qpair->flags & VMCI_QPFLAG_NONBLOCK)); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_dequeue -- + * + * This is the client interface for dequeueing data from the queue. + * + * Results: + * Err, if < 0. + * Number of bytes dequeued if >= 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +ssize_t +vmci_qpair_dequeue(struct vmci_qpair *qpair, void *buf, size_t buf_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !buf) + return (VMCI_ERROR_INVALID_ARGS); + + result = dequeue(qpair->produce_q, qpair->consume_q, + qpair->consume_q_size, buf, buf_size, buf_type, + qpair->flags & VMCI_QPFLAG_LOCAL? + vmci_memcpy_from_queue_local : vmci_memcpy_from_queue, true, + !(qpair->flags & VMCI_QPFLAG_NONBLOCK)); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_peek -- + * + * This is the client interface for peeking into a queue. (I.e., copy + * data from the queue without updating the head pointer.) + * + * Results: + * Err, if < 0. + * Number of bytes peeked, if >= 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +ssize_t +vmci_qpair_peek(struct vmci_qpair *qpair, void *buf, size_t buf_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !buf) + return (VMCI_ERROR_INVALID_ARGS); + + result = dequeue(qpair->produce_q, qpair->consume_q, + qpair->consume_q_size, buf, buf_size, buf_type, + qpair->flags & VMCI_QPFLAG_LOCAL? + vmci_memcpy_from_queue_local : vmci_memcpy_from_queue, false, + !(qpair->flags & VMCI_QPFLAG_NONBLOCK)); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_enquev -- + * + * This is the client interface for enqueueing data into the queue. + * + * Results: + * Err, if < 0. + * Number of bytes enqueued if >= 0. + * + * Side effects: + * None. 
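+ *
+ * A minimal caller sketch (assumed usage; hdr, payload,
+ * payload_len and qpair are the caller's own):
+ *
+ *	struct iovec iov[2];
+ *
+ *	iov[0].iov_base = &hdr;
+ *	iov[0].iov_len = sizeof(hdr);
+ *	iov[1].iov_base = payload;
+ *	iov[1].iov_len = payload_len;
+ *	sent = vmci_qpair_enquev(qpair, iov,
+ *	    sizeof(hdr) + payload_len, 0);
+ *
+ * Note that the iovec copy helpers advance iov_base and shrink
+ * iov_len as they go, so the iovec contents are consumed by the
+ * call.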
+ * + *------------------------------------------------------------------------------ + */ + +ssize_t +vmci_qpair_enquev(struct vmci_qpair *qpair, void *iov, size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !iov) + return (VMCI_ERROR_INVALID_ARGS); + + result = enqueue(qpair->produce_q, qpair->consume_q, + qpair->produce_q_size, iov, iov_size, buf_type, + qpair->flags & VMCI_QPFLAG_LOCAL? + vmci_memcpy_to_queue_v_local : vmci_memcpy_to_queue_v, + !(qpair->flags & VMCI_QPFLAG_NONBLOCK)); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_dequev -- + * + * This is the client interface for dequeueing data from the queue. + * + * Results: + * Err, if < 0. + * Number of bytes dequeued if >= 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +ssize_t +vmci_qpair_dequev(struct vmci_qpair *qpair, void *iov, size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !iov) + return (VMCI_ERROR_INVALID_ARGS); + + result = dequeue(qpair->produce_q, qpair->consume_q, + qpair->consume_q_size, iov, iov_size, buf_type, + qpair->flags & VMCI_QPFLAG_LOCAL? + vmci_memcpy_from_queue_v_local : vmci_memcpy_from_queue_v, true, + !(qpair->flags & VMCI_QPFLAG_NONBLOCK)); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qpair_peekv -- + * + * This is the client interface for peeking into a queue. (I.e., copy + * data from the queue without updating the head pointer.) + * + * Results: + * Err, if < 0. + * Number of bytes peeked, if >= 0. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +ssize_t +vmci_qpair_peekv(struct vmci_qpair *qpair, void *iov, size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !iov) + return (VMCI_ERROR_INVALID_ARGS); + + result = dequeue(qpair->produce_q, qpair->consume_q, + qpair->consume_q_size, iov, iov_size, buf_type, + qpair->flags & VMCI_QPFLAG_LOCAL? + vmci_memcpy_from_queue_v_local : vmci_memcpy_from_queue_v, false, + !(qpair->flags & VMCI_QPFLAG_NONBLOCK)); + + return (result); +} Index: sys/dev/vmware/vmci/vmci_queue.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_queue.h @@ -0,0 +1,115 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Defines the queue structure and helper functions to enqueue/dequeue items. */ + +#ifndef _VMCI_QUEUE_H_ +#define _VMCI_QUEUE_H_ + +/* + * vmci_queue + * + * This data type contains the information about a queue. + * + * There are two queues (hence, queue pairs) per transaction model between a + * pair of end points, A & B. One queue is used by end point A to transmit + * commands and responses to B. The other queue is used by B to transmit + * commands and responses. + * + * vmci_queue_kernel_if is a per-OS defined queue structure. It contains + * either a direct pointer to the linear address of the buffer contents or a + * pointer to structures which help the OS locate those data pages. + * See vmci_kernel_if.c for its definition. 
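+ *
+ * Schematically, endpoint A's produce queue is endpoint B's
+ * consume queue and vice versa:
+ *
+ *	A produce_q  ----data--->  B consume_q
+ *	A consume_q  <---data----  B produce_q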
+ */ + +struct vmci_queue_kernel_if; + +struct vmci_queue { + struct vmci_queue_header *q_header; + struct vmci_queue_header *saved_header; + struct vmci_queue_kernel_if *kernel_if; +}; + +#define BUF_TYPE int + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy{to,from}_queue_func() prototypes. Functions of these types are + * passed around to enqueue and dequeue routines. Note that often the functions + * passed are simply wrappers around memcpy itself. + * + * Note: In order for the memcpy typedefs to be compatible with the VMKernel, + * there's an unused last parameter for the hosted side. In ESX, that parameter + * holds a buffer type. + * + *------------------------------------------------------------------------------ + */ +typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue, + uint64_t queue_offset, const void *src, size_t src_offset, + size_t size, BUF_TYPE buf_type, bool can_block); +typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + BUF_TYPE buf_type, bool can_block); + +/* + *------------------------------------------------------------------------------ + * + * vmci_memcpy{to,from}_queue_[v]_[local]() prototypes + * + * Note that these routines are NOT SAFE to call on a host end-point until the + * guest end of the queue pair has attached -AND- SetPageStore(). The VMX + * crosstalk device will issue the SetPageStore() on behalf of the guest when + * the guest creates a QueuePair or attaches to one created by the host. So, if + * the guest notifies the host that it's attached then the queue is safe to use. + * Also, if the host registers notification of the connection of the guest, then + * it will only receive that notification when the guest has issued the + * SetPageStore() call and not before (when the guest had attached). 
+ * + *------------------------------------------------------------------------------ + */ + +int vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset, + const void *src, size_t src_offset, size_t size, BUF_TYPE buf_type, + bool can_block); +int vmci_memcpy_from_queue(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + BUF_TYPE buf_type, bool can_block); +int vmci_memcpy_to_queue_local(struct vmci_queue *queue, + uint64_t queue_offset, const void *src, size_t src_offset, + size_t size, BUF_TYPE buf_type, bool can_block); +int vmci_memcpy_from_queue_local(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + BUF_TYPE buf_type, bool can_block); + +int vmci_memcpy_to_queue_v(struct vmci_queue *queue, uint64_t queue_offset, + const void *src, size_t src_offset, size_t size, BUF_TYPE buf_type, + bool can_block); +int vmci_memcpy_from_queue_v(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + BUF_TYPE buf_type, bool can_block); + +static inline int +vmci_memcpy_to_queue_v_local(struct vmci_queue *queue, uint64_t queue_offset, + const void *src, size_t src_offset, size_t size, int buf_type, + bool can_block) +{ + + return (vmci_memcpy_to_queue_v(queue, queue_offset, src, src_offset, + size, buf_type, can_block)); +} + +static inline int +vmci_memcpy_from_queue_v_local(void *dest, size_t dest_offset, + const struct vmci_queue *queue, uint64_t queue_offset, size_t size, + int buf_type, bool can_block) +{ + + return (vmci_memcpy_from_queue_v(dest, dest_offset, queue, queue_offset, + size, buf_type, can_block)); +} + +#endif /* !_VMCI_QUEUE_H_ */ Index: sys/dev/vmware/vmci/vmci_queue_pair.h =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_queue_pair.h @@ -0,0 +1,26 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* VMCI QueuePair API definition. */ + +#ifndef _VMCI_QUEUE_PAIR_H_ +#define _VMCI_QUEUE_PAIR_H_ + +#include "vmci_kernel_if.h" +#include "vmci_queue.h" + +int vmci_qp_guest_endpoints_init(void); +void vmci_qp_guest_endpoints_exit(void); +void vmci_qp_guest_endpoints_sync(void); +void vmci_qp_guest_endpoints_convert(bool to_local, bool device_reset); + +int vmci_queue_pair_alloc(struct vmci_handle *handle, + struct vmci_queue **produce_q, uint64_t produce_size, + struct vmci_queue **consume_q, uint64_t consume_size, + vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags); +int vmci_queue_pair_detach(struct vmci_handle handle); + +#endif /* !_VMCI_QUEUE_PAIR_H_ */ Index: sys/dev/vmware/vmci/vmci_queue_pair.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_queue_pair.c @@ -0,0 +1,937 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* VMCI QueuePair API implementation. 
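+ *
+ * Rough guest-side call flow implemented in this file (a summary,
+ * not new behaviour):
+ *
+ *	vmci_queue_pair_alloc()
+ *	  -> vmci_queue_pair_alloc_guest_work()
+ *	       vmci_alloc_queue()                  produce/consume queues
+ *	       qp_guest_endpoint_create()          handle and rid bookkeeping
+ *	       vmci_alloc_ppn_set()                PPNs for the hypercall
+ *	       vmci_queue_pair_alloc_hypercall()   non-local pairs only
+ *	vmci_queue_pair_detach()
+ *	  -> vmci_queue_pair_detach_guest_work()
+ *	       vmci_queue_pair_detach_hypercall()  non-local pairs only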
*/ + +#include "vmci.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_kernel_api.h" +#include "vmci_kernel_defs.h" +#include "vmci_queue_pair.h" + +#define LGPFX "vmci_queue_pair: " + +struct queue_pair_entry { + vmci_list_item(queue_pair_entry) list_item; + struct vmci_handle handle; + vmci_id peer; + uint32_t flags; + uint64_t produce_size; + uint64_t consume_size; + uint32_t ref_count; +}; + +struct qp_guest_endpoint { + struct queue_pair_entry qp; + uint64_t num_ppns; + void *produce_q; + void *consume_q; + bool hibernate_failure; + struct ppn_set ppn_set; +}; + +struct queue_pair_list { + vmci_list(queue_pair_entry) head; + volatile int hibernate; + vmci_mutex mutex; +}; + +#define QPE_NUM_PAGES(_QPE) \ + ((uint32_t)(CEILING(_QPE.produce_size, PAGE_SIZE) + \ + CEILING(_QPE.consume_size, PAGE_SIZE) + 2)) + +static struct queue_pair_list qp_guest_endpoints; + +static struct queue_pair_entry *queue_pair_list_find_entry( + struct queue_pair_list *qp_list, struct vmci_handle handle); +static void queue_pair_list_add_entry(struct queue_pair_list *qp_list, + struct queue_pair_entry *entry); +static void queue_pair_list_remove_entry(struct queue_pair_list *qp_list, + struct queue_pair_entry *entry); +static struct queue_pair_entry *queue_pair_list_get_head( + struct queue_pair_list *qp_list); +static int queue_pair_notify_peer_local(bool attach, + struct vmci_handle handle); +static struct qp_guest_endpoint *qp_guest_endpoint_create( + struct vmci_handle handle, vmci_id peer, uint32_t flags, + uint64_t produce_size, uint64_t consume_size, + void *produce_q, void *consume_q); +static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry); +static int vmci_queue_pair_alloc_hypercall( + const struct qp_guest_endpoint *entry); +static int vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, uint64_t produce_size, + struct vmci_queue **consume_q, uint64_t consume_size, + vmci_id peer, uint32_t flags, + vmci_privilege_flags priv_flags); +static int vmci_queue_pair_detach_guest_work(struct vmci_handle handle); +static int vmci_queue_pair_detach_hypercall(struct vmci_handle handle); + +extern int vmci_send_datagram(struct vmci_datagram *); + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_pair_alloc -- + * + * Allocates a VMCI QueuePair. Only checks validity of input arguments. The + * real work is done in the host or guest specific function. + * + * Results: + * VMCI_SUCCESS on success, appropriate error code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_queue_pair_alloc(struct vmci_handle *handle, struct vmci_queue **produce_q, + uint64_t produce_size, struct vmci_queue **consume_q, uint64_t consume_size, + vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags) +{ + + if (!handle || !produce_q || !consume_q || + (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS)) + return (VMCI_ERROR_INVALID_ARGS); + + return (vmci_queue_pair_alloc_guest_work(handle, produce_q, + produce_size, consume_q, consume_size, peer, flags, priv_flags)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_pair_detach -- + * + * Detaches from a VMCI QueuePair. Only checks validity of input argument. + * Real work is done in the host or guest specific function. + * + * Results: + * Success or failure. 
+ * + * Side effects: + * Memory is freed. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_queue_pair_detach(struct vmci_handle handle) +{ + + if (VMCI_HANDLE_INVALID(handle)) + return (VMCI_ERROR_INVALID_ARGS); + + return (vmci_queue_pair_detach_guest_work(handle)); +} + +/* + *------------------------------------------------------------------------------ + * + * queue_pair_list_init -- + * + * Initializes the list of QueuePairs. + * + * Results: + * Success or failure. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline int +queue_pair_list_init(struct queue_pair_list *qp_list) +{ + int ret; + + vmci_list_init(&qp_list->head); + atomic_store_int(&qp_list->hibernate, 0); + ret = vmci_mutex_init(&qp_list->mutex, "VMCI QP List lock"); + return (ret); +} + +/* + *------------------------------------------------------------------------------ + * + * queue_pair_list_destroy -- + * + * Destroy the list's mutex. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static inline void +queue_pair_list_destroy(struct queue_pair_list *qp_list) +{ + + vmci_mutex_destroy(&qp_list->mutex); + vmci_list_init(&qp_list->head); +} + +/* + *------------------------------------------------------------------------------ + * + * queue_pair_list_find_entry -- + * + * Finds the entry in the list corresponding to a given handle. Assumes that + * the list is locked. + * + * Results: + * Pointer to entry. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static struct queue_pair_entry * +queue_pair_list_find_entry(struct queue_pair_list *qp_list, + struct vmci_handle handle) +{ + struct queue_pair_entry *next; + + if (VMCI_HANDLE_INVALID(handle)) + return (NULL); + + vmci_list_scan(next, &qp_list->head, list_item) { + if (VMCI_HANDLE_EQUAL(next->handle, handle)) + return (next); + } + + return (NULL); +} + +/* + *------------------------------------------------------------------------------ + * + * queue_pair_list_add_entry -- + * + * Adds the given entry to the list. Assumes that the list is locked. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +queue_pair_list_add_entry(struct queue_pair_list *qp_list, + struct queue_pair_entry *entry) +{ + + if (entry) + vmci_list_insert(&qp_list->head, entry, list_item); +} + +/* + *------------------------------------------------------------------------------ + * + * queue_pair_list_remove_entry -- + * + * Removes the given entry from the list. Assumes that the list is locked. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +static void +queue_pair_list_remove_entry(struct queue_pair_list *qp_list, + struct queue_pair_entry *entry) +{ + + if (entry) + vmci_list_remove(entry, list_item); +} + +/* + *------------------------------------------------------------------------------ + * + * queue_pair_list_get_head -- + * + * Returns the entry from the head of the list. Assumes that the list is + * locked. + * + * Results: + * Pointer to entry. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + +static struct queue_pair_entry * +queue_pair_list_get_head(struct queue_pair_list *qp_list) +{ + + return (vmci_list_first(&qp_list->head)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qp_guest_endpoints_init -- + * + * Initalizes data structure state keeping track of queue pair guest + * endpoints. + * + * Results: + * VMCI_SUCCESS on success and appropriate failure code otherwise. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_qp_guest_endpoints_init(void) +{ + + return (queue_pair_list_init(&qp_guest_endpoints)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qp_guest_endpoints_exit -- + * + * Destroys all guest queue pair endpoints. If active guest queue pairs + * still exist, hypercalls to attempt detach from these queue pairs will be + * made. Any failure to detach is silently ignored. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_qp_guest_endpoints_exit(void) +{ + struct qp_guest_endpoint *entry; + + vmci_mutex_acquire(&qp_guest_endpoints.mutex); + + while ((entry = + (struct qp_guest_endpoint *)queue_pair_list_get_head( + &qp_guest_endpoints)) != NULL) { + /* + * Don't make a hypercall for local QueuePairs. + */ + if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL)) + vmci_queue_pair_detach_hypercall(entry->qp.handle); + /* + * We cannot fail the exit, so let's reset ref_count. + */ + entry->qp.ref_count = 0; + queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp); + qp_guest_endpoint_destroy(entry); + } + + atomic_store_int(&qp_guest_endpoints.hibernate, 0); + vmci_mutex_release(&qp_guest_endpoints.mutex); + queue_pair_list_destroy(&qp_guest_endpoints); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_qp_guest_endpoints_sync -- + * + * Use this as a synchronization point when setting globals, for example, + * during device shutdown. + * + * Results: + * true. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_qp_guest_endpoints_sync(void) +{ + + vmci_mutex_acquire(&qp_guest_endpoints.mutex); + vmci_mutex_release(&qp_guest_endpoints.mutex); +} + +/* + *------------------------------------------------------------------------------ + * + * qp_guest_endpoint_create -- + * + * Allocates and initializes a qp_guest_endpoint structure. Allocates a + * QueuePair rid (and handle) iff the given entry has an invalid handle. + * 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved handles. Assumes + * that the QP list mutex is held by the caller. + * + * Results: + * Pointer to structure intialized. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +struct qp_guest_endpoint * +qp_guest_endpoint_create(struct vmci_handle handle, vmci_id peer, + uint32_t flags, uint64_t produce_size, uint64_t consume_size, + void *produce_q, void *consume_q) +{ + struct qp_guest_endpoint *entry; + static vmci_id queue_pair_rid; + const uint64_t num_ppns = CEILING(produce_size, PAGE_SIZE) + + CEILING(consume_size, PAGE_SIZE) + + 2; /* One page each for the queue headers. 
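+ * (e.g., a 64 KB produce queue plus a 32 KB consume queue needs
+ * 16 + 8 + 2 = 26 PPNs with 4 KB pages; sizes are illustrative.)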
*/ + + queue_pair_rid = VMCI_RESERVED_RESOURCE_ID_MAX + 1; + + ASSERT((produce_size || consume_size) && produce_q && consume_q); + + if (VMCI_HANDLE_INVALID(handle)) { + vmci_id context_id = vmci_get_context_id(); + vmci_id old_rid = queue_pair_rid; + + /* + * Generate a unique QueuePair rid. Keep on trying until we + * wrap around in the RID space. + */ + ASSERT(old_rid > VMCI_RESERVED_RESOURCE_ID_MAX); + do { + handle = VMCI_MAKE_HANDLE(context_id, queue_pair_rid); + entry = + (struct qp_guest_endpoint *) + queue_pair_list_find_entry(&qp_guest_endpoints, + handle); + queue_pair_rid++; + if (UNLIKELY(!queue_pair_rid)) { + /* + * Skip the reserved rids. + */ + queue_pair_rid = + VMCI_RESERVED_RESOURCE_ID_MAX + 1; + } + } while (entry && queue_pair_rid != old_rid); + + if (UNLIKELY(entry != NULL)) { + ASSERT(queue_pair_rid == old_rid); + /* + * We wrapped around --- no rids were free. + */ + return (NULL); + } + } + + ASSERT(!VMCI_HANDLE_INVALID(handle) && + queue_pair_list_find_entry(&qp_guest_endpoints, handle) == NULL); + entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL); + if (entry) { + entry->qp.handle = handle; + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = produce_size; + entry->qp.consume_size = consume_size; + entry->qp.ref_count = 0; + entry->num_ppns = num_ppns; + memset(&entry->ppn_set, 0, sizeof(entry->ppn_set)); + entry->produce_q = produce_q; + entry->consume_q = consume_q; + } + return (entry); +} + +/* + *------------------------------------------------------------------------------ + * + * qp_guest_endpoint_destroy -- + * + * Frees a qp_guest_endpoint structure. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry) +{ + + ASSERT(entry); + ASSERT(entry->qp.ref_count == 0); + + vmci_free_ppn_set(&entry->ppn_set); + vmci_free_queue(entry->produce_q, entry->qp.produce_size); + vmci_free_queue(entry->consume_q, entry->qp.consume_size); + vmci_free_kernel_mem(entry, sizeof(*entry)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_pair_alloc_hypercall -- + * + * Helper to make a QueuePairAlloc hypercall when the driver is + * supporting a guest device. + * + * Results: + * Result of the hypercall. + * + * Side effects: + * Memory is allocated & freed. 
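+ *
+ * The datagram payload is the fixed vmci_queue_pair_alloc_msg
+ * immediately followed by num_ppns PPN entries, produce PPNs
+ * first and consume PPNs second, as laid down by
+ * vmci_populate_ppn_list().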
+ * + *------------------------------------------------------------------------------ + */ +static int +vmci_queue_pair_alloc_hypercall(const struct qp_guest_endpoint *entry) +{ + struct vmci_queue_pair_alloc_msg *alloc_msg; + size_t msg_size; + int result; + + if (!entry || entry->num_ppns <= 2) + return (VMCI_ERROR_INVALID_ARGS); + + ASSERT(!(entry->qp.flags & VMCI_QPFLAG_LOCAL)); + + msg_size = sizeof(*alloc_msg) + (size_t)entry->num_ppns * sizeof(PPN); + alloc_msg = vmci_alloc_kernel_mem(msg_size, VMCI_MEMORY_NORMAL); + if (!alloc_msg) + return (VMCI_ERROR_NO_MEM); + + alloc_msg->hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_ALLOC); + alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE; + alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE; + alloc_msg->handle = entry->qp.handle; + alloc_msg->peer = entry->qp.peer; + alloc_msg->flags = entry->qp.flags; + alloc_msg->produce_size = entry->qp.produce_size; + alloc_msg->consume_size = entry->qp.consume_size; + alloc_msg->num_ppns = entry->num_ppns; + result = vmci_populate_ppn_list((uint8_t *)alloc_msg + + sizeof(*alloc_msg), &entry->ppn_set); + if (result == VMCI_SUCCESS) + result = vmci_send_datagram((struct vmci_datagram *)alloc_msg); + vmci_free_kernel_mem(alloc_msg, msg_size); + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_pair_alloc_guest_work -- + * + * This functions handles the actual allocation of a VMCI queue pair guest + * endpoint. Allocates physical pages for the queue pair. It makes OS + * dependent calls through generic wrappers. + * + * Results: + * Success or failure. + * + * Side effects: + * Memory is allocated. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, uint64_t produce_size, + struct vmci_queue **consume_q, uint64_t consume_size, vmci_id peer, + uint32_t flags, vmci_privilege_flags priv_flags) +{ + struct qp_guest_endpoint *queue_pair_entry = NULL; + void *my_consume_q = NULL; + void *my_produce_q = NULL; + const uint64_t num_consume_pages = CEILING(consume_size, PAGE_SIZE) + 1; + const uint64_t num_produce_pages = CEILING(produce_size, PAGE_SIZE) + 1; + int result; + + ASSERT(handle && produce_q && consume_q && + (produce_size || consume_size)); + + if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS) + return (VMCI_ERROR_NO_ACCESS); + + vmci_mutex_acquire(&qp_guest_endpoints.mutex); + + if ((atomic_load_int(&qp_guest_endpoints.hibernate) == 1) && + !(flags & VMCI_QPFLAG_LOCAL)) { + /* + * While guest OS is in hibernate state, creating non-local + * queue pairs is not allowed after the point where the VMCI + * guest driver converted the existing queue pairs to local + * ones. + */ + + result = VMCI_ERROR_UNAVAILABLE; + goto error; + } + + if ((queue_pair_entry = + (struct qp_guest_endpoint *)queue_pair_list_find_entry( + &qp_guest_endpoints, *handle)) != NULL) { + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local attach case. 
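+ * A local pair is created and attached entirely within this
+ * guest: a second vmci_qpair_alloc() on the same handle with
+ * VMCI_QPFLAG_LOCAL takes this branch and reuses the creator's
+ * queues with produce and consume swapped, so no hypercall is
+ * needed.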
*/ + if (queue_pair_entry->qp.ref_count > 1) { + VMCI_LOG_DEBUG(LGPFX"Error attempting to " + "attach more than once.\n"); + result = VMCI_ERROR_UNAVAILABLE; + goto error_keep_entry; + } + + if (queue_pair_entry->qp.produce_size != consume_size || + queue_pair_entry->qp.consume_size != produce_size || + queue_pair_entry->qp.flags != + (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) { + VMCI_LOG_DEBUG(LGPFX"Error mismatched " + "queue pair in local attach.\n"); + result = VMCI_ERROR_QUEUEPAIR_MISMATCH; + goto error_keep_entry; + } + + /* + * Do a local attach. We swap the consume and produce + * queues for the attacher and deliver an attach event. + */ + result = queue_pair_notify_peer_local(true, *handle); + if (result < VMCI_SUCCESS) + goto error_keep_entry; + my_produce_q = queue_pair_entry->consume_q; + my_consume_q = queue_pair_entry->produce_q; + goto out; + } + result = VMCI_ERROR_ALREADY_EXISTS; + goto error_keep_entry; + } + + my_produce_q = vmci_alloc_queue(produce_size, flags); + if (!my_produce_q) { + VMCI_LOG_WARNING(LGPFX"Error allocating pages for produce " + "queue.\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + my_consume_q = vmci_alloc_queue(consume_size, flags); + if (!my_consume_q) { + VMCI_LOG_WARNING(LGPFX"Error allocating pages for consume " + "queue.\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags, + produce_size, consume_size, my_produce_q, my_consume_q); + if (!queue_pair_entry) { + VMCI_LOG_WARNING(LGPFX"Error allocating memory in %s.\n", + __FUNCTION__); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + result = vmci_alloc_ppn_set(my_produce_q, num_produce_pages, + my_consume_q, num_consume_pages, &queue_pair_entry->ppn_set); + if (result < VMCI_SUCCESS) { + VMCI_LOG_WARNING(LGPFX"vmci_alloc_ppn_set failed.\n"); + goto error; + } + + /* + * It's only necessary to notify the host if this queue pair will be + * attached to from another context. + */ + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local create case. */ + vmci_id context_id = vmci_get_context_id(); + + /* + * Enforce similar checks on local queue pairs as we do for + * regular ones. The handle's context must match the creator + * or attacher context id (here they are both the current + * context id) and the attach-only flag cannot exist during + * create. We also ensure specified peer is this context or + * an invalid one. + */ + if (queue_pair_entry->qp.handle.context != context_id || + (queue_pair_entry->qp.peer != VMCI_INVALID_ID && + queue_pair_entry->qp.peer != context_id)) { + result = VMCI_ERROR_NO_ACCESS; + goto error; + } + + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) { + result = VMCI_ERROR_NOT_FOUND; + goto error; + } + } else { + result = vmci_queue_pair_alloc_hypercall(queue_pair_entry); + if (result < VMCI_SUCCESS) { + VMCI_LOG_WARNING( + LGPFX"vmci_queue_pair_alloc_hypercall result = " + "%d.\n", result); + goto error; + } + } + + queue_pair_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp); + +out: + queue_pair_entry->qp.ref_count++; + *handle = queue_pair_entry->qp.handle; + *produce_q = (struct vmci_queue *)my_produce_q; + *consume_q = (struct vmci_queue *)my_consume_q; + + /* + * We should initialize the queue pair header pages on a local queue + * pair create. For non-local queue pairs, the hypervisor initializes + * the header pages in the create step. 
+ */ + if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) && + queue_pair_entry->qp.ref_count == 1) { + vmci_queue_header_init((*produce_q)->q_header, *handle); + vmci_queue_header_init((*consume_q)->q_header, *handle); + } + + vmci_mutex_release(&qp_guest_endpoints.mutex); + + return (VMCI_SUCCESS); + +error: + vmci_mutex_release(&qp_guest_endpoints.mutex); + if (queue_pair_entry) { + /* The queues will be freed inside the destroy routine. */ + qp_guest_endpoint_destroy(queue_pair_entry); + } else { + if (my_produce_q) + vmci_free_queue(my_produce_q, produce_size); + if (my_consume_q) + vmci_free_queue(my_consume_q, consume_size); + } + return (result); + +error_keep_entry: + /* This path should only be used when an existing entry was found. */ + ASSERT(queue_pair_entry->qp.ref_count > 0); + vmci_mutex_release(&qp_guest_endpoints.mutex); + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_pair_detach_hypercall -- + * + * Helper to make a QueuePairDetach hypercall when the driver is supporting + * a guest device. + * + * Results: + * Result of the hypercall. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_queue_pair_detach_hypercall(struct vmci_handle handle) +{ + struct vmci_queue_pair_detach_msg detach_msg; + + detach_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_DETACH); + detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + detach_msg.hdr.payload_size = sizeof(handle); + detach_msg.handle = handle; + + return (vmci_send_datagram((struct vmci_datagram *)&detach_msg)); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_queue_pair_detach_guest_work -- + * + * Helper for VMCI QueuePair detach interface. Frees the physical pages for + * the queue pair. + * + * Results: + * Success or failure. + * + * Side effects: + * Memory may be freed. + * + *------------------------------------------------------------------------------ + */ + +static int +vmci_queue_pair_detach_guest_work(struct vmci_handle handle) +{ + struct qp_guest_endpoint *entry; + int result; + uint32_t ref_count; + + ASSERT(!VMCI_HANDLE_INVALID(handle)); + + vmci_mutex_acquire(&qp_guest_endpoints.mutex); + + entry = (struct qp_guest_endpoint *)queue_pair_list_find_entry( + &qp_guest_endpoints, handle); + if (!entry) { + vmci_mutex_release(&qp_guest_endpoints.mutex); + return (VMCI_ERROR_NOT_FOUND); + } + + ASSERT(entry->qp.ref_count >= 1); + + if (entry->qp.flags & VMCI_QPFLAG_LOCAL) { + result = VMCI_SUCCESS; + + if (entry->qp.ref_count > 1) { + result = queue_pair_notify_peer_local(false, handle); + + /* + * We can fail to notify a local queuepair because we + * can't allocate. We still want to release the entry + * if that happens, so don't bail out yet. + */ + } + } else { + result = vmci_queue_pair_detach_hypercall(handle); + if (entry->hibernate_failure) { + if (result == VMCI_ERROR_NOT_FOUND) { + + /* + * If a queue pair detach failed when entering + * hibernation, the guest driver and the device + * may disagree on its existence when coming + * out of hibernation. The guest driver will + * regard it as a non-local queue pair, but + * the device state is gone, since the device + * has been powered off. In this case, we + * treat the queue pair as a local queue pair + * with no peer. 
+				 */
+
+				ASSERT(entry->qp.ref_count == 1);
+				result = VMCI_SUCCESS;
+			}
+		}
+		if (result < VMCI_SUCCESS) {
+
+			/*
+			 * We failed to notify a non-local queuepair. That other
+			 * queuepair might still be accessing the shared
+			 * memory, so don't release the entry yet. It will get
+			 * cleaned up when the guest's queue pair endpoints are
+			 * torn down, if necessary (assuming we are going away;
+			 * otherwise, why did this fail?).
+			 */
+
+			vmci_mutex_release(&qp_guest_endpoints.mutex);
+			return (result);
+		}
+	}
+
+	/*
+	 * If we get here then we either failed to notify a local queuepair, or
+	 * we succeeded in all cases. Release the entry if required.
+	 */
+
+	entry->qp.ref_count--;
+	if (entry->qp.ref_count == 0)
+		queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp);
+
+	/* If we didn't remove the entry, this could change once we unlock. */
+	ref_count = entry ? entry->qp.ref_count :
+	    0xffffffff; /*
+			 * Value does not matter, silence the
+			 * compiler.
+			 */
+
+	vmci_mutex_release(&qp_guest_endpoints.mutex);
+
+	if (ref_count == 0)
+		qp_guest_endpoint_destroy(entry);
+	return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * queue_pair_notify_peer_local --
+ *
+ *    Dispatches a queue pair event message directly into the local event
+ *    queue.
+ *
+ * Results:
+ *    VMCI_SUCCESS on success, error code otherwise.
+ *
+ * Side effects:
+ *    None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static int
+queue_pair_notify_peer_local(bool attach, struct vmci_handle handle)
+{
+	struct vmci_event_msg *e_msg;
+	struct vmci_event_payload_qp *e_payload;
+	/* buf is only 48 bytes. */
+	char buf[sizeof(*e_msg) + sizeof(*e_payload)];
+	vmci_id context_id;
+
+	context_id = vmci_get_context_id();
+	e_msg = (struct vmci_event_msg *)buf;
+	e_payload = vmci_event_msg_payload(e_msg);
+
+	e_msg->hdr.dst = VMCI_MAKE_HANDLE(context_id, VMCI_EVENT_HANDLER);
+	e_msg->hdr.src = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID,
+	    VMCI_CONTEXT_RESOURCE_ID);
+	e_msg->hdr.payload_size = sizeof(*e_msg) + sizeof(*e_payload) -
+	    sizeof(e_msg->hdr);
+	e_msg->event_data.event = attach ? VMCI_EVENT_QP_PEER_ATTACH :
+	    VMCI_EVENT_QP_PEER_DETACH;
+	e_payload->peer_id = context_id;
+	e_payload->handle = handle;
+
+	return (vmci_event_dispatch((struct vmci_datagram *)e_msg));
+}
Index: sys/dev/vmware/vmci/vmci_resource.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_resource.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* VMCI Resource Access Control API. */
+
+#ifndef _VMCI_RESOURCE_H_
+#define _VMCI_RESOURCE_H_
+
+#include "vmci_defs.h"
+#include "vmci_hashtable.h"
+#include "vmci_kernel_if.h"
+
+#define RESOURCE_CONTAINER(ptr, type, member) \
+	((type *)((char *)(ptr) - offsetof(type, member)))
+
+typedef void (*vmci_resource_free_cb)(void *resource);
+
+typedef enum {
+	VMCI_RESOURCE_TYPE_ANY,
+	VMCI_RESOURCE_TYPE_API,
+	VMCI_RESOURCE_TYPE_GROUP,
+	VMCI_RESOURCE_TYPE_DATAGRAM,
+	VMCI_RESOURCE_TYPE_DOORBELL,
+} vmci_resource_type;
+
+struct vmci_resource {
+	struct vmci_hash_entry hash_entry;
+	vmci_resource_type type;
+	/* Callback to free the container object when its ref count is 0. */
+	vmci_resource_free_cb container_free_cb;
+	/* Container object reference.
*/ + void *container_object; +}; + +int vmci_resource_init(void); +void vmci_resource_exit(void); +void vmci_resource_sync(void); + +vmci_id vmci_resource_get_id(vmci_id context_id); + +int vmci_resource_add(struct vmci_resource *resource, + vmci_resource_type resource_type, + struct vmci_handle resource_handle, + vmci_resource_free_cb container_free_cb, void *container_object); +void vmci_resource_remove(struct vmci_handle resource_handle, + vmci_resource_type resource_type); +struct vmci_resource *vmci_resource_get(struct vmci_handle resource_handle, + vmci_resource_type resource_type); +void vmci_resource_hold(struct vmci_resource *resource); +int vmci_resource_release(struct vmci_resource *resource); +struct vmci_handle vmci_resource_handle(struct vmci_resource *resource); + +#endif /* !_VMCI_RESOURCE_H_ */ Index: sys/dev/vmware/vmci/vmci_resource.c =================================================================== --- /dev/null +++ sys/dev/vmware/vmci/vmci_resource.c @@ -0,0 +1,395 @@ +/*- + * Copyright (c) 2018 VMware, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0) + */ + +/* Implementation of the VMCI Resource Access Control API. */ + +#include "vmci_driver.h" +#include "vmci_kernel_defs.h" +#include "vmci_resource.h" + +#define LGPFX "vmci_resource: " + +/* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */ +static uint32_t resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1; +static vmci_lock resource_id_lock; + +static void vmci_resource_do_remove(struct vmci_resource *resource); + +static struct vmci_hashtable *resource_table = NULL; + +/* Public Resource Access Control API. */ + +/* + *------------------------------------------------------------------------------ + * + * vmci_resource_init -- + * + * Initializes the VMCI Resource Access Control API. Creates a hashtable to + * hold all resources, and registers vectors and callbacks for hypercalls. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_resource_init(void) +{ + int err; + + err = vmci_init_lock(&resource_id_lock, "VMCI RID lock"); + if (err < VMCI_SUCCESS) + return (err); + + resource_table = vmci_hashtable_create(128); + if (resource_table == NULL) { + VMCI_LOG_WARNING((LGPFX"Failed creating a resource hash table " + "for VMCI.\n")); + vmci_cleanup_lock(&resource_id_lock); + return (VMCI_ERROR_NO_MEM); + } + + return (VMCI_SUCCESS); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_resource_exit -- + * + * Cleans up resources. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_resource_exit(void) +{ + + /* Cleanup resources.*/ + vmci_cleanup_lock(&resource_id_lock); + + if (resource_table) + vmci_hashtable_destroy(resource_table); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_resource_get_id -- + * + * Return resource ID. The first VMCI_RESERVED_RESOURCE_ID_MAX are reserved + * so we start from its value + 1. + * + * Result: + * VMCI resource id on success, VMCI_INVALID_ID on failure. + * + * Side effects: + * None. 
+ * + * + *------------------------------------------------------------------------------ + */ + +vmci_id +vmci_resource_get_id(vmci_id context_id) +{ + vmci_id current_rid; + vmci_id old_rid; + bool found_rid; + + old_rid = resource_id; + found_rid = false; + + /* + * Generate a unique resource ID. Keep on trying until we wrap around + * in the RID space. + */ + ASSERT(old_rid > VMCI_RESERVED_RESOURCE_ID_MAX); + + do { + struct vmci_handle handle; + + vmci_grab_lock(&resource_id_lock); + current_rid = resource_id; + handle = VMCI_MAKE_HANDLE(context_id, current_rid); + resource_id++; + if (UNLIKELY(resource_id == VMCI_INVALID_ID)) { + /* Skip the reserved rids. */ + resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1; + } + vmci_release_lock(&resource_id_lock); + found_rid = !vmci_hashtable_entry_exists(resource_table, + handle); + } while (!found_rid && resource_id != old_rid); + + if (UNLIKELY(!found_rid)) + return (VMCI_INVALID_ID); + else + return (current_rid); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_resource_add -- + * + * Add resource to hashtable. + * + * Results: + * VMCI_SUCCESS if successful, error code if not. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +int +vmci_resource_add(struct vmci_resource *resource, + vmci_resource_type resource_type, struct vmci_handle resource_handle, + vmci_resource_free_cb container_free_cb, void *container_object) +{ + int result; + + ASSERT(resource); + + if (VMCI_HANDLE_EQUAL(resource_handle, VMCI_INVALID_HANDLE)) { + VMCI_LOG_DEBUG(LGPFX"Invalid argument resource " + "(handle=0x%x:0x%x).\n", resource_handle.context, + resource_handle.resource); + return (VMCI_ERROR_INVALID_ARGS); + } + + vmci_hashtable_init_entry(&resource->hash_entry, resource_handle); + resource->type = resource_type; + resource->container_free_cb = container_free_cb; + resource->container_object = container_object; + + /* Add resource to hashtable. */ + result = vmci_hashtable_add_entry(resource_table, + &resource->hash_entry); + if (result != VMCI_SUCCESS) { + VMCI_LOG_DEBUG(LGPFX"Failed to add entry to hash table " + "(result=%d).\n", result); + return (result); + } + + return (result); +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_resource_remove -- + * + * Remove resource from hashtable. + * + * Results: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + +void +vmci_resource_remove(struct vmci_handle resource_handle, + vmci_resource_type resource_type) +{ + struct vmci_resource *resource; + + resource = vmci_resource_get(resource_handle, resource_type); + if (resource == NULL) + return; + + /* Remove resource from hashtable. */ + vmci_hashtable_remove_entry(resource_table, &resource->hash_entry); + + vmci_resource_release(resource); + /* resource could be freed by now. */ +} + +/* + *------------------------------------------------------------------------------ + * + * vmci_resource_get -- + * + * Get resource from hashtable. + * + * Results: + * Resource if successful. Otherwise NULL. + * + * Side effects: + * None. 
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_resource *
+vmci_resource_get(struct vmci_handle resource_handle,
+    vmci_resource_type resource_type)
+{
+	struct vmci_hash_entry *entry;
+	struct vmci_resource *resource;
+
+	entry = vmci_hashtable_get_entry(resource_table, resource_handle);
+	if (entry == NULL)
+		return (NULL);
+	resource = RESOURCE_CONTAINER(entry, struct vmci_resource, hash_entry);
+	if (resource_type == VMCI_RESOURCE_TYPE_ANY ||
+	    resource->type == resource_type) {
+		return (resource);
+	}
+	vmci_hashtable_release_entry(resource_table, entry);
+	return (NULL);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_hold --
+ *
+ *    Hold the given resource. This will hold the hashtable entry. This is like
+ *    vmci_resource_get(), but without having to look up the resource by
+ *    handle.
+ *
+ * Results:
+ *    None.
+ *
+ * Side effects:
+ *    None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_hold(struct vmci_resource *resource)
+{
+
+	ASSERT(resource);
+	vmci_hashtable_hold_entry(resource_table, &resource->hash_entry);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_do_remove --
+ *
+ *    Deallocates data structures associated with the given resource and
+ *    invokes any callback registered for the resource.
+ *
+ * Results:
+ *    None.
+ *
+ * Side effects:
+ *    May deallocate memory and invoke a callback for the removed resource.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline void
+vmci_resource_do_remove(struct vmci_resource *resource)
+{
+
+	ASSERT(resource);
+
+	if (resource->container_free_cb) {
+		resource->container_free_cb(resource->container_object);
+		/* The resource has been freed; don't dereference it. */
+	}
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_release --
+ *
+ *    Releases a reference to the given resource.
+ *
+ * Results:
+ *    The result of releasing the underlying hashtable entry;
+ *    VMCI_SUCCESS_ENTRY_DEAD indicates that the last reference was dropped.
+ *
+ * Side effects:
+ *    The resource's container_free_cb will get called when the last reference
+ *    is released.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+int
+vmci_resource_release(struct vmci_resource *resource)
+{
+	int result;
+
+	ASSERT(resource);
+
+	result = vmci_hashtable_release_entry(resource_table,
+	    &resource->hash_entry);
+	if (result == VMCI_SUCCESS_ENTRY_DEAD)
+		vmci_resource_do_remove(resource);
+
+	/*
+	 * We propagate the information back to the caller in case it wants to
+	 * know whether the entry was freed.
+	 */
+	return (result);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_handle --
+ *
+ *    Get the handle for the given resource.
+ *
+ * Results:
+ *    The resource's associated handle.
+ *
+ * Side effects:
+ *    None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+struct vmci_handle
+vmci_resource_handle(struct vmci_resource *resource)
+{
+
+	ASSERT(resource);
+	return (resource->hash_entry.handle);
+}
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_resource_sync --
+ *
+ *    Use this as a synchronization point when setting globals, for example,
+ *    during device shutdown.
+ *
+ * Results:
+ *    None.
+ *
+ * Side effects:
+ *    None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+void
+vmci_resource_sync(void)
+{
+
+	vmci_hashtable_sync(resource_table);
+}
Index: sys/dev/vmware/vmci/vmci_utils.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmci/vmci_utils.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+ */
+
+/* Some common utilities used by the VMCI kernel module. */
+
+#ifndef _VMCI_UTILS_H_
+#define _VMCI_UTILS_H_
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * vmci_hash_id --
+ *
+ *    Hash function used by the Simple Datagram API. Hashes only a VMCI ID (not
+ *    the full VMCI handle). Based on the djb2 hash function by Dan Bernstein.
+ *
+ * Result:
+ *    Returns the hash bucket index for the given VMCI ID.
+ *
+ * Side effects:
+ *    None.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+static inline int
+vmci_hash_id(vmci_id id, unsigned size)
+{
+	unsigned i;
+	int hash = 5381;
+
+	for (i = 0; i < sizeof(id); i++)
+		hash = ((hash << 5) + hash) + (uint8_t)(id >> (i * 8));
+
+	return (hash & (size - 1));
+}
+
+#endif /* !_VMCI_UTILS_H_ */
Index: sys/modules/vmware/Makefile
===================================================================
--- sys/modules/vmware/Makefile
+++ sys/modules/vmware/Makefile
@@ -23,6 +23,6 @@
 # SUCH DAMAGE.
 #
 
-SUBDIR= vmxnet3
+SUBDIR= vmci vmxnet3
 
 .include <bsd.subdir.mk>
Index: sys/modules/vmware/vmci/Makefile
===================================================================
--- /dev/null
+++ sys/modules/vmware/vmci/Makefile
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2018 VMware, Inc. All Rights Reserved.
+#
+# SPDX-License-Identifier: (BSD-2-Clause AND GPL-2.0)
+#
+
+.PATH: ${SRCTOP}/sys/dev/vmware/vmci
+
+KMOD= vmci
+SRCS= vmci.c vmci_datagram.c vmci_doorbell.c vmci_driver.c vmci_event.c
+SRCS+= vmci_hashtable.c vmci_kernel_if.c vmci_qpair.c vmci_queue_pair.c
+SRCS+= vmci_resource.c
+SRCS+= device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>
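The sizing logic in vmci_queue_pair_alloc_guest_work() and vmci_queue_pair_alloc_hypercall() can be made concrete with a short worked example. The numbers below are illustrative only and assume 4 KiB pages, that CEILING() rounds a byte count up to a whole number of pages, and that num_ppns covers the pages of both queues; the extra page per queue appears to be the queue header that vmci_queue_header_init() fills in.

	uint64_t produce_size = 64 * 1024;	/* Caller asks for 64 KiB one way... */
	uint64_t consume_size = 16 * 1024;	/* ...and 16 KiB the other way. */

	/* One extra page per queue holds that queue's header. */
	uint64_t num_produce_pages = CEILING(produce_size, PAGE_SIZE) + 1;	/* 16 + 1 = 17 */
	uint64_t num_consume_pages = CEILING(consume_size, PAGE_SIZE) + 1;	/*  4 + 1 =  5 */

	/*
	 * The allocation hypercall carries one PPN for every page of both
	 * queues, appended directly after the fixed-size message, so with
	 * 22 PPNs: msg_size = sizeof(*alloc_msg) + 22 * sizeof(PPN).
	 */
	uint64_t num_ppns = num_produce_pages + num_consume_pages;	/* 22 */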
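The local (loopback) create/attach path swaps the produce and consume queues for the attacher. A minimal sketch of that flow follows. It is illustrative only: vmci_queue_pair_alloc_guest_work() and vmci_queue_pair_detach_guest_work() are static to vmci_queue_pair.c, so a caller like this would have to live in that file; the wrapper name and the queue sizes are invented for the example; and it assumes that passing VMCI_INVALID_HANDLE on create asks the driver to pick a handle, as other VMCI ports do.

	/* Hypothetical wrapper, for illustration only. */
	static int
	vmci_local_queue_pair_example(void)
	{
		struct vmci_handle handle = VMCI_INVALID_HANDLE;
		struct vmci_queue *create_produce_q, *create_consume_q;
		struct vmci_queue *attach_produce_q, *attach_consume_q;
		int result;

		/* Create side: two pages of produce space, one page of consume space. */
		result = vmci_queue_pair_alloc_guest_work(&handle, &create_produce_q,
		    2 * PAGE_SIZE, &create_consume_q, 1 * PAGE_SIZE, VMCI_INVALID_ID,
		    VMCI_QPFLAG_LOCAL, VMCI_NO_PRIVILEGE_FLAGS);
		if (result < VMCI_SUCCESS)
			return (result);

		/*
		 * Attach side: sizes are given from the attacher's point of view,
		 * so produce and consume are swapped relative to the create call;
		 * the attacher is handed the creator's consume queue as its
		 * produce queue and vice versa.
		 */
		result = vmci_queue_pair_alloc_guest_work(&handle, &attach_produce_q,
		    1 * PAGE_SIZE, &attach_consume_q, 2 * PAGE_SIZE, VMCI_INVALID_ID,
		    VMCI_QPFLAG_LOCAL | VMCI_QPFLAG_ATTACH_ONLY,
		    VMCI_NO_PRIVILEGE_FLAGS);
		if (result < VMCI_SUCCESS) {
			(void)vmci_queue_pair_detach_guest_work(handle);
			return (result);
		}

		/* ... exchange data through the vmci_qpair layer built on top ... */

		/* Every successful create or attach is balanced by one detach. */
		(void)vmci_queue_pair_detach_guest_work(handle);
		(void)vmci_queue_pair_detach_guest_work(handle);

		return (VMCI_SUCCESS);
	}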
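The resource table in vmci_resource.[ch] is easiest to follow end to end with a small consumer. The sketch below is hypothetical: struct my_doorbell and the my_doorbell_*() functions are invented for illustration; only the vmci_resource_*() calls, RESOURCE_CONTAINER(), the handle macros, and the allocation helpers come from the patch.

	/* Hypothetical consumer object; not part of the patch. */
	struct my_doorbell {
		struct vmci_resource resource;	/* Embedded so RESOURCE_CONTAINER() works. */
		void (*fire)(void *arg);
		void *arg;
	};

	/* Free callback run by the resource layer once the last reference is gone. */
	static void
	my_doorbell_free(void *obj)
	{

		vmci_free_kernel_mem(obj, sizeof(struct my_doorbell));
	}

	static int
	my_doorbell_register(void (*fire)(void *), void *arg, struct vmci_handle *out)
	{
		struct my_doorbell *db;
		struct vmci_handle handle;
		vmci_id context_id;
		vmci_id rid;
		int result;

		/* Pick an unused resource id in this context and build a handle. */
		context_id = vmci_get_context_id();
		rid = vmci_resource_get_id(context_id);
		if (rid == VMCI_INVALID_ID)
			return (VMCI_ERROR_UNAVAILABLE);
		handle = VMCI_MAKE_HANDLE(context_id, rid);

		db = vmci_alloc_kernel_mem(sizeof(*db), VMCI_MEMORY_NORMAL);
		if (db == NULL)
			return (VMCI_ERROR_NO_MEM);
		db->fire = fire;
		db->arg = arg;

		/* Publish the object; lookups by handle are possible from here on. */
		result = vmci_resource_add(&db->resource, VMCI_RESOURCE_TYPE_DOORBELL,
		    handle, my_doorbell_free, db);
		if (result != VMCI_SUCCESS) {
			vmci_free_kernel_mem(db, sizeof(*db));
			return (result);
		}

		*out = handle;
		return (VMCI_SUCCESS);
	}

	static void
	my_doorbell_ring(struct vmci_handle handle)
	{
		struct vmci_resource *resource;
		struct my_doorbell *db;

		/* A successful get returns with a hold on the underlying entry. */
		resource = vmci_resource_get(handle, VMCI_RESOURCE_TYPE_DOORBELL);
		if (resource == NULL)
			return;

		db = RESOURCE_CONTAINER(resource, struct my_doorbell, resource);
		db->fire(db->arg);

		/* Drop the hold; my_doorbell_free() runs once the last one is gone. */
		(void)vmci_resource_release(resource);
	}

	static void
	my_doorbell_unregister(struct vmci_handle handle)
	{

		/* Drops the table's reference; the free callback fires when idle. */
		vmci_resource_remove(handle, VMCI_RESOURCE_TYPE_DOORBELL);
	}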
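vmci_hash_id() in vmci_utils.h masks with (size - 1), so it only distributes correctly when the table size is a power of two. A hypothetical bucket lookup (dg_entry, dg_table, and DG_TABLE_SIZE are invented names for the example):

	#define DG_TABLE_SIZE	64	/* Must be a power of two. */

	struct dg_entry {
		struct dg_entry *next;
		struct vmci_handle handle;
	};

	static struct dg_entry *dg_table[DG_TABLE_SIZE];

	static struct dg_entry *
	dg_lookup(struct vmci_handle handle)
	{
		struct dg_entry *e;
		int bucket;

		/* Only the resource id is hashed, not the full handle. */
		bucket = vmci_hash_id(handle.resource, DG_TABLE_SIZE);
		for (e = dg_table[bucket]; e != NULL; e = e->next)
			if (VMCI_HANDLE_EQUAL(e->handle, handle))
				return (e);
		return (NULL);
	}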