diff --git a/sys/dev/vmware/vmci/vmci.c b/sys/dev/vmware/vmci/vmci.c index bbf17bbe7e41..8adcb7f532b7 100644 --- a/sys/dev/vmware/vmci/vmci.c +++ b/sys/dev/vmware/vmci/vmci.c @@ -1,1186 +1,1189 @@ /*- * Copyright (c) 2018 VMware, Inc. * * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0) */ /* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "vmci.h" #include "vmci_doorbell.h" #include "vmci_driver.h" #include "vmci_kernel_defs.h" #include "vmci_queue_pair.h" static int vmci_probe(device_t); static int vmci_attach(device_t); static int vmci_detach(device_t); static int vmci_shutdown(device_t); static int vmci_map_bars(struct vmci_softc *); static void vmci_unmap_bars(struct vmci_softc *); static int vmci_config_capabilities(struct vmci_softc *); static int vmci_dma_malloc_int(struct vmci_softc *, bus_size_t, bus_size_t, struct vmci_dma_alloc *); static void vmci_dma_free_int(struct vmci_softc *, struct vmci_dma_alloc *); static int vmci_config_interrupts(struct vmci_softc *); static int vmci_config_interrupt(struct vmci_softc *); static int vmci_check_intr_cnt(struct vmci_softc *); static int vmci_allocate_interrupt_resources(struct vmci_softc *); static int vmci_setup_interrupts(struct vmci_softc *); static void vmci_dismantle_interrupts(struct vmci_softc *); static void vmci_interrupt(void *); static void vmci_interrupt_bm(void *); static void dispatch_datagrams(void *, int); static void process_bitmap(void *, int); static void vmci_delayed_work_fn_cb(void *context, int data); static device_method_t vmci_methods[] = { /* Device interface. */ DEVMETHOD(device_probe, vmci_probe), DEVMETHOD(device_attach, vmci_attach), DEVMETHOD(device_detach, vmci_detach), DEVMETHOD(device_shutdown, vmci_shutdown), DEVMETHOD_END }; static driver_t vmci_driver = { "vmci", vmci_methods, sizeof(struct vmci_softc) }; static devclass_t vmci_devclass; DRIVER_MODULE(vmci, pci, vmci_driver, vmci_devclass, 0, 0); MODULE_VERSION(vmci, VMCI_VERSION); const struct { uint16_t vendor; uint16_t device; const char *desc; } vmci_ids[] = { { VMCI_VMWARE_VENDOR_ID, VMCI_VMWARE_DEVICE_ID, "VMware Virtual Machine Communication Interface" }, }; MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, vmci, vmci_ids, nitems(vmci_ids)); MODULE_DEPEND(vmci, pci, 1, 1, 1); static struct vmci_softc *vmci_sc; #define LGPFX "vmci: " /* * Allocate a buffer for incoming datagrams globally to avoid repeated * allocation in the interrupt handler's atomic context. */ static uint8_t *data_buffer = NULL; static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE; struct vmci_delayed_work_info { vmci_work_fn *work_fn; void *data; vmci_list_item(vmci_delayed_work_info) entry; }; /* *------------------------------------------------------------------------------ * * vmci_probe -- * * Probe to see if the VMCI device is present. * * Results: * BUS_PROBE_DEFAULT if device exists, ENXIO otherwise. * * Side effects: * None. 
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_probe(device_t dev)
{

	if (pci_get_vendor(dev) == vmci_ids[0].vendor &&
	    pci_get_device(dev) == vmci_ids[0].device) {
		device_set_desc(dev, vmci_ids[0].desc);
		return (BUS_PROBE_DEFAULT);
	}

	return (ENXIO);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_attach --
 *
 *     Attach VMCI device to the system after vmci_probe() has been called and
 *     the device has been detected.
 *
 * Results:
 *     0 if success, ENXIO otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_attach(device_t dev)
{
	struct vmci_softc *sc;
	int error, i;

	sc = device_get_softc(dev);
	sc->vmci_dev = dev;
	vmci_sc = sc;

	data_buffer = NULL;
	sc->vmci_num_intr = 0;
	for (i = 0; i < VMCI_MAX_INTRS; i++) {
		sc->vmci_intrs[i].vmci_irq = NULL;
		sc->vmci_intrs[i].vmci_handler = NULL;
	}

	TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
	TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);
	TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);

	pci_enable_busmaster(dev);

	mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
	mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
	    NULL, MTX_DEF);

	error = vmci_map_bars(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
		goto fail;
	}

	error = vmci_config_capabilities(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
		goto fail;
	}

	vmci_list_init(&sc->vmci_delayed_work_infos);

	vmci_components_init();
	vmci_util_init();
	error = vmci_qp_guest_endpoints_init();
	if (error) {
		VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
		goto fail;
	}

	error = vmci_config_interrupts(sc);
	if (error)
		VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");

fail:
	if (error) {
		vmci_detach(dev);
		return (ENXIO);
	}

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_detach --
 *
 *     Detach the VMCI device.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_detach(device_t dev)
{
	struct vmci_softc *sc;

	sc = device_get_softc(dev);

	vmci_qp_guest_endpoints_exit();
	vmci_util_exit();

	vmci_dismantle_interrupts(sc);
	vmci_components_cleanup();

-	taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
-	mtx_destroy(&sc->vmci_delayed_work_lock);
+	if (mtx_initialized(&sc->vmci_spinlock)) {
+		taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
+		mtx_destroy(&sc->vmci_delayed_work_lock);
+	}

	if (sc->vmci_res0 != NULL)
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);

	if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
		vmci_dma_free(&sc->vmci_notifications_bitmap);

	vmci_unmap_bars(sc);

-	mtx_destroy(&sc->vmci_spinlock);
+	if (mtx_initialized(&sc->vmci_spinlock))
+		mtx_destroy(&sc->vmci_spinlock);

	pci_disable_busmaster(dev);

	return (0);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_shutdown --
 *
 *     This function is called during system shutdown. We don't do anything.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     None.
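The guards added above follow from the attach path: vmci_attach() funnels every failure through its fail: label into vmci_detach(), so the teardown code must tolerate a softc that was only partially set up. A minimal sketch of that guard-the-teardown pattern, assuming a hypothetical example_softc (not part of this change; only mtx_initialized(), mtx_destroy() and bus_release_resource() are real kernel interfaces):

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rman.h>

/* Hypothetical softc, used only for illustration. */
struct example_softc {
	device_t	 dev;
	struct resource	*res;
	int		 rid;
	struct mtx	 lock;
};

static void
example_teardown(struct example_softc *sc)
{

	/* Release the resource only if it was actually allocated. */
	if (sc->res != NULL)
		bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->rid,
		    sc->res);
	/* Destroy the mutex only if mtx_init() ever ran on it. */
	if (mtx_initialized(&sc->lock))
		mtx_destroy(&sc->lock);
}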
* *------------------------------------------------------------------------------ */ static int vmci_shutdown(device_t dev) { return (0); } /* *------------------------------------------------------------------------------ * * vmci_map_bars -- * * Maps the PCI I/O and MMIO BARs. * * Results: * 0 on success, ENXIO otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static int vmci_map_bars(struct vmci_softc *sc) { int rid; /* Map the PCI I/O BAR: BAR0 */ rid = PCIR_BAR(0); sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (sc->vmci_res0 == NULL) { VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n"); return (ENXIO); } sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0); sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0); sc->vmci_ioaddr = rman_get_start(sc->vmci_res0); /* Map the PCI MMIO BAR: BAR1 */ rid = PCIR_BAR(1); sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmci_res1 == NULL) { VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n"); return (ENXIO); } sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1); sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1); return (0); } /* *------------------------------------------------------------------------------ * * vmci_unmap_bars -- * * Unmaps the VMCI PCI I/O and MMIO BARs. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void vmci_unmap_bars(struct vmci_softc *sc) { int rid; if (sc->vmci_res0 != NULL) { rid = PCIR_BAR(0); bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid, sc->vmci_res0); sc->vmci_res0 = NULL; } if (sc->vmci_res1 != NULL) { rid = PCIR_BAR(1); bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid, sc->vmci_res1); sc->vmci_res1 = NULL; } } /* *------------------------------------------------------------------------------ * * vmci_config_capabilities -- * * Check the VMCI device capabilities and configure the device accordingly. * * Results: * 0 if success, ENODEV otherwise. * * Side effects: * Device capabilities are enabled. * *------------------------------------------------------------------------------ */ static int vmci_config_capabilities(struct vmci_softc *sc) { unsigned long bitmap_PPN; int error; /* * Verify that the VMCI device supports the capabilities that we * need. Datagrams are necessary and notifications will be used * if the device supports it. */ sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0, VMCI_CAPS_ADDR); if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) { VMCI_LOG_ERROR(LGPFX"VMCI device does not support " "datagrams.\n"); return (ENODEV); } if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) { sc->capabilities = VMCI_CAPS_DATAGRAM; error = vmci_dma_malloc(PAGE_SIZE, 1, &sc->vmci_notifications_bitmap); if (error) VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for " "notification bitmap.\n"); else { memset(sc->vmci_notifications_bitmap.dma_vaddr, 0, PAGE_SIZE); sc->capabilities |= VMCI_CAPS_NOTIFICATIONS; } } else sc->capabilities = VMCI_CAPS_DATAGRAM; /* Let the host know which capabilities we intend to use. */ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, VMCI_CAPS_ADDR, sc->capabilities); /* * Register notification bitmap with device if that capability is * used. */ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) { bitmap_PPN = sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT; vmci_register_notification_bitmap(bitmap_PPN); } /* Check host capabilities. 
*/ if (!vmci_check_host_capabilities()) return (ENODEV); return (0); } /* *------------------------------------------------------------------------------ * * vmci_dmamap_cb -- * * Callback to receive mapping information resulting from the load of a * bus_dmamap_t via bus_dmamap_load() * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr = arg; if (error == 0) *baddr = segs->ds_addr; } /* *------------------------------------------------------------------------------ * * vmci_dma_malloc_int -- * * Internal function that allocates DMA memory. * * Results: * 0 if success. * ENOMEM if insufficient memory. * EINPROGRESS if mapping is deferred. * EINVAL if the request was invalid. * * Side effects: * DMA memory is allocated. * *------------------------------------------------------------------------------ */ static int vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma) { int error; bzero(dma, sizeof(struct vmci_dma_alloc)); error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev), align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &dma->dma_tag); if (error) { VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error); goto fail; } error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map); if (error) { VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error); goto fail; } error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT); if (error) { VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error); goto fail; } dma->dma_size = size; fail: if (error) vmci_dma_free(dma); return (error); } /* *------------------------------------------------------------------------------ * * vmci_dma_malloc -- * * This function is a wrapper around vmci_dma_malloc_int for callers * outside of this module. Since we only support a single VMCI device, this * wrapper provides access to the device softc structure. * * Results: * 0 if success. * ENOMEM if insufficient memory. * EINPROGRESS if mapping is deferred. * EINVAL if the request was invalid. * * Side effects: * DMA memory is allocated. * *------------------------------------------------------------------------------ */ int vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma) { return (vmci_dma_malloc_int(vmci_sc, size, align, dma)); } /* *------------------------------------------------------------------------------ * * vmci_dma_free_int -- * * Internal function that frees DMA memory. * * Results: * None. * * Side effects: * Frees DMA memory. 
* *------------------------------------------------------------------------------ */ static void vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma) { if (dma->dma_tag != NULL) { if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); } if (dma->dma_vaddr != NULL) bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); } bzero(dma, sizeof(struct vmci_dma_alloc)); } /* *------------------------------------------------------------------------------ * * vmci_dma_free -- * * This function is a wrapper around vmci_dma_free_int for callers outside * of this module. Since we only support a single VMCI device, this wrapper * provides access to the device softc structure. * * Results: * None. * * Side effects: * Frees DMA memory. * *------------------------------------------------------------------------------ */ void vmci_dma_free(struct vmci_dma_alloc *dma) { vmci_dma_free_int(vmci_sc, dma); } /* *------------------------------------------------------------------------------ * * vmci_config_interrupts -- * * Configures and enables interrupts. Try to configure MSI-X. If this fails, * try to configure MSI. If even this fails, try legacy interrupts. * * Results: * 0 if success. * ENOMEM if insufficient memory. * ENODEV if the device doesn't support interrupts. * ENXIO if the device configuration failed. * * Side effects: * Interrupts get enabled if successful. * *------------------------------------------------------------------------------ */ static int vmci_config_interrupts(struct vmci_softc *sc) { int error; data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT); if (data_buffer == NULL) return (ENOMEM); sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX; error = vmci_config_interrupt(sc); if (error) { sc->vmci_intr_type = VMCI_INTR_TYPE_MSI; error = vmci_config_interrupt(sc); } if (error) { sc->vmci_intr_type = VMCI_INTR_TYPE_INTX; error = vmci_config_interrupt(sc); } if (error) return (error); /* Enable specific interrupt bits. */ if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION); else bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM); /* Enable interrupts. */ bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0, VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE); return (0); } /* *------------------------------------------------------------------------------ * * vmci_config_interrupt -- * * Check the number of interrupts supported, allocate resources and setup * interrupts. * * Results: * 0 if success. * ENOMEM if insufficient memory. * ENODEV if the device doesn't support interrupts. * ENXIO if the device configuration failed. * * Side effects: * Resources get allocated and interrupts get setup (but not enabled) if * successful. 
* *------------------------------------------------------------------------------ */ static int vmci_config_interrupt(struct vmci_softc *sc) { int error; error = vmci_check_intr_cnt(sc); if (error) return (error); error = vmci_allocate_interrupt_resources(sc); if (error) return (error); error = vmci_setup_interrupts(sc); if (error) return (error); return (0); } /* *------------------------------------------------------------------------------ * * vmci_check_intr_cnt -- * * Check the number of interrupts supported by the device and ask PCI bus * to allocate appropriate number of interrupts. * * Results: * 0 if success. * ENODEV if the device doesn't support any interrupts. * ENXIO if the device configuration failed. * * Side effects: * Resources get allocated on success. * *------------------------------------------------------------------------------ */ static int vmci_check_intr_cnt(struct vmci_softc *sc) { if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) { sc->vmci_num_intr = 1; return (0); } /* * Make sure that the device supports the required number of MSI/MSI-X * messages. We try for 2 MSI-X messages but 1 is good too. We need at * least 1 MSI message. */ sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ? pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev); if (!sc->vmci_num_intr) { VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt" " messages"); return (ENODEV); } sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ? VMCI_MAX_INTRS : 1; if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) { if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr)) return (ENXIO); } else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) { if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr)) return (ENXIO); } return (0); } /* *------------------------------------------------------------------------------ * * vmci_allocate_interrupt_resources -- * * Allocate resources necessary for interrupts. * * Results: * 0 if success, ENXIO otherwise. * * Side effects: * Resources get allocated on success. * *------------------------------------------------------------------------------ */ static int vmci_allocate_interrupt_resources(struct vmci_softc *sc) { struct resource *irq; int flags, i, rid; flags = RF_ACTIVE; flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0; rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1; for (i = 0; i < sc->vmci_num_intr; i++, rid++) { irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid, flags); if (irq == NULL) return (ENXIO); sc->vmci_intrs[i].vmci_irq = irq; sc->vmci_intrs[i].vmci_rid = rid; } return (0); } /* *------------------------------------------------------------------------------ * * vmci_setup_interrupts -- * * Sets up the interrupts. * * Results: * 0 if success, appropriate error code from bus_setup_intr otherwise. * * Side effects: * Interrupt handler gets attached. 
* *------------------------------------------------------------------------------ */ static int vmci_setup_interrupts(struct vmci_softc *sc) { struct vmci_interrupt *intr; int error, flags; flags = INTR_TYPE_NET | INTR_MPSAFE; if (sc->vmci_num_intr > 1) flags |= INTR_EXCL; intr = &sc->vmci_intrs[0]; error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL, vmci_interrupt, NULL, &intr->vmci_handler); if (error) return (error); if (sc->vmci_num_intr == 2) { bus_describe_intr(sc->vmci_dev, intr->vmci_irq, intr->vmci_handler, "dg"); intr = &sc->vmci_intrs[1]; error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler); if (error) return (error); bus_describe_intr(sc->vmci_dev, intr->vmci_irq, intr->vmci_handler, "bm"); } return (0); } /* *------------------------------------------------------------------------------ * * vmci_interrupt -- * * Interrupt handler for legacy or MSI interrupt, or for first MSI-X * interrupt (vector VMCI_INTR_DATAGRAM). * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void vmci_interrupt(void *arg) { if (vmci_sc->vmci_num_intr == 2) taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_dq_task); else { unsigned int icr; icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR); if (icr == 0 || icr == 0xffffffff) return; if (icr & VMCI_ICR_DATAGRAM) { taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_dq_task); icr &= ~VMCI_ICR_DATAGRAM; } if (icr & VMCI_ICR_NOTIFICATION) { taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task); icr &= ~VMCI_ICR_NOTIFICATION; } if (icr != 0) VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt " "cause"); } } /* *------------------------------------------------------------------------------ * * vmci_interrupt_bm -- * * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION, * which is for the notification bitmap. Will only get called if we are * using MSI-X with exclusive vectors. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void vmci_interrupt_bm(void *arg) { ASSERT(vmci_sc->vmci_num_intr == 2); taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task); } /* *------------------------------------------------------------------------------ * * dispatch_datagrams -- * * Reads and dispatches incoming datagrams. * * Results: * None. * * Side effects: * Reads data from the device. * *------------------------------------------------------------------------------ */ static void dispatch_datagrams(void *context, int data) { if (data_buffer == NULL) VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer " "present"); vmci_read_datagrams_from_port((vmci_io_handle) 0, vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR, data_buffer, data_buffer_size); } /* *------------------------------------------------------------------------------ * * process_bitmap -- * * Scans the notification bitmap for raised flags, clears them and handles * the notifications. * * Results: * None. * * Side effects: * None. 
* *------------------------------------------------------------------------------ */ static void process_bitmap(void *context, int data) { if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL) VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present"); vmci_scan_notification_bitmap( vmci_sc->vmci_notifications_bitmap.dma_vaddr); } /* *------------------------------------------------------------------------------ * * vmci_dismantle_interrupts -- * * Releases resources, detaches the interrupt handler and drains the task * queue. * * Results: * None. * * Side effects: * No more interrupts. * *------------------------------------------------------------------------------ */ static void vmci_dismantle_interrupts(struct vmci_softc *sc) { struct vmci_interrupt *intr; int i; for (i = 0; i < sc->vmci_num_intr; i++) { intr = &sc->vmci_intrs[i]; if (intr->vmci_handler != NULL) { bus_teardown_intr(sc->vmci_dev, intr->vmci_irq, intr->vmci_handler); intr->vmci_handler = NULL; } if (intr->vmci_irq != NULL) { bus_release_resource(sc->vmci_dev, SYS_RES_IRQ, intr->vmci_rid, intr->vmci_irq); intr->vmci_irq = NULL; intr->vmci_rid = -1; } } if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) && (sc->vmci_num_intr)) pci_release_msi(sc->vmci_dev); taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task); taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task); if (data_buffer != NULL) free(data_buffer, M_DEVBUF); } /* *------------------------------------------------------------------------------ * * vmci_delayed_work_fn_cb -- * * Callback function that executes the queued up delayed work functions. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void vmci_delayed_work_fn_cb(void *context, int data) { vmci_list(vmci_delayed_work_info) temp_list; vmci_list_init(&temp_list); /* * Swap vmci_delayed_work_infos list with the empty temp_list while * holding a lock. vmci_delayed_work_infos would then be an empty list * and temp_list would contain the elements from the original * vmci_delayed_work_infos. Finally, iterate through temp_list * executing the delayed callbacks. */ mtx_lock(&vmci_sc->vmci_delayed_work_lock); vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos, vmci_delayed_work_info, entry); mtx_unlock(&vmci_sc->vmci_delayed_work_lock); while (!vmci_list_empty(&temp_list)) { struct vmci_delayed_work_info *delayed_work_info = vmci_list_first(&temp_list); delayed_work_info->work_fn(delayed_work_info->data); vmci_list_remove(delayed_work_info, entry); vmci_free_kernel_mem(delayed_work_info, sizeof(*delayed_work_info)); } } /* *------------------------------------------------------------------------------ * * vmci_schedule_delayed_work_fn -- * * Schedule the specified callback. * * Results: * 0 if success, error code otherwise. * * Side effects: * None. 
* *------------------------------------------------------------------------------ */ int vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data) { struct vmci_delayed_work_info *delayed_work_info; delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info), VMCI_MEMORY_ATOMIC); if (!delayed_work_info) return (VMCI_ERROR_NO_MEM); delayed_work_info->work_fn = work_fn; delayed_work_info->data = data; mtx_lock(&vmci_sc->vmci_delayed_work_lock); vmci_list_insert(&vmci_sc->vmci_delayed_work_infos, delayed_work_info, entry); mtx_unlock(&vmci_sc->vmci_delayed_work_lock); taskqueue_enqueue(taskqueue_thread, &vmci_sc->vmci_delayed_work_task); return (VMCI_SUCCESS); } /* *------------------------------------------------------------------------------ * * vmci_send_datagram -- * * VM to hypervisor call mechanism. * * Results: * The result of the hypercall. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_send_datagram(struct vmci_datagram *dg) { int result; if (dg == NULL) return (VMCI_ERROR_INVALID_ARGS); /* * Need to acquire spinlock on the device because * the datagram data may be spread over multiple pages and the monitor * may interleave device user rpc calls from multiple VCPUs. Acquiring * the spinlock precludes that possibility. Disabling interrupts to * avoid incoming datagrams during a "rep out" and possibly landing up * in this function. */ mtx_lock_spin(&vmci_sc->vmci_spinlock); /* * Send the datagram and retrieve the return value from the result * register. */ __asm__ __volatile__( "cld\n\t" "rep outsb\n\t" : /* No output. */ : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR), "c"(VMCI_DG_SIZE(dg)), "S"(dg) ); /* * XXX: Should read result high port as well when updating handlers to * return 64bit. */ result = bus_space_read_4(vmci_sc->vmci_iot0, vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR); mtx_unlock_spin(&vmci_sc->vmci_spinlock); return (result); } diff --git a/sys/dev/vmware/vmci/vmci_event.c b/sys/dev/vmware/vmci/vmci_event.c index 7f3bf9039e12..192828cc6f6a 100644 --- a/sys/dev/vmware/vmci/vmci_event.c +++ b/sys/dev/vmware/vmci/vmci_event.c @@ -1,695 +1,698 @@ /*- * Copyright (c) 2018 VMware, Inc. * * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0) */ /* This file implements VMCI Event code. 
*/ #include __FBSDID("$FreeBSD$"); #include "vmci.h" #include "vmci_driver.h" #include "vmci_event.h" #include "vmci_kernel_api.h" #include "vmci_kernel_defs.h" #include "vmci_kernel_if.h" #define LGPFX "vmci_event: " #define EVENT_MAGIC 0xEABE0000 struct vmci_subscription { vmci_id id; int ref_count; bool run_delayed; vmci_event destroy_event; vmci_event_type event; vmci_event_cb callback; void *callback_data; vmci_list_item(vmci_subscription) subscriber_list_item; }; static struct vmci_subscription *vmci_event_find(vmci_id sub_id); static int vmci_event_deliver(struct vmci_event_msg *event_msg); static int vmci_event_register_subscription(struct vmci_subscription *sub, vmci_event_type event, uint32_t flags, vmci_event_cb callback, void *callback_data); static struct vmci_subscription *vmci_event_unregister_subscription( vmci_id sub_id); static vmci_list(vmci_subscription) subscriber_array[VMCI_EVENT_MAX]; static vmci_lock subscriber_lock; struct vmci_delayed_event_info { struct vmci_subscription *sub; uint8_t event_payload[sizeof(struct vmci_event_data_max)]; }; struct vmci_event_ref { struct vmci_subscription *sub; vmci_list_item(vmci_event_ref) list_item; }; /* *------------------------------------------------------------------------------ * * vmci_event_init -- * * General init code. * * Results: * VMCI_SUCCESS on success, appropriate error code otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_event_init(void) { int i; for (i = 0; i < VMCI_EVENT_MAX; i++) vmci_list_init(&subscriber_array[i]); return (vmci_init_lock(&subscriber_lock, "VMCI Event subscriber lock")); } /* *------------------------------------------------------------------------------ * * vmci_event_exit -- * * General exit code. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_event_exit(void) { struct vmci_subscription *iter, *iter_2; vmci_event_type e; /* We free all memory at exit. */ for (e = 0; e < VMCI_EVENT_MAX; e++) { vmci_list_scan_safe(iter, &subscriber_array[e], subscriber_list_item, iter_2) { /* * We should never get here because all events should * have been unregistered before we try to unload the * driver module. Also, delayed callbacks could still * be firing so this cleanup would not be safe. Still * it is better to free the memory than not ... so we * leave this code in just in case.... */ ASSERT(false); vmci_free_kernel_mem(iter, sizeof(*iter)); } } vmci_cleanup_lock(&subscriber_lock); } /* *------------------------------------------------------------------------------ * * vmci_event_sync -- * * Use this as a synchronization point when setting globals, for example, * during device shutdown. * * Results: * true. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_event_sync(void) { vmci_grab_lock_bh(&subscriber_lock); vmci_release_lock_bh(&subscriber_lock); } /* *------------------------------------------------------------------------------ * * vmci_event_check_host_capabilities -- * * Verify that the host supports the hypercalls we need. If it does not, * try to find fallback hypercalls and use those instead. * * Results: * true if required hypercalls (or fallback hypercalls) are * supported by the host, false otherwise. * * Side effects: * None. 
* *------------------------------------------------------------------------------ */ bool vmci_event_check_host_capabilities(void) { /* vmci_event does not require any hypercalls. */ return (true); } /* *------------------------------------------------------------------------------ * * vmci_event_get -- * * Gets a reference to the given struct vmci_subscription. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void vmci_event_get(struct vmci_subscription *entry) { ASSERT(entry); entry->ref_count++; } /* *------------------------------------------------------------------------------ * * vmci_event_release -- * * Releases the given struct vmci_subscription. * * Results: * None. * * Side effects: * Fires the destroy event if the reference count has gone to zero. * *------------------------------------------------------------------------------ */ static void vmci_event_release(struct vmci_subscription *entry) { ASSERT(entry); ASSERT(entry->ref_count > 0); entry->ref_count--; if (entry->ref_count == 0) vmci_signal_event(&entry->destroy_event); } /* *------------------------------------------------------------------------------ * * event_release_cb -- * * Callback to release the event entry reference. It is called by the * vmci_wait_on_event function before it blocks. * * Result: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static int event_release_cb(void *client_data) { struct vmci_subscription *sub = (struct vmci_subscription *)client_data; ASSERT(sub); vmci_grab_lock_bh(&subscriber_lock); vmci_event_release(sub); vmci_release_lock_bh(&subscriber_lock); return (0); } /* *------------------------------------------------------------------------------ * * vmci_event_find -- * * Find entry. Assumes lock is held. * * Results: * Entry if found, NULL if not. * * Side effects: * Increments the struct vmci_subscription refcount if an entry is found. * *------------------------------------------------------------------------------ */ static struct vmci_subscription * vmci_event_find(vmci_id sub_id) { struct vmci_subscription *iter; vmci_event_type e; for (e = 0; e < VMCI_EVENT_MAX; e++) { vmci_list_scan(iter, &subscriber_array[e], subscriber_list_item) { if (iter->id == sub_id) { vmci_event_get(iter); return (iter); } } } return (NULL); } /* *------------------------------------------------------------------------------ * * vmci_event_delayed_dispatch_cb -- * * Calls the specified callback in a delayed context. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void vmci_event_delayed_dispatch_cb(void *data) { struct vmci_delayed_event_info *event_info; struct vmci_subscription *sub; struct vmci_event_data *ed; event_info = (struct vmci_delayed_event_info *)data; ASSERT(event_info); ASSERT(event_info->sub); sub = event_info->sub; ed = (struct vmci_event_data *)event_info->event_payload; sub->callback(sub->id, ed, sub->callback_data); vmci_grab_lock_bh(&subscriber_lock); vmci_event_release(sub); vmci_release_lock_bh(&subscriber_lock); vmci_free_kernel_mem(event_info, sizeof(*event_info)); } /* *------------------------------------------------------------------------------ * * vmci_event_deliver -- * * Actually delivers the events to the subscribers. * * Results: * None. * * Side effects: * The callback function for each subscriber is invoked. 
* *------------------------------------------------------------------------------ */ static int vmci_event_deliver(struct vmci_event_msg *event_msg) { struct vmci_subscription *iter; int err = VMCI_SUCCESS; vmci_list(vmci_event_ref) no_delay_list; vmci_list_init(&no_delay_list); ASSERT(event_msg); vmci_grab_lock_bh(&subscriber_lock); vmci_list_scan(iter, &subscriber_array[event_msg->event_data.event], subscriber_list_item) { if (iter->run_delayed) { struct vmci_delayed_event_info *event_info; if ((event_info = vmci_alloc_kernel_mem(sizeof(*event_info), VMCI_MEMORY_ATOMIC)) == NULL) { err = VMCI_ERROR_NO_MEM; goto out; } vmci_event_get(iter); memset(event_info, 0, sizeof(*event_info)); memcpy(event_info->event_payload, VMCI_DG_PAYLOAD(event_msg), (size_t)event_msg->hdr.payload_size); event_info->sub = iter; err = vmci_schedule_delayed_work( vmci_event_delayed_dispatch_cb, event_info); if (err != VMCI_SUCCESS) { vmci_event_release(iter); vmci_free_kernel_mem( event_info, sizeof(*event_info)); goto out; } } else { struct vmci_event_ref *event_ref; /* * We construct a local list of subscribers and release * subscriber_lock before invoking the callbacks. This * is similar to delayed callbacks, but callbacks are * invoked right away here. */ if ((event_ref = vmci_alloc_kernel_mem( sizeof(*event_ref), VMCI_MEMORY_ATOMIC)) == NULL) { err = VMCI_ERROR_NO_MEM; goto out; } vmci_event_get(iter); event_ref->sub = iter; vmci_list_insert(&no_delay_list, event_ref, list_item); } } out: vmci_release_lock_bh(&subscriber_lock); if (!vmci_list_empty(&no_delay_list)) { struct vmci_event_data *ed; struct vmci_event_ref *iter; struct vmci_event_ref *iter_2; vmci_list_scan_safe(iter, &no_delay_list, list_item, iter_2) { struct vmci_subscription *cur; uint8_t event_payload[sizeof( struct vmci_event_data_max)]; cur = iter->sub; /* * We set event data before each callback to ensure * isolation. */ memset(event_payload, 0, sizeof(event_payload)); memcpy(event_payload, VMCI_DG_PAYLOAD(event_msg), (size_t)event_msg->hdr.payload_size); ed = (struct vmci_event_data *)event_payload; cur->callback(cur->id, ed, cur->callback_data); vmci_grab_lock_bh(&subscriber_lock); vmci_event_release(cur); vmci_release_lock_bh(&subscriber_lock); vmci_free_kernel_mem(iter, sizeof(*iter)); } } return (err); } /* *------------------------------------------------------------------------------ * * vmci_event_dispatch -- * * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all * subscribers for given event. * * Results: * VMCI_SUCCESS on success, error code otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_event_dispatch(struct vmci_datagram *msg) { struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg; ASSERT(msg && msg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && msg->dst.resource == VMCI_EVENT_HANDLER); if (msg->payload_size < sizeof(vmci_event_type) || msg->payload_size > sizeof(struct vmci_event_data_max)) return (VMCI_ERROR_INVALID_ARGS); if (!VMCI_EVENT_VALID(event_msg->event_data.event)) return (VMCI_ERROR_EVENT_UNKNOWN); vmci_event_deliver(event_msg); return (VMCI_SUCCESS); } /* *------------------------------------------------------------------------------ * * vmci_event_register_subscription -- * * Initialize and add subscription to subscriber list. * * Results: * VMCI_SUCCESS on success, error code otherwise. * * Side effects: * None. 
* *------------------------------------------------------------------------------ */ static int vmci_event_register_subscription(struct vmci_subscription *sub, vmci_event_type event, uint32_t flags, vmci_event_cb callback, void *callback_data) { #define VMCI_EVENT_MAX_ATTEMPTS 10 static vmci_id subscription_id = 0; int result; uint32_t attempts = 0; bool success; ASSERT(sub); if (!VMCI_EVENT_VALID(event) || callback == NULL) { VMCI_LOG_DEBUG(LGPFX"Failed to subscribe to event" " (type=%d) (callback=%p) (data=%p).\n", event, callback, callback_data); return (VMCI_ERROR_INVALID_ARGS); } if (!vmci_can_schedule_delayed_work()) { /* * If the platform doesn't support delayed work callbacks then * don't allow registration for them. */ if (flags & VMCI_FLAG_EVENT_DELAYED_CB) return (VMCI_ERROR_INVALID_ARGS); sub->run_delayed = false; } else { /* * The platform supports delayed work callbacks. Honor the * requested flags */ sub->run_delayed = (flags & VMCI_FLAG_EVENT_DELAYED_CB) ? true : false; } sub->ref_count = 1; sub->event = event; sub->callback = callback; sub->callback_data = callback_data; vmci_grab_lock_bh(&subscriber_lock); for (success = false, attempts = 0; success == false && attempts < VMCI_EVENT_MAX_ATTEMPTS; attempts++) { struct vmci_subscription *existing_sub = NULL; /* * We try to get an id a couple of time before claiming we are * out of resources. */ sub->id = ++subscription_id; /* Test for duplicate id. */ existing_sub = vmci_event_find(sub->id); if (existing_sub == NULL) { /* We succeeded if we didn't find a duplicate. */ success = true; } else vmci_event_release(existing_sub); } if (success) { vmci_create_event(&sub->destroy_event); vmci_list_insert(&subscriber_array[event], sub, subscriber_list_item); result = VMCI_SUCCESS; } else result = VMCI_ERROR_NO_RESOURCES; vmci_release_lock_bh(&subscriber_lock); return (result); #undef VMCI_EVENT_MAX_ATTEMPTS } /* *------------------------------------------------------------------------------ * * vmci_event_unregister_subscription -- * * Remove subscription from subscriber list. * * Results: * struct vmci_subscription when found, NULL otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static struct vmci_subscription * vmci_event_unregister_subscription(vmci_id sub_id) { struct vmci_subscription *s; + if (!vmci_initialized_lock(&subscriber_lock)) + return NULL; + vmci_grab_lock_bh(&subscriber_lock); s = vmci_event_find(sub_id); if (s != NULL) { vmci_event_release(s); vmci_list_remove(s, subscriber_list_item); } vmci_release_lock_bh(&subscriber_lock); if (s != NULL) { vmci_wait_on_event(&s->destroy_event, event_release_cb, s); vmci_destroy_event(&s->destroy_event); } return (s); } /* *------------------------------------------------------------------------------ * * vmci_event_subscribe -- * * Subscribe to given event. The callback specified can be fired in * different contexts depending on what flag is specified while registering. * If flags contains VMCI_FLAG_EVENT_NONE then the callback is fired with * the subscriber lock held (and BH context on the guest). If flags contain * VMCI_FLAG_EVENT_DELAYED_CB then the callback is fired with no locks held * in thread context. This is useful because other vmci_event functions can * be called, but it also increases the chances that an event will be * dropped. * * Results: * VMCI_SUCCESS on success, error code otherwise. * * Side effects: * None. 
 *
 *------------------------------------------------------------------------------
 */

int
vmci_event_subscribe(vmci_event_type event, vmci_event_cb callback,
    void *callback_data, vmci_id *subscription_id)
{
	int retval;
	uint32_t flags = VMCI_FLAG_EVENT_NONE;
	struct vmci_subscription *s = NULL;

	if (subscription_id == NULL) {
		VMCI_LOG_DEBUG(LGPFX"Invalid subscription (NULL).\n");
		return (VMCI_ERROR_INVALID_ARGS);
	}

	s = vmci_alloc_kernel_mem(sizeof(*s), VMCI_MEMORY_NORMAL);
	if (s == NULL)
		return (VMCI_ERROR_NO_MEM);

	retval = vmci_event_register_subscription(s, event, flags,
	    callback, callback_data);
	if (retval < VMCI_SUCCESS) {
		vmci_free_kernel_mem(s, sizeof(*s));
		return (retval);
	}

	*subscription_id = s->id;
	return (retval);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_event_unsubscribe --
 *
 *     Unsubscribe to given event. Removes it from list and frees it.
 *     Will return callback_data if requested by caller.
 *
 * Results:
 *     VMCI_SUCCESS on success, error code otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_event_unsubscribe(vmci_id sub_id)
{
	struct vmci_subscription *s;

	/*
	 * Return subscription. At this point we know noone else is accessing
	 * the subscription so we can free it.
	 */
	s = vmci_event_unregister_subscription(sub_id);
	if (s == NULL)
		return (VMCI_ERROR_NOT_FOUND);
	vmci_free_kernel_mem(s, sizeof(*s));

	return (VMCI_SUCCESS);
}

diff --git a/sys/dev/vmware/vmci/vmci_kernel_if.c b/sys/dev/vmware/vmci/vmci_kernel_if.c
index e845650873b5..de54a8d1ca4f 100644
--- a/sys/dev/vmware/vmci/vmci_kernel_if.c
+++ b/sys/dev/vmware/vmci/vmci_kernel_if.c
@@ -1,1068 +1,1114 @@
/*-
 * Copyright (c) 2018 VMware, Inc.
 *
 * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
 */

/* This file implements defines and helper functions. */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include

#include "vmci.h"
#include "vmci_defs.h"
#include "vmci_kernel_defs.h"
#include "vmci_kernel_if.h"
#include "vmci_queue.h"

struct vmci_queue_kernel_if {
	size_t			num_pages;	/* Num pages incl. header. */
	struct vmci_dma_alloc	*dmas;		/* For dma alloc. */
};

/*
 *------------------------------------------------------------------------------
 *
 * vmci_init_lock
 *
 *     Initializes the lock. Must be called before use.
 *
 * Results:
 *     Always VMCI_SUCCESS.
 *
 * Side effects:
 *     Thread can block.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_init_lock(vmci_lock *lock, char *name)
{

	mtx_init(lock, name, NULL, MTX_DEF | MTX_NOWITNESS);
	return (VMCI_SUCCESS);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_cleanup_lock
 *
 *     Cleanup the lock. Must be called before deallocating lock.
 *
 * Results:
 *     None
 *
 * Side effects:
 *     Deletes kernel lock state
 *
 *------------------------------------------------------------------------------
 */

void
vmci_cleanup_lock(vmci_lock *lock)
{

-	mtx_destroy(lock);
+	if (mtx_initialized(lock))
+		mtx_destroy(lock);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_grab_lock
 *
 *     Grabs the given lock.
 *
 * Results:
 *     None
 *
 * Side effects:
 *     Thread can block.
 *
 *------------------------------------------------------------------------------
 */

void
vmci_grab_lock(vmci_lock *lock)
{

	mtx_lock(lock);
}

/*
 *------------------------------------------------------------------------------
 *
 * vmci_release_lock
 *
 *     Releases the given lock.
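With this change vmci_cleanup_lock() degrades to a no-op on a lock that was never initialized, so exit paths can call it unconditionally even when the matching init step never ran (for instance after a failed attach). A short sketch of the intended calling pattern, assuming hypothetical example_* names and that vmci_lock and the helper prototypes come from vmci_kernel_if.h:

#include "vmci_kernel_if.h"

static vmci_lock example_lock;		/* hypothetical lock, for illustration */

static int
example_subsystem_init(void)
{

	return (vmci_init_lock(&example_lock, "example lock"));
}

static void
example_subsystem_exit(void)
{

	/* Safe even if example_subsystem_init() was never reached. */
	vmci_cleanup_lock(&example_lock);
}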
* * Results: * None * * Side effects: * A thread blocked on this lock may wake up. * *------------------------------------------------------------------------------ */ void vmci_release_lock(vmci_lock *lock) { mtx_unlock(lock); } /* *------------------------------------------------------------------------------ * * vmci_grab_lock_bh * * Grabs the given lock. * * Results: * None * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_grab_lock_bh(vmci_lock *lock) { mtx_lock(lock); } /* *------------------------------------------------------------------------------ * * vmci_release_lock_bh * * Releases the given lock. * * Results: * None * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_release_lock_bh(vmci_lock *lock) { mtx_unlock(lock); } +/* + *------------------------------------------------------------------------------ + * + * vmci_initialized_lock + * + * Returns whether a lock has been initialized. + * + * Results: + * Return 1 if initialized or 0 if unininitialized. + * + * Side effects: + * None + * + *------------------------------------------------------------------------------ + */ + +int +vmci_initialized_lock(vmci_lock *lock) +{ + + return mtx_initialized(lock); +} + /* *------------------------------------------------------------------------------ * * vmci_alloc_kernel_mem * * Allocate physically contiguous memory for the VMCI driver. * * Results: * The address allocated or NULL on error. * * * Side effects: * Memory may be allocated. * *------------------------------------------------------------------------------ */ void * vmci_alloc_kernel_mem(size_t size, int flags) { void *ptr; if ((flags & VMCI_MEMORY_ATOMIC) != 0) ptr = contigmalloc(size, M_DEVBUF, M_NOWAIT, 0, 0xFFFFFFFF, 8, 1024 * 1024); else ptr = contigmalloc(size, M_DEVBUF, M_WAITOK, 0, 0xFFFFFFFF, 8, 1024 * 1024); return (ptr); } /* *------------------------------------------------------------------------------ * * vmci_free_kernel_mem * * Free kernel memory allocated for the VMCI driver. * * Results: * None. * * Side effects: * Memory is freed. * *------------------------------------------------------------------------------ */ void vmci_free_kernel_mem(void *ptr, size_t size) { contigfree(ptr, size, M_DEVBUF); } /* *------------------------------------------------------------------------------ * * vmci_can_schedule_delayed_work -- * * Checks to see if the given platform supports delayed work callbacks. * * Results: * true if it does. false otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ bool vmci_can_schedule_delayed_work(void) { return (true); } /* *------------------------------------------------------------------------------ * * vmci_schedule_delayed_work -- * * Schedule the specified callback. * * Results: * Zero on success, error code otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data) { return (vmci_schedule_delayed_work_fn(work_fn, data)); } /* *------------------------------------------------------------------------------ * * vmci_create_event -- * * Results: * None. * * Side effects: * None. 
* *------------------------------------------------------------------------------ */ void vmci_create_event(vmci_event *event) { sema_init(event, 0, "vmci_event"); } /* *------------------------------------------------------------------------------ * * vmci_destroy_event -- * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_destroy_event(vmci_event *event) { if (mtx_owned(&event->sema_mtx)) sema_destroy(event); } /* *------------------------------------------------------------------------------ * * vmci_signal_event -- * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_signal_event(vmci_event *event) { sema_post(event); } /* *------------------------------------------------------------------------------ * * vmci_wait_on_event -- * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb, void *client_data) { release_cb(client_data); sema_wait(event); } /* *------------------------------------------------------------------------------ * * vmci_mutex_init -- * * Initializes the mutex. Must be called before use. * * Results: * Success. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_mutex_init(vmci_mutex *mutex, char *name) { mtx_init(mutex, name, NULL, MTX_DEF | MTX_NOWITNESS); return (VMCI_SUCCESS); } /* *------------------------------------------------------------------------------ * * vmci_mutex_destroy -- * * Destroys the mutex. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_mutex_destroy(vmci_mutex *mutex) { mtx_destroy(mutex); } /* *------------------------------------------------------------------------------ * * vmci_mutex_acquire -- * * Acquires the mutex. * * Results: * None. * * Side effects: * Thread may block. * *------------------------------------------------------------------------------ */ void vmci_mutex_acquire(vmci_mutex *mutex) { mtx_lock(mutex); } /* *------------------------------------------------------------------------------ * * vmci_mutex_release -- * * Releases the mutex. * * Results: * None. * * Side effects: * May wake up the thread blocking on this mutex. * *------------------------------------------------------------------------------ */ void vmci_mutex_release(vmci_mutex *mutex) { mtx_unlock(mutex); } +/* + *------------------------------------------------------------------------------ + * + * vmci_mutex_initialized + * + * Returns whether a mutex has been initialized. + * + * Results: + * Return 1 if initialized or 0 if unininitialized. + * + * Side effects: + * None + * + *------------------------------------------------------------------------------ + */ + +int +vmci_mutex_initialized(vmci_mutex *mutex) +{ + + return mtx_initialized(mutex); +} /* *------------------------------------------------------------------------------ * * vmci_alloc_queue -- * * Allocates kernel queue pages of specified size with IOMMU mappings, plus * space for the queue structure/kernel interface and the queue header. * * Results: * Pointer to the queue on success, NULL otherwise. * * Side effects: * Memory is allocated. 
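The new vmci_mutex_initialized() mirrors vmci_initialized_lock() and lets teardown code elsewhere in the driver check a mutex before destroying it. A minimal sketch of such a guard, assuming hypothetical example_* names and that the prototype is exported through vmci_kernel_if.h (the header change is not part of this hunk):

#include "vmci_kernel_if.h"

static vmci_mutex example_mutex;	/* hypothetical mutex, for illustration */

static void
example_cleanup(void)
{

	/* Tear the mutex down only if vmci_mutex_init() ever ran on it. */
	if (vmci_mutex_initialized(&example_mutex))
		vmci_mutex_destroy(&example_mutex);
}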
* *------------------------------------------------------------------------------ */ void * vmci_alloc_queue(uint64_t size, uint32_t flags) { struct vmci_queue *queue; size_t i; const size_t num_pages = CEILING(size, PAGE_SIZE) + 1; const size_t dmas_size = num_pages * sizeof(struct vmci_dma_alloc); const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)) + dmas_size; /* Size should be enforced by vmci_qpair_alloc(), double-check here. */ if (size > VMCI_MAX_GUEST_QP_MEMORY) { ASSERT(false); return (NULL); } queue = malloc(queue_size, M_DEVBUF, M_NOWAIT); if (!queue) return (NULL); queue->q_header = NULL; queue->saved_header = NULL; queue->kernel_if = (struct vmci_queue_kernel_if *)(queue + 1); queue->kernel_if->num_pages = num_pages; queue->kernel_if->dmas = (struct vmci_dma_alloc *)(queue->kernel_if + 1); for (i = 0; i < num_pages; i++) { vmci_dma_malloc(PAGE_SIZE, 1, &queue->kernel_if->dmas[i]); if (!queue->kernel_if->dmas[i].dma_vaddr) { /* Size excl. the header. */ vmci_free_queue(queue, i * PAGE_SIZE); return (NULL); } } /* Queue header is the first page. */ queue->q_header = (void *)queue->kernel_if->dmas[0].dma_vaddr; return ((void *)queue); } /* *------------------------------------------------------------------------------ * * vmci_free_queue -- * * Frees kernel VA space for a given queue and its queue header, and frees * physical data pages. * * Results: * None. * * Side effects: * Memory is freed. * *------------------------------------------------------------------------------ */ void vmci_free_queue(void *q, uint64_t size) { struct vmci_queue *queue = q; if (queue) { const size_t num_pages = CEILING(size, PAGE_SIZE) + 1; uint64_t i; /* Given size doesn't include header, so add in a page here. */ for (i = 0; i < num_pages; i++) vmci_dma_free(&queue->kernel_if->dmas[i]); free(queue, M_DEVBUF); } } /* *------------------------------------------------------------------------------ * * vmci_alloc_ppn_set -- * * Allocates two list of PPNs --- one for the pages in the produce queue, * and the other for the pages in the consume queue. Intializes the list of * PPNs with the page frame numbers of the KVA for the two queues (and the * queue headers). * * Results: * Success or failure. * * Side effects: * Memory may be allocated. * *----------------------------------------------------------------------------- */ int vmci_alloc_ppn_set(void *prod_q, uint64_t num_produce_pages, void *cons_q, uint64_t num_consume_pages, struct ppn_set *ppn_set) { struct vmci_queue *consume_q = cons_q; struct vmci_queue *produce_q = prod_q; vmci_ppn_list consume_ppns; vmci_ppn_list produce_ppns; uint64_t i; if (!produce_q || !num_produce_pages || !consume_q || !num_consume_pages || !ppn_set) return (VMCI_ERROR_INVALID_ARGS); if (ppn_set->initialized) return (VMCI_ERROR_ALREADY_EXISTS); produce_ppns = vmci_alloc_kernel_mem(num_produce_pages * sizeof(*produce_ppns), VMCI_MEMORY_NORMAL); if (!produce_ppns) return (VMCI_ERROR_NO_MEM); consume_ppns = vmci_alloc_kernel_mem(num_consume_pages * sizeof(*consume_ppns), VMCI_MEMORY_NORMAL); if (!consume_ppns) { vmci_free_kernel_mem(produce_ppns, num_produce_pages * sizeof(*produce_ppns)); return (VMCI_ERROR_NO_MEM); } for (i = 0; i < num_produce_pages; i++) { unsigned long pfn; produce_ppns[i] = pfn = produce_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT; /* * Fail allocation if PFN isn't supported by hypervisor. 
*/ if (sizeof(pfn) > sizeof(*produce_ppns) && pfn != produce_ppns[i]) goto ppn_error; } for (i = 0; i < num_consume_pages; i++) { unsigned long pfn; consume_ppns[i] = pfn = consume_q->kernel_if->dmas[i].dma_paddr >> PAGE_SHIFT; /* * Fail allocation if PFN isn't supported by hypervisor. */ if (sizeof(pfn) > sizeof(*consume_ppns) && pfn != consume_ppns[i]) goto ppn_error; } ppn_set->num_produce_pages = num_produce_pages; ppn_set->num_consume_pages = num_consume_pages; ppn_set->produce_ppns = produce_ppns; ppn_set->consume_ppns = consume_ppns; ppn_set->initialized = true; return (VMCI_SUCCESS); ppn_error: vmci_free_kernel_mem(produce_ppns, num_produce_pages * sizeof(*produce_ppns)); vmci_free_kernel_mem(consume_ppns, num_consume_pages * sizeof(*consume_ppns)); return (VMCI_ERROR_INVALID_ARGS); } /* *------------------------------------------------------------------------------ * * vmci_free_ppn_set -- * * Frees the two list of PPNs for a queue pair. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_free_ppn_set(struct ppn_set *ppn_set) { ASSERT(ppn_set); if (ppn_set->initialized) { /* Do not call these functions on NULL inputs. */ ASSERT(ppn_set->produce_ppns && ppn_set->consume_ppns); vmci_free_kernel_mem(ppn_set->produce_ppns, ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns)); vmci_free_kernel_mem(ppn_set->consume_ppns, ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns)); } memset(ppn_set, 0, sizeof(*ppn_set)); } /* *------------------------------------------------------------------------------ * * vmci_populate_ppn_list -- * * Populates the list of PPNs in the hypercall structure with the PPNS * of the produce queue and the consume queue. * * Results: * VMCI_SUCCESS. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppn_set) { ASSERT(call_buf && ppn_set && ppn_set->initialized); memcpy(call_buf, ppn_set->produce_ppns, ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns)); memcpy(call_buf + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns), ppn_set->consume_ppns, ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns)); return (VMCI_SUCCESS); } /* *------------------------------------------------------------------------------ * * vmci_memcpy_{to,from}iovec -- * * These helper routines will copy the specified bytes to/from memory that's * specified as a struct iovec. The routines can not verify the correctness * of the struct iovec's contents. * * Results: * None. * * Side effects: * None. 
* *------------------------------------------------------------------------------ */ static inline void vmci_memcpy_toiovec(struct iovec *iov, uint8_t *src, size_t len) { while (len > 0) { if (iov->iov_len) { size_t to_copy = MIN(iov->iov_len, len); memcpy(iov->iov_base, src, to_copy); src += to_copy; len -= to_copy; iov->iov_base = (void *)((uintptr_t) iov->iov_base + to_copy); iov->iov_len -= to_copy; } iov++; } } static inline void vmci_memcpy_fromiovec(uint8_t *dst, struct iovec *iov, size_t len) { while (len > 0) { if (iov->iov_len) { size_t to_copy = MIN(iov->iov_len, len); memcpy(dst, iov->iov_base, to_copy); dst += to_copy; len -= to_copy; iov->iov_base = (void *)((uintptr_t) iov->iov_base + to_copy); iov->iov_len -= to_copy; } iov++; } } /* *------------------------------------------------------------------------------ * * __vmci_memcpy_to_queue -- * * Copies from a given buffer or iovector to a VMCI Queue. Assumes that * offset + size does not wrap around in the queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ #pragma GCC diagnostic ignored "-Wcast-qual" static int __vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset, const void *src, size_t size, bool is_iovec) { struct vmci_queue_kernel_if *kernel_if = queue->kernel_if; size_t bytes_copied = 0; while (bytes_copied < size) { const uint64_t page_index = (queue_offset + bytes_copied) / PAGE_SIZE; const size_t page_offset = (queue_offset + bytes_copied) & (PAGE_SIZE - 1); void *va; size_t to_copy; /* Skip header. */ va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr; ASSERT(va); /* * Fill up the page if we have enough payload, or else * copy the remaining bytes. */ to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied); if (is_iovec) { struct iovec *iov = (struct iovec *)src; /* The iovec will track bytes_copied internally. */ vmci_memcpy_fromiovec((uint8_t *)va + page_offset, iov, to_copy); } else memcpy((uint8_t *)va + page_offset, (uint8_t *)src + bytes_copied, to_copy); bytes_copied += to_copy; } return (VMCI_SUCCESS); } /* *------------------------------------------------------------------------------ * * __vmci_memcpy_from_queue -- * * Copies to a given buffer or iovector from a VMCI Queue. Assumes that * offset + size does not wrap around in the queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static int __vmci_memcpy_from_queue(void *dest, const struct vmci_queue *queue, uint64_t queue_offset, size_t size, bool is_iovec) { struct vmci_queue_kernel_if *kernel_if = queue->kernel_if; size_t bytes_copied = 0; while (bytes_copied < size) { const uint64_t page_index = (queue_offset + bytes_copied) / PAGE_SIZE; const size_t page_offset = (queue_offset + bytes_copied) & (PAGE_SIZE - 1); void *va; size_t to_copy; /* Skip header. */ va = (void *)kernel_if->dmas[page_index + 1].dma_vaddr; ASSERT(va); /* * Fill up the page if we have enough payload, or else * copy the remaining bytes. */ to_copy = MIN(PAGE_SIZE - page_offset, size - bytes_copied); if (is_iovec) { struct iovec *iov = (struct iovec *)dest; /* The iovec will track bytesCopied internally. 
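* vmci_memcpy_toiovec() advances iov_base/iov_len as it copies, so passing * the same iov on each loop iteration resumes where the previous page left * off.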
*/ vmci_memcpy_toiovec(iov, (uint8_t *)va + page_offset, to_copy); } else memcpy((uint8_t *)dest + bytes_copied, (uint8_t *)va + page_offset, to_copy); bytes_copied += to_copy; } return (VMCI_SUCCESS); } /* *------------------------------------------------------------------------------ * * vmci_memcpy_to_queue -- * * Copies from a given buffer to a VMCI Queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_memcpy_to_queue(struct vmci_queue *queue, uint64_t queue_offset, const void *src, size_t src_offset, size_t size, int buf_type, bool can_block) { ASSERT(can_block); return (__vmci_memcpy_to_queue(queue, queue_offset, (uint8_t *)src + src_offset, size, false)); } /* *------------------------------------------------------------------------------ * * vmci_memcpy_from_queue -- * * Copies to a given buffer from a VMCI Queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_memcpy_from_queue(void *dest, size_t dest_offset, const struct vmci_queue *queue, uint64_t queue_offset, size_t size, int buf_type, bool can_block) { ASSERT(can_block); return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset, queue, queue_offset, size, false)); } /* *------------------------------------------------------------------------------ * * vmci_memcpy_to_queue_local -- * * Copies from a given buffer to a local VMCI queue. This is the * same as a regular copy. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_memcpy_to_queue_local(struct vmci_queue *queue, uint64_t queue_offset, const void *src, size_t src_offset, size_t size, int buf_type, bool can_block) { ASSERT(can_block); return (__vmci_memcpy_to_queue(queue, queue_offset, (uint8_t *)src + src_offset, size, false)); } /* *------------------------------------------------------------------------------ * * vmci_memcpy_from_queue_local -- * * Copies to a given buffer from a local VMCI Queue. This is the same as a * regular copy. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_memcpy_from_queue_local(void *dest, size_t dest_offset, const struct vmci_queue *queue, uint64_t queue_offset, size_t size, int buf_type, bool can_block) { ASSERT(can_block); return (__vmci_memcpy_from_queue((uint8_t *)dest + dest_offset, queue, queue_offset, size, false)); } /* *------------------------------------------------------------------------------ * * vmci_memcpy_to_queue_v -- * * Copies from a given iovec to a VMCI Queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_memcpy_to_queue_v(struct vmci_queue *queue, uint64_t queue_offset, const void *src, size_t src_offset, size_t size, int buf_type, bool can_block) { ASSERT(can_block); /* * We ignore src_offset because src is really a struct iovec * and will * maintain offset internally.
*/ return (__vmci_memcpy_to_queue(queue, queue_offset, src, size, true)); } /* *------------------------------------------------------------------------------ * * vmci_memcpy_from_queue_v -- * * Copies to a given iovec from a VMCI Queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_memcpy_from_queue_v(void *dest, size_t dest_offset, const struct vmci_queue *queue, uint64_t queue_offset, size_t size, int buf_type, bool can_block) { ASSERT(can_block); /* * We ignore dest_offset because dest is really a struct iovec * and * will maintain offset internally. */ return (__vmci_memcpy_from_queue(dest, queue, queue_offset, size, true)); } /* *------------------------------------------------------------------------------ * * vmci_read_port_bytes -- * * Copy memory from an I/O port to kernel memory. * * Results: * No results. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port, uint8_t *buffer, size_t buffer_length) { insb(port, buffer, buffer_length); } diff --git a/sys/dev/vmware/vmci/vmci_kernel_if.h b/sys/dev/vmware/vmci/vmci_kernel_if.h index fc23eefe98e0..048e480b0698 100644 --- a/sys/dev/vmware/vmci/vmci_kernel_if.h +++ b/sys/dev/vmware/vmci/vmci_kernel_if.h @@ -1,94 +1,96 @@ /*- * Copyright (c) 2018 VMware, Inc. * * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0) * * $FreeBSD$ */ /* This file defines helper functions */ #ifndef _VMCI_KERNEL_IF_H_ #define _VMCI_KERNEL_IF_H_ #include #include #include #include #include #include "vmci_defs.h" #define VMCI_MEMORY_NORMAL 0x0 #define VMCI_MEMORY_ATOMIC 0x1 #define vmci_list(_l) LIST_HEAD(, _l) #define vmci_list_item(_l) LIST_ENTRY(_l) #define vmci_list_init(_l) LIST_INIT(_l) #define vmci_list_empty(_l) LIST_EMPTY(_l) #define vmci_list_first(_l) LIST_FIRST(_l) #define vmci_list_next(e, f) LIST_NEXT(e, f) #define vmci_list_insert(_l, _e, n) LIST_INSERT_HEAD(_l, _e, n) #define vmci_list_remove(_e, n) LIST_REMOVE(_e, n) #define vmci_list_scan(v, _l, n) LIST_FOREACH(v, _l, n) #define vmci_list_scan_safe(_e, _l, n, t) \ LIST_FOREACH_SAFE(_e, _l, n, t) #define vmci_list_swap(_l1, _l2, t, f) LIST_SWAP(_l1, _l2, t, f) typedef unsigned short int vmci_io_port; typedef int vmci_io_handle; void vmci_read_port_bytes(vmci_io_handle handle, vmci_io_port port, uint8_t *buffer, size_t buffer_length); typedef struct mtx vmci_lock; int vmci_init_lock(vmci_lock *lock, char *name); void vmci_cleanup_lock(vmci_lock *lock); void vmci_grab_lock(vmci_lock *lock); void vmci_release_lock(vmci_lock *lock); void vmci_grab_lock_bh(vmci_lock *lock); void vmci_release_lock_bh(vmci_lock *lock); +int vmci_initialized_lock(vmci_lock *lock); void *vmci_alloc_kernel_mem(size_t size, int flags); void vmci_free_kernel_mem(void *ptr, size_t size); typedef struct sema vmci_event; typedef int (*vmci_event_release_cb)(void *client_data); void vmci_create_event(vmci_event *event); void vmci_destroy_event(vmci_event *event); void vmci_signal_event(vmci_event *event); void vmci_wait_on_event(vmci_event *event, vmci_event_release_cb release_cb, void *client_data); bool vmci_wait_on_event_interruptible(vmci_event *event, vmci_event_release_cb release_cb, void *client_data); typedef void (vmci_work_fn)(void *data); bool vmci_can_schedule_delayed_work(void); int vmci_schedule_delayed_work(vmci_work_fn *work_fn, void *data); void 
vmci_delayed_work_cb(void *context, int data); typedef struct mtx vmci_mutex; int vmci_mutex_init(vmci_mutex *mutex, char *name); void vmci_mutex_destroy(vmci_mutex *mutex); void vmci_mutex_acquire(vmci_mutex *mutex); void vmci_mutex_release(vmci_mutex *mutex); +int vmci_mutex_initialized(vmci_mutex *mutex); void *vmci_alloc_queue(uint64_t size, uint32_t flags); void vmci_free_queue(void *q, uint64_t size); typedef PPN *vmci_ppn_list; struct ppn_set { uint64_t num_produce_pages; uint64_t num_consume_pages; vmci_ppn_list produce_ppns; vmci_ppn_list consume_ppns; bool initialized; }; int vmci_alloc_ppn_set(void *produce_q, uint64_t num_produce_pages, void *consume_q, uint64_t num_consume_pages, struct ppn_set *ppn_set); void vmci_free_ppn_set(struct ppn_set *ppn_set); int vmci_populate_ppn_list(uint8_t *call_buf, const struct ppn_set *ppnset); #endif /* !_VMCI_KERNEL_IF_H_ */ diff --git a/sys/dev/vmware/vmci/vmci_queue_pair.c b/sys/dev/vmware/vmci/vmci_queue_pair.c index 2ff963c691d0..0e2f83b20c67 100644 --- a/sys/dev/vmware/vmci/vmci_queue_pair.c +++ b/sys/dev/vmware/vmci/vmci_queue_pair.c @@ -1,936 +1,939 @@ /*- * Copyright (c) 2018 VMware, Inc. * * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0) */ /* VMCI QueuePair API implementation. */ #include __FBSDID("$FreeBSD$"); #include "vmci.h" #include "vmci_driver.h" #include "vmci_event.h" #include "vmci_kernel_api.h" #include "vmci_kernel_defs.h" #include "vmci_queue_pair.h" #define LGPFX "vmci_queue_pair: " struct queue_pair_entry { vmci_list_item(queue_pair_entry) list_item; struct vmci_handle handle; vmci_id peer; uint32_t flags; uint64_t produce_size; uint64_t consume_size; uint32_t ref_count; }; struct qp_guest_endpoint { struct queue_pair_entry qp; uint64_t num_ppns; void *produce_q; void *consume_q; bool hibernate_failure; struct ppn_set ppn_set; }; struct queue_pair_list { vmci_list(queue_pair_entry) head; volatile int hibernate; vmci_mutex mutex; }; #define QPE_NUM_PAGES(_QPE) \ ((uint32_t)(CEILING(_QPE.produce_size, PAGE_SIZE) + \ CEILING(_QPE.consume_size, PAGE_SIZE) + 2)) static struct queue_pair_list qp_guest_endpoints; static struct queue_pair_entry *queue_pair_list_find_entry( struct queue_pair_list *qp_list, struct vmci_handle handle); static void queue_pair_list_add_entry(struct queue_pair_list *qp_list, struct queue_pair_entry *entry); static void queue_pair_list_remove_entry(struct queue_pair_list *qp_list, struct queue_pair_entry *entry); static struct queue_pair_entry *queue_pair_list_get_head( struct queue_pair_list *qp_list); static int queue_pair_notify_peer_local(bool attach, struct vmci_handle handle); static struct qp_guest_endpoint *qp_guest_endpoint_create( struct vmci_handle handle, vmci_id peer, uint32_t flags, uint64_t produce_size, uint64_t consume_size, void *produce_q, void *consume_q); static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry); static int vmci_queue_pair_alloc_hypercall( const struct qp_guest_endpoint *entry); static int vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle, struct vmci_queue **produce_q, uint64_t produce_size, struct vmci_queue **consume_q, uint64_t consume_size, vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags); static int vmci_queue_pair_detach_guest_work(struct vmci_handle handle); static int vmci_queue_pair_detach_hypercall(struct vmci_handle handle); /* *------------------------------------------------------------------------------ * * vmci_queue_pair_alloc -- * * Allocates a VMCI QueuePair. Only checks validity of input arguments. 
The * real work is done in the host or guest specific function. * * Results: * VMCI_SUCCESS on success, appropriate error code otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_queue_pair_alloc(struct vmci_handle *handle, struct vmci_queue **produce_q, uint64_t produce_size, struct vmci_queue **consume_q, uint64_t consume_size, vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags) { if (!handle || !produce_q || !consume_q || (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS)) return (VMCI_ERROR_INVALID_ARGS); return (vmci_queue_pair_alloc_guest_work(handle, produce_q, produce_size, consume_q, consume_size, peer, flags, priv_flags)); } /* *------------------------------------------------------------------------------ * * vmci_queue_pair_detach -- * * Detaches from a VMCI QueuePair. Only checks validity of input argument. * Real work is done in the host or guest specific function. * * Results: * Success or failure. * * Side effects: * Memory is freed. * *------------------------------------------------------------------------------ */ int vmci_queue_pair_detach(struct vmci_handle handle) { if (VMCI_HANDLE_INVALID(handle)) return (VMCI_ERROR_INVALID_ARGS); return (vmci_queue_pair_detach_guest_work(handle)); } /* *------------------------------------------------------------------------------ * * queue_pair_list_init -- * * Initializes the list of QueuePairs. * * Results: * Success or failure. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static inline int queue_pair_list_init(struct queue_pair_list *qp_list) { int ret; vmci_list_init(&qp_list->head); atomic_store_int(&qp_list->hibernate, 0); ret = vmci_mutex_init(&qp_list->mutex, "VMCI QP List lock"); return (ret); } /* *------------------------------------------------------------------------------ * * queue_pair_list_destroy -- * * Destroy the list's mutex. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static inline void queue_pair_list_destroy(struct queue_pair_list *qp_list) { vmci_mutex_destroy(&qp_list->mutex); vmci_list_init(&qp_list->head); } /* *------------------------------------------------------------------------------ * * queue_pair_list_find_entry -- * * Finds the entry in the list corresponding to a given handle. Assumes that * the list is locked. * * Results: * Pointer to entry. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static struct queue_pair_entry * queue_pair_list_find_entry(struct queue_pair_list *qp_list, struct vmci_handle handle) { struct queue_pair_entry *next; if (VMCI_HANDLE_INVALID(handle)) return (NULL); vmci_list_scan(next, &qp_list->head, list_item) { if (VMCI_HANDLE_EQUAL(next->handle, handle)) return (next); } return (NULL); } /* *------------------------------------------------------------------------------ * * queue_pair_list_add_entry -- * * Adds the given entry to the list. Assumes that the list is locked. * * Results: * None. * * Side effects: * None. 
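* * Note: vmci_list_insert() expands to LIST_INSERT_HEAD(), so entries are * prepended; queue_pair_list_find_entry() walks the list until it finds a * matching handle, so insertion order does not affect lookups.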
* *------------------------------------------------------------------------------ */ static void queue_pair_list_add_entry(struct queue_pair_list *qp_list, struct queue_pair_entry *entry) { if (entry) vmci_list_insert(&qp_list->head, entry, list_item); } /* *------------------------------------------------------------------------------ * * queue_pair_list_remove_entry -- * * Removes the given entry from the list. Assumes that the list is locked. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static void queue_pair_list_remove_entry(struct queue_pair_list *qp_list, struct queue_pair_entry *entry) { if (entry) vmci_list_remove(entry, list_item); } /* *------------------------------------------------------------------------------ * * queue_pair_list_get_head -- * * Returns the entry from the head of the list. Assumes that the list is * locked. * * Results: * Pointer to entry. * * Side effects: * None. * *------------------------------------------------------------------------------ */ static struct queue_pair_entry * queue_pair_list_get_head(struct queue_pair_list *qp_list) { return (vmci_list_first(&qp_list->head)); } /* *------------------------------------------------------------------------------ * * vmci_qp_guest_endpoints_init -- * * Initalizes data structure state keeping track of queue pair guest * endpoints. * * Results: * VMCI_SUCCESS on success and appropriate failure code otherwise. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_qp_guest_endpoints_init(void) { return (queue_pair_list_init(&qp_guest_endpoints)); } /* *------------------------------------------------------------------------------ * * vmci_qp_guest_endpoints_exit -- * * Destroys all guest queue pair endpoints. If active guest queue pairs * still exist, hypercalls to attempt detach from these queue pairs will be * made. Any failure to detach is silently ignored. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void vmci_qp_guest_endpoints_exit(void) { struct qp_guest_endpoint *entry; + if (!vmci_mutex_initialized(&qp_guest_endpoints.mutex)) + return; + vmci_mutex_acquire(&qp_guest_endpoints.mutex); while ((entry = (struct qp_guest_endpoint *)queue_pair_list_get_head( &qp_guest_endpoints)) != NULL) { /* * Don't make a hypercall for local QueuePairs. */ if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL)) vmci_queue_pair_detach_hypercall(entry->qp.handle); /* * We cannot fail the exit, so let's reset ref_count. */ entry->qp.ref_count = 0; queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp); qp_guest_endpoint_destroy(entry); } atomic_store_int(&qp_guest_endpoints.hibernate, 0); vmci_mutex_release(&qp_guest_endpoints.mutex); queue_pair_list_destroy(&qp_guest_endpoints); } /* *------------------------------------------------------------------------------ * * vmci_qp_guest_endpoints_sync -- * * Use this as a synchronization point when setting globals, for example, * during device shutdown. * * Results: * true. * * Side effects: * None. 
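* * The empty acquire/release of qp_guest_endpoints.mutex below simply waits * for any thread currently inside a mutex-protected queue pair operation to * drain, which is what makes this usable as a barrier.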
* *------------------------------------------------------------------------------ */ void vmci_qp_guest_endpoints_sync(void) { vmci_mutex_acquire(&qp_guest_endpoints.mutex); vmci_mutex_release(&qp_guest_endpoints.mutex); } /* *------------------------------------------------------------------------------ * * qp_guest_endpoint_create -- * * Allocates and initializes a qp_guest_endpoint structure. Allocates a * QueuePair rid (and handle) iff the given entry has an invalid handle. * 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved handles. Assumes * that the QP list mutex is held by the caller. * * Results: * Pointer to structure intialized. * * Side effects: * None. * *------------------------------------------------------------------------------ */ struct qp_guest_endpoint * qp_guest_endpoint_create(struct vmci_handle handle, vmci_id peer, uint32_t flags, uint64_t produce_size, uint64_t consume_size, void *produce_q, void *consume_q) { struct qp_guest_endpoint *entry; static vmci_id queue_pair_rid; const uint64_t num_ppns = CEILING(produce_size, PAGE_SIZE) + CEILING(consume_size, PAGE_SIZE) + 2; /* One page each for the queue headers. */ queue_pair_rid = VMCI_RESERVED_RESOURCE_ID_MAX + 1; ASSERT((produce_size || consume_size) && produce_q && consume_q); if (VMCI_HANDLE_INVALID(handle)) { vmci_id context_id = vmci_get_context_id(); vmci_id old_rid = queue_pair_rid; /* * Generate a unique QueuePair rid. Keep on trying until we * wrap around in the RID space. */ ASSERT(old_rid > VMCI_RESERVED_RESOURCE_ID_MAX); do { handle = VMCI_MAKE_HANDLE(context_id, queue_pair_rid); entry = (struct qp_guest_endpoint *) queue_pair_list_find_entry(&qp_guest_endpoints, handle); queue_pair_rid++; if (UNLIKELY(!queue_pair_rid)) { /* * Skip the reserved rids. */ queue_pair_rid = VMCI_RESERVED_RESOURCE_ID_MAX + 1; } } while (entry && queue_pair_rid != old_rid); if (UNLIKELY(entry != NULL)) { ASSERT(queue_pair_rid == old_rid); /* * We wrapped around --- no rids were free. */ return (NULL); } } ASSERT(!VMCI_HANDLE_INVALID(handle) && queue_pair_list_find_entry(&qp_guest_endpoints, handle) == NULL); entry = vmci_alloc_kernel_mem(sizeof(*entry), VMCI_MEMORY_NORMAL); if (entry) { entry->qp.handle = handle; entry->qp.peer = peer; entry->qp.flags = flags; entry->qp.produce_size = produce_size; entry->qp.consume_size = consume_size; entry->qp.ref_count = 0; entry->num_ppns = num_ppns; memset(&entry->ppn_set, 0, sizeof(entry->ppn_set)); entry->produce_q = produce_q; entry->consume_q = consume_q; } return (entry); } /* *------------------------------------------------------------------------------ * * qp_guest_endpoint_destroy -- * * Frees a qp_guest_endpoint structure. * * Results: * None. * * Side effects: * None. * *------------------------------------------------------------------------------ */ void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry) { ASSERT(entry); ASSERT(entry->qp.ref_count == 0); vmci_free_ppn_set(&entry->ppn_set); vmci_free_queue(entry->produce_q, entry->qp.produce_size); vmci_free_queue(entry->consume_q, entry->qp.consume_size); vmci_free_kernel_mem(entry, sizeof(*entry)); } /* *------------------------------------------------------------------------------ * * vmci_queue_pair_alloc_hypercall -- * * Helper to make a QueuePairAlloc hypercall when the driver is * supporting a guest device. * * Results: * Result of the hypercall. * * Side effects: * Memory is allocated & freed. 
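* * The datagram sent to the hypervisor is a vmci_queue_pair_alloc_msg * followed immediately by the PPN list (produce PPNs first, then consume * PPNs); this is why msg_size adds num_ppns * sizeof(PPN) and * vmci_populate_ppn_list() writes right past the end of the struct.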
* *------------------------------------------------------------------------------ */ static int vmci_queue_pair_alloc_hypercall(const struct qp_guest_endpoint *entry) { struct vmci_queue_pair_alloc_msg *alloc_msg; size_t msg_size; int result; if (!entry || entry->num_ppns <= 2) return (VMCI_ERROR_INVALID_ARGS); ASSERT(!(entry->qp.flags & VMCI_QPFLAG_LOCAL)); msg_size = sizeof(*alloc_msg) + (size_t)entry->num_ppns * sizeof(PPN); alloc_msg = vmci_alloc_kernel_mem(msg_size, VMCI_MEMORY_NORMAL); if (!alloc_msg) return (VMCI_ERROR_NO_MEM); alloc_msg->hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_QUEUEPAIR_ALLOC); alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE; alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE; alloc_msg->handle = entry->qp.handle; alloc_msg->peer = entry->qp.peer; alloc_msg->flags = entry->qp.flags; alloc_msg->produce_size = entry->qp.produce_size; alloc_msg->consume_size = entry->qp.consume_size; alloc_msg->num_ppns = entry->num_ppns; result = vmci_populate_ppn_list((uint8_t *)alloc_msg + sizeof(*alloc_msg), &entry->ppn_set); if (result == VMCI_SUCCESS) result = vmci_send_datagram((struct vmci_datagram *)alloc_msg); vmci_free_kernel_mem(alloc_msg, msg_size); return (result); } /* *------------------------------------------------------------------------------ * * vmci_queue_pair_alloc_guest_work -- * * This functions handles the actual allocation of a VMCI queue pair guest * endpoint. Allocates physical pages for the queue pair. It makes OS * dependent calls through generic wrappers. * * Results: * Success or failure. * * Side effects: * Memory is allocated. * *------------------------------------------------------------------------------ */ static int vmci_queue_pair_alloc_guest_work(struct vmci_handle *handle, struct vmci_queue **produce_q, uint64_t produce_size, struct vmci_queue **consume_q, uint64_t consume_size, vmci_id peer, uint32_t flags, vmci_privilege_flags priv_flags) { struct qp_guest_endpoint *queue_pair_entry = NULL; void *my_consume_q = NULL; void *my_produce_q = NULL; const uint64_t num_consume_pages = CEILING(consume_size, PAGE_SIZE) + 1; const uint64_t num_produce_pages = CEILING(produce_size, PAGE_SIZE) + 1; int result; ASSERT(handle && produce_q && consume_q && (produce_size || consume_size)); if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS) return (VMCI_ERROR_NO_ACCESS); vmci_mutex_acquire(&qp_guest_endpoints.mutex); if ((atomic_load_int(&qp_guest_endpoints.hibernate) == 1) && !(flags & VMCI_QPFLAG_LOCAL)) { /* * While guest OS is in hibernate state, creating non-local * queue pairs is not allowed after the point where the VMCI * guest driver converted the existing queue pairs to local * ones. */ result = VMCI_ERROR_UNAVAILABLE; goto error; } if ((queue_pair_entry = (struct qp_guest_endpoint *)queue_pair_list_find_entry( &qp_guest_endpoints, *handle)) != NULL) { if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { /* Local attach case. */ if (queue_pair_entry->qp.ref_count > 1) { VMCI_LOG_DEBUG(LGPFX"Error attempting to " "attach more than once.\n"); result = VMCI_ERROR_UNAVAILABLE; goto error_keep_entry; } if (queue_pair_entry->qp.produce_size != consume_size || queue_pair_entry->qp.consume_size != produce_size || queue_pair_entry->qp.flags != (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) { VMCI_LOG_DEBUG(LGPFX"Error mismatched " "queue pair in local attach.\n"); result = VMCI_ERROR_QUEUEPAIR_MISMATCH; goto error_keep_entry; } /* * Do a local attach. We swap the consume and produce * queues for the attacher and deliver an attach event. 
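* After the swap the creator's produce queue is the attacher's consume * queue and vice versa, so each side writes into the queue that the other * side reads.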
*/ result = queue_pair_notify_peer_local(true, *handle); if (result < VMCI_SUCCESS) goto error_keep_entry; my_produce_q = queue_pair_entry->consume_q; my_consume_q = queue_pair_entry->produce_q; goto out; } result = VMCI_ERROR_ALREADY_EXISTS; goto error_keep_entry; } my_produce_q = vmci_alloc_queue(produce_size, flags); if (!my_produce_q) { VMCI_LOG_WARNING(LGPFX"Error allocating pages for produce " "queue.\n"); result = VMCI_ERROR_NO_MEM; goto error; } my_consume_q = vmci_alloc_queue(consume_size, flags); if (!my_consume_q) { VMCI_LOG_WARNING(LGPFX"Error allocating pages for consume " "queue.\n"); result = VMCI_ERROR_NO_MEM; goto error; } queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags, produce_size, consume_size, my_produce_q, my_consume_q); if (!queue_pair_entry) { VMCI_LOG_WARNING(LGPFX"Error allocating memory in %s.\n", __FUNCTION__); result = VMCI_ERROR_NO_MEM; goto error; } result = vmci_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q, num_consume_pages, &queue_pair_entry->ppn_set); if (result < VMCI_SUCCESS) { VMCI_LOG_WARNING(LGPFX"vmci_alloc_ppn_set failed.\n"); goto error; } /* * It's only necessary to notify the host if this queue pair will be * attached to from another context. */ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { /* Local create case. */ vmci_id context_id = vmci_get_context_id(); /* * Enforce similar checks on local queue pairs as we do for * regular ones. The handle's context must match the creator * or attacher context id (here they are both the current * context id) and the attach-only flag cannot exist during * create. We also ensure specified peer is this context or * an invalid one. */ if (queue_pair_entry->qp.handle.context != context_id || (queue_pair_entry->qp.peer != VMCI_INVALID_ID && queue_pair_entry->qp.peer != context_id)) { result = VMCI_ERROR_NO_ACCESS; goto error; } if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) { result = VMCI_ERROR_NOT_FOUND; goto error; } } else { result = vmci_queue_pair_alloc_hypercall(queue_pair_entry); if (result < VMCI_SUCCESS) { VMCI_LOG_WARNING( LGPFX"vmci_queue_pair_alloc_hypercall result = " "%d.\n", result); goto error; } } queue_pair_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp); out: queue_pair_entry->qp.ref_count++; *handle = queue_pair_entry->qp.handle; *produce_q = (struct vmci_queue *)my_produce_q; *consume_q = (struct vmci_queue *)my_consume_q; /* * We should initialize the queue pair header pages on a local queue * pair create. For non-local queue pairs, the hypervisor initializes * the header pages in the create step. */ if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) && queue_pair_entry->qp.ref_count == 1) { vmci_queue_header_init((*produce_q)->q_header, *handle); vmci_queue_header_init((*consume_q)->q_header, *handle); } vmci_mutex_release(&qp_guest_endpoints.mutex); return (VMCI_SUCCESS); error: vmci_mutex_release(&qp_guest_endpoints.mutex); if (queue_pair_entry) { /* The queues will be freed inside the destroy routine. */ qp_guest_endpoint_destroy(queue_pair_entry); } else { if (my_produce_q) vmci_free_queue(my_produce_q, produce_size); if (my_consume_q) vmci_free_queue(my_consume_q, consume_size); } return (result); error_keep_entry: /* This path should only be used when an existing entry was found. 
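* The entry stays on the list with its reference count unchanged; we only * drop the endpoint mutex before returning the error.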
*/ ASSERT(queue_pair_entry->qp.ref_count > 0); vmci_mutex_release(&qp_guest_endpoints.mutex); return (result); } /* *------------------------------------------------------------------------------ * * vmci_queue_pair_detach_hypercall -- * * Helper to make a QueuePairDetach hypercall when the driver is supporting * a guest device. * * Results: * Result of the hypercall. * * Side effects: * None. * *------------------------------------------------------------------------------ */ int vmci_queue_pair_detach_hypercall(struct vmci_handle handle) { struct vmci_queue_pair_detach_msg detach_msg; detach_msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_QUEUEPAIR_DETACH); detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE; detach_msg.hdr.payload_size = sizeof(handle); detach_msg.handle = handle; return (vmci_send_datagram((struct vmci_datagram *)&detach_msg)); } /* *------------------------------------------------------------------------------ * * vmci_queue_pair_detach_guest_work -- * * Helper for VMCI QueuePair detach interface. Frees the physical pages for * the queue pair. * * Results: * Success or failure. * * Side effects: * Memory may be freed. * *------------------------------------------------------------------------------ */ static int vmci_queue_pair_detach_guest_work(struct vmci_handle handle) { struct qp_guest_endpoint *entry; int result; uint32_t ref_count; ASSERT(!VMCI_HANDLE_INVALID(handle)); vmci_mutex_acquire(&qp_guest_endpoints.mutex); entry = (struct qp_guest_endpoint *)queue_pair_list_find_entry( &qp_guest_endpoints, handle); if (!entry) { vmci_mutex_release(&qp_guest_endpoints.mutex); return (VMCI_ERROR_NOT_FOUND); } ASSERT(entry->qp.ref_count >= 1); if (entry->qp.flags & VMCI_QPFLAG_LOCAL) { result = VMCI_SUCCESS; if (entry->qp.ref_count > 1) { result = queue_pair_notify_peer_local(false, handle); /* * We can fail to notify a local queuepair because we * can't allocate. We still want to release the entry * if that happens, so don't bail out yet. */ } } else { result = vmci_queue_pair_detach_hypercall(handle); if (entry->hibernate_failure) { if (result == VMCI_ERROR_NOT_FOUND) { /* * If a queue pair detach failed when entering * hibernation, the guest driver and the device * may disagree on its existence when coming * out of hibernation. The guest driver will * regard it as a non-local queue pair, but * the device state is gone, since the device * has been powered off. In this case, we * treat the queue pair as a local queue pair * with no peer. */ ASSERT(entry->qp.ref_count == 1); result = VMCI_SUCCESS; } } if (result < VMCI_SUCCESS) { /* * We failed to notify a non-local queuepair. That other * queuepair might still be accessing the shared * memory, so don't release the entry yet. It will get * cleaned up by vmci_queue_pair_Exit() if necessary * (assuming we are going away, otherwise why did this * fail?). */ vmci_mutex_release(&qp_guest_endpoints.mutex); return (result); } } /* * If we get here then we either failed to notify a local queuepair, or * we succeeded in all cases. Release the entry if required. */ entry->qp.ref_count--; if (entry->qp.ref_count == 0) queue_pair_list_remove_entry(&qp_guest_endpoints, &entry->qp); /* If we didn't remove the entry, this could change once we unlock. */ ref_count = entry ? entry->qp.ref_count : 0xffffffff; /* * Value does not matter, silence the * compiler. 
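* The count is sampled while the mutex is still held because, once the lock * is dropped below, a value of zero means this thread frees the entry.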
*/ vmci_mutex_release(&qp_guest_endpoints.mutex); if (ref_count == 0) qp_guest_endpoint_destroy(entry); return (result); } /* *------------------------------------------------------------------------------ * * queue_pair_notify_peer_local -- * * Dispatches a queue pair event message directly into the local event * queue. * * Results: * VMCI_SUCCESS on success, error code otherwise * * Side effects: * None. * *------------------------------------------------------------------------------ */ static int queue_pair_notify_peer_local(bool attach, struct vmci_handle handle) { struct vmci_event_msg *e_msg; struct vmci_event_payload_qp *e_payload; /* buf is only 48 bytes. */ vmci_id context_id; context_id = vmci_get_context_id(); char buf[sizeof(*e_msg) + sizeof(*e_payload)]; e_msg = (struct vmci_event_msg *)buf; e_payload = vmci_event_msg_payload(e_msg); e_msg->hdr.dst = VMCI_MAKE_HANDLE(context_id, VMCI_EVENT_HANDLER); e_msg->hdr.src = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); e_msg->hdr.payload_size = sizeof(*e_msg) + sizeof(*e_payload) - sizeof(e_msg->hdr); e_msg->event_data.event = attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; e_payload->peer_id = context_id; e_payload->handle = handle; return (vmci_event_dispatch((struct vmci_datagram *)e_msg)); }
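For reference, a minimal sketch of how a guest-side caller might pair the allocation and detach entry points above. It is illustrative only: the function name is hypothetical, the sizes and flags are arbitrary, and error handling is reduced to the bare minimum.

/*
 * Illustrative example (not part of the change above): allocate a queue
 * pair with a driver-chosen handle, then detach it again.
 */
static int
example_queue_pair_roundtrip(void)
{
	struct vmci_handle handle;
	struct vmci_queue *produce_q;
	struct vmci_queue *consume_q;
	int result;

	/* An invalid handle asks the driver to pick a fresh resource id. */
	handle = VMCI_MAKE_HANDLE(VMCI_INVALID_ID, VMCI_INVALID_ID);

	result = vmci_queue_pair_alloc(&handle, &produce_q, PAGE_SIZE,
	    &consume_q, PAGE_SIZE, VMCI_INVALID_ID /* peer */, 0 /* flags */,
	    VMCI_NO_PRIVILEGE_FLAGS);
	if (result < VMCI_SUCCESS)
		return (result);

	/* ... move data with the vmci_memcpy_{to,from}_queue() helpers ... */

	return (vmci_queue_pair_detach(handle));
}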