diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index fdcad0dd4bba..62b5f313a137 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -1,2544 +1,2545 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher * Copyright (c) 2018 Patrick Kelsey * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $ */ /* Driver for VMware vmxnet3 virtual ethernet devices. */ #include #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #include "if_vmxreg.h" #include "if_vmxvar.h" #include "opt_inet.h" #include "opt_inet6.h" #define VMXNET3_VMWARE_VENDOR_ID 0x15AD #define VMXNET3_VMWARE_DEVICE_ID 0x07B0 static const pci_vendor_info_t vmxnet3_vendor_info_array[] = { PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"), /* required last entry */ PVID_END }; static void *vmxnet3_register(device_t); static int vmxnet3_attach_pre(if_ctx_t); static int vmxnet3_msix_intr_assign(if_ctx_t, int); static void vmxnet3_free_irqs(struct vmxnet3_softc *); static int vmxnet3_attach_post(if_ctx_t); static int vmxnet3_detach(if_ctx_t); static int vmxnet3_shutdown(if_ctx_t); static int vmxnet3_suspend(if_ctx_t); static int vmxnet3_resume(if_ctx_t); static int vmxnet3_alloc_resources(struct vmxnet3_softc *); static void vmxnet3_free_resources(struct vmxnet3_softc *); static int vmxnet3_check_version(struct vmxnet3_softc *); static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *); static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *); static void vmxnet3_init_txq(struct vmxnet3_softc *, int); static int vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void vmxnet3_init_rxq(struct vmxnet3_softc *, int, int); static int vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void vmxnet3_queues_free(if_ctx_t); static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *); static void vmxnet3_free_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *); static void vmxnet3_free_mcast_table(struct vmxnet3_softc *); static void vmxnet3_init_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_data(struct vmxnet3_softc *); static void vmxnet3_free_data(struct vmxnet3_softc *); static void vmxnet3_evintr(struct 
vmxnet3_softc *); static int vmxnet3_isc_txd_encap(void *, if_pkt_info_t); static void vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t); static int vmxnet3_isc_txd_credits_update(void *, uint16_t, bool); static int vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); static int vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t); static void vmxnet3_isc_rxd_refill(void *, if_rxd_update_t); static void vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); static int vmxnet3_legacy_intr(void *); static int vmxnet3_rxq_intr(void *); static int vmxnet3_event_intr(void *); static void vmxnet3_stop(if_ctx_t); static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static void vmxnet3_reinit_queues(struct vmxnet3_softc *); static int vmxnet3_enable_device(struct vmxnet3_softc *); static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *); static void vmxnet3_init(if_ctx_t); static void vmxnet3_multi_set(if_ctx_t); static int vmxnet3_mtu_set(if_ctx_t, uint32_t); static void vmxnet3_media_status(if_ctx_t, struct ifmediareq *); static int vmxnet3_media_change(if_ctx_t); static int vmxnet3_promisc_set(if_ctx_t, int); static uint64_t vmxnet3_get_counter(if_ctx_t, ift_counter); static void vmxnet3_update_admin_status(if_ctx_t); static void vmxnet3_txq_timer(if_ctx_t, uint16_t); static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int, uint16_t); static void vmxnet3_vlan_register(if_ctx_t, uint16_t); static void vmxnet3_vlan_unregister(if_ctx_t, uint16_t); static void vmxnet3_set_rxfilter(struct vmxnet3_softc *, int); static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *); static int vmxnet3_link_is_up(struct vmxnet3_softc *); static void vmxnet3_link_status(struct vmxnet3_softc *); static void vmxnet3_set_lladdr(struct vmxnet3_softc *); static void vmxnet3_get_lladdr(struct vmxnet3_softc *); static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_sysctl(struct vmxnet3_softc *); static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t, uint32_t); static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t); static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t, uint32_t); static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t); static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t); static int vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t); static int vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t); static void vmxnet3_link_intr_enable(if_ctx_t); static void vmxnet3_enable_intr(struct vmxnet3_softc *, int); static void vmxnet3_disable_intr(struct vmxnet3_softc *, int); static void vmxnet3_intr_enable_all(if_ctx_t); static void vmxnet3_intr_disable_all(if_ctx_t); static bool vmxnet3_if_needs_restart(if_ctx_t, enum iflib_restart_event); typedef enum { VMXNET3_BARRIER_RD, VMXNET3_BARRIER_WR, VMXNET3_BARRIER_RDWR, } vmxnet3_barrier_t; static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t); static device_method_t vmxnet3_methods[] = { /* Device interface */ DEVMETHOD(device_register, vmxnet3_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), 
DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t vmxnet3_driver = { "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc) }; DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0); IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array); MODULE_VERSION(vmx, 2); MODULE_DEPEND(vmx, pci, 1, 1, 1); MODULE_DEPEND(vmx, ether, 1, 1, 1); MODULE_DEPEND(vmx, iflib, 1, 1, 1); static device_method_t vmxnet3_iflib_methods[] = { DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free), DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre), DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post), DEVMETHOD(ifdi_detach, vmxnet3_detach), DEVMETHOD(ifdi_init, vmxnet3_init), DEVMETHOD(ifdi_stop, vmxnet3_stop), DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set), DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set), DEVMETHOD(ifdi_media_status, vmxnet3_media_status), DEVMETHOD(ifdi_media_change, vmxnet3_media_change), DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set), DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter), DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status), DEVMETHOD(ifdi_timer, vmxnet3_txq_timer), DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable), DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable), DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all), DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all), DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign), DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register), DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister), DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown), DEVMETHOD(ifdi_suspend, vmxnet3_suspend), DEVMETHOD(ifdi_resume, vmxnet3_resume), DEVMETHOD(ifdi_needs_restart, vmxnet3_if_needs_restart), DEVMETHOD_END }; static driver_t vmxnet3_iflib_driver = { "vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc) }; struct if_txrx vmxnet3_txrx = { .ift_txd_encap = vmxnet3_isc_txd_encap, .ift_txd_flush = vmxnet3_isc_txd_flush, .ift_txd_credits_update = vmxnet3_isc_txd_credits_update, .ift_rxd_available = vmxnet3_isc_rxd_available, .ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get, .ift_rxd_refill = vmxnet3_isc_rxd_refill, .ift_rxd_flush = vmxnet3_isc_rxd_flush, .ift_legacy_intr = vmxnet3_legacy_intr }; static struct if_shared_ctx vmxnet3_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = 512, .isc_tx_maxsize = VMXNET3_TX_MAXSIZE, .isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE, .isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE, /* * These values are used to configure the busdma tag used for * receive descriptors. Each receive descriptor only points to one * buffer. */ .isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */ .isc_rx_nsegments = 1, /* One mapping per descriptor */ .isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE, .isc_admin_intrcnt = 1, .isc_vendor_info = vmxnet3_vendor_info_array, .isc_driver_version = "2", .isc_driver = &vmxnet3_iflib_driver, .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY, /* * Number of receive queues per receive queue set, with associated * descriptor settings for each. 
*/ .isc_nrxqs = 3, .isc_nfl = 2, /* one free list for each receive command queue */ .isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC}, .isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC}, .isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC}, /* * Number of transmit queues per transmit queue set, with associated * descriptor settings for each. */ .isc_ntxqs = 2, .isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC}, .isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC}, .isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC}, }; static void * vmxnet3_register(device_t dev) { return (&vmxnet3_sctx_init); } static int trunc_powerof2(int val) { return (1U << (fls(val) - 1)); } static int vmxnet3_attach_pre(if_ctx_t ctx) { device_t dev; if_softc_ctx_t scctx; struct vmxnet3_softc *sc; uint32_t intr_config; int error; dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); sc->vmx_dev = dev; sc->vmx_ctx = ctx; sc->vmx_sctx = iflib_get_sctx(ctx); sc->vmx_scctx = iflib_get_softc_ctx(ctx); sc->vmx_ifp = iflib_get_ifp(ctx); sc->vmx_media = iflib_get_media(ctx); scctx = sc->vmx_scctx; scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS; scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS; /* isc_tx_tso_size_max doesn't include possible vlan header */ scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE; scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE; scctx->isc_txrx = &vmxnet3_txrx; /* If 0, the iflib tunable was not set, so set to the default */ if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES; scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets); scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus); scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max); /* If 0, the iflib tunable was not set, so set to the default */ if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES; scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets); scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus); scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max); /* * Enforce that the transmit completion queue descriptor count is * the same as the transmit command queue descriptor count. */ scctx->isc_ntxd[0] = scctx->isc_ntxd[1]; scctx->isc_txqsizes[0] = sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0]; scctx->isc_txqsizes[1] = sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1]; /* * Enforce that the receive completion queue descriptor count is the * sum of the receive command queue descriptor counts, and that the * second receive command queue descriptor count is the same as the * first one. */ scctx->isc_nrxd[2] = scctx->isc_nrxd[1]; scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2]; scctx->isc_rxqsizes[0] = sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0]; scctx->isc_rxqsizes[1] = sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1]; scctx->isc_rxqsizes[2] = sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2]; /* * Initialize the max frame size and descriptor queue buffer * sizes. */ vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp)); scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; /* Map PCI BARs */ error = vmxnet3_alloc_resources(sc); if (error) goto fail; /* Check device versions */ error = vmxnet3_check_version(sc); if (error) goto fail; /* * The interrupt mode can be set in the hypervisor configuration via * the parameter ethernet.intrMode. 
*/ intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG); sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03; /* * Configure the softc context to attempt to configure the interrupt * mode now indicated by intr_config. iflib will follow the usual * fallback path MSI-X -> MSI -> LEGACY, starting at the configured * starting mode. */ switch (intr_config & 0x03) { case VMXNET3_IT_AUTO: case VMXNET3_IT_MSIX: scctx->isc_msix_bar = pci_msix_table_bar(dev); break; case VMXNET3_IT_MSI: scctx->isc_msix_bar = -1; scctx->isc_disable_msix = 1; break; case VMXNET3_IT_LEGACY: scctx->isc_msix_bar = 0; break; } scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU; /* These capabilities are not enabled by default. */ scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; vmxnet3_get_lladdr(sc); iflib_set_mac(ctx, sc->vmx_lladdr); return (0); fail: /* * We must completely clean up anything allocated above as iflib * will not invoke any other driver entry points as a result of this * failure. */ vmxnet3_free_resources(sc); return (error); } static int vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; int error; int i; char irq_name[16]; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_nrxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "rxq%d", i); rxq = &sc->vmx_rxq[i]; error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1, IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register rxq %d interrupt handler\n", i); return (error); } } for (i = 0; i < scctx->isc_ntxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "txq%d", i); /* * Don't provide the corresponding rxq irq for reference - * we want the transmit task to be attached to a task queue * that is different from the one used by the corresponding * rxq irq. That is because the TX doorbell writes are very * expensive as virtualized MMIO operations, so we want to * be able to defer them to another core when possible so * that they don't steal receive processing cycles during * stack turnarounds like TCP ACK generation. The other * piece to this approach is enabling the iflib abdicate * option (currently via an interface-specific * tunable/sysctl). 
*/ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, irq_name); } error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq, scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0, "event"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register event interrupt handler\n"); return (error); } return (0); } static void vmxnet3_free_irqs(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; int i; scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &sc->vmx_rxq[i]; iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq); } iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq); } static int vmxnet3_attach_post(if_ctx_t ctx) { if_softc_ctx_t scctx; struct vmxnet3_softc *sc; int error; scctx = iflib_get_softc_ctx(ctx); sc = iflib_get_softc(ctx); if (scctx->isc_nrxqsets > 1) sc->vmx_flags |= VMXNET3_FLAG_RSS; error = vmxnet3_alloc_data(sc); if (error) goto fail; vmxnet3_set_interrupt_idx(sc); vmxnet3_setup_sysctl(sc); ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO); fail: return (error); } static int vmxnet3_detach(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); vmxnet3_free_irqs(sc); vmxnet3_free_data(sc); vmxnet3_free_resources(sc); return (0); } static int vmxnet3_shutdown(if_ctx_t ctx) { return (0); } static int vmxnet3_suspend(if_ctx_t ctx) { return (0); } static int vmxnet3_resume(if_ctx_t ctx) { return (0); } static int vmxnet3_alloc_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; rid = PCIR_BAR(0); sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res0 == NULL) { device_printf(dev, "could not map BAR0 memory\n"); return (ENXIO); } sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0); sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0); rid = PCIR_BAR(1); sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res1 == NULL) { device_printf(dev, "could not map BAR1 memory\n"); return (ENXIO); } sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1); sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1); return (0); } static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->vmx_res0), sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->vmx_res1), sc->vmx_res1); sc->vmx_res1 = NULL; } } static int vmxnet3_check_version(struct vmxnet3_softc *sc) { device_t dev; uint32_t version; dev = sc->vmx_dev; version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported hardware version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1); version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported UPT version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1); return (0); } static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int intr_idx; int i; scctx = sc->vmx_scctx; /* * There is always one interrupt per receive queue, assigned * starting with the first interrupt. 
When there is only one * interrupt available, the event interrupt shares the receive queue * interrupt, otherwise it uses the interrupt following the last * receive queue interrupt. Transmit queues are not assigned * interrupts, so they are given indexes beyond the indexes that * correspond to the real interrupts. */ /* The event interrupt is always the last vector. */ sc->vmx_event_intr_idx = scctx->isc_vectors - 1; intr_idx = 0; for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxq->vxrxq_intr_idx = intr_idx; rxs->intr_idx = rxq->vxrxq_intr_idx; } /* * Assign the tx queues interrupt indexes above what we are actually * using. These interrupts will never be enabled. */ intr_idx = scctx->isc_vectors; for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txq->vxtxq_intr_idx = intr_idx; txs->intr_idx = txq->vxtxq_intr_idx; } } static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int size; int error; scctx = sc->vmx_scctx; /* * The txq and rxq shared data areas must be allocated contiguously * as vmxnet3_driver_shared contains only a single address member * for the shared queue data area. */ size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) + scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0); if (error) { device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n"); return (error); } return (0); } static void vmxnet3_init_txq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_txqueue *txq; struct vmxnet3_comp_ring *txc; struct vmxnet3_txring *txr; if_softc_ctx_t scctx; txq = &sc->vmx_txq[q]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; scctx = sc->vmx_scctx; snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d", device_get_nameunit(sc->vmx_dev), q); txq->vxtxq_sc = sc; txq->vxtxq_id = q; txc->vxcr_ndesc = scctx->isc_ntxd[0]; txr->vxtxr_ndesc = scctx->isc_ntxd[1]; } static int vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct vmxnet3_softc *sc; int q; int error; caddr_t kva; sc = iflib_get_softc(ctx); /* Allocate the array of transmit queues */ sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_txq == NULL) return (ENOMEM); /* Initialize driver state for each transmit queue */ for (q = 0; q < ntxqsets; q++) vmxnet3_init_txq(sc, q); /* * Allocate queue state that is shared with the device. This check * and call is performed in both vmxnet3_tx_queues_alloc() and * vmxnet3_rx_queues_alloc() so that we don't have to care which * order iflib invokes those routines in. 
*/ if (sc->vmx_qs_dma.idi_size == 0) { error = vmxnet3_queues_shared_alloc(sc); if (error) return (error); } kva = sc->vmx_qs_dma.idi_vaddr; for (q = 0; q < ntxqsets; q++) { sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva; kva += sizeof(struct vmxnet3_txq_shared); } /* Record descriptor ring vaddrs and paddrs */ for (q = 0; q < ntxqsets; q++) { struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txq = &sc->vmx_txq[q]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; /* Completion ring */ txc->vxcr_u.txcd = (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0]; txc->vxcr_paddr = paddrs[q * ntxqs + 0]; /* Command ring */ txr->vxtxr_txd = (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1]; txr->vxtxr_paddr = paddrs[q * ntxqs + 1]; } return (0); } static void vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxring *rxr; if_softc_ctx_t scctx; int i; rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; scctx = sc->vmx_scctx; snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d", device_get_nameunit(sc->vmx_dev), q); rxq->vxrxq_sc = sc; rxq->vxrxq_id = q; /* * First rxq is the completion queue, so there are nrxqs - 1 command * rings starting at iflib queue id 1. */ rxc->vxcr_ndesc = scctx->isc_nrxd[0]; for (i = 0; i < nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1]; } } static int vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; int q; int i; int error; caddr_t kva; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; /* Allocate the array of receive queues */ sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_rxq == NULL) return (ENOMEM); /* Initialize driver state for each receive queue */ for (q = 0; q < nrxqsets; q++) vmxnet3_init_rxq(sc, q, nrxqs); /* * Allocate queue state that is shared with the device. This check * and call is performed in both vmxnet3_tx_queues_alloc() and * vmxnet3_rx_queues_alloc() so that we don't have to care which * order iflib invokes those routines in. 
*/ if (sc->vmx_qs_dma.idi_size == 0) { error = vmxnet3_queues_shared_alloc(sc); if (error) return (error); } kva = sc->vmx_qs_dma.idi_vaddr + scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared); for (q = 0; q < nrxqsets; q++) { sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva; kva += sizeof(struct vmxnet3_rxq_shared); } /* Record descriptor ring vaddrs and paddrs */ for (q = 0; q < nrxqsets; q++) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; /* Completion ring */ rxc->vxcr_u.rxcd = (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0]; rxc->vxcr_paddr = paddrs[q * nrxqs + 0]; /* Command ring(s) */ for (i = 0; i < nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_rxd = (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i]; rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i]; } } return (0); } static void vmxnet3_queues_free(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); /* Free queue state area that is shared with the device */ if (sc->vmx_qs_dma.idi_size != 0) { iflib_dma_free(&sc->vmx_qs_dma); sc->vmx_qs_dma.idi_size = 0; } /* Free array of receive queues */ if (sc->vmx_rxq != NULL) { free(sc->vmx_rxq, M_DEVBUF); sc->vmx_rxq = NULL; } /* Free array of transmit queues */ if (sc->vmx_txq != NULL) { free(sc->vmx_txq, M_DEVBUF); sc->vmx_txq = NULL; } } static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc) { device_t dev; size_t size; int error; dev = sc->vmx_dev; /* Top level state structure shared with the device */ size = sizeof(struct vmxnet3_driver_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0); if (error) { device_printf(dev, "cannot alloc shared memory\n"); return (error); } sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr; /* RSS table state shared with the device */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { size = sizeof(struct vmxnet3_rss_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_rss_dma, 0); if (error) { device_printf(dev, "cannot alloc rss shared memory\n"); return (error); } sc->vmx_rss = (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr; } return (0); } static void vmxnet3_free_shared_data(struct vmxnet3_softc *sc) { /* Free RSS table state shared with the device */ if (sc->vmx_rss != NULL) { iflib_dma_free(&sc->vmx_rss_dma); sc->vmx_rss = NULL; } /* Free top level state structure shared with the device */ if (sc->vmx_ds != NULL) { iflib_dma_free(&sc->vmx_ds_dma); sc->vmx_ds = NULL; } } static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc) { int error; /* Multicast table state shared with the device */ error = iflib_dma_alloc_align(sc->vmx_ctx, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0); if (error) device_printf(sc->vmx_dev, "unable to alloc multicast table\n"); else sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr; return (error); } static void vmxnet3_free_mcast_table(struct vmxnet3_softc *sc) { /* Free multicast table state shared with the device */ if (sc->vmx_mcast != NULL) { iflib_dma_free(&sc->vmx_mcast_dma); sc->vmx_mcast = NULL; } } static void vmxnet3_init_shared_data(struct vmxnet3_softc *sc) { struct vmxnet3_driver_shared *ds; if_softc_ctx_t scctx; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; ds = sc->vmx_ds; scctx = sc->vmx_scctx; /* * Initialize fields of the shared data that remains the same across * reinits. 
Note the shared data is zero'd when allocated. */ ds->magic = VMXNET3_REV1_MAGIC; /* DriverInfo */ ds->version = VMXNET3_DRIVER_VERSION; ds->guest = VMXNET3_GOS_FREEBSD | #ifdef __LP64__ VMXNET3_GOS_64BIT; #else VMXNET3_GOS_32BIT; #endif ds->vmxnet3_revision = 1; ds->upt_version = 1; /* Misc. conf */ ds->driver_data = vtophys(sc); ds->driver_data_len = sizeof(struct vmxnet3_softc); ds->queue_shared = sc->vmx_qs_dma.idi_paddr; ds->queue_shared_len = sc->vmx_qs_dma.idi_size; ds->nrxsg_max = IFLIB_MAX_RX_SEGS; /* RSS conf */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->rss.version = 1; ds->rss.paddr = sc->vmx_rss_dma.idi_paddr; ds->rss.len = sc->vmx_rss_dma.idi_size; } /* Interrupt control. */ ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; /* * Total number of interrupt indexes we are using in the shared * config data, even though we don't actually allocate interrupt * resources for the tx queues. Some versions of the device will * fail to initialize successfully if interrupt indexes are used in * the shared config that exceed the number of interrupts configured * here. */ ds->nintr = (scctx->isc_vectors == 1) ? 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1); ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < ds->nintr; i++) ds->modlevel[i] = UPT1_IMOD_ADAPTIVE; /* Receive filter. */ ds->mcast_table = sc->vmx_mcast_dma.idi_paddr; ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size; /* Tx queues */ for (i = 0; i < scctx->isc_ntxqsets; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr; txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc; txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr; txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc; txs->driver_data = vtophys(txq); txs->driver_data_len = sizeof(struct vmxnet3_txqueue); } /* Rx queues */ for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr; rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc; rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr; rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc; rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr; rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc; rxs->driver_data = vtophys(rxq); rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue); } } static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc) { /* * Use the same key as the Linux driver until FreeBSD can do * RSS (presumably Toeplitz) in software. */ static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = { 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac, 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28, 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70, 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3, 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9, }; if_softc_ctx_t scctx; struct vmxnet3_rss_shared *rss; #ifdef RSS uint8_t rss_algo; #endif int i; scctx = sc->vmx_scctx; rss = sc->vmx_rss; rss->hash_type = UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 | UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6; rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ; rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE; rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; #ifdef RSS /* * If the software RSS is configured to anything else other than * Toeplitz, then just do Toeplitz in "hardware" for the sake of * the packet distribution, but report the hash as opaque to * disengage from the software RSS. 
*/ rss_algo = rss_gethashalgo(); if (rss_algo == RSS_HASH_TOEPLITZ) { rss_getkey(rss->hash_key); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) { rss->ind_table[i] = rss_get_indirection_to_bucket(i) % scctx->isc_nrxqsets; } sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS; } else #endif { memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) rss->ind_table[i] = i % scctx->isc_nrxqsets; sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS; } } static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc) { if_t ifp; struct vmxnet3_driver_shared *ds; if_softc_ctx_t scctx; ifp = sc->vmx_ifp; ds = sc->vmx_ds; scctx = sc->vmx_scctx; ds->mtu = if_getmtu(ifp); ds->ntxqueue = scctx->isc_ntxqsets; ds->nrxqueue = scctx->isc_nrxqsets; ds->upt_features = 0; if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) ds->upt_features |= UPT1_F_CSUM; if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) ds->upt_features |= UPT1_F_VLAN; if (if_getcapenable(ifp) & IFCAP_LRO) ds->upt_features |= UPT1_F_LRO; if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->upt_features |= UPT1_F_RSS; vmxnet3_reinit_rss_shared_data(sc); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr); vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH, (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32); } static int vmxnet3_alloc_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_shared_data(sc); if (error) return (error); error = vmxnet3_alloc_mcast_table(sc); if (error) return (error); vmxnet3_init_shared_data(sc); return (0); } static void vmxnet3_free_data(struct vmxnet3_softc *sc) { vmxnet3_free_mcast_table(sc); vmxnet3_free_shared_data(sc); } static void vmxnet3_evintr(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txq_shared *ts; struct vmxnet3_rxq_shared *rs; uint32_t event; dev = sc->vmx_dev; /* Clear events. */ event = sc->vmx_ds->event; vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event); if (event & VMXNET3_EVENT_LINK) vmxnet3_link_status(sc); if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) { vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS); ts = sc->vmx_txq[0].vxtxq_ts; if (ts->stopped != 0) device_printf(dev, "Tx queue error %#x\n", ts->error); rs = sc->vmx_rxq[0].vxrxq_rs; if (rs->stopped != 0) device_printf(dev, "Rx queue error %#x\n", rs->error); /* XXX - rely on iflib watchdog to reset us? */ device_printf(dev, "Rx/Tx queue error event ... 
" "waiting for iflib watchdog reset\n"); } if (event & VMXNET3_EVENT_DIC) device_printf(dev, "device implementation change event\n"); if (event & VMXNET3_EVENT_DEBUG) device_printf(dev, "debug event\n"); } static int vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_txdesc *txd, *sop; bus_dma_segment_t *segs; int nsegs; int pidx; int hdrlen; int i; int gen; sc = vsc; txq = &sc->vmx_txq[pi->ipi_qsidx]; txr = &txq->vxtxq_cmd_ring; segs = pi->ipi_segs; nsegs = pi->ipi_nsegs; pidx = pi->ipi_pidx; KASSERT(nsegs <= VMXNET3_TX_MAXSEGS, ("%s: packet with too many segments %d", __func__, nsegs)); sop = &txr->vxtxr_txd[pidx]; gen = txr->vxtxr_gen ^ 1; /* Owned by cpu (yet) */ for (i = 0; i < nsegs; i++) { txd = &txr->vxtxr_txd[pidx]; txd->addr = segs[i].ds_addr; txd->len = segs[i].ds_len; txd->gen = gen; txd->dtype = 0; txd->offload_mode = VMXNET3_OM_NONE; txd->offload_pos = 0; txd->hlen = 0; txd->eop = 0; txd->compreq = 0; txd->vtag_mode = 0; txd->vtag = 0; if (++pidx == txr->vxtxr_ndesc) { pidx = 0; txr->vxtxr_gen ^= 1; } gen = txr->vxtxr_gen; } txd->eop = 1; txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR); pi->ipi_new_pidx = pidx; /* * VLAN */ if (pi->ipi_mflags & M_VLANTAG) { sop->vtag_mode = 1; sop->vtag = pi->ipi_vtag; } /* * TSO and checksum offloads */ hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen; if (pi->ipi_csum_flags & CSUM_TSO) { sop->offload_mode = VMXNET3_OM_TSO; sop->hlen = hdrlen + pi->ipi_tcp_hlen; sop->offload_pos = pi->ipi_tso_segsz; } else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6)) { sop->offload_mode = VMXNET3_OM_CSUM; sop->hlen = hdrlen; sop->offload_pos = hdrlen + ((pi->ipi_ipproto == IPPROTO_TCP) ? offsetof(struct tcphdr, th_sum) : offsetof(struct udphdr, uh_sum)); } /* Finally, change the ownership. */ vmxnet3_barrier(sc, VMXNET3_BARRIER_WR); sop->gen ^= 1; return (0); } static void vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; sc = vsc; txq = &sc->vmx_txq[txqid]; /* * pidx is what we last set ipi_new_pidx to in * vmxnet3_isc_txd_encap() */ /* * Avoid expensive register updates if the flush request is * redundant. */ if (txq->vxtxq_last_flush == pidx) return; txq->vxtxq_last_flush = pidx; vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx); } static int vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_comp_ring *txc; struct vmxnet3_txcompdesc *txcd; struct vmxnet3_txring *txr; int processed; sc = vsc; txq = &sc->vmx_txq[txqid]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; /* * If clear is true, we need to report the number of TX command ring * descriptors that have been processed by the device. If clear is * false, we just need to report whether or not at least one TX * command ring descriptor has been processed by the device. 
*/ processed = 0; for (;;) { txcd = &txc->vxcr_u.txcd[txc->vxcr_next]; if (txcd->gen != txc->vxcr_gen) break; else if (!clear) return (1); vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); - if (++txc->vxcr_next == txc->vxcr_ndesc) { + MPASS(txc->vxcr_next < txc->vxcr_ndesc); + if (++txc->vxcr_next >= txc->vxcr_ndesc) { txc->vxcr_next = 0; txc->vxcr_gen ^= 1; } if (txcd->eop_idx < txr->vxtxr_next) processed += txr->vxtxr_ndesc - (txr->vxtxr_next - txcd->eop_idx) + 1; else processed += txcd->eop_idx - txr->vxtxr_next + 1; txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc; } return (processed); } static int vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; int avail; int completed_gen; #ifdef INVARIANTS int expect_sop = 1; #endif sc = vsc; rxq = &sc->vmx_rxq[rxqid]; rxc = &rxq->vxrxq_comp_ring; avail = 0; completed_gen = rxc->vxcr_gen; for (;;) { rxcd = &rxc->vxcr_u.rxcd[idx]; if (rxcd->gen != completed_gen) break; vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); #ifdef INVARIANTS if (expect_sop) KASSERT(rxcd->sop, ("%s: expected sop", __func__)); else KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__)); expect_sop = rxcd->eop; #endif if (rxcd->eop && (rxcd->len != 0)) avail++; if (avail > budget) break; if (++idx == rxc->vxcr_ndesc) { idx = 0; completed_gen ^= 1; } } return (avail); } static int vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; if_rxd_frag_t frag; int cqidx; uint16_t total_len; uint8_t nfrags; uint8_t i; uint8_t flid; sc = vsc; scctx = sc->vmx_scctx; rxq = &sc->vmx_rxq[ri->iri_qsidx]; rxc = &rxq->vxrxq_comp_ring; /* * Get a single packet starting at the given index in the completion * queue. That we have been called indicates that * vmxnet3_isc_rxd_available() has already verified that either * there is a complete packet available starting at the given index, * or there are one or more zero length packets starting at the * given index followed by a complete packet, so no verification of * ownership of the descriptors (and no associated read barrier) is * required here. */ cqidx = ri->iri_cidx; rxcd = &rxc->vxcr_u.rxcd[cqidx]; while (rxcd->len == 0) { KASSERT(rxcd->sop && rxcd->eop, ("%s: zero-length packet without both sop and eop set", __func__)); rxc->vxcr_zero_length++; if (++cqidx == rxc->vxcr_ndesc) { cqidx = 0; rxc->vxcr_gen ^= 1; } rxcd = &rxc->vxcr_u.rxcd[cqidx]; } KASSERT(rxcd->sop, ("%s: expected sop", __func__)); /* * RSS and flow ID. * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should * be used only if the software RSS is enabled and it uses the same * algorithm and the hash key as the "hardware". If the software RSS * is not enabled, then it's simply pointless to use those types. * If it's enabled but with different parameters, then hash values will * not match. 
*/ ri->iri_flowid = rxcd->rss_hash; #ifdef RSS if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) { switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_NONE: ri->iri_flowid = ri->iri_qsidx; ri->iri_rsstype = M_HASHTYPE_NONE; break; case VMXNET3_RCD_RSS_TYPE_IPV4: ri->iri_rsstype = M_HASHTYPE_RSS_IPV4; break; case VMXNET3_RCD_RSS_TYPE_TCPIPV4: ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4; break; case VMXNET3_RCD_RSS_TYPE_IPV6: ri->iri_rsstype = M_HASHTYPE_RSS_IPV6; break; case VMXNET3_RCD_RSS_TYPE_TCPIPV6: ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6; break; default: ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; break; } } else #endif { switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_NONE: ri->iri_flowid = ri->iri_qsidx; ri->iri_rsstype = M_HASHTYPE_NONE; break; default: ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; break; } } /* * The queue numbering scheme used for rxcd->qid is as follows: * - All of the command ring 0s are numbered [0, nrxqsets - 1] * - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1] * * Thus, rxcd->qid less than nrxqsets indicates command ring (and * flid) 0, and rxcd->qid greater than or equal to nrxqsets * indicates command ring (and flid) 1. */ nfrags = 0; total_len = 0; do { rxcd = &rxc->vxcr_u.rxcd[cqidx]; KASSERT(rxcd->gen == rxc->vxcr_gen, ("%s: generation mismatch", __func__)); KASSERT(nfrags < IFLIB_MAX_RX_SEGS, ("%s: too many fragments", __func__)); if (__predict_true(rxcd->len != 0)) { frag = &ri->iri_frags[nfrags]; flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0; frag->irf_flid = flid; frag->irf_idx = rxcd->rxd_idx; frag->irf_len = rxcd->len; total_len += rxcd->len; nfrags++; } else { rxc->vcxr_zero_length_frag++; } if (++cqidx == rxc->vxcr_ndesc) { cqidx = 0; rxc->vxcr_gen ^= 1; } } while (!rxcd->eop); ri->iri_cidx = cqidx; ri->iri_nfrags = nfrags; ri->iri_len = total_len; /* * If there's an error, the last descriptor in the packet will * have the error indicator set. In this case, set all * fragment lengths to zero. This will cause iflib to discard * the packet, but process all associated descriptors through * the refill mechanism. */ if (__predict_false(rxcd->error)) { rxc->vxcr_pkt_errors++; for (i = 0; i < nfrags; i++) { frag = &ri->iri_frags[i]; frag->irf_len = 0; } } else { /* Checksum offload information is in the last descriptor. */ if (!rxcd->no_csum) { uint32_t csum_flags = 0; if (rxcd->ipv4) { csum_flags |= CSUM_IP_CHECKED; if (rxcd->ipcsum_ok) csum_flags |= CSUM_IP_VALID; } if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) { csum_flags |= CSUM_L4_CALC; if (rxcd->csum_ok) { csum_flags |= CSUM_L4_VALID; ri->iri_csum_data = 0xffff; } } ri->iri_csum_flags = csum_flags; } /* VLAN information is in the last descriptor. */ if (rxcd->vlan) { ri->iri_flags |= M_VLANTAG; ri->iri_vtag = rxcd->vtag; } } return (0); } static void vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_rxdesc *rxd; uint64_t *paddrs; int count; int len; int idx; int i; uint8_t flid; uint8_t btype; count = iru->iru_count; len = iru->iru_buf_size; flid = iru->iru_flidx; paddrs = iru->iru_paddrs; sc = vsc; rxq = &sc->vmx_rxq[iru->iru_qsidx]; rxr = &rxq->vxrxq_cmd_ring[flid]; rxd = rxr->vxrxr_rxd; /* * Command ring 0 is filled with BTYPE_HEAD descriptors, and * command ring 1 is filled with BTYPE_BODY descriptors. */ btype = (flid == 0) ? 
VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY; /* * The refill entries from iflib will advance monotonically, * but the refilled descriptors may not be contiguous due to * earlier skipping of descriptors by the device. The refill * entries from iflib need an entire state update, while the * descriptors previously skipped by the device only need to * have their generation numbers updated. */ idx = rxr->vxrxr_refill_start; i = 0; do { if (idx == iru->iru_idxs[i]) { rxd[idx].addr = paddrs[i]; rxd[idx].len = len; rxd[idx].btype = btype; i++; } else rxr->vxrxr_desc_skips++; rxd[idx].gen = rxr->vxrxr_gen; if (++idx == rxr->vxrxr_ndesc) { idx = 0; rxr->vxrxr_gen ^= 1; } } while (i != count); rxr->vxrxr_refill_start = idx; } static void vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx) { struct vmxnet3_softc *sc; bus_size_t r; sc = vsc; if (flid == 0) r = VMXNET3_BAR0_RXH1(rxqid); else r = VMXNET3_BAR0_RXH2(rxqid); vmxnet3_write_bar0(sc, r, pidx); } static int vmxnet3_legacy_intr(void *xsc) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; if_ctx_t ctx; sc = xsc; scctx = sc->vmx_scctx; ctx = sc->vmx_ctx; /* * When there is only a single interrupt configured, this routine * runs in fast interrupt context, following which the rxq 0 task * will be enqueued. */ if (scctx->isc_intr == IFLIB_INTR_LEGACY) { if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0) return (FILTER_HANDLED); } if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_intr_disable_all(ctx); if (sc->vmx_ds->event != 0) iflib_admin_intr_deferred(ctx); /* * XXX - When there is both rxq and event activity, do we care * whether the rxq 0 task or the admin task re-enables the interrupt * first? */ return (FILTER_SCHEDULE_THREAD); } static int vmxnet3_rxq_intr(void *vrxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; rxq = vrxq; sc = rxq->vxrxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx); return (FILTER_SCHEDULE_THREAD); } static int vmxnet3_event_intr(void *vsc) { struct vmxnet3_softc *sc; sc = vsc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx); /* * The work will be done via vmxnet3_update_admin_status(), and the * interrupt will be re-enabled in vmxnet3_link_intr_enable(). 
*/ return (FILTER_SCHEDULE_THREAD); } static void vmxnet3_stop(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); sc->vmx_link_active = 0; vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE); vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET); } static void vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txq->vxtxq_last_flush = -1; txr = &txq->vxtxq_cmd_ring; txr->vxtxr_next = 0; txr->vxtxr_gen = VMXNET3_INIT_GEN; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ txc = &txq->vxtxq_comp_ring; txc->vxcr_next = 0; txc->vxcr_gen = VMXNET3_INIT_GEN; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ } static void vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int i; /* * The descriptors will be populated with buffers during a * subsequent invocation of vmxnet3_isc_rxd_refill() */ for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_gen = VMXNET3_INIT_GEN; rxr->vxrxr_desc_skips = 0; rxr->vxrxr_refill_start = 0; /* * iflib has zeroed out the descriptor array during the * prior attach or stop */ } for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_gen = 0; rxr->vxrxr_desc_skips = 0; rxr->vxrxr_refill_start = 0; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); } rxc = &rxq->vxrxq_comp_ring; rxc->vxcr_next = 0; rxc->vxcr_gen = VMXNET3_INIT_GEN; rxc->vxcr_zero_length = 0; rxc->vcxr_zero_length_frag = 0; rxc->vxcr_pkt_errors = 0; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ } static void vmxnet3_reinit_queues(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int q; scctx = sc->vmx_scctx; for (q = 0; q < scctx->isc_ntxqsets; q++) vmxnet3_txinit(sc, &sc->vmx_txq[q]); for (q = 0; q < scctx->isc_nrxqsets; q++) vmxnet3_rxinit(sc, &sc->vmx_rxq[q]); } static int vmxnet3_enable_device(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int q; scctx = sc->vmx_scctx; if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) { device_printf(sc->vmx_dev, "device enable command failed!\n"); return (1); } /* Reset the Rx queue heads. */ for (q = 0; q < scctx->isc_nrxqsets; q++) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0); vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0); } return (0); } static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc) { if_t ifp; ifp = sc->vmx_ifp; vmxnet3_set_rxfilter(sc, if_getflags(ifp)); if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); else bzero(sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } static void vmxnet3_init(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); /* Use the current MAC address. 
*/ bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN); vmxnet3_set_lladdr(sc); vmxnet3_reinit_shared_data(sc); vmxnet3_reinit_queues(sc); vmxnet3_enable_device(sc); vmxnet3_reinit_rxfilters(sc); vmxnet3_link_status(sc); } static void vmxnet3_multi_set(if_ctx_t ctx) { vmxnet3_set_rxfilter(iflib_get_softc(ctx), if_getflags(iflib_get_ifp(ctx))); } static int vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) return (EINVAL); /* * Update the max frame size so that the rx mbuf size is * chosen based on the new mtu during the interface init that * will occur after this routine returns. */ scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; /* RX completion queue - n/a */ scctx->isc_rxd_buf_size[0] = 0; /* * For header-type descriptors (used for first segment of * packet), let iflib determine the buffer size based on the * max frame size. */ scctx->isc_rxd_buf_size[1] = 0; /* * For body-type descriptors (used for jumbo frames and LRO), * always use page-sized buffers. */ scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE; return (0); } static void vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (vmxnet3_link_is_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_AUTO; } else ifmr->ifm_active |= IFM_NONE; } static int vmxnet3_media_change(if_ctx_t ctx) { /* Ignore. */ return (0); } static int vmxnet3_promisc_set(if_ctx_t ctx, int flags) { vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags); return (0); } static uint64_t vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt) { if_t ifp = iflib_get_ifp(ctx); if (cnt < IFCOUNTERS) return if_get_counter_default(ifp, cnt); return (0); } static void vmxnet3_update_admin_status(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); vmxnet3_refresh_host_stats(sc); } static void vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid) { /* Host stats refresh is global, so just trigger it on txq 0 */ if (qid == 0) vmxnet3_refresh_host_stats(iflib_get_softc(ctx)); } static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag) { int idx, bit; if (tag == 0 || tag > 4095) return; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; /* Update our private VLAN bitvector. 
*/ if (add) sc->vmx_vlan_filter[idx] |= (1 << bit); else sc->vmx_vlan_filter[idx] &= ~(1 << bit); } static void vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag) { vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag); } static void vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag) { vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag); } static u_int vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count) { struct vmxnet3_softc *sc = arg; if (count < VMXNET3_MULTICAST_MAX) bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN], ETHER_ADDR_LEN); return (1); } static void vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags) { if_t ifp; struct vmxnet3_driver_shared *ds; u_int mode; ifp = sc->vmx_ifp; ds = sc->vmx_ds; mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST; if (flags & IFF_PROMISC) mode |= VMXNET3_RXMODE_PROMISC; if (flags & IFF_ALLMULTI) mode |= VMXNET3_RXMODE_ALLMULTI; else { int cnt; cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc); if (cnt >= VMXNET3_MULTICAST_MAX) { cnt = 0; mode |= VMXNET3_RXMODE_ALLMULTI; } else if (cnt > 0) mode |= VMXNET3_RXMODE_MCAST; ds->mcast_tablelen = cnt * ETHER_ADDR_LEN; } ds->rxmode = mode; vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER); vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE); } static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc) { vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS); } static int vmxnet3_link_is_up(struct vmxnet3_softc *sc) { uint32_t status; status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK); return !!(status & 0x1); } static void vmxnet3_link_status(struct vmxnet3_softc *sc) { if_ctx_t ctx; uint64_t speed; int link; ctx = sc->vmx_ctx; link = vmxnet3_link_is_up(sc); speed = IF_Gbps(10); if (link != 0 && sc->vmx_link_active == 0) { sc->vmx_link_active = 1; iflib_link_state_change(ctx, LINK_STATE_UP, speed); } else if (link == 0 && sc->vmx_link_active != 0) { sc->vmx_link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, speed); } } static void vmxnet3_set_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = sc->vmx_lladdr[0]; ml |= sc->vmx_lladdr[1] << 8; ml |= sc->vmx_lladdr[2] << 16; ml |= sc->vmx_lladdr[3] << 24; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml); mh = sc->vmx_lladdr[4]; mh |= sc->vmx_lladdr[5] << 8; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh); } static void vmxnet3_get_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL); mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH); sc->vmx_lladdr[0] = ml; sc->vmx_lladdr[1] = ml >> 8; sc->vmx_lladdr[2] = ml >> 16; sc->vmx_lladdr[3] = ml >> 24; sc->vmx_lladdr[4] = mh; sc->vmx_lladdr[5] = mh >> 8; } static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *txsnode; struct sysctl_oid_list *list, *txslist; struct UPT1_TxStats *txstats; char namebuf[16]; txstats = &txq->vxtxq_ts->stats; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue"); txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node); /* * Add statistics reported by the host. These are updated by the * iflib txq timer on txq 0. 
	 */
	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
	txslist = SYSCTL_CHILDREN(txsnode);
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
	    &txstats->TSO_packets, "TSO packets");
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
	    &txstats->TSO_bytes, "TSO bytes");
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
	    &txstats->ucast_packets, "Unicast packets");
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
	    &txstats->ucast_bytes, "Unicast bytes");
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
	    &txstats->mcast_packets, "Multicast packets");
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
	    &txstats->mcast_bytes, "Multicast bytes");
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
	    &txstats->error, "Errors");
	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
	    &txstats->discard, "Discards");
}

static void
vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
{
	struct sysctl_oid *node, *rxsnode;
	struct sysctl_oid_list *list, *rxslist;
	struct UPT1_RxStats *rxstats;
	char namebuf[16];

	rxstats = &rxq->vxrxq_rs->stats;

	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);

	/*
	 * Add statistics reported by the host. These are updated by the
	 * iflib txq timer on txq 0.
	 */
	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
	rxslist = SYSCTL_CHILDREN(rxsnode);
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
	    &rxstats->LRO_packets, "LRO packets");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
	    &rxstats->LRO_bytes, "LRO bytes");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
	    &rxstats->ucast_packets, "Unicast packets");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
	    &rxstats->ucast_bytes, "Unicast bytes");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
	    &rxstats->mcast_packets, "Multicast packets");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
	    &rxstats->mcast_bytes, "Multicast bytes");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
	    &rxstats->bcast_packets, "Broadcast packets");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
	    &rxstats->bcast_bytes, "Broadcast bytes");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
	    &rxstats->nobuffer, "No buffer");
	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
	    &rxstats->error, "Errors");
}

static void
vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
{
	if_softc_ctx_t scctx;
	struct sysctl_oid *node;
	struct sysctl_oid_list *list;
	int i;

	scctx = sc->vmx_scctx;

	for (i = 0; i < scctx->isc_ntxqsets; i++) {
		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];

		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
		list = SYSCTL_CHILDREN(node);

		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
	}

	for (i = 0; i < scctx->isc_nrxqsets; i++) {
		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];

		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
		list = SYSCTL_CHILDREN(node);

		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips",
		    CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0,
		    "");
		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips",
		    CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0,
		    "");
		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length",
		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_zero_length, 0,
		    "");
		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
		    0, "");
		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors",
		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0,
		    "");
	}
}

static void
vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
{
	if_softc_ctx_t scctx;
	int i;

	scctx = sc->vmx_scctx;

	for (i = 0; i < scctx->isc_ntxqsets; i++)
		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
	for (i = 0; i < scctx->isc_nrxqsets; i++)
		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);

	vmxnet3_setup_debug_sysctl(sc, ctx, child);
}

static void
vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vmx_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	vmxnet3_setup_queue_sysctl(sc, ctx, child);
}

static void
vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
{

	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
}

static uint32_t
vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
{

	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
}

static void
vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
{

	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
}

static void
vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
{

	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
}

static uint32_t
vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
{

	vmxnet3_write_cmd(sc, cmd);
	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
}

static void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
}

static void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
}

static int
vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
{
	/* Not using interrupts for TX */
	return (0);
}

static int
vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
{
	struct vmxnet3_softc *sc;

	sc = iflib_get_softc(ctx);
	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
	return (0);
}

static void
vmxnet3_link_intr_enable(if_ctx_t ctx)
{
	struct vmxnet3_softc *sc;

	sc = iflib_get_softc(ctx);
	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
}

static void
vmxnet3_intr_enable_all(if_ctx_t ctx)
{
	struct vmxnet3_softc *sc;
	if_softc_ctx_t scctx;
	int i;

	sc = iflib_get_softc(ctx);
	scctx = sc->vmx_scctx;
	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
	for (i = 0; i < scctx->isc_vectors; i++)
		vmxnet3_enable_intr(sc, i);
}

static void
vmxnet3_intr_disable_all(if_ctx_t ctx)
{
	struct vmxnet3_softc *sc;
	int i;

	sc = iflib_get_softc(ctx);
	/*
	 * iflib may invoke this routine before vmxnet3_attach_post() has
	 * run, which is before the top level shared data area is
	 * initialized and the device made aware of it.
	 */
	if (sc->vmx_ds != NULL)
		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
		vmxnet3_disable_intr(sc, i);
}

static bool
vmxnet3_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
{
	switch (event) {
	case IFLIB_RESTART_VLAN_CONFIG:
		return (true);
	default:
		return (false);
	}
}

/*
 * Since this is a purely paravirtualized device, we do not have
 * to worry about DMA coherency. But at times, we must make sure
 * both the compiler and CPU do not reorder memory operations.
 */
static inline void
vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
{

	switch (type) {
	case VMXNET3_BARRIER_RD:
		rmb();
		break;
	case VMXNET3_BARRIER_WR:
		wmb();
		break;
	case VMXNET3_BARRIER_RDWR:
		mb();
		break;
	default:
		panic("%s: bad barrier type %d", __func__, type);
	}
}