diff --git a/sys/dev/axgbe/if_axgbe_pci.c b/sys/dev/axgbe/if_axgbe_pci.c index 4c4883e1cb4f..7c4645792321 100644 --- a/sys/dev/axgbe/if_axgbe_pci.c +++ b/sys/dev/axgbe/if_axgbe_pci.c @@ -1,2339 +1,2339 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Advanced Micro Devices, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Contact Information : * Rajesh Kumar * Shreyank Amartya */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xgbe.h" #include "xgbe-common.h" #include "miibus_if.h" #include "ifdi_if.h" #include "opt_inet.h" #include "opt_inet6.h" MALLOC_DEFINE(M_AXGBE, "axgbe", "axgbe data"); extern struct if_txrx axgbe_txrx; /* Function prototypes */ static void *axgbe_register(device_t); static int axgbe_if_attach_pre(if_ctx_t); static int axgbe_if_attach_post(if_ctx_t); static int axgbe_if_detach(if_ctx_t); static void axgbe_if_stop(if_ctx_t); static void axgbe_if_init(if_ctx_t); /* Queue related routines */ static int axgbe_if_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static int axgbe_if_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static int axgbe_alloc_channels(if_ctx_t); static void axgbe_if_queues_free(if_ctx_t); static int axgbe_if_tx_queue_intr_enable(if_ctx_t, uint16_t); static int axgbe_if_rx_queue_intr_enable(if_ctx_t, uint16_t); /* Interrupt related routines */ static void axgbe_if_disable_intr(if_ctx_t); static void axgbe_if_enable_intr(if_ctx_t); static int axgbe_if_msix_intr_assign(if_ctx_t, int); static void xgbe_free_intr(struct xgbe_prv_data *, struct resource *, void *, int); /* Init and Iflib routines */ static void axgbe_pci_init(struct xgbe_prv_data *); static void axgbe_pci_stop(if_ctx_t); static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *, struct xgbe_channel *); static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *); static int axgbe_if_mtu_set(if_ctx_t, uint32_t); static void axgbe_if_update_admin_status(if_ctx_t); static void axgbe_if_media_status(if_ctx_t, struct ifmediareq *); static int axgbe_if_media_change(if_ctx_t); static int axgbe_if_promisc_set(if_ctx_t, int); static uint64_t axgbe_if_get_counter(if_ctx_t, ift_counter); static void axgbe_if_vlan_register(if_ctx_t, uint16_t); static void axgbe_if_vlan_unregister(if_ctx_t, uint16_t); #if __FreeBSD_version >= 1300000 static bool axgbe_if_needs_restart(if_ctx_t, enum iflib_restart_event); #endif static void axgbe_set_counts(if_ctx_t); static void axgbe_init_iflib_softc_ctx(struct axgbe_if_softc *); /* MII interface registered functions */ static int axgbe_miibus_readreg(device_t, int, int); static int axgbe_miibus_writereg(device_t, int, int, int); static void axgbe_miibus_statchg(device_t); /* ISR routines */ static int axgbe_dev_isr(void *); static void axgbe_ecc_isr(void *); static void axgbe_i2c_isr(void *); static void axgbe_an_isr(void *); static int axgbe_msix_que(void *); /* Timer routines */ static void xgbe_service(void *, int); static void xgbe_service_timer(void *); static void xgbe_init_timers(struct xgbe_prv_data *); static void xgbe_stop_timers(struct xgbe_prv_data *); /* Dump routines */ static void xgbe_dump_prop_registers(struct xgbe_prv_data *); /* * Allocate only for MAC (BAR0) and PCS (BAR1) registers, and just point the * MSI-X table bar (BAR5) to iflib. iflib will do the allocation for MSI-X * table. */ static struct resource_spec axgbe_pci_mac_spec[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, /* MAC regs */ { SYS_RES_MEMORY, PCIR_BAR(1), RF_ACTIVE }, /* PCS regs */ { -1, 0 } }; static pci_vendor_info_t axgbe_vendor_info_array[] = { PVID(0x1022, 0x1458, "AMD 10 Gigabit Ethernet Driver"), PVID(0x1022, 0x1459, "AMD 10 Gigabit Ethernet Driver"), PVID_END }; static struct xgbe_version_data xgbe_v2a = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, .xpcs_access = XGBE_XPCS_ACCESS_V2, .mmc_64bit = 1, .tx_max_fifo_size = 229376, .rx_max_fifo_size = 229376, .tx_tstamp_workaround = 1, .ecc_support = 1, .i2c_support = 1, .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, .an_cdr_workaround = 1, }; static struct xgbe_version_data xgbe_v2b = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, .xpcs_access = XGBE_XPCS_ACCESS_V2, .mmc_64bit = 1, .tx_max_fifo_size = 65536, .rx_max_fifo_size = 65536, .tx_tstamp_workaround = 1, .ecc_support = 1, .i2c_support = 1, .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, .an_cdr_workaround = 1, }; /* Device Interface */ static device_method_t ax_methods[] = { DEVMETHOD(device_register, axgbe_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), /* MII interface */ DEVMETHOD(miibus_readreg, axgbe_miibus_readreg), DEVMETHOD(miibus_writereg, axgbe_miibus_writereg), DEVMETHOD(miibus_statchg, axgbe_miibus_statchg), DEVMETHOD_END }; static driver_t ax_driver = { "ax", ax_methods, sizeof(struct axgbe_if_softc), }; devclass_t ax_devclass; DRIVER_MODULE(axp, pci, ax_driver, ax_devclass, 0, 0); DRIVER_MODULE(miibus, ax, miibus_driver, miibus_devclass, 0, 0); IFLIB_PNP_INFO(pci, ax_driver, axgbe_vendor_info_array); MODULE_DEPEND(ax, pci, 1, 1, 1); MODULE_DEPEND(ax, ether, 1, 1, 1); MODULE_DEPEND(ax, iflib, 1, 1, 1); MODULE_DEPEND(ax, miibus, 1, 1, 1); /* Iflib Interface */ static device_method_t axgbe_if_methods[] = { DEVMETHOD(ifdi_attach_pre, axgbe_if_attach_pre), DEVMETHOD(ifdi_attach_post, axgbe_if_attach_post), DEVMETHOD(ifdi_detach, axgbe_if_detach), DEVMETHOD(ifdi_init, axgbe_if_init), DEVMETHOD(ifdi_stop, axgbe_if_stop), DEVMETHOD(ifdi_msix_intr_assign, axgbe_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, axgbe_if_enable_intr), DEVMETHOD(ifdi_intr_disable, axgbe_if_disable_intr), DEVMETHOD(ifdi_tx_queue_intr_enable, axgbe_if_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, axgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, axgbe_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, axgbe_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, axgbe_if_queues_free), DEVMETHOD(ifdi_update_admin_status, axgbe_if_update_admin_status), DEVMETHOD(ifdi_mtu_set, axgbe_if_mtu_set), DEVMETHOD(ifdi_media_status, axgbe_if_media_status), DEVMETHOD(ifdi_media_change, axgbe_if_media_change), DEVMETHOD(ifdi_promisc_set, axgbe_if_promisc_set), DEVMETHOD(ifdi_get_counter, axgbe_if_get_counter), DEVMETHOD(ifdi_vlan_register, axgbe_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, axgbe_if_vlan_unregister), #if __FreeBSD_version >= 1300000 DEVMETHOD(ifdi_needs_restart, axgbe_if_needs_restart), #endif DEVMETHOD_END }; static driver_t axgbe_if_driver = { "axgbe_if", axgbe_if_methods, sizeof(struct axgbe_if_softc) }; /* Iflib Shared Context */ static struct if_shared_ctx axgbe_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_driver = &axgbe_if_driver, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = XGBE_TSO_MAX_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = XGBE_TSO_MAX_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_maxsegsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_admin_intrcnt = 4, .isc_vendor_info = axgbe_vendor_info_array, .isc_driver_version = XGBE_DRV_VERSION, .isc_nrxd_min = {XGBE_RX_DESC_CNT_MIN, XGBE_RX_DESC_CNT_MIN}, .isc_nrxd_default = {XGBE_RX_DESC_CNT_DEFAULT, XGBE_RX_DESC_CNT_DEFAULT}, .isc_nrxd_max = {XGBE_RX_DESC_CNT_MAX, XGBE_RX_DESC_CNT_MAX}, .isc_ntxd_min = {XGBE_TX_DESC_CNT_MIN}, .isc_ntxd_default = {XGBE_TX_DESC_CNT_DEFAULT}, .isc_ntxd_max = {XGBE_TX_DESC_CNT_MAX}, .isc_nfl = 2, .isc_ntxqs = 1, .isc_nrxqs = 2, .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_NEED_ETHER_PAD, }; static void * axgbe_register(device_t dev) { return (&axgbe_sctx_init); } /* MII Interface Functions */ static int axgbe_miibus_readreg(device_t dev, int phy, int reg) { struct axgbe_if_softc *sc = iflib_get_softc(device_get_softc(dev)); struct xgbe_prv_data *pdata = &sc->pdata; int val; axgbe_printf(3, "%s: phy %d reg %d\n", __func__, phy, reg); val = xgbe_phy_mii_read(pdata, phy, reg); axgbe_printf(2, "%s: val 0x%x\n", __func__, val); return (val & 0xFFFF); } static int axgbe_miibus_writereg(device_t dev, int phy, int reg, int val) { struct axgbe_if_softc *sc = iflib_get_softc(device_get_softc(dev)); struct xgbe_prv_data *pdata = &sc->pdata; axgbe_printf(3, "%s: phy %d reg %d val 0x%x\n", __func__, phy, reg, val); xgbe_phy_mii_write(pdata, phy, reg, val); return(0); } static void axgbe_miibus_statchg(device_t dev) { struct axgbe_if_softc *sc = iflib_get_softc(device_get_softc(dev)); struct xgbe_prv_data *pdata = &sc->pdata; struct mii_data *mii = device_get_softc(pdata->axgbe_miibus); struct ifnet *ifp = pdata->netdev; int bmsr; axgbe_printf(2, "%s: Link %d/%d\n", __func__, pdata->phy.link, pdata->phy_link); if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: pdata->phy.link = 1; break; case IFM_1000_T: case IFM_1000_SX: case IFM_2500_SX: pdata->phy.link = 1; break; default: pdata->phy.link = 0; break; } } else pdata->phy_link = 0; bmsr = axgbe_miibus_readreg(pdata->dev, pdata->mdio_addr, MII_BMSR); if (bmsr & BMSR_ANEG) { axgbe_printf(2, "%s: Autoneg Done\n", __func__); /* Raise AN Interrupt */ XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_CL73_INT_MASK); } } static int axgbe_if_attach_pre(if_ctx_t ctx) { struct axgbe_if_softc *sc; struct xgbe_prv_data *pdata; struct resource *mac_res[2]; if_softc_ctx_t scctx; if_shared_ctx_t sctx; device_t dev; unsigned int ma_lo, ma_hi; unsigned int reg; sc = iflib_get_softc(ctx); sc->pdata.dev = dev = iflib_get_dev(ctx); sc->sctx = sctx = iflib_get_sctx(ctx); sc->scctx = scctx = iflib_get_softc_ctx(ctx); sc->media = iflib_get_media(ctx); sc->ctx = ctx; sc->link_status = LINK_STATE_DOWN; pdata = &sc->pdata; pdata->netdev = iflib_get_ifp(ctx); spin_lock_init(&pdata->xpcs_lock); /* Initialize locks */ mtx_init(&pdata->rss_mutex, "xgbe rss mutex lock", NULL, MTX_DEF); mtx_init(&pdata->mdio_mutex, "xgbe MDIO mutex lock", NULL, MTX_SPIN); /* Allocate VLAN bitmap */ pdata->active_vlans = bit_alloc(VLAN_NVID, M_AXGBE, M_WAITOK|M_ZERO); pdata->num_active_vlans = 0; /* Get the version data */ DBGPR("%s: Device ID: 0x%x\n", __func__, pci_get_device(dev)); if (pci_get_device(dev) == 0x1458) sc->pdata.vdata = &xgbe_v2a; else if (pci_get_device(dev) == 0x1459) sc->pdata.vdata = &xgbe_v2b; /* PCI setup */ if (bus_alloc_resources(dev, axgbe_pci_mac_spec, mac_res)) return (ENXIO); sc->pdata.xgmac_res = mac_res[0]; sc->pdata.xpcs_res = mac_res[1]; /* Set the PCS indirect addressing definition registers*/ pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF; pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT; /* Configure the PCS indirect addressing support */ reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg); pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET); pdata->xpcs_window <<= 6; pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE); pdata->xpcs_window_size = 1 << (pdata->xpcs_window_size + 7); pdata->xpcs_window_mask = pdata->xpcs_window_size - 1; DBGPR("xpcs window def : %#010x\n", pdata->xpcs_window_def_reg); DBGPR("xpcs window sel : %#010x\n", pdata->xpcs_window_sel_reg); DBGPR("xpcs window : %#010x\n", pdata->xpcs_window); DBGPR("xpcs window size : %#010x\n", pdata->xpcs_window_size); DBGPR("xpcs window mask : %#010x\n", pdata->xpcs_window_mask); /* Enable all interrupts in the hardware */ XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); /* Retrieve the MAC address */ ma_lo = XP_IOREAD(pdata, XP_MAC_ADDR_LO); ma_hi = XP_IOREAD(pdata, XP_MAC_ADDR_HI); pdata->mac_addr[0] = ma_lo & 0xff; pdata->mac_addr[1] = (ma_lo >> 8) & 0xff; pdata->mac_addr[2] = (ma_lo >>16) & 0xff; pdata->mac_addr[3] = (ma_lo >> 24) & 0xff; pdata->mac_addr[4] = ma_hi & 0xff; pdata->mac_addr[5] = (ma_hi >> 8) & 0xff; if (!XP_GET_BITS(ma_hi, XP_MAC_ADDR_HI, VALID)) { axgbe_error("Invalid mac address\n"); return (EINVAL); } iflib_set_mac(ctx, pdata->mac_addr); /* Clock settings */ pdata->sysclk_rate = XGBE_V2_DMA_CLOCK_FREQ; pdata->ptpclk_rate = XGBE_V2_PTP_CLOCK_FREQ; /* Set the DMA coherency values */ pdata->coherent = 1; pdata->arcr = XGBE_DMA_PCI_ARCR; pdata->awcr = XGBE_DMA_PCI_AWCR; pdata->awarcr = XGBE_DMA_PCI_AWARCR; /* Read the port property registers */ pdata->pp0 = XP_IOREAD(pdata, XP_PROP_0); pdata->pp1 = XP_IOREAD(pdata, XP_PROP_1); pdata->pp2 = XP_IOREAD(pdata, XP_PROP_2); pdata->pp3 = XP_IOREAD(pdata, XP_PROP_3); pdata->pp4 = XP_IOREAD(pdata, XP_PROP_4); DBGPR("port property 0 = %#010x\n", pdata->pp0); DBGPR("port property 1 = %#010x\n", pdata->pp1); DBGPR("port property 2 = %#010x\n", pdata->pp2); DBGPR("port property 3 = %#010x\n", pdata->pp3); DBGPR("port property 4 = %#010x\n", pdata->pp4); /* Set the maximum channels and queues */ pdata->tx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_TX_DMA); pdata->rx_max_channel_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_RX_DMA); pdata->tx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_TX_QUEUES); pdata->rx_max_q_count = XP_GET_BITS(pdata->pp1, XP_PROP_1, MAX_RX_QUEUES); DBGPR("max tx/rx channel count = %u/%u\n", pdata->tx_max_channel_count, pdata->rx_max_channel_count); DBGPR("max tx/rx hw queue count = %u/%u\n", pdata->tx_max_q_count, pdata->rx_max_q_count); axgbe_set_counts(ctx); /* Set the maximum fifo amounts */ pdata->tx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2, TX_FIFO_SIZE); pdata->tx_max_fifo_size *= 16384; pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size, pdata->vdata->tx_max_fifo_size); pdata->rx_max_fifo_size = XP_GET_BITS(pdata->pp2, XP_PROP_2, RX_FIFO_SIZE); pdata->rx_max_fifo_size *= 16384; pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size, pdata->vdata->rx_max_fifo_size); DBGPR("max tx/rx max fifo size = %u/%u\n", pdata->tx_max_fifo_size, pdata->rx_max_fifo_size); /* Initialize IFLIB if_softc_ctx_t */ axgbe_init_iflib_softc_ctx(sc); /* Alloc channels */ if (axgbe_alloc_channels(ctx)) { axgbe_error("Unable to allocate channel memory\n"); return (ENOMEM); } TASK_INIT(&pdata->service_work, 0, xgbe_service, pdata); /* create the workqueue */ pdata->dev_workqueue = taskqueue_create("axgbe", M_WAITOK, taskqueue_thread_enqueue, &pdata->dev_workqueue); taskqueue_start_threads(&pdata->dev_workqueue, 1, PI_NET, "axgbe dev taskq"); /* Init timers */ xgbe_init_timers(pdata); return (0); } /* axgbe_if_attach_pre */ static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata) { xgbe_init_function_ptrs_dev(&pdata->hw_if); xgbe_init_function_ptrs_phy(&pdata->phy_if); xgbe_init_function_ptrs_i2c(&pdata->i2c_if); xgbe_init_function_ptrs_desc(&pdata->desc_if); pdata->vdata->init_function_ptrs_phy_impl(&pdata->phy_if); } static void axgbe_set_counts(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx);; struct xgbe_prv_data *pdata = &sc->pdata; cpuset_t lcpus; int cpu_count, err; size_t len; /* Set all function pointers */ xgbe_init_all_fptrs(pdata); /* Populate the hardware features */ xgbe_get_all_hw_features(pdata); if (!pdata->tx_max_channel_count) pdata->tx_max_channel_count = pdata->hw_feat.tx_ch_cnt; if (!pdata->rx_max_channel_count) pdata->rx_max_channel_count = pdata->hw_feat.rx_ch_cnt; if (!pdata->tx_max_q_count) pdata->tx_max_q_count = pdata->hw_feat.tx_q_cnt; if (!pdata->rx_max_q_count) pdata->rx_max_q_count = pdata->hw_feat.rx_q_cnt; /* * Calculate the number of Tx and Rx rings to be created * -Tx (DMA) Channels map 1-to-1 to Tx Queues so set * the number of Tx queues to the number of Tx channels * enabled * -Rx (DMA) Channels do not map 1-to-1 so use the actual * number of Rx queues or maximum allowed */ /* Get cpu count from sysctl */ len = sizeof(cpu_count); err = kernel_sysctlbyname(curthread, "hw.ncpu", &cpu_count, &len, NULL, 0, NULL, 0); if (err) { axgbe_error("Unable to fetch number of cpus\n"); cpu_count = 1; } if (bus_get_cpus(pdata->dev, INTR_CPUS, sizeof(lcpus), &lcpus) != 0) { axgbe_error("Unable to fetch CPU list\n"); /* TODO - handle CPU_COPY(&all_cpus, &lcpus); */ } DBGPR("ncpu %d intrcpu %d\n", cpu_count, CPU_COUNT(&lcpus)); pdata->tx_ring_count = min(CPU_COUNT(&lcpus), pdata->hw_feat.tx_ch_cnt); pdata->tx_ring_count = min(pdata->tx_ring_count, pdata->tx_max_channel_count); pdata->tx_ring_count = min(pdata->tx_ring_count, pdata->tx_max_q_count); pdata->tx_q_count = pdata->tx_ring_count; pdata->rx_ring_count = min(CPU_COUNT(&lcpus), pdata->hw_feat.rx_ch_cnt); pdata->rx_ring_count = min(pdata->rx_ring_count, pdata->rx_max_channel_count); pdata->rx_q_count = min(pdata->hw_feat.rx_q_cnt, pdata->rx_max_q_count); DBGPR("TX/RX max channel count = %u/%u\n", pdata->tx_max_channel_count, pdata->rx_max_channel_count); DBGPR("TX/RX max queue count = %u/%u\n", pdata->tx_max_q_count, pdata->rx_max_q_count); DBGPR("TX/RX DMA ring count = %u/%u\n", pdata->tx_ring_count, pdata->rx_ring_count); DBGPR("TX/RX hardware queue count = %u/%u\n", pdata->tx_q_count, pdata->rx_q_count); } /* axgbe_set_counts */ static void axgbe_init_iflib_softc_ctx(struct axgbe_if_softc *sc) { struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; if_shared_ctx_t sctx = sc->sctx; int i; scctx->isc_nrxqsets = pdata->rx_q_count; scctx->isc_ntxqsets = pdata->tx_q_count; scctx->isc_msix_bar = pci_msix_table_bar(pdata->dev); scctx->isc_tx_nsegments = 32; for (i = 0; i < sctx->isc_ntxqs; i++) { scctx->isc_txqsizes[i] = roundup2(scctx->isc_ntxd[i] * sizeof(struct xgbe_ring_desc), 128); scctx->isc_txd_size[i] = sizeof(struct xgbe_ring_desc); } for (i = 0; i < sctx->isc_nrxqs; i++) { scctx->isc_rxqsizes[i] = roundup2(scctx->isc_nrxd[i] * sizeof(struct xgbe_ring_desc), 128); scctx->isc_rxd_size[i] = sizeof(struct xgbe_ring_desc); } scctx->isc_tx_tso_segments_max = 32; scctx->isc_tx_tso_size_max = XGBE_TSO_MAX_SIZE; scctx->isc_tx_tso_segsize_max = PAGE_SIZE; /* * Set capabilities * 1) IFLIB automatically adds IFCAP_HWSTATS, so need to set explicitly * 2) isc_tx_csum_flags is mandatory if IFCAP_TXCSUM (included in * IFCAP_HWCSUM) is set */ scctx->isc_tx_csum_flags = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6 | CSUM_TSO); scctx->isc_capenable = (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTSO); scctx->isc_capabilities = scctx->isc_capenable; /* * Set rss_table_size alone when adding RSS support. rss_table_mask * will be set by IFLIB based on rss_table_size */ scctx->isc_rss_table_size = XGBE_RSS_MAX_TABLE_SIZE; scctx->isc_ntxqsets_max = XGBE_MAX_QUEUES; scctx->isc_nrxqsets_max = XGBE_MAX_QUEUES; scctx->isc_txrx = &axgbe_txrx; } static int axgbe_alloc_channels(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_channel *channel; int i, j, count; DBGPR("%s: txqs %d rxqs %d\n", __func__, pdata->tx_ring_count, pdata->rx_ring_count); /* Iflibe sets based on isc_ntxqsets/nrxqsets */ count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count); /* Allocate channel memory */ for (i = 0; i < count ; i++) { channel = (struct xgbe_channel*)malloc(sizeof(struct xgbe_channel), M_AXGBE, M_NOWAIT | M_ZERO); if (channel == NULL) { for (j = 0; j < i; j++) { free(pdata->channel[j], M_AXGBE); pdata->channel[j] = NULL; } return (ENOMEM); } pdata->channel[i] = channel; } pdata->total_channel_count = count; DBGPR("Channel count set to: %u\n", pdata->total_channel_count); for (i = 0; i < count; i++) { channel = pdata->channel[i]; snprintf(channel->name, sizeof(channel->name), "channel-%d",i); channel->pdata = pdata; channel->queue_index = i; channel->dma_tag = rman_get_bustag(pdata->xgmac_res); bus_space_subregion(channel->dma_tag, rman_get_bushandle(pdata->xgmac_res), DMA_CH_BASE + (DMA_CH_INC * i), DMA_CH_INC, &channel->dma_handle); channel->tx_ring = NULL; channel->rx_ring = NULL; } return (0); } /* axgbe_alloc_channels */ static void xgbe_service(void *ctx, int pending) { struct xgbe_prv_data *pdata = ctx; struct axgbe_if_softc *sc = (struct axgbe_if_softc *)pdata; bool prev_state = false; /* Get previous link status */ prev_state = pdata->phy.link; pdata->phy_if.phy_status(pdata); if (prev_state != pdata->phy.link) { pdata->phy_link = pdata->phy.link; axgbe_if_update_admin_status(sc->ctx); } callout_reset(&pdata->service_timer, 1*hz, xgbe_service_timer, pdata); } static void xgbe_service_timer(void *data) { struct xgbe_prv_data *pdata = data; taskqueue_enqueue(pdata->dev_workqueue, &pdata->service_work); } static void xgbe_init_timers(struct xgbe_prv_data *pdata) { callout_init(&pdata->service_timer, 1*hz); } static void xgbe_start_timers(struct xgbe_prv_data *pdata) { callout_reset(&pdata->service_timer, 1*hz, xgbe_service_timer, pdata); } static void xgbe_stop_timers(struct xgbe_prv_data *pdata) { callout_drain(&pdata->service_timer); callout_stop(&pdata->service_timer); } static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata) { axgbe_printf(1, "\n************* PHY Reg dump *********************\n"); axgbe_printf(1, "PCS Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1)); axgbe_printf(1, "PCS Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1)); axgbe_printf(1, "Phy Id (PHYS ID 1 %#06x)= %#06x\n", MDIO_DEVID1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1)); axgbe_printf(1, "Phy Id (PHYS ID 2 %#06x)= %#06x\n", MDIO_DEVID2, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2)); axgbe_printf(1, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1)); axgbe_printf(1, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS2, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2)); axgbe_printf(1, "Auto-Neg Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1)); axgbe_printf(1, "Auto-Neg Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1)); axgbe_printf(1, "Auto-Neg Ad Reg 1 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE)); axgbe_printf(1, "Auto-Neg Ad Reg 2 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1)); axgbe_printf(1, "Auto-Neg Ad Reg 3 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 2, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2)); axgbe_printf(1, "Auto-Neg Completion Reg (%#06x) = %#06x\n", MDIO_AN_COMP_STAT, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT)); axgbe_printf(1, "\n************************************************\n"); } static void xgbe_dump_prop_registers(struct xgbe_prv_data *pdata) { int i; axgbe_printf(1, "\n************* PROP Reg dump ********************\n"); for (i = 0 ; i < 38 ; i++) { axgbe_printf(1, "PROP Offset 0x%08x = %08x\n", (XP_PROP_0 + (i * 4)), XP_IOREAD(pdata, (XP_PROP_0 + (i * 4)))); } } static void xgbe_dump_dma_registers(struct xgbe_prv_data *pdata, int ch) { struct xgbe_channel *channel; int i; axgbe_printf(1, "\n************* DMA Reg dump *********************\n"); axgbe_printf(1, "DMA MR Reg (%08x) = %08x\n", DMA_MR, XGMAC_IOREAD(pdata, DMA_MR)); axgbe_printf(1, "DMA SBMR Reg (%08x) = %08x\n", DMA_SBMR, XGMAC_IOREAD(pdata, DMA_SBMR)); axgbe_printf(1, "DMA ISR Reg (%08x) = %08x\n", DMA_ISR, XGMAC_IOREAD(pdata, DMA_ISR)); axgbe_printf(1, "DMA AXIARCR Reg (%08x) = %08x\n", DMA_AXIARCR, XGMAC_IOREAD(pdata, DMA_AXIARCR)); axgbe_printf(1, "DMA AXIAWCR Reg (%08x) = %08x\n", DMA_AXIAWCR, XGMAC_IOREAD(pdata, DMA_AXIAWCR)); axgbe_printf(1, "DMA AXIAWARCR Reg (%08x) = %08x\n", DMA_AXIAWARCR, XGMAC_IOREAD(pdata, DMA_AXIAWARCR)); axgbe_printf(1, "DMA DSR0 Reg (%08x) = %08x\n", DMA_DSR0, XGMAC_IOREAD(pdata, DMA_DSR0)); axgbe_printf(1, "DMA DSR1 Reg (%08x) = %08x\n", DMA_DSR1, XGMAC_IOREAD(pdata, DMA_DSR1)); axgbe_printf(1, "DMA DSR2 Reg (%08x) = %08x\n", DMA_DSR2, XGMAC_IOREAD(pdata, DMA_DSR2)); axgbe_printf(1, "DMA DSR3 Reg (%08x) = %08x\n", DMA_DSR3, XGMAC_IOREAD(pdata, DMA_DSR3)); axgbe_printf(1, "DMA DSR4 Reg (%08x) = %08x\n", DMA_DSR4, XGMAC_IOREAD(pdata, DMA_DSR4)); axgbe_printf(1, "DMA TXEDMACR Reg (%08x) = %08x\n", DMA_TXEDMACR, XGMAC_IOREAD(pdata, DMA_TXEDMACR)); axgbe_printf(1, "DMA RXEDMACR Reg (%08x) = %08x\n", DMA_RXEDMACR, XGMAC_IOREAD(pdata, DMA_RXEDMACR)); for (i = 0 ; i < 8 ; i++ ) { if (ch >= 0) { if (i != ch) continue; } channel = pdata->channel[i]; axgbe_printf(1, "\n************* DMA CH %d dump ****************\n", i); axgbe_printf(1, "DMA_CH_CR Reg (%08x) = %08x\n", DMA_CH_CR, XGMAC_DMA_IOREAD(channel, DMA_CH_CR)); axgbe_printf(1, "DMA_CH_TCR Reg (%08x) = %08x\n", DMA_CH_TCR, XGMAC_DMA_IOREAD(channel, DMA_CH_TCR)); axgbe_printf(1, "DMA_CH_RCR Reg (%08x) = %08x\n", DMA_CH_RCR, XGMAC_DMA_IOREAD(channel, DMA_CH_RCR)); axgbe_printf(1, "DMA_CH_TDLR_HI Reg (%08x) = %08x\n", DMA_CH_TDLR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_TDLR_HI)); axgbe_printf(1, "DMA_CH_TDLR_LO Reg (%08x) = %08x\n", DMA_CH_TDLR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_TDLR_LO)); axgbe_printf(1, "DMA_CH_RDLR_HI Reg (%08x) = %08x\n", DMA_CH_RDLR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_RDLR_HI)); axgbe_printf(1, "DMA_CH_RDLR_LO Reg (%08x) = %08x\n", DMA_CH_RDLR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDLR_LO)); axgbe_printf(1, "DMA_CH_TDTR_LO Reg (%08x) = %08x\n", DMA_CH_TDTR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_TDTR_LO)); axgbe_printf(1, "DMA_CH_RDTR_LO Reg (%08x) = %08x\n", DMA_CH_RDTR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDTR_LO)); axgbe_printf(1, "DMA_CH_TDRLR Reg (%08x) = %08x\n", DMA_CH_TDRLR, XGMAC_DMA_IOREAD(channel, DMA_CH_TDRLR)); axgbe_printf(1, "DMA_CH_RDRLR Reg (%08x) = %08x\n", DMA_CH_RDRLR, XGMAC_DMA_IOREAD(channel, DMA_CH_RDRLR)); axgbe_printf(1, "DMA_CH_IER Reg (%08x) = %08x\n", DMA_CH_IER, XGMAC_DMA_IOREAD(channel, DMA_CH_IER)); axgbe_printf(1, "DMA_CH_RIWT Reg (%08x) = %08x\n", DMA_CH_RIWT, XGMAC_DMA_IOREAD(channel, DMA_CH_RIWT)); axgbe_printf(1, "DMA_CH_CATDR_LO Reg (%08x) = %08x\n", DMA_CH_CATDR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CATDR_LO)); axgbe_printf(1, "DMA_CH_CARDR_LO Reg (%08x) = %08x\n", DMA_CH_CARDR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CARDR_LO)); axgbe_printf(1, "DMA_CH_CATBR_HI Reg (%08x) = %08x\n", DMA_CH_CATBR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_CATBR_HI)); axgbe_printf(1, "DMA_CH_CATBR_LO Reg (%08x) = %08x\n", DMA_CH_CATBR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CATBR_LO)); axgbe_printf(1, "DMA_CH_CARBR_HI Reg (%08x) = %08x\n", DMA_CH_CARBR_HI, XGMAC_DMA_IOREAD(channel, DMA_CH_CARBR_HI)); axgbe_printf(1, "DMA_CH_CARBR_LO Reg (%08x) = %08x\n", DMA_CH_CARBR_LO, XGMAC_DMA_IOREAD(channel, DMA_CH_CARBR_LO)); axgbe_printf(1, "DMA_CH_SR Reg (%08x) = %08x\n", DMA_CH_SR, XGMAC_DMA_IOREAD(channel, DMA_CH_SR)); axgbe_printf(1, "DMA_CH_DSR Reg (%08x) = %08x\n", DMA_CH_DSR, XGMAC_DMA_IOREAD(channel, DMA_CH_DSR)); axgbe_printf(1, "DMA_CH_DCFL Reg (%08x) = %08x\n", DMA_CH_DCFL, XGMAC_DMA_IOREAD(channel, DMA_CH_DCFL)); axgbe_printf(1, "DMA_CH_MFC Reg (%08x) = %08x\n", DMA_CH_MFC, XGMAC_DMA_IOREAD(channel, DMA_CH_MFC)); axgbe_printf(1, "DMA_CH_TDTRO Reg (%08x) = %08x\n", DMA_CH_TDTRO, XGMAC_DMA_IOREAD(channel, DMA_CH_TDTRO)); axgbe_printf(1, "DMA_CH_RDTRO Reg (%08x) = %08x\n", DMA_CH_RDTRO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDTRO)); axgbe_printf(1, "DMA_CH_TDWRO Reg (%08x) = %08x\n", DMA_CH_TDWRO, XGMAC_DMA_IOREAD(channel, DMA_CH_TDWRO)); axgbe_printf(1, "DMA_CH_RDWRO Reg (%08x) = %08x\n", DMA_CH_RDWRO, XGMAC_DMA_IOREAD(channel, DMA_CH_RDWRO)); } } static void xgbe_dump_mtl_registers(struct xgbe_prv_data *pdata) { int i; axgbe_printf(1, "\n************* MTL Reg dump *********************\n"); axgbe_printf(1, "MTL OMR Reg (%08x) = %08x\n", MTL_OMR, XGMAC_IOREAD(pdata, MTL_OMR)); axgbe_printf(1, "MTL FDCR Reg (%08x) = %08x\n", MTL_FDCR, XGMAC_IOREAD(pdata, MTL_FDCR)); axgbe_printf(1, "MTL FDSR Reg (%08x) = %08x\n", MTL_FDSR, XGMAC_IOREAD(pdata, MTL_FDSR)); axgbe_printf(1, "MTL FDDR Reg (%08x) = %08x\n", MTL_FDDR, XGMAC_IOREAD(pdata, MTL_FDDR)); axgbe_printf(1, "MTL ISR Reg (%08x) = %08x\n", MTL_ISR, XGMAC_IOREAD(pdata, MTL_ISR)); axgbe_printf(1, "MTL RQDCM0R Reg (%08x) = %08x\n", MTL_RQDCM0R, XGMAC_IOREAD(pdata, MTL_RQDCM0R)); axgbe_printf(1, "MTL RQDCM1R Reg (%08x) = %08x\n", MTL_RQDCM1R, XGMAC_IOREAD(pdata, MTL_RQDCM1R)); axgbe_printf(1, "MTL RQDCM2R Reg (%08x) = %08x\n", MTL_RQDCM2R, XGMAC_IOREAD(pdata, MTL_RQDCM2R)); axgbe_printf(1, "MTL TCPM0R Reg (%08x) = %08x\n", MTL_TCPM0R, XGMAC_IOREAD(pdata, MTL_TCPM0R)); axgbe_printf(1, "MTL TCPM1R Reg (%08x) = %08x\n", MTL_TCPM1R, XGMAC_IOREAD(pdata, MTL_TCPM1R)); for (i = 0 ; i < 8 ; i++ ) { axgbe_printf(1, "\n************* MTL CH %d dump ****************\n", i); axgbe_printf(1, "MTL_Q_TQOMR Reg (%08x) = %08x\n", MTL_Q_TQOMR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TQOMR)); axgbe_printf(1, "MTL_Q_TQUR Reg (%08x) = %08x\n", MTL_Q_TQUR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TQUR)); axgbe_printf(1, "MTL_Q_TQDR Reg (%08x) = %08x\n", MTL_Q_TQDR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TQDR)); axgbe_printf(1, "MTL_Q_TC0ETSCR Reg (%08x) = %08x\n", MTL_Q_TC0ETSCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TC0ETSCR)); axgbe_printf(1, "MTL_Q_TC0ETSSR Reg (%08x) = %08x\n", MTL_Q_TC0ETSSR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TC0ETSSR)); axgbe_printf(1, "MTL_Q_TC0QWR Reg (%08x) = %08x\n", MTL_Q_TC0QWR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_TC0QWR)); axgbe_printf(1, "MTL_Q_RQOMR Reg (%08x) = %08x\n", MTL_Q_RQOMR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQOMR)); axgbe_printf(1, "MTL_Q_RQMPOCR Reg (%08x) = %08x\n", MTL_Q_RQMPOCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQMPOCR)); axgbe_printf(1, "MTL_Q_RQDR Reg (%08x) = %08x\n", MTL_Q_RQDR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQDR)); axgbe_printf(1, "MTL_Q_RQCR Reg (%08x) = %08x\n", MTL_Q_RQCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQCR)); axgbe_printf(1, "MTL_Q_RQFCR Reg (%08x) = %08x\n", MTL_Q_RQFCR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_RQFCR)); axgbe_printf(1, "MTL_Q_IER Reg (%08x) = %08x\n", MTL_Q_IER, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_IER)); axgbe_printf(1, "MTL_Q_ISR Reg (%08x) = %08x\n", MTL_Q_ISR, XGMAC_MTL_IOREAD(pdata, i, MTL_Q_ISR)); } } static void xgbe_dump_mac_registers(struct xgbe_prv_data *pdata) { axgbe_printf(1, "\n************* MAC Reg dump **********************\n"); axgbe_printf(1, "MAC TCR Reg (%08x) = %08x\n", MAC_TCR, XGMAC_IOREAD(pdata, MAC_TCR)); axgbe_printf(1, "MAC RCR Reg (%08x) = %08x\n", MAC_RCR, XGMAC_IOREAD(pdata, MAC_RCR)); axgbe_printf(1, "MAC PFR Reg (%08x) = %08x\n", MAC_PFR, XGMAC_IOREAD(pdata, MAC_PFR)); axgbe_printf(1, "MAC WTR Reg (%08x) = %08x\n", MAC_WTR, XGMAC_IOREAD(pdata, MAC_WTR)); axgbe_printf(1, "MAC HTR0 Reg (%08x) = %08x\n", MAC_HTR0, XGMAC_IOREAD(pdata, MAC_HTR0)); axgbe_printf(1, "MAC HTR1 Reg (%08x) = %08x\n", MAC_HTR1, XGMAC_IOREAD(pdata, MAC_HTR1)); axgbe_printf(1, "MAC HTR2 Reg (%08x) = %08x\n", MAC_HTR2, XGMAC_IOREAD(pdata, MAC_HTR2)); axgbe_printf(1, "MAC HTR3 Reg (%08x) = %08x\n", MAC_HTR3, XGMAC_IOREAD(pdata, MAC_HTR3)); axgbe_printf(1, "MAC HTR4 Reg (%08x) = %08x\n", MAC_HTR4, XGMAC_IOREAD(pdata, MAC_HTR4)); axgbe_printf(1, "MAC HTR5 Reg (%08x) = %08x\n", MAC_HTR5, XGMAC_IOREAD(pdata, MAC_HTR5)); axgbe_printf(1, "MAC HTR6 Reg (%08x) = %08x\n", MAC_HTR6, XGMAC_IOREAD(pdata, MAC_HTR6)); axgbe_printf(1, "MAC HTR7 Reg (%08x) = %08x\n", MAC_HTR7, XGMAC_IOREAD(pdata, MAC_HTR7)); axgbe_printf(1, "MAC VLANTR Reg (%08x) = %08x\n", MAC_VLANTR, XGMAC_IOREAD(pdata, MAC_VLANTR)); axgbe_printf(1, "MAC VLANHTR Reg (%08x) = %08x\n", MAC_VLANHTR, XGMAC_IOREAD(pdata, MAC_VLANHTR)); axgbe_printf(1, "MAC VLANIR Reg (%08x) = %08x\n", MAC_VLANIR, XGMAC_IOREAD(pdata, MAC_VLANIR)); axgbe_printf(1, "MAC IVLANIR Reg (%08x) = %08x\n", MAC_IVLANIR, XGMAC_IOREAD(pdata, MAC_IVLANIR)); axgbe_printf(1, "MAC RETMR Reg (%08x) = %08x\n", MAC_RETMR, XGMAC_IOREAD(pdata, MAC_RETMR)); axgbe_printf(1, "MAC Q0TFCR Reg (%08x) = %08x\n", MAC_Q0TFCR, XGMAC_IOREAD(pdata, MAC_Q0TFCR)); axgbe_printf(1, "MAC Q1TFCR Reg (%08x) = %08x\n", MAC_Q1TFCR, XGMAC_IOREAD(pdata, MAC_Q1TFCR)); axgbe_printf(1, "MAC Q2TFCR Reg (%08x) = %08x\n", MAC_Q2TFCR, XGMAC_IOREAD(pdata, MAC_Q2TFCR)); axgbe_printf(1, "MAC Q3TFCR Reg (%08x) = %08x\n", MAC_Q3TFCR, XGMAC_IOREAD(pdata, MAC_Q3TFCR)); axgbe_printf(1, "MAC Q4TFCR Reg (%08x) = %08x\n", MAC_Q4TFCR, XGMAC_IOREAD(pdata, MAC_Q4TFCR)); axgbe_printf(1, "MAC Q5TFCR Reg (%08x) = %08x\n", MAC_Q5TFCR, XGMAC_IOREAD(pdata, MAC_Q5TFCR)); axgbe_printf(1, "MAC Q6TFCR Reg (%08x) = %08x\n", MAC_Q6TFCR, XGMAC_IOREAD(pdata, MAC_Q6TFCR)); axgbe_printf(1, "MAC Q7TFCR Reg (%08x) = %08x\n", MAC_Q7TFCR, XGMAC_IOREAD(pdata, MAC_Q7TFCR)); axgbe_printf(1, "MAC RFCR Reg (%08x) = %08x\n", MAC_RFCR, XGMAC_IOREAD(pdata, MAC_RFCR)); axgbe_printf(1, "MAC RQC0R Reg (%08x) = %08x\n", MAC_RQC0R, XGMAC_IOREAD(pdata, MAC_RQC0R)); axgbe_printf(1, "MAC RQC1R Reg (%08x) = %08x\n", MAC_RQC1R, XGMAC_IOREAD(pdata, MAC_RQC1R)); axgbe_printf(1, "MAC RQC2R Reg (%08x) = %08x\n", MAC_RQC2R, XGMAC_IOREAD(pdata, MAC_RQC2R)); axgbe_printf(1, "MAC RQC3R Reg (%08x) = %08x\n", MAC_RQC3R, XGMAC_IOREAD(pdata, MAC_RQC3R)); axgbe_printf(1, "MAC ISR Reg (%08x) = %08x\n", MAC_ISR, XGMAC_IOREAD(pdata, MAC_ISR)); axgbe_printf(1, "MAC IER Reg (%08x) = %08x\n", MAC_IER, XGMAC_IOREAD(pdata, MAC_IER)); axgbe_printf(1, "MAC RTSR Reg (%08x) = %08x\n", MAC_RTSR, XGMAC_IOREAD(pdata, MAC_RTSR)); axgbe_printf(1, "MAC PMTCSR Reg (%08x) = %08x\n", MAC_PMTCSR, XGMAC_IOREAD(pdata, MAC_PMTCSR)); axgbe_printf(1, "MAC RWKPFR Reg (%08x) = %08x\n", MAC_RWKPFR, XGMAC_IOREAD(pdata, MAC_RWKPFR)); axgbe_printf(1, "MAC LPICSR Reg (%08x) = %08x\n", MAC_LPICSR, XGMAC_IOREAD(pdata, MAC_LPICSR)); axgbe_printf(1, "MAC LPITCR Reg (%08x) = %08x\n", MAC_LPITCR, XGMAC_IOREAD(pdata, MAC_LPITCR)); axgbe_printf(1, "MAC TIR Reg (%08x) = %08x\n", MAC_TIR, XGMAC_IOREAD(pdata, MAC_TIR)); axgbe_printf(1, "MAC VR Reg (%08x) = %08x\n", MAC_VR, XGMAC_IOREAD(pdata, MAC_VR)); axgbe_printf(1, "MAC DR Reg (%08x) = %08x\n", MAC_DR, XGMAC_IOREAD(pdata, MAC_DR)); axgbe_printf(1, "MAC HWF0R Reg (%08x) = %08x\n", MAC_HWF0R, XGMAC_IOREAD(pdata, MAC_HWF0R)); axgbe_printf(1, "MAC HWF1R Reg (%08x) = %08x\n", MAC_HWF1R, XGMAC_IOREAD(pdata, MAC_HWF1R)); axgbe_printf(1, "MAC HWF2R Reg (%08x) = %08x\n", MAC_HWF2R, XGMAC_IOREAD(pdata, MAC_HWF2R)); axgbe_printf(1, "MAC MDIOSCAR Reg (%08x) = %08x\n", MAC_MDIOSCAR, XGMAC_IOREAD(pdata, MAC_MDIOSCAR)); axgbe_printf(1, "MAC MDIOSCCDR Reg (%08x) = %08x\n", MAC_MDIOSCCDR, XGMAC_IOREAD(pdata, MAC_MDIOSCCDR)); axgbe_printf(1, "MAC MDIOISR Reg (%08x) = %08x\n", MAC_MDIOISR, XGMAC_IOREAD(pdata, MAC_MDIOISR)); axgbe_printf(1, "MAC MDIOIER Reg (%08x) = %08x\n", MAC_MDIOIER, XGMAC_IOREAD(pdata, MAC_MDIOIER)); axgbe_printf(1, "MAC MDIOCL22R Reg (%08x) = %08x\n", MAC_MDIOCL22R, XGMAC_IOREAD(pdata, MAC_MDIOCL22R)); axgbe_printf(1, "MAC GPIOCR Reg (%08x) = %08x\n", MAC_GPIOCR, XGMAC_IOREAD(pdata, MAC_GPIOCR)); axgbe_printf(1, "MAC GPIOSR Reg (%08x) = %08x\n", MAC_GPIOSR, XGMAC_IOREAD(pdata, MAC_GPIOSR)); axgbe_printf(1, "MAC MACA0HR Reg (%08x) = %08x\n", MAC_MACA0HR, XGMAC_IOREAD(pdata, MAC_MACA0HR)); axgbe_printf(1, "MAC MACA0LR Reg (%08x) = %08x\n", MAC_TCR, XGMAC_IOREAD(pdata, MAC_MACA0LR)); axgbe_printf(1, "MAC MACA1HR Reg (%08x) = %08x\n", MAC_MACA1HR, XGMAC_IOREAD(pdata, MAC_MACA1HR)); axgbe_printf(1, "MAC MACA1LR Reg (%08x) = %08x\n", MAC_MACA1LR, XGMAC_IOREAD(pdata, MAC_MACA1LR)); axgbe_printf(1, "MAC RSSCR Reg (%08x) = %08x\n", MAC_RSSCR, XGMAC_IOREAD(pdata, MAC_RSSCR)); axgbe_printf(1, "MAC RSSDR Reg (%08x) = %08x\n", MAC_RSSDR, XGMAC_IOREAD(pdata, MAC_RSSDR)); axgbe_printf(1, "MAC RSSAR Reg (%08x) = %08x\n", MAC_RSSAR, XGMAC_IOREAD(pdata, MAC_RSSAR)); axgbe_printf(1, "MAC TSCR Reg (%08x) = %08x\n", MAC_TSCR, XGMAC_IOREAD(pdata, MAC_TSCR)); axgbe_printf(1, "MAC SSIR Reg (%08x) = %08x\n", MAC_SSIR, XGMAC_IOREAD(pdata, MAC_SSIR)); axgbe_printf(1, "MAC STSR Reg (%08x) = %08x\n", MAC_STSR, XGMAC_IOREAD(pdata, MAC_STSR)); axgbe_printf(1, "MAC STNR Reg (%08x) = %08x\n", MAC_STNR, XGMAC_IOREAD(pdata, MAC_STNR)); axgbe_printf(1, "MAC STSUR Reg (%08x) = %08x\n", MAC_STSUR, XGMAC_IOREAD(pdata, MAC_STSUR)); axgbe_printf(1, "MAC STNUR Reg (%08x) = %08x\n", MAC_STNUR, XGMAC_IOREAD(pdata, MAC_STNUR)); axgbe_printf(1, "MAC TSAR Reg (%08x) = %08x\n", MAC_TSAR, XGMAC_IOREAD(pdata, MAC_TSAR)); axgbe_printf(1, "MAC TSSR Reg (%08x) = %08x\n", MAC_TSSR, XGMAC_IOREAD(pdata, MAC_TSSR)); axgbe_printf(1, "MAC TXSNR Reg (%08x) = %08x\n", MAC_TXSNR, XGMAC_IOREAD(pdata, MAC_TXSNR)); axgbe_printf(1, "MAC TXSSR Reg (%08x) = %08x\n", MAC_TXSSR, XGMAC_IOREAD(pdata, MAC_TXSSR)); } static void xgbe_dump_rmon_counters(struct xgbe_prv_data *pdata) { struct xgbe_mmc_stats *stats = &pdata->mmc_stats; axgbe_printf(1, "\n************* RMON counters dump ***************\n"); pdata->hw_if.read_mmc_stats(pdata); axgbe_printf(1, "rmon txoctetcount_gb (%08x) = %08lx\n", MMC_TXOCTETCOUNT_GB_LO, stats->txoctetcount_gb); axgbe_printf(1, "rmon txframecount_gb (%08x) = %08lx\n", MMC_TXFRAMECOUNT_GB_LO, stats->txframecount_gb); axgbe_printf(1, "rmon txbroadcastframes_g (%08x) = %08lx\n", MMC_TXBROADCASTFRAMES_G_LO, stats->txbroadcastframes_g); axgbe_printf(1, "rmon txmulticastframes_g (%08x) = %08lx\n", MMC_TXMULTICASTFRAMES_G_LO, stats->txmulticastframes_g); axgbe_printf(1, "rmon tx64octets_gb (%08x) = %08lx\n", MMC_TX64OCTETS_GB_LO, stats->tx64octets_gb); axgbe_printf(1, "rmon tx65to127octets_gb (%08x) = %08lx\n", MMC_TX65TO127OCTETS_GB_LO, stats->tx65to127octets_gb); axgbe_printf(1, "rmon tx128to255octets_gb (%08x) = %08lx\n", MMC_TX128TO255OCTETS_GB_LO, stats->tx128to255octets_gb); axgbe_printf(1, "rmon tx256to511octets_gb (%08x) = %08lx\n", MMC_TX256TO511OCTETS_GB_LO, stats->tx256to511octets_gb); axgbe_printf(1, "rmon tx512to1023octets_gb (%08x) = %08lx\n", MMC_TX512TO1023OCTETS_GB_LO, stats->tx512to1023octets_gb); axgbe_printf(1, "rmon tx1024tomaxoctets_gb (%08x) = %08lx\n", MMC_TX1024TOMAXOCTETS_GB_LO, stats->tx1024tomaxoctets_gb); axgbe_printf(1, "rmon txunicastframes_gb (%08x) = %08lx\n", MMC_TXUNICASTFRAMES_GB_LO, stats->txunicastframes_gb); axgbe_printf(1, "rmon txmulticastframes_gb (%08x) = %08lx\n", MMC_TXMULTICASTFRAMES_GB_LO, stats->txmulticastframes_gb); axgbe_printf(1, "rmon txbroadcastframes_gb (%08x) = %08lx\n", MMC_TXBROADCASTFRAMES_GB_LO, stats->txbroadcastframes_gb); axgbe_printf(1, "rmon txunderflowerror (%08x) = %08lx\n", MMC_TXUNDERFLOWERROR_LO, stats->txunderflowerror); axgbe_printf(1, "rmon txoctetcount_g (%08x) = %08lx\n", MMC_TXOCTETCOUNT_G_LO, stats->txoctetcount_g); axgbe_printf(1, "rmon txframecount_g (%08x) = %08lx\n", MMC_TXFRAMECOUNT_G_LO, stats->txframecount_g); axgbe_printf(1, "rmon txpauseframes (%08x) = %08lx\n", MMC_TXPAUSEFRAMES_LO, stats->txpauseframes); axgbe_printf(1, "rmon txvlanframes_g (%08x) = %08lx\n", MMC_TXVLANFRAMES_G_LO, stats->txvlanframes_g); axgbe_printf(1, "rmon rxframecount_gb (%08x) = %08lx\n", MMC_RXFRAMECOUNT_GB_LO, stats->rxframecount_gb); axgbe_printf(1, "rmon rxoctetcount_gb (%08x) = %08lx\n", MMC_RXOCTETCOUNT_GB_LO, stats->rxoctetcount_gb); axgbe_printf(1, "rmon rxoctetcount_g (%08x) = %08lx\n", MMC_RXOCTETCOUNT_G_LO, stats->rxoctetcount_g); axgbe_printf(1, "rmon rxbroadcastframes_g (%08x) = %08lx\n", MMC_RXBROADCASTFRAMES_G_LO, stats->rxbroadcastframes_g); axgbe_printf(1, "rmon rxmulticastframes_g (%08x) = %08lx\n", MMC_RXMULTICASTFRAMES_G_LO, stats->rxmulticastframes_g); axgbe_printf(1, "rmon rxcrcerror (%08x) = %08lx\n", MMC_RXCRCERROR_LO, stats->rxcrcerror); axgbe_printf(1, "rmon rxrunterror (%08x) = %08lx\n", MMC_RXRUNTERROR, stats->rxrunterror); axgbe_printf(1, "rmon rxjabbererror (%08x) = %08lx\n", MMC_RXJABBERERROR, stats->rxjabbererror); axgbe_printf(1, "rmon rxundersize_g (%08x) = %08lx\n", MMC_RXUNDERSIZE_G, stats->rxundersize_g); axgbe_printf(1, "rmon rxoversize_g (%08x) = %08lx\n", MMC_RXOVERSIZE_G, stats->rxoversize_g); axgbe_printf(1, "rmon rx64octets_gb (%08x) = %08lx\n", MMC_RX64OCTETS_GB_LO, stats->rx64octets_gb); axgbe_printf(1, "rmon rx65to127octets_gb (%08x) = %08lx\n", MMC_RX65TO127OCTETS_GB_LO, stats->rx65to127octets_gb); axgbe_printf(1, "rmon rx128to255octets_gb (%08x) = %08lx\n", MMC_RX128TO255OCTETS_GB_LO, stats->rx128to255octets_gb); axgbe_printf(1, "rmon rx256to511octets_gb (%08x) = %08lx\n", MMC_RX256TO511OCTETS_GB_LO, stats->rx256to511octets_gb); axgbe_printf(1, "rmon rx512to1023octets_gb (%08x) = %08lx\n", MMC_RX512TO1023OCTETS_GB_LO, stats->rx512to1023octets_gb); axgbe_printf(1, "rmon rx1024tomaxoctets_gb (%08x) = %08lx\n", MMC_RX1024TOMAXOCTETS_GB_LO, stats->rx1024tomaxoctets_gb); axgbe_printf(1, "rmon rxunicastframes_g (%08x) = %08lx\n", MMC_RXUNICASTFRAMES_G_LO, stats->rxunicastframes_g); axgbe_printf(1, "rmon rxlengtherror (%08x) = %08lx\n", MMC_RXLENGTHERROR_LO, stats->rxlengtherror); axgbe_printf(1, "rmon rxoutofrangetype (%08x) = %08lx\n", MMC_RXOUTOFRANGETYPE_LO, stats->rxoutofrangetype); axgbe_printf(1, "rmon rxpauseframes (%08x) = %08lx\n", MMC_RXPAUSEFRAMES_LO, stats->rxpauseframes); axgbe_printf(1, "rmon rxfifooverflow (%08x) = %08lx\n", MMC_RXFIFOOVERFLOW_LO, stats->rxfifooverflow); axgbe_printf(1, "rmon rxvlanframes_gb (%08x) = %08lx\n", MMC_RXVLANFRAMES_GB_LO, stats->rxvlanframes_gb); axgbe_printf(1, "rmon rxwatchdogerror (%08x) = %08lx\n", MMC_RXWATCHDOGERROR, stats->rxwatchdogerror); } void xgbe_dump_i2c_registers(struct xgbe_prv_data *pdata) { axgbe_printf(1, "*************** I2C Registers **************\n"); axgbe_printf(1, " IC_CON : %010x\n", XI2C_IOREAD(pdata, 0x00)); axgbe_printf(1, " IC_TAR : %010x\n", XI2C_IOREAD(pdata, 0x04)); axgbe_printf(1, " IC_HS_MADDR : %010x\n", XI2C_IOREAD(pdata, 0x0c)); axgbe_printf(1, " IC_INTR_STAT : %010x\n", XI2C_IOREAD(pdata, 0x2c)); axgbe_printf(1, " IC_INTR_MASK : %010x\n", XI2C_IOREAD(pdata, 0x30)); axgbe_printf(1, " IC_RAW_INTR_STAT : %010x\n", XI2C_IOREAD(pdata, 0x34)); axgbe_printf(1, " IC_RX_TL : %010x\n", XI2C_IOREAD(pdata, 0x38)); axgbe_printf(1, " IC_TX_TL : %010x\n", XI2C_IOREAD(pdata, 0x3c)); axgbe_printf(1, " IC_ENABLE : %010x\n", XI2C_IOREAD(pdata, 0x6c)); axgbe_printf(1, " IC_STATUS : %010x\n", XI2C_IOREAD(pdata, 0x70)); axgbe_printf(1, " IC_TXFLR : %010x\n", XI2C_IOREAD(pdata, 0x74)); axgbe_printf(1, " IC_RXFLR : %010x\n", XI2C_IOREAD(pdata, 0x78)); axgbe_printf(1, " IC_ENABLE_STATUS : %010x\n", XI2C_IOREAD(pdata, 0x9c)); axgbe_printf(1, " IC_COMP_PARAM1 : %010x\n", XI2C_IOREAD(pdata, 0xf4)); } static void xgbe_dump_active_vlans(struct xgbe_prv_data *pdata) { int i; for(i=0 ; iactive_vlans[i]); } axgbe_printf(1, "\n"); } static void xgbe_default_config(struct xgbe_prv_data *pdata) { pdata->blen = DMA_SBMR_BLEN_64; pdata->pbl = DMA_PBL_128; pdata->aal = 1; pdata->rd_osr_limit = 8; pdata->wr_osr_limit = 8; pdata->tx_sf_mode = MTL_TSF_ENABLE; pdata->tx_threshold = MTL_TX_THRESHOLD_64; pdata->tx_osp_mode = DMA_OSP_ENABLE; pdata->rx_sf_mode = MTL_RSF_DISABLE; pdata->rx_threshold = MTL_RX_THRESHOLD_64; pdata->pause_autoneg = 1; pdata->tx_pause = 1; pdata->rx_pause = 1; pdata->phy_speed = SPEED_UNKNOWN; pdata->power_down = 0; pdata->enable_rss = 1; } static void axgbe_setup_sysctl(struct xgbe_prv_data *pdata) { struct sysctl_ctx_list *clist; struct sysctl_oid *parent; struct sysctl_oid_list *top; clist = device_get_sysctl_ctx(pdata->dev); parent = device_get_sysctl_tree(pdata->dev); top = SYSCTL_CHILDREN(parent); } static int axgbe_if_attach_post(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct ifnet *ifp = pdata->netdev; struct xgbe_phy_if *phy_if = &pdata->phy_if; struct xgbe_hw_if *hw_if = &pdata->hw_if; if_softc_ctx_t scctx = sc->scctx; int i, ret; /* Initialize ECC timestamps */ pdata->tx_sec_period = ticks; pdata->tx_ded_period = ticks; pdata->rx_sec_period = ticks; pdata->rx_ded_period = ticks; pdata->desc_sec_period = ticks; pdata->desc_ded_period = ticks; /* Reset the hardware */ ret = hw_if->exit(&sc->pdata); if (ret) axgbe_error("%s: exit error %d\n", __func__, ret); /* Configure the defaults */ xgbe_default_config(pdata); /* Set default max values if not provided */ if (!pdata->tx_max_fifo_size) pdata->tx_max_fifo_size = pdata->hw_feat.tx_fifo_size; if (!pdata->rx_max_fifo_size) pdata->rx_max_fifo_size = pdata->hw_feat.rx_fifo_size; DBGPR("%s: tx fifo 0x%x rx fifo 0x%x\n", __func__, pdata->tx_max_fifo_size, pdata->rx_max_fifo_size); /* Set and validate the number of descriptors for a ring */ MPASS(powerof2(XGBE_TX_DESC_CNT)); pdata->tx_desc_count = XGBE_TX_DESC_CNT; MPASS(powerof2(XGBE_RX_DESC_CNT)); pdata->rx_desc_count = XGBE_RX_DESC_CNT; /* Adjust the number of queues based on interrupts assigned */ if (pdata->channel_irq_count) { pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, pdata->channel_irq_count); pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count, pdata->channel_irq_count); DBGPR("adjusted TX %u/%u RX %u/%u\n", pdata->tx_ring_count, pdata->tx_q_count, pdata->rx_ring_count, pdata->rx_q_count); } /* Set channel count based on interrupts assigned */ pdata->channel_count = max_t(unsigned int, scctx->isc_ntxqsets, scctx->isc_nrxqsets); DBGPR("Channel count set to: %u\n", pdata->channel_count); /* Get RSS key */ #ifdef RSS rss_getkey((uint8_t *)pdata->rss_key); #else arc4rand(&pdata->rss_key, ARRAY_SIZE(pdata->rss_key), 0); #endif XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, IP2TE, 1); XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, TCP4TE, 1); XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); /* Initialize the PHY device */ pdata->sysctl_an_cdr_workaround = pdata->vdata->an_cdr_workaround; phy_if->phy_init(pdata); /* Set the coalescing */ xgbe_init_rx_coalesce(&sc->pdata); xgbe_init_tx_coalesce(&sc->pdata); ifmedia_add(sc->media, IFM_ETHER | IFM_10G_KR, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_KX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_CX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_LX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_T, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_1000_SGMII, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_100_SGMII, 0, NULL); ifmedia_add(sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(sc->media, IFM_ETHER | IFM_AUTO); /* Initialize the phy */ pdata->phy_link = -1; pdata->phy_speed = SPEED_UNKNOWN; ret = phy_if->phy_reset(pdata); if (ret) return (ret); /* Calculate the Rx buffer size before allocating rings */ ret = xgbe_calc_rx_buf_size(pdata->netdev, if_getmtu(pdata->netdev)); pdata->rx_buf_size = ret; DBGPR("%s: rx_buf_size %d\n", __func__, ret); /* Setup RSS lookup table */ for (i = 0; i < XGBE_RSS_MAX_TABLE_SIZE; i++) XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH, i % pdata->rx_ring_count); /* * Mark the device down until it is initialized, which happens * when the device is accessed first (for configuring the iface, * eg: setting IP) */ set_bit(XGBE_DOWN, &pdata->dev_state); DBGPR("mtu %d\n", ifp->if_mtu); scctx->isc_max_frame_size = ifp->if_mtu + 18; scctx->isc_min_frame_size = XGMAC_MIN_PACKET; axgbe_setup_sysctl(pdata); axgbe_sysctl_init(pdata); return (0); } /* axgbe_if_attach_post */ static void xgbe_free_intr(struct xgbe_prv_data *pdata, struct resource *res, void *tag, int rid) { if (tag) bus_teardown_intr(pdata->dev, res, tag); if (res) bus_release_resource(pdata->dev, SYS_RES_IRQ, rid, res); } static void axgbe_interrupts_free(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct if_irq irq; int i; axgbe_printf(2, "%s: mode %d\n", __func__, scctx->isc_intr); /* Free dev_irq */ iflib_irq_free(ctx, &pdata->dev_irq); /* Free ecc_irq */ xgbe_free_intr(pdata, pdata->ecc_irq_res, pdata->ecc_irq_tag, pdata->ecc_rid); /* Free i2c_irq */ xgbe_free_intr(pdata, pdata->i2c_irq_res, pdata->i2c_irq_tag, pdata->i2c_rid); /* Free an_irq */ xgbe_free_intr(pdata, pdata->an_irq_res, pdata->an_irq_tag, pdata->an_rid); for (i = 0; i < scctx->isc_nrxqsets; i++) { channel = pdata->channel[i]; axgbe_printf(2, "%s: rid %d\n", __func__, channel->dma_irq_rid); irq.ii_res = channel->dma_irq_res; irq.ii_tag = channel->dma_irq_tag; iflib_irq_free(ctx, &irq); } } static int axgbe_if_detach(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_phy_if *phy_if = &pdata->phy_if; struct resource *mac_res[2]; mac_res[0] = pdata->xgmac_res; mac_res[1] = pdata->xpcs_res; phy_if->phy_exit(pdata); /* Free Interrupts */ axgbe_interrupts_free(ctx); /* Free workqueues */ taskqueue_free(pdata->dev_workqueue); /* Release bus resources */ bus_release_resources(iflib_get_dev(ctx), axgbe_pci_mac_spec, mac_res); /* Free VLAN bitmap */ free(pdata->active_vlans, M_AXGBE); axgbe_sysctl_exit(pdata); return (0); } /* axgbe_if_detach */ static void axgbe_pci_init(struct xgbe_prv_data *pdata) { struct xgbe_phy_if *phy_if = &pdata->phy_if; struct xgbe_hw_if *hw_if = &pdata->hw_if; int ret = 0; hw_if->init(pdata); ret = phy_if->phy_start(pdata); if (ret) { axgbe_error("%s: phy start %d\n", __func__, ret); ret = hw_if->exit(pdata); if (ret) axgbe_error("%s: exit error %d\n", __func__, ret); return; } hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); xgbe_start_timers(pdata); clear_bit(XGBE_DOWN, &pdata->dev_state); xgbe_dump_phy_registers(pdata); xgbe_dump_prop_registers(pdata); xgbe_dump_dma_registers(pdata, -1); xgbe_dump_mtl_registers(pdata); xgbe_dump_mac_registers(pdata); xgbe_dump_rmon_counters(pdata); } static void axgbe_if_init(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; axgbe_pci_init(pdata); } static void axgbe_pci_stop(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_phy_if *phy_if = &pdata->phy_if; struct xgbe_hw_if *hw_if = &pdata->hw_if; int ret; if (__predict_false(test_bit(XGBE_DOWN, &pdata->dev_state))) { axgbe_printf(1, "%s: Stopping when XGBE_DOWN\n", __func__); return; } xgbe_stop_timers(pdata); taskqueue_drain_all(pdata->dev_workqueue); hw_if->disable_tx(pdata); hw_if->disable_rx(pdata); phy_if->phy_stop(pdata); ret = hw_if->exit(pdata); if (ret) axgbe_error("%s: exit error %d\n", __func__, ret); set_bit(XGBE_DOWN, &pdata->dev_state); } static void axgbe_if_stop(if_ctx_t ctx) { axgbe_pci_stop(ctx); } static void axgbe_if_disable_intr(if_ctx_t ctx) { /* TODO - implement */ } static void axgbe_if_enable_intr(if_ctx_t ctx) { /* TODO - implement */ } static int axgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *va, uint64_t *pa, int ntxqs, int ntxqsets) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct xgbe_ring *tx_ring; int i, j, k; MPASS(scctx->isc_ntxqsets > 0); MPASS(scctx->isc_ntxqsets == ntxqsets); MPASS(ntxqs == 1); axgbe_printf(1, "%s: txqsets %d/%d txqs %d\n", __func__, scctx->isc_ntxqsets, ntxqsets, ntxqs); for (i = 0 ; i < ntxqsets; i++) { channel = pdata->channel[i]; tx_ring = (struct xgbe_ring*)malloc(ntxqs * sizeof(struct xgbe_ring), M_AXGBE, M_NOWAIT | M_ZERO); if (tx_ring == NULL) { axgbe_error("Unable to allocate TX ring memory\n"); goto tx_ring_fail; } channel->tx_ring = tx_ring; for (j = 0; j < ntxqs; j++, tx_ring++) { tx_ring->rdata = (struct xgbe_ring_data*)malloc(scctx->isc_ntxd[j] * sizeof(struct xgbe_ring_data), M_AXGBE, M_NOWAIT); /* Get the virtual & physical address of hw queues */ tx_ring->rdesc = (struct xgbe_ring_desc *)va[i*ntxqs + j]; tx_ring->rdesc_paddr = pa[i*ntxqs + j]; tx_ring->rdesc_count = scctx->isc_ntxd[j]; spin_lock_init(&tx_ring->lock); } } axgbe_printf(1, "allocated for %d tx queues\n", scctx->isc_ntxqsets); return (0); tx_ring_fail: for (j = 0; j < i ; j++) { channel = pdata->channel[j]; tx_ring = channel->tx_ring; for (k = 0; k < ntxqs ; k++, tx_ring++) { if (tx_ring && tx_ring->rdata) free(tx_ring->rdata, M_AXGBE); } free(channel->tx_ring, M_AXGBE); channel->tx_ring = NULL; } return (ENOMEM); } /* axgbe_if_tx_queues_alloc */ static int axgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *va, uint64_t *pa, int nrxqs, int nrxqsets) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct xgbe_ring *rx_ring; int i, j, k; MPASS(scctx->isc_nrxqsets > 0); MPASS(scctx->isc_nrxqsets == nrxqsets); MPASS(nrxqs == 2); axgbe_printf(1, "%s: rxqsets %d/%d rxqs %d\n", __func__, scctx->isc_nrxqsets, nrxqsets, nrxqs); for (i = 0 ; i < nrxqsets; i++) { channel = pdata->channel[i]; rx_ring = (struct xgbe_ring*)malloc(nrxqs * sizeof(struct xgbe_ring), M_AXGBE, M_NOWAIT | M_ZERO); if (rx_ring == NULL) { axgbe_error("Unable to allocate RX ring memory\n"); goto rx_ring_fail; } channel->rx_ring = rx_ring; for (j = 0; j < nrxqs; j++, rx_ring++) { rx_ring->rdata = (struct xgbe_ring_data*)malloc(scctx->isc_nrxd[j] * sizeof(struct xgbe_ring_data), M_AXGBE, M_NOWAIT); /* Get the virtual and physical address of the hw queues */ rx_ring->rdesc = (struct xgbe_ring_desc *)va[i*nrxqs + j]; rx_ring->rdesc_paddr = pa[i*nrxqs + j]; rx_ring->rdesc_count = scctx->isc_nrxd[j]; spin_lock_init(&rx_ring->lock); } } axgbe_printf(2, "allocated for %d rx queues\n", scctx->isc_nrxqsets); return (0); rx_ring_fail: for (j = 0 ; j < i ; j++) { channel = pdata->channel[j]; rx_ring = channel->rx_ring; for (k = 0; k < nrxqs ; k++, rx_ring++) { if (rx_ring && rx_ring->rdata) free(rx_ring->rdata, M_AXGBE); } free(channel->rx_ring, M_AXGBE); channel->rx_ring = NULL; } return (ENOMEM); } /* axgbe_if_rx_queues_alloc */ static void axgbe_if_queues_free(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; if_shared_ctx_t sctx = sc->sctx; struct xgbe_channel *channel; struct xgbe_ring *tx_ring; struct xgbe_ring *rx_ring; int i, j; for (i = 0 ; i < scctx->isc_ntxqsets; i++) { channel = pdata->channel[i]; tx_ring = channel->tx_ring; for (j = 0; j < sctx->isc_ntxqs ; j++, tx_ring++) { if (tx_ring && tx_ring->rdata) free(tx_ring->rdata, M_AXGBE); } free(channel->tx_ring, M_AXGBE); channel->tx_ring = NULL; } for (i = 0 ; i < scctx->isc_nrxqsets; i++) { channel = pdata->channel[i]; rx_ring = channel->rx_ring; for (j = 0; j < sctx->isc_nrxqs ; j++, rx_ring++) { if (rx_ring && rx_ring->rdata) free(rx_ring->rdata, M_AXGBE); } free(channel->rx_ring, M_AXGBE); channel->rx_ring = NULL; } /* Free Channels */ for (i = 0; i < pdata->total_channel_count ; i++) { free(pdata->channel[i], M_AXGBE); pdata->channel[i] = NULL; } pdata->total_channel_count = 0; pdata->channel_count = 0; } /* axgbe_if_queues_free */ static void axgbe_if_vlan_register(if_ctx_t ctx, uint16_t vtag) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_hw_if *hw_if = &pdata->hw_if; if (!bit_test(pdata->active_vlans, vtag)) { axgbe_printf(0, "Registering VLAN %d\n", vtag); bit_set(pdata->active_vlans, vtag); hw_if->update_vlan_hash_table(pdata); pdata->num_active_vlans++; axgbe_printf(1, "Total active vlans: %d\n", pdata->num_active_vlans); } else axgbe_printf(0, "VLAN %d already registered\n", vtag); xgbe_dump_active_vlans(pdata); } static void axgbe_if_vlan_unregister(if_ctx_t ctx, uint16_t vtag) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_hw_if *hw_if = &pdata->hw_if; if (pdata->num_active_vlans == 0) { axgbe_printf(1, "No active VLANs to unregister\n"); return; } if (bit_test(pdata->active_vlans, vtag)){ axgbe_printf(0, "Un-Registering VLAN %d\n", vtag); bit_clear(pdata->active_vlans, vtag); hw_if->update_vlan_hash_table(pdata); pdata->num_active_vlans--; axgbe_printf(1, "Total active vlans: %d\n", pdata->num_active_vlans); } else axgbe_printf(0, "VLAN %d already unregistered\n", vtag); xgbe_dump_active_vlans(pdata); } #if __FreeBSD_version >= 1300000 static bool axgbe_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: default: return (true); } } #endif static int axgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; if_softc_ctx_t scctx = sc->scctx; struct xgbe_channel *channel; struct if_irq irq; int i, error, rid = 0, flags; char buf[16]; MPASS(scctx->isc_intr != IFLIB_INTR_LEGACY); pdata->isr_as_tasklet = 1; if (scctx->isc_intr == IFLIB_INTR_MSI) { pdata->irq_count = 1; pdata->channel_irq_count = 1; return (0); } axgbe_printf(1, "%s: msix %d txqsets %d rxqsets %d\n", __func__, msix, scctx->isc_ntxqsets, scctx->isc_nrxqsets); flags = RF_ACTIVE; /* DEV INTR SETUP */ rid++; error = iflib_irq_alloc_generic(ctx, &pdata->dev_irq, rid, IFLIB_INTR_ADMIN, axgbe_dev_isr, sc, 0, "dev_irq"); if (error) { axgbe_error("Failed to register device interrupt rid %d name %s\n", rid, "dev_irq"); return (error); } /* ECC INTR SETUP */ rid++; pdata->ecc_rid = rid; pdata->ecc_irq_res = bus_alloc_resource_any(pdata->dev, SYS_RES_IRQ, &rid, flags); if (!pdata->ecc_irq_res) { axgbe_error("failed to allocate IRQ for rid %d, name %s.\n", rid, "ecc_irq"); return (ENOMEM); } error = bus_setup_intr(pdata->dev, pdata->ecc_irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, axgbe_ecc_isr, sc, &pdata->ecc_irq_tag); if (error) { axgbe_error("failed to setup interrupt for rid %d, name %s: %d\n", rid, "ecc_irq", error); return (error); } /* I2C INTR SETUP */ rid++; pdata->i2c_rid = rid; pdata->i2c_irq_res = bus_alloc_resource_any(pdata->dev, SYS_RES_IRQ, &rid, flags); if (!pdata->i2c_irq_res) { axgbe_error("failed to allocate IRQ for rid %d, name %s.\n", rid, "i2c_irq"); return (ENOMEM); } error = bus_setup_intr(pdata->dev, pdata->i2c_irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, axgbe_i2c_isr, sc, &pdata->i2c_irq_tag); if (error) { axgbe_error("failed to setup interrupt for rid %d, name %s: %d\n", rid, "i2c_irq", error); return (error); } /* AN INTR SETUP */ rid++; pdata->an_rid = rid; pdata->an_irq_res = bus_alloc_resource_any(pdata->dev, SYS_RES_IRQ, &rid, flags); if (!pdata->an_irq_res) { axgbe_error("failed to allocate IRQ for rid %d, name %s.\n", rid, "an_irq"); return (ENOMEM); } error = bus_setup_intr(pdata->dev, pdata->an_irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, axgbe_an_isr, sc, &pdata->an_irq_tag); if (error) { axgbe_error("failed to setup interrupt for rid %d, name %s: %d\n", rid, "an_irq", error); return (error); } pdata->per_channel_irq = 1; pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL; rid++; for (i = 0; i < scctx->isc_nrxqsets; i++, rid++) { channel = pdata->channel[i]; snprintf(buf, sizeof(buf), "rxq%d", i); - error = iflib_irq_alloc_generic(ctx, &irq, rid, IFLIB_INTR_RX, + error = iflib_irq_alloc_generic(ctx, &irq, rid, IFLIB_INTR_RXTX, axgbe_msix_que, channel, channel->queue_index, buf); if (error) { axgbe_error("Failed to allocated que int %d err: %d\n", i, error); return (error); } channel->dma_irq_rid = rid; channel->dma_irq_res = irq.ii_res; channel->dma_irq_tag = irq.ii_tag; axgbe_printf(1, "%s: channel count %d idx %d irq %d\n", __func__, scctx->isc_nrxqsets, i, rid); } pdata->irq_count = msix; pdata->channel_irq_count = scctx->isc_nrxqsets; for (i = 0; i < scctx->isc_ntxqsets; i++) { channel = pdata->channel[i]; snprintf(buf, sizeof(buf), "txq%d", i); irq.ii_res = channel->dma_irq_res; iflib_softirq_alloc_generic(ctx, &irq, IFLIB_INTR_TX, channel, channel->queue_index, buf); } return (0); } /* axgbe_if_msix_intr_assign */ static int xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata, struct xgbe_channel *channel) { struct xgbe_hw_if *hw_if = &pdata->hw_if; enum xgbe_int int_id; if (channel->tx_ring && channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI_RI; else if (channel->tx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI; else if (channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_RI; else return (-1); axgbe_printf(1, "%s channel: %d rx_tx interrupt enabled %d\n", __func__, channel->queue_index, int_id); return (hw_if->enable_int(channel, int_id)); } static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata, struct xgbe_channel *channel) { struct xgbe_hw_if *hw_if = &pdata->hw_if; enum xgbe_int int_id; if (channel->tx_ring && channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI_RI; else if (channel->tx_ring) int_id = XGMAC_INT_DMA_CH_SR_TI; else if (channel->rx_ring) int_id = XGMAC_INT_DMA_CH_SR_RI; else return; axgbe_printf(1, "%s channel: %d rx_tx interrupt disabled %d\n", __func__, channel->queue_index, int_id); hw_if->disable_int(channel, int_id); } static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata) { unsigned int i; for (i = 0; i < pdata->channel_count; i++) xgbe_disable_rx_tx_int(pdata, pdata->channel[i]); } static int axgbe_msix_que(void *arg) { struct xgbe_channel *channel = (struct xgbe_channel *)arg; struct xgbe_prv_data *pdata = channel->pdata; unsigned int dma_ch_isr, dma_status; axgbe_printf(1, "%s: Channel: %d SR 0x%04x DSR 0x%04x IER:0x%04x D_ISR:0x%04x M_ISR:0x%04x\n", __func__, channel->queue_index, XGMAC_DMA_IOREAD(channel, DMA_CH_SR), XGMAC_DMA_IOREAD(channel, DMA_CH_DSR), XGMAC_DMA_IOREAD(channel, DMA_CH_IER), XGMAC_IOREAD(pdata, DMA_ISR), XGMAC_IOREAD(pdata, MAC_ISR)); dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR); /* Disable Tx and Rx channel interrupts */ xgbe_disable_rx_tx_int(pdata, channel); /* Clear the interrupts */ dma_status = 0; XGMAC_SET_BITS(dma_status, DMA_CH_SR, TI, 1); XGMAC_SET_BITS(dma_status, DMA_CH_SR, RI, 1); XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_status); return (FILTER_SCHEDULE_THREAD); } static int axgbe_dev_isr(void *arg) { struct axgbe_if_softc *sc = (struct axgbe_if_softc *)arg; struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_channel *channel; struct xgbe_hw_if *hw_if = &pdata->hw_if; unsigned int i, dma_isr, dma_ch_isr; unsigned int mac_isr, mac_mdioisr; int ret = FILTER_HANDLED; dma_isr = XGMAC_IOREAD(pdata, DMA_ISR); axgbe_printf(2, "%s DMA ISR: 0x%x\n", __func__, dma_isr); if (!dma_isr) return (FILTER_HANDLED); for (i = 0; i < pdata->channel_count; i++) { if (!(dma_isr & (1 << i))) continue; channel = pdata->channel[i]; dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR); axgbe_printf(2, "%s: channel %d SR 0x%x DSR 0x%x\n", __func__, channel->queue_index, dma_ch_isr, XGMAC_DMA_IOREAD(channel, DMA_CH_DSR)); /* * The TI or RI interrupt bits may still be set even if using * per channel DMA interrupts. Check to be sure those are not * enabled before using the private data napi structure. */ if (!pdata->per_channel_irq && (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, TI) || XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RI))) { /* Disable Tx and Rx interrupts */ xgbe_disable_rx_tx_ints(pdata); } else { /* * Don't clear Rx/Tx status if doing per channel DMA * interrupts, these will be cleared by the ISR for * per channel DMA interrupts */ XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, TI, 0); XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, RI, 0); } if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU)) pdata->ext_stats.rx_buffer_unavailable++; /* Restart the device on a Fatal Bus Error */ if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE)) axgbe_error("%s: Fatal bus error reported 0x%x\n", __func__, dma_ch_isr); /* Clear all interrupt signals */ XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr); ret = FILTER_SCHEDULE_THREAD; } if (XGMAC_GET_BITS(dma_isr, DMA_ISR, MACIS)) { mac_isr = XGMAC_IOREAD(pdata, MAC_ISR); axgbe_printf(2, "%s MAC ISR: 0x%x\n", __func__, mac_isr); if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCTXIS)) hw_if->tx_mmc_int(pdata); if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCRXIS)) hw_if->rx_mmc_int(pdata); if (XGMAC_GET_BITS(mac_isr, MAC_ISR, SMI)) { mac_mdioisr = XGMAC_IOREAD(pdata, MAC_MDIOISR); if (XGMAC_GET_BITS(mac_mdioisr, MAC_MDIOISR, SNGLCOMPINT)) wakeup_one(pdata); } } return (ret); } /* axgbe_dev_isr */ static void axgbe_i2c_isr(void *arg) { struct axgbe_if_softc *sc = (struct axgbe_if_softc *)arg; sc->pdata.i2c_if.i2c_isr(&sc->pdata); } static void axgbe_ecc_isr(void *arg) { /* TODO - implement */ } static void axgbe_an_isr(void *arg) { struct axgbe_if_softc *sc = (struct axgbe_if_softc *)arg; sc->pdata.phy_if.an_isr(&sc->pdata); } static int axgbe_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; int ret; if (qid < pdata->tx_q_count) { ret = xgbe_enable_rx_tx_int(pdata, pdata->channel[qid]); if (ret) { axgbe_error("Enable TX INT failed\n"); return (ret); } } else axgbe_error("Queue ID exceed channel count\n"); return (0); } static int axgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; int ret; if (qid < pdata->rx_q_count) { ret = xgbe_enable_rx_tx_int(pdata, pdata->channel[qid]); if (ret) { axgbe_error("Enable RX INT failed\n"); return (ret); } } else axgbe_error("Queue ID exceed channel count\n"); return (0); } static void axgbe_if_update_admin_status(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; axgbe_printf(1, "%s: phy_link %d status %d speed %d\n", __func__, pdata->phy_link, sc->link_status, pdata->phy.speed); if (pdata->phy_link < 0) return; if (pdata->phy_link) { if (sc->link_status == LINK_STATE_DOWN) { sc->link_status = LINK_STATE_UP; if (pdata->phy.speed & SPEED_10000) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); else if (pdata->phy.speed & SPEED_2500) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(2.5)); else if (pdata->phy.speed & SPEED_1000) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(1)); else if (pdata->phy.speed & SPEED_100) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(100)); else if (pdata->phy.speed & SPEED_10) iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(10)); } } else { if (sc->link_status == LINK_STATE_UP) { sc->link_status = LINK_STATE_DOWN; iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); } } } static int axgbe_if_media_change(if_ctx_t ctx) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); sx_xlock(&sc->pdata.an_mutex); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_10G_KR: sc->pdata.phy.speed = SPEED_10000; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_2500_KX: sc->pdata.phy.speed = SPEED_2500; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_1000_KX: sc->pdata.phy.speed = SPEED_1000; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_100_TX: sc->pdata.phy.speed = SPEED_100; sc->pdata.phy.autoneg = AUTONEG_DISABLE; break; case IFM_AUTO: sc->pdata.phy.autoneg = AUTONEG_ENABLE; break; } sx_xunlock(&sc->pdata.an_mutex); return (-sc->pdata.phy_if.phy_config_aneg(&sc->pdata)); } static int axgbe_if_promisc_set(if_ctx_t ctx, int flags) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); if (XGMAC_IOREAD_BITS(&sc->pdata, MAC_PFR, PR) == 1) return (0); XGMAC_IOWRITE_BITS(&sc->pdata, MAC_PFR, PR, 1); XGMAC_IOWRITE_BITS(&sc->pdata, MAC_PFR, VTFE, 0); return (0); } static uint64_t axgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct xgbe_prv_data *pdata = &sc->pdata; struct xgbe_mmc_stats *pstats = &pdata->mmc_stats; pdata->hw_if.read_mmc_stats(pdata); switch(cnt) { case IFCOUNTER_IPACKETS: return (pstats->rxframecount_gb); case IFCOUNTER_IERRORS: return (pstats->rxframecount_gb - pstats->rxbroadcastframes_g - pstats->rxmulticastframes_g - pstats->rxunicastframes_g); case IFCOUNTER_OPACKETS: return (pstats->txframecount_gb); case IFCOUNTER_OERRORS: return (pstats->txframecount_gb - pstats->txframecount_g); case IFCOUNTER_IBYTES: return (pstats->rxoctetcount_gb); case IFCOUNTER_OBYTES: return (pstats->txoctetcount_gb); default: return (if_get_counter_default(ifp, cnt)); } } static int axgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; int ret; if (mtu > XGMAC_JUMBO_PACKET_MTU) return (EINVAL); ret = xgbe_calc_rx_buf_size(pdata->netdev, mtu); pdata->rx_buf_size = ret; axgbe_printf(1, "%s: rx_buf_size %d\n", __func__, ret); sc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } static void axgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct axgbe_if_softc *sc = iflib_get_softc(ctx); struct xgbe_prv_data *pdata = &sc->pdata; ifmr->ifm_status = IFM_AVALID; if (!sc->pdata.phy.link) return; ifmr->ifm_active = IFM_ETHER; ifmr->ifm_status |= IFM_ACTIVE; axgbe_printf(1, "Speed 0x%x Mode %d\n", sc->pdata.phy.speed, pdata->phy_if.phy_impl.cur_mode(pdata)); pdata->phy_if.phy_impl.get_type(pdata, ifmr); ifmr->ifm_active |= IFM_FDX; ifmr->ifm_active |= IFM_ETH_TXPAUSE; ifmr->ifm_active |= IFM_ETH_RXPAUSE; } diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c index 724fd0469ef5..7811f4fdebf0 100644 --- a/sys/dev/bnxt/if_bnxt.c +++ b/sys/dev/bnxt/if_bnxt.c @@ -1,2507 +1,2507 @@ /*- * Broadcom NetXtreme-C/E network driver. * * Copyright (c) 2016 Broadcom, All Rights Reserved. * The term Broadcom refers to Broadcom Limited and/or its subsidiaries * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "ifdi_if.h" #include "bnxt.h" #include "bnxt_hwrm.h" #include "bnxt_ioctl.h" #include "bnxt_sysctl.h" #include "hsi_struct_def.h" /* * PCI Device ID Table */ static pci_vendor_info_t bnxt_vendor_info_array[] = { PVID(BROADCOM_VENDOR_ID, BCM57301, "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57302, "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57304, "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57311, "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57312, "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57314, "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57402, "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57402_NPAR, "Broadcom BCM57402 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57404, "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57404_NPAR, "Broadcom BCM57404 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57406, "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57406_NPAR, "Broadcom BCM57406 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57407, "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57407_NPAR, "Broadcom BCM57407 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57407_SFP, "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57412, "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57412_NPAR1, "Broadcom BCM57412 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57412_NPAR2, "Broadcom BCM57412 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57414, "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57414_NPAR1, "Broadcom BCM57414 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57414_NPAR2, "Broadcom BCM57414 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416, "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57416_NPAR1, "Broadcom BCM57416 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416_NPAR2, "Broadcom BCM57416 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416_SFP, "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57417, "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57417_NPAR1, "Broadcom BCM57417 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57417_NPAR2, "Broadcom BCM57417 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57417_SFP, "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57454, "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM58700, "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF1, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF2, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF3, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF1, "Broadcom NetXtreme-E Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF2, "Broadcom NetXtreme-E Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF3, "Broadcom NetXtreme-E Ethernet Virtual Function"), /* required last entry */ PVID_END }; /* * Function prototypes */ static void *bnxt_register(device_t dev); /* Soft queue setup and teardown */ static int bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void bnxt_queues_free(if_ctx_t ctx); /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx); static int bnxt_attach_post(if_ctx_t ctx); static int bnxt_detach(if_ctx_t ctx); /* Device configuration */ static void bnxt_init(if_ctx_t ctx); static void bnxt_stop(if_ctx_t ctx); static void bnxt_multi_set(if_ctx_t ctx); static int bnxt_mtu_set(if_ctx_t ctx, uint32_t mtu); static void bnxt_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); static int bnxt_media_change(if_ctx_t ctx); static int bnxt_promisc_set(if_ctx_t ctx, int flags); static uint64_t bnxt_get_counter(if_ctx_t, ift_counter); static void bnxt_update_admin_status(if_ctx_t ctx); static void bnxt_if_timer(if_ctx_t ctx, uint16_t qid); /* Interrupt enable / disable */ static void bnxt_intr_enable(if_ctx_t ctx); static int bnxt_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static int bnxt_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static void bnxt_disable_intr(if_ctx_t ctx); static int bnxt_msix_intr_assign(if_ctx_t ctx, int msix); /* vlan support */ static void bnxt_vlan_register(if_ctx_t ctx, uint16_t vtag); static void bnxt_vlan_unregister(if_ctx_t ctx, uint16_t vtag); /* ioctl */ static int bnxt_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int bnxt_shutdown(if_ctx_t ctx); static int bnxt_suspend(if_ctx_t ctx); static int bnxt_resume(if_ctx_t ctx); /* Internal support functions */ static int bnxt_probe_phy(struct bnxt_softc *softc); static void bnxt_add_media_types(struct bnxt_softc *softc); static int bnxt_pci_mapping(struct bnxt_softc *softc); static void bnxt_pci_mapping_free(struct bnxt_softc *softc); static int bnxt_update_link(struct bnxt_softc *softc, bool chng_link_state); static int bnxt_handle_def_cp(void *arg); static int bnxt_handle_rx_cp(void *arg); static void bnxt_clear_ids(struct bnxt_softc *softc); static void inline bnxt_do_enable_intr(struct bnxt_cp_ring *cpr); static void inline bnxt_do_disable_intr(struct bnxt_cp_ring *cpr); static void bnxt_mark_cpr_invalid(struct bnxt_cp_ring *cpr); static void bnxt_def_cp_task(void *context); static void bnxt_handle_async_event(struct bnxt_softc *softc, struct cmpl_base *cmpl); static uint8_t get_phy_type(struct bnxt_softc *softc); static uint64_t bnxt_get_baudrate(struct bnxt_link_info *link); static void bnxt_get_wol_settings(struct bnxt_softc *softc); static int bnxt_wol_config(if_ctx_t ctx); /* * Device Interface Declaration */ static device_method_t bnxt_methods[] = { /* Device interface */ DEVMETHOD(device_register, bnxt_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t bnxt_driver = { "bnxt", bnxt_methods, sizeof(struct bnxt_softc), }; devclass_t bnxt_devclass; DRIVER_MODULE(bnxt, pci, bnxt_driver, bnxt_devclass, 0, 0); MODULE_DEPEND(bnxt, pci, 1, 1, 1); MODULE_DEPEND(bnxt, ether, 1, 1, 1); MODULE_DEPEND(bnxt, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, bnxt, bnxt_vendor_info_array); static device_method_t bnxt_iflib_methods[] = { DEVMETHOD(ifdi_tx_queues_alloc, bnxt_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, bnxt_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, bnxt_queues_free), DEVMETHOD(ifdi_attach_pre, bnxt_attach_pre), DEVMETHOD(ifdi_attach_post, bnxt_attach_post), DEVMETHOD(ifdi_detach, bnxt_detach), DEVMETHOD(ifdi_init, bnxt_init), DEVMETHOD(ifdi_stop, bnxt_stop), DEVMETHOD(ifdi_multi_set, bnxt_multi_set), DEVMETHOD(ifdi_mtu_set, bnxt_mtu_set), DEVMETHOD(ifdi_media_status, bnxt_media_status), DEVMETHOD(ifdi_media_change, bnxt_media_change), DEVMETHOD(ifdi_promisc_set, bnxt_promisc_set), DEVMETHOD(ifdi_get_counter, bnxt_get_counter), DEVMETHOD(ifdi_update_admin_status, bnxt_update_admin_status), DEVMETHOD(ifdi_timer, bnxt_if_timer), DEVMETHOD(ifdi_intr_enable, bnxt_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, bnxt_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, bnxt_rx_queue_intr_enable), DEVMETHOD(ifdi_intr_disable, bnxt_disable_intr), DEVMETHOD(ifdi_msix_intr_assign, bnxt_msix_intr_assign), DEVMETHOD(ifdi_vlan_register, bnxt_vlan_register), DEVMETHOD(ifdi_vlan_unregister, bnxt_vlan_unregister), DEVMETHOD(ifdi_priv_ioctl, bnxt_priv_ioctl), DEVMETHOD(ifdi_suspend, bnxt_suspend), DEVMETHOD(ifdi_shutdown, bnxt_shutdown), DEVMETHOD(ifdi_resume, bnxt_resume), DEVMETHOD_END }; static driver_t bnxt_iflib_driver = { "bnxt", bnxt_iflib_methods, sizeof(struct bnxt_softc) }; /* * iflib shared context */ #define BNXT_DRIVER_VERSION "1.0.0.2" char bnxt_driver_version[] = BNXT_DRIVER_VERSION; extern struct if_txrx bnxt_txrx; static struct if_shared_ctx bnxt_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_driver = &bnxt_iflib_driver, .isc_nfl = 2, // Number of Free Lists .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_NEED_ETHER_PAD, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_rx_maxsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_rx_maxsegsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), // Only use a single segment to avoid page size constraints .isc_rx_nsegments = 1, .isc_ntxqs = 2, .isc_nrxqs = 3, .isc_nrxd_min = {16, 16, 16}, .isc_nrxd_default = {PAGE_SIZE / sizeof(struct cmpl_base) * 8, PAGE_SIZE / sizeof(struct rx_prod_pkt_bd), PAGE_SIZE / sizeof(struct rx_prod_pkt_bd)}, .isc_nrxd_max = {BNXT_MAX_RXD, BNXT_MAX_RXD, BNXT_MAX_RXD}, .isc_ntxd_min = {16, 16, 16}, .isc_ntxd_default = {PAGE_SIZE / sizeof(struct cmpl_base) * 2, PAGE_SIZE / sizeof(struct tx_bd_short)}, .isc_ntxd_max = {BNXT_MAX_TXD, BNXT_MAX_TXD, BNXT_MAX_TXD}, .isc_admin_intrcnt = 1, .isc_vendor_info = bnxt_vendor_info_array, .isc_driver_version = bnxt_driver_version, }; if_shared_ctx_t bnxt_sctx = &bnxt_sctx_init; /* * Device Methods */ static void * bnxt_register(device_t dev) { return bnxt_sctx; } /* * Device Dependent Configuration Functions */ /* Soft queue setup and teardown */ static int bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct bnxt_softc *softc; int i; int rc; softc = iflib_get_softc(ctx); softc->tx_cp_rings = malloc(sizeof(struct bnxt_cp_ring) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->tx_cp_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate TX completion rings\n"); rc = ENOMEM; goto cp_alloc_fail; } softc->tx_rings = malloc(sizeof(struct bnxt_ring) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->tx_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate TX rings\n"); rc = ENOMEM; goto ring_alloc_fail; } rc = iflib_dma_alloc(ctx, sizeof(struct ctx_hw_stats) * ntxqsets, &softc->tx_stats, 0); if (rc) goto dma_alloc_fail; bus_dmamap_sync(softc->tx_stats.idi_tag, softc->tx_stats.idi_map, BUS_DMASYNC_PREREAD); for (i = 0; i < ntxqsets; i++) { /* Set up the completion ring */ softc->tx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.softc = softc; softc->tx_cp_rings[i].ring.id = (softc->scctx->isc_nrxqsets * 2) + 1 + i; softc->tx_cp_rings[i].ring.doorbell = softc->tx_cp_rings[i].ring.id * 0x80; softc->tx_cp_rings[i].ring.ring_size = softc->scctx->isc_ntxd[0]; softc->tx_cp_rings[i].ring.vaddr = vaddrs[i * ntxqs]; softc->tx_cp_rings[i].ring.paddr = paddrs[i * ntxqs]; /* Set up the TX ring */ softc->tx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_rings[i].softc = softc; softc->tx_rings[i].id = (softc->scctx->isc_nrxqsets * 2) + 1 + i; softc->tx_rings[i].doorbell = softc->tx_rings[i].id * 0x80; softc->tx_rings[i].ring_size = softc->scctx->isc_ntxd[1]; softc->tx_rings[i].vaddr = vaddrs[i * ntxqs + 1]; softc->tx_rings[i].paddr = paddrs[i * ntxqs + 1]; bnxt_create_tx_sysctls(softc, i); } softc->ntxqsets = ntxqsets; return rc; dma_alloc_fail: free(softc->tx_rings, M_DEVBUF); ring_alloc_fail: free(softc->tx_cp_rings, M_DEVBUF); cp_alloc_fail: return rc; } static void bnxt_queues_free(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); // Free TX queues iflib_dma_free(&softc->tx_stats); free(softc->tx_rings, M_DEVBUF); softc->tx_rings = NULL; free(softc->tx_cp_rings, M_DEVBUF); softc->tx_cp_rings = NULL; softc->ntxqsets = 0; // Free RX queues iflib_dma_free(&softc->rx_stats); iflib_dma_free(&softc->hw_tx_port_stats); iflib_dma_free(&softc->hw_rx_port_stats); free(softc->grp_info, M_DEVBUF); free(softc->ag_rings, M_DEVBUF); free(softc->rx_rings, M_DEVBUF); free(softc->rx_cp_rings, M_DEVBUF); } static int bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct bnxt_softc *softc; int i; int rc; softc = iflib_get_softc(ctx); softc->rx_cp_rings = malloc(sizeof(struct bnxt_cp_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->rx_cp_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate RX completion rings\n"); rc = ENOMEM; goto cp_alloc_fail; } softc->rx_rings = malloc(sizeof(struct bnxt_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->rx_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate RX rings\n"); rc = ENOMEM; goto ring_alloc_fail; } softc->ag_rings = malloc(sizeof(struct bnxt_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->ag_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate aggregation rings\n"); rc = ENOMEM; goto ag_alloc_fail; } softc->grp_info = malloc(sizeof(struct bnxt_grp_info) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->grp_info) { device_printf(iflib_get_dev(ctx), "unable to allocate ring groups\n"); rc = ENOMEM; goto grp_alloc_fail; } rc = iflib_dma_alloc(ctx, sizeof(struct ctx_hw_stats) * nrxqsets, &softc->rx_stats, 0); if (rc) goto hw_stats_alloc_fail; bus_dmamap_sync(softc->rx_stats.idi_tag, softc->rx_stats.idi_map, BUS_DMASYNC_PREREAD); /* * Additional 512 bytes for future expansion. * To prevent corruption when loaded with newer firmwares with added counters. * This can be deleted when there will be no further additions of counters. */ #define BNXT_PORT_STAT_PADDING 512 rc = iflib_dma_alloc(ctx, sizeof(struct rx_port_stats) + BNXT_PORT_STAT_PADDING, &softc->hw_rx_port_stats, 0); if (rc) goto hw_port_rx_stats_alloc_fail; bus_dmamap_sync(softc->hw_rx_port_stats.idi_tag, softc->hw_rx_port_stats.idi_map, BUS_DMASYNC_PREREAD); rc = iflib_dma_alloc(ctx, sizeof(struct tx_port_stats) + BNXT_PORT_STAT_PADDING, &softc->hw_tx_port_stats, 0); if (rc) goto hw_port_tx_stats_alloc_fail; bus_dmamap_sync(softc->hw_tx_port_stats.idi_tag, softc->hw_tx_port_stats.idi_map, BUS_DMASYNC_PREREAD); softc->rx_port_stats = (void *) softc->hw_rx_port_stats.idi_vaddr; softc->tx_port_stats = (void *) softc->hw_tx_port_stats.idi_vaddr; for (i = 0; i < nrxqsets; i++) { /* Allocation the completion ring */ softc->rx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.softc = softc; softc->rx_cp_rings[i].ring.id = i + 1; softc->rx_cp_rings[i].ring.doorbell = softc->rx_cp_rings[i].ring.id * 0x80; /* * If this ring overflows, RX stops working. */ softc->rx_cp_rings[i].ring.ring_size = softc->scctx->isc_nrxd[0]; softc->rx_cp_rings[i].ring.vaddr = vaddrs[i * nrxqs]; softc->rx_cp_rings[i].ring.paddr = paddrs[i * nrxqs]; /* Allocate the RX ring */ softc->rx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_rings[i].softc = softc; softc->rx_rings[i].id = i + 1; softc->rx_rings[i].doorbell = softc->rx_rings[i].id * 0x80; softc->rx_rings[i].ring_size = softc->scctx->isc_nrxd[1]; softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1]; softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1]; /* Allocate the TPA start buffer */ softc->rx_rings[i].tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) * (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->rx_rings[i].tpa_start == NULL) { rc = -ENOMEM; device_printf(softc->dev, "Unable to allocate space for TPA\n"); goto tpa_alloc_fail; } /* Allocate the AG ring */ softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].softc = softc; softc->ag_rings[i].id = nrxqsets + i + 1; softc->ag_rings[i].doorbell = softc->ag_rings[i].id * 0x80; softc->ag_rings[i].ring_size = softc->scctx->isc_nrxd[2]; softc->ag_rings[i].vaddr = vaddrs[i * nrxqs + 2]; softc->ag_rings[i].paddr = paddrs[i * nrxqs + 2]; /* Allocate the ring group */ softc->grp_info[i].grp_id = (uint16_t)HWRM_NA_SIGNATURE; softc->grp_info[i].stats_ctx = softc->rx_cp_rings[i].stats_ctx_id; softc->grp_info[i].rx_ring_id = softc->rx_rings[i].phys_id; softc->grp_info[i].ag_ring_id = softc->ag_rings[i].phys_id; softc->grp_info[i].cp_ring_id = softc->rx_cp_rings[i].ring.phys_id; bnxt_create_rx_sysctls(softc, i); } /* * When SR-IOV is enabled, avoid each VF sending PORT_QSTATS * HWRM every sec with which firmware timeouts can happen */ if (BNXT_PF(softc)) bnxt_create_port_stats_sysctls(softc); /* And finally, the VNIC */ softc->vnic_info.id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.flow_id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.filter_id = -1; softc->vnic_info.def_ring_grp = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.cos_rule = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.lb_rule = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.rx_mask = HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST; softc->vnic_info.mc_list_count = 0; softc->vnic_info.flags = BNXT_VNIC_FLAG_DEFAULT; rc = iflib_dma_alloc(ctx, BNXT_MAX_MC_ADDRS * ETHER_ADDR_LEN, &softc->vnic_info.mc_list, 0); if (rc) goto mc_list_alloc_fail; /* The VNIC RSS Hash Key */ rc = iflib_dma_alloc(ctx, HW_HASH_KEY_SIZE, &softc->vnic_info.rss_hash_key_tbl, 0); if (rc) goto rss_hash_alloc_fail; bus_dmamap_sync(softc->vnic_info.rss_hash_key_tbl.idi_tag, softc->vnic_info.rss_hash_key_tbl.idi_map, BUS_DMASYNC_PREWRITE); memcpy(softc->vnic_info.rss_hash_key_tbl.idi_vaddr, softc->vnic_info.rss_hash_key, HW_HASH_KEY_SIZE); /* Allocate the RSS tables */ rc = iflib_dma_alloc(ctx, HW_HASH_INDEX_SIZE * sizeof(uint16_t), &softc->vnic_info.rss_grp_tbl, 0); if (rc) goto rss_grp_alloc_fail; bus_dmamap_sync(softc->vnic_info.rss_grp_tbl.idi_tag, softc->vnic_info.rss_grp_tbl.idi_map, BUS_DMASYNC_PREWRITE); memset(softc->vnic_info.rss_grp_tbl.idi_vaddr, 0xff, softc->vnic_info.rss_grp_tbl.idi_size); softc->nrxqsets = nrxqsets; return rc; rss_grp_alloc_fail: iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl); rss_hash_alloc_fail: iflib_dma_free(&softc->vnic_info.mc_list); tpa_alloc_fail: mc_list_alloc_fail: for (i = i - 1; i >= 0; i--) free(softc->rx_rings[i].tpa_start, M_DEVBUF); iflib_dma_free(&softc->hw_tx_port_stats); hw_port_tx_stats_alloc_fail: iflib_dma_free(&softc->hw_rx_port_stats); hw_port_rx_stats_alloc_fail: iflib_dma_free(&softc->rx_stats); hw_stats_alloc_fail: free(softc->grp_info, M_DEVBUF); grp_alloc_fail: free(softc->ag_rings, M_DEVBUF); ag_alloc_fail: free(softc->rx_rings, M_DEVBUF); ring_alloc_fail: free(softc->rx_cp_rings, M_DEVBUF); cp_alloc_fail: return rc; } static void bnxt_free_hwrm_short_cmd_req(struct bnxt_softc *softc) { if (softc->hwrm_short_cmd_req_addr.idi_vaddr) iflib_dma_free(&softc->hwrm_short_cmd_req_addr); softc->hwrm_short_cmd_req_addr.idi_vaddr = NULL; } static int bnxt_alloc_hwrm_short_cmd_req(struct bnxt_softc *softc) { int rc; rc = iflib_dma_alloc(softc->ctx, softc->hwrm_max_req_len, &softc->hwrm_short_cmd_req_addr, BUS_DMA_NOWAIT); return rc; } /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_softc_ctx_t scctx; int rc = 0; softc->ctx = ctx; softc->dev = iflib_get_dev(ctx); softc->media = iflib_get_media(ctx); softc->scctx = iflib_get_softc_ctx(ctx); softc->sctx = iflib_get_sctx(ctx); scctx = softc->scctx; /* TODO: Better way of detecting NPAR/VF is needed */ switch (pci_get_device(softc->dev)) { case BCM57402_NPAR: case BCM57404_NPAR: case BCM57406_NPAR: case BCM57407_NPAR: case BCM57412_NPAR1: case BCM57412_NPAR2: case BCM57414_NPAR1: case BCM57414_NPAR2: case BCM57416_NPAR1: case BCM57416_NPAR2: softc->flags |= BNXT_FLAG_NPAR; break; case NETXTREME_C_VF1: case NETXTREME_C_VF2: case NETXTREME_C_VF3: case NETXTREME_E_VF1: case NETXTREME_E_VF2: case NETXTREME_E_VF3: softc->flags |= BNXT_FLAG_VF; break; } pci_enable_busmaster(softc->dev); if (bnxt_pci_mapping(softc)) return (ENXIO); /* HWRM setup/init */ BNXT_HWRM_LOCK_INIT(softc, device_get_nameunit(softc->dev)); rc = bnxt_alloc_hwrm_dma_mem(softc); if (rc) goto dma_fail; /* Get firmware version and compare with driver */ softc->ver_info = malloc(sizeof(struct bnxt_ver_info), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->ver_info == NULL) { rc = ENOMEM; device_printf(softc->dev, "Unable to allocate space for version info\n"); goto ver_alloc_fail; } /* Default minimum required HWRM version */ softc->ver_info->hwrm_min_major = 1; softc->ver_info->hwrm_min_minor = 2; softc->ver_info->hwrm_min_update = 2; rc = bnxt_hwrm_ver_get(softc); if (rc) { device_printf(softc->dev, "attach: hwrm ver get failed\n"); goto ver_fail; } if (softc->flags & BNXT_FLAG_SHORT_CMD) { rc = bnxt_alloc_hwrm_short_cmd_req(softc); if (rc) goto hwrm_short_cmd_alloc_fail; } /* Get NVRAM info */ if (BNXT_PF(softc)) { softc->nvm_info = malloc(sizeof(struct bnxt_nvram_info), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->nvm_info == NULL) { rc = ENOMEM; device_printf(softc->dev, "Unable to allocate space for NVRAM info\n"); goto nvm_alloc_fail; } rc = bnxt_hwrm_nvm_get_dev_info(softc, &softc->nvm_info->mfg_id, &softc->nvm_info->device_id, &softc->nvm_info->sector_size, &softc->nvm_info->size, &softc->nvm_info->reserved_size, &softc->nvm_info->available_size); } /* Register the driver with the FW */ rc = bnxt_hwrm_func_drv_rgtr(softc); if (rc) { device_printf(softc->dev, "attach: hwrm drv rgtr failed\n"); goto drv_rgtr_fail; } rc = bnxt_hwrm_func_rgtr_async_events(softc, NULL, 0); if (rc) { device_printf(softc->dev, "attach: hwrm rgtr async evts failed\n"); goto drv_rgtr_fail; } /* Get the HW capabilities */ rc = bnxt_hwrm_func_qcaps(softc); if (rc) goto failed; /* Get the current configuration of this function */ rc = bnxt_hwrm_func_qcfg(softc); if (rc) { device_printf(softc->dev, "attach: hwrm func qcfg failed\n"); goto failed; } iflib_set_mac(ctx, softc->func.mac_addr); scctx->isc_txrx = &bnxt_txrx; scctx->isc_tx_csum_flags = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO); scctx->isc_capabilities = scctx->isc_capenable = /* These are translated to hwassit bits */ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | /* These are checked by iflib */ IFCAP_LRO | IFCAP_VLAN_HWFILTER | /* These are part of the iflib mask */ IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | /* These likely get lost... */ IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU; if (bnxt_wol_supported(softc)) scctx->isc_capenable |= IFCAP_WOL_MAGIC; /* Get the queue config */ rc = bnxt_hwrm_queue_qportcfg(softc); if (rc) { device_printf(softc->dev, "attach: hwrm qportcfg failed\n"); goto failed; } bnxt_get_wol_settings(softc); /* Now perform a function reset */ rc = bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); if (rc) goto failed; /* Now set up iflib sc */ scctx->isc_tx_nsegments = 31, scctx->isc_tx_tso_segments_max = 31; scctx->isc_tx_tso_size_max = BNXT_TSO_SIZE; scctx->isc_tx_tso_segsize_max = BNXT_TSO_SIZE; scctx->isc_vectors = softc->func.max_cp_rings; scctx->isc_min_frame_size = BNXT_MIN_FRAME_SIZE; scctx->isc_txrx = &bnxt_txrx; if (scctx->isc_nrxd[0] < ((scctx->isc_nrxd[1] * 4) + scctx->isc_nrxd[2])) device_printf(softc->dev, "WARNING: nrxd0 (%d) should be at least 4 * nrxd1 (%d) + nrxd2 (%d). Driver may be unstable\n", scctx->isc_nrxd[0], scctx->isc_nrxd[1], scctx->isc_nrxd[2]); if (scctx->isc_ntxd[0] < scctx->isc_ntxd[1] * 2) device_printf(softc->dev, "WARNING: ntxd0 (%d) should be at least 2 * ntxd1 (%d). Driver may be unstable\n", scctx->isc_ntxd[0], scctx->isc_ntxd[1]); scctx->isc_txqsizes[0] = sizeof(struct cmpl_base) * scctx->isc_ntxd[0]; scctx->isc_txqsizes[1] = sizeof(struct tx_bd_short) * scctx->isc_ntxd[1]; scctx->isc_rxqsizes[0] = sizeof(struct cmpl_base) * scctx->isc_nrxd[0]; scctx->isc_rxqsizes[1] = sizeof(struct rx_prod_pkt_bd) * scctx->isc_nrxd[1]; scctx->isc_rxqsizes[2] = sizeof(struct rx_prod_pkt_bd) * scctx->isc_nrxd[2]; scctx->isc_nrxqsets_max = min(pci_msix_count(softc->dev)-1, softc->fn_qcfg.alloc_completion_rings - 1); scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max, softc->fn_qcfg.alloc_rx_rings); scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max, softc->fn_qcfg.alloc_vnics); scctx->isc_ntxqsets_max = min(softc->fn_qcfg.alloc_tx_rings, softc->fn_qcfg.alloc_completion_rings - scctx->isc_nrxqsets_max - 1); scctx->isc_rss_table_size = HW_HASH_INDEX_SIZE; scctx->isc_rss_table_mask = scctx->isc_rss_table_size - 1; /* iflib will map and release this bar */ scctx->isc_msix_bar = pci_msix_table_bar(softc->dev); /* * Default settings for HW LRO (TPA): * Disable HW LRO by default * Can be enabled after taking care of 'packet forwarding' */ softc->hw_lro.enable = 0; softc->hw_lro.is_mode_gro = 0; softc->hw_lro.max_agg_segs = 5; /* 2^5 = 32 segs */ softc->hw_lro.max_aggs = HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX; softc->hw_lro.min_agg_len = 512; /* Allocate the default completion ring */ softc->def_cp_ring.stats_ctx_id = HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.softc = softc; softc->def_cp_ring.ring.id = 0; softc->def_cp_ring.ring.doorbell = softc->def_cp_ring.ring.id * 0x80; softc->def_cp_ring.ring.ring_size = PAGE_SIZE / sizeof(struct cmpl_base); rc = iflib_dma_alloc(ctx, sizeof(struct cmpl_base) * softc->def_cp_ring.ring.ring_size, &softc->def_cp_ring_mem, 0); softc->def_cp_ring.ring.vaddr = softc->def_cp_ring_mem.idi_vaddr; softc->def_cp_ring.ring.paddr = softc->def_cp_ring_mem.idi_paddr; iflib_config_gtask_init(ctx, &softc->def_cp_task, bnxt_def_cp_task, "dflt_cp"); rc = bnxt_init_sysctl_ctx(softc); if (rc) goto init_sysctl_failed; if (BNXT_PF(softc)) { rc = bnxt_create_nvram_sysctls(softc->nvm_info); if (rc) goto failed; } arc4rand(softc->vnic_info.rss_hash_key, HW_HASH_KEY_SIZE, 0); softc->vnic_info.rss_hash_type = HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV6 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6; rc = bnxt_create_config_sysctls_pre(softc); if (rc) goto failed; rc = bnxt_create_hw_lro_sysctls(softc); if (rc) goto failed; rc = bnxt_create_pause_fc_sysctls(softc); if (rc) goto failed; /* Initialize the vlan list */ SLIST_INIT(&softc->vnic_info.vlan_tags); softc->vnic_info.vlan_tag_list.idi_vaddr = NULL; return (rc); failed: bnxt_free_sysctl_ctx(softc); init_sysctl_failed: bnxt_hwrm_func_drv_unrgtr(softc, false); drv_rgtr_fail: if (BNXT_PF(softc)) free(softc->nvm_info, M_DEVBUF); nvm_alloc_fail: bnxt_free_hwrm_short_cmd_req(softc); hwrm_short_cmd_alloc_fail: ver_fail: free(softc->ver_info, M_DEVBUF); ver_alloc_fail: bnxt_free_hwrm_dma_mem(softc); dma_fail: BNXT_HWRM_LOCK_DESTROY(softc); bnxt_pci_mapping_free(softc); pci_disable_busmaster(softc->dev); return (rc); } static int bnxt_attach_post(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int rc; bnxt_create_config_sysctls_post(softc); /* Update link state etc... */ rc = bnxt_probe_phy(softc); if (rc) goto failed; /* Needs to be done after probing the phy */ bnxt_create_ver_sysctls(softc); bnxt_add_media_types(softc); ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO); softc->scctx->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; failed: return rc; } static int bnxt_detach(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *tag; struct bnxt_vlan_tag *tmp; int i; bnxt_wol_config(ctx); bnxt_do_disable_intr(&softc->def_cp_ring); bnxt_free_sysctl_ctx(softc); bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); iflib_irq_free(ctx, &softc->def_cp_ring.irq); iflib_config_gtask_deinit(&softc->def_cp_task); /* We need to free() these here... */ for (i = softc->nrxqsets-1; i>=0; i--) { iflib_irq_free(ctx, &softc->rx_cp_rings[i].irq); } iflib_dma_free(&softc->vnic_info.mc_list); iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl); iflib_dma_free(&softc->vnic_info.rss_grp_tbl); if (softc->vnic_info.vlan_tag_list.idi_vaddr) iflib_dma_free(&softc->vnic_info.vlan_tag_list); SLIST_FOREACH_SAFE(tag, &softc->vnic_info.vlan_tags, next, tmp) free(tag, M_DEVBUF); iflib_dma_free(&softc->def_cp_ring_mem); for (i = 0; i < softc->nrxqsets; i++) free(softc->rx_rings[i].tpa_start, M_DEVBUF); free(softc->ver_info, M_DEVBUF); if (BNXT_PF(softc)) free(softc->nvm_info, M_DEVBUF); bnxt_hwrm_func_drv_unrgtr(softc, false); bnxt_free_hwrm_dma_mem(softc); bnxt_free_hwrm_short_cmd_req(softc); BNXT_HWRM_LOCK_DESTROY(softc); pci_disable_busmaster(softc->dev); bnxt_pci_mapping_free(softc); return 0; } /* Device configuration */ static void bnxt_init(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifmediareq ifmr; int i, j; int rc; rc = bnxt_hwrm_func_reset(softc); if (rc) return; bnxt_clear_ids(softc); /* Allocate the default completion ring */ softc->def_cp_ring.cons = UINT32_MAX; softc->def_cp_ring.v_bit = 1; bnxt_mark_cpr_invalid(&softc->def_cp_ring); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->def_cp_ring.ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, true); if (rc) goto fail; /* And now set the default CP ring as the async CP ring */ rc = bnxt_cfg_async_cr(softc); if (rc) goto fail; for (i = 0; i < softc->nrxqsets; i++) { /* Allocate the statistics context */ rc = bnxt_hwrm_stat_ctx_alloc(softc, &softc->rx_cp_rings[i], softc->rx_stats.idi_paddr + (sizeof(struct ctx_hw_stats) * i)); if (rc) goto fail; /* Allocate the completion ring */ softc->rx_cp_rings[i].cons = UINT32_MAX; softc->rx_cp_rings[i].v_bit = 1; softc->rx_cp_rings[i].last_idx = UINT32_MAX; bnxt_mark_cpr_invalid(&softc->rx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->rx_cp_rings[i].ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, true); if (rc) goto fail; /* Allocate the RX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->rx_rings[i], (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; BNXT_RX_DB(&softc->rx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_RX_DB(&softc->rx_rings[i], 0); /* Allocate the AG ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->ag_rings[i], (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; BNXT_RX_DB(&softc->rx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_RX_DB(&softc->ag_rings[i], 0); /* Allocate the ring group */ softc->grp_info[i].stats_ctx = softc->rx_cp_rings[i].stats_ctx_id; softc->grp_info[i].rx_ring_id = softc->rx_rings[i].phys_id; softc->grp_info[i].ag_ring_id = softc->ag_rings[i].phys_id; softc->grp_info[i].cp_ring_id = softc->rx_cp_rings[i].ring.phys_id; rc = bnxt_hwrm_ring_grp_alloc(softc, &softc->grp_info[i]); if (rc) goto fail; } /* Allocate the VNIC RSS context */ rc = bnxt_hwrm_vnic_ctx_alloc(softc, &softc->vnic_info.rss_id); if (rc) goto fail; /* Allocate the vnic */ softc->vnic_info.def_ring_grp = softc->grp_info[0].grp_id; softc->vnic_info.mru = softc->scctx->isc_max_frame_size; rc = bnxt_hwrm_vnic_alloc(softc, &softc->vnic_info); if (rc) goto fail; rc = bnxt_hwrm_vnic_cfg(softc, &softc->vnic_info); if (rc) goto fail; rc = bnxt_hwrm_set_filter(softc, &softc->vnic_info); if (rc) goto fail; /* Enable RSS on the VNICs */ for (i = 0, j = 0; i < HW_HASH_INDEX_SIZE; i++) { ((uint16_t *) softc->vnic_info.rss_grp_tbl.idi_vaddr)[i] = htole16(softc->grp_info[j].grp_id); if (++j == softc->nrxqsets) j = 0; } rc = bnxt_hwrm_rss_cfg(softc, &softc->vnic_info, softc->vnic_info.rss_hash_type); if (rc) goto fail; rc = bnxt_hwrm_vnic_tpa_cfg(softc); if (rc) goto fail; for (i = 0; i < softc->ntxqsets; i++) { /* Allocate the statistics context */ rc = bnxt_hwrm_stat_ctx_alloc(softc, &softc->tx_cp_rings[i], softc->tx_stats.idi_paddr + (sizeof(struct ctx_hw_stats) * i)); if (rc) goto fail; /* Allocate the completion ring */ softc->tx_cp_rings[i].cons = UINT32_MAX; softc->tx_cp_rings[i].v_bit = 1; bnxt_mark_cpr_invalid(&softc->tx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->tx_cp_rings[i].ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; /* Allocate the TX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_TX, &softc->tx_rings[i], softc->tx_cp_rings[i].ring.phys_id, softc->tx_cp_rings[i].stats_ctx_id, false); if (rc) goto fail; BNXT_TX_DB(&softc->tx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_TX_DB(&softc->tx_rings[i], 0); } bnxt_do_enable_intr(&softc->def_cp_ring); bnxt_media_status(softc->ctx, &ifmr); bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); return; fail: bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); return; } static void bnxt_stop(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_disable_intr(&softc->def_cp_ring); bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); return; } static u_int bnxt_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { uint8_t *mta = arg; if (cnt == BNXT_MAX_MC_ADDRS) return (1); bcopy(LLADDR(sdl), &mta[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN); return (1); } static void bnxt_multi_set(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); uint8_t *mta; int mcnt; mta = softc->vnic_info.mc_list.idi_vaddr; bzero(mta, softc->vnic_info.mc_list.idi_size); mcnt = if_foreach_llmaddr(ifp, bnxt_copy_maddr, mta); if (mcnt > BNXT_MAX_MC_ADDRS) { softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); } else { softc->vnic_info.rx_mask &= ~HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; bus_dmamap_sync(softc->vnic_info.mc_list.idi_tag, softc->vnic_info.mc_list.idi_map, BUS_DMASYNC_PREWRITE); softc->vnic_info.mc_list_count = mcnt; softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_MCAST; if (bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info)) device_printf(softc->dev, "set_multi: rx_mask set failed\n"); } } static int bnxt_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct bnxt_softc *softc = iflib_get_softc(ctx); if (mtu > BNXT_MAX_MTU) return EINVAL; softc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return 0; } static void bnxt_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_link_info *link_info = &softc->link_info; struct ifmedia_entry *next; uint64_t target_baudrate = bnxt_get_baudrate(link_info); int active_media = IFM_UNKNOWN; bnxt_update_link(softc, true); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (link_info->link_up) ifmr->ifm_status |= IFM_ACTIVE; else ifmr->ifm_status &= ~IFM_ACTIVE; if (link_info->duplex == HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; /* * Go through the list of supported media which got prepared * as part of bnxt_add_media_types() using api ifmedia_add(). */ LIST_FOREACH(next, &(iflib_get_media(ctx)->ifm_list), ifm_list) { if (ifmedia_baudrate(next->ifm_media) == target_baudrate) { active_media = next->ifm_media; break; } } ifmr->ifm_active |= active_media; if (link_info->flow_ctrl.rx) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (link_info->flow_ctrl.tx) ifmr->ifm_active |= IFM_ETH_TXPAUSE; bnxt_report_link(softc); return; } static int bnxt_media_change(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); struct ifmediareq ifmr; int rc; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return EINVAL; switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_100_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100MB; break; case IFM_1000_KX: case IFM_1000_T: case IFM_1000_SGMII: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_1GB; break; case IFM_2500_KX: case IFM_2500_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_2_5GB; break; case IFM_10G_CR1: case IFM_10G_KR: case IFM_10G_LR: case IFM_10G_SR: case IFM_10G_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10GB; break; case IFM_20G_KR2: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_20GB; break; case IFM_25G_CR: case IFM_25G_KR: case IFM_25G_SR: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_25GB; break; case IFM_40G_CR4: case IFM_40G_KR4: case IFM_40G_LR4: case IFM_40G_SR4: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_40GB; break; case IFM_50G_CR2: case IFM_50G_KR2: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_50GB; break; case IFM_100G_CR4: case IFM_100G_KR4: case IFM_100G_LR4: case IFM_100G_SR4: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100GB; break; default: device_printf(softc->dev, "Unsupported media type! Using auto\n"); /* Fall-through */ case IFM_AUTO: // Auto softc->link_info.autoneg |= BNXT_AUTONEG_SPEED; break; } rc = bnxt_hwrm_set_link_setting(softc, true, true, true); bnxt_media_status(softc->ctx, &ifmr); return rc; } static int bnxt_promisc_set(if_ctx_t ctx, int flags) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int rc; if (ifp->if_flags & IFF_ALLMULTI || if_llmaddr_count(ifp) > BNXT_MAX_MC_ADDRS) softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; else softc->vnic_info.rx_mask &= ~HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; if (ifp->if_flags & IFF_PROMISC) softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS | HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN; else softc->vnic_info.rx_mask &= ~(HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS | HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN); rc = bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); return rc; } static uint64_t bnxt_get_counter(if_ctx_t ctx, ift_counter cnt) { if_t ifp = iflib_get_ifp(ctx); if (cnt < IFCOUNTERS) return if_get_counter_default(ifp, cnt); return 0; } static void bnxt_update_admin_status(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); /* * When SR-IOV is enabled, avoid each VF sending this HWRM * request every sec with which firmware timeouts can happen */ if (BNXT_PF(softc)) { bnxt_hwrm_port_qstats(softc); } return; } static void bnxt_if_timer(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); uint64_t ticks_now = ticks; /* Schedule bnxt_update_admin_status() once per sec */ if (ticks_now - softc->admin_ticks >= hz) { softc->admin_ticks = ticks_now; iflib_admin_intr_deferred(ctx); } return; } static void inline bnxt_do_enable_intr(struct bnxt_cp_ring *cpr) { if (cpr->ring.phys_id != (uint16_t)HWRM_NA_SIGNATURE) { /* First time enabling, do not set index */ if (cpr->cons == UINT32_MAX) BNXT_CP_ENABLE_DB(&cpr->ring); else BNXT_CP_IDX_ENABLE_DB(&cpr->ring, cpr->cons); } } static void inline bnxt_do_disable_intr(struct bnxt_cp_ring *cpr) { if (cpr->ring.phys_id != (uint16_t)HWRM_NA_SIGNATURE) BNXT_CP_DISABLE_DB(&cpr->ring); } /* Enable all interrupts */ static void bnxt_intr_enable(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); int i; bnxt_do_enable_intr(&softc->def_cp_ring); for (i = 0; i < softc->nrxqsets; i++) bnxt_do_enable_intr(&softc->rx_cp_rings[i]); return; } /* Enable interrupt for a single queue */ static int bnxt_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_enable_intr(&softc->tx_cp_rings[qid]); return 0; } static int bnxt_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_enable_intr(&softc->rx_cp_rings[qid]); return 0; } /* Disable all interrupts */ static void bnxt_disable_intr(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); int i; /* * NOTE: These TX interrupts should never get enabled, so don't * update the index */ for (i = 0; i < softc->ntxqsets; i++) bnxt_do_disable_intr(&softc->tx_cp_rings[i]); for (i = 0; i < softc->nrxqsets; i++) bnxt_do_disable_intr(&softc->rx_cp_rings[i]); return; } static int bnxt_msix_intr_assign(if_ctx_t ctx, int msix) { struct bnxt_softc *softc = iflib_get_softc(ctx); int rc; int i; char irq_name[16]; rc = iflib_irq_alloc_generic(ctx, &softc->def_cp_ring.irq, softc->def_cp_ring.ring.id + 1, IFLIB_INTR_ADMIN, bnxt_handle_def_cp, softc, 0, "def_cp"); if (rc) { device_printf(iflib_get_dev(ctx), "Failed to register default completion ring handler\n"); return rc; } for (i=0; iscctx->isc_nrxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "rxq%d", i); rc = iflib_irq_alloc_generic(ctx, &softc->rx_cp_rings[i].irq, - softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RX, + softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RXTX, bnxt_handle_rx_cp, &softc->rx_cp_rings[i], i, irq_name); if (rc) { device_printf(iflib_get_dev(ctx), "Failed to register RX completion ring handler\n"); i--; goto fail; } } for (i=0; iscctx->isc_ntxqsets; i++) iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, "tx_cp"); return rc; fail: for (; i>=0; i--) iflib_irq_free(ctx, &softc->rx_cp_rings[i].irq); iflib_irq_free(ctx, &softc->def_cp_ring.irq); return rc; } /* * We're explicitly allowing duplicates here. They will need to be * removed as many times as they are added. */ static void bnxt_vlan_register(if_ctx_t ctx, uint16_t vtag) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *new_tag; new_tag = malloc(sizeof(struct bnxt_vlan_tag), M_DEVBUF, M_NOWAIT); if (new_tag == NULL) return; new_tag->tag = vtag; new_tag->tpid = 8100; SLIST_INSERT_HEAD(&softc->vnic_info.vlan_tags, new_tag, next); }; static void bnxt_vlan_unregister(if_ctx_t ctx, uint16_t vtag) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *vlan_tag; SLIST_FOREACH(vlan_tag, &softc->vnic_info.vlan_tags, next) { if (vlan_tag->tag == vtag) { SLIST_REMOVE(&softc->vnic_info.vlan_tags, vlan_tag, bnxt_vlan_tag, next); free(vlan_tag, M_DEVBUF); break; } } } static int bnxt_wol_config(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); if (!softc) return -EBUSY; if (!bnxt_wol_supported(softc)) return -ENOTSUP; if (if_getcapabilities(ifp) & IFCAP_WOL_MAGIC) { if (!softc->wol) { if (bnxt_hwrm_alloc_wol_fltr(softc)) return -EBUSY; softc->wol = 1; } } else { if (softc->wol) { if (bnxt_hwrm_free_wol_fltr(softc)) return -EBUSY; softc->wol = 0; } } return 0; } static int bnxt_shutdown(if_ctx_t ctx) { bnxt_wol_config(ctx); return 0; } static int bnxt_suspend(if_ctx_t ctx) { bnxt_wol_config(ctx); return 0; } static int bnxt_resume(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_get_wol_settings(softc); return 0; } static int bnxt_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifreq *ifr = (struct ifreq *)data; struct bnxt_ioctl_header *ioh; size_t iol; int rc = ENOTSUP; struct bnxt_ioctl_data iod_storage, *iod = &iod_storage; switch (command) { case SIOCGPRIVATE_0: if ((rc = priv_check(curthread, PRIV_DRIVER)) != 0) goto exit; ioh = ifr_buffer_get_buffer(ifr); iol = ifr_buffer_get_length(ifr); if (iol > sizeof(iod_storage)) return (EINVAL); if ((rc = copyin(ioh, iod, iol)) != 0) goto exit; switch (iod->hdr.type) { case BNXT_HWRM_NVM_FIND_DIR_ENTRY: { struct bnxt_ioctl_hwrm_nvm_find_dir_entry *find = &iod->find; rc = bnxt_hwrm_nvm_find_dir_entry(softc, find->type, &find->ordinal, find->ext, &find->index, find->use_index, find->search_opt, &find->data_length, &find->item_length, &find->fw_ver); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_READ: { struct bnxt_ioctl_hwrm_nvm_read *rd = &iod->read; struct iflib_dma_info dma_data; size_t offset; size_t remain; size_t csize; /* * Some HWRM versions can't read more than 0x8000 bytes */ rc = iflib_dma_alloc(softc->ctx, min(rd->length, 0x8000), &dma_data, BUS_DMA_NOWAIT); if (rc) break; for (remain = rd->length, offset = 0; remain && offset < rd->length; offset += 0x8000) { csize = min(remain, 0x8000); rc = bnxt_hwrm_nvm_read(softc, rd->index, rd->offset + offset, csize, &dma_data); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); break; } else { copyout(dma_data.idi_vaddr, rd->data + offset, csize); iod->hdr.rc = 0; } remain -= csize; } if (iod->hdr.rc == 0) copyout(iod, ioh, iol); iflib_dma_free(&dma_data); rc = 0; goto exit; } case BNXT_HWRM_FW_RESET: { struct bnxt_ioctl_hwrm_fw_reset *rst = &iod->reset; rc = bnxt_hwrm_fw_reset(softc, rst->processor, &rst->selfreset); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_FW_QSTATUS: { struct bnxt_ioctl_hwrm_fw_qstatus *qstat = &iod->status; rc = bnxt_hwrm_fw_qstatus(softc, qstat->processor, &qstat->selfreset); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_WRITE: { struct bnxt_ioctl_hwrm_nvm_write *wr = &iod->write; rc = bnxt_hwrm_nvm_write(softc, wr->data, true, wr->type, wr->ordinal, wr->ext, wr->attr, wr->option, wr->data_length, wr->keep, &wr->item_length, &wr->index); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_ERASE_DIR_ENTRY: { struct bnxt_ioctl_hwrm_nvm_erase_dir_entry *erase = &iod->erase; rc = bnxt_hwrm_nvm_erase_dir_entry(softc, erase->index); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_GET_DIR_INFO: { struct bnxt_ioctl_hwrm_nvm_get_dir_info *info = &iod->dir_info; rc = bnxt_hwrm_nvm_get_dir_info(softc, &info->entries, &info->entry_length); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_GET_DIR_ENTRIES: { struct bnxt_ioctl_hwrm_nvm_get_dir_entries *get = &iod->dir_entries; struct iflib_dma_info dma_data; rc = iflib_dma_alloc(softc->ctx, get->max_size, &dma_data, BUS_DMA_NOWAIT); if (rc) break; rc = bnxt_hwrm_nvm_get_dir_entries(softc, &get->entries, &get->entry_length, &dma_data); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { copyout(dma_data.idi_vaddr, get->data, get->entry_length * get->entries); iod->hdr.rc = 0; copyout(iod, ioh, iol); } iflib_dma_free(&dma_data); rc = 0; goto exit; } case BNXT_HWRM_NVM_VERIFY_UPDATE: { struct bnxt_ioctl_hwrm_nvm_verify_update *vrfy = &iod->verify; rc = bnxt_hwrm_nvm_verify_update(softc, vrfy->type, vrfy->ordinal, vrfy->ext); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_INSTALL_UPDATE: { struct bnxt_ioctl_hwrm_nvm_install_update *inst = &iod->install; rc = bnxt_hwrm_nvm_install_update(softc, inst->install_type, &inst->installed_items, &inst->result, &inst->problem_item, &inst->reset_required); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_MODIFY: { struct bnxt_ioctl_hwrm_nvm_modify *mod = &iod->modify; rc = bnxt_hwrm_nvm_modify(softc, mod->index, mod->offset, mod->data, true, mod->length); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_FW_GET_TIME: { struct bnxt_ioctl_hwrm_fw_get_time *gtm = &iod->get_time; rc = bnxt_hwrm_fw_get_time(softc, >m->year, >m->month, >m->day, >m->hour, >m->minute, >m->second, >m->millisecond, >m->zone); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_FW_SET_TIME: { struct bnxt_ioctl_hwrm_fw_set_time *stm = &iod->set_time; rc = bnxt_hwrm_fw_set_time(softc, stm->year, stm->month, stm->day, stm->hour, stm->minute, stm->second, stm->millisecond, stm->zone); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } } break; } exit: return rc; } /* * Support functions */ static int bnxt_probe_phy(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; int rc = 0; rc = bnxt_update_link(softc, false); if (rc) { device_printf(softc->dev, "Probe phy can't update link (rc: %x)\n", rc); return (rc); } /*initialize the ethool setting copy with NVM settings */ if (link_info->auto_mode != HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_NONE) link_info->autoneg |= BNXT_AUTONEG_SPEED; link_info->req_duplex = link_info->duplex_setting; if (link_info->autoneg & BNXT_AUTONEG_SPEED) link_info->req_link_speed = link_info->auto_link_speed; else link_info->req_link_speed = link_info->force_link_speed; return (rc); } static void bnxt_add_media_types(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; uint16_t supported; uint8_t phy_type = get_phy_type(softc); supported = link_info->support_speeds; /* Auto is always supported */ ifmedia_add(softc->media, IFM_ETHER | IFM_AUTO, 0, NULL); if (softc->flags & BNXT_FLAG_NPAR) return; switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASECR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASECR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_L: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_S: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_N: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_CR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_50GB, IFM_50G_CR2); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_CR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_CR); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_CR1); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_T); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASELR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASELR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_LR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_LR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_LR); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_LR); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_LX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR10: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASESR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASEER4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASEER4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASESR: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASESX: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_SR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_SR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_SR); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_SR); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_SX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_KR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_50GB, IFM_50G_KR2); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_KR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_KR); BNXT_IFMEDIA_ADD(supported, SPEEDS_20GB, IFM_20G_KR2); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_KR); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_KX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_ACTIVE_CABLE: BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_ACC); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_AOC); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASECX: BNXT_IFMEDIA_ADD(supported, SPEEDS_1GBHD, IFM_1000_CX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASET: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASETE: BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_2_5GB, IFM_2500_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_100MB, IFM_100_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_10MB, IFM_10_T); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX: BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_KR); BNXT_IFMEDIA_ADD(supported, SPEEDS_2_5GB, IFM_2500_KX); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_KX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_SGMIIEXTPHY: BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_SGMII); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN: /* Only Autoneg is supported for TYPE_UNKNOWN */ device_printf(softc->dev, "Unknown phy type\n"); break; default: /* Only Autoneg is supported for new phy type values */ device_printf(softc->dev, "phy type %d not supported by driver\n", phy_type); break; } return; } static int bnxt_map_bar(struct bnxt_softc *softc, struct bnxt_bar_info *bar, int bar_num, bool shareable) { uint32_t flag; if (bar->res != NULL) { device_printf(softc->dev, "Bar %d already mapped\n", bar_num); return EDOOFUS; } bar->rid = PCIR_BAR(bar_num); flag = RF_ACTIVE; if (shareable) flag |= RF_SHAREABLE; if ((bar->res = bus_alloc_resource_any(softc->dev, SYS_RES_MEMORY, &bar->rid, flag)) == NULL) { device_printf(softc->dev, "PCI BAR%d mapping failure\n", bar_num); return (ENXIO); } bar->tag = rman_get_bustag(bar->res); bar->handle = rman_get_bushandle(bar->res); bar->size = rman_get_size(bar->res); return 0; } static int bnxt_pci_mapping(struct bnxt_softc *softc) { int rc; rc = bnxt_map_bar(softc, &softc->hwrm_bar, 0, true); if (rc) return rc; rc = bnxt_map_bar(softc, &softc->doorbell_bar, 2, false); return rc; } static void bnxt_pci_mapping_free(struct bnxt_softc *softc) { if (softc->hwrm_bar.res != NULL) bus_release_resource(softc->dev, SYS_RES_MEMORY, softc->hwrm_bar.rid, softc->hwrm_bar.res); softc->hwrm_bar.res = NULL; if (softc->doorbell_bar.res != NULL) bus_release_resource(softc->dev, SYS_RES_MEMORY, softc->doorbell_bar.rid, softc->doorbell_bar.res); softc->doorbell_bar.res = NULL; } static int bnxt_update_link(struct bnxt_softc *softc, bool chng_link_state) { struct bnxt_link_info *link_info = &softc->link_info; uint8_t link_up = link_info->link_up; int rc = 0; rc = bnxt_hwrm_port_phy_qcfg(softc); if (rc) goto exit; /* TODO: need to add more logic to report VF link */ if (chng_link_state) { if (link_info->phy_link_status == HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK) link_info->link_up = 1; else link_info->link_up = 0; if (link_up != link_info->link_up) bnxt_report_link(softc); } else { /* always link down if not require to update link state */ link_info->link_up = 0; } exit: return rc; } void bnxt_report_link(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; const char *duplex = NULL, *flow_ctrl = NULL; if (link_info->link_up == link_info->last_link_up) { if (!link_info->link_up) return; if ((link_info->duplex == link_info->last_duplex) && (!(BNXT_IS_FLOW_CTRL_CHANGED(link_info)))) return; } if (link_info->link_up) { if (link_info->duplex == HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL) duplex = "full duplex"; else duplex = "half duplex"; if (link_info->flow_ctrl.tx & link_info->flow_ctrl.rx) flow_ctrl = "FC - receive & transmit"; else if (link_info->flow_ctrl.tx) flow_ctrl = "FC - transmit"; else if (link_info->flow_ctrl.rx) flow_ctrl = "FC - receive"; else flow_ctrl = "FC - none"; iflib_link_state_change(softc->ctx, LINK_STATE_UP, IF_Gbps(100)); device_printf(softc->dev, "Link is UP %s, %s - %d Mbps \n", duplex, flow_ctrl, (link_info->link_speed * 100)); } else { iflib_link_state_change(softc->ctx, LINK_STATE_DOWN, bnxt_get_baudrate(&softc->link_info)); device_printf(softc->dev, "Link is Down\n"); } link_info->last_link_up = link_info->link_up; link_info->last_duplex = link_info->duplex; link_info->last_flow_ctrl.tx = link_info->flow_ctrl.tx; link_info->last_flow_ctrl.rx = link_info->flow_ctrl.rx; link_info->last_flow_ctrl.autoneg = link_info->flow_ctrl.autoneg; /* update media types */ ifmedia_removeall(softc->media); bnxt_add_media_types(softc); ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO); } static int bnxt_handle_rx_cp(void *arg) { struct bnxt_cp_ring *cpr = arg; /* Disable further interrupts for this queue */ BNXT_CP_DISABLE_DB(&cpr->ring); return FILTER_SCHEDULE_THREAD; } static int bnxt_handle_def_cp(void *arg) { struct bnxt_softc *softc = arg; BNXT_CP_DISABLE_DB(&softc->def_cp_ring.ring); GROUPTASK_ENQUEUE(&softc->def_cp_task); return FILTER_HANDLED; } static void bnxt_clear_ids(struct bnxt_softc *softc) { int i; softc->def_cp_ring.stats_ctx_id = HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; for (i = 0; i < softc->ntxqsets; i++) { softc->tx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; } for (i = 0; i < softc->nrxqsets; i++) { softc->rx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->grp_info[i].grp_id = (uint16_t)HWRM_NA_SIGNATURE; } softc->vnic_info.filter_id = -1; softc->vnic_info.id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.rss_id = (uint16_t)HWRM_NA_SIGNATURE; memset(softc->vnic_info.rss_grp_tbl.idi_vaddr, 0xff, softc->vnic_info.rss_grp_tbl.idi_size); } static void bnxt_mark_cpr_invalid(struct bnxt_cp_ring *cpr) { struct cmpl_base *cmp = (void *)cpr->ring.vaddr; int i; for (i = 0; i < cpr->ring.ring_size; i++) cmp[i].info3_v = !cpr->v_bit; } static void bnxt_handle_async_event(struct bnxt_softc *softc, struct cmpl_base *cmpl) { struct hwrm_async_event_cmpl *ae = (void *)cmpl; uint16_t async_id = le16toh(ae->event_id); struct ifmediareq ifmr; switch (async_id) { case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: bnxt_media_status(softc->ctx, &ifmr); break; case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR: device_printf(softc->dev, "Unhandled async completion type %u\n", async_id); break; default: device_printf(softc->dev, "Unknown async completion type %u\n", async_id); break; } } static void bnxt_def_cp_task(void *context) { if_ctx_t ctx = context; struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_cp_ring *cpr = &softc->def_cp_ring; /* Handle completions on the default completion ring */ struct cmpl_base *cmpl; uint32_t cons = cpr->cons; bool v_bit = cpr->v_bit; bool last_v_bit; uint32_t last_cons; uint16_t type; for (;;) { last_cons = cons; last_v_bit = v_bit; NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); cmpl = &((struct cmpl_base *)cpr->ring.vaddr)[cons]; if (!CMP_VALID(cmpl, v_bit)) break; type = le16toh(cmpl->type) & CMPL_BASE_TYPE_MASK; switch (type) { case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT: bnxt_handle_async_event(softc, cmpl); break; case CMPL_BASE_TYPE_TX_L2: case CMPL_BASE_TYPE_RX_L2: case CMPL_BASE_TYPE_RX_AGG: case CMPL_BASE_TYPE_RX_TPA_START: case CMPL_BASE_TYPE_RX_TPA_END: case CMPL_BASE_TYPE_STAT_EJECT: case CMPL_BASE_TYPE_HWRM_DONE: case CMPL_BASE_TYPE_HWRM_FWD_REQ: case CMPL_BASE_TYPE_HWRM_FWD_RESP: case CMPL_BASE_TYPE_CQ_NOTIFICATION: case CMPL_BASE_TYPE_SRQ_EVENT: case CMPL_BASE_TYPE_DBQ_EVENT: case CMPL_BASE_TYPE_QP_EVENT: case CMPL_BASE_TYPE_FUNC_EVENT: device_printf(softc->dev, "Unhandled completion type %u\n", type); break; default: device_printf(softc->dev, "Unknown completion type %u\n", type); break; } } cpr->cons = last_cons; cpr->v_bit = last_v_bit; BNXT_CP_IDX_ENABLE_DB(&cpr->ring, cpr->cons); } static uint8_t get_phy_type(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; uint8_t phy_type = link_info->phy_type; uint16_t supported; if (phy_type != HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN) return phy_type; /* Deduce the phy type from the media type and supported speeds */ supported = link_info->support_speeds; if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_TP) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET; if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_DAC) { if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2_5GB) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX; if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_20GB) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR; return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR; } if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_FIBRE) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR; return phy_type; } bool bnxt_check_hwrm_version(struct bnxt_softc *softc) { char buf[16]; sprintf(buf, "%hhu.%hhu.%hhu", softc->ver_info->hwrm_min_major, softc->ver_info->hwrm_min_minor, softc->ver_info->hwrm_min_update); if (softc->ver_info->hwrm_min_major > softc->ver_info->hwrm_if_major) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } else if(softc->ver_info->hwrm_min_major == softc->ver_info->hwrm_if_major) { if (softc->ver_info->hwrm_min_minor > softc->ver_info->hwrm_if_minor) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } else if (softc->ver_info->hwrm_min_minor == softc->ver_info->hwrm_if_minor) { if (softc->ver_info->hwrm_min_update > softc->ver_info->hwrm_if_update) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } } } return true; } static uint64_t bnxt_get_baudrate(struct bnxt_link_info *link) { switch (link->link_speed) { case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100MB: return IF_Mbps(100); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_1GB: return IF_Gbps(1); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2GB: return IF_Gbps(2); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2_5GB: return IF_Mbps(2500); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10GB: return IF_Gbps(10); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_20GB: return IF_Gbps(20); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_25GB: return IF_Gbps(25); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_40GB: return IF_Gbps(40); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_50GB: return IF_Gbps(50); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100GB: return IF_Gbps(100); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB: return IF_Mbps(10); } return IF_Gbps(100); } static void bnxt_get_wol_settings(struct bnxt_softc *softc) { uint16_t wol_handle = 0; if (!bnxt_wol_supported(softc)) return; do { wol_handle = bnxt_hwrm_get_wol_fltrs(softc, wol_handle); } while (wol_handle && wol_handle != BNXT_NO_MORE_WOL_FILTERS); } diff --git a/sys/dev/ice/if_ice_iflib.c b/sys/dev/ice/if_ice_iflib.c index dffdbc391f78..a3004b8f6c8d 100644 --- a/sys/dev/ice/if_ice_iflib.c +++ b/sys/dev/ice/if_ice_iflib.c @@ -1,2933 +1,2933 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2020, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ /** * @file if_ice_iflib.c * @brief iflib driver implementation * * Contains the main entry point for the iflib driver implementation. It * implements the various ifdi driver methods, and sets up the module and * driver values to load an iflib driver. */ #include "ice_iflib.h" #include "ice_drv_info.h" #include "ice_switch.h" #include "ice_sched.h" #include #include #include #include #include /* * Device method prototypes */ static void *ice_register(device_t); static int ice_if_attach_pre(if_ctx_t); static int ice_attach_pre_recovery_mode(struct ice_softc *sc); static int ice_if_attach_post(if_ctx_t); static void ice_attach_post_recovery_mode(struct ice_softc *sc); static int ice_if_detach(if_ctx_t); static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ice_if_queues_free(if_ctx_t ctx); static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ice_if_intr_enable(if_ctx_t ctx); static void ice_if_intr_disable(if_ctx_t ctx); static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ice_if_promisc_set(if_ctx_t ctx, int flags); static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ice_if_media_change(if_ctx_t ctx); static void ice_if_init(if_ctx_t ctx); static void ice_if_timer(if_ctx_t ctx, uint16_t qid); static void ice_if_update_admin_status(if_ctx_t ctx); static void ice_if_multi_set(if_ctx_t ctx); static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void ice_if_stop(if_ctx_t ctx); static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter); static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ice_if_suspend(if_ctx_t ctx); static int ice_if_resume(if_ctx_t ctx); static int ice_msix_que(void *arg); static int ice_msix_admin(void *arg); /* * Helper function prototypes */ static int ice_pci_mapping(struct ice_softc *sc); static void ice_free_pci_mapping(struct ice_softc *sc); static void ice_update_link_status(struct ice_softc *sc, bool update_media); static void ice_init_device_features(struct ice_softc *sc); static void ice_init_tx_tracking(struct ice_vsi *vsi); static void ice_handle_reset_event(struct ice_softc *sc); static void ice_handle_pf_reset_request(struct ice_softc *sc); static void ice_prepare_for_reset(struct ice_softc *sc); static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc); static void ice_rebuild(struct ice_softc *sc); static void ice_rebuild_recovery_mode(struct ice_softc *sc); static void ice_free_irqvs(struct ice_softc *sc); static void ice_update_rx_mbuf_sz(struct ice_softc *sc); static void ice_poll_for_media_avail(struct ice_softc *sc); static void ice_setup_scctx(struct ice_softc *sc); static int ice_allocate_msix(struct ice_softc *sc); static void ice_admin_timer(void *arg); static void ice_transition_recovery_mode(struct ice_softc *sc); static void ice_transition_safe_mode(struct ice_softc *sc); /* * Device Interface Declaration */ /** * @var ice_methods * @brief ice driver method entry points * * List of device methods implementing the generic device interface used by * the device stack to interact with the ice driver. Since this is an iflib * driver, most of the methods point to the generic iflib implementation. */ static device_method_t ice_methods[] = { /* Device interface */ DEVMETHOD(device_register, ice_register), DEVMETHOD(device_probe, iflib_device_probe_vendor), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; /** * @var ice_iflib_methods * @brief iflib method entry points * * List of device methods used by the iflib stack to interact with this * driver. These are the real main entry points used to interact with this * driver. */ static device_method_t ice_iflib_methods[] = { DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre), DEVMETHOD(ifdi_attach_post, ice_if_attach_post), DEVMETHOD(ifdi_detach, ice_if_detach), DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc), DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign), DEVMETHOD(ifdi_queues_free, ice_if_queues_free), DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set), DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable), DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable), DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable), DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set), DEVMETHOD(ifdi_media_status, ice_if_media_status), DEVMETHOD(ifdi_media_change, ice_if_media_change), DEVMETHOD(ifdi_init, ice_if_init), DEVMETHOD(ifdi_stop, ice_if_stop), DEVMETHOD(ifdi_timer, ice_if_timer), DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ice_if_multi_set), DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ice_if_get_counter), DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl), DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req), DEVMETHOD(ifdi_suspend, ice_if_suspend), DEVMETHOD(ifdi_resume, ice_if_resume), DEVMETHOD_END }; /** * @var ice_driver * @brief driver structure for the generic device stack * * driver_t definition used to setup the generic device methods. */ static driver_t ice_driver = { .name = "ice", .methods = ice_methods, .size = sizeof(struct ice_softc), }; /** * @var ice_iflib_driver * @brief driver structure for the iflib stack * * driver_t definition used to setup the iflib device methods. */ static driver_t ice_iflib_driver = { .name = "ice", .methods = ice_iflib_methods, .size = sizeof(struct ice_softc), }; extern struct if_txrx ice_txrx; extern struct if_txrx ice_recovery_txrx; /** * @var ice_sctx * @brief ice driver shared context * * Structure defining shared values (context) that is used by all instances of * the device. Primarily used to setup details about how the iflib stack * should treat this driver. Also defines the default, minimum, and maximum * number of descriptors in each ring. */ static struct if_shared_ctx ice_sctx = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = ICE_MAX_FRAME_SIZE, /* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but * that doesn't make sense since that would be larger than the maximum * size of a single packet. */ .isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE, /* XXX: This is only used by iflib to ensure that * scctx->isc_tx_tso_size_max + the VLAN header is a valid size. */ .isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header), /* XXX: This is used by iflib to set the number of segments in the TSO * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the * related ifnet parameter. */ .isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = ICE_MAX_FRAME_SIZE, .isc_rx_nsegments = ICE_MAX_RX_SEGS, .isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ice_vendor_info_array, .isc_driver_version = __DECONST(char *, ice_driver_version), .isc_driver = &ice_iflib_driver, /* * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available * for hardware checksum offload * * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the * IP sum field, required by our hardware to calculate valid TSO * checksums. * * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs * even when the interface is down. * * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X * vectors manually instead of relying on iflib code to do this. */ .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX, .isc_nrxd_min = {ICE_MIN_DESC_COUNT}, .isc_ntxd_min = {ICE_MIN_DESC_COUNT}, .isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT}, .isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT}, }; /** * @var ice_devclass * @brief ice driver device class * * device class used to setup the ice driver module kobject class. */ devclass_t ice_devclass; DRIVER_MODULE(ice, pci, ice_driver, ice_devclass, ice_module_event_handler, 0); MODULE_VERSION(ice, 1); MODULE_DEPEND(ice, pci, 1, 1, 1); MODULE_DEPEND(ice, ether, 1, 1, 1); MODULE_DEPEND(ice, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array); /* Static driver-wide sysctls */ #include "ice_iflib_sysctls.h" /** * ice_pci_mapping - Map PCI BAR memory * @sc: device private softc * * Map PCI BAR 0 for device operation. */ static int ice_pci_mapping(struct ice_softc *sc) { int rc; /* Map BAR0 */ rc = ice_map_bar(sc->dev, &sc->bar0, 0); if (rc) return rc; return 0; } /** * ice_free_pci_mapping - Release PCI BAR memory * @sc: device private softc * * Release PCI BARs which were previously mapped by ice_pci_mapping(). */ static void ice_free_pci_mapping(struct ice_softc *sc) { /* Free BAR0 */ ice_free_bar(sc->dev, &sc->bar0); } /* * Device methods */ /** * ice_register - register device method callback * @dev: the device being registered * * Returns a pointer to the shared context structure, which is used by iflib. */ static void * ice_register(device_t dev __unused) { return &ice_sctx; } /* ice_register */ /** * ice_setup_scctx - Setup the iflib softc context structure * @sc: the device private structure * * Setup the parameters in if_softc_ctx_t structure used by the iflib stack * when loading. */ static void ice_setup_scctx(struct ice_softc *sc) { if_softc_ctx_t scctx = sc->scctx; struct ice_hw *hw = &sc->hw; bool safe_mode, recovery_mode; safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE); recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* * If the driver loads in Safe mode or Recovery mode, limit iflib to * a single queue pair. */ if (safe_mode || recovery_mode) { scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets_max = 1; scctx->isc_nrxqsets_max = 1; } else { /* * iflib initially sets the isc_ntxqsets and isc_nrxqsets to * the values of the override sysctls. Cache these initial * values so that the driver can be aware of what the iflib * sysctl value is when setting up MSI-X vectors. */ sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets; sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets; if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size; if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size; scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq; scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq; /* * Sanity check that the iflib sysctl values are within the * maximum supported range. */ if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max) sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max; if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max) sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max; } scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct ice_tx_desc), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN); scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = ICE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE; scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR); scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size; /* * If the driver loads in recovery mode, disable Tx/Rx functionality */ if (recovery_mode) scctx->isc_txrx = &ice_recovery_txrx; else scctx->isc_txrx = &ice_txrx; /* * If the driver loads in Safe mode or Recovery mode, disable * advanced features including hardware offloads. */ if (safe_mode || recovery_mode) { scctx->isc_capenable = ICE_SAFE_CAPS; scctx->isc_tx_csum_flags = 0; } else { scctx->isc_capenable = ICE_FULL_CAPS; scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD; } scctx->isc_capabilities = scctx->isc_capenable; } /* ice_setup_scctx */ /** * ice_if_attach_pre - Early device attach logic * @ctx: the iflib context structure * * Called by iflib during the attach process. Earliest main driver entry * point which performs necessary hardware and driver initialization. Called * before the Tx and Rx queues are allocated. */ static int ice_if_attach_pre(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; enum ice_status status; if_softc_ctx_t scctx; struct ice_hw *hw; device_t dev; int err; device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n"); sc->ctx = ctx; sc->media = iflib_get_media(ctx); sc->sctx = iflib_get_sctx(ctx); sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx); dev = sc->dev = iflib_get_dev(ctx); scctx = sc->scctx = iflib_get_softc_ctx(ctx); hw = &sc->hw; hw->back = sc; snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0); ASSERT_CTX_LOCKED(sc); if (ice_pci_mapping(sc)) { err = (ENXIO); goto destroy_admin_timer; } /* Save off the PCI information */ ice_save_pci_info(hw, dev); /* create tunables as early as possible */ ice_add_device_tunables(sc); /* Setup ControlQ lengths */ ice_set_ctrlq_len(hw); fw_mode = ice_get_fw_mode(hw); if (fw_mode == ICE_FW_MODE_REC) { device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } /* Initialize the hw data structure */ status = ice_init_hw(hw); if (status) { if (status == ICE_ERR_FW_API_VER) { /* Enter recovery mode, so that the driver remains * loaded. This way, if the system administrator * cannot update the driver, they may still attempt to * downgrade the NVM. */ err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } else { err = EIO; device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } goto free_pci_mapping; } /* Notify firmware of the device driver version */ err = ice_send_version(sc); if (err) goto deinit_hw; ice_load_pkg_file(sc); err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto deinit_hw; } ice_print_nvm_version(sc); ice_init_device_features(sc); /* Setup the MAC address */ iflib_set_mac(ctx, hw->port_info->mac.lan_addr); /* Setup the iflib softc context structure */ ice_setup_scctx(sc); /* Initialize the Tx queue manager */ err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq); if (err) { device_printf(dev, "Unable to initialize Tx queue manager: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize the Rx queue manager */ err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq); if (err) { device_printf(dev, "Unable to initialize Rx queue manager: %s\n", ice_err_str(err)); goto free_tx_qmgr; } /* Initialize the interrupt resource manager */ err = ice_alloc_intr_tracking(sc); if (err) /* Errors are already printed */ goto free_rx_qmgr; /* Determine maximum number of VSIs we'll prepare for */ sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE, hw->func_caps.guar_num_vsi); if (!sc->num_available_vsi) { err = EIO; device_printf(dev, "No VSIs allocated to host\n"); goto free_intr_tracking; } /* Allocate storage for the VSI pointers */ sc->all_vsi = (struct ice_vsi **) malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi, M_ICE, M_WAITOK | M_ZERO); if (!sc->all_vsi) { err = ENOMEM; device_printf(dev, "Unable to allocate VSI array\n"); goto free_intr_tracking; } /* * Prepare the statically allocated primary PF VSI in the softc * structure. Other VSIs will be dynamically allocated as needed. */ ice_setup_pf_vsi(sc); err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max, scctx->isc_nrxqsets_max); if (err) { device_printf(dev, "Unable to allocate VSI Queue maps\n"); goto free_main_vsi; } /* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */ err = ice_allocate_msix(sc); if (err) goto free_main_vsi; return 0; free_main_vsi: /* ice_release_vsi will free the queue maps if they were allocated */ ice_release_vsi(&sc->pf_vsi); free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; free_intr_tracking: ice_free_intr_tracking(sc); free_rx_qmgr: ice_resmgr_destroy(&sc->rx_qmgr); free_tx_qmgr: ice_resmgr_destroy(&sc->tx_qmgr); deinit_hw: ice_deinit_hw(hw); free_pci_mapping: ice_free_pci_mapping(sc); destroy_admin_timer: mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); return err; } /* ice_if_attach_pre */ /** * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery * @sc: the device private softc * * Loads the device driver in limited Firmware Recovery mode, intended to * allow users to update the firmware to attempt to recover the device. * * @remark We may enter recovery mode in case either (a) the firmware is * detected to be in an invalid state and must be re-programmed, or (b) the * driver detects that the loaded firmware has a non-compatible API version * that the driver cannot operate with. */ static int ice_attach_pre_recovery_mode(struct ice_softc *sc) { ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* Setup the iflib softc context */ ice_setup_scctx(sc); /* Setup the PF VSI back pointer */ sc->pf_vsi.sc = sc; /* * We still need to allocate MSI-X vectors since we need one vector to * run the administrative admin interrupt */ return ice_allocate_msix(sc); } /** * ice_update_link_status - notify OS of link state change * @sc: device private softc structure * @update_media: true if we should update media even if link didn't change * * Called to notify iflib core of link status changes. Should be called once * during attach_post, and whenever link status changes during runtime. * * This call only updates the currently supported media types if the link * status changed, or if update_media is set to true. */ static void ice_update_link_status(struct ice_softc *sc, bool update_media) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Never report link up when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Report link status to iflib only once each time it changes */ if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) { if (sc->link_up) { /* link is up */ uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info); ice_set_default_local_lldp_mib(sc); iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate); ice_link_up_msg(sc); update_media = true; } else { /* link is down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); update_media = true; } } /* Update the supported media types */ if (update_media) { status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /* TODO: notify VFs of link state change */ } /** * ice_if_attach_post - Late device attach logic * @ctx: the iflib context structure * * Called by iflib to finish up attaching the device. Performs any attach * logic which must wait until after the Tx and Rx queues have been * allocated. */ static int ice_if_attach_post(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int err; ASSERT_CTX_LOCKED(sc); /* We don't yet support loading if MSI-X is not supported */ if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) { device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n"); return (ENOTSUP); } /* The ifnet structure hasn't yet been initialized when the attach_pre * handler is called, so wait until attach_post to setup the * isc_max_frame_size. */ sc->ifp = ifp; sc->scctx->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * If we are in recovery mode, only perform a limited subset of * initialization to support NVM recovery. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_attach_post_recovery_mode(sc); return (0); } sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to initialize Main VSI: %s\n", ice_err_str(err)); return err; } /* Configure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to configure RSS for the main VSI, err %s\n", ice_err_str(err)); return err; } /* Configure switch to drop transmitted LLDP and PAUSE frames */ err = ice_cfg_pf_ethertype_filters(sc); if (err) return err; ice_get_and_print_bus_info(sc); ice_set_link_management_mode(sc); ice_init_saved_phy_cfg(sc); ice_add_device_sysctls(sc); /* Get DCBX/LLDP state and start DCBX agent */ ice_init_dcb_setup(sc); /* Setup link configuration parameters */ ice_init_link_configuration(sc); ice_update_link_status(sc, true); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); return 0; } /* ice_if_attach_post */ /** * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery * @sc: the device private softc * * Performs minimal work to prepare the driver to recover an NVM in case the * firmware is in recovery mode. */ static void ice_attach_post_recovery_mode(struct ice_softc *sc) { /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); } /** * ice_free_irqvs - Free IRQ vector memory * @sc: the device private softc structure * * Free IRQ vector memory allocated during ice_if_msix_intr_assign. */ static void ice_free_irqvs(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; if_ctx_t ctx = sc->ctx; int i; /* If the irqvs array is NULL, then there are no vectors to free */ if (sc->irqvs == NULL) return; /* Free the IRQ vectors */ for (i = 0; i < sc->num_irq_vectors; i++) iflib_irq_free(ctx, &sc->irqvs[i].irq); /* Clear the irqv pointers */ for (i = 0; i < vsi->num_rx_queues; i++) vsi->rx_queues[i].irqv = NULL; for (i = 0; i < vsi->num_tx_queues; i++) vsi->tx_queues[i].irqv = NULL; /* Release the vector array memory */ free(sc->irqvs, M_ICE); sc->irqvs = NULL; sc->num_irq_vectors = 0; } /** * ice_if_detach - Device driver detach logic * @ctx: iflib context structure * * Perform device shutdown logic to detach the device driver. * * Note that there is no guarantee of the ordering of ice_if_queues_free() and * ice_if_detach(). It is possible for the functions to be called in either * order, and they must not assume to have a strict ordering. */ static int ice_if_detach(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int i; ASSERT_CTX_LOCKED(sc); /* Indicate that we're detaching */ ice_set_state(&sc->state, ICE_STATE_DETACHING); /* Stop the admin timer */ mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); /* Free allocated media types */ ifmedia_removeall(sc->media); /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_queues_free() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X resources */ ice_free_irqvs(sc); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Release MSI-X memory */ pci_release_msi(sc->dev); if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } ice_free_intr_tracking(sc); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) ice_deinit_hw(&sc->hw); ice_free_pci_mapping(sc); return 0; } /* ice_if_detach */ /** * ice_if_tx_queues_alloc - Allocate Tx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @ntxqs: the number of Tx queues per set (should always be 1) * @ntxqsets: the number of Tx queue sets to allocate * * Called by iflib to allocate Tx queues for the device. Allocates driver * memory to track each queue, the status arrays used for descriptor * status reporting, and Tx queue sysctls. */ static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only ntxqs, int ntxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int err, i, j; MPASS(ntxqs == 1); MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_WAITOK | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Tx queue memory\n"); return (ENOMEM); } /* Allocate report status arrays */ for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (!(txq->tx_rsq = (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_WAITOK))) { device_printf(sc->dev, "Unable to allocate tx_rsq memory\n"); err = ENOMEM; goto free_tx_queues; } /* Initialize report status array */ for (j = 0; j < sc->scctx->isc_ntxd[0]; j++) txq->tx_rsq[j] = QIDX_INVALID; } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Tx queue sysctls context */ ice_vsi_add_txqs_ctx(vsi); for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { txq->me = i; txq->vsi = vsi; /* store the queue size for easier access */ txq->desc_count = sc->scctx->isc_ntxd[0]; /* get the virtual and physical address of the hardware queues */ txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); txq->tx_base = (struct ice_tx_desc *)vaddrs[i]; txq->tx_paddr = paddrs[i]; ice_add_txq_sysctls(txq); } vsi->num_tx_queues = ntxqsets; return (0); free_tx_queues: for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; return err; } /** * ice_if_rx_queues_alloc - Allocate Rx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @nrxqs: number of Rx queues per set (should always be 1) * @nrxqsets: number of Rx queue sets to allocate * * Called by iflib to allocate Rx queues for the device. Allocates driver * memory to track each queue, as well as sets up the Rx queue sysctls. */ static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only nrxqs, int nrxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_rx_queue *rxq; int err, i; MPASS(nrxqs == 1); MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_WAITOK | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Rx queue memory\n"); return (ENOMEM); } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_rx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Rx queue sysctls context */ ice_vsi_add_rxqs_ctx(vsi); for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) { rxq->me = i; rxq->vsi = vsi; /* store the queue size for easier access */ rxq->desc_count = sc->scctx->isc_nrxd[0]; /* get the virtual and physical address of the hardware queues */ rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i]; rxq->rx_paddr = paddrs[i]; ice_add_rxq_sysctls(rxq); } vsi->num_rx_queues = nrxqsets; return (0); free_rx_queues: free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; return err; } /** * ice_if_queues_free - Free queue memory * @ctx: the iflib context structure * * Free queue memory allocated by ice_if_tx_queues_alloc() and * ice_if_rx_queues_alloc(). * * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be * called in the same order. It's possible for ice_if_queues_free() to be * called prior to ice_if_detach(), and vice versa. * * For this reason, the main VSI is a static member of the ice_softc, which is * not free'd until after iflib finishes calling both of these functions. * * Thus, care must be taken in how we manage the memory being freed by this * function, and in what tasks it can and must perform. */ static void ice_if_queues_free(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int i; /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_detach() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */ ice_free_irqvs(sc); if (vsi->tx_queues != NULL) { /* free the tx_rsq arrays */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; vsi->num_tx_queues = 0; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; vsi->num_rx_queues = 0; } } /** * ice_msix_que - Fast interrupt handler for MSI-X receive queues * @arg: The Rx queue memory * * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when * an MSI-X interrupt for a given queue is triggered. Currently this just asks * iflib to schedule the main Rx thread. */ static int ice_msix_que(void *arg) { struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg; /* TODO: dynamic ITR algorithm?? */ return (FILTER_SCHEDULE_THREAD); } /** * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt * @arg: pointer to device softc memory * * Called by iflib when an administrative interrupt occurs. Should perform any * fast logic for handling the interrupt cause, and then indicate whether the * admin task needs to be queued. */ static int ice_msix_admin(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u32 oicr; /* There is no safe way to modify the enabled miscellaneous causes of * the OICR vector at runtime, as doing so would be prone to race * conditions. Reading PFINT_OICR will unmask the associated interrupt * causes and allow future interrupts to occur. The admin interrupt * vector will not be re-enabled until after we exit this function, * but any delayed tasks must be resilient against possible "late * arrival" interrupts that occur while we're already handling the * task. This is done by using state bits and serializing these * delayed tasks via the admin status task function. */ oicr = rd32(hw, PFINT_OICR); /* Processing multiple controlq interrupts on a single vector does not * provide an indication of which controlq triggered the interrupt. * We might try reading the INTEVENT bit of the respective PFINT_*_CTL * registers. However, the INTEVENT bit is not guaranteed to be set as * it gets automatically cleared when the hardware acknowledges the * interrupt. * * This means we don't really have a good indication of whether or * which controlq triggered this interrupt. We'll just notify the * admin task that it should check all the controlqs. */ ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); if (oicr & PFINT_OICR_VFLR_M) { ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING); } if (oicr & PFINT_OICR_MAL_DETECT_M) { ice_set_state(&sc->state, ICE_STATE_MDD_PENDING); } if (oicr & PFINT_OICR_GRST_M) { u32 reset; reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> GLGEN_RSTAT_RESET_TYPE_S; if (reset == ICE_RESET_CORER) sc->soft_stats.corer_count++; else if (reset == ICE_RESET_GLOBR) sc->soft_stats.globr_count++; else sc->soft_stats.empr_count++; /* There are a couple of bits at play for handling resets. * First, the ICE_STATE_RESET_OICR_RECV bit is used to * indicate that the driver has received an OICR with a reset * bit active, indicating that a CORER/GLOBR/EMPR is about to * happen. Second, we set hw->reset_ongoing to indicate that * the hardware is in reset. We will set this back to false as * soon as the driver has determined that the hardware is out * of reset. * * If the driver wishes to trigger a reqest, it can set one of * the ICE_STATE_RESET_*_REQ bits, which will trigger the * correct type of reset. */ if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) hw->reset_ongoing = true; } if (oicr & PFINT_OICR_ECC_ERR_M) { device_printf(dev, "ECC Error detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } if (oicr & PFINT_OICR_PE_CRITERR_M) { device_printf(dev, "Critical Protocol Engine Error detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } if (oicr & PFINT_OICR_PCI_EXCEPTION_M) { device_printf(dev, "PCI Exception detected!\n"); ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); } if (oicr & PFINT_OICR_HMC_ERR_M) { /* Log the HMC errors, but don't disable the interrupt cause */ ice_log_hmc_error(hw, dev); } return (FILTER_SCHEDULE_THREAD); } /** * ice_allocate_msix - Allocate MSI-X vectors for the interface * @sc: the device private softc * * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process. * * First, determine a suitable total number of vectors based on the number * of CPUs, RSS buckets, the administrative vector, and other demands such as * RDMA. * * Request the desired amount of vectors, and see how many we obtain. If we * don't obtain as many as desired, reduce the demands by lowering the number * of requested queues or reducing the demand from other features such as * RDMA. * * @remark This function is required because the driver sets the * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors * manually. * * @remark This driver will only use MSI-X vectors. If this is not possible, * neither MSI or legacy interrupts will be tried. * * @post on success this function must set the following scctx parameters: * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr. * * @returns zero on success or an error code on failure. */ static int ice_allocate_msix(struct ice_softc *sc) { bool iflib_override_queue_count = false; if_softc_ctx_t scctx = sc->scctx; device_t dev = sc->dev; cpuset_t cpus; int bar, queues, vectors, requested; int err = 0; /* Allocate the MSI-X bar */ bar = scctx->isc_msix_bar; sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (!sc->msix_table) { device_printf(dev, "Unable to map MSI-X table\n"); return (ENOMEM); } /* Check if the iflib queue count sysctls have been set */ if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs) iflib_override_queue_count = true; err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus); if (err) { device_printf(dev, "%s: Unable to fetch the CPU list: %s\n", __func__, ice_err_str(err)); CPU_COPY(&all_cpus, &cpus); } /* Attempt to mimic behavior of iflib_msix_init */ if (iflib_override_queue_count) { /* * If the override sysctls have been set, limit the queues to * the number of logical CPUs. */ queues = mp_ncpus; } else { /* * Otherwise, limit the queue count to the CPUs associated * with the NUMA node the device is associated with. */ queues = CPU_COUNT(&cpus); } /* Clamp to the number of RSS buckets */ queues = imin(queues, rss_getnumbuckets()); /* * Clamp the number of queue pairs to the minimum of the requested Tx * and Rx queues. */ queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets); queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets); /* * Determine the number of vectors to request. Note that we also need * to allocate one vector for administrative tasks. */ requested = queues + 1; vectors = requested; err = pci_alloc_msix(dev, &vectors); if (err) { device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n", vectors, ice_err_str(err)); goto err_free_msix_table; } /* If we don't receive enough vectors, reduce demands */ if (vectors < requested) { int diff = requested - vectors; device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n", requested, vectors); /* * If we still have a difference, we need to reduce the number * of queue pairs. * * However, we still need at least one vector for the admin * interrupt and one queue pair. */ if (queues <= diff) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n"); err = (ERANGE); goto err_pci_release_msi; } queues -= diff; } device_printf(dev, "Using %d Tx and Rx queues\n", queues); device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = queues; scctx->isc_ntxqsets = queues; scctx->isc_intr = IFLIB_INTR_MSIX; /* Interrupt allocation tracking isn't required in recovery mode, * since neither RDMA nor VFs are enabled. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Keep track of which interrupt indices are being used for what */ sc->lan_vectors = vectors; err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors); if (err) { device_printf(dev, "Unable to assign PF interrupt mapping: %s\n", ice_err_str(err)); goto err_pci_release_msi; } return (0); err_pci_release_msi: pci_release_msi(dev); err_free_msix_table: if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } return (err); } /** * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues * @ctx: the iflib context structure * @msix: the number of vectors we were assigned * * Called by iflib to assign MSI-X vectors to queues. Currently requires that * we get at least the same number of vectors as we have queues, and that we * always have the same number of Tx and Rx queues. * * Tx queues use a softirq instead of using their own hardware interrupt. */ static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int err, i, vector; ASSERT_CTX_LOCKED(sc); if (vsi->num_rx_queues != vsi->num_tx_queues) { device_printf(sc->dev, "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n", vsi->num_tx_queues, vsi->num_rx_queues); return (EOPNOTSUPP); } if (msix < (vsi->num_rx_queues + 1)) { device_printf(sc->dev, "Not enough MSI-X vectors to assign one vector to each queue pair\n"); return (EOPNOTSUPP); } /* Save the number of vectors for future use */ sc->num_irq_vectors = vsi->num_rx_queues + 1; /* Allocate space to store the IRQ vector data */ if (!(sc->irqvs = (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors), M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate irqv memory\n"); return (ENOMEM); } /* Administrative interrupt events will use vector 0 */ err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN, ice_msix_admin, sc, 0, "admin"); if (err) { device_printf(sc->dev, "Failed to register Admin queue handler: %s\n", ice_err_str(err)); goto free_irqvs; } sc->irqvs[0].me = 0; /* Do not allocate queue interrupts when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; struct ice_tx_queue *txq = &vsi->tx_queues[i]; int rid = vector + 1; char irq_name[16]; snprintf(irq_name, sizeof(irq_name), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid, - IFLIB_INTR_RX, ice_msix_que, + IFLIB_INTR_RXTX, ice_msix_que, rxq, rxq->me, irq_name); if (err) { device_printf(sc->dev, "Failed to allocate q int %d err: %s\n", i, ice_err_str(err)); vector--; i--; goto fail; } sc->irqvs[vector].me = vector; rxq->irqv = &sc->irqvs[vector]; bzero(irq_name, sizeof(irq_name)); snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq, IFLIB_INTR_TX, txq, txq->me, irq_name); txq->irqv = &sc->irqvs[vector]; } return (0); fail: for (; i >= 0; i--, vector--) iflib_irq_free(ctx, &sc->irqvs[vector].irq); iflib_irq_free(ctx, &sc->irqvs[0].irq); free_irqvs: free(sc->irqvs, M_ICE); sc->irqvs = NULL; return err; } /** * ice_if_mtu_set - Set the device MTU * @ctx: iflib context structure * @mtu: the MTU requested * * Called by iflib to configure the device's Maximum Transmission Unit (MTU). * * @pre assumes the caller holds the iflib CTX lock */ static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU) return (EINVAL); sc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; return (0); } /** * ice_if_intr_enable - Enable device interrupts * @ctx: iflib context structure * * Called by iflib to request enabling device interrupts. */ static void ice_if_intr_enable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; ASSERT_CTX_LOCKED(sc); /* Enable ITR 0 */ ice_enable_intr(hw, sc->irqvs[0].me); /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Enable all queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++) ice_enable_intr(hw, vsi->rx_queues[i].irqv->me); } /** * ice_if_intr_disable - Disable device interrupts * @ctx: iflib context structure * * Called by iflib to request disabling device interrupts. */ static void ice_if_intr_disable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; unsigned int i; ASSERT_CTX_LOCKED(sc); /* IFDI_INTR_DISABLE may be called prior to interrupts actually being * assigned to queues. Instead of assuming that the interrupt * assignment in the rx_queues structure is valid, just disable all * possible interrupts * * Note that we choose not to disable ITR 0 because this handles the * AdminQ interrupts, and we want to keep processing these even when * the interface is offline. */ for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++) ice_disable_intr(hw, i); } /** * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt * @ctx: iflib context structure * @rxqid: the Rx queue to enable * * Enable a specific Rx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me); return (0); } /** * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt * @ctx: iflib context structure * @txqid: the Tx queue to enable * * Enable a specific Tx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me); return (0); } /** * ice_if_promisc_set - Set device promiscuous mode * @ctx: iflib context structure * @flags: promiscuous flags to configure * * Called by iflib to configure device promiscuous mode. * * @remark Calls to this function will always overwrite the previous setting */ static int ice_if_promisc_set(if_ctx_t ctx, int flags) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; bool promisc_enable = flags & IFF_PROMISC; bool multi_enable = flags & IFF_ALLMULTI; /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (multi_enable) return (EOPNOTSUPP); if (promisc_enable) { status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status && status != ICE_ERR_ALREADY_EXISTS) { device_printf(dev, "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } else { status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status) { device_printf(dev, "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } return (0); } /** * ice_if_media_change - Change device media * @ctx: device ctx structure * * Called by iflib when a media change is requested. This operation is not * supported by the hardware, so we just return an error code. */ static int ice_if_media_change(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_printf(sc->dev, "Media change is not supported.\n"); return (ENODEV); } /** * ice_if_media_status - Report current device media * @ctx: iflib context structure * @ifmr: ifmedia request structure to update * * Updates the provided ifmr with current device media status, including link * status and media type. */ static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_link_status *li = &sc->hw.port_info->phy.link_info; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* Never report link up or media types when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (!sc->link_up) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_FDX; if (li->phy_type_low) ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low); else if (li->phy_type_high) ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high); else ifmr->ifm_active |= IFM_UNKNOWN; /* Report flow control status as well */ if (li->an_info & ICE_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (li->an_info & ICE_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } /** * ice_init_tx_tracking - Initialize Tx queue software tracking values * @vsi: the VSI to initialize * * Initialize Tx queue software tracking values, including the Report Status * queue, and related software tracking values. */ static void ice_init_tx_tracking(struct ice_vsi *vsi) { struct ice_tx_queue *txq; size_t j; int i; for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { txq->tx_rs_cidx = txq->tx_rs_pidx = 0; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error in ice_ift_txd_credits_update for the * first packet. */ txq->tx_cidx_processed = txq->desc_count - 1; for (j = 0; j < txq->desc_count; j++) txq->tx_rsq[j] = QIDX_INVALID; } } /** * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues * @sc: the device softc * * Called to update the Rx queue mbuf_sz parameter for configuring the receive * buffer sizes when programming hardware. */ static void ice_update_rx_mbuf_sz(struct ice_softc *sc) { uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx); struct ice_vsi *vsi = &sc->pf_vsi; MPASS(mbuf_sz <= UINT16_MAX); vsi->mbuf_sz = mbuf_sz; } /** * ice_if_init - Initialize the device * @ctx: iflib ctx structure * * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes * device filters and prepares the Tx and Rx engines. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_init(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_t dev = sc->dev; int err; ASSERT_CTX_LOCKED(sc); /* * We've seen an issue with 11.3/12.1 where sideband routines are * called after detach is called. This would call routines after * if_stop, causing issues with the teardown process. This has * seemingly been fixed in STABLE snapshots, but it seems like a * good idea to have this guard here regardless. */ if (ice_driver_is_detaching(sc)) return; if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n"); return; } ice_update_rx_mbuf_sz(sc); /* Update the MAC address... User might use a LAA */ err = ice_update_laa_mac(sc); if (err) { device_printf(dev, "LAA address change failed, err %s\n", ice_err_str(err)); return; } /* Initialize software Tx tracking values */ ice_init_tx_tracking(&sc->pf_vsi); err = ice_cfg_vsi_for_tx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Tx: %s\n", ice_err_str(err)); return; } err = ice_cfg_vsi_for_rx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Rx: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_control_rx_queues(&sc->pf_vsi, true); if (err) { device_printf(dev, "Unable to enable Rx rings for transmit: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_cfg_pf_default_mac_filters(sc); if (err) { device_printf(dev, "Unable to configure default MAC filters: %s\n", ice_err_str(err)); goto err_stop_rx; } /* We use software interrupts for Tx, so we only program the hardware * interrupts for Rx. */ ice_configure_rxq_interrupts(&sc->pf_vsi); ice_configure_rx_itr(&sc->pf_vsi); /* Configure promiscuous mode */ ice_if_promisc_set(ctx, if_getflags(sc->ifp)); ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED); return; err_stop_rx: ice_control_rx_queues(&sc->pf_vsi, false); err_cleanup_tx: ice_vsi_disable_tx(&sc->pf_vsi); } /** * ice_poll_for_media_avail - Re-enable link if media is detected * @sc: device private structure * * Intended to be called from the driver's timer function, this function * sends the Get Link Status AQ command and re-enables HW link if the * command says that media is available. * * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing, * since media removal events are supposed to be sent to the driver through * a link status event. */ static void ice_poll_for_media_avail(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) { pi->phy.get_link_info = true; ice_get_link_status(pi, &sc->link_up); if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { enum ice_status status; /* Re-enable link and re-apply user link settings */ ice_apply_saved_phy_cfg(sc); /* Update the OS about changes in media capability */ status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA); } } } /** * ice_if_timer - called by iflib periodically * @ctx: iflib ctx structure * @qid: the queue this timer was called for * * This callback is triggered by iflib periodically. We use it to update the * hw statistics. * * @remark this function is not protected by the iflib CTX lock. */ static void ice_if_timer(if_ctx_t ctx, uint16_t qid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx; if (qid != 0) return; /* Do not attempt to update stats when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Update device statistics */ ice_update_pf_stats(sc); /* * For proper watchdog management, the iflib stack needs to know if * we've been paused during the last interval. Check if the * link_xoff_rx stat changed, and set the isc_pause_frames, if so. */ if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx) sc->scctx->isc_pause_frames = 1; /* Update the primary VSI stats */ ice_update_vsi_hw_stats(&sc->pf_vsi); } /** * ice_admin_timer - called periodically to trigger the admin task * @arg: callout(9) argument pointing to the device private softc structure * * Timer function used as part of a callout(9) timer that will periodically * trigger the admin task, even when the interface is down. * * @remark this function is not called by iflib and is not protected by the * iflib CTX lock. * * @remark because this is a callout function, it cannot sleep and should not * attempt taking the iflib CTX lock. */ static void ice_admin_timer(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; /* Fire off the admin task */ iflib_admin_intr_deferred(sc->ctx); /* Reschedule the admin timer */ callout_schedule(&sc->admin_timer, hz/2); } /** * ice_transition_recovery_mode - Transition to recovery mode * @sc: the device private softc * * Called when the driver detects that the firmware has entered recovery mode * at run time. */ static void ice_transition_recovery_mode(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; int i; device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); /* Tell the stack that the link has gone down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); /* Request that the device be re-initialized */ ice_request_stack_reinit(sc); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } sc->num_available_vsi = 0; if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Destroy the interrupt manager */ ice_resmgr_destroy(&sc->imgr); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); ice_deinit_hw(&sc->hw); } /** * ice_transition_safe_mode - Transition to safe mode * @sc: the device private softc * * Called when the driver attempts to reload the DDP package during a device * reset, and the new download fails. If so, we must transition to safe mode * at run time. * * @remark although safe mode normally allocates only a single queue, we can't * change the number of queues dynamically when using iflib. Due to this, we * do not attempt to reduce the number of queues. */ static void ice_transition_safe_mode(struct ice_softc *sc) { /* Indicate that we are in Safe mode */ ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en); } /** * ice_if_update_admin_status - update admin status * @ctx: iflib ctx structure * * Called by iflib to update the admin status. For our purposes, this means * check the adminq, and update the link status. It's ultimately triggered by * our admin interrupt, or by the ice_if_timer periodically. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_update_admin_status(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; bool reschedule = false; u16 pending = 0; ASSERT_CTX_LOCKED(sc); /* Check if the firmware entered recovery mode at run time */ fw_mode = ice_get_fw_mode(&sc->hw); if (fw_mode == ICE_FW_MODE_REC) { if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* If we just entered recovery mode, log a warning to * the system administrator and deinit driver state * that is no longer functional. */ ice_transition_recovery_mode(sc); } } else if (fw_mode == ICE_FW_MODE_ROLLBACK) { if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) { /* Rollback mode isn't fatal, but we don't want to * repeatedly post a message about it. */ ice_print_rollback_msg(&sc->hw); } } /* Handle global reset events */ ice_handle_reset_event(sc); /* Handle PF reset requests */ ice_handle_pf_reset_request(sc); /* Handle MDD events */ ice_handle_mdd_event(sc); if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) || ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) || ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* * If we know the control queues are disabled, skip processing * the control queues entirely. */ ; } else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) { ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending); if (pending > 0) reschedule = true; ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending); if (pending > 0) reschedule = true; } /* Poll for link up */ ice_poll_for_media_avail(sc); /* Check and update link status */ ice_update_link_status(sc, false); /* * If there are still messages to process, we need to reschedule * ourselves. Otherwise, we can just re-enable the interrupt. We'll be * woken up at the next interrupt or timer event. */ if (reschedule) { ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); iflib_admin_intr_deferred(ctx); } else { ice_enable_intr(&sc->hw, sc->irqvs[0].me); } } /** * ice_prepare_for_reset - Prepare device for an impending reset * @sc: The device private softc * * Prepare the driver for an impending reset, shutting down VSIs, clearing the * scheduler setup, and shutting down controlqs. Uses the * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the * driver for reset or not. */ static void ice_prepare_for_reset(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; /* If we're already prepared, there's nothing to do */ if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) return; log(LOG_INFO, "%s: preparing to reset device logic\n", sc->ifp->if_xname); /* In recovery mode, hardware is not initialized */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Release the main PF VSI queue mappings */ ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); ice_clear_hw_tbls(hw); if (hw->port_info) ice_sched_clear_port(hw->port_info); ice_shutdown_all_ctrlq(hw); } /** * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping * @sc: the device softc pointer * * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue * mapping after a reset occurred. */ static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; struct ice_rx_queue *rxq; int err, i; /* Re-assign Tx queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n", ice_err_str(err)); return (err); } /* Re-assign Rx queues from PF space to this VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, vsi->num_rx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n", ice_err_str(err)); goto err_release_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Re-assign Tx queue tail pointers */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); /* Re-assign Rx queue tail pointers */ for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++) rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); return (0); err_release_tx_queues: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); return (err); } /* determine if the iflib context is active */ #define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)) /** * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode * @sc: The device private softc * * Handle a driver rebuild while in recovery mode. This will only rebuild the * limited functionality supported while in recovery mode. */ static void ice_rebuild_recovery_mode(struct ice_softc *sc) { device_t dev = sc->dev; /* enable PCIe bus master */ pci_enable_busmaster(dev); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; } /** * ice_rebuild - Rebuild driver state post reset * @sc: The device private softc * * Restore driver state after a reset occurred. Restart the controlqs, setup * the hardware port, and re-enable the VSIs. */ static void ice_rebuild(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; int err; sc->rebuild_ticks = ticks; /* If we're rebuilding, then a reset has succeeded. */ ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED); /* * If the firmware is in recovery mode, only restore the limited * functionality supported by recovery mode. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_rebuild_recovery_mode(sc); return; } /* enable PCIe bus master */ pci_enable_busmaster(dev); status = ice_init_all_ctrlq(hw); if (status) { device_printf(dev, "failed to re-init controlqs, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } /* Query the allocated resources for Tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { device_printf(dev, "Failed to query scheduler resources, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto err_shutdown_ctrlq; } err = ice_send_version(sc); if (err) goto err_shutdown_ctrlq; err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto err_shutdown_ctrlq; } status = ice_clear_pf_cfg(hw); if (status) { device_printf(dev, "failed to clear PF configuration, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } ice_clear_pxe_mode(hw); status = ice_get_caps(hw); if (status) { device_printf(dev, "failed to get capabilities, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } status = ice_sched_init_port(hw->port_info); if (status) { device_printf(dev, "failed to initialize port, err %s\n", ice_status_str(status)); goto err_sched_cleanup; } /* If we previously loaded the package, it needs to be reloaded now */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) { status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); if (status) { ice_log_pkg_init(sc, &status); ice_transition_safe_mode(sc); } } ice_reset_pf_stats(sc); err = ice_rebuild_pf_vsi_qmap(sc); if (err) { device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n", ice_err_str(err)); goto err_sched_cleanup; } err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n", ice_err_str(err)); goto err_release_queue_allocations; } /* Replay all VSI configuration */ err = ice_replay_all_vsi_cfg(sc); if (err) goto err_deinit_pf_vsi; /* Reconfigure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to reconfigure RSS for the main VSI, err %s\n", ice_err_str(err)); goto err_deinit_pf_vsi; } /* Refresh link status */ ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED); sc->hw.port_info->phy.get_link_info = true; ice_get_link_status(sc->hw.port_info, &sc->link_up); ice_update_link_status(sc, true); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; err_deinit_pf_vsi: ice_deinit_vsi(&sc->pf_vsi); err_release_queue_allocations: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); err_sched_cleanup: ice_sched_cleanup_all(hw); err_shutdown_ctrlq: ice_shutdown_all_ctrlq(hw); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); device_printf(dev, "Driver rebuild failed, please reload the device driver\n"); } /** * ice_handle_reset_event - Handle reset events triggered by OICR * @sc: The device private softc * * Handle reset events triggered by an OICR notification. This includes CORER, * GLOBR, and EMPR resets triggered by software on this or any other PF or by * firmware. * * @pre assumes the iflib context lock is held, and will unlock it while * waiting for the hardware to finish reset. */ static void ice_handle_reset_event(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; device_t dev = sc->dev; /* When a CORER, GLOBR, or EMPR is about to happen, the hardware will * trigger an OICR interrupt. Our OICR handler will determine when * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as * appropriate. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) return; ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_check_reset(hw); IFLIB_CTX_LOCK(sc); if (status) { device_printf(dev, "Device never came out of reset, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } /* We're done with the reset, so we can rebuild driver state */ sc->hw.reset_ongoing = false; ice_rebuild(sc); /* In the unlikely event that a PF reset request occurs at the same * time as a global reset, clear the request now. This avoids * resetting a second time right after we reset due to a global event. */ if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n"); } /** * ice_handle_pf_reset_request - Initiate PF reset requested by software * @sc: The device private softc * * Initiate a PF reset requested by software. We handle this in the admin task * so that only one thread actually handles driver preparation and cleanup, * rather than having multiple threads possibly attempt to run this code * simultaneously. * * @pre assumes the iflib context lock is held and will unlock it while * waiting for the PF reset to complete. */ static void ice_handle_pf_reset_request(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Check for PF reset requests */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) return; /* Make sure we're prepared for reset */ ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_reset(hw, ICE_RESET_PFR); IFLIB_CTX_LOCK(sc); if (status) { device_printf(sc->dev, "device PF reset failed, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } sc->soft_stats.pfr_count++; ice_rebuild(sc); } /** * ice_init_device_features - Init device driver features * @sc: driver softc structure * * @pre assumes that the function capabilities bits have been set up by * ice_init_hw(). */ static void ice_init_device_features(struct ice_softc *sc) { /* * A failed pkg file download triggers safe mode, disabling advanced * device feature support */ if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) return; /* Set capabilities that all devices support */ ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_DEFAULT_OVERRIDE, sc->feat_cap); /* Disable features due to hardware limitations... */ if (!sc->hw.func_caps.common_cap.rss_table_size) ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); /* Disable capabilities not supported by the OS */ ice_disable_unsupported_features(sc->feat_cap); /* RSS is always enabled for iflib */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS)) ice_set_bit(ICE_FEATURE_RSS, sc->feat_en); } /** * ice_if_multi_set - Callback to update Multicast filters in HW * @ctx: iflib ctx structure * * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search * the if_multiaddrs list and determine which filters have been added or * removed from the list, and update HW programming to reflect the new list. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_multi_set(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); int err; ASSERT_CTX_LOCKED(sc); /* Do not handle multicast configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; err = ice_sync_multicast_filters(sc); if (err) { device_printf(sc->dev, "Failed to synchronize multicast filter list: %s\n", ice_err_str(err)); return; } } /** * ice_if_vlan_register - Register a VLAN with the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to add * * Programs the main PF VSI with a hardware filter for the given VLAN. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure adding VLAN %d to main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_vlan_unregister - Remove a VLAN filter from the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to add * * Removes the previously programmed VLAN filter from the main PF VSI. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure removing VLAN %d from main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_stop - Stop the device * @ctx: iflib context structure * * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0 * down) * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_stop(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* * The iflib core may call IFDI_STOP prior to the first call to * IFDI_INIT. This will cause us to attempt to remove MAC filters we * don't have, and disable Tx queues which aren't yet configured. * Although it is likely these extra operations are harmless, they do * cause spurious warning messages to be displayed, which may confuse * users. * * To avoid these messages, we use a state bit indicating if we've * been initialized. It will be set when ice_if_init is called, and * cleared here in ice_if_stop. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n"); return; } /* Remove the MAC filters, stop Tx, and stop Rx. We don't check the * return of these functions because there's nothing we can really do * if they fail, and the functions already print error messages. * Just try to shut down as much as we can. */ ice_rm_pf_default_mac_filters(sc); /* Dissociate the Tx and Rx queues from the interrupts */ ice_flush_txq_interrupts(&sc->pf_vsi); ice_flush_rxq_interrupts(&sc->pf_vsi); /* Disable the Tx and Rx queues */ ice_vsi_disable_tx(&sc->pf_vsi); ice_control_rx_queues(&sc->pf_vsi, false); } /** * ice_if_get_counter - Get current value of an ifnet statistic * @ctx: iflib context pointer * @counter: ifnet counter to read * * Reads the current value of an ifnet counter for the device. * * This function is not protected by the iflib CTX lock. */ static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* Return the counter for the main PF VSI */ return ice_get_ifnet_counter(&sc->pf_vsi, counter); } /** * ice_request_stack_reinit - Request that iflib re-initialize * @sc: the device private softc * * Request that the device be brought down and up, to re-initialize. For * example, this may be called when a device reset occurs, or when Tx and Rx * queues need to be re-initialized. * * This is required because the iflib state is outside the driver, and must be * re-initialized if we need to resart Tx and Rx queues. */ void ice_request_stack_reinit(struct ice_softc *sc) { if (CTX_ACTIVE(sc->ctx)) { iflib_request_reset(sc->ctx); iflib_admin_intr_deferred(sc->ctx); } } /** * ice_driver_is_detaching - Check if the driver is detaching/unloading * @sc: device private softc * * Returns true if the driver is detaching, false otherwise. * * @remark on newer kernels, take advantage of iflib_in_detach in order to * report detachment correctly as early as possible. * * @remark this function is used by various code paths that want to avoid * running if the driver is about to be removed. This includes sysctls and * other driver access points. Note that it does not fully resolve * detach-based race conditions as it is possible for a thread to race with * iflib_in_detach. */ bool ice_driver_is_detaching(struct ice_softc *sc) { return (ice_test_state(&sc->state, ICE_STATE_DETACHING) || iflib_in_detach(sc->ctx)); } /** * ice_if_priv_ioctl - Device private ioctl handler * @ctx: iflib context pointer * @command: The ioctl command issued * @data: ioctl specific data * * iflib callback for handling custom driver specific ioctls. * * @pre Assumes that the iflib context lock is held. */ static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ifdrv *ifd; device_t dev = sc->dev; if (data == NULL) return (EINVAL); ASSERT_CTX_LOCKED(sc); /* Make sure the command type is valid */ switch (command) { case SIOCSDRVSPEC: case SIOCGDRVSPEC: /* Accepted commands */ break; case SIOCGPRIVATE_0: /* * Although we do not support this ioctl command, it's * expected that iflib will forward it to the IFDI_PRIV_IOCTL * handler. Do not print a message in this case */ return (ENOTSUP); default: /* * If we get a different command for this function, it's * definitely unexpected, so log a message indicating what * command we got for debugging purposes. */ device_printf(dev, "%s: unexpected ioctl command %08lx\n", __func__, command); return (EINVAL); } ifd = (struct ifdrv *)data; switch (ifd->ifd_cmd) { case ICE_NVM_ACCESS: return ice_handle_nvm_access_ioctl(sc, ifd); default: return EINVAL; } } /** * ice_if_i2c_req - I2C request handler for iflib * @ctx: iflib context pointer * @req: The I2C parameters to use * * Read from the port's I2C eeprom using the parameters from the ioctl. * * @remark The iflib-only part is pretty simple. */ static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); return ice_handle_i2c_req(sc, req); } /** * ice_if_suspend - PCI device suspend handler for iflib * @ctx: iflib context pointer * * Deinitializes the driver and clears HW resources in preparation for * suspend or an FLR. * * @returns 0; this return value is ignored */ static int ice_if_suspend(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* At least a PFR is always going to happen after this; * either via FLR or during the D3->D0 transition. */ ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ); ice_prepare_for_reset(sc); return (0); } /** * ice_if_resume - PCI device resume handler for iflib * @ctx: iflib context pointer * * Reinitializes the driver and the HW after PCI resume or after * an FLR. An init is performed by iflib after this function is finished. * * @returns 0; this return value is ignored */ static int ice_if_resume(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ice_rebuild(sc); return (0); } diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 783c50d5a3cc..7eb32077f8b0 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -1,4588 +1,4588 @@ /****************************************************************************** Copyright (c) 2001-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "ixgbe.h" #include "ixgbe_sriov.h" #include "ifdi_if.h" #include #include /************************************************************************ * Driver version ************************************************************************/ char ixgbe_driver_version[] = "4.0.1-k"; /************************************************************************ * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixgbe_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } ************************************************************************/ static pci_vendor_info_t ixgbe_vendor_info_array[] = { PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_1G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_BYPASS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), /* required last entry */ PVID_END }; static void *ixgbe_register(device_t dev); static int ixgbe_if_attach_pre(if_ctx_t ctx); static int ixgbe_if_attach_post(if_ctx_t ctx); static int ixgbe_if_detach(if_ctx_t ctx); static int ixgbe_if_shutdown(if_ctx_t ctx); static int ixgbe_if_suspend(if_ctx_t ctx); static int ixgbe_if_resume(if_ctx_t ctx); static void ixgbe_if_stop(if_ctx_t ctx); void ixgbe_if_enable_intr(if_ctx_t ctx); static void ixgbe_if_disable_intr(if_ctx_t ctx); static void ixgbe_link_intr_enable(if_ctx_t ctx); static int ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); static int ixgbe_if_media_change(if_ctx_t ctx); static int ixgbe_if_msix_intr_assign(if_ctx_t, int); static int ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff, int strip); static void ixgbe_if_multi_set(if_ctx_t ctx); static int ixgbe_if_promisc_set(if_ctx_t ctx, int flags); static int ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static int ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void ixgbe_if_queues_free(if_ctx_t ctx); static void ixgbe_if_timer(if_ctx_t ctx, uint16_t); static void ixgbe_if_update_admin_status(if_ctx_t ctx); static void ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static int ixgbe_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static bool ixgbe_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); int ixgbe_intr(void *arg); /************************************************************************ * Function prototypes ************************************************************************/ #if __FreeBSD_version >= 1100036 static uint64_t ixgbe_if_get_counter(if_ctx_t, ift_counter); #endif static void ixgbe_enable_queue(struct adapter *adapter, u32 vector); static void ixgbe_disable_queue(struct adapter *adapter, u32 vector); static void ixgbe_add_device_sysctls(if_ctx_t ctx); static int ixgbe_allocate_pci_resources(if_ctx_t ctx); static int ixgbe_setup_low_power_mode(if_ctx_t ctx); static void ixgbe_config_dmac(struct adapter *adapter); static void ixgbe_configure_ivars(struct adapter *adapter); static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type); static u8 *ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); static bool ixgbe_sfp_probe(if_ctx_t ctx); static void ixgbe_free_pci_resources(if_ctx_t ctx); static int ixgbe_msix_link(void *arg); static int ixgbe_msix_que(void *arg); static void ixgbe_initialize_rss_mapping(struct adapter *adapter); static void ixgbe_initialize_receive_units(if_ctx_t ctx); static void ixgbe_initialize_transmit_units(if_ctx_t ctx); static int ixgbe_setup_interface(if_ctx_t ctx); static void ixgbe_init_device_features(struct adapter *adapter); static void ixgbe_check_fan_failure(struct adapter *, u32, bool); static void ixgbe_add_media_types(if_ctx_t ctx); static void ixgbe_update_stats_counters(struct adapter *adapter); static void ixgbe_config_link(if_ctx_t ctx); static void ixgbe_get_slot_info(struct adapter *); static void ixgbe_check_wol_support(struct adapter *adapter); static void ixgbe_enable_rx_drop(struct adapter *); static void ixgbe_disable_rx_drop(struct adapter *); static void ixgbe_add_hw_stats(struct adapter *adapter); static int ixgbe_set_flowcntl(struct adapter *, int); static int ixgbe_set_advertise(struct adapter *, int); static int ixgbe_get_advertise(struct adapter *); static void ixgbe_setup_vlan_hw_support(if_ctx_t ctx); static void ixgbe_config_gpie(struct adapter *adapter); static void ixgbe_config_delay_values(struct adapter *adapter); /* Sysctl handlers */ static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS); #ifdef IXGBE_DEBUG static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS); #endif static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); /* Deferred interrupt tasklets */ static void ixgbe_handle_msf(void *); static void ixgbe_handle_mod(void *); static void ixgbe_handle_phy(void *); /************************************************************************ * FreeBSD Device Interface Entry Points ************************************************************************/ static device_method_t ix_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixgbe_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, iflib_device_iov_init), DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit), DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf), #endif /* PCI_IOV */ DEVMETHOD_END }; static driver_t ix_driver = { "ix", ix_methods, sizeof(struct adapter), }; devclass_t ix_devclass; DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0); IFLIB_PNP_INFO(pci, ix_driver, ixgbe_vendor_info_array); MODULE_DEPEND(ix, pci, 1, 1, 1); MODULE_DEPEND(ix, ether, 1, 1, 1); MODULE_DEPEND(ix, iflib, 1, 1, 1); static device_method_t ixgbe_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixgbe_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixgbe_if_attach_post), DEVMETHOD(ifdi_detach, ixgbe_if_detach), DEVMETHOD(ifdi_shutdown, ixgbe_if_shutdown), DEVMETHOD(ifdi_suspend, ixgbe_if_suspend), DEVMETHOD(ifdi_resume, ixgbe_if_resume), DEVMETHOD(ifdi_init, ixgbe_if_init), DEVMETHOD(ifdi_stop, ixgbe_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixgbe_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixgbe_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixgbe_if_disable_intr), DEVMETHOD(ifdi_link_intr_enable, ixgbe_link_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixgbe_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixgbe_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixgbe_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixgbe_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixgbe_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixgbe_if_mtu_set), DEVMETHOD(ifdi_crcstrip_set, ixgbe_if_crcstrip_set), DEVMETHOD(ifdi_media_status, ixgbe_if_media_status), DEVMETHOD(ifdi_media_change, ixgbe_if_media_change), DEVMETHOD(ifdi_promisc_set, ixgbe_if_promisc_set), DEVMETHOD(ifdi_timer, ixgbe_if_timer), DEVMETHOD(ifdi_vlan_register, ixgbe_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ixgbe_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ixgbe_if_get_counter), DEVMETHOD(ifdi_i2c_req, ixgbe_if_i2c_req), DEVMETHOD(ifdi_needs_restart, ixgbe_if_needs_restart), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixgbe_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixgbe_if_iov_uninit), DEVMETHOD(ifdi_iov_vf_add, ixgbe_if_iov_vf_add), #endif /* PCI_IOV */ DEVMETHOD_END }; /* * TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "IXGBE driver parameters"); static driver_t ixgbe_if_driver = { "ixgbe_if", ixgbe_if_methods, sizeof(struct adapter) }; static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Flow control setting, default to full */ static int ixgbe_flow_control = ixgbe_fc_full; SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN, &ixgbe_flow_control, 0, "Default flow control used for all adapters"); /* Advertise Speed, default to 0 (auto) */ static int ixgbe_advertise_speed = 0; SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN, &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters"); /* * Smart speed setting, default to on * this only works as a compile option * right now as its during attach, set * this to 'ixgbe_smart_speed_off' to * disable. */ static int ixgbe_smart_speed = ixgbe_smart_speed_on; /* * MSI-X should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixgbe_enable_msix = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, "Enable MSI-X interrupts"); /* * Defining this on will allow the use * of unsupported SFP+ modules, note that * doing so you are on your own :) */ static int allow_unsupported_sfp = FALSE; SYSCTL_INT(_hw_ix, OID_AUTO, unsupported_sfp, CTLFLAG_RDTUN, &allow_unsupported_sfp, 0, "Allow unsupported SFP modules...use at your own risk"); /* * Not sure if Flow Director is fully baked, * so we'll default to turning it off. */ static int ixgbe_enable_fdir = 0; SYSCTL_INT(_hw_ix, OID_AUTO, enable_fdir, CTLFLAG_RDTUN, &ixgbe_enable_fdir, 0, "Enable Flow Director"); /* Receive-Side Scaling */ static int ixgbe_enable_rss = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_rss, CTLFLAG_RDTUN, &ixgbe_enable_rss, 0, "Enable Receive-Side Scaling (RSS)"); #if 0 /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; #endif MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations"); /* * For Flow Director: this is the number of TX packets we sample * for the filter pool, this means every 20th packet will be probed. * * This feature can be disabled by setting this to 0. */ static int atr_sample_rate = 20; extern struct if_txrx ixgbe_txrx; static struct if_shared_ctx ixgbe_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ .isc_tx_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = PAGE_SIZE*4, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = PAGE_SIZE*4, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixgbe_vendor_info_array, .isc_driver_version = ixgbe_driver_version, .isc_driver = &ixgbe_if_driver, .isc_flags = IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, .isc_nrxd_max = {MAX_RXD}, .isc_ntxd_max = {MAX_TXD}, .isc_nrxd_default = {DEFAULT_RXD}, .isc_ntxd_default = {DEFAULT_TXD}, }; if_shared_ctx_t ixgbe_sctx = &ixgbe_sctx_init; /************************************************************************ * ixgbe_if_tx_queues_alloc ************************************************************************/ static int ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que; int i, j, error; MPASS(adapter->num_tx_queues > 0); MPASS(adapter->num_tx_queues == ntxqsets); MPASS(ntxqs == 1); /* Allocate queue structure memory */ adapter->tx_queues = (struct ix_tx_queue *)malloc(sizeof(struct ix_tx_queue) * ntxqsets, M_IXGBE, M_NOWAIT | M_ZERO); if (!adapter->tx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; /* In case SR-IOV is enabled, align the index properly */ txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, i); txr->adapter = que->adapter = adapter; /* Allocate report status array */ txr->tx_rsq = (qidx_t *)malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IXGBE, M_NOWAIT | M_ZERO); if (txr->tx_rsq == NULL) { error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tail = IXGBE_TDT(txr->me); txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i]; txr->tx_paddr = paddrs[i]; txr->bytes = 0; txr->total_packets = 0; /* Set the rate at which we sample packets */ if (adapter->feat_en & IXGBE_FEATURE_FDIR) txr->atr_sample = atr_sample_rate; } device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues); return (0); fail: ixgbe_if_queues_free(ctx); return (error); } /* ixgbe_if_tx_queues_alloc */ /************************************************************************ * ixgbe_if_rx_queues_alloc ************************************************************************/ static int ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que; int i; MPASS(adapter->num_rx_queues > 0); MPASS(adapter->num_rx_queues == nrxqsets); MPASS(nrxqs == 1); /* Allocate queue structure memory */ adapter->rx_queues = (struct ix_rx_queue *)malloc(sizeof(struct ix_rx_queue)*nrxqsets, M_IXGBE, M_NOWAIT | M_ZERO); if (!adapter->rx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; /* In case SR-IOV is enabled, align the index properly */ rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, i); rxr->adapter = que->adapter = adapter; /* get the virtual and physical address of the hw queues */ rxr->tail = IXGBE_RDT(rxr->me); rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i]; rxr->rx_paddr = paddrs[i]; rxr->bytes = 0; rxr->que = que; } device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", adapter->num_rx_queues); return (0); } /* ixgbe_if_rx_queues_alloc */ /************************************************************************ * ixgbe_if_queues_free ************************************************************************/ static void ixgbe_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_tx_queue *tx_que = adapter->tx_queues; struct ix_rx_queue *rx_que = adapter->rx_queues; int i; if (tx_que != NULL) { for (i = 0; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_IXGBE); txr->tx_rsq = NULL; } free(adapter->tx_queues, M_IXGBE); adapter->tx_queues = NULL; } if (rx_que != NULL) { free(adapter->rx_queues, M_IXGBE); adapter->rx_queues = NULL; } } /* ixgbe_if_queues_free */ /************************************************************************ * ixgbe_initialize_rss_mapping ************************************************************************/ static void ixgbe_initialize_rss_mapping(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 reta = 0, mrqc, rss_key[10]; int queue_id, table_size, index_mult; int i, j; u32 rss_hash_config; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* Fetch the configured RSS key */ rss_getkey((uint8_t *)&rss_key); } else { /* set up random bits */ arc4rand(&rss_key, sizeof(rss_key), 0); } /* Set multiplier for RETA setup and table size based on MAC */ index_mult = 0x1; table_size = 128; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: index_mult = 0x11; break; case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: table_size = 512; break; default: break; } /* Set up the redirection table */ for (i = 0, j = 0; i < table_size; i++, j++) { if (j == adapter->num_rx_queues) j = 0; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * Fetch the RSS bucket id for the given indirection * entry. Cap it at the number of configured buckets * (which is num_rx_queues.) */ queue_id = rss_get_indirection_to_bucket(i); queue_id = queue_id % adapter->num_rx_queues; } else queue_id = (j * index_mult); /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | (((uint32_t)queue_id) << 24); if ((i & 3) == 3) { if (i < 128) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); else IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta); reta = 0; } } /* Now fill our hash function seeds */ for (i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]); /* Perform hash on these packet types */ if (adapter->feat_en & IXGBE_FEATURE_RSS) rss_hash_config = rss_gethashconfig(); else { /* * Disable UDP - IP fragments aren't currently being handled * and so we end up with a mix of 2-tuple and 4-tuple * traffic. */ rss_hash_config = RSS_HASHTYPE_RSS_IPV4 | RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX | RSS_HASHTYPE_RSS_TCP_IPV6_EX; } mrqc = IXGBE_MRQC_RSSEN; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; mrqc |= ixgbe_get_mrqc(adapter->iov_mode); IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); } /* ixgbe_initialize_rss_mapping */ /************************************************************************ * ixgbe_initialize_receive_units - Setup receive registers and features. ************************************************************************/ #define BSIZEPKT_ROUNDUP ((1<shared; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = iflib_get_ifp(ctx); struct ix_rx_queue *que; int i, j; u32 bufsz, fctrl, srrctl, rxcsum; u32 hlreg; /* * Make sure receives are disabled while * setting up the descriptor ring */ ixgbe_disable_rx(hw); /* Enable broadcasts */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; if (adapter->hw.mac.type == ixgbe_mac_82598EB) { fctrl |= IXGBE_FCTRL_DPF; fctrl |= IXGBE_FCTRL_PMCF; } IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); /* Set for Jumbo Frames? */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); if (ifp->if_mtu > ETHERMTU) hlreg |= IXGBE_HLREG0_JUMBOEN; else hlreg &= ~IXGBE_HLREG0_JUMBOEN; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; /* Setup the Base and Length of the Rx Descriptor Ring */ for (i = 0, que = adapter->rx_queues; i < adapter->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 rdba = rxr->rx_paddr; j = rxr->me; /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j)); srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; srrctl |= bufsz; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; /* * Set DROP_EN iff we have no flow control and >1 queue. * Note that srrctl was cleared shortly before during reset, * so we do not need to clear the bit, but do it just in case * this code is moved elsewhere. */ if (adapter->num_rx_queues > 1 && adapter->hw.fc.requested_mode == ixgbe_fc_none) { srrctl |= IXGBE_SRRCTL_DROP_EN; } else { srrctl &= ~IXGBE_SRRCTL_DROP_EN; } IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0); /* Set the driver rx tail address */ rxr->tail = IXGBE_RDT(rxr->me); } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); ixgbe_initialize_rss_mapping(adapter); if (adapter->num_rx_queues > 1) { /* RSS and RX IPP Checksum are mutually exclusive */ rxcsum |= IXGBE_RXCSUM_PCSD; } if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; /* This is useful for calculating UDP/IP fragment checksums */ if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); } /* ixgbe_initialize_receive_units */ /************************************************************************ * ixgbe_initialize_transmit_units - Enable transmit units. ************************************************************************/ static void ixgbe_initialize_transmit_units(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que; int i; /* Setup the Base and Length of the Tx Descriptor Ring */ for (i = 0, que = adapter->tx_queues; i < adapter->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; u64 tdba = txr->tx_paddr; u32 txctrl = 0; int j = txr->me; IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); /* Cache the tail address */ txr->tail = IXGBE_TDT(txr->me); txr->tx_rs_cidx = txr->tx_rs_pidx; txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; for (int k = 0; k < scctx->isc_ntxd[0]; k++) txr->tx_rsq[k] = QIDX_INVALID; /* Disable Head Writeback */ /* * Note: for X550 series devices, these registers are actually * prefixed with TPH_ isntead of DCA_, but the addresses and * fields remain the same. */ switch (hw->mac.type) { case ixgbe_mac_82598EB: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j)); break; default: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j)); break; } txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; switch (hw->mac.type) { case ixgbe_mac_82598EB: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl); break; default: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl); break; } } if (hw->mac.type != ixgbe_mac_82598EB) { u32 dmatxctl, rttdcs; dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); dmatxctl |= IXGBE_DMATXCTL_TE; IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); /* Disable arbiter to set MTQC */ rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); rttdcs |= IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(adapter->iov_mode)); rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } } /* ixgbe_initialize_transmit_units */ /************************************************************************ * ixgbe_register ************************************************************************/ static void * ixgbe_register(device_t dev) { return (ixgbe_sctx); } /* ixgbe_register */ /************************************************************************ * ixgbe_if_attach_pre - Device initialization routine, part 1 * * Called when the driver is being loaded. * Identifies the type of hardware, initializes the hardware, * and initializes iflib structures. * * return 0 on success, positive on failure ************************************************************************/ static int ixgbe_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; device_t dev; if_softc_ctx_t scctx; struct ixgbe_hw *hw; int error = 0; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); adapter->hw.back = adapter; adapter->ctx = ctx; adapter->dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; /* Determine hardware revision */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_get_revid(dev); hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); /* Do base PCI setup - map BAR0 */ if (ixgbe_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); return (ENXIO); } /* let hardware know driver is loaded */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); /* * Initialize the shared code */ if (ixgbe_init_shared_code(hw) != 0) { device_printf(dev, "Unable to initialize the shared code\n"); error = ENXIO; goto err_pci; } if (hw->mbx.ops.init_params) hw->mbx.ops.init_params(hw); hw->allow_unsupported_sfp = allow_unsupported_sfp; if (hw->mac.type != ixgbe_mac_82598EB) hw->phy.smart_speed = ixgbe_smart_speed; ixgbe_init_device_features(adapter); /* Enable WoL (if supported) */ ixgbe_check_wol_support(adapter); /* Verify adapter fan is still functional (if applicable) */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); ixgbe_check_fan_failure(adapter, esdp, FALSE); } /* Ensure SW/FW semaphore is free */ ixgbe_init_swfw_semaphore(hw); /* Set an initial default flow control value */ hw->fc.requested_mode = ixgbe_flow_control; hw->phy.reset_if_overtemp = TRUE; error = ixgbe_reset_hw(hw); hw->phy.reset_if_overtemp = FALSE; if (error == IXGBE_ERR_SFP_NOT_PRESENT) { /* * No optics in this port, set up * so the timer routine will probe * for later insertion. */ adapter->sfp_probe = TRUE; error = 0; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!\n"); error = EIO; goto err_pci; } else if (error) { device_printf(dev, "Hardware initialization failed\n"); error = EIO; goto err_pci; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_pci; } error = ixgbe_start_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: device_printf(dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues associated with your hardware.\nIf you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n"); break; case IXGBE_ERR_SFP_NOT_SUPPORTED: device_printf(dev, "Unsupported SFP+ Module\n"); error = EIO; goto err_pci; case IXGBE_ERR_SFP_NOT_PRESENT: device_printf(dev, "No SFP+ Module found\n"); /* falls thru */ default: break; } /* Most of the iflib initialization... */ iflib_set_mac(ctx, hw->mac.addr); switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: scctx->isc_rss_table_size = 512; scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 64; break; default: scctx->isc_rss_table_size = 128; scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 16; } /* Allow legacy interrupts */ ixgbe_txrx.ift_legacy_intr = ixgbe_intr; scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc) + sizeof(u32), DBA_ALIGN), scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); /* XXX */ scctx->isc_tx_csum_flags = CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_TSO; if (adapter->hw.mac.type == ixgbe_mac_82598EB) { scctx->isc_tx_nsegments = IXGBE_82598_SCATTER; } else { scctx->isc_tx_csum_flags |= CSUM_SCTP |CSUM_IP6_SCTP; scctx->isc_tx_nsegments = IXGBE_82599_SCATTER; } scctx->isc_msix_bar = pci_msix_table_bar(dev); scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; scctx->isc_tx_tso_size_max = IXGBE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = PAGE_SIZE; scctx->isc_txrx = &ixgbe_txrx; scctx->isc_capabilities = scctx->isc_capenable = IXGBE_CAPS; return (0); err_pci: ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_free_pci_resources(ctx); return (error); } /* ixgbe_if_attach_pre */ /********************************************************************* * ixgbe_if_attach_post - Device initialization routine, part 2 * * Called during driver load, but after interrupts and * resources have been allocated and configured. * Sets up some data structures not relevant to iflib. * * return 0 on success, positive on failure *********************************************************************/ static int ixgbe_if_attach_post(if_ctx_t ctx) { device_t dev; struct adapter *adapter; struct ixgbe_hw *hw; int error = 0; dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); hw = &adapter->hw; if (adapter->intr_type == IFLIB_INTR_LEGACY && (adapter->feat_cap & IXGBE_FEATURE_LEGACY_IRQ) == 0) { device_printf(dev, "Device does not support legacy interrupts"); error = ENXIO; goto err; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(*adapter->mta) * MAX_NUM_MULTICAST_ADDRESSES, M_IXGBE, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err; } /* hw.ix defaults init */ ixgbe_set_advertise(adapter, ixgbe_advertise_speed); /* Enable the optics for 82599 SFP+ fiber */ ixgbe_enable_tx_laser(hw); /* Enable power to the phy. */ ixgbe_set_phy_power(hw, TRUE); ixgbe_initialize_iov(adapter); error = ixgbe_setup_interface(ctx); if (error) { device_printf(dev, "Interface setup failed: %d\n", error); goto err; } ixgbe_if_update_admin_status(ctx); /* Initialize statistics */ ixgbe_update_stats_counters(adapter); ixgbe_add_hw_stats(adapter); /* Check PCIE slot type/speed/width */ ixgbe_get_slot_info(adapter); /* * Do time init and sysctl init here, but * only on the first port of a bypass adapter. */ ixgbe_bypass_init(adapter); /* Set an initial dmac value */ adapter->dmac = 0; /* Set initial advertised speeds (if applicable) */ adapter->advertise = ixgbe_get_advertise(adapter); if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) ixgbe_define_iov_schemas(dev, &error); /* Add sysctls */ ixgbe_add_device_sysctls(ctx); return (0); err: return (error); } /* ixgbe_if_attach_post */ /************************************************************************ * ixgbe_check_wol_support * * Checks whether the adapter's ports are capable of * Wake On LAN by reading the adapter's NVM. * * Sets each port's hw->wol_enabled value depending * on the value read here. ************************************************************************/ static void ixgbe_check_wol_support(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u16 dev_caps = 0; /* Find out WoL support for port */ adapter->wol_support = hw->wol_enabled = 0; ixgbe_get_device_caps(hw, &dev_caps); if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && hw->bus.func == 0)) adapter->wol_support = hw->wol_enabled = 1; /* Save initial wake up filter configuration */ adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); return; } /* ixgbe_check_wol_support */ /************************************************************************ * ixgbe_setup_interface * * Setup networking device structure and register an interface. ************************************************************************/ static int ixgbe_setup_interface(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("ixgbe_setup_interface: begin"); if_setbaudrate(ifp, IF_Gbps(10)); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); ixgbe_add_media_types(ctx); /* Autoselect media by default */ ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* ixgbe_setup_interface */ /************************************************************************ * ixgbe_if_get_counter ************************************************************************/ static uint64_t ixgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->ipackets); case IFCOUNTER_OPACKETS: return (adapter->opackets); case IFCOUNTER_IBYTES: return (adapter->ibytes); case IFCOUNTER_OBYTES: return (adapter->obytes); case IFCOUNTER_IMCASTS: return (adapter->imcasts); case IFCOUNTER_OMCASTS: return (adapter->omcasts); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IQDROPS: return (adapter->iqdrops); case IFCOUNTER_OQDROPS: return (0); case IFCOUNTER_IERRORS: return (adapter->ierrors); default: return (if_get_counter_default(ifp, cnt)); } } /* ixgbe_if_get_counter */ /************************************************************************ * ixgbe_if_i2c_req ************************************************************************/ static int ixgbe_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int i; if (hw->phy.ops.read_i2c_byte == NULL) return (ENXIO); for (i = 0; i < req->len; i++) hw->phy.ops.read_i2c_byte(hw, req->offset + i, req->dev_addr, &req->data[i]); return (0); } /* ixgbe_if_i2c_req */ /* ixgbe_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for unknown events. * * @returns true if iflib needs to reinit the interface */ static bool ixgbe_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: return (false); default: return (true); } } /************************************************************************ * ixgbe_add_media_types ************************************************************************/ static void ixgbe_add_media_types(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u64 layer; layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); /* Media types with matching FreeBSD media defines */ if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10BASE_T) ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); #ifdef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ifmedia_add( adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) ifmedia_add(adapter->media, IFM_ETHER | IFM_2500_KX, 0, NULL); #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { device_printf(dev, "Media supported: 10GbaseKR\n"); device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { device_printf(dev, "Media supported: 10GbaseKX4\n"); device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { device_printf(dev, "Media supported: 1000baseKX\n"); device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) { device_printf(dev, "Media supported: 2500baseKX\n"); device_printf(dev, "2500baseKX mapped to 2500baseSX\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_2500_SX, 0, NULL); } #endif if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) device_printf(dev, "Media supported: 1000baseBX\n"); if (hw->device_id == IXGBE_DEV_ID_82598AT) { ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); } /* ixgbe_add_media_types */ /************************************************************************ * ixgbe_is_sfp ************************************************************************/ static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw) { switch (hw->mac.type) { case ixgbe_mac_82598EB: if (hw->phy.type == ixgbe_phy_nl) return (TRUE); return (FALSE); case ixgbe_mac_82599EB: switch (hw->mac.ops.get_media_type(hw)) { case ixgbe_media_type_fiber: case ixgbe_media_type_fiber_qsfp: return (TRUE); default: return (FALSE); } case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) return (TRUE); return (FALSE); default: return (FALSE); } } /* ixgbe_is_sfp */ /************************************************************************ * ixgbe_config_link ************************************************************************/ static void ixgbe_config_link(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; sfp = ixgbe_is_sfp(hw); if (sfp) { adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; iflib_admin_intr_deferred(ctx); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); if (err) return; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) err = hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (err) return; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } } /* ixgbe_config_link */ /************************************************************************ * ixgbe_update_stats_counters - Update board statistics counters. ************************************************************************/ static void ixgbe_update_stats_counters(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_hw_stats *stats = &adapter->stats.pf; u32 missed_rx = 0, bprc, lxon, lxoff, total; u32 lxoffrxc; u64 total_missed_rx = 0; stats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); stats->illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); stats->errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); stats->mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); stats->mpc[0] += IXGBE_READ_REG(hw, IXGBE_MPC(0)); for (int i = 0; i < 16; i++) { stats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); stats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); stats->qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); } stats->mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); stats->mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); stats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); /* Hardware workaround, gprc counts missed packets */ stats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); stats->gprc -= missed_rx; if (hw->mac.type != ixgbe_mac_82598EB) { stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); stats->tor += IXGBE_READ_REG(hw, IXGBE_TORL) + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); lxoffrxc = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); stats->lxoffrxc += lxoffrxc; } else { stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); lxoffrxc = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); stats->lxoffrxc += lxoffrxc; /* 82598 only has a counter in the high register */ stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); stats->tor += IXGBE_READ_REG(hw, IXGBE_TORH); } /* * For watchdog management we need to know if we have been paused * during the last interval, so capture that here. */ if (lxoffrxc) adapter->shared->isc_pause_frames = 1; /* * Workaround: mprc hardware is incorrectly counting * broadcasts, so for now we subtract those. */ bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); stats->bprc += bprc; stats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); if (hw->mac.type == ixgbe_mac_82598EB) stats->mprc -= bprc; stats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); stats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); stats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); stats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); stats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); stats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); stats->lxontxc += lxon; lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); stats->lxofftxc += lxoff; total = lxon + lxoff; stats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); stats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); stats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); stats->gptc -= total; stats->mptc -= total; stats->ptc64 -= total; stats->gotc -= total * ETHER_MIN_LEN; stats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC); stats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC); stats->roc += IXGBE_READ_REG(hw, IXGBE_ROC); stats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC); stats->mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); stats->mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); stats->mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); stats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR); stats->tpt += IXGBE_READ_REG(hw, IXGBE_TPT); stats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); stats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); stats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); stats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); stats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); stats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); stats->xec += IXGBE_READ_REG(hw, IXGBE_XEC); stats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); stats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); /* Only read FCOE on 82599 */ if (hw->mac.type != ixgbe_mac_82598EB) { stats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); stats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); stats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); stats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); stats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); } /* Fill out the OS statistics structure */ IXGBE_SET_IPACKETS(adapter, stats->gprc); IXGBE_SET_OPACKETS(adapter, stats->gptc); IXGBE_SET_IBYTES(adapter, stats->gorc); IXGBE_SET_OBYTES(adapter, stats->gotc); IXGBE_SET_IMCASTS(adapter, stats->mprc); IXGBE_SET_OMCASTS(adapter, stats->mptc); IXGBE_SET_COLLISIONS(adapter, 0); IXGBE_SET_IQDROPS(adapter, total_missed_rx); IXGBE_SET_IERRORS(adapter, stats->crcerrs + stats->rlec); } /* ixgbe_update_stats_counters */ /************************************************************************ * ixgbe_add_hw_stats * * Add sysctl variables, one per statistic, to the system. ************************************************************************/ static void ixgbe_add_hw_stats(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ix_rx_queue *rx_que; struct ix_tx_queue *tx_que; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbe_hw_stats *stats = &adapter->stats.pf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; int i; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, txr, 0, ixgbe_sysctl_tdh_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, txr, 0, ixgbe_sysctl_tdt_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &txr->tso_tx, "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); } for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &adapter->rx_queues[i], 0, ixgbe_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(adapter->rx_queues[i].irqs), "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, rxr, 0, ixgbe_sysctl_rdh_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, rxr, 0, ixgbe_sysctl_rdt_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", CTLFLAG_RD, &rxr->rx_discarded, "Discarded RX packets"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", CTLFLAG_RD, &stats->illerrc, "Illegal Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", CTLFLAG_RD, &stats->errbc, "Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", CTLFLAG_RD, &stats->mspdc, "MAC Short Packets Discarded"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", CTLFLAG_RD, &stats->mlfc, "MAC Local Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", CTLFLAG_RD, &stats->mrfc, "MAC Remote Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_missed_packets", CTLFLAG_RD, &stats->mpc[0], "RX Missed Packet Count"); /* Flow Control stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->lxontxc, "Link XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->lxonrxc, "Link XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->lxofftxc, "Link XOFF Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->lxoffrxc, "Link XOFF Received"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", CTLFLAG_RD, &stats->ruc, "Receive Undersized"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", CTLFLAG_RD, &stats->rjc, "Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", CTLFLAG_RD, &stats->mngprc, "Management Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", CTLFLAG_RD, &stats->mngptc, "Management Packets Dropped"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", CTLFLAG_RD, &stats->xec, "Checksum Errors"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", CTLFLAG_RD, &stats->mngptc, "Management Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); } /* ixgbe_add_hw_stats */ /************************************************************************ * ixgbe_sysctl_tdh_handler - Transmit Descriptor Head handler function * * Retrieves the TDH value from the hardware ************************************************************************/ static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) { struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!txr) return (0); val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_tdh_handler */ /************************************************************************ * ixgbe_sysctl_tdt_handler - Transmit Descriptor Tail handler function * * Retrieves the TDT value from the hardware ************************************************************************/ static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) { struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!txr) return (0); val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_tdt_handler */ /************************************************************************ * ixgbe_sysctl_rdh_handler - Receive Descriptor Head handler function * * Retrieves the RDH value from the hardware ************************************************************************/ static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) { struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!rxr) return (0); val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_rdh_handler */ /************************************************************************ * ixgbe_sysctl_rdt_handler - Receive Descriptor Tail handler function * * Retrieves the RDT value from the hardware ************************************************************************/ static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) { struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!rxr) return (0); val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_rdt_handler */ /************************************************************************ * ixgbe_if_vlan_register * * Run via vlan config EVENT, it enables us to use the * HW Filter table since we can get the vlan id. This * just creates the entry in the soft version of the * VFTA, init will repopulate the real table. ************************************************************************/ static void ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; ixgbe_setup_vlan_hw_support(ctx); } /* ixgbe_if_vlan_register */ /************************************************************************ * ixgbe_if_vlan_unregister * * Run via vlan unconfig EVENT, remove our entry in the soft vfta. ************************************************************************/ static void ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixgbe_setup_vlan_hw_support(ctx); } /* ixgbe_if_vlan_unregister */ /************************************************************************ * ixgbe_setup_vlan_hw_support ************************************************************************/ static void ixgbe_setup_vlan_hw_support(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; int i; u32 ctrl; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* Setup the queues for vlans */ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { for (i = 0; i < adapter->num_rx_queues; i++) { rxr = &adapter->rx_queues[i].rxr; /* On 82599 the VLAN enable is per/queue in RXDCTL */ if (hw->mac.type != ixgbe_mac_82598EB) { ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl); } rxr->vtag_strip = TRUE; } } if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (i = 0; i < IXGBE_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), adapter->shadow_vfta[i]); ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* Enable the Filter Table if enabled */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { ctrl &= ~IXGBE_VLNCTRL_CFIEN; ctrl |= IXGBE_VLNCTRL_VFE; } if (hw->mac.type == ixgbe_mac_82598EB) ctrl |= IXGBE_VLNCTRL_VME; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); } /* ixgbe_setup_vlan_hw_support */ /************************************************************************ * ixgbe_get_slot_info * * Get the width and transaction speed of * the slot this adapter is plugged into. ************************************************************************/ static void ixgbe_get_slot_info(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ixgbe_hw *hw = &adapter->hw; int bus_info_valid = TRUE; u32 offset; u16 link; /* Some devices are behind an internal bridge */ switch (hw->device_id) { case IXGBE_DEV_ID_82599_SFP_SF_QP: case IXGBE_DEV_ID_82599_QSFP_SF_QP: goto get_parent_info; default: break; } ixgbe_get_bus_info(hw); /* * Some devices don't use PCI-E, but there is no need * to display "Unknown" for bus speed and width. */ switch (hw->mac.type) { case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: return; default: goto display; } get_parent_info: /* * For the Quad port adapter we need to parse back * up the PCI tree to find the speed of the expansion * slot into which this adapter is plugged. A bit more work. */ dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif /* Now get the PCI Express Capabilities offset */ if (pci_find_cap(dev, PCIY_EXPRESS, &offset)) { /* * Hmm...can't get PCI-Express capabilities. * Falling back to default method. */ bus_info_valid = FALSE; ixgbe_get_bus_info(hw); goto display; } /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); ixgbe_set_pci_config_data_generic(hw, link); display: device_printf(dev, "PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s" : "Unknown"), ((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" : "Unknown")); if (bus_info_valid) { if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && (hw->bus.speed == ixgbe_bus_speed_2500))) { device_printf(dev, "PCI-Express bandwidth available for this card\n is not sufficient for optimal performance.\n"); device_printf(dev, "For optimal performance a x8 PCIE, or x4 PCIE Gen2 slot is required.\n"); } if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && (hw->bus.speed < ixgbe_bus_speed_8000))) { device_printf(dev, "PCI-Express bandwidth available for this card\n is not sufficient for optimal performance.\n"); device_printf(dev, "For optimal performance a x8 PCIE Gen3 slot is required.\n"); } } else device_printf(dev, "Unable to determine slot speed/width. The speed/width reported are that of the internal switch.\n"); return; } /* ixgbe_get_slot_info */ /************************************************************************ * ixgbe_if_msix_intr_assign * * Setup MSI-X Interrupt resources and handlers ************************************************************************/ static int ixgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *rx_que = adapter->rx_queues; struct ix_tx_queue *tx_que; int error, rid, vector = 0; int cpu_id = 0; char buf[16]; /* Admin Que is vector 0*/ rid = vector + 1; for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, - IFLIB_INTR_RX, ixgbe_msix_que, rx_que, rx_que->rxr.me, buf); + IFLIB_INTR_RXTX, ixgbe_msix_que, rx_que, rx_que->rxr.me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * The queue ID is used as the RSS layer bucket ID. * We look up the queue ID -> RSS CPU ID and select * that. */ cpu_id = rss_getcpu(i % rss_getnumbuckets()); } else { /* * Bind the MSI-X vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS * round-robin bucket -> queue -> CPU allocation. */ if (adapter->num_rx_queues > 1) cpu_id = i; } } for (int i = 0; i < adapter->num_tx_queues; i++) { snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; tx_que->msix = i % adapter->num_rx_queues; iflib_softirq_alloc_generic(ctx, &adapter->rx_queues[tx_que->msix].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); } rid = vector + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, ixgbe_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); return (error); } adapter->vector = vector; return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } /* ixgbe_if_msix_intr_assign */ /********************************************************************* * ixgbe_msix_que - MSI-X Queue Interrupt Service routine **********************************************************************/ static int ixgbe_msix_que(void *arg) { struct ix_rx_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = iflib_get_ifp(que->adapter->ctx); /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (FILTER_HANDLED); ixgbe_disable_queue(adapter, que->msix); ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /* ixgbe_msix_que */ /************************************************************************ * ixgbe_media_status - Media Ioctl callback * * Called whenever the user queries the status of * the interface using ifconfig. ************************************************************************/ static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int layer; INIT_DEBUGOUT("ixgbe_if_media_status: begin"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) return; ifmr->ifm_status |= IFM_ACTIVE; layer = adapter->phy_layer; if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || layer & IXGBE_PHYSICAL_LAYER_100BASE_TX || layer & IXGBE_PHYSICAL_LAYER_10BASE_T) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_T | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; case IXGBE_LINK_SPEED_10_FULL: ifmr->ifm_active |= IFM_10_T | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; } /* * XXX: These need to use the proper media types once * they're added. */ #ifndef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } #endif /* If nothing is recognized... */ if (IFM_SUBTYPE(ifmr->ifm_active) == 0) ifmr->ifm_active |= IFM_UNKNOWN; /* Display current flow control setting used on link */ if (hw->fc.current_mode == ixgbe_fc_rx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (hw->fc.current_mode == ixgbe_fc_tx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_TXPAUSE; } /* ixgbe_media_status */ /************************************************************************ * ixgbe_media_change - Media Ioctl callback * * Called when the user changes speed/duplex using * media/mediopt option with ifconfig. ************************************************************************/ static int ixgbe_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); struct ixgbe_hw *hw = &adapter->hw; ixgbe_link_speed speed = 0; INIT_DEBUGOUT("ixgbe_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (hw->phy.media_type == ixgbe_media_type_backplane) return (EPERM); /* * We don't actually need to check against the supported * media types of the adapter; ifmedia will take care of * that for us. */ switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: case IFM_10G_T: speed |= IXGBE_LINK_SPEED_100_FULL; speed |= IXGBE_LINK_SPEED_1GB_FULL; speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_10G_LRM: case IFM_10G_LR: #ifndef IFM_ETH_XTYPE case IFM_10G_SR: /* KR, too */ case IFM_10G_CX4: /* KX4 */ #else case IFM_10G_KR: case IFM_10G_KX4: #endif speed |= IXGBE_LINK_SPEED_1GB_FULL; speed |= IXGBE_LINK_SPEED_10GB_FULL; break; #ifndef IFM_ETH_XTYPE case IFM_1000_CX: /* KX */ #else case IFM_1000_KX: #endif case IFM_1000_LX: case IFM_1000_SX: speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_1000_T: speed |= IXGBE_LINK_SPEED_100_FULL; speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_10G_TWINAX: speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_100_TX: speed |= IXGBE_LINK_SPEED_100_FULL; break; case IFM_10_T: speed |= IXGBE_LINK_SPEED_10_FULL; break; default: goto invalid; } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); adapter->advertise = ((speed & IXGBE_LINK_SPEED_10GB_FULL) ? 4 : 0) | ((speed & IXGBE_LINK_SPEED_1GB_FULL) ? 2 : 0) | ((speed & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0) | ((speed & IXGBE_LINK_SPEED_10_FULL) ? 8 : 0); return (0); invalid: device_printf(iflib_get_dev(ctx), "Invalid media type!\n"); return (EINVAL); } /* ixgbe_if_media_change */ /************************************************************************ * ixgbe_set_promisc ************************************************************************/ static int ixgbe_if_promisc_set(if_ctx_t ctx, int flags) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 rctl; int mcnt = 0; rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); rctl &= (~IXGBE_FCTRL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { mcnt = min(if_llmaddr_count(ifp), MAX_NUM_MULTICAST_ADDRESSES); } if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) rctl &= (~IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); if (ifp->if_flags & IFF_PROMISC) { rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { rctl |= IXGBE_FCTRL_MPE; rctl &= ~IXGBE_FCTRL_UPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); } return (0); } /* ixgbe_if_promisc_set */ /************************************************************************ * ixgbe_msix_link - Link status change ISR (MSI/MSI-X) ************************************************************************/ static int ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 eicr, eicr_mask; s32 retval; ++adapter->link_irq; /* Pause other interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER); /* First get the cause */ eicr = IXGBE_READ_REG(hw, IXGBE_EICS); /* Be sure the queue bits are not cleared */ eicr &= ~IXGBE_EICR_RTX_QUEUE; /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr); /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); adapter->task_requests |= IXGBE_REQUEST_TASK_LSC; } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { if ((adapter->feat_en & IXGBE_FEATURE_FDIR) && (eicr & IXGBE_EICR_FLOW_DIR)) { /* This is probably overkill :) */ if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) return (FILTER_HANDLED); /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); adapter->task_requests |= IXGBE_REQUEST_TASK_FDIR; } else if (eicr & IXGBE_EICR_ECC) { device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: ECC ERROR!! Please Reboot!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } /* Check for over temp condition */ if (adapter->feat_en & IXGBE_FEATURE_TEMP_SENSOR) { switch (adapter->hw.mac.type) { case ixgbe_mac_X550EM_a: if (!(eicr & IXGBE_EICR_GPI_SDP0_X550EM_a)) break; IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_GPI_SDP0_X550EM_a); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X550EM_a); retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); device_printf(iflib_get_dev(adapter->ctx), "System shutdown required!\n"); break; default: if (!(eicr & IXGBE_EICR_TS)) break; retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); device_printf(iflib_get_dev(adapter->ctx), "System shutdown required!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); break; } } /* Check for VF message */ if ((adapter->feat_en & IXGBE_FEATURE_SRIOV) && (eicr & IXGBE_EICR_MAILBOX)) adapter->task_requests |= IXGBE_REQUEST_TASK_MBX; } if (ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ if (hw->mac.type >= ixgbe_mac_X540) eicr_mask = IXGBE_EICR_GPI_SDP0_X540; else eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } /* Check for fan failure */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { ixgbe_check_fan_failure(adapter, eicr, TRUE); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; } return (adapter->task_requests != 0) ? FILTER_SCHEDULE_THREAD : FILTER_HANDLED; } /* ixgbe_msix_link */ /************************************************************************ * ixgbe_sysctl_interrupt_rate_handler ************************************************************************/ static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { struct ix_rx_queue *que = ((struct ix_rx_queue *)oidp->oid_arg1); int error; unsigned int reg, usec, rate; reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); usec = ((reg & 0x0FF8) >> 3); if (usec > 0) rate = 500000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; reg &= ~0xfff; /* default, no limitation */ ixgbe_max_interrupt_rate = 0; if (rate > 0 && rate < 500000) { if (rate < 1000) rate = 1000; ixgbe_max_interrupt_rate = rate; reg |= ((4000000/rate) & 0xff8); } IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); return (0); } /* ixgbe_sysctl_interrupt_rate_handler */ /************************************************************************ * ixgbe_add_device_sysctls ************************************************************************/ static void ixgbe_add_device_sysctls(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Sysctls for all devices */ SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); #ifdef IXGBE_DEBUG /* testing sysctls (for all devices) */ SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "power_state", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_power_state, "I", "PCI Power State"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "print_rss_config", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration"); #endif /* for X550 series devices */ if (hw->mac.type >= ixgbe_mac_X550) SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "dmac", CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_dmac, "I", "DMA Coalesce"); /* for WoL-capable devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wol_enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_wol_enable, "I", "Enable/Disable Wake on LAN"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wufc", CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_wufc, "I", "Enable/Disable Wake Up Filters"); } /* for X552/X557-AT devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { struct sysctl_oid *phy_node; struct sysctl_oid_list *phy_list; phy_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "phy", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "External PHY sysctls"); phy_list = SYSCTL_CHILDREN(phy_node); SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, "temp", CTLTYPE_U16 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_phy_temp, "I", "Current External PHY Temperature (Celsius)"); SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, "overtemp_occurred", CTLTYPE_U16 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_phy_overtemp_occurred, "I", "External PHY High Temperature Event Occurred"); } if (adapter->feat_cap & IXGBE_FEATURE_EEE) { SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "eee_state", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_eee_state, "I", "EEE Power Save State"); } } /* ixgbe_add_device_sysctls */ /************************************************************************ * ixgbe_allocate_pci_resources ************************************************************************/ static int ixgbe_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } /* Save bus_space values for READ/WRITE_REG macros */ adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); /* Set hw values for shared code */ adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; return (0); } /* ixgbe_allocate_pci_resources */ /************************************************************************ * ixgbe_detach - Device removal routine * * Called when the driver is being removed. * Stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure ************************************************************************/ static int ixgbe_if_detach(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); if (ixgbe_pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } ixgbe_setup_low_power_mode(ctx); /* let hardware know driver is unloading */ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_free_pci_resources(ctx); free(adapter->mta, M_IXGBE); return (0); } /* ixgbe_if_detach */ /************************************************************************ * ixgbe_setup_low_power_mode - LPLU/WoL preparation * * Prepare the adapter/port for LPLU and/or WoL ************************************************************************/ static int ixgbe_setup_low_power_mode(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); s32 error = 0; if (!hw->wol_enabled) ixgbe_set_phy_power(hw, FALSE); /* Limit power management flow to X550EM baseT */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && hw->phy.ops.enter_lplu) { /* Turn off support for APM wakeup. (Using ACPI instead) */ IXGBE_WRITE_REG(hw, IXGBE_GRC, IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); /* * Clear Wake Up Status register to prevent any previous wakeup * events from waking us up immediately after we suspend. */ IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* * Program the Wakeup Filter Control register with user filter * settings */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); /* Enable wakeups and power management in Wakeup Control */ IXGBE_WRITE_REG(hw, IXGBE_WUC, IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); /* X550EM baseT adapters need a special LPLU flow */ hw->phy.reset_disable = TRUE; ixgbe_if_stop(ctx); error = hw->phy.ops.enter_lplu(hw); if (error) device_printf(dev, "Error entering LPLU: %d\n", error); hw->phy.reset_disable = FALSE; } else { /* Just stop for other adapters */ ixgbe_if_stop(ctx); } return error; } /* ixgbe_setup_low_power_mode */ /************************************************************************ * ixgbe_shutdown - Shutdown entry point ************************************************************************/ static int ixgbe_if_shutdown(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixgbe_shutdown: begin"); error = ixgbe_setup_low_power_mode(ctx); return (error); } /* ixgbe_if_shutdown */ /************************************************************************ * ixgbe_suspend * * From D0 to D3 ************************************************************************/ static int ixgbe_if_suspend(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixgbe_suspend: begin"); error = ixgbe_setup_low_power_mode(ctx); return (error); } /* ixgbe_if_suspend */ /************************************************************************ * ixgbe_resume * * From D3 to D0 ************************************************************************/ static int ixgbe_if_resume(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 wus; INIT_DEBUGOUT("ixgbe_resume: begin"); /* Read & clear WUS register */ wus = IXGBE_READ_REG(hw, IXGBE_WUS); if (wus) device_printf(dev, "Woken up by (WUS): %#010x\n", IXGBE_READ_REG(hw, IXGBE_WUS)); IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* And clear WUFC until next low-power transition */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); /* * Required after D3->D0 transition; * will re-advertise all previous advertised speeds */ if (ifp->if_flags & IFF_UP) ixgbe_if_init(ctx); return (0); } /* ixgbe_if_resume */ /************************************************************************ * ixgbe_if_mtu_set - Ioctl mtu entry point * * Return 0 on success, EINVAL on failure ************************************************************************/ static int ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct adapter *adapter = iflib_get_softc(ctx); int error = 0; IOCTL_DEBUGOUT("ioctl: SIOCIFMTU (Set Interface MTU)"); if (mtu > IXGBE_MAX_MTU) { error = EINVAL; } else { adapter->max_frame_size = mtu + IXGBE_MTU_HDR; } return error; } /* ixgbe_if_mtu_set */ /************************************************************************ * ixgbe_if_crcstrip_set ************************************************************************/ static void ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff, int crcstrip) { struct adapter *sc = iflib_get_softc(ctx); struct ixgbe_hw *hw = &sc->hw; /* crc stripping is set in two places: * IXGBE_HLREG0 (modified on init_locked and hw reset) * IXGBE_RDRXCTL (set by the original driver in * ixgbe_setup_hw_rsc() called in init_locked. * We disable the setting when netmap is compiled in). * We update the values here, but also in ixgbe.c because * init_locked sometimes is called outside our control. */ uint32_t hl, rxc; hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); #ifdef NETMAP if (netmap_verbose) D("%s read HLREG 0x%x rxc 0x%x", onoff ? "enter" : "exit", hl, rxc); #endif /* hw requirements ... */ rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; rxc |= IXGBE_RDRXCTL_RSCACKC; if (onoff && !crcstrip) { /* keep the crc. Fast rx */ hl &= ~IXGBE_HLREG0_RXCRCSTRP; rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; } else { /* reset default mode */ hl |= IXGBE_HLREG0_RXCRCSTRP; rxc |= IXGBE_RDRXCTL_CRCSTRIP; } #ifdef NETMAP if (netmap_verbose) D("%s write HLREG 0x%x rxc 0x%x", onoff ? "enter" : "exit", hl, rxc); #endif IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); } /* ixgbe_if_crcstrip_set */ /********************************************************************* * ixgbe_if_init - Init entry point * * Used in two ways: It is used by the stack as an init * entry point in network interface structure. It is also * used by the driver as a hw/sw initialization routine to * get to a consistent state. * * Return 0 on success, positive on failure **********************************************************************/ void ixgbe_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; struct ix_rx_queue *rx_que; struct ix_tx_queue *tx_que; u32 txdctl, mhadd; u32 rxdctl, rxctrl; u32 ctrl_ext; int i, j, err; INIT_DEBUGOUT("ixgbe_if_init: begin"); /* Queue indices may change with IOV mode */ ixgbe_align_all_queue_indices(adapter); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); hw->addr_ctrl.rar_used_count = 1; ixgbe_init_hw(hw); ixgbe_initialize_iov(adapter); ixgbe_initialize_transmit_units(ctx); /* Setup Multicast table */ ixgbe_if_multi_set(ctx); /* Determine the correct mbuf pool, based on frame size */ adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx); /* Configure RX settings */ ixgbe_initialize_receive_units(ctx); /* * Initialize variable holding task enqueue requests * from MSI-X interrupts */ adapter->task_requests = 0; /* Enable SDP & MSI-X interrupts based on adapter */ ixgbe_config_gpie(adapter); /* Set MTU size */ if (ifp->if_mtu > ETHERMTU) { /* aka IXGBE_MAXFRS on 82599 and newer */ mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } /* Now enable all the queues */ for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ txdctl |= (8 << 16); /* * When the internal queue falls below PTHRESH (32), * start prefetching as long as there are at least * HTHRESH (1) buffers ready. The values are taken * from the Intel linux driver 3.8.21. * Prefetching enables tx line rate even with 1 queue. */ txdctl |= (32 << 0) | (1 << 8); IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); } for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); if (hw->mac.type == ixgbe_mac_82598EB) { /* * PTHRESH = 21 * HTHRESH = 4 * WTHRESH = 8 */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; } rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); for (j = 0; j < 10; j++) { if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); } /* Enable Receive engine */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); if (hw->mac.type == ixgbe_mac_82598EB) rxctrl |= IXGBE_RXCTRL_DMBYPS; rxctrl |= IXGBE_RXCTRL_RXEN; ixgbe_enable_rx_dma(hw, rxctrl); /* Set up MSI/MSI-X routing */ if (ixgbe_enable_msix) { ixgbe_configure_ivars(adapter); /* Set up auto-mask */ if (hw->mac.type == ixgbe_mac_82598EB) IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); else { IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); } } else { /* Simple settings for Legacy/MSI */ ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 0, 0, 1); IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); } ixgbe_init_fdir(adapter); /* * Check on any SFP devices that * need to be kick-started */ if (hw->phy.type == ixgbe_phy_none) { err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } } /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); /* Enable power to the phy. */ ixgbe_set_phy_power(hw, TRUE); /* Config/Enable Link */ ixgbe_config_link(ctx); /* Hardware Packet Buffer & Flow Control setup */ ixgbe_config_delay_values(adapter); /* Initialize the FC settings */ ixgbe_start_hw(hw); /* Set up VLAN support and filter */ ixgbe_setup_vlan_hw_support(ctx); /* Setup DMA Coalescing */ ixgbe_config_dmac(adapter); /* And now turn on interrupts */ ixgbe_if_enable_intr(ctx); /* Enable the use of the MBX by the VF's */ if (adapter->feat_en & IXGBE_FEATURE_SRIOV) { ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); } } /* ixgbe_init_locked */ /************************************************************************ * ixgbe_set_ivar * * Setup the correct IVAR register for a particular MSI-X interrupt * (yes this is all very magic and confusing :) * - entry is the register array entry * - vector is the MSI-X vector for this queue * - type is RX/TX/MISC ************************************************************************/ static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; switch (hw->mac.type) { case ixgbe_mac_82598EB: if (type == -1) entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; else entry += (type * 64); index = (entry >> 2) & 0x1F; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); ivar &= ~(0xFF << (8 * (entry & 0x3))); ivar |= (vector << (8 * (entry & 0x3))); IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: if (type == -1) { /* MISC IVAR */ index = (entry & 1) * 8; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); } default: break; } } /* ixgbe_set_ivar */ /************************************************************************ * ixgbe_configure_ivars ************************************************************************/ static void ixgbe_configure_ivars(struct adapter *adapter) { struct ix_rx_queue *rx_que = adapter->rx_queues; struct ix_tx_queue *tx_que = adapter->tx_queues; u32 newitr; if (ixgbe_max_interrupt_rate > 0) newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; else { /* * Disable DMA coalescing if interrupt moderation is * disabled. */ adapter->dmac = 0; newitr = 0; } for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; /* First the RX queue entry */ ixgbe_set_ivar(adapter, rxr->me, rx_que->msix, 0); /* Set an Initial EITR value */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(rx_que->msix), newitr); } for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; /* ... and the TX */ ixgbe_set_ivar(adapter, txr->me, tx_que->msix, 1); } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->vector, -1); } /* ixgbe_configure_ivars */ /************************************************************************ * ixgbe_config_gpie ************************************************************************/ static void ixgbe_config_gpie(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 gpie; gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); if (adapter->intr_type == IFLIB_INTR_MSIX) { /* Enable Enhanced MSI-X mode */ gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD; } /* Fan Failure Interrupt */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) gpie |= IXGBE_SDP1_GPIEN; /* Thermal Sensor Interrupt */ if (adapter->feat_en & IXGBE_FEATURE_TEMP_SENSOR) gpie |= IXGBE_SDP0_GPIEN_X540; /* Link detection */ switch (hw->mac.type) { case ixgbe_mac_82599EB: gpie |= IXGBE_SDP1_GPIEN | IXGBE_SDP2_GPIEN; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: gpie |= IXGBE_SDP0_GPIEN_X540; break; default: break; } IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); } /* ixgbe_config_gpie */ /************************************************************************ * ixgbe_config_delay_values * * Requires adapter->max_frame_size to be set. ************************************************************************/ static void ixgbe_config_delay_values(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 rxpb, frame, size, tmp; frame = adapter->max_frame_size; /* Calculate High Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: tmp = IXGBE_DV_X540(frame, frame); break; default: tmp = IXGBE_DV(frame, frame); break; } size = IXGBE_BT2KB(tmp); rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; hw->fc.high_water[0] = rxpb - size; /* Now calculate Low Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: tmp = IXGBE_LOW_DV_X540(frame); break; default: tmp = IXGBE_LOW_DV(frame); break; } hw->fc.low_water[0] = IXGBE_BT2KB(tmp); hw->fc.pause_time = IXGBE_FC_PAUSE; hw->fc.send_xon = TRUE; } /* ixgbe_config_delay_values */ /************************************************************************ * ixgbe_set_multi - Multicast Update * * Called whenever multicast address list is updated. ************************************************************************/ static u_int ixgbe_mc_filter_apply(void *arg, struct sockaddr_dl *sdl, u_int count) { struct adapter *adapter = arg; struct ixgbe_mc_addr *mta = adapter->mta; if (count == MAX_NUM_MULTICAST_ADDRESSES) return (0); bcopy(LLADDR(sdl), mta[count].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); mta[count].vmdq = adapter->pool; return (1); } /* ixgbe_mc_filter_apply */ static void ixgbe_if_multi_set(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_mc_addr *mta; struct ifnet *ifp = iflib_get_ifp(ctx); u8 *update_ptr; u32 fctrl; u_int mcnt; IOCTL_DEBUGOUT("ixgbe_if_multi_set: begin"); mta = adapter->mta; bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); mcnt = if_foreach_llmaddr(iflib_get_ifp(ctx), ixgbe_mc_filter_apply, adapter); fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); if (ifp->if_flags & IFF_PROMISC) fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || ifp->if_flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; fctrl &= ~IXGBE_FCTRL_UPE; } else fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { update_ptr = (u8 *)mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); } } /* ixgbe_if_multi_set */ /************************************************************************ * ixgbe_mc_array_itr * * An iterator function needed by the multicast shared code. * It feeds the shared code routine the addresses in the * array of ixgbe_set_multi() one by one. ************************************************************************/ static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { struct ixgbe_mc_addr *mta; mta = (struct ixgbe_mc_addr *)*update_ptr; *vmdq = mta->vmdq; *update_ptr = (u8*)(mta + 1); return (mta->addr); } /* ixgbe_mc_array_itr */ /************************************************************************ * ixgbe_local_timer - Timer routine * * Checks for link status, updates statistics, * and runs the watchdog check. ************************************************************************/ static void ixgbe_if_timer(if_ctx_t ctx, uint16_t qid) { struct adapter *adapter = iflib_get_softc(ctx); if (qid != 0) return; /* Check for pluggable optics */ if (adapter->sfp_probe) if (!ixgbe_sfp_probe(ctx)) return; /* Nothing to do */ ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, 0); /* Fire off the adminq task */ iflib_admin_intr_deferred(ctx); } /* ixgbe_if_timer */ /************************************************************************ * ixgbe_sfp_probe * * Determine if a port had optics inserted. ************************************************************************/ static bool ixgbe_sfp_probe(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); bool result = FALSE; if ((hw->phy.type == ixgbe_phy_nl) && (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { s32 ret = hw->phy.ops.identify_sfp(hw); if (ret) goto out; ret = hw->phy.ops.reset(hw); adapter->sfp_probe = FALSE; if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!"); device_printf(dev, "Reload driver with supported module.\n"); goto out; } else device_printf(dev, "SFP+ module detected!\n"); /* We now have supported optics */ result = TRUE; } out: return (result); } /* ixgbe_sfp_probe */ /************************************************************************ * ixgbe_handle_mod - Tasklet for SFP module interrupts ************************************************************************/ static void ixgbe_handle_mod(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u32 err, cage_full = 0; if (adapter->hw.need_crosstalk_fix) { switch (hw->mac.type) { case ixgbe_mac_82599EB: cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & IXGBE_ESDP_SDP2; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & IXGBE_ESDP_SDP0; break; default: break; } if (!cage_full) goto handle_mod_out; } err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); goto handle_mod_out; } if (hw->mac.type == ixgbe_mac_82598EB) err = hw->phy.ops.reset(hw); else err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); goto handle_mod_out; } adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; return; handle_mod_out: adapter->task_requests &= ~(IXGBE_REQUEST_TASK_MSF); } /* ixgbe_handle_mod */ /************************************************************************ * ixgbe_handle_msf - Tasklet for MSF (multispeed fiber) interrupts ************************************************************************/ static void ixgbe_handle_msf(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; bool negotiate; /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, TRUE); /* Adjust media types shown in ifconfig */ ifmedia_removeall(adapter->media); ixgbe_add_media_types(adapter->ctx); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); } /* ixgbe_handle_msf */ /************************************************************************ * ixgbe_handle_phy - Tasklet for external PHY interrupts ************************************************************************/ static void ixgbe_handle_phy(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int error; error = hw->phy.ops.handle_lasi(hw); if (error == IXGBE_ERR_OVERTEMP) device_printf(adapter->dev, "CRITICAL: EXTERNAL PHY OVER TEMP!! PHY will downshift to lower power state!\n"); else if (error) device_printf(adapter->dev, "Error handling LASI interrupt: %d\n", error); } /* ixgbe_handle_phy */ /************************************************************************ * ixgbe_if_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. ************************************************************************/ static void ixgbe_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; INIT_DEBUGOUT("ixgbe_if_stop: begin\n"); ixgbe_reset_hw(hw); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); if (hw->mac.type == ixgbe_mac_82599EB) ixgbe_stop_mac_link_on_d3_82599(hw); /* Turn off the laser - noop with no optics */ ixgbe_disable_tx_laser(hw); /* Update the stack */ adapter->link_up = FALSE; ixgbe_if_update_admin_status(ctx); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); return; } /* ixgbe_if_stop */ /************************************************************************ * ixgbe_update_link_status - Update OS on link state * * Note: Only updates the OS on the cached link state. * The real check of the hardware only happens with * a link interrupt. ************************************************************************/ static void ixgbe_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if (adapter->link_up) { if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev, "Link is up %d Gbps %s \n", ((adapter->link_speed == 128) ? 10 : 1), "Full Duplex"); adapter->link_active = TRUE; /* Update any Flow Control changes */ ixgbe_fc_enable(&adapter->hw); /* Update DMA coalescing config */ ixgbe_config_dmac(adapter); /* should actually be negotiated value */ iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); adapter->link_active = FALSE; if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } } /* Handle task requests from msix_link() */ if (adapter->task_requests & IXGBE_REQUEST_TASK_MOD) ixgbe_handle_mod(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_MSF) ixgbe_handle_msf(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_MBX) ixgbe_handle_mbx(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_FDIR) ixgbe_reinit_fdir(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_PHY) ixgbe_handle_phy(ctx); adapter->task_requests = 0; ixgbe_update_stats_counters(adapter); } /* ixgbe_if_update_admin_status */ /************************************************************************ * ixgbe_config_dmac - Configure DMA Coalescing ************************************************************************/ static void ixgbe_config_dmac(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config) return; if (dcfg->watchdog_timer ^ adapter->dmac || dcfg->link_speed ^ adapter->link_speed) { dcfg->watchdog_timer = adapter->dmac; dcfg->fcoe_en = FALSE; dcfg->link_speed = adapter->link_speed; dcfg->num_tcs = 1; INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", dcfg->watchdog_timer, dcfg->link_speed); hw->mac.ops.dmac_config(hw); } } /* ixgbe_config_dmac */ /************************************************************************ * ixgbe_if_enable_intr ************************************************************************/ void ixgbe_if_enable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; struct ix_rx_queue *que = adapter->rx_queues; u32 mask, fwsm; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: mask |= IXGBE_EIMS_ECC; /* Temperature sensor on some adapters */ mask |= IXGBE_EIMS_GPI_SDP0; /* SFP+ (RX_LOS_N & MOD_ABS_N) */ mask |= IXGBE_EIMS_GPI_SDP1; mask |= IXGBE_EIMS_GPI_SDP2; break; case ixgbe_mac_X540: /* Detect if Thermal Sensor is enabled */ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); if (fwsm & IXGBE_FWSM_TS_ENABLED) mask |= IXGBE_EIMS_TS; mask |= IXGBE_EIMS_ECC; break; case ixgbe_mac_X550: /* MAC thermal sensor is automatically enabled */ mask |= IXGBE_EIMS_TS; mask |= IXGBE_EIMS_ECC; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: /* Some devices use SDP0 for important information */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N || hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw); if (hw->phy.type == ixgbe_phy_x550em_ext_t) mask |= IXGBE_EICR_GPI_SDP0_X540; mask |= IXGBE_EIMS_ECC; break; default: break; } /* Enable Fan Failure detection */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) mask |= IXGBE_EIMS_GPI_SDP1; /* Enable SR-IOV */ if (adapter->feat_en & IXGBE_FEATURE_SRIOV) mask |= IXGBE_EIMS_MAILBOX; /* Enable Flow Director */ if (adapter->feat_en & IXGBE_FEATURE_FDIR) mask |= IXGBE_EIMS_FLOW_DIR; IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); /* With MSI-X we use auto clear */ if (adapter->intr_type == IFLIB_INTR_MSIX) { mask = IXGBE_EIMS_ENABLE_MASK; /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; mask &= ~IXGBE_EIMS_LSC; if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) mask &= ~IXGBE_EIMS_MAILBOX; IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); } /* * Now enable all queues, this is done separately to * allow for handling the extended (beyond 32) MSI-X * vectors that can be used by 82599 */ for (int i = 0; i < adapter->num_rx_queues; i++, que++) ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); } /* ixgbe_if_enable_intr */ /************************************************************************ * ixgbe_disable_intr ************************************************************************/ static void ixgbe_if_disable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->intr_type == IFLIB_INTR_MSIX) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); if (adapter->hw.mac.type == ixgbe_mac_82598EB) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); } else { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); } IXGBE_WRITE_FLUSH(&adapter->hw); } /* ixgbe_if_disable_intr */ /************************************************************************ * ixgbe_link_intr_enable ************************************************************************/ static void ixgbe_link_intr_enable(if_ctx_t ctx) { struct ixgbe_hw *hw = &((struct adapter *)iflib_get_softc(ctx))->hw; /* Re-enable other interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); } /* ixgbe_link_intr_enable */ /************************************************************************ * ixgbe_if_rx_queue_intr_enable ************************************************************************/ static int ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = &adapter->rx_queues[rxqid]; ixgbe_enable_queue(adapter, que->msix); return (0); } /* ixgbe_if_rx_queue_intr_enable */ /************************************************************************ * ixgbe_enable_queue ************************************************************************/ static void ixgbe_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = 1ULL << vector; u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } } /* ixgbe_enable_queue */ /************************************************************************ * ixgbe_disable_queue ************************************************************************/ static void ixgbe_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = 1ULL << vector; u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); } } /* ixgbe_disable_queue */ /************************************************************************ * ixgbe_intr - Legacy Interrupt Service Routine ************************************************************************/ int ixgbe_intr(void *arg) { struct adapter *adapter = arg; struct ix_rx_queue *que = adapter->rx_queues; struct ixgbe_hw *hw = &adapter->hw; if_ctx_t ctx = adapter->ctx; u32 eicr, eicr_mask; eicr = IXGBE_READ_REG(hw, IXGBE_EICR); ++que->irqs; if (eicr == 0) { ixgbe_if_enable_intr(ctx); return (FILTER_HANDLED); } /* Check for fan failure */ if ((hw->device_id == IXGBE_DEV_ID_82598AT) && (eicr & IXGBE_EICR_GPI_SDP1)) { device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); iflib_admin_intr_deferred(ctx); } if (ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ if (hw->mac.type >= ixgbe_mac_X540) eicr_mask = IXGBE_EICR_GPI_SDP0_X540; else eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; return (FILTER_SCHEDULE_THREAD); } /* ixgbe_intr */ /************************************************************************ * ixgbe_free_pci_resources ************************************************************************/ static void ixgbe_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); if (que != NULL) { for (int i = 0; i < adapter->num_rx_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } } if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixgbe_free_pci_resources */ /************************************************************************ * ixgbe_sysctl_flowcntl * * SYSCTL wrapper around setting Flow Control ************************************************************************/ static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, fc; adapter = (struct adapter *)arg1; fc = adapter->hw.fc.current_mode; error = sysctl_handle_int(oidp, &fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Don't bother if it's not changed */ if (fc == adapter->hw.fc.current_mode) return (0); return ixgbe_set_flowcntl(adapter, fc); } /* ixgbe_sysctl_flowcntl */ /************************************************************************ * ixgbe_set_flowcntl - Set flow control * * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full ************************************************************************/ static int ixgbe_set_flowcntl(struct adapter *adapter, int fc) { switch (fc) { case ixgbe_fc_rx_pause: case ixgbe_fc_tx_pause: case ixgbe_fc_full: adapter->hw.fc.requested_mode = fc; if (adapter->num_rx_queues > 1) ixgbe_disable_rx_drop(adapter); break; case ixgbe_fc_none: adapter->hw.fc.requested_mode = ixgbe_fc_none; if (adapter->num_rx_queues > 1) ixgbe_enable_rx_drop(adapter); break; default: return (EINVAL); } /* Don't autoneg if forcing a value */ adapter->hw.fc.disable_fc_autoneg = TRUE; ixgbe_fc_enable(&adapter->hw); return (0); } /* ixgbe_set_flowcntl */ /************************************************************************ * ixgbe_enable_rx_drop * * Enable the hardware to drop packets when the buffer is * full. This is useful with multiqueue, so that no single * queue being full stalls the entire RX engine. We only * enable this when Multiqueue is enabled AND Flow Control * is disabled. ************************************************************************/ static void ixgbe_enable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 srrctl; for (int i = 0; i < adapter->num_rx_queues; i++) { rxr = &adapter->rx_queues[i].rxr; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl |= IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } /* enable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE)); } } /* ixgbe_enable_rx_drop */ /************************************************************************ * ixgbe_disable_rx_drop ************************************************************************/ static void ixgbe_disable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 srrctl; for (int i = 0; i < adapter->num_rx_queues; i++) { rxr = &adapter->rx_queues[i].rxr; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl &= ~IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } /* disable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT))); } } /* ixgbe_disable_rx_drop */ /************************************************************************ * ixgbe_sysctl_advertise * * SYSCTL wrapper around setting advertised speed ************************************************************************/ static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, advertise; adapter = (struct adapter *)arg1; advertise = adapter->advertise; error = sysctl_handle_int(oidp, &advertise, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixgbe_set_advertise(adapter, advertise); } /* ixgbe_sysctl_advertise */ /************************************************************************ * ixgbe_set_advertise - Control advertised link speed * * Flags: * 0x1 - advertise 100 Mb * 0x2 - advertise 1G * 0x4 - advertise 10G * 0x8 - advertise 10 Mb (yes, Mb) ************************************************************************/ static int ixgbe_set_advertise(struct adapter *adapter, int advertise) { device_t dev = iflib_get_dev(adapter->ctx); struct ixgbe_hw *hw; ixgbe_link_speed speed = 0; ixgbe_link_speed link_caps = 0; s32 err = IXGBE_NOT_IMPLEMENTED; bool negotiate = FALSE; /* Checks to validate new value */ if (adapter->advertise == advertise) /* no change */ return (0); hw = &adapter->hw; /* No speed changes for backplane media */ if (hw->phy.media_type == ixgbe_media_type_backplane) return (ENODEV); if (!((hw->phy.media_type == ixgbe_media_type_copper) || (hw->phy.multispeed_fiber))) { device_printf(dev, "Advertised speed can only be set on copper or multispeed fiber media types.\n"); return (EINVAL); } if (advertise < 0x1 || advertise > 0xF) { device_printf(dev, "Invalid advertised speed; valid modes are 0x1 through 0xF\n"); return (EINVAL); } if (hw->mac.ops.get_link_capabilities) { err = hw->mac.ops.get_link_capabilities(hw, &link_caps, &negotiate); if (err != IXGBE_SUCCESS) { device_printf(dev, "Unable to determine supported advertise speeds\n"); return (ENODEV); } } /* Set new value and report new advertised mode */ if (advertise & 0x1) { if (!(link_caps & IXGBE_LINK_SPEED_100_FULL)) { device_printf(dev, "Interface does not support 100Mb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_100_FULL; } if (advertise & 0x2) { if (!(link_caps & IXGBE_LINK_SPEED_1GB_FULL)) { device_printf(dev, "Interface does not support 1Gb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_1GB_FULL; } if (advertise & 0x4) { if (!(link_caps & IXGBE_LINK_SPEED_10GB_FULL)) { device_printf(dev, "Interface does not support 10Gb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_10GB_FULL; } if (advertise & 0x8) { if (!(link_caps & IXGBE_LINK_SPEED_10_FULL)) { device_printf(dev, "Interface does not support 10Mb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_10_FULL; } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); adapter->advertise = advertise; return (0); } /* ixgbe_set_advertise */ /************************************************************************ * ixgbe_get_advertise - Get current advertised speed settings * * Formatted for sysctl usage. * Flags: * 0x1 - advertise 100 Mb * 0x2 - advertise 1G * 0x4 - advertise 10G * 0x8 - advertise 10 Mb (yes, Mb) ************************************************************************/ static int ixgbe_get_advertise(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int speed; ixgbe_link_speed link_caps = 0; s32 err; bool negotiate = FALSE; /* * Advertised speed means nothing unless it's copper or * multi-speed fiber */ if (!(hw->phy.media_type == ixgbe_media_type_copper) && !(hw->phy.multispeed_fiber)) return (0); err = hw->mac.ops.get_link_capabilities(hw, &link_caps, &negotiate); if (err != IXGBE_SUCCESS) return (0); speed = ((link_caps & IXGBE_LINK_SPEED_10GB_FULL) ? 4 : 0) | ((link_caps & IXGBE_LINK_SPEED_1GB_FULL) ? 2 : 0) | ((link_caps & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0) | ((link_caps & IXGBE_LINK_SPEED_10_FULL) ? 8 : 0); return speed; } /* ixgbe_get_advertise */ /************************************************************************ * ixgbe_sysctl_dmac - Manage DMA Coalescing * * Control values: * 0/1 - off / on (use default value of 1000) * * Legal timer values are: * 50,100,250,500,1000,2000,5000,10000 * * Turning off interrupt moderation will also turn this off. ************************************************************************/ static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ifnet *ifp = iflib_get_ifp(adapter->ctx); int error; u16 newval; newval = adapter->dmac; error = sysctl_handle_16(oidp, &newval, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (newval) { case 0: /* Disabled */ adapter->dmac = 0; break; case 1: /* Enable and use default */ adapter->dmac = 1000; break; case 50: case 100: case 250: case 500: case 1000: case 2000: case 5000: case 10000: /* Legal values - allow */ adapter->dmac = newval; break; default: /* Do nothing, illegal value */ return (EINVAL); } /* Re-initialize hardware if it's already running */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) ifp->if_init(ifp); return (0); } /* ixgbe_sysctl_dmac */ #ifdef IXGBE_DEBUG /************************************************************************ * ixgbe_sysctl_power_state * * Sysctl to test power states * Values: * 0 - set device to D0 * 3 - set device to D3 * (none) - get current device power state ************************************************************************/ static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; device_t dev = adapter->dev; int curr_ps, new_ps, error = 0; curr_ps = new_ps = pci_get_powerstate(dev); error = sysctl_handle_int(oidp, &new_ps, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_ps == curr_ps) return (0); if (new_ps == 3 && curr_ps == 0) error = DEVICE_SUSPEND(dev); else if (new_ps == 0 && curr_ps == 3) error = DEVICE_RESUME(dev); else return (EINVAL); device_printf(dev, "New state: %d\n", pci_get_powerstate(dev)); return (error); } /* ixgbe_sysctl_power_state */ #endif /************************************************************************ * ixgbe_sysctl_wol_enable * * Sysctl to enable/disable the WoL capability, * if supported by the adapter. * * Values: * 0 - disabled * 1 - enabled ************************************************************************/ static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; int new_wol_enabled; int error = 0; new_wol_enabled = hw->wol_enabled; error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req); if ((error) || (req->newptr == NULL)) return (error); new_wol_enabled = !!(new_wol_enabled); if (new_wol_enabled == hw->wol_enabled) return (0); if (new_wol_enabled > 0 && !adapter->wol_support) return (ENODEV); else hw->wol_enabled = new_wol_enabled; return (0); } /* ixgbe_sysctl_wol_enable */ /************************************************************************ * ixgbe_sysctl_wufc - Wake Up Filter Control * * Sysctl to enable/disable the types of packets that the * adapter will wake up on upon receipt. * Flags: * 0x1 - Link Status Change * 0x2 - Magic Packet * 0x4 - Direct Exact * 0x8 - Directed Multicast * 0x10 - Broadcast * 0x20 - ARP/IPv4 Request Packet * 0x40 - Direct IPv4 Packet * 0x80 - Direct IPv6 Packet * * Settings not listed above will cause the sysctl to return an error. ************************************************************************/ static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error = 0; u32 new_wufc; new_wufc = adapter->wufc; error = sysctl_handle_32(oidp, &new_wufc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_wufc == adapter->wufc) return (0); if (new_wufc & 0xffffff00) return (EINVAL); new_wufc &= 0xff; new_wufc |= (0xffffff & adapter->wufc); adapter->wufc = new_wufc; return (0); } /* ixgbe_sysctl_wufc */ #ifdef IXGBE_DEBUG /************************************************************************ * ixgbe_sysctl_print_rss_config ************************************************************************/ static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; struct sbuf *buf; int error = 0, reta_size; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } // TODO: use sbufs to make a string to print out /* Set multiplier for RETA setup and table size based on MAC */ switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: reta_size = 128; break; default: reta_size = 32; break; } /* Print out the redirection table */ sbuf_cat(buf, "\n"); for (int i = 0; i < reta_size; i++) { if (i < 32) { reg = IXGBE_READ_REG(hw, IXGBE_RETA(i)); sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg); } else { reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32)); sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg); } } // TODO: print more config error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (0); } /* ixgbe_sysctl_print_rss_config */ #endif /* IXGBE_DEBUG */ /************************************************************************ * ixgbe_sysctl_phy_temp - Retrieve temperature of PHY * * For X552/X557-AT devices using an external PHY ************************************************************************/ static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(iflib_get_dev(adapter->ctx), "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(iflib_get_dev(adapter->ctx), "Error reading from PHY's current temperature register\n"); return (EAGAIN); } /* Shift temp for output */ reg = reg >> 8; return (sysctl_handle_16(oidp, NULL, reg, req)); } /* ixgbe_sysctl_phy_temp */ /************************************************************************ * ixgbe_sysctl_phy_overtemp_occurred * * Reports (directly from the PHY) whether the current PHY * temperature is over the overtemp threshold. ************************************************************************/ static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(iflib_get_dev(adapter->ctx), "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(iflib_get_dev(adapter->ctx), "Error reading from PHY's temperature status register\n"); return (EAGAIN); } /* Get occurrence bit */ reg = !!(reg & 0x4000); return (sysctl_handle_16(oidp, 0, reg, req)); } /* ixgbe_sysctl_phy_overtemp_occurred */ /************************************************************************ * ixgbe_sysctl_eee_state * * Sysctl to set EEE power saving feature * Values: * 0 - disable EEE * 1 - enable EEE * (none) - get current device EEE state ************************************************************************/ static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; device_t dev = adapter->dev; struct ifnet *ifp = iflib_get_ifp(adapter->ctx); int curr_eee, new_eee, error = 0; s32 retval; curr_eee = new_eee = !!(adapter->feat_en & IXGBE_FEATURE_EEE); error = sysctl_handle_int(oidp, &new_eee, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Nothing to do */ if (new_eee == curr_eee) return (0); /* Not supported */ if (!(adapter->feat_cap & IXGBE_FEATURE_EEE)) return (EINVAL); /* Bounds checking */ if ((new_eee < 0) || (new_eee > 1)) return (EINVAL); retval = ixgbe_setup_eee(&adapter->hw, new_eee); if (retval) { device_printf(dev, "Error in EEE setup: 0x%08X\n", retval); return (EINVAL); } /* Restart auto-neg */ ifp->if_init(ifp); device_printf(dev, "New EEE state: %d\n", new_eee); /* Cache new value */ if (new_eee) adapter->feat_en |= IXGBE_FEATURE_EEE; else adapter->feat_en &= ~IXGBE_FEATURE_EEE; return (error); } /* ixgbe_sysctl_eee_state */ /************************************************************************ * ixgbe_init_device_features ************************************************************************/ static void ixgbe_init_device_features(struct adapter *adapter) { adapter->feat_cap = IXGBE_FEATURE_NETMAP | IXGBE_FEATURE_RSS | IXGBE_FEATURE_MSI | IXGBE_FEATURE_MSIX | IXGBE_FEATURE_LEGACY_IRQ; /* Set capabilities first... */ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: if (adapter->hw.device_id == IXGBE_DEV_ID_82598AT) adapter->feat_cap |= IXGBE_FEATURE_FAN_FAIL; break; case ixgbe_mac_X540: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; if ((adapter->hw.device_id == IXGBE_DEV_ID_X540_BYPASS) && (adapter->hw.bus.func == 0)) adapter->feat_cap |= IXGBE_FEATURE_BYPASS; break; case ixgbe_mac_X550: adapter->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR; adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; break; case ixgbe_mac_X550EM_x: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; break; case ixgbe_mac_X550EM_a: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; adapter->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ; if ((adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T) || (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L)) { adapter->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR; adapter->feat_cap |= IXGBE_FEATURE_EEE; } break; case ixgbe_mac_82599EB: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; if ((adapter->hw.device_id == IXGBE_DEV_ID_82599_BYPASS) && (adapter->hw.bus.func == 0)) adapter->feat_cap |= IXGBE_FEATURE_BYPASS; if (adapter->hw.device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) adapter->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ; break; default: break; } /* Enabled by default... */ /* Fan failure detection */ if (adapter->feat_cap & IXGBE_FEATURE_FAN_FAIL) adapter->feat_en |= IXGBE_FEATURE_FAN_FAIL; /* Netmap */ if (adapter->feat_cap & IXGBE_FEATURE_NETMAP) adapter->feat_en |= IXGBE_FEATURE_NETMAP; /* EEE */ if (adapter->feat_cap & IXGBE_FEATURE_EEE) adapter->feat_en |= IXGBE_FEATURE_EEE; /* Thermal Sensor */ if (adapter->feat_cap & IXGBE_FEATURE_TEMP_SENSOR) adapter->feat_en |= IXGBE_FEATURE_TEMP_SENSOR; /* Enabled via global sysctl... */ /* Flow Director */ if (ixgbe_enable_fdir) { if (adapter->feat_cap & IXGBE_FEATURE_FDIR) adapter->feat_en |= IXGBE_FEATURE_FDIR; else device_printf(adapter->dev, "Device does not support Flow Director. Leaving disabled."); } /* * Message Signal Interrupts - Extended (MSI-X) * Normal MSI is only enabled if MSI-X calls fail. */ if (!ixgbe_enable_msix) adapter->feat_cap &= ~IXGBE_FEATURE_MSIX; /* Receive-Side Scaling (RSS) */ if ((adapter->feat_cap & IXGBE_FEATURE_RSS) && ixgbe_enable_rss) adapter->feat_en |= IXGBE_FEATURE_RSS; /* Disable features with unmet dependencies... */ /* No MSI-X */ if (!(adapter->feat_cap & IXGBE_FEATURE_MSIX)) { adapter->feat_cap &= ~IXGBE_FEATURE_RSS; adapter->feat_cap &= ~IXGBE_FEATURE_SRIOV; adapter->feat_en &= ~IXGBE_FEATURE_RSS; adapter->feat_en &= ~IXGBE_FEATURE_SRIOV; } } /* ixgbe_init_device_features */ /************************************************************************ * ixgbe_check_fan_failure ************************************************************************/ static void ixgbe_check_fan_failure(struct adapter *adapter, u32 reg, bool in_interrupt) { u32 mask; mask = (in_interrupt) ? IXGBE_EICR_GPI_SDP1_BY_MAC(&adapter->hw) : IXGBE_ESDP_SDP1; if (reg & mask) device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n"); } /* ixgbe_check_fan_failure */ diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index 7b8c7f169a7e..6bd92d262558 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -1,1952 +1,1952 @@ /****************************************************************************** Copyright (c) 2001-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "ixgbe.h" #include "ifdi_if.h" #include #include /************************************************************************ * Driver version ************************************************************************/ char ixv_driver_version[] = "2.0.1-k"; /************************************************************************ * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixv_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } ************************************************************************/ static pci_vendor_info_t ixv_vendor_info_array[] = { PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), /* required last entry */ PVID_END }; /************************************************************************ * Function prototypes ************************************************************************/ static void *ixv_register(device_t dev); static int ixv_if_attach_pre(if_ctx_t ctx); static int ixv_if_attach_post(if_ctx_t ctx); static int ixv_if_detach(if_ctx_t ctx); static int ixv_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static int ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static int ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static void ixv_if_queues_free(if_ctx_t ctx); static void ixv_identify_hardware(if_ctx_t ctx); static void ixv_init_device_features(struct adapter *); static int ixv_allocate_pci_resources(if_ctx_t ctx); static void ixv_free_pci_resources(if_ctx_t ctx); static int ixv_setup_interface(if_ctx_t ctx); static void ixv_if_media_status(if_ctx_t , struct ifmediareq *); static int ixv_if_media_change(if_ctx_t ctx); static void ixv_if_update_admin_status(if_ctx_t ctx); static int ixv_if_msix_intr_assign(if_ctx_t ctx, int msix); static int ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixv_if_init(if_ctx_t ctx); static void ixv_if_local_timer(if_ctx_t ctx, uint16_t qid); static void ixv_if_stop(if_ctx_t ctx); static int ixv_negotiate_api(struct adapter *); static void ixv_initialize_transmit_units(if_ctx_t ctx); static void ixv_initialize_receive_units(if_ctx_t ctx); static void ixv_initialize_rss_mapping(struct adapter *); static void ixv_setup_vlan_support(if_ctx_t ctx); static void ixv_configure_ivars(struct adapter *); static void ixv_if_enable_intr(if_ctx_t ctx); static void ixv_if_disable_intr(if_ctx_t ctx); static void ixv_if_multi_set(if_ctx_t ctx); static void ixv_if_register_vlan(if_ctx_t, u16); static void ixv_if_unregister_vlan(if_ctx_t, u16); static uint64_t ixv_if_get_counter(if_ctx_t, ift_counter); static bool ixv_if_needs_restart(if_ctx_t, enum iflib_restart_event); static void ixv_save_stats(struct adapter *); static void ixv_init_stats(struct adapter *); static void ixv_update_stats(struct adapter *); static void ixv_add_stats_sysctls(struct adapter *adapter); static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS); static void ixv_set_ivar(struct adapter *, u8, u8, s8); static u8 *ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); /* The MSI-X Interrupt handlers */ static int ixv_msix_que(void *); static int ixv_msix_mbx(void *); /************************************************************************ * FreeBSD Device Interface Entry Points ************************************************************************/ static device_method_t ixv_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixv_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD_END }; static driver_t ixv_driver = { "ixv", ixv_methods, sizeof(struct adapter), }; devclass_t ixv_devclass; DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); IFLIB_PNP_INFO(pci, ixv_driver, ixv_vendor_info_array); MODULE_DEPEND(ixv, iflib, 1, 1, 1); MODULE_DEPEND(ixv, pci, 1, 1, 1); MODULE_DEPEND(ixv, ether, 1, 1, 1); static device_method_t ixv_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixv_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixv_if_attach_post), DEVMETHOD(ifdi_detach, ixv_if_detach), DEVMETHOD(ifdi_init, ixv_if_init), DEVMETHOD(ifdi_stop, ixv_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixv_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixv_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixv_if_disable_intr), DEVMETHOD(ifdi_tx_queue_intr_enable, ixv_if_rx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, ixv_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixv_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixv_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixv_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixv_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixv_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixv_if_mtu_set), DEVMETHOD(ifdi_media_status, ixv_if_media_status), DEVMETHOD(ifdi_media_change, ixv_if_media_change), DEVMETHOD(ifdi_timer, ixv_if_local_timer), DEVMETHOD(ifdi_vlan_register, ixv_if_register_vlan), DEVMETHOD(ifdi_vlan_unregister, ixv_if_unregister_vlan), DEVMETHOD(ifdi_get_counter, ixv_if_get_counter), DEVMETHOD(ifdi_needs_restart, ixv_if_needs_restart), DEVMETHOD_END }; static driver_t ixv_if_driver = { "ixv_if", ixv_if_methods, sizeof(struct adapter) }; /* * TUNEABLE PARAMETERS: */ /* Flow control setting, default to full */ static int ixv_flow_control = ixgbe_fc_full; TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control); /* * Header split: this causes the hardware to DMA * the header into a separate mbuf from the payload, * it can be a performance win in some workloads, but * in others it actually hurts, its off by default. */ static int ixv_header_split = FALSE; TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split); /* * Shadow VFTA table, this is needed because * the real filter table gets cleared during * a soft reset and we need to repopulate it. */ static u32 ixv_shadow_vfta[IXGBE_VFTA_SIZE]; extern struct if_txrx ixgbe_txrx; static struct if_shared_ctx ixv_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ .isc_tx_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM16BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM16BYTES, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixv_vendor_info_array, .isc_driver_version = ixv_driver_version, .isc_driver = &ixv_if_driver, .isc_flags = IFLIB_IS_VF | IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, .isc_nrxd_max = {MAX_RXD}, .isc_ntxd_max = {MAX_TXD}, .isc_nrxd_default = {DEFAULT_RXD}, .isc_ntxd_default = {DEFAULT_TXD}, }; if_shared_ctx_t ixv_sctx = &ixv_sctx_init; static void * ixv_register(device_t dev) { return (ixv_sctx); } /************************************************************************ * ixv_if_tx_queues_alloc ************************************************************************/ static int ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que; int i, j, error; MPASS(adapter->num_tx_queues == ntxqsets); MPASS(ntxqs == 1); /* Allocate queue structure memory */ adapter->tx_queues = (struct ix_tx_queue *)malloc(sizeof(struct ix_tx_queue) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!adapter->tx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; txr->adapter = que->adapter = adapter; /* Allocate report status array */ if (!(txr->tx_rsq = (qidx_t *)malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tail = IXGBE_VFTDT(txr->me); txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; txr->bytes = 0; txr->total_packets = 0; } device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues); return (0); fail: ixv_if_queues_free(ctx); return (error); } /* ixv_if_tx_queues_alloc */ /************************************************************************ * ixv_if_rx_queues_alloc ************************************************************************/ static int ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que; int i, error; MPASS(adapter->num_rx_queues == nrxqsets); MPASS(nrxqs == 1); /* Allocate queue structure memory */ adapter->rx_queues = (struct ix_rx_queue *)malloc(sizeof(struct ix_rx_queue) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!adapter->rx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; rxr->adapter = que->adapter = adapter; /* get the virtual and physical address of the hw queues */ rxr->tail = IXGBE_VFRDT(rxr->me); rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i]; rxr->rx_paddr = paddrs[i*nrxqs]; rxr->bytes = 0; rxr->que = que; } device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", adapter->num_rx_queues); return (0); fail: ixv_if_queues_free(ctx); return (error); } /* ixv_if_rx_queues_alloc */ /************************************************************************ * ixv_if_queues_free ************************************************************************/ static void ixv_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_tx_queue *que = adapter->tx_queues; int i; if (que == NULL) goto free; for (i = 0; i < adapter->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } if (adapter->tx_queues != NULL) free(adapter->tx_queues, M_DEVBUF); free: if (adapter->rx_queues != NULL) free(adapter->rx_queues, M_DEVBUF); adapter->tx_queues = NULL; adapter->rx_queues = NULL; } /* ixv_if_queues_free */ /************************************************************************ * ixv_if_attach_pre - Device initialization routine * * Called when the driver is being loaded. * Identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure ************************************************************************/ static int ixv_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; device_t dev; if_softc_ctx_t scctx; struct ixgbe_hw *hw; int error = 0; INIT_DEBUGOUT("ixv_attach: begin"); /* Allocate, clear, and link in our adapter structure */ dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); adapter->dev = dev; adapter->ctx = ctx; adapter->hw.back = adapter; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; /* Do base PCI setup - map BAR0 */ if (ixv_allocate_pci_resources(ctx)) { device_printf(dev, "ixv_allocate_pci_resources() failed!\n"); error = ENXIO; goto err_out; } /* SYSCTL APIs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixv_sysctl_debug, "I", "Debug Info"); /* Determine hardware revision */ ixv_identify_hardware(ctx); ixv_init_device_features(adapter); /* Initialize the shared code */ error = ixgbe_init_ops_vf(hw); if (error) { device_printf(dev, "ixgbe_init_ops_vf() failed!\n"); error = EIO; goto err_out; } /* Setup the mailbox */ ixgbe_init_mbx_params_vf(hw); error = hw->mac.ops.reset_hw(hw); if (error == IXGBE_ERR_RESET_FAILED) device_printf(dev, "...reset_hw() failure: Reset Failed!\n"); else if (error) device_printf(dev, "...reset_hw() failed with error %d\n", error); if (error) { error = EIO; goto err_out; } error = hw->mac.ops.init_hw(hw); if (error) { device_printf(dev, "...init_hw() failed with error %d\n", error); error = EIO; goto err_out; } /* Negotiate mailbox API version */ error = ixv_negotiate_api(adapter); if (error) { device_printf(dev, "Mailbox API negotiation failed during attach!\n"); goto err_out; } /* If no mac address was assigned, make a random one */ if (!ixv_check_ether_addr(hw->mac.addr)) { u8 addr[ETHER_ADDR_LEN]; arc4rand(&addr, sizeof(addr), 0); addr[0] &= 0xFE; addr[0] |= 0x02; bcopy(addr, hw->mac.addr, sizeof(addr)); bcopy(addr, hw->mac.perm_addr, sizeof(addr)); } /* Most of the iflib initialization... */ iflib_set_mac(ctx, hw->mac.addr); switch (adapter->hw.mac.type) { case ixgbe_mac_X550_vf: case ixgbe_mac_X550EM_x_vf: case ixgbe_mac_X550EM_a_vf: scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 2; break; default: scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 1; } scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); /* XXX */ scctx->isc_tx_csum_flags = CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_TSO; scctx->isc_tx_nsegments = IXGBE_82599_SCATTER; scctx->isc_msix_bar = pci_msix_table_bar(dev); scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; scctx->isc_tx_tso_size_max = IXGBE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = PAGE_SIZE; scctx->isc_txrx = &ixgbe_txrx; /* * Tell the upper layer(s) we support everything the PF * driver does except... * Wake-on-LAN */ scctx->isc_capabilities = IXGBE_CAPS; scctx->isc_capabilities ^= IFCAP_WOL; scctx->isc_capenable = scctx->isc_capabilities; INIT_DEBUGOUT("ixv_if_attach_pre: end"); return (0); err_out: ixv_free_pci_resources(ctx); return (error); } /* ixv_if_attach_pre */ static int ixv_if_attach_post(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int error = 0; /* Setup OS specific network interface */ error = ixv_setup_interface(ctx); if (error) { device_printf(dev, "Interface setup failed: %d\n", error); goto end; } /* Do the stats setup */ ixv_save_stats(adapter); ixv_init_stats(adapter); ixv_add_stats_sysctls(adapter); end: return error; } /* ixv_if_attach_post */ /************************************************************************ * ixv_detach - Device removal routine * * Called when the driver is being removed. * Stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure ************************************************************************/ static int ixv_if_detach(if_ctx_t ctx) { INIT_DEBUGOUT("ixv_detach: begin"); ixv_free_pci_resources(ctx); return (0); } /* ixv_if_detach */ /************************************************************************ * ixv_if_mtu_set ************************************************************************/ static int ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); int error = 0; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) { error = EINVAL; } else { ifp->if_mtu = mtu; adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; } return error; } /* ixv_if_mtu_set */ /************************************************************************ * ixv_if_init - Init entry point * * Used in two ways: It is used by the stack as an init entry * point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get * to a consistent state. * * return 0 on success, positive on failure ************************************************************************/ static void ixv_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; int error = 0; INIT_DEBUGOUT("ixv_if_init: begin"); hw->adapter_stopped = FALSE; hw->mac.ops.stop_adapter(hw); /* reprogram the RAR[0] in case user changed it. */ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, 1); /* Reset VF and renegotiate mailbox API version */ hw->mac.ops.reset_hw(hw); hw->mac.ops.start_hw(hw); error = ixv_negotiate_api(adapter); if (error) { device_printf(dev, "Mailbox API negotiation failed in if_init!\n"); return; } ixv_initialize_transmit_units(ctx); /* Setup Multicast table */ ixv_if_multi_set(ctx); adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx); /* Configure RX settings */ ixv_initialize_receive_units(ctx); /* Set up VLAN offload and filter */ ixv_setup_vlan_support(ctx); /* Set up MSI-X routing */ ixv_configure_ivars(adapter); /* Set up auto-mask */ IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_EICS_RTX_QUEUE); /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_VTEITR(adapter->vector), IXGBE_LINK_ITR); /* Stats init */ ixv_init_stats(adapter); /* Config/Enable Link */ hw->mac.ops.check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); /* And now turn on interrupts */ ixv_if_enable_intr(ctx); return; } /* ixv_if_init */ /************************************************************************ * ixv_enable_queue ************************************************************************/ static inline void ixv_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u32 queue = 1 << vector; u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); } /* ixv_enable_queue */ /************************************************************************ * ixv_disable_queue ************************************************************************/ static inline void ixv_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask); } /* ixv_disable_queue */ /************************************************************************ * ixv_msix_que - MSI-X Queue Interrupt Service routine ************************************************************************/ static int ixv_msix_que(void *arg) { struct ix_rx_queue *que = arg; struct adapter *adapter = que->adapter; ixv_disable_queue(adapter, que->msix); ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /* ixv_msix_que */ /************************************************************************ * ixv_msix_mbx ************************************************************************/ static int ixv_msix_mbx(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 reg; ++adapter->link_irq; /* First get the cause */ reg = IXGBE_READ_REG(hw, IXGBE_VTEICS); /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_VTEICR, reg); /* Link status change */ if (reg & IXGBE_EICR_LSC) iflib_admin_intr_deferred(adapter->ctx); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER); return (FILTER_HANDLED); } /* ixv_msix_mbx */ /************************************************************************ * ixv_media_status - Media Ioctl callback * * Called whenever the user queries the status of * the interface using ifconfig. ************************************************************************/ static void ixv_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("ixv_media_status: begin"); iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) return; ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_T | IFM_FDX; break; case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; case IXGBE_LINK_SPEED_10_FULL: ifmr->ifm_active |= IFM_10_T | IFM_FDX; break; } } /* ixv_if_media_status */ /************************************************************************ * ixv_if_media_change - Media Ioctl callback * * Called when the user changes speed/duplex using * media/mediopt option with ifconfig. ************************************************************************/ static int ixv_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixv_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; default: device_printf(adapter->dev, "Only auto media type\n"); return (EINVAL); } return (0); } /* ixv_if_media_change */ /************************************************************************ * ixv_negotiate_api * * Negotiate the Mailbox API with the PF; * start with the most featured API first. ************************************************************************/ static int ixv_negotiate_api(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int mbx_api[] = { ixgbe_mbox_api_11, ixgbe_mbox_api_10, ixgbe_mbox_api_unknown }; int i = 0; while (mbx_api[i] != ixgbe_mbox_api_unknown) { if (ixgbevf_negotiate_api_version(hw, mbx_api[i]) == 0) return (0); i++; } return (EINVAL); } /* ixv_negotiate_api */ /************************************************************************ * ixv_if_multi_set - Multicast Update * * Called whenever multicast address list is updated. ************************************************************************/ static void ixv_if_multi_set(if_ctx_t ctx) { u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS]; struct adapter *adapter = iflib_get_softc(ctx); u8 *update_ptr; struct ifmultiaddr *ifma; if_t ifp = iflib_get_ifp(ctx); int mcnt = 0; IOCTL_DEBUGOUT("ixv_if_multi_set: begin"); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], IXGBE_ETH_LENGTH_OF_ADDRESS); mcnt++; } update_ptr = mta; adapter->hw.mac.ops.update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixv_mc_array_itr, TRUE); } /* ixv_if_multi_set */ /************************************************************************ * ixv_mc_array_itr * * An iterator function needed by the multicast shared code. * It feeds the shared code routine the addresses in the * array of ixv_set_multi() one by one. ************************************************************************/ static u8 * ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { u8 *addr = *update_ptr; u8 *newptr; *vmdq = 0; newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; *update_ptr = newptr; return addr; } /* ixv_mc_array_itr */ /************************************************************************ * ixv_if_local_timer - Timer routine * * Checks for link status, updates statistics, * and runs the watchdog check. ************************************************************************/ static void ixv_if_local_timer(if_ctx_t ctx, uint16_t qid) { if (qid != 0) return; /* Fire off the adminq task */ iflib_admin_intr_deferred(ctx); } /* ixv_if_local_timer */ /************************************************************************ * ixv_if_update_admin_status - Update OS on link state * * Note: Only updates the OS on the cached link state. * The real check of the hardware only happens with * a link interrupt. ************************************************************************/ static void ixv_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); s32 status; adapter->hw.mac.get_link_status = TRUE; status = ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, FALSE); if (status != IXGBE_SUCCESS && adapter->hw.adapter_stopped == FALSE) { /* Mailbox's Clear To Send status is lost or timeout occurred. * We need reinitialization. */ iflib_get_ifp(ctx)->if_init(ctx); } if (adapter->link_up) { if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev, "Link is up %d Gbps %s \n", ((adapter->link_speed == 128) ? 10 : 1), "Full Duplex"); adapter->link_active = TRUE; iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); adapter->link_active = FALSE; } } /* Stats Update */ ixv_update_stats(adapter); } /* ixv_if_update_admin_status */ /************************************************************************ * ixv_if_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. ************************************************************************/ static void ixv_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; INIT_DEBUGOUT("ixv_stop: begin\n"); ixv_if_disable_intr(ctx); hw->mac.ops.reset_hw(hw); adapter->hw.adapter_stopped = FALSE; hw->mac.ops.stop_adapter(hw); /* Update the stack */ adapter->link_up = FALSE; ixv_if_update_admin_status(ctx); /* reprogram the RAR[0] in case user changed it. */ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); } /* ixv_if_stop */ /************************************************************************ * ixv_identify_hardware - Determine hardware revision. ************************************************************************/ static void ixv_identify_hardware(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_get_revid(dev); hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); /* A subset of set_mac_type */ switch (hw->device_id) { case IXGBE_DEV_ID_82599_VF: hw->mac.type = ixgbe_mac_82599_vf; break; case IXGBE_DEV_ID_X540_VF: hw->mac.type = ixgbe_mac_X540_vf; break; case IXGBE_DEV_ID_X550_VF: hw->mac.type = ixgbe_mac_X550_vf; break; case IXGBE_DEV_ID_X550EM_X_VF: hw->mac.type = ixgbe_mac_X550EM_x_vf; break; case IXGBE_DEV_ID_X550EM_A_VF: hw->mac.type = ixgbe_mac_X550EM_a_vf; break; default: device_printf(dev, "unknown mac type\n"); hw->mac.type = ixgbe_mac_unknown; break; } } /* ixv_identify_hardware */ /************************************************************************ * ixv_if_msix_intr_assign - Setup MSI-X Interrupt resources and handlers ************************************************************************/ static int ixv_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ix_rx_queue *rx_que = adapter->rx_queues; struct ix_tx_queue *tx_que; int error, rid, vector = 0; char buf[16]; for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, - IFLIB_INTR_RX, ixv_msix_que, rx_que, rx_que->rxr.me, buf); + IFLIB_INTR_RXTX, ixv_msix_que, rx_que, rx_que->rxr.me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } for (int i = 0; i < adapter->num_tx_queues; i++) { snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; tx_que->msix = i % adapter->num_rx_queues; iflib_softirq_alloc_generic(ctx, &adapter->rx_queues[tx_que->msix].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); } rid = vector + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, ixv_msix_mbx, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); return (error); } adapter->vector = vector; /* * Due to a broken design QEMU will fail to properly * enable the guest for MSIX unless the vectors in * the table are all set up, so we must rewrite the * ENABLE in the MSIX control register again at this * point to cause it to successfully initialize us. */ if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { int msix_ctrl; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } /* ixv_if_msix_intr_assign */ /************************************************************************ * ixv_allocate_pci_resources ************************************************************************/ static int ixv_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; return (0); } /* ixv_allocate_pci_resources */ /************************************************************************ * ixv_free_pci_resources ************************************************************************/ static void ixv_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); if (que != NULL) { for (int i = 0; i < adapter->num_rx_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } } if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixv_free_pci_resources */ /************************************************************************ * ixv_setup_interface * * Setup networking device structure and register an interface. ************************************************************************/ static int ixv_setup_interface(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DEBUGOUT("ixv_setup_interface: begin"); if_setbaudrate(ifp, IF_Gbps(10)); ifp->if_snd.ifq_maxlen = scctx->isc_ntxd[0] - 2; adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return 0; } /* ixv_setup_interface */ /************************************************************************ * ixv_if_get_counter ************************************************************************/ static uint64_t ixv_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->ipackets); case IFCOUNTER_OPACKETS: return (adapter->opackets); case IFCOUNTER_IBYTES: return (adapter->ibytes); case IFCOUNTER_OBYTES: return (adapter->obytes); case IFCOUNTER_IMCASTS: return (adapter->imcasts); default: return (if_get_counter_default(ifp, cnt)); } } /* ixv_if_get_counter */ /* ixv_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for every event. * * @returns true if iflib needs to reinit the interface */ static bool ixv_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: /* XXX: This may not need to return true */ default: return (true); } } /************************************************************************ * ixv_initialize_transmit_units - Enable transmit unit. ************************************************************************/ static void ixv_initialize_transmit_units(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que = adapter->tx_queues; int i; for (i = 0; i < adapter->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; u64 tdba = txr->tx_paddr; u32 txctrl, txdctl; int j = txr->me; /* Set WTHRESH to 8, burst writeback */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); txdctl |= (8 << 16); IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); /* Set the HW Tx Head and Tail indices */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(j), 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(j), 0); /* Set Tx Tail register */ txr->tail = IXGBE_VFTDT(j); txr->tx_rs_cidx = txr->tx_rs_pidx; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error when calculating how many descriptors are * done in the credits_update function. */ txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; for (int k = 0; k < scctx->isc_ntxd[0]; k++) txr->tx_rsq[k] = QIDX_INVALID; /* Set Ring parameters */ IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(j), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(j), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(j), scctx->isc_ntxd[0] * sizeof(struct ixgbe_legacy_tx_desc)); txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(j)); txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(j), txctrl); /* Now enable */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); txdctl |= IXGBE_TXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); } return; } /* ixv_initialize_transmit_units */ /************************************************************************ * ixv_initialize_rss_mapping ************************************************************************/ static void ixv_initialize_rss_mapping(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 reta = 0, mrqc, rss_key[10]; int queue_id; int i, j; u32 rss_hash_config; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* Fetch the configured RSS key */ rss_getkey((uint8_t *)&rss_key); } else { /* set up random bits */ arc4rand(&rss_key, sizeof(rss_key), 0); } /* Now fill out hash function seeds */ for (i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_VFRSSRK(i), rss_key[i]); /* Set up the redirection table */ for (i = 0, j = 0; i < 64; i++, j++) { if (j == adapter->num_rx_queues) j = 0; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * Fetch the RSS bucket id for the given indirection * entry. Cap it at the number of configured buckets * (which is num_rx_queues.) */ queue_id = rss_get_indirection_to_bucket(i); queue_id = queue_id % adapter->num_rx_queues; } else queue_id = j; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta >>= 8; reta |= ((uint32_t)queue_id) << 24; if ((i & 3) == 3) { IXGBE_WRITE_REG(hw, IXGBE_VFRETA(i >> 2), reta); reta = 0; } } /* Perform hash on these packet types */ if (adapter->feat_en & IXGBE_FEATURE_RSS) rss_hash_config = rss_gethashconfig(); else { /* * Disable UDP - IP fragments aren't currently being handled * and so we end up with a mix of 2-tuple and 4-tuple * traffic. */ rss_hash_config = RSS_HASHTYPE_RSS_IPV4 | RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_TCP_IPV6; } mrqc = IXGBE_MRQC_RSSEN; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_IPV6_EX defined, but not supported\n", __func__); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_TCP_IPV6_EX defined, but not supported\n", __func__); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_UDP_IPV6_EX defined, but not supported\n", __func__); IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, mrqc); } /* ixv_initialize_rss_mapping */ /************************************************************************ * ixv_initialize_receive_units - Setup receive registers and features. ************************************************************************/ static void ixv_initialize_receive_units(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = iflib_get_ifp(ctx); struct ix_rx_queue *que = adapter->rx_queues; u32 bufsz, psrtype; if (ifp->if_mtu > ETHERMTU) bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; else bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR | IXGBE_PSRTYPE_L2HDR; if (adapter->num_rx_queues > 1) psrtype |= 1 << 29; IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); /* Tell PF our max_frame size */ if (ixgbevf_rlpml_set_vf(hw, adapter->max_frame_size) != 0) { device_printf(adapter->dev, "There is a problem with the PF setup. It is likely the receive unit for this VF will not function correctly.\n"); } scctx = adapter->shared; for (int i = 0; i < adapter->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 rdba = rxr->rx_paddr; u32 reg, rxdctl; int j = rxr->me; /* Disable the queue */ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)); rxdctl &= ~IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); for (int k = 0; k < 10; k++) { if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & IXGBE_RXDCTL_ENABLE) msec_delay(1); else break; } wmb(); /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(j), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(j), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(j), scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc)); /* Reset the ring indices */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0); /* Set up the SRRCTL register */ reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(j)); reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; reg |= bufsz; reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(j), reg); /* Capture Rx Tail index */ rxr->tail = IXGBE_VFRDT(rxr->me); /* Do the queue enabling last */ rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); for (int l = 0; l < 10; l++) { if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & IXGBE_RXDCTL_ENABLE) break; msec_delay(1); } wmb(); /* Set the Tail Pointer */ #ifdef DEV_NETMAP /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() * (this is true by default on the TX side, because * init makes all buffers available to userspace). * * netmap_reset() and the device specific routines * (e.g. ixgbe_setup_receive_rings()) map these * buffers at the end of the NIC ring, so here we * must set the RDT (tail) register to make sure * they are not overwritten. * * In this driver the NIC ring starts at RDH = 0, * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = na->rx_rings[j]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), scctx->isc_nrxd[0] - 1); } /* * Do not touch RSS and RETA settings for older hardware * as those are shared among PF and all VF. */ if (adapter->hw.mac.type >= ixgbe_mac_X550_vf) ixv_initialize_rss_mapping(adapter); } /* ixv_initialize_receive_units */ /************************************************************************ * ixv_setup_vlan_support ************************************************************************/ static void ixv_setup_vlan_support(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 ctrl, vid, vfta, retry; /* * We get here thru if_init, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { /* Enable the queues */ for (int i = 0; i < adapter->num_rx_queues; i++) { ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); /* * Let Rx path know that it needs to store VLAN tag * as part of extra mbuf info. */ adapter->rx_queues[i].rxr.vtag_strip = TRUE; } } /* * If filtering VLAN tags is disabled, * there is no need to fill VLAN Filter Table Array (VFTA). */ if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (int i = 0; i < IXGBE_VFTA_SIZE; i++) { if (ixv_shadow_vfta[i] == 0) continue; vfta = ixv_shadow_vfta[i]; /* * Reconstruct the vlan id's * based on the bits set in each * of the array ints. */ for (int j = 0; j < 32; j++) { retry = 0; if ((vfta & (1 << j)) == 0) continue; vid = (i * 32) + j; /* Call the shared code mailbox routine */ while (hw->mac.ops.set_vfta(hw, vid, 0, TRUE, FALSE)) { if (++retry > 5) break; } } } } /* ixv_setup_vlan_support */ /************************************************************************ * ixv_if_register_vlan * * Run via a vlan config EVENT, it enables us to use the * HW Filter table since we can get the vlan id. This just * creates the entry in the soft version of the VFTA, init * will repopulate the real table. ************************************************************************/ static void ixv_if_register_vlan(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; } /* ixv_if_register_vlan */ /************************************************************************ * ixv_if_unregister_vlan * * Run via a vlan unconfig EVENT, remove our entry * in the soft vfta. ************************************************************************/ static void ixv_if_unregister_vlan(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; } /* ixv_if_unregister_vlan */ /************************************************************************ * ixv_if_enable_intr ************************************************************************/ static void ixv_if_enable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; struct ix_rx_queue *que = adapter->rx_queues; u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); mask = IXGBE_EIMS_ENABLE_MASK; mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); for (int i = 0; i < adapter->num_rx_queues; i++, que++) ixv_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); } /* ixv_if_enable_intr */ /************************************************************************ * ixv_if_disable_intr ************************************************************************/ static void ixv_if_disable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); IXGBE_WRITE_FLUSH(&adapter->hw); } /* ixv_if_disable_intr */ /************************************************************************ * ixv_if_rx_queue_intr_enable ************************************************************************/ static int ixv_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = &adapter->rx_queues[rxqid]; ixv_enable_queue(adapter, que->rxr.me); return (0); } /* ixv_if_rx_queue_intr_enable */ /************************************************************************ * ixv_set_ivar * * Setup the correct IVAR register for a particular MSI-X interrupt * - entry is the register array entry * - vector is the MSI-X vector for this queue * - type is RX/TX/MISC ************************************************************************/ static void ixv_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; if (type == -1) { /* MISC IVAR */ ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC); ivar &= ~0xFF; ivar |= vector; IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar); } } /* ixv_set_ivar */ /************************************************************************ * ixv_configure_ivars ************************************************************************/ static void ixv_configure_ivars(struct adapter *adapter) { struct ix_rx_queue *que = adapter->rx_queues; MPASS(adapter->num_rx_queues == adapter->num_tx_queues); for (int i = 0; i < adapter->num_rx_queues; i++, que++) { /* First the RX queue entry */ ixv_set_ivar(adapter, i, que->msix, 0); /* ... and the TX */ ixv_set_ivar(adapter, i, que->msix, 1); /* Set an initial value in EITR */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEITR(que->msix), IXGBE_EITR_DEFAULT); } /* For the mailbox interrupt */ ixv_set_ivar(adapter, 1, adapter->vector, -1); } /* ixv_configure_ivars */ /************************************************************************ * ixv_save_stats * * The VF stats registers never have a truly virgin * starting point, so this routine tries to make an * artificial one, marking ground zero on attach as * it were. ************************************************************************/ static void ixv_save_stats(struct adapter *adapter) { if (adapter->stats.vf.vfgprc || adapter->stats.vf.vfgptc) { adapter->stats.vf.saved_reset_vfgprc += adapter->stats.vf.vfgprc - adapter->stats.vf.base_vfgprc; adapter->stats.vf.saved_reset_vfgptc += adapter->stats.vf.vfgptc - adapter->stats.vf.base_vfgptc; adapter->stats.vf.saved_reset_vfgorc += adapter->stats.vf.vfgorc - adapter->stats.vf.base_vfgorc; adapter->stats.vf.saved_reset_vfgotc += adapter->stats.vf.vfgotc - adapter->stats.vf.base_vfgotc; adapter->stats.vf.saved_reset_vfmprc += adapter->stats.vf.vfmprc - adapter->stats.vf.base_vfmprc; } } /* ixv_save_stats */ /************************************************************************ * ixv_init_stats ************************************************************************/ static void ixv_init_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; adapter->stats.vf.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); adapter->stats.vf.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); adapter->stats.vf.last_vfgorc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); adapter->stats.vf.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); adapter->stats.vf.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); adapter->stats.vf.last_vfgotc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); adapter->stats.vf.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); adapter->stats.vf.base_vfgprc = adapter->stats.vf.last_vfgprc; adapter->stats.vf.base_vfgorc = adapter->stats.vf.last_vfgorc; adapter->stats.vf.base_vfgptc = adapter->stats.vf.last_vfgptc; adapter->stats.vf.base_vfgotc = adapter->stats.vf.last_vfgotc; adapter->stats.vf.base_vfmprc = adapter->stats.vf.last_vfmprc; } /* ixv_init_stats */ #define UPDATE_STAT_32(reg, last, count) \ { \ u32 current = IXGBE_READ_REG(hw, reg); \ if (current < last) \ count += 0x100000000LL; \ last = current; \ count &= 0xFFFFFFFF00000000LL; \ count |= current; \ } #define UPDATE_STAT_36(lsb, msb, last, count) \ { \ u64 cur_lsb = IXGBE_READ_REG(hw, lsb); \ u64 cur_msb = IXGBE_READ_REG(hw, msb); \ u64 current = ((cur_msb << 32) | cur_lsb); \ if (current < last) \ count += 0x1000000000LL; \ last = current; \ count &= 0xFFFFFFF000000000LL; \ count |= current; \ } /************************************************************************ * ixv_update_stats - Update the board statistics counters. ************************************************************************/ void ixv_update_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbevf_hw_stats *stats = &adapter->stats.vf; UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, adapter->stats.vf.vfgprc); UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, adapter->stats.vf.vfgptc); UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, adapter->stats.vf.last_vfgorc, adapter->stats.vf.vfgorc); UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, adapter->stats.vf.last_vfgotc, adapter->stats.vf.vfgotc); UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, adapter->stats.vf.vfmprc); /* Fill out the OS statistics structure */ IXGBE_SET_IPACKETS(adapter, stats->vfgprc); IXGBE_SET_OPACKETS(adapter, stats->vfgptc); IXGBE_SET_IBYTES(adapter, stats->vfgorc); IXGBE_SET_OBYTES(adapter, stats->vfgotc); IXGBE_SET_IMCASTS(adapter, stats->vfmprc); } /* ixv_update_stats */ /************************************************************************ * ixv_add_stats_sysctls - Add statistic sysctls for the VF. ************************************************************************/ static void ixv_add_stats_sysctls(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_tx_queue *tx_que = adapter->tx_queues; struct ix_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbevf_hw_stats *stats = &adapter->stats.vf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(txr->tso_tx), "TSO Packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "TX Packets"); } for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(rx_que->irqs), "IRQs on queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "RX packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "RX bytes"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", CTLFLAG_RD, &(rxr->rx_discarded), "Discarded RX packets"); } stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "VF Statistics (read from HW registers)"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->vfgprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->vfgorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->vfmprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->vfgptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->vfgotc, "Good Octets Transmitted"); } /* ixv_add_stats_sysctls */ /************************************************************************ * ixv_print_debug_info * * Called only when em_display_debug_stats is enabled. * Provides a way to take a look at important statistics * maintained by the driver and hardware. ************************************************************************/ static void ixv_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; device_printf(dev, "Error Byte Count = %u \n", IXGBE_READ_REG(hw, IXGBE_ERRBC)); device_printf(dev, "MBX IRQ Handled: %lu\n", (long)adapter->link_irq); } /* ixv_print_debug_info */ /************************************************************************ * ixv_sysctl_debug ************************************************************************/ static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; ixv_print_debug_info(adapter); } return error; } /* ixv_sysctl_debug */ /************************************************************************ * ixv_init_device_features ************************************************************************/ static void ixv_init_device_features(struct adapter *adapter) { adapter->feat_cap = IXGBE_FEATURE_NETMAP | IXGBE_FEATURE_VF | IXGBE_FEATURE_LEGACY_TX; /* A tad short on feature flags for VFs, atm. */ switch (adapter->hw.mac.type) { case ixgbe_mac_82599_vf: break; case ixgbe_mac_X540_vf: break; case ixgbe_mac_X550_vf: case ixgbe_mac_X550EM_x_vf: case ixgbe_mac_X550EM_a_vf: adapter->feat_cap |= IXGBE_FEATURE_NEEDS_CTXD; adapter->feat_cap |= IXGBE_FEATURE_RSS; break; default: break; } /* Enabled by default... */ /* Is a virtual function (VF) */ if (adapter->feat_cap & IXGBE_FEATURE_VF) adapter->feat_en |= IXGBE_FEATURE_VF; /* Netmap */ if (adapter->feat_cap & IXGBE_FEATURE_NETMAP) adapter->feat_en |= IXGBE_FEATURE_NETMAP; /* Receive-Side Scaling (RSS) */ if (adapter->feat_cap & IXGBE_FEATURE_RSS) adapter->feat_en |= IXGBE_FEATURE_RSS; /* Needs advanced context descriptor regardless of offloads req'd */ if (adapter->feat_cap & IXGBE_FEATURE_NEEDS_CTXD) adapter->feat_en |= IXGBE_FEATURE_NEEDS_CTXD; } /* ixv_init_device_features */ diff --git a/sys/dev/ixl/if_iavf.c b/sys/dev/ixl/if_iavf.c index d85cb8ac0541..2079e2d7306b 100644 --- a/sys/dev/ixl/if_iavf.c +++ b/sys/dev/ixl/if_iavf.c @@ -1,2454 +1,2454 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "iavf.h" /********************************************************************* * Driver version *********************************************************************/ #define IAVF_DRIVER_VERSION_MAJOR 2 #define IAVF_DRIVER_VERSION_MINOR 0 #define IAVF_DRIVER_VERSION_BUILD 0 #define IAVF_DRIVER_VERSION_STRING \ __XSTRING(IAVF_DRIVER_VERSION_MAJOR) "." \ __XSTRING(IAVF_DRIVER_VERSION_MINOR) "." \ __XSTRING(IAVF_DRIVER_VERSION_BUILD) "-k" /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * * ( Vendor ID, Device ID, Branding String ) *********************************************************************/ static pci_vendor_info_t iavf_vendor_info_array[] = { PVID(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF, "Intel(R) Ethernet Virtual Function 700 Series"), PVID(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF, "Intel(R) Ethernet Virtual Function 700 Series (X722)"), PVID(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_ADAPTIVE_VF, "Intel(R) Ethernet Adaptive Virtual Function"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ static void *iavf_register(device_t dev); static int iavf_if_attach_pre(if_ctx_t ctx); static int iavf_if_attach_post(if_ctx_t ctx); static int iavf_if_detach(if_ctx_t ctx); static int iavf_if_shutdown(if_ctx_t ctx); static int iavf_if_suspend(if_ctx_t ctx); static int iavf_if_resume(if_ctx_t ctx); static int iavf_if_msix_intr_assign(if_ctx_t ctx, int msix); static void iavf_if_enable_intr(if_ctx_t ctx); static void iavf_if_disable_intr(if_ctx_t ctx); static int iavf_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int iavf_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int iavf_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int iavf_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static void iavf_if_queues_free(if_ctx_t ctx); static void iavf_if_update_admin_status(if_ctx_t ctx); static void iavf_if_multi_set(if_ctx_t ctx); static int iavf_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void iavf_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int iavf_if_media_change(if_ctx_t ctx); static int iavf_if_promisc_set(if_ctx_t ctx, int flags); static void iavf_if_timer(if_ctx_t ctx, uint16_t qid); static void iavf_if_vlan_register(if_ctx_t ctx, u16 vtag); static void iavf_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static uint64_t iavf_if_get_counter(if_ctx_t ctx, ift_counter cnt); static void iavf_if_stop(if_ctx_t ctx); static bool iavf_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); static int iavf_allocate_pci_resources(struct iavf_sc *); static int iavf_reset_complete(struct i40e_hw *); static int iavf_setup_vc(struct iavf_sc *); static int iavf_reset(struct iavf_sc *); static int iavf_vf_config(struct iavf_sc *); static void iavf_init_filters(struct iavf_sc *); static void iavf_free_pci_resources(struct iavf_sc *); static void iavf_free_filters(struct iavf_sc *); static void iavf_setup_interface(device_t, struct iavf_sc *); static void iavf_add_device_sysctls(struct iavf_sc *); static void iavf_enable_adminq_irq(struct i40e_hw *); static void iavf_disable_adminq_irq(struct i40e_hw *); static void iavf_enable_queue_irq(struct i40e_hw *, int); static void iavf_disable_queue_irq(struct i40e_hw *, int); static void iavf_config_rss(struct iavf_sc *); static void iavf_stop(struct iavf_sc *); static int iavf_add_mac_filter(struct iavf_sc *, u8 *, u16); static int iavf_del_mac_filter(struct iavf_sc *sc, u8 *macaddr); static int iavf_msix_que(void *); static int iavf_msix_adminq(void *); //static void iavf_del_multi(struct iavf_sc *sc); static void iavf_init_multi(struct iavf_sc *sc); static void iavf_configure_itr(struct iavf_sc *sc); static int iavf_sysctl_rx_itr(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_tx_itr(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_current_speed(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_vf_reset(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_vflr_reset(SYSCTL_HANDLER_ARGS); static void iavf_save_tunables(struct iavf_sc *); static enum i40e_status_code iavf_process_adminq(struct iavf_sc *, u16 *); static int iavf_send_vc_msg(struct iavf_sc *sc, u32 op); static int iavf_send_vc_msg_sleep(struct iavf_sc *sc, u32 op); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t iavf_methods[] = { /* Device interface */ DEVMETHOD(device_register, iavf_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD_END }; static driver_t iavf_driver = { "iavf", iavf_methods, sizeof(struct iavf_sc), }; devclass_t iavf_devclass; DRIVER_MODULE(iavf, pci, iavf_driver, iavf_devclass, 0, 0); MODULE_PNP_INFO("U32:vendor;U32:device;U32:subvendor;U32:subdevice;U32:revision", pci, iavf, iavf_vendor_info_array, nitems(iavf_vendor_info_array) - 1); MODULE_VERSION(iavf, 1); MODULE_DEPEND(iavf, pci, 1, 1, 1); MODULE_DEPEND(iavf, ether, 1, 1, 1); MODULE_DEPEND(iavf, iflib, 1, 1, 1); MALLOC_DEFINE(M_IAVF, "iavf", "iavf driver allocations"); static device_method_t iavf_if_methods[] = { DEVMETHOD(ifdi_attach_pre, iavf_if_attach_pre), DEVMETHOD(ifdi_attach_post, iavf_if_attach_post), DEVMETHOD(ifdi_detach, iavf_if_detach), DEVMETHOD(ifdi_shutdown, iavf_if_shutdown), DEVMETHOD(ifdi_suspend, iavf_if_suspend), DEVMETHOD(ifdi_resume, iavf_if_resume), DEVMETHOD(ifdi_init, iavf_if_init), DEVMETHOD(ifdi_stop, iavf_if_stop), DEVMETHOD(ifdi_msix_intr_assign, iavf_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, iavf_if_enable_intr), DEVMETHOD(ifdi_intr_disable, iavf_if_disable_intr), DEVMETHOD(ifdi_rx_queue_intr_enable, iavf_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, iavf_if_tx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, iavf_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, iavf_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, iavf_if_queues_free), DEVMETHOD(ifdi_update_admin_status, iavf_if_update_admin_status), DEVMETHOD(ifdi_multi_set, iavf_if_multi_set), DEVMETHOD(ifdi_mtu_set, iavf_if_mtu_set), DEVMETHOD(ifdi_media_status, iavf_if_media_status), DEVMETHOD(ifdi_media_change, iavf_if_media_change), DEVMETHOD(ifdi_promisc_set, iavf_if_promisc_set), DEVMETHOD(ifdi_timer, iavf_if_timer), DEVMETHOD(ifdi_vlan_register, iavf_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, iavf_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, iavf_if_get_counter), DEVMETHOD(ifdi_needs_restart, iavf_if_needs_restart), DEVMETHOD_END }; static driver_t iavf_if_driver = { "iavf_if", iavf_if_methods, sizeof(struct iavf_sc) }; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, iavf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "iavf driver parameters"); /* * Different method for processing TX descriptor * completion. */ static int iavf_enable_head_writeback = 0; TUNABLE_INT("hw.iavf.enable_head_writeback", &iavf_enable_head_writeback); SYSCTL_INT(_hw_iavf, OID_AUTO, enable_head_writeback, CTLFLAG_RDTUN, &iavf_enable_head_writeback, 0, "For detecting last completed TX descriptor by hardware, use value written by HW instead of checking descriptors"); static int iavf_core_debug_mask = 0; TUNABLE_INT("hw.iavf.core_debug_mask", &iavf_core_debug_mask); SYSCTL_INT(_hw_iavf, OID_AUTO, core_debug_mask, CTLFLAG_RDTUN, &iavf_core_debug_mask, 0, "Display debug statements that are printed in non-shared code"); static int iavf_shared_debug_mask = 0; TUNABLE_INT("hw.iavf.shared_debug_mask", &iavf_shared_debug_mask); SYSCTL_INT(_hw_iavf, OID_AUTO, shared_debug_mask, CTLFLAG_RDTUN, &iavf_shared_debug_mask, 0, "Display debug statements that are printed in shared code"); int iavf_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.iavf.rx_itr", &iavf_rx_itr); SYSCTL_INT(_hw_iavf, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &iavf_rx_itr, 0, "RX Interrupt Rate"); int iavf_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.iavf.tx_itr", &iavf_tx_itr); SYSCTL_INT(_hw_iavf, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &iavf_tx_itr, 0, "TX Interrupt Rate"); extern struct if_txrx ixl_txrx_hwb; extern struct if_txrx ixl_txrx_dwb; static struct if_shared_ctx iavf_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ .isc_tx_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_tso_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = 16384, .isc_rx_nsegments = IXL_MAX_RX_SEGS, .isc_rx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = iavf_vendor_info_array, .isc_driver_version = IAVF_DRIVER_VERSION_STRING, .isc_driver = &iavf_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_TSO_INIT_IP | IFLIB_IS_VF, .isc_nrxd_min = {IXL_MIN_RING}, .isc_ntxd_min = {IXL_MIN_RING}, .isc_nrxd_max = {IXL_MAX_RING}, .isc_ntxd_max = {IXL_MAX_RING}, .isc_nrxd_default = {IXL_DEFAULT_RING}, .isc_ntxd_default = {IXL_DEFAULT_RING}, }; if_shared_ctx_t iavf_sctx = &iavf_sctx_init; /*** Functions ***/ static void * iavf_register(device_t dev) { return (iavf_sctx); } static int iavf_allocate_pci_resources(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = iflib_get_dev(sc->vsi.ctx); int rid; /* Map BAR0 */ rid = PCIR_BAR(0); sc->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(sc->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->pci_mem); sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->pci_mem); sc->osdep.mem_bus_space_size = rman_get_size(sc->pci_mem); sc->osdep.flush_reg = I40E_VFGEN_RSTAT; sc->osdep.dev = dev; sc->hw.hw_addr = (u8 *) &sc->osdep.mem_bus_space_handle; sc->hw.back = &sc->osdep; return (0); } static int iavf_if_attach_pre(if_ctx_t ctx) { device_t dev; struct iavf_sc *sc; struct i40e_hw *hw; struct ixl_vsi *vsi; if_softc_ctx_t scctx; int error = 0; dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); vsi = &sc->vsi; vsi->back = sc; sc->dev = dev; hw = &sc->hw; vsi->dev = dev; vsi->hw = &sc->hw; vsi->num_vlans = 0; vsi->ctx = ctx; vsi->media = iflib_get_media(ctx); vsi->shared = scctx = iflib_get_softc_ctx(ctx); iavf_save_tunables(sc); /* Do PCI setup - map BAR0, etc */ if (iavf_allocate_pci_resources(sc)) { device_printf(dev, "%s: Allocation of PCI resources failed\n", __func__); error = ENXIO; goto err_early; } iavf_dbg_init(sc, "Allocated PCI resources and MSI-X vectors\n"); /* * XXX: This is called by init_shared_code in the PF driver, * but the rest of that function does not support VFs. */ error = i40e_set_mac_type(hw); if (error) { device_printf(dev, "%s: set_mac_type failed: %d\n", __func__, error); goto err_pci_res; } error = iavf_reset_complete(hw); if (error) { device_printf(dev, "%s: Device is still being reset\n", __func__); goto err_pci_res; } iavf_dbg_init(sc, "VF Device is ready for configuration\n"); /* Sets up Admin Queue */ error = iavf_setup_vc(sc); if (error) { device_printf(dev, "%s: Error setting up PF comms, %d\n", __func__, error); goto err_pci_res; } iavf_dbg_init(sc, "PF API version verified\n"); /* Need API version before sending reset message */ error = iavf_reset(sc); if (error) { device_printf(dev, "VF reset failed; reload the driver\n"); goto err_aq; } iavf_dbg_init(sc, "VF reset complete\n"); /* Ask for VF config from PF */ error = iavf_vf_config(sc); if (error) { device_printf(dev, "Error getting configuration from PF: %d\n", error); goto err_aq; } device_printf(dev, "VSIs %d, QPs %d, MSI-X %d, RSS sizes: key %d lut %d\n", sc->vf_res->num_vsis, sc->vf_res->num_queue_pairs, sc->vf_res->max_vectors, sc->vf_res->rss_key_size, sc->vf_res->rss_lut_size); iavf_dbg_info(sc, "Capabilities=%b\n", sc->vf_res->vf_cap_flags, IAVF_PRINTF_VF_OFFLOAD_FLAGS); /* got VF config message back from PF, now we can parse it */ for (int i = 0; i < sc->vf_res->num_vsis; i++) { /* XXX: We only use the first VSI we find */ if (sc->vf_res->vsi_res[i].vsi_type == I40E_VSI_SRIOV) sc->vsi_res = &sc->vf_res->vsi_res[i]; } if (!sc->vsi_res) { device_printf(dev, "%s: no LAN VSI found\n", __func__); error = EIO; goto err_res_buf; } vsi->id = sc->vsi_res->vsi_id; iavf_dbg_init(sc, "Resource Acquisition complete\n"); /* If no mac address was assigned just make a random one */ if (!iavf_check_ether_addr(hw->mac.addr)) { u8 addr[ETHER_ADDR_LEN]; arc4rand(&addr, sizeof(addr), 0); addr[0] &= 0xFE; addr[0] |= 0x02; bcopy(addr, hw->mac.addr, sizeof(addr)); } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); iflib_set_mac(ctx, hw->mac.addr); /* Allocate filter lists */ iavf_init_filters(sc); /* Fill out more iflib parameters */ scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = sc->vsi_res->num_queue_pairs; if (vsi->enable_head_writeback) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_hwb; } else { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_dwb; } scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union i40e_32byte_rx_desc), DBA_ALIGN); scctx->isc_msix_bar = PCIR_BAR(IXL_MSIX_BAR); scctx->isc_tx_nsegments = IXL_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = IXL_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = IXL_TSO_SIZE; scctx->isc_tx_tso_segsize_max = IXL_MAX_DMA_SEG_SIZE; scctx->isc_rss_table_size = IXL_RSS_VSI_LUT_SIZE; scctx->isc_tx_csum_flags = CSUM_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IXL_CAPS; return (0); err_res_buf: free(sc->vf_res, M_IAVF); err_aq: i40e_shutdown_adminq(hw); err_pci_res: iavf_free_pci_resources(sc); err_early: return (error); } static int iavf_if_attach_post(if_ctx_t ctx) { device_t dev; struct iavf_sc *sc; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; INIT_DBG_DEV(dev, "begin"); dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); vsi = &sc->vsi; vsi->ifp = iflib_get_ifp(ctx); hw = &sc->hw; /* Save off determined number of queues for interface */ vsi->num_rx_queues = vsi->shared->isc_nrxqsets; vsi->num_tx_queues = vsi->shared->isc_ntxqsets; /* Setup the stack interface */ iavf_setup_interface(dev, sc); INIT_DBG_DEV(dev, "Interface setup complete"); /* Initialize statistics & add sysctls */ bzero(&sc->vsi.eth_stats, sizeof(struct i40e_eth_stats)); iavf_add_device_sysctls(sc); sc->init_state = IAVF_INIT_READY; atomic_store_rel_32(&sc->queues_enabled, 0); /* We want AQ enabled early for init */ iavf_enable_adminq_irq(hw); INIT_DBG_DEV(dev, "end"); return (error); } /** * XXX: iflib always ignores the return value of detach() * -> This means that this isn't allowed to fail */ static int iavf_if_detach(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; enum i40e_status_code status; INIT_DBG_DEV(dev, "begin"); /* Remove all the media and link information */ ifmedia_removeall(vsi->media); iavf_disable_adminq_irq(hw); status = i40e_shutdown_adminq(&sc->hw); if (status != I40E_SUCCESS) { device_printf(dev, "i40e_shutdown_adminq() failed with status %s\n", i40e_stat_str(hw, status)); } free(sc->vf_res, M_IAVF); iavf_free_pci_resources(sc); iavf_free_filters(sc); INIT_DBG_DEV(dev, "end"); return (0); } static int iavf_if_shutdown(if_ctx_t ctx) { return (0); } static int iavf_if_suspend(if_ctx_t ctx) { return (0); } static int iavf_if_resume(if_ctx_t ctx) { return (0); } static int iavf_send_vc_msg_sleep(struct iavf_sc *sc, u32 op) { int error = 0; if_ctx_t ctx = sc->vsi.ctx; error = ixl_vc_send_cmd(sc, op); if (error != 0) { iavf_dbg_vc(sc, "Error sending %b: %d\n", op, IAVF_FLAGS, error); return (error); } /* Don't wait for a response if the device is being detached. */ if (!iflib_in_detach(ctx)) { iavf_dbg_vc(sc, "Sleeping for op %b\n", op, IAVF_FLAGS); error = sx_sleep(ixl_vc_get_op_chan(sc, op), iflib_ctx_lock_get(ctx), PRI_MAX, "iavf_vc", IAVF_AQ_TIMEOUT); if (error == EWOULDBLOCK) device_printf(sc->dev, "%b timed out\n", op, IAVF_FLAGS); } return (error); } static int iavf_send_vc_msg(struct iavf_sc *sc, u32 op) { int error = 0; error = ixl_vc_send_cmd(sc, op); if (error != 0) iavf_dbg_vc(sc, "Error sending %b: %d\n", op, IAVF_FLAGS, error); return (error); } static void iavf_init_queues(struct ixl_vsi *vsi) { struct ixl_tx_queue *tx_que = vsi->tx_queues; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct rx_ring *rxr; for (int i = 0; i < vsi->num_tx_queues; i++, tx_que++) ixl_init_tx_ring(vsi, tx_que); for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) { rxr = &rx_que->rxr; rxr->mbuf_sz = iflib_get_rx_mbuf_sz(vsi->ctx); wr32(vsi->hw, rxr->tail, 0); } } void iavf_if_init(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = &sc->hw; struct ifnet *ifp = iflib_get_ifp(ctx); u8 tmpaddr[ETHER_ADDR_LEN]; int error = 0; INIT_DBG_IF(ifp, "begin"); MPASS(sx_xlocked(iflib_ctx_lock_get(ctx))); error = iavf_reset_complete(hw); if (error) { device_printf(sc->dev, "%s: VF reset failed\n", __func__); } if (!i40e_check_asq_alive(hw)) { iavf_dbg_info(sc, "ASQ is not alive, re-initializing AQ\n"); pci_enable_busmaster(sc->dev); i40e_shutdown_adminq(hw); i40e_init_adminq(hw); } /* Make sure queues are disabled */ iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DISABLE_QUEUES); bcopy(IF_LLADDR(ifp), tmpaddr, ETHER_ADDR_LEN); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { error = iavf_del_mac_filter(sc, hw->mac.addr); if (error == 0) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DEL_MAC_FILTER); bcopy(tmpaddr, hw->mac.addr, ETH_ALEN); } error = iavf_add_mac_filter(sc, hw->mac.addr, 0); if (!error || error == EEXIST) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_ADD_MAC_FILTER); iflib_set_mac(ctx, hw->mac.addr); /* Prepare the queues for operation */ iavf_init_queues(vsi); /* Set initial ITR values */ iavf_configure_itr(sc); iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIGURE_QUEUES); /* Set up RSS */ iavf_config_rss(sc); /* Map vectors */ iavf_send_vc_msg(sc, IAVF_FLAG_AQ_MAP_VECTORS); /* Init SW TX ring indices */ if (vsi->enable_head_writeback) ixl_init_tx_cidx(vsi); else ixl_init_tx_rsqs(vsi); /* Configure promiscuous mode */ iavf_if_promisc_set(ctx, if_getflags(ifp)); /* Enable queues */ iavf_send_vc_msg_sleep(sc, IAVF_FLAG_AQ_ENABLE_QUEUES); sc->init_state = IAVF_RUNNING; } /* * iavf_attach() helper function; initializes the admin queue * and attempts to establish contact with the PF by * retrying the initial "API version" message several times * or until the PF responds. */ static int iavf_setup_vc(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0, ret_error = 0, asq_retries = 0; bool send_api_ver_retried = 0; /* Need to set these AQ paramters before initializing AQ */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; for (int i = 0; i < IAVF_AQ_MAX_ERR; i++) { /* Initialize admin queue */ error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); ret_error = 1; continue; } iavf_dbg_init(sc, "Initialized Admin Queue; starting" " send_api_ver attempt %d", i+1); retry_send: /* Send VF's API version */ error = iavf_send_api_ver(sc); if (error) { i40e_shutdown_adminq(hw); ret_error = 2; device_printf(dev, "%s: unable to send api" " version to PF on attempt %d, error %d\n", __func__, i+1, error); } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IAVF_AQ_MAX_ERR) { i40e_shutdown_adminq(hw); device_printf(dev, "Admin Queue timeout " "(waiting for send_api_ver), %d more tries...\n", IAVF_AQ_MAX_ERR - (i + 1)); ret_error = 3; break; } i40e_msec_pause(10); } if (asq_retries > IAVF_AQ_MAX_ERR) continue; iavf_dbg_init(sc, "Sent API version message to PF"); /* Verify that the VF accepts the PF's API version */ error = iavf_verify_api_ver(sc); if (error == ETIMEDOUT) { if (!send_api_ver_retried) { /* Resend message, one more time */ send_api_ver_retried = true; device_printf(dev, "%s: Timeout while verifying API version on first" " try!\n", __func__); goto retry_send; } else { device_printf(dev, "%s: Timeout while verifying API version on second" " try!\n", __func__); ret_error = 4; break; } } if (error) { device_printf(dev, "%s: Unable to verify API version," " error %s\n", __func__, i40e_stat_str(hw, error)); ret_error = 5; } break; } if (ret_error >= 4) i40e_shutdown_adminq(hw); return (ret_error); } /* * iavf_attach() helper function; asks the PF for this VF's * configuration, and saves the information if it receives it. */ static int iavf_vf_config(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int bufsz, error = 0, ret_error = 0; int asq_retries, retried = 0; retry_config: error = iavf_send_vf_config_msg(sc); if (error) { device_printf(dev, "%s: Unable to send VF config request, attempt %d," " error %d\n", __func__, retried + 1, error); ret_error = 2; } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IAVF_AQ_MAX_ERR) { device_printf(dev, "%s: Admin Queue timeout " "(waiting for send_vf_config_msg), attempt %d\n", __func__, retried + 1); ret_error = 3; goto fail; } i40e_msec_pause(10); } iavf_dbg_init(sc, "Sent VF config message to PF, attempt %d\n", retried + 1); if (!sc->vf_res) { bufsz = sizeof(struct virtchnl_vf_resource) + (I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource)); sc->vf_res = malloc(bufsz, M_IAVF, M_NOWAIT); if (!sc->vf_res) { device_printf(dev, "%s: Unable to allocate memory for VF configuration" " message from PF on attempt %d\n", __func__, retried + 1); ret_error = 1; goto fail; } } /* Check for VF config response */ error = iavf_get_vf_config(sc); if (error == ETIMEDOUT) { /* The 1st time we timeout, send the configuration message again */ if (!retried) { retried++; goto retry_config; } device_printf(dev, "%s: iavf_get_vf_config() timed out waiting for a response\n", __func__); } if (error) { device_printf(dev, "%s: Unable to get VF configuration from PF after %d tries!\n", __func__, retried + 1); ret_error = 4; } goto done; fail: free(sc->vf_res, M_IAVF); done: return (ret_error); } static int iavf_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; int err, i, rid, vector = 0; char buf[16]; MPASS(vsi->shared->isc_nrxqsets > 0); MPASS(vsi->shared->isc_ntxqsets > 0); /* Admin Que is vector 0*/ rid = vector + 1; err = iflib_irq_alloc_generic(ctx, &vsi->irq, rid, IFLIB_INTR_ADMIN, iavf_msix_adminq, sc, 0, "aq"); if (err) { iflib_irq_free(ctx, &vsi->irq); device_printf(iflib_get_dev(ctx), "Failed to register Admin Que handler"); return (err); } /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, - IFLIB_INTR_RX, iavf_msix_que, rx_que, rx_que->rxr.me, buf); + IFLIB_INTR_RXTX, iavf_msix_que, rx_que, rx_que->rxr.me, buf); /* XXX: Does the driver work as expected if there are fewer num_rx_queues than * what's expected in the iflib context? */ if (err) { device_printf(iflib_get_dev(ctx), "Failed to allocate queue RX int vector %d, err: %d\n", i, err); vsi->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } bzero(buf, sizeof(buf)); for (i = 0; i < vsi->shared->isc_ntxqsets; i++, tx_que++) { snprintf(buf, sizeof(buf), "txq%d", i); iflib_softirq_alloc_generic(ctx, &vsi->rx_queues[i % vsi->shared->isc_nrxqsets].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); /* TODO: Maybe call a strategy function for this to figure out which * interrupts to map Tx queues to. I don't know if there's an immediately * better way than this other than a user-supplied map, though. */ tx_que->msix = (i % vsi->shared->isc_nrxqsets) + 1; } return (0); fail: iflib_irq_free(ctx, &vsi->irq); rx_que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (err); } /* Enable all interrupts */ static void iavf_if_enable_intr(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; iavf_enable_intr(vsi); } /* Disable all interrupts */ static void iavf_if_disable_intr(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; iavf_disable_intr(vsi); } static int iavf_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = &vsi->rx_queues[rxqid]; iavf_enable_queue_irq(hw, rx_que->msix - 1); return (0); } static int iavf_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid]; iavf_enable_queue_irq(hw, tx_que->msix - 1); return (0); } static int iavf_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que; int i, j, error = 0; MPASS(scctx->isc_ntxqsets > 0); MPASS(ntxqs == 1); MPASS(scctx->isc_ntxqsets == ntxqsets); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ixl_tx_queue *) malloc(sizeof(struct ixl_tx_queue) *ntxqsets, M_IAVF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = vsi->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; que->vsi = vsi; if (!vsi->enable_head_writeback) { /* Allocate report status array */ if (!(txr->tx_rsq = malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IAVF, M_NOWAIT))) { device_printf(iflib_get_dev(ctx), "failed to allocate tx_rsq memory\n"); error = ENOMEM; goto fail; } /* Init report status array */ for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; } /* get the virtual and physical address of the hardware queues */ txr->tail = I40E_QTX_TAIL1(txr->me); txr->tx_base = (struct i40e_tx_desc *)vaddrs[i * ntxqs]; txr->tx_paddr = paddrs[i * ntxqs]; txr->que = que; } return (0); fail: iavf_if_queues_free(ctx); return (error); } static int iavf_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *que; int i, error = 0; #ifdef INVARIANTS if_softc_ctx_t scctx = vsi->shared; MPASS(scctx->isc_nrxqsets > 0); MPASS(nrxqs == 1); MPASS(scctx->isc_nrxqsets == nrxqsets); #endif /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ixl_rx_queue *) malloc(sizeof(struct ixl_rx_queue) * nrxqsets, M_IAVF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate RX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = vsi->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; que->vsi = vsi; /* get the virtual and physical address of the hardware queues */ rxr->tail = I40E_QRX_TAIL1(rxr->me); rxr->rx_base = (union i40e_rx_desc *)vaddrs[i * nrxqs]; rxr->rx_paddr = paddrs[i * nrxqs]; rxr->que = que; } return (0); fail: iavf_if_queues_free(ctx); return (error); } static void iavf_if_queues_free(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; if (!vsi->enable_head_writeback) { struct ixl_tx_queue *que; int i = 0; for (i = 0, que = vsi->tx_queues; i < vsi->shared->isc_ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq != NULL) { free(txr->tx_rsq, M_IAVF); txr->tx_rsq = NULL; } } } if (vsi->tx_queues != NULL) { free(vsi->tx_queues, M_IAVF); vsi->tx_queues = NULL; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_IAVF); vsi->rx_queues = NULL; } } static int iavf_check_aq_errors(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; u32 reg, oldreg; u8 aq_error = false; /* check for Admin queue errors */ oldreg = reg = rd32(hw, hw->aq.arq.len); if (reg & I40E_VF_ARQLEN1_ARQVFE_MASK) { device_printf(dev, "ARQ VF Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQVFE_MASK; aq_error = true; } if (reg & I40E_VF_ARQLEN1_ARQOVFL_MASK) { device_printf(dev, "ARQ Overflow Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQOVFL_MASK; aq_error = true; } if (reg & I40E_VF_ARQLEN1_ARQCRIT_MASK) { device_printf(dev, "ARQ Critical Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQCRIT_MASK; aq_error = true; } if (oldreg != reg) wr32(hw, hw->aq.arq.len, reg); oldreg = reg = rd32(hw, hw->aq.asq.len); if (reg & I40E_VF_ATQLEN1_ATQVFE_MASK) { device_printf(dev, "ASQ VF Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQVFE_MASK; aq_error = true; } if (reg & I40E_VF_ATQLEN1_ATQOVFL_MASK) { device_printf(dev, "ASQ Overflow Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQOVFL_MASK; aq_error = true; } if (reg & I40E_VF_ATQLEN1_ATQCRIT_MASK) { device_printf(dev, "ASQ Critical Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQCRIT_MASK; aq_error = true; } if (oldreg != reg) wr32(hw, hw->aq.asq.len, reg); if (aq_error) { device_printf(dev, "WARNING: Stopping VF!\n"); /* * A VF reset might not be enough to fix a problem here; * a PF reset could be required. */ sc->init_state = IAVF_RESET_REQUIRED; iavf_stop(sc); iavf_request_reset(sc); } return (aq_error ? EIO : 0); } static enum i40e_status_code iavf_process_adminq(struct iavf_sc *sc, u16 *pending) { enum i40e_status_code status = I40E_SUCCESS; struct i40e_arq_event_info event; struct i40e_hw *hw = &sc->hw; struct virtchnl_msg *v_msg; int error = 0, loop = 0; u32 reg; error = iavf_check_aq_errors(sc); if (error) return (I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR); event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = sc->aq_buffer; bzero(event.msg_buf, IXL_AQ_BUF_SZ); v_msg = (struct virtchnl_msg *)&event.desc; /* clean and process any events */ do { status = i40e_clean_arq_element(hw, &event, pending); /* * Also covers normal case when i40e_clean_arq_element() * returns "I40E_ERR_ADMIN_QUEUE_NO_WORK" */ if (status) break; iavf_vc_completion(sc, v_msg->v_opcode, v_msg->v_retval, event.msg_buf, event.msg_len); bzero(event.msg_buf, IXL_AQ_BUF_SZ); } while (*pending && (loop++ < IXL_ADM_LIMIT)); /* Re-enable admin queue interrupt cause */ reg = rd32(hw, I40E_VFINT_ICR0_ENA1); reg |= I40E_VFINT_ICR0_ENA1_ADMINQ_MASK; wr32(hw, I40E_VFINT_ICR0_ENA1, reg); return (status); } static void iavf_if_update_admin_status(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct i40e_hw *hw = &sc->hw; u16 pending; iavf_process_adminq(sc, &pending); iavf_update_link_status(sc); /* * If there are still messages to process, reschedule. * Otherwise, re-enable the Admin Queue interrupt. */ if (pending > 0) iflib_admin_intr_deferred(ctx); else iavf_enable_adminq_irq(hw); } static u_int iavf_mc_filter_apply(void *arg, struct sockaddr_dl *sdl, u_int count __unused) { struct iavf_sc *sc = arg; int error; error = iavf_add_mac_filter(sc, (u8*)LLADDR(sdl), IXL_FILTER_MC); return (!error); } static void iavf_if_multi_set(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); IOCTL_DEBUGOUT("iavf_if_multi_set: begin"); if (__predict_false(if_llmaddr_count(iflib_get_ifp(ctx)) >= MAX_MULTICAST_ADDR)) { /* Delete MC filters and enable mulitcast promisc instead */ iavf_init_multi(sc); sc->promisc_flags |= FLAG_VF_MULTICAST_PROMISC; iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIGURE_PROMISC); return; } /* If there aren't too many filters, delete existing MC filters */ iavf_init_multi(sc); /* And (re-)install filters for all mcast addresses */ if (if_foreach_llmaddr(iflib_get_ifp(ctx), iavf_mc_filter_apply, sc) > 0) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_ADD_MAC_FILTER); } static int iavf_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) return (EINVAL); vsi->shared->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; return (0); } static void iavf_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { #ifdef IXL_DEBUG struct ifnet *ifp = iflib_get_ifp(ctx); #endif struct iavf_sc *sc = iflib_get_softc(ctx); INIT_DBG_IF(ifp, "begin"); iavf_update_link_status(sc); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->link_up) return; ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; /* Based on the link speed reported by the PF over the AdminQ, choose a * PHY type to report. This isn't 100% correct since we don't really * know the underlying PHY type of the PF, but at least we can report * a valid link speed... */ switch (sc->link_speed) { case VIRTCHNL_LINK_SPEED_100MB: ifmr->ifm_active |= IFM_100_TX; break; case VIRTCHNL_LINK_SPEED_1GB: ifmr->ifm_active |= IFM_1000_T; break; case VIRTCHNL_LINK_SPEED_10GB: ifmr->ifm_active |= IFM_10G_SR; break; case VIRTCHNL_LINK_SPEED_20GB: case VIRTCHNL_LINK_SPEED_25GB: ifmr->ifm_active |= IFM_25G_SR; break; case VIRTCHNL_LINK_SPEED_40GB: ifmr->ifm_active |= IFM_40G_SR4; break; default: ifmr->ifm_active |= IFM_UNKNOWN; break; } INIT_DBG_IF(ifp, "end"); } static int iavf_if_media_change(if_ctx_t ctx) { struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(iflib_get_ifp(ctx), "Media change is not supported.\n"); return (ENODEV); } static int iavf_if_promisc_set(if_ctx_t ctx, int flags) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); sc->promisc_flags = 0; if (flags & IFF_ALLMULTI || if_llmaddr_count(ifp) >= MAX_MULTICAST_ADDR) sc->promisc_flags |= FLAG_VF_MULTICAST_PROMISC; if (flags & IFF_PROMISC) sc->promisc_flags |= FLAG_VF_UNICAST_PROMISC; iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIGURE_PROMISC); return (0); } static void iavf_if_timer(if_ctx_t ctx, uint16_t qid) { struct iavf_sc *sc = iflib_get_softc(ctx); struct i40e_hw *hw = &sc->hw; u32 val; if (qid != 0) return; /* Check for when PF triggers a VF reset */ val = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if (val != VIRTCHNL_VFR_VFACTIVE && val != VIRTCHNL_VFR_COMPLETED) { iavf_dbg_info(sc, "reset in progress! (%d)\n", val); return; } /* Fire off the adminq task */ iflib_admin_intr_deferred(ctx); /* Update stats */ iavf_request_stats(sc); } static void iavf_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct iavf_vlan_filter *v; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; ++vsi->num_vlans; v = malloc(sizeof(struct iavf_vlan_filter), M_IAVF, M_WAITOK | M_ZERO); SLIST_INSERT_HEAD(sc->vlan_filters, v, next); v->vlan = vtag; v->flags = IXL_FILTER_ADD; iavf_send_vc_msg(sc, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } static void iavf_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct iavf_vlan_filter *v; int i = 0; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; SLIST_FOREACH(v, sc->vlan_filters, next) { if (v->vlan == vtag) { v->flags = IXL_FILTER_DEL; ++i; --vsi->num_vlans; } } if (i) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DEL_VLAN_FILTER); } static uint64_t iavf_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (vsi->ipackets); case IFCOUNTER_IERRORS: return (vsi->ierrors); case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: return (vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); case IFCOUNTER_IBYTES: return (vsi->ibytes); case IFCOUNTER_OBYTES: return (vsi->obytes); case IFCOUNTER_IMCASTS: return (vsi->imcasts); case IFCOUNTER_OMCASTS: return (vsi->omcasts); case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: return (vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: return (if_get_counter_default(ifp, cnt)); } } /* iavf_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for every event. * * @returns true if iflib needs to reinit the interface */ static bool iavf_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: /* This case must return true if VLAN anti-spoof checks are * enabled by the PF driver for the VF. */ default: return (true); } } static void iavf_free_pci_resources(struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; device_t dev = sc->dev; /* We may get here before stations are set up */ if (rx_que == NULL) goto early; /* Release all interrupts */ iflib_irq_free(vsi->ctx, &vsi->irq); for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(vsi->ctx, &rx_que->que_irq); early: if (sc->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->pci_mem), sc->pci_mem); } /* ** Requests a VF reset from the PF. ** ** Requires the VF's Admin Queue to be initialized. */ static int iavf_reset(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0; /* Ask the PF to reset us if we are initiating */ if (sc->init_state != IAVF_RESET_PENDING) iavf_request_reset(sc); i40e_msec_pause(100); error = iavf_reset_complete(hw); if (error) { device_printf(dev, "%s: VF reset failed\n", __func__); return (error); } pci_enable_busmaster(dev); error = i40e_shutdown_adminq(hw); if (error) { device_printf(dev, "%s: shutdown_adminq failed: %d\n", __func__, error); return (error); } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); return (error); } iavf_enable_adminq_irq(hw); return (0); } static int iavf_reset_complete(struct i40e_hw *hw) { u32 reg; /* Wait up to ~10 seconds */ for (int i = 0; i < 100; i++) { reg = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if ((reg == VIRTCHNL_VFR_VFACTIVE) || (reg == VIRTCHNL_VFR_COMPLETED)) return (0); i40e_msec_pause(100); } return (EBUSY); } static void iavf_setup_interface(device_t dev, struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; if_ctx_t ctx = vsi->ctx; struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DBG_DEV(dev, "begin"); vsi->shared->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; #if __FreeBSD_version >= 1100000 if_setbaudrate(ifp, IF_Gbps(40)); #else if_initbaudrate(ifp, IF_Gbps(40)); #endif ifmedia_add(vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(vsi->media, IFM_ETHER | IFM_AUTO); } /* ** Get a new filter and add it to the mac filter list. */ static struct iavf_mac_filter * iavf_get_mac_filter(struct iavf_sc *sc) { struct iavf_mac_filter *f; f = malloc(sizeof(struct iavf_mac_filter), M_IAVF, M_NOWAIT | M_ZERO); if (f) SLIST_INSERT_HEAD(sc->mac_filters, f, next); return (f); } /* ** Find the filter with matching MAC address */ static struct iavf_mac_filter * iavf_find_mac_filter(struct iavf_sc *sc, u8 *macaddr) { struct iavf_mac_filter *f; bool match = FALSE; SLIST_FOREACH(f, sc->mac_filters, next) { if (cmp_etheraddr(f->macaddr, macaddr)) { match = TRUE; break; } } if (!match) f = NULL; return (f); } /* ** Admin Queue interrupt handler */ static int iavf_msix_adminq(void *arg) { struct iavf_sc *sc = arg; struct i40e_hw *hw = &sc->hw; u32 reg, mask; bool do_task = FALSE; ++sc->admin_irq; reg = rd32(hw, I40E_VFINT_ICR01); /* * For masking off interrupt causes that need to be handled before * they can be re-enabled */ mask = rd32(hw, I40E_VFINT_ICR0_ENA1); /* Check on the cause */ if (reg & I40E_VFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_VFINT_ICR0_ENA_ADMINQ_MASK; do_task = TRUE; } wr32(hw, I40E_VFINT_ICR0_ENA1, mask); iavf_enable_adminq_irq(hw); if (do_task) return (FILTER_SCHEDULE_THREAD); else return (FILTER_HANDLED); } void iavf_enable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; iavf_enable_adminq_irq(hw); for (int i = 0; i < vsi->num_rx_queues; i++, que++) iavf_enable_queue_irq(hw, que->rxr.me); } void iavf_disable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, que++) iavf_disable_queue_irq(hw, que->rxr.me); } static void iavf_disable_adminq_irq(struct i40e_hw *hw) { wr32(hw, I40E_VFINT_DYN_CTL01, 0); wr32(hw, I40E_VFINT_ICR0_ENA1, 0); /* flush */ rd32(hw, I40E_VFGEN_RSTAT); } static void iavf_enable_adminq_irq(struct i40e_hw *hw) { wr32(hw, I40E_VFINT_DYN_CTL01, I40E_VFINT_DYN_CTL01_INTENA_MASK | I40E_VFINT_DYN_CTL01_ITR_INDX_MASK); wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK); /* flush */ rd32(hw, I40E_VFGEN_RSTAT); } static void iavf_enable_queue_irq(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_VFINT_DYN_CTLN1_INTENA_MASK | I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; wr32(hw, I40E_VFINT_DYN_CTLN1(id), reg); } static void iavf_disable_queue_irq(struct i40e_hw *hw, int id) { wr32(hw, I40E_VFINT_DYN_CTLN1(id), I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK); rd32(hw, I40E_VFGEN_RSTAT); } static void iavf_configure_tx_itr(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_tx_queue *que = vsi->tx_queues; vsi->tx_itr_setting = sc->tx_itr; for (int i = 0; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; wr32(hw, I40E_VFINT_ITRN1(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static void iavf_configure_rx_itr(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *que = vsi->rx_queues; vsi->rx_itr_setting = sc->rx_itr; for (int i = 0; i < vsi->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; } } /* * Get initial ITR values from tunable values. */ static void iavf_configure_itr(struct iavf_sc *sc) { iavf_configure_tx_itr(sc); iavf_configure_rx_itr(sc); } /* ** Provide a update to the queue RX ** interrupt moderation value. */ static void iavf_set_queue_rx_itr(struct ixl_rx_queue *que) { struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct rx_ring *rxr = &que->rxr; /* Idle, do nothing */ if (rxr->bytes == 0) return; /* Update the hardware if needed */ if (rxr->itr != vsi->rx_itr_setting) { rxr->itr = vsi->rx_itr_setting; wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR, que->rxr.me), rxr->itr); } } static int iavf_msix_que(void *arg) { struct ixl_rx_queue *rx_que = arg; ++rx_que->irqs; iavf_set_queue_rx_itr(rx_que); // iavf_set_queue_tx_itr(que); return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * Multicast Initialization * * This routine is called by init to reset a fresh state. * **********************************************************************/ static void iavf_init_multi(struct iavf_sc *sc) { struct iavf_mac_filter *f; int mcnt = 0; /* First clear any multicast filters */ SLIST_FOREACH(f, sc->mac_filters, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { f->flags |= IXL_FILTER_DEL; mcnt++; } } if (mcnt > 0) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DEL_MAC_FILTER); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ void iavf_update_link_status(struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; u64 baudrate; if (sc->link_up){ if (vsi->link_active == FALSE) { vsi->link_active = TRUE; baudrate = ixl_max_vc_speed_to_value(sc->link_speed); iavf_dbg_info(sc, "baudrate: %lu\n", baudrate); iflib_link_state_change(vsi->ctx, LINK_STATE_UP, baudrate); } } else { /* Link down */ if (vsi->link_active == TRUE) { vsi->link_active = FALSE; iflib_link_state_change(vsi->ctx, LINK_STATE_DOWN, 0); } } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void iavf_stop(struct iavf_sc *sc) { struct ifnet *ifp; ifp = sc->vsi.ifp; iavf_disable_intr(&sc->vsi); if (atomic_load_acq_32(&sc->queues_enabled)) iavf_send_vc_msg_sleep(sc, IAVF_FLAG_AQ_DISABLE_QUEUES); } static void iavf_if_stop(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); iavf_stop(sc); } static void iavf_config_rss_reg(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; u32 lut = 0; u64 set_hena = 0, hena; int i, j, que_id; u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; #ifdef RSS u32 rss_hash_config; #endif /* Don't set up RSS if using a single queue */ if (vsi->num_rx_queues == 1) { wr32(hw, I40E_VFQF_HENA(0), 0); wr32(hw, I40E_VFQF_HENA(1), 0); ixl_flush(hw); return; } #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #else ixl_get_default_rss_key(rss_seed); #endif /* Fill out hash function seed */ for (i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) wr32(hw, I40E_VFQF_HKEY(i), rss_seed[i]); /* Enable PCTYPES for RSS: */ #ifdef RSS rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); hena |= set_hena; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); /* Populate the LUT with max no. of queues in round robin fashion */ for (i = 0, j = 0; i < IXL_RSS_VSI_LUT_SIZE; i++, j++) { if (j == vsi->num_rx_queues) j = 0; #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_rx_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_rx_queues; #else que_id = j; #endif /* lut = 4-byte sliding window of 4 lut entries */ lut = (lut << 8) | (que_id & IXL_RSS_VF_LUT_ENTRY_MASK); /* On i = 3, we have 4 entries in lut; write to the register */ if ((i & 3) == 3) { wr32(hw, I40E_VFQF_HLUT(i >> 2), lut); DDPRINTF(sc->dev, "HLUT(%2d): %#010x", i, lut); } } ixl_flush(hw); } static void iavf_config_rss_pf(struct iavf_sc *sc) { iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIG_RSS_KEY); iavf_send_vc_msg(sc, IAVF_FLAG_AQ_SET_RSS_HENA); iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIG_RSS_LUT); } /* ** iavf_config_rss - setup RSS ** ** RSS keys and table are cleared on VF reset. */ static void iavf_config_rss(struct iavf_sc *sc) { if (sc->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_REG) { iavf_dbg_info(sc, "Setting up RSS using VF registers..."); iavf_config_rss_reg(sc); } else if (sc->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) { iavf_dbg_info(sc, "Setting up RSS using messages to PF..."); iavf_config_rss_pf(sc); } else device_printf(sc->dev, "VF does not support RSS capability sent by PF.\n"); } /* ** This routine adds new MAC filters to the sc's list; ** these are later added in hardware by sending a virtual ** channel message. */ static int iavf_add_mac_filter(struct iavf_sc *sc, u8 *macaddr, u16 flags) { struct iavf_mac_filter *f; /* Does one already exist? */ f = iavf_find_mac_filter(sc, macaddr); if (f != NULL) { iavf_dbg_filter(sc, "exists: " MAC_FORMAT "\n", MAC_FORMAT_ARGS(macaddr)); return (EEXIST); } /* If not, get a new empty filter */ f = iavf_get_mac_filter(sc); if (f == NULL) { device_printf(sc->dev, "%s: no filters available!!\n", __func__); return (ENOMEM); } iavf_dbg_filter(sc, "marked: " MAC_FORMAT "\n", MAC_FORMAT_ARGS(macaddr)); bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); f->flags |= flags; return (0); } /* ** Marks a MAC filter for deletion. */ static int iavf_del_mac_filter(struct iavf_sc *sc, u8 *macaddr) { struct iavf_mac_filter *f; f = iavf_find_mac_filter(sc, macaddr); if (f == NULL) return (ENOENT); f->flags |= IXL_FILTER_DEL; return (0); } /* * Re-uses the name from the PF driver. */ static void iavf_add_device_sysctls(struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "tx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_tx_itr, "I", "Immediately set TX ITR value for all queues"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "rx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_rx_itr, "I", "Immediately set RX ITR value for all queues"); /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. */ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &sc->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &sc->dbg_mask, 0, "Non-shared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "queue_interrupt_table", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_queue_interrupt_table, "A", "View MSI-X indices for TX/RX queues"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_vf_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_vf_reset, "A", "Request a VF reset from PF"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_vflr_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_vflr_reset, "A", "Request a VFLR reset from HW"); /* Add stats sysctls */ ixl_add_vsi_sysctls(dev, vsi, ctx, "vsi"); ixl_vsi_add_queues_stats(vsi, ctx); } static void iavf_init_filters(struct iavf_sc *sc) { sc->mac_filters = malloc(sizeof(struct mac_list), M_IAVF, M_WAITOK | M_ZERO); SLIST_INIT(sc->mac_filters); sc->vlan_filters = malloc(sizeof(struct vlan_list), M_IAVF, M_WAITOK | M_ZERO); SLIST_INIT(sc->vlan_filters); } static void iavf_free_filters(struct iavf_sc *sc) { struct iavf_mac_filter *f; struct iavf_vlan_filter *v; while (!SLIST_EMPTY(sc->mac_filters)) { f = SLIST_FIRST(sc->mac_filters); SLIST_REMOVE_HEAD(sc->mac_filters, next); free(f, M_IAVF); } free(sc->mac_filters, M_IAVF); while (!SLIST_EMPTY(sc->vlan_filters)) { v = SLIST_FIRST(sc->vlan_filters); SLIST_REMOVE_HEAD(sc->vlan_filters, next); free(v, M_IAVF); } free(sc->vlan_filters, M_IAVF); } char * iavf_vc_speed_to_string(enum virtchnl_link_speed link_speed) { int index; char *speeds[] = { "Unknown", "100 Mbps", "1 Gbps", "10 Gbps", "40 Gbps", "20 Gbps", "25 Gbps", }; switch (link_speed) { case VIRTCHNL_LINK_SPEED_100MB: index = 1; break; case VIRTCHNL_LINK_SPEED_1GB: index = 2; break; case VIRTCHNL_LINK_SPEED_10GB: index = 3; break; case VIRTCHNL_LINK_SPEED_40GB: index = 4; break; case VIRTCHNL_LINK_SPEED_20GB: index = 5; break; case VIRTCHNL_LINK_SPEED_25GB: index = 6; break; case VIRTCHNL_LINK_SPEED_UNKNOWN: default: index = 0; break; } return speeds[index]; } static int iavf_sysctl_current_speed(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; int error = 0; error = sysctl_handle_string(oidp, iavf_vc_speed_to_string(sc->link_speed), 8, req); return (error); } /* * Sanity check and save off tunable values. */ static void iavf_save_tunables(struct iavf_sc *sc) { device_t dev = sc->dev; /* Save tunable information */ sc->dbg_mask = iavf_core_debug_mask; sc->hw.debug_mask = iavf_shared_debug_mask; sc->vsi.enable_head_writeback = !!(iavf_enable_head_writeback); if (iavf_tx_itr < 0 || iavf_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid tx_itr value of %d set!\n", iavf_tx_itr); device_printf(dev, "tx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_4K); sc->tx_itr = IXL_ITR_4K; } else sc->tx_itr = iavf_tx_itr; if (iavf_rx_itr < 0 || iavf_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid rx_itr value of %d set!\n", iavf_rx_itr); device_printf(dev, "rx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_8K); sc->rx_itr = IXL_ITR_8K; } else sc->rx_itr = iavf_rx_itr; } /* * Used to set the Tx ITR value for all of the VF's queues. * Writes to the ITR registers immediately. */ static int iavf_sysctl_tx_itr(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; device_t dev = sc->dev; int requested_tx_itr; int error = 0; requested_tx_itr = sc->tx_itr; error = sysctl_handle_int(oidp, &requested_tx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_tx_itr < 0 || requested_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid TX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } sc->tx_itr = requested_tx_itr; iavf_configure_tx_itr(sc); return (error); } /* * Used to set the Rx ITR value for all of the VF's queues. * Writes to the ITR registers immediately. */ static int iavf_sysctl_rx_itr(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; device_t dev = sc->dev; int requested_rx_itr; int error = 0; requested_rx_itr = sc->rx_itr; error = sysctl_handle_int(oidp, &requested_rx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_rx_itr < 0 || requested_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid RX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } sc->rx_itr = requested_rx_itr; iavf_configure_rx_itr(sc); return (error); } static int iavf_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; struct iavf_mac_filter *f; struct iavf_vlan_filter *v; device_t dev = sc->dev; int ftl_len, ftl_counter = 0, error = 0; struct sbuf *buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_printf(buf, "\n"); /* Print MAC filters */ sbuf_printf(buf, "MAC Filters:\n"); ftl_len = 0; SLIST_FOREACH(f, sc->mac_filters, next) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)\n"); else { SLIST_FOREACH(f, sc->mac_filters, next) { sbuf_printf(buf, MAC_FORMAT ", flags %#06x\n", MAC_FORMAT_ARGS(f->macaddr), f->flags); } } /* Print VLAN filters */ sbuf_printf(buf, "VLAN Filters:\n"); ftl_len = 0; SLIST_FOREACH(v, sc->vlan_filters, next) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)"); else { SLIST_FOREACH(v, sc->vlan_filters, next) { sbuf_printf(buf, "%d, flags %#06x", v->vlan, v->flags); /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) sbuf_printf(buf, "\n"); } } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } /* * Print out mapping of TX queue indexes and Rx queue indexes * to MSI-X vectors. */ static int iavf_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; struct ixl_vsi *vsi = &sc->vsi; device_t dev = sc->dev; struct sbuf *buf; int error = 0; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); for (int i = 0; i < vsi->num_rx_queues; i++) { rx_que = &vsi->rx_queues[i]; sbuf_printf(buf, "(rxq %3d): %d\n", i, rx_que->msix); } for (int i = 0; i < vsi->num_tx_queues; i++) { tx_que = &vsi->tx_queues[i]; sbuf_printf(buf, "(txq %3d): %d\n", i, tx_que->msix); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } #define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)) static int iavf_sysctl_vf_reset(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; int do_reset = 0, error = 0; error = sysctl_handle_int(oidp, &do_reset, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (do_reset == 1) { iavf_reset(sc); if (CTX_ACTIVE(sc->vsi.ctx)) iflib_request_reset(sc->vsi.ctx); } return (error); } static int iavf_sysctl_vflr_reset(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; device_t dev = sc->dev; int do_reset = 0, error = 0; error = sysctl_handle_int(oidp, &do_reset, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (do_reset == 1) { if (!pcie_flr(dev, max(pcie_get_max_completion_timeout(dev) / 1000, 10), true)) { device_printf(dev, "PCIE FLR failed\n"); error = EIO; } else if (CTX_ACTIVE(sc->vsi.ctx)) iflib_request_reset(sc->vsi.ctx); } return (error); } #undef CTX_ACTIVE diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 4d953c45ab91..50eb448a1154 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1,1863 +1,1863 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl.h" #include "ixl_pf.h" #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif /********************************************************************* * Driver version *********************************************************************/ #define IXL_DRIVER_VERSION_MAJOR 2 #define IXL_DRIVER_VERSION_MINOR 3 #define IXL_DRIVER_VERSION_BUILD 0 #define IXL_DRIVER_VERSION_STRING \ __XSTRING(IXL_DRIVER_VERSION_MAJOR) "." \ __XSTRING(IXL_DRIVER_VERSION_MINOR) "." \ __XSTRING(IXL_DRIVER_VERSION_BUILD) "-k" /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * * ( Vendor ID, Device ID, Branding String ) *********************************************************************/ static pci_vendor_info_t ixl_vendor_info_array[] = { PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710, "Intel(R) Ethernet Controller X710 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_B, "Intel(R) Ethernet Controller XL710 for 40GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_C, "Intel(R) Ethernet Controller X710 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_A, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_B, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_C, "Intel(R) Ethernet Controller X710 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T, "Intel(R) Ethernet Controller X710 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T4, "Intel(R) Ethernet Controller X710/X557-AT 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_X722, "Intel(R) Ethernet Connection X722 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 1GbE"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_B, "Intel(R) Ethernet Controller XXV710 for 25GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_SFP28, "Intel(R) Ethernet Controller XXV710 for 25GbE SFP28"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_BC, "Intel(R) Ethernet Controller X710 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_SFP, "Intel(R) Ethernet Controller X710 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_B, "Intel(R) Ethernet Controller X710 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_5G_BASE_T_BC, "Intel(R) Ethernet Controller V710 for 5GBASE-T"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ /*** IFLIB interface ***/ static void *ixl_register(device_t dev); static int ixl_if_attach_pre(if_ctx_t ctx); static int ixl_if_attach_post(if_ctx_t ctx); static int ixl_if_detach(if_ctx_t ctx); static int ixl_if_shutdown(if_ctx_t ctx); static int ixl_if_suspend(if_ctx_t ctx); static int ixl_if_resume(if_ctx_t ctx); static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ixl_if_enable_intr(if_ctx_t ctx); static void ixl_if_disable_intr(if_ctx_t ctx); static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static void ixl_if_queues_free(if_ctx_t ctx); static void ixl_if_update_admin_status(if_ctx_t ctx); static void ixl_if_multi_set(if_ctx_t ctx); static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ixl_if_media_change(if_ctx_t ctx); static int ixl_if_promisc_set(if_ctx_t ctx, int flags); static void ixl_if_timer(if_ctx_t ctx, uint16_t qid); static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt); static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static bool ixl_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx); #endif /*** Other ***/ static u_int ixl_mc_filter_apply(void *, struct sockaddr_dl *, u_int); static void ixl_save_pf_tunables(struct ixl_pf *); static int ixl_allocate_pci_resources(struct ixl_pf *); static void ixl_setup_ssctx(struct ixl_pf *pf); static void ixl_admin_timer(void *arg); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixl_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixl_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, iflib_device_iov_init), DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit), DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf), #endif DEVMETHOD_END }; static driver_t ixl_driver = { "ixl", ixl_methods, sizeof(struct ixl_pf), }; devclass_t ixl_devclass; DRIVER_MODULE(ixl, pci, ixl_driver, ixl_devclass, 0, 0); IFLIB_PNP_INFO(pci, ixl, ixl_vendor_info_array); MODULE_VERSION(ixl, 3); MODULE_DEPEND(ixl, pci, 1, 1, 1); MODULE_DEPEND(ixl, ether, 1, 1, 1); MODULE_DEPEND(ixl, iflib, 1, 1, 1); static device_method_t ixl_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixl_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixl_if_attach_post), DEVMETHOD(ifdi_detach, ixl_if_detach), DEVMETHOD(ifdi_shutdown, ixl_if_shutdown), DEVMETHOD(ifdi_suspend, ixl_if_suspend), DEVMETHOD(ifdi_resume, ixl_if_resume), DEVMETHOD(ifdi_init, ixl_if_init), DEVMETHOD(ifdi_stop, ixl_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixl_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixl_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixl_if_disable_intr), DEVMETHOD(ifdi_rx_queue_intr_enable, ixl_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixl_if_tx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixl_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixl_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixl_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixl_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixl_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixl_if_mtu_set), DEVMETHOD(ifdi_media_status, ixl_if_media_status), DEVMETHOD(ifdi_media_change, ixl_if_media_change), DEVMETHOD(ifdi_promisc_set, ixl_if_promisc_set), DEVMETHOD(ifdi_timer, ixl_if_timer), DEVMETHOD(ifdi_vlan_register, ixl_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ixl_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ixl_if_get_counter), DEVMETHOD(ifdi_i2c_req, ixl_if_i2c_req), DEVMETHOD(ifdi_priv_ioctl, ixl_if_priv_ioctl), DEVMETHOD(ifdi_needs_restart, ixl_if_needs_restart), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixl_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixl_if_iov_uninit), DEVMETHOD(ifdi_iov_vf_add, ixl_if_iov_vf_add), DEVMETHOD(ifdi_vflr_handle, ixl_if_vflr_handle), #endif // ifdi_led_func // ifdi_debug DEVMETHOD_END }; static driver_t ixl_if_driver = { "ixl_if", ixl_if_methods, sizeof(struct ixl_pf) }; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "ixl driver parameters"); #ifdef IXL_DEBUG_FC /* * Leave this on unless you need to send flow control * frames (or other control frames) from software */ static int ixl_enable_tx_fc_filter = 1; TUNABLE_INT("hw.ixl.enable_tx_fc_filter", &ixl_enable_tx_fc_filter); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_tx_fc_filter, CTLFLAG_RDTUN, &ixl_enable_tx_fc_filter, 0, "Filter out packets with Ethertype 0x8808 from being sent out by non-HW sources"); #endif #ifdef IXL_DEBUG static int ixl_debug_recovery_mode = 0; TUNABLE_INT("hw.ixl.debug_recovery_mode", &ixl_debug_recovery_mode); SYSCTL_INT(_hw_ixl, OID_AUTO, debug_recovery_mode, CTLFLAG_RDTUN, &ixl_debug_recovery_mode, 0, "Act like when FW entered recovery mode (for debuging)"); #endif static int ixl_i2c_access_method = 0; TUNABLE_INT("hw.ixl.i2c_access_method", &ixl_i2c_access_method); SYSCTL_INT(_hw_ixl, OID_AUTO, i2c_access_method, CTLFLAG_RDTUN, &ixl_i2c_access_method, 0, IXL_SYSCTL_HELP_I2C_METHOD); static int ixl_enable_vf_loopback = 1; TUNABLE_INT("hw.ixl.enable_vf_loopback", &ixl_enable_vf_loopback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_vf_loopback, CTLFLAG_RDTUN, &ixl_enable_vf_loopback, 0, IXL_SYSCTL_HELP_VF_LOOPBACK); /* * Different method for processing TX descriptor * completion. */ static int ixl_enable_head_writeback = 1; TUNABLE_INT("hw.ixl.enable_head_writeback", &ixl_enable_head_writeback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_head_writeback, CTLFLAG_RDTUN, &ixl_enable_head_writeback, 0, "For detecting last completed TX descriptor by hardware, use value written by HW instead of checking descriptors"); static int ixl_core_debug_mask = 0; TUNABLE_INT("hw.ixl.core_debug_mask", &ixl_core_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, core_debug_mask, CTLFLAG_RDTUN, &ixl_core_debug_mask, 0, "Display debug statements that are printed in non-shared code"); static int ixl_shared_debug_mask = 0; TUNABLE_INT("hw.ixl.shared_debug_mask", &ixl_shared_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, shared_debug_mask, CTLFLAG_RDTUN, &ixl_shared_debug_mask, 0, "Display debug statements that are printed in shared code"); #if 0 /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ static int ixl_dynamic_rx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_rx_itr", &ixl_dynamic_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixl_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); static int ixl_dynamic_tx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_tx_itr", &ixl_dynamic_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixl_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); #endif static int ixl_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixl.rx_itr", &ixl_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixl_rx_itr, 0, "RX Interrupt Rate"); static int ixl_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixl.tx_itr", &ixl_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixl_tx_itr, 0, "TX Interrupt Rate"); #ifdef IXL_IW int ixl_enable_iwarp = 0; TUNABLE_INT("hw.ixl.enable_iwarp", &ixl_enable_iwarp); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_iwarp, CTLFLAG_RDTUN, &ixl_enable_iwarp, 0, "iWARP enabled"); #if __FreeBSD_version < 1100000 int ixl_limit_iwarp_msix = 1; #else int ixl_limit_iwarp_msix = IXL_IW_MAX_MSIX; #endif TUNABLE_INT("hw.ixl.limit_iwarp_msix", &ixl_limit_iwarp_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, limit_iwarp_msix, CTLFLAG_RDTUN, &ixl_limit_iwarp_msix, 0, "Limit MSI-X vectors assigned to iWARP"); #endif extern struct if_txrx ixl_txrx_hwb; extern struct if_txrx ixl_txrx_dwb; static struct if_shared_ctx ixl_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_tso_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = 16384, .isc_rx_nsegments = IXL_MAX_RX_SEGS, .isc_rx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixl_vendor_info_array, .isc_driver_version = IXL_DRIVER_VERSION_STRING, .isc_driver = &ixl_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN, .isc_nrxd_min = {IXL_MIN_RING}, .isc_ntxd_min = {IXL_MIN_RING}, .isc_nrxd_max = {IXL_MAX_RING}, .isc_ntxd_max = {IXL_MAX_RING}, .isc_nrxd_default = {IXL_DEFAULT_RING}, .isc_ntxd_default = {IXL_DEFAULT_RING}, }; if_shared_ctx_t ixl_sctx = &ixl_sctx_init; /*** Functions ***/ static void * ixl_register(device_t dev) { return (ixl_sctx); } static int ixl_allocate_pci_resources(struct ixl_pf *pf) { device_t dev = iflib_get_dev(pf->vsi.ctx); struct i40e_hw *hw = &pf->hw; int rid; /* Map BAR0 */ rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->osdep.dev = dev; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; return (0); } static void ixl_setup_ssctx(struct ixl_pf *pf) { if_softc_ctx_t scctx = pf->vsi.shared; struct i40e_hw *hw = &pf->hw; if (IXL_PF_IN_RECOVERY_MODE(pf)) { scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 1; scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; } else if (hw->mac.type == I40E_MAC_X722) scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 128; else scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 64; if (pf->vsi.enable_head_writeback) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_hwb; } else { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_dwb; } scctx->isc_txrx->ift_legacy_intr = ixl_intr; scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union i40e_32byte_rx_desc), DBA_ALIGN); scctx->isc_msix_bar = PCIR_BAR(IXL_MSIX_BAR); scctx->isc_tx_nsegments = IXL_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = IXL_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = IXL_TSO_SIZE; scctx->isc_tx_tso_segsize_max = IXL_MAX_DMA_SEG_SIZE; scctx->isc_rss_table_size = pf->hw.func_caps.rss_table_size; scctx->isc_tx_csum_flags = CSUM_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IXL_CAPS; } static void ixl_admin_timer(void *arg) { struct ixl_pf *pf = (struct ixl_pf *)arg; /* Fire off the admin task */ iflib_admin_intr_deferred(pf->vsi.ctx); /* Reschedule the admin timer */ callout_schedule(&pf->admin_timer, hz/2); } static int ixl_attach_pre_recovery_mode(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); i40e_get_mac_addr(hw, hw->mac.addr); if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } ixl_setup_ssctx(pf); return (0); } static int ixl_if_attach_pre(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; enum i40e_get_fw_lldp_status_resp lldp_status; struct i40e_filter_control_settings filter; enum i40e_status_code status; int error = 0; dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); INIT_DBG_DEV(dev, "begin"); vsi = &pf->vsi; vsi->back = pf; pf->dev = dev; hw = &pf->hw; vsi->dev = dev; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->ctx = ctx; vsi->media = iflib_get_media(ctx); vsi->shared = iflib_get_softc_ctx(ctx); snprintf(pf->admin_mtx_name, sizeof(pf->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&pf->admin_mtx, pf->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&pf->admin_timer, &pf->admin_mtx, 0); /* Save tunable values */ ixl_save_pf_tunables(pf); /* Do PCI setup - map BAR0, etc */ if (ixl_allocate_pci_resources(pf)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci_res; } /* Establish a clean starting point */ i40e_clear_hw(hw); i40e_set_mac_type(hw); error = ixl_pf_reset(pf); if (error) goto err_out; /* Initialize the shared code */ status = i40e_init_shared_code(hw); if (status) { device_printf(dev, "Unable to initialize shared code, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } /* Set up the admin queue */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; status = i40e_init_adminq(hw); if (status != 0 && status != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } ixl_print_nvm_version(pf); if (status == I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "The driver for the device stopped " "because the NVM image is newer than expected.\n"); device_printf(dev, "You must install the most recent version of " "the network driver.\n"); error = EIO; goto err_out; } if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) { device_printf(dev, "The driver for the device detected " "a newer version of the NVM image than expected.\n"); device_printf(dev, "Please install the most recent version " "of the network driver.\n"); } else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) { device_printf(dev, "The driver for the device detected " "an older version of the NVM image than expected.\n"); device_printf(dev, "Please update the NVM image.\n"); } if (IXL_PF_IN_RECOVERY_MODE(pf)) { error = ixl_attach_pre_recovery_mode(pf); if (error) goto err_out; return (error); } /* Clear PXE mode */ i40e_clear_pxe_mode(hw); /* Get capabilities from the device */ error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "get_hw_capabilities failed: %d\n", error); goto err_get_cap; } /* Set up host memory cache */ error = ixl_setup_hmc(pf); if (error) goto err_mac_hmc; /* Disable LLDP from the firmware for certain NVM versions */ if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || (pf->hw.aq.fw_maj_ver < 4)) { i40e_aq_stop_lldp(hw, true, false, NULL); pf->state |= IXL_PF_STATE_FW_LLDP_DISABLED; } /* Try enabling Energy Efficient Ethernet (EEE) mode */ if (i40e_enable_eee(hw, true) == I40E_SUCCESS) atomic_set_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); else atomic_clear_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); /* Get MAC addresses from hardware */ i40e_get_mac_addr(hw, hw->mac.addr); error = i40e_validate_mac_addr(hw->mac.addr); if (error) { device_printf(dev, "validate_mac_addr failed: %d\n", error); goto err_mac_hmc; } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); iflib_set_mac(ctx, hw->mac.addr); i40e_get_port_mac_addr(hw, hw->mac.port_addr); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; filter.enable_fdir = FALSE; filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Query device FW LLDP status */ if (i40e_get_fw_lldp_status(hw, &lldp_status) == I40E_SUCCESS) { if (lldp_status == I40E_GET_FW_LLDP_STATUS_DISABLED) { atomic_set_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } else { atomic_clear_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } } /* Tell FW to apply DCB config on link up */ i40e_aq_set_dcb_parameters(hw, true, NULL); /* Fill out iflib parameters */ ixl_setup_ssctx(pf); INIT_DBG_DEV(dev, "end"); return (0); err_mac_hmc: ixl_shutdown_hmc(pf); err_get_cap: i40e_shutdown_adminq(hw); err_out: ixl_free_pci_resources(pf); err_pci_res: mtx_lock(&pf->admin_mtx); callout_stop(&pf->admin_timer); mtx_unlock(&pf->admin_mtx); mtx_destroy(&pf->admin_mtx); return (error); } static int ixl_if_attach_post(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; enum i40e_status_code status; dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); INIT_DBG_DEV(dev, "begin"); vsi = &pf->vsi; vsi->ifp = iflib_get_ifp(ctx); hw = &pf->hw; /* Save off determined number of queues for interface */ vsi->num_rx_queues = vsi->shared->isc_nrxqsets; vsi->num_tx_queues = vsi->shared->isc_ntxqsets; /* Setup OS network interface / ifnet */ if (ixl_setup_interface(dev, pf)) { device_printf(dev, "interface setup failed!\n"); error = EIO; goto err; } if (IXL_PF_IN_RECOVERY_MODE(pf)) { /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } ixl_add_sysctls_recovery_mode(pf); /* Start the admin timer */ mtx_lock(&pf->admin_mtx); callout_reset(&pf->admin_timer, hz/2, ixl_admin_timer, pf); mtx_unlock(&pf->admin_mtx); return (0); } /* Determine link state */ if (ixl_attach_get_link_status(pf)) { error = EINVAL; goto err; } error = ixl_switch_config(pf); if (error) { device_printf(dev, "Initial ixl_switch_config() failed: %d\n", error); goto err; } /* Add protocol filters to list */ ixl_init_filters(vsi); /* Init queue allocation manager */ error = ixl_pf_qmgr_init(&pf->qmgr, hw->func_caps.num_tx_qp); if (error) { device_printf(dev, "Failed to init queue manager for PF queues, error %d\n", error); goto err; } /* reserve a contiguous allocation for the PF's VSI */ error = ixl_pf_qmgr_alloc_contiguous(&pf->qmgr, max(vsi->num_rx_queues, vsi->num_tx_queues), &pf->qtag); if (error) { device_printf(dev, "Failed to reserve queues for PF LAN VSI, error %d\n", error); goto err; } device_printf(dev, "Allocating %d queues for PF LAN VSI; %d queues active\n", pf->qtag.num_allocated, pf->qtag.num_active); /* Limit PHY interrupts to link, autoneg, and modules failure */ status = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (status) { device_printf(dev, "i40e_aq_set_phy_mask() failed: err %s," " aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); goto err; } /* Get the bus configuration and set the shared code */ ixl_get_bus_info(pf); /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } /* Set initial advertised speed sysctl value */ ixl_set_initial_advertised_speeds(pf); /* Initialize statistics & add sysctls */ ixl_add_device_sysctls(pf); ixl_pf_reset_stats(pf); ixl_update_stats_counters(pf); ixl_add_hw_stats(pf); hw->phy.get_link_info = true; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); #ifdef PCI_IOV ixl_initialize_sriov(pf); #endif #ifdef IXL_IW if (hw->func_caps.iwarp && ixl_enable_iwarp) { pf->iw_enabled = (pf->iw_msix > 0) ? true : false; if (pf->iw_enabled) { error = ixl_iw_pf_attach(pf); if (error) { device_printf(dev, "interfacing to iWARP driver failed: %d\n", error); goto err; } else device_printf(dev, "iWARP ready\n"); } else device_printf(dev, "iWARP disabled on this device " "(no MSI-X vectors)\n"); } else { pf->iw_enabled = false; device_printf(dev, "The device is not iWARP enabled\n"); } #endif /* Start the admin timer */ mtx_lock(&pf->admin_mtx); callout_reset(&pf->admin_timer, hz/2, ixl_admin_timer, pf); mtx_unlock(&pf->admin_mtx); INIT_DBG_DEV(dev, "end"); return (0); err: INIT_DEBUGOUT("end: error %d", error); /* ixl_if_detach() is called on error from this */ return (error); } /** * XXX: iflib always ignores the return value of detach() * -> This means that this isn't allowed to fail */ static int ixl_if_detach(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; #ifdef IXL_IW int error; #endif INIT_DBG_DEV(dev, "begin"); /* Stop the admin timer */ mtx_lock(&pf->admin_mtx); callout_stop(&pf->admin_timer); mtx_unlock(&pf->admin_mtx); mtx_destroy(&pf->admin_mtx); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { error = ixl_iw_pf_detach(pf); if (error == EBUSY) { device_printf(dev, "iwarp in use; stop it first.\n"); //return (error); } } #endif /* Remove all previously allocated media types */ ifmedia_removeall(vsi->media); /* Shutdown LAN HMC */ ixl_shutdown_hmc(pf); /* Shutdown admin queue */ ixl_disable_intr0(hw); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "i40e_shutdown_adminq() failed with status %s\n", i40e_stat_str(hw, status)); ixl_pf_qmgr_destroy(&pf->qmgr); ixl_free_pci_resources(pf); ixl_free_mac_filters(vsi); INIT_DBG_DEV(dev, "end"); return (0); } static int ixl_if_shutdown(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_shutdown: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_suspend(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_suspend: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_resume(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DEBUGOUT("ixl_if_resume: begin"); /* Read & clear wake-up registers */ /* Required after D3->D0 transition */ if (ifp->if_flags & IFF_UP) ixl_if_init(ctx); return (0); } void ixl_if_init(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); u8 tmpaddr[ETHER_ADDR_LEN]; int ret; if (IXL_PF_IN_RECOVERY_MODE(pf)) return; /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So, rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); ixl_teardown_hw_structs(pf); ixl_rebuild_hw_structs_after_reset(pf, false); } /* Get the latest mac address... User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, ETH_ALEN); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); bcopy(tmpaddr, hw->mac.addr, ETH_ALEN); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address change failed!!\n"); return; } ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); } iflib_set_mac(ctx, hw->mac.addr); /* Prepare the VSI: rings, hmc contexts, etc... */ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } /* Reconfigure multicast filters in HW */ ixl_if_multi_set(ctx); /* Set up RSS */ ixl_config_rss(pf); /* Set up MSI-X routing and the ITR settings */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); if (vsi->enable_head_writeback) ixl_init_tx_cidx(vsi); else ixl_init_tx_rsqs(vsi); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); /* Re-add configure filters to HW */ ixl_reconfigure_filters(vsi); /* Configure promiscuous mode */ ixl_if_promisc_set(ctx, if_getflags(ifp)); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { ret = ixl_iw_pf_init(pf); if (ret) device_printf(dev, "initialize iwarp failed, code %d\n", ret); } #endif } void ixl_if_stop(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; INIT_DEBUGOUT("ixl_if_stop: begin\n"); if (IXL_PF_IN_RECOVERY_MODE(pf)) return; // TODO: This may need to be reworked #ifdef IXL_IW /* Stop iWARP device */ if (ixl_enable_iwarp && pf->iw_enabled) ixl_iw_pf_stop(pf); #endif ixl_disable_rings_intr(vsi); ixl_disable_rings(pf, vsi, &pf->qtag); } static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; int err, i, rid, vector = 0; char buf[16]; MPASS(vsi->shared->isc_nrxqsets > 0); MPASS(vsi->shared->isc_ntxqsets > 0); /* Admin Que must use vector 0*/ rid = vector + 1; err = iflib_irq_alloc_generic(ctx, &vsi->irq, rid, IFLIB_INTR_ADMIN, ixl_msix_adminq, pf, 0, "aq"); if (err) { iflib_irq_free(ctx, &vsi->irq); device_printf(iflib_get_dev(ctx), "Failed to register Admin Que handler"); return (err); } /* Create soft IRQ for handling VFLRs */ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, pf, 0, "iov"); /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, - IFLIB_INTR_RX, ixl_msix_que, rx_que, rx_que->rxr.me, buf); + IFLIB_INTR_RXTX, ixl_msix_que, rx_que, rx_que->rxr.me, buf); /* XXX: Does the driver work as expected if there are fewer num_rx_queues than * what's expected in the iflib context? */ if (err) { device_printf(iflib_get_dev(ctx), "Failed to allocate queue RX int vector %d, err: %d\n", i, err); vsi->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } bzero(buf, sizeof(buf)); for (i = 0; i < vsi->shared->isc_ntxqsets; i++, tx_que++) { snprintf(buf, sizeof(buf), "txq%d", i); iflib_softirq_alloc_generic(ctx, &vsi->rx_queues[i % vsi->shared->isc_nrxqsets].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); /* TODO: Maybe call a strategy function for this to figure out which * interrupts to map Tx queues to. I don't know if there's an immediately * better way than this other than a user-supplied map, though. */ tx_que->msix = (i % vsi->shared->isc_nrxqsets) + 1; } return (0); fail: iflib_irq_free(ctx, &vsi->irq); rx_que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (err); } /* * Enable all interrupts * * Called in: * iflib_init_locked, after ixl_if_init() */ static void ixl_if_enable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; ixl_enable_intr0(hw); /* Enable queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++, que++) /* TODO: Queue index parameter is probably wrong */ ixl_enable_queue(hw, que->rxr.me); } /* * Disable queue interrupts * * Other interrupt causes need to remain active. */ static void ixl_if_disable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = vsi->rx_queues; if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) ixl_disable_queue(hw, rx_que->msix - 1); } else { // Set PFINT_LNKLST0 FIRSTQ_INDX to 0x7FF // stops queues from triggering interrupts wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); } } static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = &vsi->rx_queues[rxqid]; ixl_enable_queue(hw, rx_que->msix - 1); return (0); } static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid]; ixl_enable_queue(hw, tx_que->msix - 1); return (0); } static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que; int i, j, error = 0; MPASS(scctx->isc_ntxqsets > 0); MPASS(ntxqs == 1); MPASS(scctx->isc_ntxqsets == ntxqsets); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ixl_tx_queue *) malloc(sizeof(struct ixl_tx_queue) *ntxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = vsi->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; que->vsi = vsi; if (!vsi->enable_head_writeback) { /* Allocate report status array */ if (!(txr->tx_rsq = malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IXL, M_NOWAIT))) { device_printf(iflib_get_dev(ctx), "failed to allocate tx_rsq memory\n"); error = ENOMEM; goto fail; } /* Init report status array */ for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; } /* get the virtual and physical address of the hardware queues */ txr->tail = I40E_QTX_TAIL(txr->me); txr->tx_base = (struct i40e_tx_desc *)vaddrs[i * ntxqs]; txr->tx_paddr = paddrs[i * ntxqs]; txr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que; int i, error = 0; #ifdef INVARIANTS if_softc_ctx_t scctx = vsi->shared; MPASS(scctx->isc_nrxqsets > 0); MPASS(nrxqs == 1); MPASS(scctx->isc_nrxqsets == nrxqsets); #endif /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ixl_rx_queue *) malloc(sizeof(struct ixl_rx_queue) * nrxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate RX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = vsi->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; que->vsi = vsi; /* get the virtual and physical address of the hardware queues */ rxr->tail = I40E_QRX_TAIL(rxr->me); rxr->rx_base = (union i40e_rx_desc *)vaddrs[i * nrxqs]; rxr->rx_paddr = paddrs[i * nrxqs]; rxr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static void ixl_if_queues_free(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if (!vsi->enable_head_writeback) { struct ixl_tx_queue *que; int i = 0; for (i = 0, que = vsi->tx_queues; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq != NULL) { free(txr->tx_rsq, M_IXL); txr->tx_rsq = NULL; } } } if (vsi->tx_queues != NULL) { free(vsi->tx_queues, M_IXL); vsi->tx_queues = NULL; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_IXL); vsi->rx_queues = NULL; } if (!IXL_PF_IN_RECOVERY_MODE(pf)) sysctl_ctx_free(&vsi->sysctl_ctx); } void ixl_update_link_status(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; u64 baudrate; if (pf->link_up) { if (vsi->link_active == FALSE) { vsi->link_active = TRUE; baudrate = ixl_max_aq_speed_to_value(hw->phy.link_info.link_speed); iflib_link_state_change(vsi->ctx, LINK_STATE_UP, baudrate); ixl_link_up_msg(pf); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } else { /* Link down */ if (vsi->link_active == TRUE) { vsi->link_active = FALSE; iflib_link_state_change(vsi->ctx, LINK_STATE_DOWN, 0); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } } static void ixl_handle_lan_overflow_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { device_t dev = pf->dev; u32 rxq_idx, qtx_ctl; rxq_idx = (e->desc.params.external.param0 & I40E_PRTDCB_RUPTQ_RXQNUM_MASK) >> I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT; qtx_ctl = e->desc.params.external.param1; device_printf(dev, "LAN overflow event: global rxq_idx %d\n", rxq_idx); device_printf(dev, "LAN overflow event: QTX_CTL 0x%08x\n", qtx_ctl); } static int ixl_process_adminq(struct ixl_pf *pf, u16 *pending) { enum i40e_status_code status = I40E_SUCCESS; struct i40e_arq_event_info event; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 opcode; u32 loop = 0, reg; event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_IXL, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return (ENOMEM); } /* clean and process any events */ do { status = i40e_clean_arq_element(hw, &event, pending); if (status) break; opcode = LE16_TO_CPU(event.desc.opcode); ixl_dbg(pf, IXL_DBG_AQ, "Admin Queue event: %#06x\n", opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; /* * This should only occur on no-drop queues, which * aren't currently configured. */ case i40e_aqc_opc_event_lan_overflow: ixl_handle_lan_overflow_event(pf, &event); break; default: break; } } while (*pending && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_IXL); /* Re-enable admin queue interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); return (status); } static void ixl_if_update_admin_status(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; u16 pending; if (pf->state & IXL_PF_STATE_ADAPTER_RESETTING) ixl_handle_empr_reset(pf); /* * Admin Queue is shut down while handling reset. * Don't proceed if it hasn't been re-initialized * e.g due to an issue with new FW. */ if (!i40e_check_asq_alive(&pf->hw)) return; if (pf->state & IXL_PF_STATE_MDD_PENDING) ixl_handle_mdd_event(pf); ixl_process_adminq(pf, &pending); ixl_update_link_status(pf); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt and go to sleep. */ if (pending > 0) iflib_admin_intr_deferred(ctx); else ixl_enable_intr0(hw); } static void ixl_if_multi_set(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; int mcnt, flags; int del_mcnt; IOCTL_DEBUGOUT("ixl_if_multi_set: begin"); mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR); /* Delete filters for removed multicast addresses */ del_mcnt = ixl_del_multi(vsi); vsi->num_macs -= del_mcnt; if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) { i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } /* (re-)install filters for all mcast addresses */ /* XXX: This bypasses filter count tracking code! */ mcnt = if_foreach_llmaddr(iflib_get_ifp(ctx), ixl_mc_filter_apply, vsi); if (mcnt > 0) { vsi->num_macs += mcnt; flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } ixl_dbg_filter(pf, "%s: filter mac total: %d\n", __func__, vsi->num_macs); IOCTL_DEBUGOUT("ixl_if_multi_set: end"); } static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) return (EINVAL); vsi->shared->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; return (0); } static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_1000_T; break; /* 2.5 G */ case I40E_PHY_TYPE_2_5GBASE_T: ifmr->ifm_active |= IFM_2500_T; break; /* 5 G */ case I40E_PHY_TYPE_5GBASE_T: ifmr->ifm_active |= IFM_5000_T; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_10G_AOC; break; /* 25 G */ case I40E_PHY_TYPE_25GBASE_KR: ifmr->ifm_active |= IFM_25G_KR; break; case I40E_PHY_TYPE_25GBASE_CR: ifmr->ifm_active |= IFM_25G_CR; break; case I40E_PHY_TYPE_25GBASE_SR: ifmr->ifm_active |= IFM_25G_SR; break; case I40E_PHY_TYPE_25GBASE_LR: ifmr->ifm_active |= IFM_25G_LR; break; case I40E_PHY_TYPE_25GBASE_AOC: ifmr->ifm_active |= IFM_25G_AOC; break; case I40E_PHY_TYPE_25GBASE_ACC: ifmr->ifm_active |= IFM_25G_ACC; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? */ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } static int ixl_if_media_change(if_ctx_t ctx) { struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(iflib_get_ifp(ctx), "Media change is not supported.\n"); return (ENODEV); } static int ixl_if_promisc_set(if_ctx_t ctx, int flags) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = iflib_get_ifp(ctx); struct i40e_hw *hw = vsi->hw; int err; bool uni = FALSE, multi = FALSE; if (flags & IFF_PROMISC) uni = multi = TRUE; else if (flags & IFF_ALLMULTI || if_llmaddr_count(ifp) >= MAX_MULTICAST_ADDR) multi = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL, true); if (err) return (err); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return (err); } static void ixl_if_timer(if_ctx_t ctx, uint16_t qid) { struct ixl_pf *pf = iflib_get_softc(ctx); if (qid != 0) return; ixl_update_stats_counters(pf); } static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; ++vsi->num_vlans; ixl_add_filter(vsi, hw->mac.addr, vtag); } static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; --vsi->num_vlans; ixl_del_filter(vsi, hw->mac.addr, vtag); } static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (vsi->ipackets); case IFCOUNTER_IERRORS: return (vsi->ierrors); case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: return (vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); case IFCOUNTER_IBYTES: return (vsi->ibytes); case IFCOUNTER_OBYTES: return (vsi->obytes); case IFCOUNTER_IMCASTS: return (vsi->imcasts); case IFCOUNTER_OMCASTS: return (vsi->omcasts); case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: return (vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: return (if_get_counter_default(ifp, cnt)); } } #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); ixl_handle_vflr(pf); } #endif static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ixl_pf *pf = iflib_get_softc(ctx); if (pf->read_i2c_byte == NULL) return (EINVAL); for (int i = 0; i < req->len; i++) if (pf->read_i2c_byte(pf, req->offset + i, req->dev_addr, &req->data[i])) return (EIO); return (0); } static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ifdrv *ifd = (struct ifdrv *)data; int error = 0; /* * The iflib_if_ioctl forwards SIOCxDRVSPEC and SIOGPRIVATE_0 without * performing privilege checks. It is important that this function * perform the necessary checks for commands which should only be * executed by privileged threads. */ switch(command) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) { error = priv_check(curthread, PRIV_DRIVER); if (error) break; error = ixl_handle_nvmupd_cmd(pf, ifd); } else { error = EINVAL; } break; default: error = EOPNOTSUPP; } return (error); } /* ixl_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning false for every event. * * @returns true if iflib needs to reinit the interface, false otherwise */ static bool ixl_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: default: return (false); } } static u_int ixl_mc_filter_apply(void *arg, struct sockaddr_dl *sdl, u_int count __unused) { struct ixl_vsi *vsi = arg; ixl_add_mc_filter(vsi, (u8*)LLADDR(sdl)); return (1); } /* * Sanity check and save off tunable values. */ static void ixl_save_pf_tunables(struct ixl_pf *pf) { device_t dev = pf->dev; /* Save tunable information */ #ifdef IXL_DEBUG_FC pf->enable_tx_fc_filter = ixl_enable_tx_fc_filter; #endif #ifdef IXL_DEBUG pf->recovery_mode = ixl_debug_recovery_mode; #endif pf->dbg_mask = ixl_core_debug_mask; pf->hw.debug_mask = ixl_shared_debug_mask; pf->vsi.enable_head_writeback = !!(ixl_enable_head_writeback); pf->enable_vf_loopback = !!(ixl_enable_vf_loopback); #if 0 pf->dynamic_rx_itr = ixl_dynamic_rx_itr; pf->dynamic_tx_itr = ixl_dynamic_tx_itr; #endif if (ixl_i2c_access_method > 3 || ixl_i2c_access_method < 0) pf->i2c_access_method = 0; else pf->i2c_access_method = ixl_i2c_access_method; if (ixl_tx_itr < 0 || ixl_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid tx_itr value of %d set!\n", ixl_tx_itr); device_printf(dev, "tx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_4K); pf->tx_itr = IXL_ITR_4K; } else pf->tx_itr = ixl_tx_itr; if (ixl_rx_itr < 0 || ixl_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid rx_itr value of %d set!\n", ixl_rx_itr); device_printf(dev, "rx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_8K); pf->rx_itr = IXL_ITR_8K; } else pf->rx_itr = ixl_rx_itr; } diff --git a/sys/dev/mgb/if_mgb.c b/sys/dev/mgb/if_mgb.c index ce4d43b8d34c..3f7a9d1a9dbb 100644 --- a/sys/dev/mgb/if_mgb.c +++ b/sys/dev/mgb/if_mgb.c @@ -1,1637 +1,1637 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 The FreeBSD Foundation, Inc. * * This driver was written by Gerald ND Aryeetey * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Microchip LAN7430/LAN7431 PCIe to Gigabit Ethernet Controller driver. * * Product information: * LAN7430 https://www.microchip.com/wwwproducts/en/LAN7430 * - Integrated IEEE 802.3 compliant PHY * LAN7431 https://www.microchip.com/wwwproducts/en/LAN7431 * - RGMII Interface * * This driver uses the iflib interface and the default 'ukphy' PHY driver. * * UNIMPLEMENTED FEATURES * ---------------------- * A number of features supported by LAN743X device are not yet implemented in * this driver: * * - Multiple (up to 4) RX queues support * - Just needs to remove asserts and malloc multiple `rx_ring_data` * structs based on ncpus. * - RX/TX Checksum Offloading support * - VLAN support * - Receive Packet Filtering (Multicast Perfect/Hash Address) support * - Wake on LAN (WoL) support * - TX LSO support * - Receive Side Scaling (RSS) support * - Debugging Capabilities: * - Could include MAC statistics and * error status registers in sysctl. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #include "miibus_if.h" static pci_vendor_info_t mgb_vendor_info_array[] = { PVID(MGB_MICROCHIP_VENDOR_ID, MGB_LAN7430_DEVICE_ID, "Microchip LAN7430 PCIe Gigabit Ethernet Controller"), PVID(MGB_MICROCHIP_VENDOR_ID, MGB_LAN7431_DEVICE_ID, "Microchip LAN7431 PCIe Gigabit Ethernet Controller"), PVID_END }; /* Device methods */ static device_register_t mgb_register; /* IFLIB methods */ static ifdi_attach_pre_t mgb_attach_pre; static ifdi_attach_post_t mgb_attach_post; static ifdi_detach_t mgb_detach; static ifdi_tx_queues_alloc_t mgb_tx_queues_alloc; static ifdi_rx_queues_alloc_t mgb_rx_queues_alloc; static ifdi_queues_free_t mgb_queues_free; static ifdi_init_t mgb_init; static ifdi_stop_t mgb_stop; static ifdi_msix_intr_assign_t mgb_msix_intr_assign; static ifdi_tx_queue_intr_enable_t mgb_tx_queue_intr_enable; static ifdi_rx_queue_intr_enable_t mgb_rx_queue_intr_enable; static ifdi_intr_enable_t mgb_intr_enable_all; static ifdi_intr_disable_t mgb_intr_disable_all; /* IFLIB_TXRX methods */ static int mgb_isc_txd_encap(void *, if_pkt_info_t); static void mgb_isc_txd_flush(void *, uint16_t, qidx_t); static int mgb_isc_txd_credits_update(void *, uint16_t, bool); static int mgb_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); static int mgb_isc_rxd_pkt_get(void *, if_rxd_info_t); static void mgb_isc_rxd_refill(void *, if_rxd_update_t); static void mgb_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); /* Interrupts */ static driver_filter_t mgb_legacy_intr; static driver_filter_t mgb_admin_intr; static driver_filter_t mgb_rxq_intr; static bool mgb_intr_test(struct mgb_softc *); /* MII methods */ static miibus_readreg_t mgb_miibus_readreg; static miibus_writereg_t mgb_miibus_writereg; static miibus_linkchg_t mgb_miibus_linkchg; static miibus_statchg_t mgb_miibus_statchg; static int mgb_media_change(if_t); static void mgb_media_status(if_t, struct ifmediareq *); /* Helper/Test functions */ static int mgb_test_bar(struct mgb_softc *); static int mgb_alloc_regs(struct mgb_softc *); static int mgb_release_regs(struct mgb_softc *); static void mgb_get_ethaddr(struct mgb_softc *, struct ether_addr *); static int mgb_wait_for_bits(struct mgb_softc *, int, int, int); /* H/W init, reset and teardown helpers */ static int mgb_hw_init(struct mgb_softc *); static int mgb_hw_teardown(struct mgb_softc *); static int mgb_hw_reset(struct mgb_softc *); static int mgb_mac_init(struct mgb_softc *); static int mgb_dmac_reset(struct mgb_softc *); static int mgb_phy_reset(struct mgb_softc *); static int mgb_dma_init(struct mgb_softc *); static int mgb_dma_tx_ring_init(struct mgb_softc *, int); static int mgb_dma_rx_ring_init(struct mgb_softc *, int); static int mgb_dmac_control(struct mgb_softc *, int, int, enum mgb_dmac_cmd); static int mgb_fct_control(struct mgb_softc *, int, int, enum mgb_fct_cmd); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t mgb_methods[] = { /* Device interface */ DEVMETHOD(device_register, mgb_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), /* MII Interface */ DEVMETHOD(miibus_readreg, mgb_miibus_readreg), DEVMETHOD(miibus_writereg, mgb_miibus_writereg), DEVMETHOD(miibus_linkchg, mgb_miibus_linkchg), DEVMETHOD(miibus_statchg, mgb_miibus_statchg), DEVMETHOD_END }; static driver_t mgb_driver = { "mgb", mgb_methods, sizeof(struct mgb_softc) }; devclass_t mgb_devclass; DRIVER_MODULE(mgb, pci, mgb_driver, mgb_devclass, NULL, NULL); IFLIB_PNP_INFO(pci, mgb, mgb_vendor_info_array); MODULE_VERSION(mgb, 1); #if 0 /* MIIBUS_DEBUG */ /* If MIIBUS debug stuff is in attach then order matters. Use below instead. */ DRIVER_MODULE_ORDERED(miibus, mgb, miibus_driver, miibus_devclass, NULL, NULL, SI_ORDER_ANY); #endif /* MIIBUS_DEBUG */ DRIVER_MODULE(miibus, mgb, miibus_driver, miibus_devclass, NULL, NULL); MODULE_DEPEND(mgb, pci, 1, 1, 1); MODULE_DEPEND(mgb, ether, 1, 1, 1); MODULE_DEPEND(mgb, miibus, 1, 1, 1); MODULE_DEPEND(mgb, iflib, 1, 1, 1); static device_method_t mgb_iflib_methods[] = { DEVMETHOD(ifdi_attach_pre, mgb_attach_pre), DEVMETHOD(ifdi_attach_post, mgb_attach_post), DEVMETHOD(ifdi_detach, mgb_detach), DEVMETHOD(ifdi_init, mgb_init), DEVMETHOD(ifdi_stop, mgb_stop), DEVMETHOD(ifdi_tx_queues_alloc, mgb_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, mgb_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, mgb_queues_free), DEVMETHOD(ifdi_msix_intr_assign, mgb_msix_intr_assign), DEVMETHOD(ifdi_tx_queue_intr_enable, mgb_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, mgb_rx_queue_intr_enable), DEVMETHOD(ifdi_intr_enable, mgb_intr_enable_all), DEVMETHOD(ifdi_intr_disable, mgb_intr_disable_all), #if 0 /* Not yet implemented IFLIB methods */ /* * Set multicast addresses, mtu and promiscuous mode */ DEVMETHOD(ifdi_multi_set, mgb_multi_set), DEVMETHOD(ifdi_mtu_set, mgb_mtu_set), DEVMETHOD(ifdi_promisc_set, mgb_promisc_set), /* * Needed for VLAN support */ DEVMETHOD(ifdi_vlan_register, mgb_vlan_register), DEVMETHOD(ifdi_vlan_unregister, mgb_vlan_unregister), /* * Needed for WOL support * at the very least. */ DEVMETHOD(ifdi_shutdown, mgb_shutdown), DEVMETHOD(ifdi_suspend, mgb_suspend), DEVMETHOD(ifdi_resume, mgb_resume), #endif /* UNUSED_IFLIB_METHODS */ DEVMETHOD_END }; static driver_t mgb_iflib_driver = { "mgb", mgb_iflib_methods, sizeof(struct mgb_softc) }; struct if_txrx mgb_txrx = { .ift_txd_encap = mgb_isc_txd_encap, .ift_txd_flush = mgb_isc_txd_flush, .ift_txd_credits_update = mgb_isc_txd_credits_update, .ift_rxd_available = mgb_isc_rxd_available, .ift_rxd_pkt_get = mgb_isc_rxd_pkt_get, .ift_rxd_refill = mgb_isc_rxd_refill, .ift_rxd_flush = mgb_isc_rxd_flush, .ift_legacy_intr = mgb_legacy_intr }; struct if_shared_ctx mgb_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_admin_intrcnt = 1, .isc_flags = IFLIB_DRIVER_MEDIA /* | IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ*/, .isc_vendor_info = mgb_vendor_info_array, .isc_driver_version = "1", .isc_driver = &mgb_iflib_driver, /* 2 queues per set for TX and RX (ring queue, head writeback queue) */ .isc_ntxqs = 2, .isc_tx_maxsize = MGB_DMA_MAXSEGS * MCLBYTES, /* .isc_tx_nsegments = MGB_DMA_MAXSEGS, */ .isc_tx_maxsegsize = MCLBYTES, .isc_ntxd_min = {1, 1}, /* Will want to make this bigger */ .isc_ntxd_max = {MGB_DMA_RING_SIZE, 1}, .isc_ntxd_default = {MGB_DMA_RING_SIZE, 1}, .isc_nrxqs = 2, .isc_rx_maxsize = MCLBYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MCLBYTES, .isc_nrxd_min = {1, 1}, /* Will want to make this bigger */ .isc_nrxd_max = {MGB_DMA_RING_SIZE, 1}, .isc_nrxd_default = {MGB_DMA_RING_SIZE, 1}, .isc_nfl = 1, /*one free list since there is only one queue */ #if 0 /* UNUSED_CTX */ .isc_tso_maxsize = MGB_TSO_MAXSIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = MGB_TX_MAXSEGSIZE, #endif /* UNUSED_CTX */ }; /*********************************************************************/ static void * mgb_register(device_t dev) { return (&mgb_sctx_init); } static int mgb_attach_pre(if_ctx_t ctx) { struct mgb_softc *sc; if_softc_ctx_t scctx; int error, phyaddr, rid; struct ether_addr hwaddr; struct mii_data *miid; sc = iflib_get_softc(ctx); sc->ctx = ctx; sc->dev = iflib_get_dev(ctx); scctx = iflib_get_softc_ctx(ctx); /* IFLIB required setup */ scctx->isc_txrx = &mgb_txrx; scctx->isc_tx_nsegments = MGB_DMA_MAXSEGS; /* Ring desc queues */ scctx->isc_txqsizes[0] = sizeof(struct mgb_ring_desc) * scctx->isc_ntxd[0]; scctx->isc_rxqsizes[0] = sizeof(struct mgb_ring_desc) * scctx->isc_nrxd[0]; /* Head WB queues */ scctx->isc_txqsizes[1] = sizeof(uint32_t) * scctx->isc_ntxd[1]; scctx->isc_rxqsizes[1] = sizeof(uint32_t) * scctx->isc_nrxd[1]; /* XXX: Must have 1 txqset, but can have up to 4 rxqsets */ scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; /* scctx->isc_tx_csum_flags = (CSUM_TCP | CSUM_UDP) | (CSUM_TCP_IPV6 | CSUM_UDP_IPV6) | CSUM_TSO */ scctx->isc_tx_csum_flags = 0; scctx->isc_capabilities = scctx->isc_capenable = 0; #if 0 /* * CSUM, TSO and VLAN support are TBD */ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU; scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; #endif /* get the BAR */ error = mgb_alloc_regs(sc); if (error != 0) { device_printf(sc->dev, "Unable to allocate bus resource: registers.\n"); goto fail; } error = mgb_test_bar(sc); if (error != 0) goto fail; error = mgb_hw_init(sc); if (error != 0) { device_printf(sc->dev, "MGB device init failed. (err: %d)\n", error); goto fail; } switch (pci_get_device(sc->dev)) { case MGB_LAN7430_DEVICE_ID: phyaddr = 1; break; case MGB_LAN7431_DEVICE_ID: default: phyaddr = MII_PHY_ANY; break; } /* XXX: Would be nice(r) if locked methods were here */ error = mii_attach(sc->dev, &sc->miibus, iflib_get_ifp(ctx), mgb_media_change, mgb_media_status, BMSR_DEFCAPMASK, phyaddr, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(sc->dev, "Failed to attach MII interface\n"); goto fail; } miid = device_get_softc(sc->miibus); scctx->isc_media = &miid->mii_media; scctx->isc_msix_bar = pci_msix_table_bar(sc->dev); /** Setup PBA BAR **/ rid = pci_msix_pba_bar(sc->dev); if (rid != scctx->isc_msix_bar) { sc->pba = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->pba == NULL) { error = ENXIO; device_printf(sc->dev, "Failed to setup PBA BAR\n"); goto fail; } } mgb_get_ethaddr(sc, &hwaddr); if (ETHER_IS_BROADCAST(hwaddr.octet) || ETHER_IS_MULTICAST(hwaddr.octet) || ETHER_IS_ZERO(hwaddr.octet)) ether_gen_addr(iflib_get_ifp(ctx), &hwaddr); /* * XXX: if the MAC address was generated the linux driver * writes it back to the device. */ iflib_set_mac(ctx, hwaddr.octet); /* Map all vectors to vector 0 (admin interrupts) by default. */ CSR_WRITE_REG(sc, MGB_INTR_VEC_RX_MAP, 0); CSR_WRITE_REG(sc, MGB_INTR_VEC_TX_MAP, 0); CSR_WRITE_REG(sc, MGB_INTR_VEC_OTHER_MAP, 0); return (0); fail: mgb_detach(ctx); return (error); } static int mgb_attach_post(if_ctx_t ctx) { struct mgb_softc *sc; sc = iflib_get_softc(ctx); device_printf(sc->dev, "Interrupt test: %s\n", (mgb_intr_test(sc) ? "PASS" : "FAIL")); return (0); } static int mgb_detach(if_ctx_t ctx) { struct mgb_softc *sc; int error; sc = iflib_get_softc(ctx); /* XXX: Should report errors but still detach everything. */ error = mgb_hw_teardown(sc); /* Release IRQs */ iflib_irq_free(ctx, &sc->rx_irq); iflib_irq_free(ctx, &sc->admin_irq); if (sc->miibus != NULL) device_delete_child(sc->dev, sc->miibus); if (sc->pba != NULL) error = bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->pba), sc->pba); sc->pba = NULL; error = mgb_release_regs(sc); return (error); } static int mgb_media_change(if_t ifp) { struct mii_data *miid; struct mii_softc *miisc; struct mgb_softc *sc; if_ctx_t ctx; int needs_reset; ctx = if_getsoftc(ifp); sc = iflib_get_softc(ctx); miid = device_get_softc(sc->miibus); LIST_FOREACH(miisc, &miid->mii_phys, mii_list) PHY_RESET(miisc); needs_reset = mii_mediachg(miid); if (needs_reset != 0) ifp->if_init(ctx); return (needs_reset); } static void mgb_media_status(if_t ifp, struct ifmediareq *ifmr) { struct mgb_softc *sc; struct mii_data *miid; sc = iflib_get_softc(if_getsoftc(ifp)); miid = device_get_softc(sc->miibus); if ((if_getflags(ifp) & IFF_UP) == 0) return; mii_pollstat(miid); ifmr->ifm_active = miid->mii_media_active; ifmr->ifm_status = miid->mii_media_status; } static int mgb_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct mgb_softc *sc; struct mgb_ring_data *rdata; int q; sc = iflib_get_softc(ctx); KASSERT(ntxqsets == 1, ("ntxqsets = %d", ntxqsets)); rdata = &sc->tx_ring_data; for (q = 0; q < ntxqsets; q++) { KASSERT(ntxqs == 2, ("ntxqs = %d", ntxqs)); /* Ring */ rdata->ring = (struct mgb_ring_desc *) vaddrs[q * ntxqs + 0]; rdata->ring_bus_addr = paddrs[q * ntxqs + 0]; /* Head WB */ rdata->head_wb = (uint32_t *) vaddrs[q * ntxqs + 1]; rdata->head_wb_bus_addr = paddrs[q * ntxqs + 1]; } return 0; } static int mgb_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct mgb_softc *sc; struct mgb_ring_data *rdata; int q; sc = iflib_get_softc(ctx); KASSERT(nrxqsets == 1, ("nrxqsets = %d", nrxqsets)); rdata = &sc->rx_ring_data; for (q = 0; q < nrxqsets; q++) { KASSERT(nrxqs == 2, ("nrxqs = %d", nrxqs)); /* Ring */ rdata->ring = (struct mgb_ring_desc *) vaddrs[q * nrxqs + 0]; rdata->ring_bus_addr = paddrs[q * nrxqs + 0]; /* Head WB */ rdata->head_wb = (uint32_t *) vaddrs[q * nrxqs + 1]; rdata->head_wb_bus_addr = paddrs[q * nrxqs + 1]; } return 0; } static void mgb_queues_free(if_ctx_t ctx) { struct mgb_softc *sc; sc = iflib_get_softc(ctx); memset(&sc->rx_ring_data, 0, sizeof(struct mgb_ring_data)); memset(&sc->tx_ring_data, 0, sizeof(struct mgb_ring_data)); } static void mgb_init(if_ctx_t ctx) { struct mgb_softc *sc; struct mii_data *miid; int error; sc = iflib_get_softc(ctx); miid = device_get_softc(sc->miibus); device_printf(sc->dev, "running init ...\n"); mgb_dma_init(sc); /* XXX: Turn off perfect filtering, turn on (broad|multi|uni)cast rx */ CSR_CLEAR_REG(sc, MGB_RFE_CTL, MGB_RFE_ALLOW_PERFECT_FILTER); CSR_UPDATE_REG(sc, MGB_RFE_CTL, MGB_RFE_ALLOW_BROADCAST | MGB_RFE_ALLOW_UNICAST | MGB_RFE_ALLOW_UNICAST); error = mii_mediachg(miid); KASSERT(!error, ("mii_mediachg returned: %d", error)); } #ifdef DEBUG static void mgb_dump_some_stats(struct mgb_softc *sc) { int i; int first_stat = 0x1200; int last_stat = 0x12FC; for (i = first_stat; i <= last_stat; i += 4) if (CSR_READ_REG(sc, i) != 0) device_printf(sc->dev, "0x%04x: 0x%08x\n", i, CSR_READ_REG(sc, i)); char *stat_names[] = { "MAC_ERR_STS ", "FCT_INT_STS ", "DMAC_CFG ", "DMAC_CMD ", "DMAC_INT_STS ", "DMAC_INT_EN ", "DMAC_RX_ERR_STS0 ", "DMAC_RX_ERR_STS1 ", "DMAC_RX_ERR_STS2 ", "DMAC_RX_ERR_STS3 ", "INT_STS ", "INT_EN ", "INT_VEC_EN ", "INT_VEC_MAP0 ", "INT_VEC_MAP1 ", "INT_VEC_MAP2 ", "TX_HEAD0", "TX_TAIL0", "DMAC_TX_ERR_STS0 ", NULL }; int stats[] = { 0x114, 0xA0, 0xC00, 0xC0C, 0xC10, 0xC14, 0xC60, 0xCA0, 0xCE0, 0xD20, 0x780, 0x788, 0x794, 0x7A0, 0x7A4, 0x780, 0xD58, 0xD5C, 0xD60, 0x0 }; i = 0; printf("==============================\n"); while (stats[i++]) device_printf(sc->dev, "%s at offset 0x%04x = 0x%08x\n", stat_names[i - 1], stats[i - 1], CSR_READ_REG(sc, stats[i - 1])); printf("==== TX RING DESCS ====\n"); for (i = 0; i < MGB_DMA_RING_SIZE; i++) device_printf(sc->dev, "ring[%d].data0=0x%08x\n" "ring[%d].data1=0x%08x\n" "ring[%d].data2=0x%08x\n" "ring[%d].data3=0x%08x\n", i, sc->tx_ring_data.ring[i].ctl, i, sc->tx_ring_data.ring[i].addr.low, i, sc->tx_ring_data.ring[i].addr.high, i, sc->tx_ring_data.ring[i].sts); device_printf(sc->dev, "==== DUMP_TX_DMA_RAM ====\n"); int i; CSR_WRITE_REG(sc, 0x24, 0xF); // DP_SEL & TX_RAM_0 for (i = 0; i < 128; i++) { CSR_WRITE_REG(sc, 0x2C, i); // DP_ADDR CSR_WRITE_REG(sc, 0x28, 0); // DP_CMD while ((CSR_READ_REG(sc, 0x24) & 0x80000000) == 0) // DP_SEL & READY DELAY(1000); device_printf(sc->dev, "DMAC_TX_RAM_0[%u]=%08x\n", i, CSR_READ_REG(sc, 0x30)); // DP_DATA } } #endif static void mgb_stop(if_ctx_t ctx) { struct mgb_softc *sc ; if_softc_ctx_t scctx; int i; sc = iflib_get_softc(ctx); scctx = iflib_get_softc_ctx(ctx); /* XXX: Could potentially timeout */ for (i = 0; i < scctx->isc_nrxqsets; i++) { mgb_dmac_control(sc, MGB_DMAC_RX_START, 0, DMAC_STOP); mgb_fct_control(sc, MGB_FCT_RX_CTL, 0, FCT_DISABLE); } for (i = 0; i < scctx->isc_ntxqsets; i++) { mgb_dmac_control(sc, MGB_DMAC_TX_START, 0, DMAC_STOP); mgb_fct_control(sc, MGB_FCT_TX_CTL, 0, FCT_DISABLE); } } static int mgb_legacy_intr(void *xsc) { struct mgb_softc *sc; sc = xsc; iflib_admin_intr_deferred(sc->ctx); return (FILTER_HANDLED); } static int mgb_rxq_intr(void *xsc) { struct mgb_softc *sc; if_softc_ctx_t scctx; uint32_t intr_sts, intr_en; int qidx; sc = xsc; scctx = iflib_get_softc_ctx(sc->ctx); intr_sts = CSR_READ_REG(sc, MGB_INTR_STS); intr_en = CSR_READ_REG(sc, MGB_INTR_ENBL_SET); intr_sts &= intr_en; for (qidx = 0; qidx < scctx->isc_nrxqsets; qidx++) { if ((intr_sts & MGB_INTR_STS_RX(qidx))){ CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, MGB_INTR_STS_RX(qidx)); CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_RX(qidx)); } } return (FILTER_SCHEDULE_THREAD); } static int mgb_admin_intr(void *xsc) { struct mgb_softc *sc; if_softc_ctx_t scctx; uint32_t intr_sts, intr_en; int qidx; sc = xsc; scctx = iflib_get_softc_ctx(sc->ctx); intr_sts = CSR_READ_REG(sc, MGB_INTR_STS); intr_en = CSR_READ_REG(sc, MGB_INTR_ENBL_SET); intr_sts &= intr_en; /* * NOTE: Debugging printfs here * will likely cause interrupt test failure. */ /* TODO: shouldn't continue if suspended */ if ((intr_sts & MGB_INTR_STS_ANY) == 0) { device_printf(sc->dev, "non-mgb interrupt triggered.\n"); return (FILTER_SCHEDULE_THREAD); } if ((intr_sts & MGB_INTR_STS_TEST) != 0) { sc->isr_test_flag = true; CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TEST); return (FILTER_HANDLED); } if ((intr_sts & MGB_INTR_STS_RX_ANY) != 0) { for (qidx = 0; qidx < scctx->isc_nrxqsets; qidx++) { if ((intr_sts & MGB_INTR_STS_RX(qidx))){ iflib_rx_intr_deferred(sc->ctx, qidx); } } return (FILTER_HANDLED); } /* XXX: TX interrupts should not occur */ if ((intr_sts & MGB_INTR_STS_TX_ANY) != 0) { for (qidx = 0; qidx < scctx->isc_ntxqsets; qidx++) { if ((intr_sts & MGB_INTR_STS_RX(qidx))) { /* clear the interrupt sts and run handler */ CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, MGB_INTR_STS_TX(qidx)); CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TX(qidx)); iflib_tx_intr_deferred(sc->ctx, qidx); } } return (FILTER_HANDLED); } return (FILTER_SCHEDULE_THREAD); } static int mgb_msix_intr_assign(if_ctx_t ctx, int msix) { struct mgb_softc *sc; if_softc_ctx_t scctx; int error, i, vectorid; char irq_name[16]; sc = iflib_get_softc(ctx); scctx = iflib_get_softc_ctx(ctx); KASSERT(scctx->isc_nrxqsets == 1 && scctx->isc_ntxqsets == 1, ("num rxqsets/txqsets != 1 ")); /* * First vector should be admin interrupts, others vectors are TX/RX * * RIDs start at 1, and vector ids start at 0. */ vectorid = 0; error = iflib_irq_alloc_generic(ctx, &sc->admin_irq, vectorid + 1, IFLIB_INTR_ADMIN, mgb_admin_intr, sc, 0, "admin"); if (error) { device_printf(sc->dev, "Failed to register admin interrupt handler\n"); return (error); } for (i = 0; i < scctx->isc_nrxqsets; i++) { vectorid++; snprintf(irq_name, sizeof(irq_name), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &sc->rx_irq, vectorid + 1, - IFLIB_INTR_RX, mgb_rxq_intr, sc, i, irq_name); + IFLIB_INTR_RXTX, mgb_rxq_intr, sc, i, irq_name); if (error) { device_printf(sc->dev, "Failed to register rxq %d interrupt handler\n", i); return (error); } CSR_UPDATE_REG(sc, MGB_INTR_VEC_RX_MAP, MGB_INTR_VEC_MAP(vectorid, i)); } /* Not actually mapping hw TX interrupts ... */ for (i = 0; i < scctx->isc_ntxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, irq_name); } return (0); } static void mgb_intr_enable_all(if_ctx_t ctx) { struct mgb_softc *sc; if_softc_ctx_t scctx; int i, dmac_enable = 0, intr_sts = 0, vec_en = 0; sc = iflib_get_softc(ctx); scctx = iflib_get_softc_ctx(ctx); intr_sts |= MGB_INTR_STS_ANY; vec_en |= MGB_INTR_STS_ANY; for (i = 0; i < scctx->isc_nrxqsets; i++) { intr_sts |= MGB_INTR_STS_RX(i); dmac_enable |= MGB_DMAC_RX_INTR_ENBL(i); vec_en |= MGB_INTR_RX_VEC_STS(i); } /* TX interrupts aren't needed ... */ CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, intr_sts); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_SET, vec_en); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, dmac_enable); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_SET, dmac_enable); } static void mgb_intr_disable_all(if_ctx_t ctx) { struct mgb_softc *sc; sc = iflib_get_softc(ctx); CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, UINT32_MAX); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_CLR, UINT32_MAX); CSR_WRITE_REG(sc, MGB_INTR_STS, UINT32_MAX); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_CLR, UINT32_MAX); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, UINT32_MAX); } static int mgb_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { /* called after successful rx isr */ struct mgb_softc *sc; sc = iflib_get_softc(ctx); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_SET, MGB_INTR_RX_VEC_STS(qid)); CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, MGB_INTR_STS_RX(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, MGB_DMAC_RX_INTR_ENBL(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_SET, MGB_DMAC_RX_INTR_ENBL(qid)); return (0); } static int mgb_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { /* XXX: not called (since tx interrupts not used) */ struct mgb_softc *sc; sc = iflib_get_softc(ctx); CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, MGB_INTR_STS_TX(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, MGB_DMAC_TX_INTR_ENBL(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_SET, MGB_DMAC_TX_INTR_ENBL(qid)); return (0); } static bool mgb_intr_test(struct mgb_softc *sc) { int i; sc->isr_test_flag = false; CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TEST); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_SET, MGB_INTR_STS_ANY); CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, MGB_INTR_STS_ANY | MGB_INTR_STS_TEST); CSR_WRITE_REG(sc, MGB_INTR_SET, MGB_INTR_STS_TEST); if (sc->isr_test_flag) return true; for (i = 0; i < MGB_TIMEOUT; i++) { DELAY(10); if (sc->isr_test_flag) break; } CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, MGB_INTR_STS_TEST); CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TEST); return sc->isr_test_flag; } static int mgb_isc_txd_encap(void *xsc , if_pkt_info_t ipi) { struct mgb_softc *sc; if_softc_ctx_t scctx; struct mgb_ring_data *rdata; struct mgb_ring_desc *txd; bus_dma_segment_t *segs; qidx_t pidx, nsegs; int i; KASSERT(ipi->ipi_qsidx == 0, ("tried to refill TX Channel %d.\n", ipi->ipi_qsidx)); sc = xsc; scctx = iflib_get_softc_ctx(sc->ctx); rdata = &sc->tx_ring_data; pidx = ipi->ipi_pidx; segs = ipi->ipi_segs; nsegs = ipi->ipi_nsegs; /* For each seg, create a descriptor */ for (i = 0; i < nsegs; ++i) { KASSERT(nsegs == 1, ("Multisegment packet !!!!!\n")); txd = &rdata->ring[pidx]; txd->ctl = htole32( (segs[i].ds_len & MGB_DESC_CTL_BUFLEN_MASK ) | /* * XXX: This will be wrong in the multipacket case * I suspect FS should be for the first packet and * LS should be for the last packet */ MGB_TX_DESC_CTL_FS | MGB_TX_DESC_CTL_LS | MGB_DESC_CTL_FCS); txd->addr.low = htole32(CSR_TRANSLATE_ADDR_LOW32( segs[i].ds_addr)); txd->addr.high = htole32(CSR_TRANSLATE_ADDR_HIGH32( segs[i].ds_addr)); txd->sts = htole32( (segs[i].ds_len << 16) & MGB_DESC_FRAME_LEN_MASK); pidx = MGB_NEXT_RING_IDX(pidx); } ipi->ipi_new_pidx = pidx; return (0); } static void mgb_isc_txd_flush(void *xsc, uint16_t txqid, qidx_t pidx) { struct mgb_softc *sc; struct mgb_ring_data *rdata; KASSERT(txqid == 0, ("tried to flush TX Channel %d.\n", txqid)); sc = xsc; rdata = &sc->tx_ring_data; if (rdata->last_tail != pidx) { rdata->last_tail = pidx; CSR_WRITE_REG(sc, MGB_DMA_TX_TAIL(txqid), rdata->last_tail); } } static int mgb_isc_txd_credits_update(void *xsc, uint16_t txqid, bool clear) { struct mgb_softc *sc; struct mgb_ring_desc *txd; struct mgb_ring_data *rdata; int processed = 0; /* * > If clear is true, we need to report the number of TX command ring * > descriptors that have been processed by the device. If clear is * > false, we just need to report whether or not at least one TX * > command ring descriptor has been processed by the device. * - vmx driver */ KASSERT(txqid == 0, ("tried to credits_update TX Channel %d.\n", txqid)); sc = xsc; rdata = &sc->tx_ring_data; while (*(rdata->head_wb) != rdata->last_head) { if (!clear) return 1; txd = &rdata->ring[rdata->last_head]; memset(txd, 0, sizeof(struct mgb_ring_desc)); rdata->last_head = MGB_NEXT_RING_IDX(rdata->last_head); processed++; } return (processed); } static int mgb_isc_rxd_available(void *xsc, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct mgb_softc *sc; if_softc_ctx_t scctx; struct mgb_ring_data *rdata; int avail = 0; sc = xsc; KASSERT(rxqid == 0, ("tried to check availability in RX Channel %d.\n", rxqid)); rdata = &sc->rx_ring_data; scctx = iflib_get_softc_ctx(sc->ctx); for (; idx != *(rdata->head_wb); idx = MGB_NEXT_RING_IDX(idx)) { avail++; /* XXX: Could verify desc is device owned here */ if (avail == budget) break; } return (avail); } static int mgb_isc_rxd_pkt_get(void *xsc, if_rxd_info_t ri) { struct mgb_softc *sc; struct mgb_ring_data *rdata; struct mgb_ring_desc rxd; int total_len; KASSERT(ri->iri_qsidx == 0, ("tried to check availability in RX Channel %d\n", ri->iri_qsidx)); sc = xsc; total_len = 0; rdata = &sc->rx_ring_data; while (*(rdata->head_wb) != rdata->last_head) { /* copy ring desc and do swapping */ rxd = rdata->ring[rdata->last_head]; rxd.ctl = le32toh(rxd.ctl); rxd.addr.low = le32toh(rxd.ctl); rxd.addr.high = le32toh(rxd.ctl); rxd.sts = le32toh(rxd.ctl); if ((rxd.ctl & MGB_DESC_CTL_OWN) != 0) { device_printf(sc->dev, "Tried to read descriptor ... " "found that it's owned by the driver\n"); return EINVAL; } if ((rxd.ctl & MGB_RX_DESC_CTL_FS) == 0) { device_printf(sc->dev, "Tried to read descriptor ... " "found that FS is not set.\n"); device_printf(sc->dev, "Tried to read descriptor ... that it FS is not set.\n"); return EINVAL; } /* XXX: Multi-packet support */ if ((rxd.ctl & MGB_RX_DESC_CTL_LS) == 0) { device_printf(sc->dev, "Tried to read descriptor ... " "found that LS is not set. (Multi-buffer packets not yet supported)\n"); return EINVAL; } ri->iri_frags[0].irf_flid = 0; ri->iri_frags[0].irf_idx = rdata->last_head; ri->iri_frags[0].irf_len = MGB_DESC_GET_FRAME_LEN(&rxd); total_len += ri->iri_frags[0].irf_len; rdata->last_head = MGB_NEXT_RING_IDX(rdata->last_head); break; } ri->iri_nfrags = 1; ri->iri_len = total_len; return (0); } static void mgb_isc_rxd_refill(void *xsc, if_rxd_update_t iru) { if_softc_ctx_t scctx; struct mgb_softc *sc; struct mgb_ring_data *rdata; struct mgb_ring_desc *rxd; uint64_t *paddrs; qidx_t *idxs; qidx_t idx; int count, len; count = iru->iru_count; len = iru->iru_buf_size; idxs = iru->iru_idxs; paddrs = iru->iru_paddrs; KASSERT(iru->iru_qsidx == 0, ("tried to refill RX Channel %d.\n", iru->iru_qsidx)); sc = xsc; scctx = iflib_get_softc_ctx(sc->ctx); rdata = &sc->rx_ring_data; while (count > 0) { idx = idxs[--count]; rxd = &rdata->ring[idx]; rxd->sts = 0; rxd->addr.low = htole32(CSR_TRANSLATE_ADDR_LOW32(paddrs[count])); rxd->addr.high = htole32(CSR_TRANSLATE_ADDR_HIGH32(paddrs[count])); rxd->ctl = htole32(MGB_DESC_CTL_OWN | (len & MGB_DESC_CTL_BUFLEN_MASK)); } return; } static void mgb_isc_rxd_flush(void *xsc, uint16_t rxqid, uint8_t flid, qidx_t pidx) { struct mgb_softc *sc; sc = xsc; KASSERT(rxqid == 0, ("tried to flush RX Channel %d.\n", rxqid)); /* * According to the programming guide, last_tail must be set to * the last valid RX descriptor, rather than to the one past that. * Note that this is not true for the TX ring! */ sc->rx_ring_data.last_tail = MGB_PREV_RING_IDX(pidx); CSR_WRITE_REG(sc, MGB_DMA_RX_TAIL(rxqid), sc->rx_ring_data.last_tail); return; } static int mgb_test_bar(struct mgb_softc *sc) { uint32_t id_rev, dev_id, rev; id_rev = CSR_READ_REG(sc, 0); dev_id = id_rev >> 16; rev = id_rev & 0xFFFF; if (dev_id == MGB_LAN7430_DEVICE_ID || dev_id == MGB_LAN7431_DEVICE_ID) { return 0; } else { device_printf(sc->dev, "ID check failed.\n"); return ENXIO; } } static int mgb_alloc_regs(struct mgb_softc *sc) { int rid; rid = PCIR_BAR(MGB_BAR); pci_enable_busmaster(sc->dev); sc->regs = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->regs == NULL) return ENXIO; return (0); } static int mgb_release_regs(struct mgb_softc *sc) { int error = 0; if (sc->regs != NULL) error = bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->regs), sc->regs); sc->regs = NULL; pci_disable_busmaster(sc->dev); return error; } static int mgb_dma_init(struct mgb_softc *sc) { if_softc_ctx_t scctx; int ch, error = 0; scctx = iflib_get_softc_ctx(sc->ctx); for (ch = 0; ch < scctx->isc_nrxqsets; ch++) if ((error = mgb_dma_rx_ring_init(sc, ch))) goto fail; for (ch = 0; ch < scctx->isc_nrxqsets; ch++) if ((error = mgb_dma_tx_ring_init(sc, ch))) goto fail; fail: return error; } static int mgb_dma_rx_ring_init(struct mgb_softc *sc, int channel) { struct mgb_ring_data *rdata; int ring_config, error = 0; rdata = &sc->rx_ring_data; mgb_dmac_control(sc, MGB_DMAC_RX_START, 0, DMAC_RESET); KASSERT(MGB_DMAC_STATE_IS_INITIAL(sc, MGB_DMAC_RX_START, channel), ("Trying to init channels when not in init state\n")); /* write ring address */ if (rdata->ring_bus_addr == 0) { device_printf(sc->dev, "Invalid ring bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_RX_BASE_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->ring_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_RX_BASE_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->ring_bus_addr)); /* write head pointer writeback address */ if (rdata->head_wb_bus_addr == 0) { device_printf(sc->dev, "Invalid head wb bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_RX_HEAD_WB_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->head_wb_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_RX_HEAD_WB_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->head_wb_bus_addr)); /* Enable head pointer writeback */ CSR_WRITE_REG(sc, MGB_DMA_RX_CONFIG0(channel), MGB_DMA_HEAD_WB_ENBL); ring_config = CSR_READ_REG(sc, MGB_DMA_RX_CONFIG1(channel)); /* ring size */ ring_config &= ~MGB_DMA_RING_LEN_MASK; ring_config |= (MGB_DMA_RING_SIZE & MGB_DMA_RING_LEN_MASK); /* packet padding (PAD_2 is better for IP header alignment ...) */ ring_config &= ~MGB_DMA_RING_PAD_MASK; ring_config |= (MGB_DMA_RING_PAD_0 & MGB_DMA_RING_PAD_MASK); CSR_WRITE_REG(sc, MGB_DMA_RX_CONFIG1(channel), ring_config); rdata->last_head = CSR_READ_REG(sc, MGB_DMA_RX_HEAD(channel)); mgb_fct_control(sc, MGB_FCT_RX_CTL, channel, FCT_RESET); if (error != 0) { device_printf(sc->dev, "Failed to reset RX FCT.\n"); goto fail; } mgb_fct_control(sc, MGB_FCT_RX_CTL, channel, FCT_ENABLE); if (error != 0) { device_printf(sc->dev, "Failed to enable RX FCT.\n"); goto fail; } mgb_dmac_control(sc, MGB_DMAC_RX_START, channel, DMAC_START); if (error != 0) device_printf(sc->dev, "Failed to start RX DMAC.\n"); fail: return (error); } static int mgb_dma_tx_ring_init(struct mgb_softc *sc, int channel) { struct mgb_ring_data *rdata; int ring_config, error = 0; rdata = &sc->tx_ring_data; if ((error = mgb_fct_control(sc, MGB_FCT_TX_CTL, channel, FCT_RESET))) { device_printf(sc->dev, "Failed to reset TX FCT.\n"); goto fail; } if ((error = mgb_fct_control(sc, MGB_FCT_TX_CTL, channel, FCT_ENABLE))) { device_printf(sc->dev, "Failed to enable TX FCT.\n"); goto fail; } if ((error = mgb_dmac_control(sc, MGB_DMAC_TX_START, channel, DMAC_RESET))) { device_printf(sc->dev, "Failed to reset TX DMAC.\n"); goto fail; } KASSERT(MGB_DMAC_STATE_IS_INITIAL(sc, MGB_DMAC_TX_START, channel), ("Trying to init channels in not init state\n")); /* write ring address */ if (rdata->ring_bus_addr == 0) { device_printf(sc->dev, "Invalid ring bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_TX_BASE_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->ring_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_TX_BASE_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->ring_bus_addr)); /* write ring size */ ring_config = CSR_READ_REG(sc, MGB_DMA_TX_CONFIG1(channel)); ring_config &= ~MGB_DMA_RING_LEN_MASK; ring_config |= (MGB_DMA_RING_SIZE & MGB_DMA_RING_LEN_MASK); CSR_WRITE_REG(sc, MGB_DMA_TX_CONFIG1(channel), ring_config); /* Enable interrupt on completion and head pointer writeback */ ring_config = (MGB_DMA_HEAD_WB_LS_ENBL | MGB_DMA_HEAD_WB_ENBL); CSR_WRITE_REG(sc, MGB_DMA_TX_CONFIG0(channel), ring_config); /* write head pointer writeback address */ if (rdata->head_wb_bus_addr == 0) { device_printf(sc->dev, "Invalid head wb bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_TX_HEAD_WB_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->head_wb_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_TX_HEAD_WB_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->head_wb_bus_addr)); rdata->last_head = CSR_READ_REG(sc, MGB_DMA_TX_HEAD(channel)); KASSERT(rdata->last_head == 0, ("MGB_DMA_TX_HEAD was not reset.\n")); rdata->last_tail = 0; CSR_WRITE_REG(sc, MGB_DMA_TX_TAIL(channel), rdata->last_tail); if ((error = mgb_dmac_control(sc, MGB_DMAC_TX_START, channel, DMAC_START))) device_printf(sc->dev, "Failed to start TX DMAC.\n"); fail: return error; } static int mgb_dmac_control(struct mgb_softc *sc, int start, int channel, enum mgb_dmac_cmd cmd) { int error = 0; switch (cmd) { case DMAC_RESET: CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_RESET(start, channel)); error = mgb_wait_for_bits(sc, MGB_DMAC_CMD, 0, MGB_DMAC_CMD_RESET(start, channel)); break; case DMAC_START: /* * NOTE: this simplifies the logic, since it will never * try to start in STOP_PENDING, but it also increases work. */ error = mgb_dmac_control(sc, start, channel, DMAC_STOP); if (error != 0) return error; CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_START(start, channel)); break; case DMAC_STOP: CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_STOP(start, channel)); error = mgb_wait_for_bits(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_STOP(start, channel), MGB_DMAC_CMD_START(start, channel)); break; } return error; } static int mgb_fct_control(struct mgb_softc *sc, int reg, int channel, enum mgb_fct_cmd cmd) { switch (cmd) { case FCT_RESET: CSR_WRITE_REG(sc, reg, MGB_FCT_RESET(channel)); return mgb_wait_for_bits(sc, reg, 0, MGB_FCT_RESET(channel)); case FCT_ENABLE: CSR_WRITE_REG(sc, reg, MGB_FCT_ENBL(channel)); return (0); case FCT_DISABLE: CSR_WRITE_REG(sc, reg, MGB_FCT_DSBL(channel)); return mgb_wait_for_bits(sc, reg, 0, MGB_FCT_ENBL(channel)); } } static int mgb_hw_teardown(struct mgb_softc *sc) { int err = 0; /* Stop MAC */ CSR_CLEAR_REG(sc, MGB_MAC_RX, MGB_MAC_ENBL); CSR_WRITE_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL); if ((err = mgb_wait_for_bits(sc, MGB_MAC_RX, MGB_MAC_DSBL, 0))) return (err); if ((err = mgb_wait_for_bits(sc, MGB_MAC_TX, MGB_MAC_DSBL, 0))) return (err); return (err); } static int mgb_hw_init(struct mgb_softc *sc) { int error = 0; error = mgb_hw_reset(sc); if (error != 0) goto fail; mgb_mac_init(sc); error = mgb_phy_reset(sc); if (error != 0) goto fail; error = mgb_dmac_reset(sc); if (error != 0) goto fail; fail: return error; } static int mgb_hw_reset(struct mgb_softc *sc) { CSR_UPDATE_REG(sc, MGB_HW_CFG, MGB_LITE_RESET); return (mgb_wait_for_bits(sc, MGB_HW_CFG, 0, MGB_LITE_RESET)); } static int mgb_mac_init(struct mgb_softc *sc) { /** * enable automatic duplex detection and * automatic speed detection */ CSR_UPDATE_REG(sc, MGB_MAC_CR, MGB_MAC_ADD_ENBL | MGB_MAC_ASD_ENBL); CSR_UPDATE_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL); CSR_UPDATE_REG(sc, MGB_MAC_RX, MGB_MAC_ENBL); return MGB_STS_OK; } static int mgb_phy_reset(struct mgb_softc *sc) { CSR_UPDATE_BYTE(sc, MGB_PMT_CTL, MGB_PHY_RESET); if (mgb_wait_for_bits(sc, MGB_PMT_CTL, 0, MGB_PHY_RESET) == MGB_STS_TIMEOUT) return MGB_STS_TIMEOUT; return (mgb_wait_for_bits(sc, MGB_PMT_CTL, MGB_PHY_READY, 0)); } static int mgb_dmac_reset(struct mgb_softc *sc) { CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_RESET); return (mgb_wait_for_bits(sc, MGB_DMAC_CMD, 0, MGB_DMAC_RESET)); } static int mgb_wait_for_bits(struct mgb_softc *sc, int reg, int set_bits, int clear_bits) { int i, val; i = 0; do { /* * XXX: Datasheets states delay should be > 5 microseconds * for device reset. */ DELAY(100); val = CSR_READ_REG(sc, reg); if ((val & set_bits) == set_bits && (val & clear_bits) == 0) return MGB_STS_OK; } while (i++ < MGB_TIMEOUT); return MGB_STS_TIMEOUT; } static void mgb_get_ethaddr(struct mgb_softc *sc, struct ether_addr *dest) { CSR_READ_REG_BYTES(sc, MGB_MAC_ADDR_BASE_L, &dest->octet[0], 4); CSR_READ_REG_BYTES(sc, MGB_MAC_ADDR_BASE_H, &dest->octet[4], 2); } static int mgb_miibus_readreg(device_t dev, int phy, int reg) { struct mgb_softc *sc; int mii_access; sc = iflib_get_softc(device_get_softc(dev)); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return EIO; mii_access = (phy & MGB_MII_PHY_ADDR_MASK) << MGB_MII_PHY_ADDR_SHIFT; mii_access |= (reg & MGB_MII_REG_ADDR_MASK) << MGB_MII_REG_ADDR_SHIFT; mii_access |= MGB_MII_BUSY | MGB_MII_READ; CSR_WRITE_REG(sc, MGB_MII_ACCESS, mii_access); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return EIO; return (CSR_READ_2_BYTES(sc, MGB_MII_DATA)); } static int mgb_miibus_writereg(device_t dev, int phy, int reg, int data) { struct mgb_softc *sc; int mii_access; sc = iflib_get_softc(device_get_softc(dev)); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return EIO; mii_access = (phy & MGB_MII_PHY_ADDR_MASK) << MGB_MII_PHY_ADDR_SHIFT; mii_access |= (reg & MGB_MII_REG_ADDR_MASK) << MGB_MII_REG_ADDR_SHIFT; mii_access |= MGB_MII_BUSY | MGB_MII_WRITE; CSR_WRITE_REG(sc, MGB_MII_DATA, data); CSR_WRITE_REG(sc, MGB_MII_ACCESS, mii_access); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return EIO; return 0; } /* XXX: May need to lock these up */ static void mgb_miibus_statchg(device_t dev) { struct mgb_softc *sc; struct mii_data *miid; sc = iflib_get_softc(device_get_softc(dev)); miid = device_get_softc(sc->miibus); /* Update baudrate in iflib */ sc->baudrate = ifmedia_baudrate(miid->mii_media_active); iflib_link_state_change(sc->ctx, sc->link_state, sc->baudrate); } static void mgb_miibus_linkchg(device_t dev) { struct mgb_softc *sc; struct mii_data *miid; int link_state; sc = iflib_get_softc(device_get_softc(dev)); miid = device_get_softc(sc->miibus); /* XXX: copied from miibus_linkchg **/ if (miid->mii_media_status & IFM_AVALID) { if (miid->mii_media_status & IFM_ACTIVE) link_state = LINK_STATE_UP; else link_state = LINK_STATE_DOWN; } else link_state = LINK_STATE_UNKNOWN; sc->link_state = link_state; iflib_link_state_change(sc->ctx, sc->link_state, sc->baudrate); } diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index c2ae55532ed0..340fe1a7dd80 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -1,2538 +1,2538 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher * Copyright (c) 2018 Patrick Kelsey * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $ */ /* Driver for VMware vmxnet3 virtual ethernet devices. */ #include __FBSDID("$FreeBSD$"); #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #include "if_vmxreg.h" #include "if_vmxvar.h" #include "opt_inet.h" #include "opt_inet6.h" #define VMXNET3_VMWARE_VENDOR_ID 0x15AD #define VMXNET3_VMWARE_DEVICE_ID 0x07B0 static pci_vendor_info_t vmxnet3_vendor_info_array[] = { PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"), /* required last entry */ PVID_END }; static void *vmxnet3_register(device_t); static int vmxnet3_attach_pre(if_ctx_t); static int vmxnet3_msix_intr_assign(if_ctx_t, int); static void vmxnet3_free_irqs(struct vmxnet3_softc *); static int vmxnet3_attach_post(if_ctx_t); static int vmxnet3_detach(if_ctx_t); static int vmxnet3_shutdown(if_ctx_t); static int vmxnet3_suspend(if_ctx_t); static int vmxnet3_resume(if_ctx_t); static int vmxnet3_alloc_resources(struct vmxnet3_softc *); static void vmxnet3_free_resources(struct vmxnet3_softc *); static int vmxnet3_check_version(struct vmxnet3_softc *); static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *); static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *); static void vmxnet3_init_txq(struct vmxnet3_softc *, int); static int vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void vmxnet3_init_rxq(struct vmxnet3_softc *, int, int); static int vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int); static void vmxnet3_queues_free(if_ctx_t); static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *); static void vmxnet3_free_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *); static void vmxnet3_free_mcast_table(struct vmxnet3_softc *); static void vmxnet3_init_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_data(struct vmxnet3_softc *); static void vmxnet3_free_data(struct vmxnet3_softc *); static void vmxnet3_evintr(struct vmxnet3_softc *); static int vmxnet3_isc_txd_encap(void *, if_pkt_info_t); static void vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t); static int vmxnet3_isc_txd_credits_update(void *, uint16_t, bool); static int vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); static int vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t); static void vmxnet3_isc_rxd_refill(void *, if_rxd_update_t); static void vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); static int vmxnet3_legacy_intr(void *); static int vmxnet3_rxq_intr(void *); static int vmxnet3_event_intr(void *); static void vmxnet3_stop(if_ctx_t); static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static void vmxnet3_reinit_queues(struct vmxnet3_softc *); static int vmxnet3_enable_device(struct vmxnet3_softc *); static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *); static void vmxnet3_init(if_ctx_t); static void vmxnet3_multi_set(if_ctx_t); static int vmxnet3_mtu_set(if_ctx_t, uint32_t); static void vmxnet3_media_status(if_ctx_t, struct ifmediareq *); static int vmxnet3_media_change(if_ctx_t); static int vmxnet3_promisc_set(if_ctx_t, int); static uint64_t vmxnet3_get_counter(if_ctx_t, ift_counter); static void vmxnet3_update_admin_status(if_ctx_t); static void vmxnet3_txq_timer(if_ctx_t, uint16_t); static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int, uint16_t); static void vmxnet3_vlan_register(if_ctx_t, uint16_t); static void vmxnet3_vlan_unregister(if_ctx_t, uint16_t); static void vmxnet3_set_rxfilter(struct vmxnet3_softc *, int); static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *); static int vmxnet3_link_is_up(struct vmxnet3_softc *); static void vmxnet3_link_status(struct vmxnet3_softc *); static void vmxnet3_set_lladdr(struct vmxnet3_softc *); static void vmxnet3_get_lladdr(struct vmxnet3_softc *); static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_sysctl(struct vmxnet3_softc *); static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t, uint32_t); static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t); static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t, uint32_t); static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t); static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t); static int vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t); static int vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t); static void vmxnet3_link_intr_enable(if_ctx_t); static void vmxnet3_enable_intr(struct vmxnet3_softc *, int); static void vmxnet3_disable_intr(struct vmxnet3_softc *, int); static void vmxnet3_intr_enable_all(if_ctx_t); static void vmxnet3_intr_disable_all(if_ctx_t); typedef enum { VMXNET3_BARRIER_RD, VMXNET3_BARRIER_WR, VMXNET3_BARRIER_RDWR, } vmxnet3_barrier_t; static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t); static device_method_t vmxnet3_methods[] = { /* Device interface */ DEVMETHOD(device_register, vmxnet3_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t vmxnet3_driver = { "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc) }; static devclass_t vmxnet3_devclass; DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0); IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array); MODULE_VERSION(vmx, 2); MODULE_DEPEND(vmx, pci, 1, 1, 1); MODULE_DEPEND(vmx, ether, 1, 1, 1); MODULE_DEPEND(vmx, iflib, 1, 1, 1); static device_method_t vmxnet3_iflib_methods[] = { DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free), DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre), DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post), DEVMETHOD(ifdi_detach, vmxnet3_detach), DEVMETHOD(ifdi_init, vmxnet3_init), DEVMETHOD(ifdi_stop, vmxnet3_stop), DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set), DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set), DEVMETHOD(ifdi_media_status, vmxnet3_media_status), DEVMETHOD(ifdi_media_change, vmxnet3_media_change), DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set), DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter), DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status), DEVMETHOD(ifdi_timer, vmxnet3_txq_timer), DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable), DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable), DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all), DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all), DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign), DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register), DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister), DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown), DEVMETHOD(ifdi_suspend, vmxnet3_suspend), DEVMETHOD(ifdi_resume, vmxnet3_resume), DEVMETHOD_END }; static driver_t vmxnet3_iflib_driver = { "vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc) }; struct if_txrx vmxnet3_txrx = { .ift_txd_encap = vmxnet3_isc_txd_encap, .ift_txd_flush = vmxnet3_isc_txd_flush, .ift_txd_credits_update = vmxnet3_isc_txd_credits_update, .ift_rxd_available = vmxnet3_isc_rxd_available, .ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get, .ift_rxd_refill = vmxnet3_isc_rxd_refill, .ift_rxd_flush = vmxnet3_isc_rxd_flush, .ift_legacy_intr = vmxnet3_legacy_intr }; static struct if_shared_ctx vmxnet3_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = 512, .isc_tx_maxsize = VMXNET3_TX_MAXSIZE, .isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE, .isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE, /* * These values are used to configure the busdma tag used for * receive descriptors. Each receive descriptor only points to one * buffer. */ .isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */ .isc_rx_nsegments = 1, /* One mapping per descriptor */ .isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE, .isc_admin_intrcnt = 1, .isc_vendor_info = vmxnet3_vendor_info_array, .isc_driver_version = "2", .isc_driver = &vmxnet3_iflib_driver, .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY, /* * Number of receive queues per receive queue set, with associated * descriptor settings for each. */ .isc_nrxqs = 3, .isc_nfl = 2, /* one free list for each receive command queue */ .isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC}, .isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC}, .isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC}, /* * Number of transmit queues per transmit queue set, with associated * descriptor settings for each. */ .isc_ntxqs = 2, .isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC}, .isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC}, .isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC}, }; static void * vmxnet3_register(device_t dev) { return (&vmxnet3_sctx_init); } static int trunc_powerof2(int val) { return (1U << (fls(val) - 1)); } static int vmxnet3_attach_pre(if_ctx_t ctx) { device_t dev; if_softc_ctx_t scctx; struct vmxnet3_softc *sc; uint32_t intr_config; int error; dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); sc->vmx_dev = dev; sc->vmx_ctx = ctx; sc->vmx_sctx = iflib_get_sctx(ctx); sc->vmx_scctx = iflib_get_softc_ctx(ctx); sc->vmx_ifp = iflib_get_ifp(ctx); sc->vmx_media = iflib_get_media(ctx); scctx = sc->vmx_scctx; scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS; scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS; /* isc_tx_tso_size_max doesn't include possible vlan header */ scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE; scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE; scctx->isc_txrx = &vmxnet3_txrx; /* If 0, the iflib tunable was not set, so set to the default */ if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES; scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets); scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus); scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max); /* If 0, the iflib tunable was not set, so set to the default */ if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES; scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets); scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus); scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max); /* * Enforce that the transmit completion queue descriptor count is * the same as the transmit command queue descriptor count. */ scctx->isc_ntxd[0] = scctx->isc_ntxd[1]; scctx->isc_txqsizes[0] = sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0]; scctx->isc_txqsizes[1] = sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1]; /* * Enforce that the receive completion queue descriptor count is the * sum of the receive command queue descriptor counts, and that the * second receive command queue descriptor count is the same as the * first one. */ scctx->isc_nrxd[2] = scctx->isc_nrxd[1]; scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2]; scctx->isc_rxqsizes[0] = sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0]; scctx->isc_rxqsizes[1] = sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1]; scctx->isc_rxqsizes[2] = sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2]; /* * Initialize the max frame size and descriptor queue buffer * sizes. */ vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp)); scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; /* Map PCI BARs */ error = vmxnet3_alloc_resources(sc); if (error) goto fail; /* Check device versions */ error = vmxnet3_check_version(sc); if (error) goto fail; /* * The interrupt mode can be set in the hypervisor configuration via * the parameter ethernet.intrMode. */ intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG); sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03; /* * Configure the softc context to attempt to configure the interrupt * mode now indicated by intr_config. iflib will follow the usual * fallback path MSI-X -> MSI -> LEGACY, starting at the configured * starting mode. */ switch (intr_config & 0x03) { case VMXNET3_IT_AUTO: case VMXNET3_IT_MSIX: scctx->isc_msix_bar = pci_msix_table_bar(dev); break; case VMXNET3_IT_MSI: scctx->isc_msix_bar = -1; scctx->isc_disable_msix = 1; break; case VMXNET3_IT_LEGACY: scctx->isc_msix_bar = 0; break; } scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU; /* These capabilities are not enabled by default. */ scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; vmxnet3_get_lladdr(sc); iflib_set_mac(ctx, sc->vmx_lladdr); return (0); fail: /* * We must completely clean up anything allocated above as iflib * will not invoke any other driver entry points as a result of this * failure. */ vmxnet3_free_resources(sc); return (error); } static int vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; int error; int i; char irq_name[16]; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_nrxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "rxq%d", i); rxq = &sc->vmx_rxq[i]; error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1, - IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name); + IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register rxq %d interrupt handler\n", i); return (error); } } for (i = 0; i < scctx->isc_ntxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "txq%d", i); /* * Don't provide the corresponding rxq irq for reference - * we want the transmit task to be attached to a task queue * that is different from the one used by the corresponding * rxq irq. That is because the TX doorbell writes are very * expensive as virtualized MMIO operations, so we want to * be able to defer them to another core when possible so * that they don't steal receive processing cycles during * stack turnarounds like TCP ACK generation. The other * piece to this approach is enabling the iflib abdicate * option (currently via an interface-specific * tunable/sysctl). */ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, irq_name); } error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq, scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0, "event"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register event interrupt handler\n"); return (error); } return (0); } static void vmxnet3_free_irqs(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; int i; scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &sc->vmx_rxq[i]; iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq); } iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq); } static int vmxnet3_attach_post(if_ctx_t ctx) { device_t dev; if_softc_ctx_t scctx; struct vmxnet3_softc *sc; int error; dev = iflib_get_dev(ctx); scctx = iflib_get_softc_ctx(ctx); sc = iflib_get_softc(ctx); if (scctx->isc_nrxqsets > 1) sc->vmx_flags |= VMXNET3_FLAG_RSS; error = vmxnet3_alloc_data(sc); if (error) goto fail; vmxnet3_set_interrupt_idx(sc); vmxnet3_setup_sysctl(sc); ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO); fail: return (error); } static int vmxnet3_detach(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); vmxnet3_free_irqs(sc); vmxnet3_free_data(sc); vmxnet3_free_resources(sc); return (0); } static int vmxnet3_shutdown(if_ctx_t ctx) { return (0); } static int vmxnet3_suspend(if_ctx_t ctx) { return (0); } static int vmxnet3_resume(if_ctx_t ctx) { return (0); } static int vmxnet3_alloc_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; rid = PCIR_BAR(0); sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res0 == NULL) { device_printf(dev, "could not map BAR0 memory\n"); return (ENXIO); } sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0); sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0); rid = PCIR_BAR(1); sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res1 == NULL) { device_printf(dev, "could not map BAR1 memory\n"); return (ENXIO); } sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1); sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1); return (0); } static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->vmx_res0), sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->vmx_res1), sc->vmx_res1); sc->vmx_res1 = NULL; } } static int vmxnet3_check_version(struct vmxnet3_softc *sc) { device_t dev; uint32_t version; dev = sc->vmx_dev; version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported hardware version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1); version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported UPT version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1); return (0); } static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int intr_idx; int i; scctx = sc->vmx_scctx; /* * There is always one interrupt per receive queue, assigned * starting with the first interrupt. When there is only one * interrupt available, the event interrupt shares the receive queue * interrupt, otherwise it uses the interrupt following the last * receive queue interrupt. Transmit queues are not assigned * interrupts, so they are given indexes beyond the indexes that * correspond to the real interrupts. */ /* The event interrupt is always the last vector. */ sc->vmx_event_intr_idx = scctx->isc_vectors - 1; intr_idx = 0; for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxq->vxrxq_intr_idx = intr_idx; rxs->intr_idx = rxq->vxrxq_intr_idx; } /* * Assign the tx queues interrupt indexes above what we are actually * using. These interrupts will never be enabled. */ intr_idx = scctx->isc_vectors; for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txq->vxtxq_intr_idx = intr_idx; txs->intr_idx = txq->vxtxq_intr_idx; } } static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int size; int error; scctx = sc->vmx_scctx; /* * The txq and rxq shared data areas must be allocated contiguously * as vmxnet3_driver_shared contains only a single address member * for the shared queue data area. */ size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) + scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0); if (error) { device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n"); return (error); } return (0); } static void vmxnet3_init_txq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_txqueue *txq; struct vmxnet3_comp_ring *txc; struct vmxnet3_txring *txr; if_softc_ctx_t scctx; txq = &sc->vmx_txq[q]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; scctx = sc->vmx_scctx; snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d", device_get_nameunit(sc->vmx_dev), q); txq->vxtxq_sc = sc; txq->vxtxq_id = q; txc->vxcr_ndesc = scctx->isc_ntxd[0]; txr->vxtxr_ndesc = scctx->isc_ntxd[1]; } static int vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct vmxnet3_softc *sc; int q; int error; caddr_t kva; sc = iflib_get_softc(ctx); /* Allocate the array of transmit queues */ sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_txq == NULL) return (ENOMEM); /* Initialize driver state for each transmit queue */ for (q = 0; q < ntxqsets; q++) vmxnet3_init_txq(sc, q); /* * Allocate queue state that is shared with the device. This check * and call is performed in both vmxnet3_tx_queues_alloc() and * vmxnet3_rx_queues_alloc() so that we don't have to care which * order iflib invokes those routines in. */ if (sc->vmx_qs_dma.idi_size == 0) { error = vmxnet3_queues_shared_alloc(sc); if (error) return (error); } kva = sc->vmx_qs_dma.idi_vaddr; for (q = 0; q < ntxqsets; q++) { sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva; kva += sizeof(struct vmxnet3_txq_shared); } /* Record descriptor ring vaddrs and paddrs */ for (q = 0; q < ntxqsets; q++) { struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txq = &sc->vmx_txq[q]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; /* Completion ring */ txc->vxcr_u.txcd = (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0]; txc->vxcr_paddr = paddrs[q * ntxqs + 0]; /* Command ring */ txr->vxtxr_txd = (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1]; txr->vxtxr_paddr = paddrs[q * ntxqs + 1]; } return (0); } static void vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxring *rxr; if_softc_ctx_t scctx; int i; rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; scctx = sc->vmx_scctx; snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d", device_get_nameunit(sc->vmx_dev), q); rxq->vxrxq_sc = sc; rxq->vxrxq_id = q; /* * First rxq is the completion queue, so there are nrxqs - 1 command * rings starting at iflib queue id 1. */ rxc->vxcr_ndesc = scctx->isc_nrxd[0]; for (i = 0; i < nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1]; } } static int vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; int q; int i; int error; caddr_t kva; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; /* Allocate the array of receive queues */ sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_rxq == NULL) return (ENOMEM); /* Initialize driver state for each receive queue */ for (q = 0; q < nrxqsets; q++) vmxnet3_init_rxq(sc, q, nrxqs); /* * Allocate queue state that is shared with the device. This check * and call is performed in both vmxnet3_tx_queues_alloc() and * vmxnet3_rx_queues_alloc() so that we don't have to care which * order iflib invokes those routines in. */ if (sc->vmx_qs_dma.idi_size == 0) { error = vmxnet3_queues_shared_alloc(sc); if (error) return (error); } kva = sc->vmx_qs_dma.idi_vaddr + scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared); for (q = 0; q < nrxqsets; q++) { sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva; kva += sizeof(struct vmxnet3_rxq_shared); } /* Record descriptor ring vaddrs and paddrs */ for (q = 0; q < nrxqsets; q++) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; /* Completion ring */ rxc->vxcr_u.rxcd = (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0]; rxc->vxcr_paddr = paddrs[q * nrxqs + 0]; /* Command ring(s) */ for (i = 0; i < nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_rxd = (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i]; rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i]; } } return (0); } static void vmxnet3_queues_free(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); /* Free queue state area that is shared with the device */ if (sc->vmx_qs_dma.idi_size != 0) { iflib_dma_free(&sc->vmx_qs_dma); sc->vmx_qs_dma.idi_size = 0; } /* Free array of receive queues */ if (sc->vmx_rxq != NULL) { free(sc->vmx_rxq, M_DEVBUF); sc->vmx_rxq = NULL; } /* Free array of transmit queues */ if (sc->vmx_txq != NULL) { free(sc->vmx_txq, M_DEVBUF); sc->vmx_txq = NULL; } } static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc) { device_t dev; size_t size; int error; dev = sc->vmx_dev; /* Top level state structure shared with the device */ size = sizeof(struct vmxnet3_driver_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0); if (error) { device_printf(dev, "cannot alloc shared memory\n"); return (error); } sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr; /* RSS table state shared with the device */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { size = sizeof(struct vmxnet3_rss_shared); error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_rss_dma, 0); if (error) { device_printf(dev, "cannot alloc rss shared memory\n"); return (error); } sc->vmx_rss = (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr; } return (0); } static void vmxnet3_free_shared_data(struct vmxnet3_softc *sc) { /* Free RSS table state shared with the device */ if (sc->vmx_rss != NULL) { iflib_dma_free(&sc->vmx_rss_dma); sc->vmx_rss = NULL; } /* Free top level state structure shared with the device */ if (sc->vmx_ds != NULL) { iflib_dma_free(&sc->vmx_ds_dma); sc->vmx_ds = NULL; } } static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc) { int error; /* Multicast table state shared with the device */ error = iflib_dma_alloc_align(sc->vmx_ctx, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0); if (error) device_printf(sc->vmx_dev, "unable to alloc multicast table\n"); else sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr; return (error); } static void vmxnet3_free_mcast_table(struct vmxnet3_softc *sc) { /* Free multicast table state shared with the device */ if (sc->vmx_mcast != NULL) { iflib_dma_free(&sc->vmx_mcast_dma); sc->vmx_mcast = NULL; } } static void vmxnet3_init_shared_data(struct vmxnet3_softc *sc) { struct vmxnet3_driver_shared *ds; if_shared_ctx_t sctx; if_softc_ctx_t scctx; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; ds = sc->vmx_ds; sctx = sc->vmx_sctx; scctx = sc->vmx_scctx; /* * Initialize fields of the shared data that remains the same across * reinits. Note the shared data is zero'd when allocated. */ ds->magic = VMXNET3_REV1_MAGIC; /* DriverInfo */ ds->version = VMXNET3_DRIVER_VERSION; ds->guest = VMXNET3_GOS_FREEBSD | #ifdef __LP64__ VMXNET3_GOS_64BIT; #else VMXNET3_GOS_32BIT; #endif ds->vmxnet3_revision = 1; ds->upt_version = 1; /* Misc. conf */ ds->driver_data = vtophys(sc); ds->driver_data_len = sizeof(struct vmxnet3_softc); ds->queue_shared = sc->vmx_qs_dma.idi_paddr; ds->queue_shared_len = sc->vmx_qs_dma.idi_size; ds->nrxsg_max = IFLIB_MAX_RX_SEGS; /* RSS conf */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->rss.version = 1; ds->rss.paddr = sc->vmx_rss_dma.idi_paddr; ds->rss.len = sc->vmx_rss_dma.idi_size; } /* Interrupt control. */ ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; /* * Total number of interrupt indexes we are using in the shared * config data, even though we don't actually allocate interrupt * resources for the tx queues. Some versions of the device will * fail to initialize successfully if interrupt indexes are used in * the shared config that exceed the number of interrupts configured * here. */ ds->nintr = (scctx->isc_vectors == 1) ? 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1); ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < ds->nintr; i++) ds->modlevel[i] = UPT1_IMOD_ADAPTIVE; /* Receive filter. */ ds->mcast_table = sc->vmx_mcast_dma.idi_paddr; ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size; /* Tx queues */ for (i = 0; i < scctx->isc_ntxqsets; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr; txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc; txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr; txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc; txs->driver_data = vtophys(txq); txs->driver_data_len = sizeof(struct vmxnet3_txqueue); } /* Rx queues */ for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr; rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc; rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr; rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc; rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr; rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc; rxs->driver_data = vtophys(rxq); rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue); } } static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc) { /* * Use the same key as the Linux driver until FreeBSD can do * RSS (presumably Toeplitz) in software. */ static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = { 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac, 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28, 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70, 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3, 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9, }; struct vmxnet3_driver_shared *ds; if_softc_ctx_t scctx; struct vmxnet3_rss_shared *rss; #ifdef RSS uint8_t rss_algo; #endif int i; ds = sc->vmx_ds; scctx = sc->vmx_scctx; rss = sc->vmx_rss; rss->hash_type = UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 | UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6; rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ; rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE; rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; #ifdef RSS /* * If the software RSS is configured to anything else other than * Toeplitz, then just do Toeplitz in "hardware" for the sake of * the packet distribution, but report the hash as opaque to * disengage from the software RSS. */ rss_algo = rss_gethashalgo(); if (rss_algo == RSS_HASH_TOEPLITZ) { rss_getkey(rss->hash_key); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) { rss->ind_table[i] = rss_get_indirection_to_bucket(i) % scctx->isc_nrxqsets; } sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS; } else #endif { memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) rss->ind_table[i] = i % scctx->isc_nrxqsets; sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS; } } static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; if_softc_ctx_t scctx; ifp = sc->vmx_ifp; ds = sc->vmx_ds; scctx = sc->vmx_scctx; ds->mtu = ifp->if_mtu; ds->ntxqueue = scctx->isc_ntxqsets; ds->nrxqueue = scctx->isc_nrxqsets; ds->upt_features = 0; if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) ds->upt_features |= UPT1_F_CSUM; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ds->upt_features |= UPT1_F_VLAN; if (ifp->if_capenable & IFCAP_LRO) ds->upt_features |= UPT1_F_LRO; if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->upt_features |= UPT1_F_RSS; vmxnet3_reinit_rss_shared_data(sc); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr); vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH, (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32); } static int vmxnet3_alloc_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_shared_data(sc); if (error) return (error); error = vmxnet3_alloc_mcast_table(sc); if (error) return (error); vmxnet3_init_shared_data(sc); return (0); } static void vmxnet3_free_data(struct vmxnet3_softc *sc) { vmxnet3_free_mcast_table(sc); vmxnet3_free_shared_data(sc); } static void vmxnet3_evintr(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txq_shared *ts; struct vmxnet3_rxq_shared *rs; uint32_t event; dev = sc->vmx_dev; /* Clear events. */ event = sc->vmx_ds->event; vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event); if (event & VMXNET3_EVENT_LINK) vmxnet3_link_status(sc); if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) { vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS); ts = sc->vmx_txq[0].vxtxq_ts; if (ts->stopped != 0) device_printf(dev, "Tx queue error %#x\n", ts->error); rs = sc->vmx_rxq[0].vxrxq_rs; if (rs->stopped != 0) device_printf(dev, "Rx queue error %#x\n", rs->error); /* XXX - rely on liflib watchdog to reset us? */ device_printf(dev, "Rx/Tx queue error event ... " "waiting for iflib watchdog reset\n"); } if (event & VMXNET3_EVENT_DIC) device_printf(dev, "device implementation change event\n"); if (event & VMXNET3_EVENT_DEBUG) device_printf(dev, "debug event\n"); } static int vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_txdesc *txd, *sop; bus_dma_segment_t *segs; int nsegs; int pidx; int hdrlen; int i; int gen; sc = vsc; txq = &sc->vmx_txq[pi->ipi_qsidx]; txr = &txq->vxtxq_cmd_ring; segs = pi->ipi_segs; nsegs = pi->ipi_nsegs; pidx = pi->ipi_pidx; KASSERT(nsegs <= VMXNET3_TX_MAXSEGS, ("%s: packet with too many segments %d", __func__, nsegs)); sop = &txr->vxtxr_txd[pidx]; gen = txr->vxtxr_gen ^ 1; /* Owned by cpu (yet) */ for (i = 0; i < nsegs; i++) { txd = &txr->vxtxr_txd[pidx]; txd->addr = segs[i].ds_addr; txd->len = segs[i].ds_len; txd->gen = gen; txd->dtype = 0; txd->offload_mode = VMXNET3_OM_NONE; txd->offload_pos = 0; txd->hlen = 0; txd->eop = 0; txd->compreq = 0; txd->vtag_mode = 0; txd->vtag = 0; if (++pidx == txr->vxtxr_ndesc) { pidx = 0; txr->vxtxr_gen ^= 1; } gen = txr->vxtxr_gen; } txd->eop = 1; txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR); pi->ipi_new_pidx = pidx; /* * VLAN */ if (pi->ipi_mflags & M_VLANTAG) { sop->vtag_mode = 1; sop->vtag = pi->ipi_vtag; } /* * TSO and checksum offloads */ hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen; if (pi->ipi_csum_flags & CSUM_TSO) { sop->offload_mode = VMXNET3_OM_TSO; sop->hlen = hdrlen + pi->ipi_tcp_hlen; sop->offload_pos = pi->ipi_tso_segsz; } else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6)) { sop->offload_mode = VMXNET3_OM_CSUM; sop->hlen = hdrlen; sop->offload_pos = hdrlen + ((pi->ipi_ipproto == IPPROTO_TCP) ? offsetof(struct tcphdr, th_sum) : offsetof(struct udphdr, uh_sum)); } /* Finally, change the ownership. */ vmxnet3_barrier(sc, VMXNET3_BARRIER_WR); sop->gen ^= 1; return (0); } static void vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; sc = vsc; txq = &sc->vmx_txq[txqid]; /* * pidx is what we last set ipi_new_pidx to in * vmxnet3_isc_txd_encap() */ /* * Avoid expensive register updates if the flush request is * redundant. */ if (txq->vxtxq_last_flush == pidx) return; txq->vxtxq_last_flush = pidx; vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx); } static int vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_comp_ring *txc; struct vmxnet3_txcompdesc *txcd; struct vmxnet3_txring *txr; int processed; sc = vsc; txq = &sc->vmx_txq[txqid]; txc = &txq->vxtxq_comp_ring; txr = &txq->vxtxq_cmd_ring; /* * If clear is true, we need to report the number of TX command ring * descriptors that have been processed by the device. If clear is * false, we just need to report whether or not at least one TX * command ring descriptor has been processed by the device. */ processed = 0; for (;;) { txcd = &txc->vxcr_u.txcd[txc->vxcr_next]; if (txcd->gen != txc->vxcr_gen) break; else if (!clear) return (1); vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++txc->vxcr_next == txc->vxcr_ndesc) { txc->vxcr_next = 0; txc->vxcr_gen ^= 1; } if (txcd->eop_idx < txr->vxtxr_next) processed += txr->vxtxr_ndesc - (txr->vxtxr_next - txcd->eop_idx) + 1; else processed += txcd->eop_idx - txr->vxtxr_next + 1; txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc; } return (processed); } static int vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; int avail; int completed_gen; #ifdef INVARIANTS int expect_sop = 1; #endif sc = vsc; rxq = &sc->vmx_rxq[rxqid]; rxc = &rxq->vxrxq_comp_ring; avail = 0; completed_gen = rxc->vxcr_gen; for (;;) { rxcd = &rxc->vxcr_u.rxcd[idx]; if (rxcd->gen != completed_gen) break; vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); #ifdef INVARIANTS if (expect_sop) KASSERT(rxcd->sop, ("%s: expected sop", __func__)); else KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__)); expect_sop = rxcd->eop; #endif if (rxcd->eop && (rxcd->len != 0)) avail++; if (avail > budget) break; if (++idx == rxc->vxcr_ndesc) { idx = 0; completed_gen ^= 1; } } return (avail); } static int vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; struct vmxnet3_rxqueue *rxq; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; struct vmxnet3_rxring *rxr; struct vmxnet3_rxdesc *rxd; if_rxd_frag_t frag; int cqidx; uint16_t total_len; uint8_t nfrags; uint8_t i; uint8_t flid; sc = vsc; scctx = sc->vmx_scctx; rxq = &sc->vmx_rxq[ri->iri_qsidx]; rxc = &rxq->vxrxq_comp_ring; /* * Get a single packet starting at the given index in the completion * queue. That we have been called indicates that * vmxnet3_isc_rxd_available() has already verified that either * there is a complete packet available starting at the given index, * or there are one or more zero length packets starting at the * given index followed by a complete packet, so no verification of * ownership of the descriptors (and no associated read barrier) is * required here. */ cqidx = ri->iri_cidx; rxcd = &rxc->vxcr_u.rxcd[cqidx]; while (rxcd->len == 0) { KASSERT(rxcd->sop && rxcd->eop, ("%s: zero-length packet without both sop and eop set", __func__)); rxc->vxcr_zero_length++; if (++cqidx == rxc->vxcr_ndesc) { cqidx = 0; rxc->vxcr_gen ^= 1; } rxcd = &rxc->vxcr_u.rxcd[cqidx]; } KASSERT(rxcd->sop, ("%s: expected sop", __func__)); /* * RSS and flow ID. * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should * be used only if the software RSS is enabled and it uses the same * algorithm and the hash key as the "hardware". If the software RSS * is not enabled, then it's simply pointless to use those types. * If it's enabled but with different parameters, then hash values will * not match. */ ri->iri_flowid = rxcd->rss_hash; #ifdef RSS if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) { switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_NONE: ri->iri_flowid = ri->iri_qsidx; ri->iri_rsstype = M_HASHTYPE_NONE; break; case VMXNET3_RCD_RSS_TYPE_IPV4: ri->iri_rsstype = M_HASHTYPE_RSS_IPV4; break; case VMXNET3_RCD_RSS_TYPE_TCPIPV4: ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4; break; case VMXNET3_RCD_RSS_TYPE_IPV6: ri->iri_rsstype = M_HASHTYPE_RSS_IPV6; break; case VMXNET3_RCD_RSS_TYPE_TCPIPV6: ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6; break; default: ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; break; } } else #endif { switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_NONE: ri->iri_flowid = ri->iri_qsidx; ri->iri_rsstype = M_HASHTYPE_NONE; break; default: ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; break; } } /* * The queue numbering scheme used for rxcd->qid is as follows: * - All of the command ring 0s are numbered [0, nrxqsets - 1] * - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1] * * Thus, rxcd->qid less than nrxqsets indicates command ring (and * flid) 0, and rxcd->qid greater than or equal to nrxqsets * indicates command ring (and flid) 1. */ nfrags = 0; total_len = 0; do { rxcd = &rxc->vxcr_u.rxcd[cqidx]; KASSERT(rxcd->gen == rxc->vxcr_gen, ("%s: generation mismatch", __func__)); flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0; rxr = &rxq->vxrxq_cmd_ring[flid]; rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx]; frag = &ri->iri_frags[nfrags]; frag->irf_flid = flid; frag->irf_idx = rxcd->rxd_idx; frag->irf_len = rxcd->len; total_len += rxcd->len; nfrags++; if (++cqidx == rxc->vxcr_ndesc) { cqidx = 0; rxc->vxcr_gen ^= 1; } } while (!rxcd->eop); ri->iri_cidx = cqidx; ri->iri_nfrags = nfrags; ri->iri_len = total_len; /* * If there's an error, the last descriptor in the packet will * have the error indicator set. In this case, set all * fragment lengths to zero. This will cause iflib to discard * the packet, but process all associated descriptors through * the refill mechanism. */ if (__predict_false(rxcd->error)) { rxc->vxcr_pkt_errors++; for (i = 0; i < nfrags; i++) { frag = &ri->iri_frags[i]; frag->irf_len = 0; } } else { /* Checksum offload information is in the last descriptor. */ if (!rxcd->no_csum) { uint32_t csum_flags = 0; if (rxcd->ipv4) { csum_flags |= CSUM_IP_CHECKED; if (rxcd->ipcsum_ok) csum_flags |= CSUM_IP_VALID; } if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) { csum_flags |= CSUM_L4_CALC; if (rxcd->csum_ok) { csum_flags |= CSUM_L4_VALID; ri->iri_csum_data = 0xffff; } } ri->iri_csum_flags = csum_flags; } /* VLAN information is in the last descriptor. */ if (rxcd->vlan) { ri->iri_flags |= M_VLANTAG; ri->iri_vtag = rxcd->vtag; } } return (0); } static void vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_rxdesc *rxd; uint64_t *paddrs; int count; int len; int idx; int i; uint8_t flid; uint8_t btype; count = iru->iru_count; len = iru->iru_buf_size; flid = iru->iru_flidx; paddrs = iru->iru_paddrs; sc = vsc; rxq = &sc->vmx_rxq[iru->iru_qsidx]; rxr = &rxq->vxrxq_cmd_ring[flid]; rxd = rxr->vxrxr_rxd; /* * Command ring 0 is filled with BTYPE_HEAD descriptors, and * command ring 1 is filled with BTYPE_BODY descriptors. */ btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY; /* * The refill entries from iflib will advance monotonically, * but the refilled descriptors may not be contiguous due to * earlier skipping of descriptors by the device. The refill * entries from iflib need an entire state update, while the * descriptors previously skipped by the device only need to * have their generation numbers updated. */ idx = rxr->vxrxr_refill_start; i = 0; do { if (idx == iru->iru_idxs[i]) { rxd[idx].addr = paddrs[i]; rxd[idx].len = len; rxd[idx].btype = btype; i++; } else rxr->vxrxr_desc_skips++; rxd[idx].gen = rxr->vxrxr_gen; if (++idx == rxr->vxrxr_ndesc) { idx = 0; rxr->vxrxr_gen ^= 1; } } while (i != count); rxr->vxrxr_refill_start = idx; } static void vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; bus_size_t r; sc = vsc; rxq = &sc->vmx_rxq[rxqid]; rxr = &rxq->vxrxq_cmd_ring[flid]; if (flid == 0) r = VMXNET3_BAR0_RXH1(rxqid); else r = VMXNET3_BAR0_RXH2(rxqid); vmxnet3_write_bar0(sc, r, pidx); } static int vmxnet3_legacy_intr(void *xsc) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; if_ctx_t ctx; sc = xsc; scctx = sc->vmx_scctx; ctx = sc->vmx_ctx; /* * When there is only a single interrupt configured, this routine * runs in fast interrupt context, following which the rxq 0 task * will be enqueued. */ if (scctx->isc_intr == IFLIB_INTR_LEGACY) { if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0) return (FILTER_HANDLED); } if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_intr_disable_all(ctx); if (sc->vmx_ds->event != 0) iflib_admin_intr_deferred(ctx); /* * XXX - When there is both rxq and event activity, do we care * whether the rxq 0 task or the admin task re-enables the interrupt * first? */ return (FILTER_SCHEDULE_THREAD); } static int vmxnet3_rxq_intr(void *vrxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; rxq = vrxq; sc = rxq->vxrxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx); return (FILTER_SCHEDULE_THREAD); } static int vmxnet3_event_intr(void *vsc) { struct vmxnet3_softc *sc; sc = vsc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx); /* * The work will be done via vmxnet3_update_admin_status(), and the * interrupt will be re-enabled in vmxnet3_link_intr_enable(). * * The interrupt will be re-enabled by vmxnet3_link_intr_enable(). */ return (FILTER_SCHEDULE_THREAD); } static void vmxnet3_stop(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); sc->vmx_link_active = 0; vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE); vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET); } static void vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txq->vxtxq_last_flush = -1; txr = &txq->vxtxq_cmd_ring; txr->vxtxr_next = 0; txr->vxtxr_gen = VMXNET3_INIT_GEN; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ txc = &txq->vxtxq_comp_ring; txc->vxcr_next = 0; txc->vxcr_gen = VMXNET3_INIT_GEN; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ } static void vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int i; /* * The descriptors will be populated with buffers during a * subsequent invocation of vmxnet3_isc_rxd_refill() */ for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_gen = VMXNET3_INIT_GEN; rxr->vxrxr_desc_skips = 0; rxr->vxrxr_refill_start = 0; /* * iflib has zeroed out the descriptor array during the * prior attach or stop */ } for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_gen = 0; rxr->vxrxr_desc_skips = 0; rxr->vxrxr_refill_start = 0; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); } rxc = &rxq->vxrxq_comp_ring; rxc->vxcr_next = 0; rxc->vxcr_gen = VMXNET3_INIT_GEN; rxc->vxcr_zero_length = 0; rxc->vxcr_pkt_errors = 0; /* * iflib has zeroed out the descriptor array during the prior attach * or stop */ } static void vmxnet3_reinit_queues(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int q; scctx = sc->vmx_scctx; for (q = 0; q < scctx->isc_ntxqsets; q++) vmxnet3_txinit(sc, &sc->vmx_txq[q]); for (q = 0; q < scctx->isc_nrxqsets; q++) vmxnet3_rxinit(sc, &sc->vmx_rxq[q]); } static int vmxnet3_enable_device(struct vmxnet3_softc *sc) { if_softc_ctx_t scctx; int q; scctx = sc->vmx_scctx; if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) { device_printf(sc->vmx_dev, "device enable command failed!\n"); return (1); } /* Reset the Rx queue heads. */ for (q = 0; q < scctx->isc_nrxqsets; q++) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0); vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0); } return (0); } static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; vmxnet3_set_rxfilter(sc, if_getflags(ifp)); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); else bzero(sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } static void vmxnet3_init(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); /* Use the current MAC address. */ bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN); vmxnet3_set_lladdr(sc); vmxnet3_reinit_shared_data(sc); vmxnet3_reinit_queues(sc); vmxnet3_enable_device(sc); vmxnet3_reinit_rxfilters(sc); vmxnet3_link_status(sc); } static void vmxnet3_multi_set(if_ctx_t ctx) { vmxnet3_set_rxfilter(iflib_get_softc(ctx), if_getflags(iflib_get_ifp(ctx))); } static int vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) return (EINVAL); /* * Update the max frame size so that the rx mbuf size is * chosen based on the new mtu during the interface init that * will occur after this routine returns. */ scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; /* RX completion queue - n/a */ scctx->isc_rxd_buf_size[0] = 0; /* * For header-type descriptors (used for first segment of * packet), let iflib determine the buffer size based on the * max frame size. */ scctx->isc_rxd_buf_size[1] = 0; /* * For body-type descriptors (used for jumbo frames and LRO), * always use page-sized buffers. */ scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE; return (0); } static void vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (vmxnet3_link_is_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_AUTO; } else ifmr->ifm_active |= IFM_NONE; } static int vmxnet3_media_change(if_ctx_t ctx) { /* Ignore. */ return (0); } static int vmxnet3_promisc_set(if_ctx_t ctx, int flags) { vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags); return (0); } static uint64_t vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt) { if_t ifp = iflib_get_ifp(ctx); if (cnt < IFCOUNTERS) return if_get_counter_default(ifp, cnt); return (0); } static void vmxnet3_update_admin_status(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); vmxnet3_refresh_host_stats(sc); } static void vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid) { /* Host stats refresh is global, so just trigger it on txq 0 */ if (qid == 0) vmxnet3_refresh_host_stats(iflib_get_softc(ctx)); } static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag) { int idx, bit; if (tag == 0 || tag > 4095) return; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; /* Update our private VLAN bitvector. */ if (add) sc->vmx_vlan_filter[idx] |= (1 << bit); else sc->vmx_vlan_filter[idx] &= ~(1 << bit); } static void vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag) { vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag); } static void vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag) { vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag); } static u_int vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count) { struct vmxnet3_softc *sc = arg; if (count < VMXNET3_MULTICAST_MAX) bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN], ETHER_ADDR_LEN); return (1); } static void vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; u_int mode; ifp = sc->vmx_ifp; ds = sc->vmx_ds; mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST; if (flags & IFF_PROMISC) mode |= VMXNET3_RXMODE_PROMISC; if (flags & IFF_ALLMULTI) mode |= VMXNET3_RXMODE_ALLMULTI; else { int cnt; cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc); if (cnt >= VMXNET3_MULTICAST_MAX) { cnt = 0; mode |= VMXNET3_RXMODE_ALLMULTI; } else if (cnt > 0) mode |= VMXNET3_RXMODE_MCAST; ds->mcast_tablelen = cnt * ETHER_ADDR_LEN; } ds->rxmode = mode; vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER); vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE); } static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc) { vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS); } static int vmxnet3_link_is_up(struct vmxnet3_softc *sc) { uint32_t status; status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK); return !!(status & 0x1); } static void vmxnet3_link_status(struct vmxnet3_softc *sc) { if_ctx_t ctx; uint64_t speed; int link; ctx = sc->vmx_ctx; link = vmxnet3_link_is_up(sc); speed = IF_Gbps(10); if (link != 0 && sc->vmx_link_active == 0) { sc->vmx_link_active = 1; iflib_link_state_change(ctx, LINK_STATE_UP, speed); } else if (link == 0 && sc->vmx_link_active != 0) { sc->vmx_link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, speed); } } static void vmxnet3_set_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = sc->vmx_lladdr[0]; ml |= sc->vmx_lladdr[1] << 8; ml |= sc->vmx_lladdr[2] << 16; ml |= sc->vmx_lladdr[3] << 24; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml); mh = sc->vmx_lladdr[4]; mh |= sc->vmx_lladdr[5] << 8; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh); } static void vmxnet3_get_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL); mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH); sc->vmx_lladdr[0] = ml; sc->vmx_lladdr[1] = ml >> 8; sc->vmx_lladdr[2] = ml >> 16; sc->vmx_lladdr[3] = ml >> 24; sc->vmx_lladdr[4] = mh; sc->vmx_lladdr[5] = mh >> 8; } static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *txsnode; struct sysctl_oid_list *list, *txslist; struct UPT1_TxStats *txstats; char namebuf[16]; txstats = &txq->vxtxq_ts->stats; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue"); txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node); /* * Add statistics reported by the host. These are updated by the * iflib txq timer on txq 0. */ txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics"); txslist = SYSCTL_CHILDREN(txsnode); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD, &txstats->TSO_packets, "TSO packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD, &txstats->TSO_bytes, "TSO bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &txstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &txstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &txstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &txstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD, &txstats->error, "Errors"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD, &txstats->discard, "Discards"); } static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *rxsnode; struct sysctl_oid_list *list, *rxslist; struct UPT1_RxStats *rxstats; char namebuf[16]; rxstats = &rxq->vxrxq_rs->stats; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue"); rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node); /* * Add statistics reported by the host. These are updated by the * iflib txq timer on txq 0. */ rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics"); rxslist = SYSCTL_CHILDREN(rxsnode); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD, &rxstats->LRO_packets, "LRO packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD, &rxstats->LRO_bytes, "LRO bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &rxstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &rxstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &rxstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &rxstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD, &rxstats->bcast_packets, "Broadcast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD, &rxstats->bcast_bytes, "Broadcast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD, &rxstats->nobuffer, "No buffer"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD, &rxstats->error, "Errors"); } static void vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { if_softc_ctx_t scctx; struct sysctl_oid *node; struct sysctl_oid_list *list; int i; scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_ntxqsets; i++) { struct vmxnet3_txqueue *txq = &sc->vmx_txq[i]; node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_gen, 0, ""); } for (i = 0; i < scctx->isc_nrxqsets; i++) { struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i]; node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_gen, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, ""); SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, ""); } } static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { if_softc_ctx_t scctx; int i; scctx = sc->vmx_scctx; for (i = 0; i < scctx->isc_ntxqsets; i++) vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child); for (i = 0; i < scctx->isc_nrxqsets; i++) vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child); vmxnet3_setup_debug_sysctl(sc, ctx, child); } static void vmxnet3_setup_sysctl(struct vmxnet3_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vmx_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); vmxnet3_setup_queue_sysctl(sc, ctx, child); } static void vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v); } static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r) { return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r)); } static void vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v); } static void vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd); } static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_cmd(sc, cmd); bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD)); } static void vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0); } static void vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1); } static int vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { /* Not using interrupts for TX */ return (0); } static int vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx); return (0); } static void vmxnet3_link_intr_enable(if_ctx_t ctx) { struct vmxnet3_softc *sc; sc = iflib_get_softc(ctx); vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx); } static void vmxnet3_intr_enable_all(if_ctx_t ctx) { struct vmxnet3_softc *sc; if_softc_ctx_t scctx; int i; sc = iflib_get_softc(ctx); scctx = sc->vmx_scctx; sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < scctx->isc_vectors; i++) vmxnet3_enable_intr(sc, i); } static void vmxnet3_intr_disable_all(if_ctx_t ctx) { struct vmxnet3_softc *sc; int i; sc = iflib_get_softc(ctx); /* * iflib may invoke this routine before vmxnet3_attach_post() has * run, which is before the top level shared data area is * initialized and the device made aware of it. */ if (sc->vmx_ds != NULL) sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < VMXNET3_MAX_INTRS; i++) vmxnet3_disable_intr(sc, i); } /* * Since this is a purely paravirtualized device, we do not have * to worry about DMA coherency. But at times, we must make sure * both the compiler and CPU do not reorder memory operations. */ static inline void vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type) { switch (type) { case VMXNET3_BARRIER_RD: rmb(); break; case VMXNET3_BARRIER_WR: wmb(); break; case VMXNET3_BARRIER_RDWR: mb(); break; default: panic("%s: bad barrier type %d", __func__, type); } } diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 5db925fef05f..cc401a5db24d 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -1,6893 +1,6923 @@ /*- * Copyright (c) 2014-2018, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #ifdef PCI_IOV #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. */ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); #define IFLIB_RXEOF_MORE (1U << 0) #define IFLIB_RXEOF_EMPTY (2U << 0) struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); static void iflib_timer(void *arg); static void iflib_tqg_detach(if_ctx_t ctx); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct sx ifc_ctx_sx; struct mtx ifc_state_mtx; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; uint32_t ifc_rx_mbuf_sz; int ifc_link_state; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct ifmedia *ifc_mediap; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; uint16_t ifc_sysctl_tx_abdicate; uint16_t ifc_sysctl_core_offset; #define CORE_OFFSET_UNSPECIFIED 0xffff uint8_t ifc_sysctl_separate_txrx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; struct ether_addr ifc_mac; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (ctx->ifc_mediap); } uint32_t iflib_get_flags(if_ctx_t ctx) { return (ctx->ifc_flags); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac.octet, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */ } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; uint64_t ift_last_timer_tick; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; #ifdef DEV_NETMAP struct callout ift_netmap_timer; #endif /* DEV_NETMAP */ if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_buf_tag; bus_dma_tag_t ift_tso_buf_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 32 char ift_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_buf_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; struct pfil_head *pfil; /* * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is * the completion queue consumer index. Otherwise it's unused. */ qidx_t ifr_cq_cidx; uint16_t ifr_id; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; uint8_t ifr_fl_offset; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct callout ifr_watchdog; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; iflib_fl_t ifsd_fl; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ #define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else #define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } static device_method_t iflib_pseudo_methods[] = { DEVMETHOD(device_attach, noop_attach), DEVMETHOD(device_detach, iflib_pseudo_detach), DEVMETHOD_END }; driver_t iflib_pseudodriver = { "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), }; static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; #endif } /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock") #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx) #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) void iflib_set_detach(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_IN_DETACH; STATE_UNLOCK(ctx); } /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); +static int iflib_timer_default = 1000; +SYSCTL_INT(_net_iflib, OID_AUTO, timer_default, CTLFLAG_RW, + &iflib_timer_default, 0, "number of ticks between iflib_timer calls"); + #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# TX mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# TX mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# TX mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# TX frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# RX allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_pad_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_if_input; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# RX intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_if_input = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); #ifdef ALTQ static void iflib_altq_if_start(if_t ifp); static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); #endif static int iflib_register(if_ctx_t); static void iflib_deregister(if_ctx_t); static void iflib_unregister_vlan_handlers(if_ctx_t ctx); static uint16_t iflib_get_mbuf_size_for(unsigned int size); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_if_init_locked(if_ctx_t ctx); static void iflib_free_intr_mem(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = SLIST_HEAD_INITIALIZER(cpu_offsets); struct cpu_offset { SLIST_ENTRY(cpu_offset) entries; cpuset_t set; unsigned int refcount; uint16_t offset; }; static struct mtx cpu_offset_mtx; MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", MTX_DEF); DEBUGNET_DEFINE(iflib); static int iflib_num_rx_descs(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; uint16_t first_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; return scctx->isc_nrxd[first_rxq]; } static int iflib_num_tx_descs(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; uint16_t first_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; return scctx->isc_ntxd[first_txq]; } #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init); static void iflib_netmap_timer(void *arg); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { if_t ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); iflib_stop(ctx); /* * Enable (or disable) netmap flags, and intercept (or restore) * ifp->if_transmit. This is done once the device has been stopped * to prevent race conditions. */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init) { struct netmap_adapter *na = kring->na; u_int const lim = kring->nkr_num_slots - 1; u_int nm_i = kring->nr_hwcur; struct netmap_ring *ring = kring->ring; bus_dmamap_t *map; struct if_rxd_update iru; if_ctx_t ctx = rxq->ifr_ctx; iflib_fl_t fl = &rxq->ifr_fl[0]; u_int nic_i_first, nic_i; int i, n; #if IFLIB_DEBUG_COUNTERS int rf_count = 0; #endif /* * This function is used both at initialization and in rxsync. * At initialization we need to prepare (with isc_rxd_refill()) * all the (N) netmap buffers in the ring, in such a way to keep * fl->ifl_pidx and kring->nr_hwcur in sync (except for * kring->nkr_hwofs); at rxsync time, both indexes point to the * next buffer to be refilled. * In any case we publish (with isc_rxd_flush()) up to * (fl->ifl_pidx - 1) % N (included), to avoid the NIC tail/prod * pointer to overrun the head/cons pointer, although this is * not necessary for some NICs (e.g. vmx). */ if (__predict_false(init)) n = kring->nkr_num_slots; else { n = kring->rhead - nm_i; if (n == 0) return (0); /* Nothing to do. */ if (n < 0) n += kring->nkr_num_slots; } /* Start to refill from nr_hwcur, publishing n buffers. */ iru_init(&iru, rxq, 0 /* flid */); map = fl->ifl_sds.ifsd_map; nic_i = fl->ifl_pidx; MPASS(nic_i == netmap_idx_k2n(kring, nm_i)); DBG_COUNTER_INC(fl_refills); while (n > 0) { #if IFLIB_DEBUG_COUNTERS if (++rf_count == 9) DBG_COUNTER_INC(fl_refills_large); #endif nic_i_first = nic_i; for (i = 0; n > 0 && i < IFLIB_MAX_RX_REFRESH; n--, i++) { struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); MPASS(i < IFLIB_MAX_RX_REFRESH); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ return netmap_ring_reinit(kring); fl->ifl_rxd_idxs[i] = nic_i; if (__predict_false(init)) { netmap_load_map(na, fl->ifl_buf_tag, map[nic_i], addr); } else if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_buf_tag, map[nic_i], addr); } bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i], BUS_DMASYNC_PREREAD); slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } iru.iru_pidx = nic_i_first; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); } fl->ifl_pidx = nic_i; MPASS(!init || nm_i == 0); MPASS(nm_i == kring->rhead); kring->nr_hwcur = nm_i; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nm_prev(nic_i, lim)); DBG_COUNTER_INC(rxd_flush); return (0); } #define NETMAP_TX_TIMER_US 90 /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; if_t ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap kring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap kring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_len = len; pi.ipi_segs[0].ds_addr = paddr; pi.ipi_segs[0].ds_len = len; pi.ipi_nsegs = 1; pi.ipi_ndescs = 0; pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); DBG_COUNTER_INC(tx_encap); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = nm_i; /* synchronize the NIC ring */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. * * If there are unclaimed buffers, attempt to reclaim them. * If we don't manage to reclaim them all, and TX IRQs are not in use, * trigger a per-tx-queue timer to try again later. */ if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } } if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { callout_reset_sbt_on(&txq->ift_netmap_timer, NETMAP_TX_TIMER_US * SBT_1US, SBT_1US, iflib_netmap_timer, txq, txq->ift_netmap_timer.c_cpu, 0); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; if_t ifp = na->ifp; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; if_ctx_t ctx = ifp->if_softc; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = &rxq->ifr_fl[0]; struct if_rxd_info ri; qidx_t *cidxp; /* * netmap only uses free list 0, to avoid out of order consumption * of receive buffers */ bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring * (or in the free list 0 if IFLIB_HAS_RXCQ is set), and they may * differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = fl->ifl_cidx; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * fl->ifl_cidx is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim); bool have_rxcq = sctx->isc_flags & IFLIB_HAS_RXCQ; int crclen = iflib_crcstrip ? 0 : 4; int error, avail; /* * For the free list consumer index, we use the same * logic as in iflib_rxeof(). */ if (have_rxcq) cidxp = &rxq->ifr_cq_cidx; else cidxp = &fl->ifl_cidx; avail = ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, *cidxp, USHRT_MAX); nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); MPASS(nm_i == kring->nr_hwtail); for (n = 0; avail > 0 && nm_i != hwtail_lim; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = *cidxp; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = 0; if (have_rxcq) { *cidxp = ri.iri_cidx; while (*cidxp >= scctx->isc_nrxd[0]) *cidxp -= scctx->isc_nrxd[0]; } bus_dmamap_sync(fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); fl->ifl_cidx = nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ netmap_fl_refill(rxq, kring, false); return (0); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { if_ctx_t ctx = na->ifp->if_softc; CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = iflib_num_tx_descs(ctx); na.num_rx_desc = iflib_num_rx_descs(ctx); na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static int iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return (0); for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } return (1); } static int iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_kring *kring; struct netmap_slot *slot; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return (0); kring = na->rx_rings[rxq->ifr_id]; netmap_fl_refill(rxq, kring, true); return (1); } static void iflib_netmap_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; /* * Wake up the netmap application, to give it a chance to * call txsync and reclaim more completed TX buffers. */ netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) (0) #define iflib_netmap_rxq_init(ctx, rxq) (0) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } static __inline void prefetch2cachelines(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); #if (CACHE_LINE_SIZE < 128) __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); #endif } #else #define prefetch(x) #define prefetch2cachelines(x) #endif static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) { iflib_fl_t fl; fl = &rxq->ifr_fl[flid]; iru->iru_paddrs = fl->ifl_bus_addrs; iru->iru_idxs = fl->ifl_rxd_idxs; iru->iru_qsidx = rxq->ifr_id; iru->iru_buf_size = fl->ifl_buf_size; iru->iru_flidx = fl->ifl_id; } static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags) { int err; device_t dev = ctx->ifc_dev; err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { if_shared_ctx_t sctx = ctx->ifc_sctx; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags)); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if_ctx_t ctx; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; iflib_txq_t txq; void *sc; int i, cidx, result; qidx_t txqid; bool intr_enable, intr_legacy; DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } ctx = rxq->ifr_ctx; sc = ctx->ifc_softc; intr_enable = false; intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY); MPASS(rxq->ifr_ntxqirq); for (i = 0; i < rxq->ifr_ntxqirq; i++) { txqid = rxq->ifr_txqid[i]; txq = &ctx->ifc_txqs[txqid]; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (!ctx->isc_txd_credits_update(sc, txqid, false)) { if (intr_legacy) intr_enable = true; else IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&txq->ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else { if (intr_legacy) intr_enable = true; else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } if (intr_enable) IFDI_INTR_ENABLE(ctx); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; int result; DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL) { result = info->ifi_filter(info->ifi_filter_arg); if ((result & FILTER_SCHEDULE_THREAD) == 0) return (result); } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, const char *name) { struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; int flags, i, rc; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); i = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate DMA resources for TX buffers as well as memory for the TX * mbuf map. TX DMA maps (non-TSO/TSO) and TX mbuf map are kept in a * iflib_sw_tx_desc_array structure, storing all the information that * is needed to transmit a packet on the wire. This is called only * once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; bus_size_t tsomaxsize; int err, nsegments, ntsosegments; bool tso; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; tsomaxsize = scctx->isc_tx_tso_size_max; if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU) tsomaxsize += sizeof(struct ether_vlan_header); MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) { MPASS(ntsosegments > 0); MPASS(sctx->isc_tso_maxsize >= tsomaxsize); } /* * Set up DMA tags for TX buffers. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_buf_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0; if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tsomaxsize, /* maxsize */ ntsosegments, /* nsegments */ sctx->isc_tso_maxsegsize,/* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_buf_tag))) { device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n", err); goto fail; } /* Allocate memory for the TX mbuf map. */ if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX mbuf map memory\n"); err = ENOMEM; goto fail; } /* * Create the DMA maps for TX buffers. */ if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX buffer DMA map memory\n"); err = ENOMEM; goto fail; } if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc( sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TSO TX buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_buf_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } if (!tso) continue; err = bus_dmamap_create(txq->ift_tso_buf_tag, 0, &txq->ift_sds.ifsd_tso_map[i]); if (err != 0) { device_printf(dev, "Unable to create TSO TX DMA map\n"); goto fail; } } return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; if (txq->ift_sds.ifsd_map != NULL) { map = txq->ift_sds.ifsd_map[i]; bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, map); bus_dmamap_destroy(txq->ift_buf_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } if (txq->ift_sds.ifsd_tso_map != NULL) { map = txq->ift_sds.ifsd_tso_map[i]; bus_dmamap_sync(txq->ift_tso_buf_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, map); bus_dmamap_destroy(txq->ift_tso_buf_tag, map); txq->ift_sds.ifsd_tso_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_br != NULL) { ifmp_ring_free(txq->ift_br); txq->ift_br = NULL; } mtx_destroy(&txq->ift_mtx); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_tso_map != NULL) { free(txq->ift_sds.ifsd_tso_map, M_IFLIB); txq->ift_sds.ifsd_tso_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_buf_tag); txq->ift_buf_tag = NULL; } if (txq->ift_tso_buf_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_buf_tag); txq->ift_tso_buf_tag = NULL; } if (txq->ift_ifdi != NULL) { free(txq->ift_ifdi, M_IFLIB); } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]); } if (txq->ift_sds.ifsd_tso_map != NULL) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[i]); } m_freem(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate DMA resources for RX buffers as well as memory for the RX * mbuf map, direct RX cluster pointer map and RX cluster bus address * map. RX DMA map, RX mbuf map, direct RX cluster pointer map and * RX cluster map are kept in a iflib_sw_rx_desc_array structure. * Since we use use one entry in iflib_sw_rx_desc_array per received * packet, the maximum number of entries we'll need is equal to the * number of hardware receive descriptors that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ /* Set up DMA tag for RX buffers. */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_buf_tag); if (err) { device_printf(dev, "Unable to allocate RX DMA tag: %d\n", err); goto fail; } /* Allocate memory for the RX mbuf map. */ if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX mbuf map memory\n"); err = ENOMEM; goto fail; } /* Allocate memory for the direct RX cluster pointer map. */ if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX cluster map memory\n"); err = ENOMEM; goto fail; } /* Allocate memory for the RX cluster bus address map. */ if (!(fl->ifl_sds.ifsd_ba = (bus_addr_t *) malloc(sizeof(bus_addr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX bus address map memory\n"); err = ENOMEM; goto fail; } /* * Create the DMA maps for RX buffers. */ if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX buffer DMA map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_buf_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } /** * iflib_fl_refill - refill an rxq free-buffer list * @ctx: the iflib context * @fl: the free list to refill * @count: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @count new packet buffers. * The caller must assure that @count does not exceed the queue's capacity * minus one (since we always leave a descriptor unavailable). */ static uint8_t iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct if_rxd_update iru; struct rxq_refill_cb_arg cb_arg; struct mbuf *m; caddr_t cl, *sd_cl; struct mbuf **sd_m; bus_dmamap_t *sd_map; bus_addr_t bus_addr, *sd_ba; int err, frag_idx, i, idx, n, pidx; qidx_t credits; MPASS(count <= fl->ifl_size - fl->ifl_credits - 1); sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_ba = fl->ifl_sds.ifsd_ba; pidx = fl->ifl_pidx; idx = pidx; frag_idx = fl->ifl_fragidx; credits = fl->ifl_credits; i = 0; n = count; MPASS(n > 0); MPASS(credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru_init(&iru, fl->ifl_rxq, fl->ifl_id); while (n-- > 0) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized * packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if (frag_idx < 0) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); MPASS(frag_idx >= 0); if ((cl = sd_cl[frag_idx]) == NULL) { cl = uma_zalloc(fl->ifl_zone, M_NOWAIT); if (__predict_false(cl == NULL)) break; cb_arg.error = 0; MPASS(sd_map != NULL); err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, BUS_DMA_NOWAIT); if (__predict_false(err != 0 || cb_arg.error)) { uma_zfree(fl->ifl_zone, cl); break; } sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; sd_cl[frag_idx] = cl; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } else { bus_addr = sd_ba[frag_idx]; } bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (sd_m[frag_idx] == NULL) { m = m_gethdr(M_NOWAIT, MT_NOINIT); if (__predict_false(m == NULL)) break; sd_m[frag_idx] = m; } bit_set(fl->ifl_rx_bitmap, frag_idx); #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; credits++; i++; MPASS(credits <= fl->ifl_size); if (++idx == fl->ifl_size) { #ifdef INVARIANTS fl->ifl_gen = 1; #endif idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; pidx = idx; i = 0; } } if (n < count - 1) { if (i != 0) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; } DBG_COUNTER_INC(rxd_flush); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, fl->ifl_pidx); if (__predict_true(bit_test(fl->ifl_rx_bitmap, frag_idx))) { fl->ifl_fragidx = frag_idx + 1; if (fl->ifl_fragidx == fl->ifl_size) fl->ifl_fragidx = 0; } else { fl->ifl_fragidx = frag_idx; } } return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY); } static inline uint8_t iflib_fl_refill_all(if_ctx_t ctx, iflib_fl_t fl) { /* * We leave an unused descriptor to avoid pidx to catch up with cidx. * This is important as it confuses most NICs. For instance, * Intel NICs have (per receive ring) RDH and RDT registers, where * RDH points to the next receive descriptor to be used by the NIC, * and RDT for the next receive descriptor to be published by the * driver to the NIC (RDT - 1 is thus the last valid one). * The condition RDH == RDT means no descriptors are available to * the NIC, and thus it would be ambiguous if it also meant that * all the descriptors are available to the NIC. */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) return (iflib_fl_refill(ctx, fl, reclaimable)); return (0); } uint8_t iflib_in_detach(if_ctx_t ctx) { bool in_detach; STATE_LOCK(ctx); in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH); STATE_UNLOCK(ctx); return (in_detach); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; bus_dmamap_t sd_map; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_cl != NULL) { sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_sync(fl->ifl_buf_tag, sd_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fl->ifl_buf_tag, sd_map); uma_zfree(fl->ifl_zone, *sd_cl); *sd_cl = NULL; if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); *sd_m = NULL; } } else { MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a free list and its buffers. * **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int qidx; bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); qidx = rxq->ifr_fl_offset + fl->ifl_id; if (scctx->isc_rxd_buf_size[qidx] != 0) fl->ifl_buf_size = scctx->isc_rxd_buf_size[qidx]; else fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz; /* * ifl_buf_size may be a driver-supplied value, so pull it up * to the selected mbuf size. */ fl->ifl_buf_size = iflib_get_mbuf_size_for(fl->ifl_buf_size); if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* * Avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach. In any case make sure * to leave a descriptor unavailable. See the comment in * iflib_fl_refill_all(). */ MPASS(fl->ifl_size > 0); (void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size - 1)); if (min(128, fl->ifl_size - 1) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i, j; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_buf_tag != NULL) { if (fl->ifl_sds.ifsd_map != NULL) { for (j = 0; j < fl->ifl_size; j++) { bus_dmamap_sync( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j], BUS_DMASYNC_POSTREAD); bus_dmamap_unload( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j]); bus_dmamap_destroy( fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[j]); } } bus_dma_tag_destroy(fl->ifl_buf_tag); fl->ifl_buf_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); free(fl->ifl_sds.ifsd_ba, M_IFLIB); free(fl->ifl_sds.ifsd_map, M_IFLIB); free(fl->ifl_rx_bitmap, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_ba = NULL; fl->ifl_sds.ifsd_map = NULL; fl->ifl_rx_bitmap = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; free(rxq->ifr_ifdi, M_IFLIB); rxq->ifr_ifdi = NULL; rxq->ifr_cq_cidx = 0; } } /* * Timer routine */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; uint64_t this_tick = ticks; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ - if (this_tick - txq->ift_last_timer_tick >= hz / 2) { + if (this_tick - txq->ift_last_timer_tick >= iflib_timer_default) { txq->ift_last_timer_tick = this_tick; IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (txq->ift_qstatus != IFLIB_QUEUE_IDLE && ifmp_ring_is_stalled(txq->ift_br)) { - KASSERT(ctx->ifc_link_state == LINK_STATE_UP, ("queue can't be marked as hung if interface is down")); + KASSERT(ctx->ifc_link_state == LINK_STATE_UP, + ("queue can't be marked as hung if interface is down")); txq->ift_qstatus = IFLIB_QUEUE_HUNG; } txq->ift_cleaned_prev = txq->ift_cleaned; } /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, hz / 2, iflib_timer, txq, txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, + txq, txq->ift_timer.c_cpu); return; hung: device_printf(ctx->ifc_dev, "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); STATE_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); } static uint16_t iflib_get_mbuf_size_for(unsigned int size) { if (size <= MCLBYTES) return (MCLBYTES); else return (MJUMPAGESIZE); } static void iflib_calc_rx_mbuf_sz(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ ctx->ifc_rx_mbuf_sz = iflib_get_mbuf_size_for(sctx->isc_max_frame_size); } uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx) { return (ctx->ifc_rx_mbuf_sz); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); #ifdef DEV_NETMAP callout_stop(&txq->ift_netmap_timer); #endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } /* * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so * that drivers can use the value when setting up the hardware receive * buffers. */ iflib_calc_rx_mbuf_sz(ctx); #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { if (iflib_netmap_rxq_init(ctx, rxq) > 0) { /* This rxq is in netmap mode. Skip normal init. */ continue; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "setting up free list %d failed - " "check cluster settings\n", j); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, + callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(1000); IFDI_STOP(ctx); DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); #ifdef DEV_NETMAP callout_stop(&txq->ift_netmap_timer); #endif /* DEV_NETMAP */ CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ rxq->ifr_cq_cidx = 0; for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static struct mbuf * rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, int *pf_rv, if_rxd_info_t ri) { bus_dmamap_t map; iflib_fl_t fl; caddr_t payload; struct mbuf *m; int flid, cidx, len, next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; m = fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL && irf->irf_len != 0) { payload = *sd->ifsd_cl; payload += ri->iri_pad; len = ri->iri_len - ri->iri_pad; *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, len | PFIL_MEMPTR | PFIL_IN, NULL); switch (*pf_rv) { case PFIL_DROPPED: case PFIL_CONSUMED: /* * The filter ate it. Everything is recycled. */ m = NULL; unload = 0; break; case PFIL_REALLOCED: /* * The filter copied it. Everything is recycled. */ m = pfil_mem2mbuf(payload); unload = 0; break; case PFIL_PASS: /* * Filter said it was OK, so receive like * normal */ fl->ifl_sds.ifsd_m[cidx] = NULL; break; default: MPASS(0); } } else { fl->ifl_sds.ifsd_m[cidx] = NULL; *pf_rv = PFIL_PASS; } if (unload && irf->irf_len != 0) bus_dmamap_unload(fl->ifl_buf_tag, map); fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; bit_clear(fl->ifl_rx_bitmap, cidx); return (m); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) { struct mbuf *m, *mh, *mt; caddr_t cl; int *pf_rv_ptr, flags, i, padlen; bool consumed; i = 0; mh = NULL; consumed = false; *pf_rv = PFIL_PASS; pf_rv_ptr = pf_rv; do { m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, pf_rv_ptr, ri); MPASS(*sd->ifsd_cl != NULL); /* * Exclude zero-length frags & frags from * packets the filter has consumed or dropped */ if (ri->iri_frags[i].irf_len == 0 || consumed || *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED) { if (mh == NULL) { /* everything saved here */ consumed = true; pf_rv_ptr = NULL; continue; } /* XXX we can save the cluster here, but not the mbuf */ m_init(m, M_NOWAIT, MT_DATA, 0); m_free(m); continue; } if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; int pf_rv; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len != 0 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, &pf_rv, ri); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); if (pf_rv == PFIL_PASS) { m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } } else { m = assemble_segments(rxq, ri, &sd, &pf_rv); if (m == NULL) return (NULL); if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) return (m); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } #if defined(INET6) || defined(INET) static void iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) { CURVNET_SET(lc->ifp->if_vnet); #if defined(INET6) *v6 = V_ip6_forwarding; #endif #if defined(INET) *v4 = V_ipforwarding; #endif CURVNET_RESTORE(); } /* * Returns true if it's possible this packet could be LROed. * if it returns false, it is guaranteed that tcp_lro_rx() * would not return zero. */ static bool iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { struct ether_header *eh; eh = mtod(m, struct ether_header *); switch (eh->ether_type) { #if defined(INET6) case htons(ETHERTYPE_IPV6): return (!v6_forwarding); #endif #if defined (INET) case htons(ETHERTYPE_IP): return (!v4_forwarding); #endif } return false; } #else static void iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) { } #endif static void _task_fn_rx_watchdog(void *context) { iflib_rxq_t rxq = context; GROUPTASK_ENQUEUE(&rxq->ifr_task); } static uint8_t iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_t ifp; if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; int lro_enabled; bool v4_forwarding, v6_forwarding, lro_possible; uint8_t retval = 0; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt, *mf; NET_EPOCH_ASSERT(); lro_possible = v4_forwarding = v6_forwarding = false; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= iflib_fl_refill_all(ctx, fl); DBG_COUNTER_INC(rx_unavail); return (retval); } /* pfil needs the vnet to be set */ CURVNET_SET_QUIET(ifp->if_vnet); for (budget_left = budget; budget_left > 0 && avail > 0;) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; rx_pkts += 1; rx_bytes += ri.iri_len; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; /* was this only a completion queue message? */ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); avail--; budget_left--; if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) continue; /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } CURVNET_RESTORE(); /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) retval |= iflib_fl_refill_all(ctx, fl); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); if (lro_enabled) iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); mt = mf = NULL; while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled) { if (!lro_possible) { lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); if (lro_possible && mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); mt = mf = NULL; } } if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == (CSUM_L4_CALC|CSUM_L4_VALID)) { if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; } } #endif if (lro_possible) { ifp->if_input(ifp, m); DBG_COUNTER_INC(rx_if_input); continue; } if (mf == NULL) mf = m; if (mt != NULL) mt->m_nextpkt = m; mt = m; } if (mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0) retval |= IFLIB_RXEOF_MORE; return (retval); err: STATE_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); STATE_UNLOCK(ctx); return (0); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) -#define MAX_TX_DESC(ctx) max((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ +#define MAX_TX_DESC(ctx) MAX((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ (ctx)->ifc_softc_ctx.isc_tx_nsegments) static inline bool -iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) +iflib_txd_db_check(iflib_txq_t txq, int ring) { + if_ctx_t ctx = txq->ift_ctx; qidx_t dbval, max; - bool rang; - rang = false; - max = TXQ_MAX_DB_DEFERRED(txq, in_use); - if (ring || txq->ift_db_pending >= max) { + max = TXQ_MAX_DB_DEFERRED(txq, txq->ift_in_use); + + /* force || threshold exceeded || at the edge of the ring */ + if (ring || (txq->ift_db_pending >= max) || (TXQ_AVAIL(txq) <= MAX_TX_DESC(ctx) + 2)) { + + /* + * 'npending' is used if the card's doorbell is in terms of the number of descriptors + * pending flush (BRCM). 'pidx' is used in cases where the card's doorbeel uses the + * producer index explicitly (INTC). + */ dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); + + /* + * Absent bugs there are zero packets pending so reset pending counts to zero. + */ txq->ift_db_pending = txq->ift_npending = 0; - rang = true; + return (true); } - return (rang); + return (false); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m; m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); DBG_COUNTER_INC(tx_frees); *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct mbuf *n; struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD4(pi)) { if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. */ pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } } if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; /* TCP checksum offload may require TCP header length */ if (IS_TX_OFFLOAD6(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * TSO always requires hardware checksum offload. */ pi->ipi_csum_flags |= CSUM_IP6_TCP; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, pidx; struct mbuf *m, **ifsd_m; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx & (ntxd - 1); ifsd_m = txq->ift_sds.ifsd_m; m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]); if (txq->ift_sds.ifsd_tso_map != NULL) bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[pidx]); #if MEMORY_LOGGING txq->ift_dequeued++; #endif return (m); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } /* * Pad an mbuf to ensure a minimum ethernet frame size. * min_frame_size is the frame size (less CRC) to pad the mbuf to */ static __noinline int iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) { /* * 18 is enough bytes to pad an ARP packet to 46 bytes, and * and ARP message is the smallest common payload I can think of */ static char pad[18]; /* just zeros */ int n; struct mbuf *new_head; if (!M_WRITABLE(*m_head)) { new_head = m_dup(*m_head, M_NOWAIT); if (new_head == NULL) { m_freem(*m_head); device_printf(dev, "cannot pad short frame, m_dup() failed"); DBG_COUNTER_INC(encap_pad_mbuf_fail); DBG_COUNTER_INC(tx_frees); return ENOMEM; } m_freem(*m_head); *m_head = new_head; } for (n = min_frame_size - (*m_head)->m_pkthdr.len; n > 0; n -= sizeof(pad)) if (!m_append(*m_head, min(n, sizeof(pad)), pad)) break; if (n > 0) { m_freem(*m_head); device_printf(dev, "cannot pad short frame\n"); DBG_COUNTER_INC(encap_pad_mbuf_fail); DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } return 0; } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_tag_t buf_tag; bus_dma_segment_t *segs; struct mbuf *m_head, **ifsd_m; void *next_txd; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; if (ctx->ifc_flags & IFC_PREFETCH) { next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { next_txd = calc_next_txd(txq, cidx, 0); prefetch(next_txd); } /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); prefetch(&txq->ift_sds.ifsd_map[next]); next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); } map = txq->ift_sds.ifsd_map[pidx]; ifsd_m = txq->ift_sds.ifsd_m; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { buf_tag = txq->ift_tso_buf_tag; max_segs = scctx->isc_tx_tso_segments_max; map = txq->ift_sds.ifsd_tso_map[pidx]; MPASS(buf_tag != NULL); MPASS(max_segs > 0); } else { buf_tag = txq->ift_buf_tag; max_segs = scctx->isc_tx_nsegments; map = txq->ift_sds.ifsd_map[pidx]; } if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); if (err) { DBG_COUNTER_INC(encap_txd_encap_fail); return err; } } m_head = *m_headp; pkt_info_zero(&pi); pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = M_HAS_VLANTAG(m_head) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) { DBG_COUNTER_INC(encap_txd_encap_fail); return (err); } m_head = *m_headp; } retry: err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) { m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); /* try defrag if collapsing fails */ if (m_head == NULL) remap++; } if (remap == 1) { txq->ift_mbuf_defrag++; m_head = m_defrag(*m_headp, M_NOWAIT); } /* * remap should never be >1 unless bus_dmamap_load_mbuf_sg * failed to map an mbuf that was run through m_defrag */ MPASS(remap <= 1); if (__predict_false(m_head == NULL || remap > 1)) goto defrag_failed; remap++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); DBG_COUNTER_INC(encap_txd_encap_fail); return (err); } ifsd_m[pidx] = m_head; /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; bus_dmamap_unload(buf_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); DBG_COUNTER_INC(encap_txd_encap_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } /* * On Intel cards we can greatly reduce the number of TX interrupts * we see by only setting report status on every Nth descriptor. * However, this also means that the driver will need to keep track * of the descriptors that RS was set on to check them for the DD bit. */ txq->ift_rs_pending += nsegs + 1; if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) { pi.ipi_flags |= IPI_TX_INTR; txq->ift_rs_pending = 0; } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; + txq->ift_db_pending += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else { *m_headp = m_head = iflib_remove_mbuf(txq); if (err == EFBIG) { txq->ift_txd_encap_efbig++; if (remap < 2) { remap = 1; goto defrag; } } goto defrag_failed; } /* * err can't possibly be non-zero here, so we don't neet to test it * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail). */ return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; DBG_COUNTER_INC(encap_txd_encap_fail); return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; ifsd_m = txq->ift_sds.ifsd_m; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n-- > 0) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if ((m = ifsd_m[cidx]) != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); if (m->m_pkthdr.csum_flags & CSUM_TSO) { bus_dmamap_sync(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_tso_buf_tag, txq->ift_sds.ifsd_tso_map[cidx]); } else { bus_dmamap_sync(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[cidx]); } /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); m_freem(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch2cachelines(&items[next]); prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) return (1); bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if_t ifp = ctx->ifc_ifp; struct mbuf *m, **mp; - int avail, bytes_sent, consumed, count, err, i, in_use_prev; - int mcast_sent, pkt_sent, reclaimed, txq_avail; + int avail, bytes_sent, skipped, count, err, i; + int mcast_sent, pkt_sent, reclaimed; bool do_prefetch, rang, ring; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); - rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); + rang = iflib_txd_db_check(txq, reclaimed && txq->ift_db_pending); avail = IDXDIFF(pidx, cidx, r->size); + if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { + /* + * The driver is unloading so we need to free all pending packets. + */ DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) m_freem(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } + + /* + * If we've reclaimed any packets this queue cannot be hung. + */ if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; - consumed = mcast_sent = bytes_sent = pkt_sent = 0; + skipped = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); - txq_avail = TXQ_AVAIL(txq); err = 0; - for (i = 0; i < count && txq_avail > MAX_TX_DESC(ctx) + 2; i++) { + for (i = 0; i < count && TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx) + 2; i++) { int rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); + + /* + * Completion interrupts will use the address of the txq + * as a sentinel to enqueue _something_ in order to acquire + * the lock on the mp_ring (there's no direct lock call). + * We obviously whave to check for these sentinel cases + * and skip them. + */ if (__predict_false(*mp == (struct mbuf *)txq)) { - consumed++; + skipped++; continue; } - in_use_prev = txq->ift_in_use; err = iflib_encap(txq, mp); if (__predict_false(err)) { /* no room - bail out */ if (err == ENOBUFS) break; - consumed++; + skipped++; /* we can't send this packet - skip it */ continue; } - consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); - txq_avail = TXQ_AVAIL(txq); - txq->ift_db_pending += (txq->ift_in_use - in_use_prev); - ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; - rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); + ETHER_BPF_MTAP(ifp, m); + rang = iflib_txd_db_check(txq, false); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ - ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); - iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); + ring = rang ? false : (iflib_min_tx_latency | err); + iflib_txd_db_check(txq, ring); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) - printf("consumed=%d\n", consumed); + printf("consumed=%d\n", skipped + pkt_sent); #endif - return (consumed); + return (skipped + pkt_sent); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); DBG_COUNTER_INC(tx_frees); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; if_t ifp = ctx->ifc_ifp; int abdicate = ctx->ifc_sysctl_tx_abdicate; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return; #ifdef DEV_NETMAP if ((if_getcapenable(ifp) & IFCAP_NETMAP) && netmap_tx_irq(ifp, txq->ift_id)) goto skip_ifmp; #endif #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) iflib_altq_if_start(ifp); #endif if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate); else if (!abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); /* * When abdicating, we always need to check drainage, not just when we don't enqueue */ if (abdicate) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); #ifdef DEV_NETMAP skip_ifmp: #endif if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; uint8_t more; uint16_t budget; #ifdef DEV_NETMAP u_int work = 0; int nmirq; #endif #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; #ifdef DEV_NETMAP nmirq = netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work); if (nmirq != NM_IRQ_PASS) { more = (nmirq == NM_IRQ_RESCHED) ? IFLIB_RXEOF_MORE : 0; goto skip_rxeof; } #endif budget = ctx->ifc_sysctl_rx_budget; if (budget == 0) budget = 16; /* XXX */ more = iflib_rxeof(rxq, budget); #ifdef DEV_NETMAP skip_rxeof: #endif if ((more & IFLIB_RXEOF_MORE) == 0) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more & IFLIB_RXEOF_MORE) GROUPTASK_ENQUEUE(&rxq->ifr_task); else if (more & IFLIB_RXEOF_EMPTY) callout_reset_curcpu(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; bool oactive, running, do_reset, do_watchdog, in_detach; STATE_LOCK(ctx); running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE); do_reset = (ctx->ifc_flags & IFC_DO_RESET); do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG); in_detach = (ctx->ifc_flags & IFC_IN_DETACH); ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG); STATE_UNLOCK(ctx); if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; if (in_detach) return; CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } if (do_watchdog) { ctx->ifc_watchdog_events++; IFDI_WATCHDOG_RESET(ctx); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { - callout_reset_on(&txq->ift_timer, hz / 2, iflib_timer, txq, + callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, txq->ift_timer.c_cpu); } IFDI_LINK_INTR_ENABLE(ctx); if (do_reset) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; int abdicate = ctx->ifc_sysctl_tx_abdicate; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENETDOWN); } MPASS(m->m_nextpkt == NULL); /* ALTQ-enabled interfaces always use queue 0. */ qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) */ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); DBG_COUNTER_INC(tx_frees); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate); if (abdicate) GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { if (!abdicate) GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); DBG_COUNTER_INC(tx_frees); } return (err); } #ifdef ALTQ /* * The overall approach to integrating iflib with ALTQ is to continue to use * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware * ring. Technically, when using ALTQ, queueing to an intermediate mp_ring * is redundant/unnecessary, but doing so minimizes the amount of * ALTQ-specific code required in iflib. It is assumed that the overhead of * redundantly queueing to an intermediate mp_ring is swamped by the * performance limitations inherent in using ALTQ. * * When ALTQ support is compiled in, all iflib drivers will use a transmit * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the * given interface. If ALTQ is enabled for an interface, then all * transmitted packets for that interface will be submitted to the ALTQ * subsystem via IFQ_ENQUEUE(). We don't use the legacy if_transmit() * implementation because it uses IFQ_HANDOFF(), which will duplicatively * update stats that the iflib machinery handles, and which is sensitve to * the disused IFF_DRV_OACTIVE flag. Additionally, iflib_altq_if_start() * will be installed as the start routine for use by ALTQ facilities that * need to trigger queue drains on a scheduled basis. * */ static void iflib_altq_if_start(if_t ifp) { struct ifaltq *ifq = &ifp->if_snd; struct mbuf *m; IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { iflib_if_transmit(ifp, m); IFQ_DEQUEUE_NOLOCK(ifq, m); } IFQ_UNLOCK(ifq); } static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m) { int err; if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_ENQUEUE(&ifp->if_snd, m, err); if (err == 0) iflib_altq_if_start(ifp); } else err = iflib_if_transmit(ifp, m); return (err); } #endif /* ALTQ */ static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; STATE_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; STATE_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); STATE_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; STATE_UNLOCK(ctx); /* * When ALTQ is enabled, this will also take care of purging the * ALTQ queue(s). */ if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_NOMAP) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = false; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = true; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = true; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { STATE_LOCK(ctx); if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; STATE_UNLOCK(ctx); err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { CTX_UNLOCK(ctx); err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); CTX_LOCK(ctx); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* FALLTHROUGH */ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask, oldmask; oldmask = if_getcapenable(ifp); mask = ifr->ifr_reqcap ^ oldmask; mask &= ctx->ifc_softc_ctx.isc_capabilities | IFCAP_NOMAP; setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); setmask |= (mask & IFCAP_WOL); /* * If any RX csum has changed, change all the ones that * are supported by the driver. */ if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { setmask |= ctx->ifc_softc_ctx.isc_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); } /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_stop(ctx); STATE_LOCK(ctx); if_togglecapenable(ifp, setmask); STATE_UNLOCK(ctx); if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) iflib_init_locked(ctx); STATE_LOCK(ctx); if_setdrvflags(ifp, bits); STATE_UNLOCK(ctx); CTX_UNLOCK(ctx); } if_vlancap(ifp); break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; if (iflib_in_detach(ctx)) return; CTX_LOCK(ctx); /* Driver may need all untagged packets to be flushed */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_stop(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes, if required */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); /* Driver may need all tagged packets to be flushed */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_stop(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes, if required */ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { const pci_vendor_info_t *ent; if_shared_ctx_t sctx; uint16_t pci_device_id, pci_rev_id, pci_subdevice_id, pci_subvendor_id; uint16_t pci_vendor_id; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_probe_vendor(device_t dev) { int probe; probe = iflib_device_probe(dev); if (probe == BUS_PROBE_DEFAULT) return (BUS_PROBE_VENDOR); else return (probe); } static void iflib_reset_qvalues(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; int i; if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } if (!powerof2(scctx->isc_nrxd[i])) { device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "ntxd%d: %d is not a power of 2 - using default value of %d\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } } } static void iflib_add_pfil(if_ctx_t ctx) { struct pfil_head *pfil; struct pfil_head_args pa; iflib_rxq_t rxq; int i; pa.pa_version = PFIL_VERSION; pa.pa_flags = PFIL_IN; pa.pa_type = PFIL_TYPE_ETHERNET; pa.pa_headname = ctx->ifc_ifp->if_xname; pfil = pfil_head_register(&pa); for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = pfil; } } static void iflib_rem_pfil(if_ctx_t ctx) { struct pfil_head *pfil; iflib_rxq_t rxq; int i; rxq = ctx->ifc_rxqs; pfil = rxq->pfil; for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { rxq->pfil = NULL; } pfil_head_unregister(pfil); } static uint16_t get_ctx_core_offset(if_ctx_t ctx) { if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; struct cpu_offset *op; uint16_t qc; uint16_t ret = ctx->ifc_sysctl_core_offset; if (ret != CORE_OFFSET_UNSPECIFIED) return (ret); if (ctx->ifc_sysctl_separate_txrx) qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets; else qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets); mtx_lock(&cpu_offset_mtx); SLIST_FOREACH(op, &cpu_offsets, entries) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { ret = op->offset; op->offset += qc; MPASS(op->refcount < UINT_MAX); op->refcount++; break; } } if (ret == CORE_OFFSET_UNSPECIFIED) { ret = 0; op = malloc(sizeof(struct cpu_offset), M_IFLIB, M_NOWAIT | M_ZERO); if (op == NULL) { device_printf(ctx->ifc_dev, "allocation for cpu offset failed.\n"); } else { op->offset = qc; op->refcount = 1; CPU_COPY(&ctx->ifc_cpus, &op->set); SLIST_INSERT_HEAD(&cpu_offsets, op, entries); } } mtx_unlock(&cpu_offset_mtx); return (ret); } static void unref_ctx_core_offset(if_ctx_t ctx) { struct cpu_offset *op, *top; mtx_lock(&cpu_offset_mtx); SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { MPASS(op->refcount > 0); op->refcount--; if (op->refcount == 0) { SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); free(op, M_IFLIB); } break; } } mtx_unlock(&cpu_offset_mtx); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; kobjop_desc_t kobj_desc; kobj_method_t *kobj_method; int err, msix, rid; int num_txd, num_rxd; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) ctx->ifc_mediap = scctx->isc_media; #ifdef INVARIANTS if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_NOMAP); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_NOMAP); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; num_txd = iflib_num_tx_descs(ctx); num_rxd = iflib_num_rx_descs(ctx); /* XXX change for per-queue sizes */ device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", num_txd, num_rxd); if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, num_txd / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > num_txd / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, num_txd / MAX_SINGLE_PACKET_FRACTION); /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ if (if_getcapabilities(ifp) & IFCAP_TSO) { /* * The stack can't handle a TSO size larger than IP_MAXPACKET, * but some MACs do. */ if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, IP_MAXPACKET)); /* * Take maximum number of m_pullup(9)'s in iflib_parse_header() * into account. In the worst case, each of these calls will * add another mbuf and, thus, the requirement for another DMA * segment. So for best performance, it doesn't make sense to * advertize a maximum of TSO segments that typically will * require defragmentation in iflib_encap(). */ if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); } if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, NULL, NULL, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. */ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { device_printf(dev, "Unable to fetch CPU list\n"); CPU_COPY(&all_cpus, &ctx->ifc_cpus); } MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* ** Now set up MSI or MSI-X, should return us the number of supported ** vectors (will be 1 for a legacy interrupt and MSI). */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail_intr_free; } if ((err = iflib_qset_structures_setup(ctx))) goto fail_queues; /* * Now that we know how many queues there are, get the core offset. */ ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); if (msix > 1) { /* * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable * aren't the default NULL implementation. */ kobj_desc = &ifdi_rx_queue_intr_enable_desc; kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) { device_printf(dev, "MSI-X requires ifdi_rx_queue_intr_enable method"); err = EOPNOTSUPP; goto fail_queues; } kobj_desc = &ifdi_tx_queue_intr_enable_desc; kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) { device_printf(dev, "MSI-X requires ifdi_tx_queue_intr_enable method"); err = EOPNOTSUPP; goto fail_queues; } /* * Assign the MSI-X vectors. * Note that the default NULL ifdi_msix_intr_assign method will * fail here, too. */ err = IFDI_MSIX_INTR_ASSIGN(ctx, msix); if (err != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_queues; } } else if (scctx->isc_intr != IFLIB_INTR_MSIX) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_queues; } } else { device_printf(dev, "Cannot use iflib with only 1 MSI-X interrupt!\n"); err = ENODEV; goto fail_intr_free; } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. */ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; DEBUGNET_SET(ctx->ifc_ifp, iflib); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: iflib_free_intr_mem(ctx); fail_queues: iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); iflib_tqg_detach(ctx); IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); iflib_deregister(ctx); fail_ctx_free: device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (err); } int iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, struct iflib_cloneattach_ctx *clctx) { int num_txd, num_rxd; int err; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; void *sc; ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); ctx->ifc_flags |= IFC_SC_ALLOCATED; if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) ctx->ifc_flags |= IFC_PSEUDO; ctx->ifc_sctx = sctx; ctx->ifc_softc = sc; ctx->ifc_dev = dev; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); goto fail_ctx_free; } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); goto fail_unlock; } if (sctx->isc_flags & IFLIB_GEN_MAC) ether_gen_addr(ifp, &ctx->ifc_mac); if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, clctx->cc_params)) != 0) { device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); goto fail_unlock; } #ifdef INVARIANTS if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); ifp->if_flags |= IFF_NOGROUP; if (sctx->isc_flags & IFLIB_PSEUDO) { ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) { ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); } else { if_attach(ctx->ifc_ifp); bpfattach(ctx->ifc_ifp, DLT_NULL, sizeof(u_int32_t)); } if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } *ctxp = ctx; /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. */ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); } ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; num_txd = iflib_num_tx_descs(ctx); num_rxd = iflib_num_rx_descs(ctx); /* XXX change for per-queue sizes */ device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", num_txd, num_rxd); if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, num_txd / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > num_txd / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, num_txd / MAX_SINGLE_PACKET_FRACTION); /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ if (if_getcapabilities(ifp) & IFCAP_TSO) { /* * The stack can't handle a TSO size larger than IP_MAXPACKET, * but some MACs do. */ if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, IP_MAXPACKET)); /* * Take maximum number of m_pullup(9)'s in iflib_parse_header() * into account. In the worst case, each of these calls will * add another mbuf and, thus, the requirement for another DMA * segment. So for best performance, it doesn't make sense to * advertize a maximum of TSO segments that typically will * require defragmentation in iflib_encap(). */ if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); } if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, NULL, NULL, "admin"); /* XXX --- can support > 1 -- but keep it simple for now */ scctx->isc_intr = IFLIB_INTR_LEGACY; /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail_iflib_detach; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * XXX What if anything do we want to do about interrupts? */ ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } /* * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. * This must appear after the call to ether_ifattach() because * ether_ifattach() sets if_hdrlen to the default value. */ if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); /* XXX handle more than one queue */ for (i = 0; i < scctx->isc_nrxqsets; i++) IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl); *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_queues: iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); iflib_tqg_detach(ctx); fail_iflib_detach: IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); iflib_deregister(ctx); fail_ctx_free: free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (err); } int iflib_pseudo_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; if_shared_ctx_t sctx = ctx->ifc_sctx; /* Unregister VLAN event handlers early */ iflib_unregister_vlan_handlers(ctx); if ((sctx->isc_flags & IFLIB_PSEUDO) && (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) { bpfdetach(ifp); if_detach(ifp); } else { ether_ifdetach(ifp); } iflib_tqg_detach(ctx); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); iflib_deregister(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; device_t dev = ctx->ifc_dev; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev, "Vlan in use, detach first\n"); return (EBUSY); } #ifdef PCI_IOV if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } #endif STATE_LOCK(ctx); ctx->ifc_flags |= IFC_IN_DETACH; STATE_UNLOCK(ctx); /* Unregister VLAN handlers before calling iflib_stop() */ iflib_unregister_vlan_handlers(ctx); iflib_netmap_detach(ifp); ether_ifdetach(ifp); CTX_LOCK(ctx); iflib_stop(ctx); CTX_UNLOCK(ctx); iflib_rem_pfil(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); iflib_tqg_detach(ctx); CTX_LOCK(ctx); IFDI_DETACH(ctx); CTX_UNLOCK(ctx); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ iflib_free_intr_mem(ctx); bus_generic_detach(dev); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); iflib_deregister(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); unref_ctx_core_offset(ctx); free(ctx, M_IFLIB); return (0); } static void iflib_tqg_detach(if_ctx_t ctx) { iflib_txq_t txq; iflib_rxq_t rxq; int i; struct taskqgroup *tqg; /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); #ifdef DEV_NETMAP callout_drain(&txq->ift_netmap_timer); #endif /* DEV_NETMAP */ if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); } static void iflib_free_intr_mem(if_ctx_t ctx) { if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(ctx->ifc_dev); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { + iflib_timer_default = hz / 2; return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { int i; MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8); for (i = 0; i < sctx->isc_nrxqs; i++) { MPASS(sctx->isc_nrxd_min[i]); MPASS(powerof2(sctx->isc_nrxd_min[i])); MPASS(sctx->isc_nrxd_max[i]); MPASS(powerof2(sctx->isc_nrxd_max[i])); MPASS(sctx->isc_nrxd_default[i]); MPASS(powerof2(sctx->isc_nrxd_default[i])); } MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8); for (i = 0; i < sctx->isc_ntxqs; i++) { MPASS(sctx->isc_ntxd_min[i]); MPASS(powerof2(sctx->isc_ntxd_min[i])); MPASS(sctx->isc_ntxd_max[i]); MPASS(powerof2(sctx->isc_ntxd_max[i])); MPASS(sctx->isc_ntxd_default[i]); MPASS(powerof2(sctx->isc_ntxd_default[i])); } } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; u_char type; int iflags; if ((sctx->isc_flags & IFLIB_PSEUDO) == 0) _iflib_assert(sctx); CTX_LOCK_INIT(ctx); STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); if (sctx->isc_flags & IFLIB_PSEUDO) { if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) type = IFT_ETHER; else type = IFT_PPP; } else type = IFT_ETHER; ifp = ctx->ifc_ifp = if_alloc(type); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); #ifdef ALTQ if_setstartfn(ifp, iflib_altq_if_start); if_settransmitfn(ifp, iflib_altq_if_transmit); if_setsendqready(ifp); #else if_settransmitfn(ifp, iflib_if_transmit); #endif if_setqflushfn(ifp, iflib_if_qflush); iflags = IFF_MULTICAST | IFF_KNOWSEPOCH; if ((sctx->isc_flags & IFLIB_PSEUDO) && (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) iflags |= IFF_POINTOPOINT; else iflags |= IFF_BROADCAST | IFF_SIMPLEX; if_setflags(ifp, iflags); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) { ctx->ifc_mediap = &ctx->ifc_media; ifmedia_init(ctx->ifc_mediap, IFM_IMASK, iflib_media_change, iflib_media_status); } return (0); } static void iflib_unregister_vlan_handlers(if_ctx_t ctx) { /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); ctx->ifc_vlan_attach_event = NULL; } if (ctx->ifc_vlan_detach_event != NULL) { EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); ctx->ifc_vlan_detach_event = NULL; } } static void iflib_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; /* Remove all media */ ifmedia_removeall(&ctx->ifc_media); /* Ensure that VLAN event handlers are unregistered */ iflib_unregister_vlan_handlers(ctx); /* Release kobject reference */ kobj_delete((kobj_t) ctx, NULL); /* Free the ifnet structure */ if_free(ifp); STATE_LOCK_DESTROY(ctx); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ CTX_LOCK_DESTROY(ctx); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); /* Allocate the TX ring struct memory */ if (!(ctx->ifc_txqs = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(ctx->ifc_rxqs = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } txq = ctx->ifc_txqs; rxq = ctx->ifc_rxqs; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate TX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate TX descriptors\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); txq->ift_timer.c_cpu = cpu; #ifdef DEV_NETMAP callout_init_mtx(&txq->ift_netmap_timer, &txq->ift_mtx, 0); txq->ift_netmap_timer.c_cpu = cpu; #endif /* DEV_NETMAP */ err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ callout_init(&rxq->ifr_watchdog, 1); if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { device_printf(dev, "Unable to allocate RX DMA info memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) { device_printf(dev, "Unable to allocate RX descriptors\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring */ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "Unable to allocate device TX queue\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "Unable to allocate device RX queue\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: rx_fail: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; if_shared_ctx_t sctx = ctx->ifc_sctx; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { for (j = 0; j < sctx->isc_ntxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); iflib_txq_destroy(txq); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int err, i; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) { err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset])); if (err != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } } #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free LRO resources allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) tcp_lro_free(&rxq->ifr_lc); } return (err); #endif } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; if_shared_ctx_t sctx = ctx->ifc_sctx; int i, j; for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { for (j = 0; j < sctx->isc_nrxqs; j++) iflib_dma_free(&rxq->ifr_ifdi[j]); iflib_rx_sds_free(rxq); #if defined(INET6) || defined(INET) if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) tcp_lro_free(&rxq->ifr_lc); #endif } free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; /* * It is expected that the caller takes care of freeing queues if this * fails. */ if ((err = iflib_tx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err); return (err); } if ((err = iflib_rx_structures_setup(ctx)) != 0) device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } #ifdef SMP static int find_nth(if_ctx_t ctx, int qid) { cpuset_t cpus; int i, cpuid, eqid, count; CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); CPU_CLR(cpuid-1, &cpus); } cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } #ifdef SCHED_ULE extern struct cpu_group *cpu_top; /* CPU topology */ static int find_child_with_core(int cpu, struct cpu_group *grp) { int i; if (grp->cg_children == 0) return -1; MPASS(grp->cg_child); for (i = 0; i < grp->cg_children; i++) { if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) return i; } return -1; } /* * Find the nth "close" core to the specified core * "close" is defined as the deepest level that shares * at least an L2 cache. With threads, this will be * threads on the same core. If the shared cache is L3 * or higher, simply returns the same core. */ static int find_close_core(int cpu, int core_offset) { struct cpu_group *grp; int i; int fcpu; cpuset_t cs; grp = cpu_top; if (grp == NULL) return cpu; i = 0; while ((i = find_child_with_core(cpu, grp)) != -1) { /* If the child only has one cpu, don't descend */ if (grp->cg_child[i].cg_count <= 1) break; grp = &grp->cg_child[i]; } /* If they don't share at least an L2 cache, use the same CPU */ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) return cpu; /* Now pick one */ CPU_COPY(&grp->cg_mask, &cs); /* Add the selected CPU offset to core offset. */ for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) { if (fcpu - 1 == cpu) break; CPU_CLR(fcpu - 1, &cs); } MPASS(fcpu); core_offset += i; CPU_COPY(&grp->cg_mask, &cs); for (i = core_offset % grp->cg_count; i > 0; i--) { MPASS(CPU_FFS(&cs)); CPU_CLR(CPU_FFS(&cs) - 1, &cs); } MPASS(CPU_FFS(&cs)); return CPU_FFS(&cs) - 1; } #else static int find_close_core(int cpu, int core_offset __unused) { return cpu; } #endif static int get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid) { switch (type) { case IFLIB_INTR_TX: /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */ /* XXX handle multiple RX threads per core and more than two core per L2 group */ return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; case IFLIB_INTR_RX: case IFLIB_INTR_RXTX: /* RX queues get the specified core */ return qid / CPU_COUNT(&ctx->ifc_cpus); default: return -1; } } #else #define get_core_offset(ctx, type, qid) CPU_FIRST() #define find_close_core(cpuid, tid) CPU_FIRST() #define find_nth(ctx, gid) CPU_FIRST() #endif /* Just to avoid copy/paste */ static inline int iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, const char *name) { device_t dev; int co, cpuid, err, tid; dev = ctx->ifc_dev; co = ctx->ifc_sysctl_core_offset; if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX) co += ctx->ifc_softc_ctx.isc_nrxqsets; cpuid = find_nth(ctx, qid + co); tid = get_core_offset(ctx, type, qid); if (tid < 0) { device_printf(dev, "get_core_offset failed\n"); return (EOPNOTSUPP); } cpuid = find_close_core(cpuid, tid); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res, name); if (err) { device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); return (err); } #ifdef notyet if (cpuid > ctx->ifc_cpuid_highest) ctx->ifc_cpuid_highest = cpuid; #endif return (0); } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name) { device_t dev; struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; gtask_fn_t *fn; int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? */ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); ctx->ifc_flags |= IFC_NETMAP_TX_IRQ; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: device_printf(ctx->ifc_dev, "%s: unknown net intr type\n", __func__); return (EINVAL); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; dev = ctx->ifc_dev; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) return (err); } else { taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; NET_GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; fn = _task_fn_iov; GROUPTASK_INIT(gtask, 0, fn, q); break; default: panic("unknown net intr type"); } if (irq != NULL) { err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); if (err) taskqgroup_attach(tqg, gtask, q, ctx->ifc_dev, irq->ii_res, name); } else { taskqgroup_attach(tqg, gtask, q, NULL, NULL, name); } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, rman_get_rid(irq->ii_res), irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; device_t dev; struct grouptask *gtask; struct resource *res; struct taskqgroup *tqg; void *q; int err, tqrid; bool rx_only; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = *rid; rx_only = (ctx->ifc_sctx->isc_flags & IFLIB_SINGLE_IRQ_RX_ONLY) != 0; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = rx_only ? ctx : q; dev = ctx->ifc_dev; /* We allocate a single interrupt resource */ err = _iflib_irq_alloc(ctx, irq, tqrid, rx_only ? iflib_fast_intr_ctx : iflib_fast_intr_rxtx, NULL, info, name); if (err != 0) return (err); NET_GROUPTASK_INIT(gtask, 0, _task_fn_rx, q); res = irq->ii_res; taskqgroup_attach(tqg, gtask, q, dev, res, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { MPASS(ctx->ifc_admin_task.gt_taskqueue != NULL); GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, name); } void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_PREFETCH; STATE_UNLOCK(ctx); } /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { iflib_fl_t fl; u_int i; for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, info, 0, iflib_sysctl_int_delay, "I", description); } struct sx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_ctx_sx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues; int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; if (bootverbose) device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* First try MSI-X */ if ((msgs = pci_msix_count(dev)) == 0) { if (bootverbose) device_printf(dev, "MSI-X not supported or disabled\n"); goto msi; } bar = ctx->ifc_softc_ctx.isc_msix_bar; /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use MSI-X in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { device_printf(dev, "Unable to map MSI-X table\n"); goto msi; } } admincnt = sctx->isc_admin_intrcnt; #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); if (bootverbose) device_printf(dev, "intr CPUs: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; if (rx_queues > scctx->isc_nrxqsets) rx_queues = scctx->isc_nrxqsets; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (tx_queues > scctx->isc_ntxqsets) tx_queues = scctx->isc_ntxqsets; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } vectors = rx_queues + admincnt; if (msgs < vectors) { device_printf(dev, "insufficient number of MSI-X vectors " "(supported %d, need %d)\n", msgs, vectors); goto msi; } device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues, tx_queues); msgs = vectors; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { if (vectors != msgs) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors " "(got %d, need %d)\n", vectors, msgs); pci_release_msi(dev); if (bar != -1) { bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } goto msi; } device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d MSI-X vectors, err: %d\n", vectors, err); if (bar != -1) { bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { scctx->isc_vectors = 1; device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; const char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; default: printf("%s: unhandled type\n", __func__); return (EINVAL); } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSI-X (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the RX budget"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, "cause TX to abdicate instead of running to completion"); ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, "offset to start using cores at"); SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, "use separate cores for TX and RX"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of TX descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of RX descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times DMA map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "buf_size", CTLFLAG_RD, &fl->ifl_buf_size, 1, "buffer size"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } void iflib_request_reset(if_ctx_t ctx) { STATE_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; STATE_UNLOCK(ctx); } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif #ifdef DEBUGNET static void iflib_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) { if_ctx_t ctx; ctx = if_getsoftc(ifp); CTX_LOCK(ctx); *nrxr = NRXQSETS(ctx); *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; CTX_UNLOCK(ctx); } static void iflib_debugnet_event(if_t ifp, enum debugnet_ev event) { if_ctx_t ctx; if_softc_ctx_t scctx; iflib_fl_t fl; iflib_rxq_t rxq; int i, j; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; switch (event) { case DEBUGNET_START: for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &ctx->ifc_rxqs[i]; for (j = 0; j < rxq->ifr_nfl; j++) { fl = rxq->ifr_fl; fl->ifl_zone = m_getzone(fl->ifl_buf_size); } } iflib_no_tx_batch = 1; break; default: break; } } static int iflib_debugnet_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx; iflib_txq_t txq; int error; ctx = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; error = iflib_encap(txq, &m); if (error == 0) - (void)iflib_txd_db_check(ctx, txq, true, txq->ift_in_use); + (void)iflib_txd_db_check(txq, true); return (error); } static int iflib_debugnet_poll(if_t ifp, int count) { struct epoch_tracker et; if_ctx_t ctx; if_softc_ctx_t scctx; iflib_txq_t txq; int i; ctx = if_getsoftc(ifp); scctx = &ctx->ifc_softc_ctx; if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return (EBUSY); txq = &ctx->ifc_txqs[0]; (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); NET_EPOCH_ENTER(et); for (i = 0; i < scctx->isc_nrxqsets; i++) (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); NET_EPOCH_EXIT(et); return (0); } #endif /* DEBUGNET */ diff --git a/sys/net/iflib.h b/sys/net/iflib.h index eaf8ea33355d..a30740e67b6e 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -1,481 +1,498 @@ /*- * Copyright (c) 2014-2017, Matthew Macy (mmacy@mattmacy.io) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __IFLIB_H_ #define __IFLIB_H_ #include #include #include #include #include #include struct if_clone; /* * The value type for indexing, limits max descriptors * to 65535 can be conditionally redefined to uint32_t * in the future if the need arises. */ typedef uint16_t qidx_t; #define QIDX_INVALID 0xFFFF struct iflib_ctx; typedef struct iflib_ctx *if_ctx_t; struct if_shared_ctx; typedef struct if_shared_ctx *if_shared_ctx_t; struct if_int_delay_info; typedef struct if_int_delay_info *if_int_delay_info_t; struct if_pseudo; typedef struct if_pseudo *if_pseudo_t; /* * File organization: * - public structures * - iflib accessors * - iflib utility functions * - iflib core functions */ typedef struct if_rxd_frag { uint8_t irf_flid; qidx_t irf_idx; uint16_t irf_len; } *if_rxd_frag_t; /* bnxt supports 64 with hardware LRO enabled */ #define IFLIB_MAX_RX_SEGS 64 typedef struct if_rxd_info { /* set by iflib */ uint16_t iri_qsidx; /* qset index */ uint16_t iri_vtag; /* vlan tag - if flag set */ /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ qidx_t iri_cidx; /* consumer index of cq */ if_t iri_ifp; /* driver may have >1 iface per softc */ /* updated by driver */ if_rxd_frag_t iri_frags; uint32_t iri_flowid; /* RSS hash for packet */ uint32_t iri_csum_flags; /* m_pkthdr csum flags */ uint32_t iri_csum_data; /* m_pkthdr csum data */ uint8_t iri_flags; /* mbuf flags for packet */ uint8_t iri_nfrags; /* number of fragments in packet */ uint8_t iri_rsstype; /* RSS hash type */ uint8_t iri_pad; /* any padding in the received data */ } *if_rxd_info_t; typedef struct if_rxd_update { uint64_t *iru_paddrs; qidx_t *iru_idxs; qidx_t iru_pidx; uint16_t iru_qsidx; uint16_t iru_count; uint16_t iru_buf_size; uint8_t iru_flidx; } *if_rxd_update_t; #define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */ #define IPI_TX_IPV4 0x2 /* ethertype IPv4 */ #define IPI_TX_IPV6 0x4 /* ethertype IPv6 */ typedef struct if_pkt_info { bus_dma_segment_t *ipi_segs; /* physical addresses */ uint32_t ipi_len; /* packet length */ uint16_t ipi_qsidx; /* queue set index */ qidx_t ipi_nsegs; /* number of segments */ qidx_t ipi_ndescs; /* number of descriptors used by encap */ uint16_t ipi_flags; /* iflib per-packet flags */ qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ uint8_t ipi_ipproto; /* ip protocol */ uint32_t ipi_csum_flags; /* packet checksum flags */ uint16_t ipi_tso_segsz; /* tso segment size */ uint16_t ipi_vtag; /* VLAN tag */ uint16_t ipi_etype; /* ether header type */ uint8_t ipi_tcp_hflags; /* tcp header flags */ uint8_t ipi_mflags; /* packet mbuf flags */ uint32_t ipi_tcp_seq; /* tcp seqno */ uint32_t __spare0__; } *if_pkt_info_t; typedef struct if_irq { struct resource *ii_res; int __spare0__; void *ii_tag; } *if_irq_t; struct if_int_delay_info { if_ctx_t iidi_ctx; /* Back-pointer to the iflib ctx (softc) */ int iidi_offset; /* Register offset to read/write */ int iidi_value; /* Current value in usecs */ struct sysctl_oid *iidi_oidp; struct sysctl_req *iidi_req; }; typedef enum { IFLIB_INTR_LEGACY, IFLIB_INTR_MSI, IFLIB_INTR_MSIX } iflib_intr_mode_t; /* * This really belongs in pciio.h or some place more general * but this is the only consumer for now. */ typedef struct pci_vendor_info { uint32_t pvi_vendor_id; uint32_t pvi_device_id; uint32_t pvi_subvendor_id; uint32_t pvi_subdevice_id; uint32_t pvi_rev_id; uint32_t pvi_class_mask; const char *pvi_name; } pci_vendor_info_t; #define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name} #define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name} #define PVID_END {0, 0, 0, 0, 0, 0, NULL} /* No drivers in tree currently match on anything except vendor:device. */ #define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:#;U32:#;" \ "U32:#;U32:#;D:#" #define IFLIB_PNP_INFO(b, u, t) \ MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, nitems(t) - 1) typedef struct if_txrx { int (*ift_txd_encap) (void *, if_pkt_info_t); void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx); int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear); int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget); int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); } *if_txrx_t; typedef struct if_softc_ctx { int isc_vectors; int isc_nrxqsets; int isc_ntxqsets; uint16_t __spare0__; uint32_t __spare1__; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ int isc_ntxd[8]; int isc_nrxd[8]; uint32_t isc_txqsizes[8]; uint32_t isc_rxqsizes[8]; /* is there such thing as a descriptor that is more than 248 bytes ? */ uint8_t isc_txd_size[8]; uint8_t isc_rxd_size[8]; int isc_tx_tso_segments_max; int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_tx_csum_flags; int isc_capabilities; int isc_capenable; int isc_rss_table_size; int isc_rss_table_mask; int isc_nrxqsets_max; int isc_ntxqsets_max; uint32_t __spare2__; iflib_intr_mode_t isc_intr; uint16_t isc_rxd_buf_size[8]; /* set at init time by driver, 0 means use iflib-calculated size based on isc_max_frame_size */ uint16_t isc_max_frame_size; /* set at init time by driver */ uint16_t isc_min_frame_size; /* set at init time by driver, only used if IFLIB_NEED_ETHER_PAD is set. */ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */ uint32_t __spare3__; uint32_t __spare4__; uint32_t __spare5__; uint32_t __spare6__; uint32_t __spare7__; uint32_t __spare8__; caddr_t __spare9__; int isc_disable_msix; if_txrx_t isc_txrx; struct ifmedia *isc_media; } *if_softc_ctx_t; /* * Initialization values for device */ struct if_shared_ctx { unsigned isc_magic; driver_t *isc_driver; bus_size_t isc_q_align; bus_size_t isc_tx_maxsize; bus_size_t isc_tx_maxsegsize; bus_size_t isc_tso_maxsize; bus_size_t isc_tso_maxsegsize; bus_size_t isc_rx_maxsize; bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; int isc_admin_intrcnt; /* # of admin/link interrupts */ /* fields necessary for probe */ const pci_vendor_info_t *isc_vendor_info; const char *isc_driver_version; /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); int isc_nrxd_min[8]; int isc_nrxd_default[8]; int isc_nrxd_max[8]; int isc_ntxd_min[8]; int isc_ntxd_default[8]; int isc_ntxd_max[8]; /* actively used during operation */ int isc_nfl __aligned(CACHE_LINE_SIZE); int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ int __spare0__; int isc_tx_reclaim_thresh; int isc_flags; const char *isc_name; }; typedef struct iflib_dma_info { bus_addr_t idi_paddr; caddr_t idi_vaddr; bus_dma_tag_t idi_tag; bus_dmamap_t idi_map; uint32_t idi_size; } *iflib_dma_info_t; #define IFLIB_MAGIC 0xCAFEF00D typedef enum { + /* Interrupt or softirq handles only receive */ IFLIB_INTR_RX, + + /* Interrupt or softirq handles only transmit */ IFLIB_INTR_TX, + + /* + * Interrupt will check for both pending receive + * and available tx credits and dispatch a task + * for one or both depending on the disposition + * of the respective queues. + */ IFLIB_INTR_RXTX, + + /* + * Other interrupt - typically link status and + * or error conditions. + */ IFLIB_INTR_ADMIN, + + /* Softirq (task) for iov handling */ IFLIB_INTR_IOV, } iflib_intr_type_t; /* * Interface has a separate completion queue for RX */ #define IFLIB_HAS_RXCQ 0x01 /* * Driver has already allocated vectors */ #define IFLIB_SKIP_MSIX 0x02 /* * Interface is a virtual function */ #define IFLIB_IS_VF 0x04 /* * Interface has a separate completion queue for TX */ #define IFLIB_HAS_TXCQ 0x08 /* * Interface does checksum in place */ #define IFLIB_NEED_SCRATCH 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ #define IFLIB_TSO_INIT_IP 0x20 /* * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 /* * Driver needs csum zeroed for offloading */ #define IFLIB_NEED_ZERO_CSUM 0x80 /* * Driver needs frames padded to some minimum length */ #define IFLIB_NEED_ETHER_PAD 0x100 /* * Packets can be freed immediately after encap */ #define IFLIB_TXD_ENCAP_PIO 0x00200 /* * Use RX completion handler */ #define IFLIB_RX_COMPLETION 0x00400 /* * Skip refilling cluster free lists */ #define IFLIB_SKIP_CLREFILL 0x00800 /* * Don't reset on hang */ #define IFLIB_NO_HANG_RESET 0x01000 /* * Don't need/want most of the niceties of * queue management */ #define IFLIB_PSEUDO 0x02000 /* * No DMA support needed / wanted */ #define IFLIB_VIRTUAL 0x04000 /* * autogenerate a MAC address */ #define IFLIB_GEN_MAC 0x08000 /* * Interface needs admin task to ignore interface up/down status */ #define IFLIB_ADMIN_ALWAYS_RUN 0x10000 /* * Driver will pass the media */ #define IFLIB_DRIVER_MEDIA 0x20000 /* * When using a single hardware interrupt for the interface, only process RX * interrupts instead of doing combined RX/TX processing. */ #define IFLIB_SINGLE_IRQ_RX_ONLY 0x40000 /* * Don't need/want most of the niceties of * emulating ethernet */ #define IFLIB_PSEUDO_ETHER 0x80000 /* * These enum values are used in iflib_needs_restart to indicate to iflib * functions whether or not the interface needs restarting when certain events * happen. */ enum iflib_restart_event { IFLIB_RESTART_VLAN_CONFIG, }; /* * field accessors */ void *iflib_get_softc(if_ctx_t ctx); device_t iflib_get_dev(if_ctx_t ctx); if_t iflib_get_ifp(if_ctx_t ctx); struct ifmedia *iflib_get_media(if_ctx_t ctx); if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx); if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx); void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]); void iflib_request_reset(if_ctx_t ctx); uint8_t iflib_in_detach(if_ctx_t ctx); uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx); /* * If the driver can plug cleanly in to newbus use these */ int iflib_device_probe(device_t); int iflib_device_attach(device_t); int iflib_device_detach(device_t); int iflib_device_suspend(device_t); int iflib_device_resume(device_t); int iflib_device_shutdown(device_t); /* * Use this instead of iflib_device_probe if the driver should report * BUS_PROBE_VENDOR instead of BUS_PROBE_DEFAULT. (For example, an out-of-tree * driver based on iflib). */ int iflib_device_probe_vendor(device_t); int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *); void iflib_device_iov_uninit(device_t); int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *); /* * If the driver can't plug cleanly in to newbus * use these */ int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp); int iflib_device_deregister(if_ctx_t); int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, const char *name); int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name); void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name); void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name); void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name); void iflib_config_gtask_deinit(struct grouptask *gtask); void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid); void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count); void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); struct sx *iflib_ctx_lock_get(if_ctx_t); void iflib_led_create(if_ctx_t ctx); void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, if_int_delay_info_t, int, int); /* * Pseudo device support */ if_pseudo_t iflib_clone_register(if_shared_ctx_t); void iflib_clone_deregister(if_pseudo_t); #endif /* __IFLIB_H_ */